commit 84f28e2b008ec0d51d66fd7bae09749a90a2fd53
parent 5344093cdf6b73c444dbd42242b03dea455076ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 04:03:09 -0700
cc: add debug-log facility + per-phase heap-usage instrumentation
cc/util.scm gets a debug-log primitive with sticky on/off flag
(default off). When on, debug-log writes display-style lines to fd 2
without aborting (unlike die).
cc/main.scm scans argv for --cc-debug and toggles the flag. Then
logs heap-usage at each phase boundary: start, slurp, lex, pp, parse,
cg-finish. Used to find the 64 MiB heap exhaustion when compiling
cc-e2e/01-kitchen-sink.c.
First findings on the kitchen-sink (9782 bytes input):
start: heap 538328 (~525 KB; cc.scm + prelude resident)
slurp: heap 591944 (slurp added ~54 KB)
lex: heap exhausted before phase completes
So lex is the runaway. Next step: instrument lex internals.
Diffstat:
| M | cc/main.scm | | | 47 | ++++++++++++++++++++++++++++++++++++----------- |
| M | cc/util.scm | | | 32 | ++++++++++++++++++++++++++++++++ |
2 files changed, 68 insertions(+), 11 deletions(-)
diff --git a/cc/main.scm b/cc/main.scm
@@ -30,20 +30,45 @@
(close p)
0)))
+;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan
+;; argv for a sentinel "--cc-debug" flag. When present, debug-log
+;; prints heap usage between phases to fd 2.
+(define (%cc-flag? args flag)
+ (cond ((null? args) #f)
+ ((bv= (car args) flag) #t)
+ (else (%cc-flag? (cdr args) flag))))
+
+(define (%cc-strip-flag args flag)
+ (cond ((null? args) '())
+ ((bv= (car args) flag) (cdr args))
+ (else (cons (car args) (%cc-strip-flag (cdr args) flag)))))
+
(define (cc-main av)
- (let ((args (cdr (cdr av))))
+ (let* ((raw (cdr (cdr av)))
+ (dbg (%cc-flag? raw "--cc-debug"))
+ (args (%cc-strip-flag raw "--cc-debug")))
+ (cond (dbg (debug-log-on!)))
(cond
((or (null? args) (null? (cdr args)))
- (die #f "usage: cc <input.c> <output.P1pp>")))
+ (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>")))
(let* ((in-path (car args))
- (out-path (car (cdr args)))
- (src (%cc-slurp in-path))
- (toks (lex-tokenize src in-path))
- (expanded (pp-expand toks '()))
- (cg (cg-init))
- (ps (make-pstate expanded cg)))
- (parse-translation-unit ps)
- (%cc-write out-path (cg-finish cg))
- 0)))
+ (out-path (car (cdr args))))
+ (debug-log "phase=start" "heap" (heap-usage))
+ (let* ((src (%cc-slurp in-path))
+ (_1 (debug-log "phase=slurp" "heap" (heap-usage)
+ "src-bytes" (bytevector-length src)))
+ (toks (lex-tokenize src in-path))
+ (_2 (debug-log "phase=lex" "heap" (heap-usage)))
+ (expanded (pp-expand toks '()))
+ (_3 (debug-log "phase=pp" "heap" (heap-usage)))
+ (cg (cg-init))
+ (ps (make-pstate expanded cg)))
+ (parse-translation-unit ps)
+ (debug-log "phase=parse" "heap" (heap-usage))
+ (let ((out (cg-finish cg)))
+ (debug-log "phase=cg-finish" "heap" (heap-usage)
+ "out-bytes" (bytevector-length out))
+ (%cc-write out-path out))
+ 0))))
(sys-exit (cc-main (argv)))
diff --git a/cc/util.scm b/cc/util.scm
@@ -190,6 +190,38 @@
(else (loop (+ off (cdr r))))))))))
;; --------------------------------------------------------------------
+;; debug logging
+;;
+;; Cheap sticky on/off: the cc compiler is single-threaded and short-
+;; lived, so a top-level mutable flag is fine. Toggle via
+;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr)
+;; writes one line to fd 2 in the same display-style format as `die`,
+;; but doesn't abort. The intent is to trace heap usage between cc
+;; phases (lex/pp/parse/cg-finish) without compile-time conditionals.
+;; --------------------------------------------------------------------
+(define %debug-log-enabled #f)
+(define (debug-log-on!) (set! %debug-log-enabled #t))
+(define (debug-log-off!) (set! %debug-log-enabled #f))
+(define (debug-log? ) %debug-log-enabled)
+
+(define (debug-log msg . irritants)
+ (cond
+ (%debug-log-enabled
+ (let* ((head (bytevector-append "[cc] " (format "~a" msg)))
+ (tail (if (null? irritants)
+ (list NL-BV)
+ (let walk ((xs irritants) (sep ": ") (acc '()))
+ (if (null? xs)
+ (reverse (cons NL-BV acc))
+ (walk (cdr xs)
+ " "
+ (cons (format "~a" (car xs))
+ (cons sep acc)))))))
+ (out (bv-cat (cons head tail))))
+ (write-bv-fd 2 out)))
+ (else #t)))
+
+;; --------------------------------------------------------------------
;; fresh-name generator (used for cg label counters, etc.)
;; --------------------------------------------------------------------
(define (make-namer prefix)