boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 84f28e2b008ec0d51d66fd7bae09749a90a2fd53
parent 5344093cdf6b73c444dbd42242b03dea455076ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 04:03:09 -0700

cc: add debug-log facility + per-phase heap-usage instrumentation

cc/util.scm gets a debug-log primitive with sticky on/off flag
(default off). When on, debug-log writes display-style lines to fd 2
without aborting (unlike die).

cc/main.scm scans argv for --cc-debug and toggles the flag. Then
logs heap-usage at each phase boundary: start, slurp, lex, pp, parse,
cg-finish. Used to find the 64 MiB heap exhaustion when compiling
cc-e2e/01-kitchen-sink.c.

First findings on the kitchen-sink (9782 bytes input):
  start:   heap 538328  (~525 KB; cc.scm + prelude resident)
  slurp:   heap 591944  (slurp added ~54 KB)
  lex:     heap exhausted before phase completes

So lex is the runaway. Next step: instrument lex internals.

Diffstat:
Mcc/main.scm | 47++++++++++++++++++++++++++++++++++++-----------
Mcc/util.scm | 32++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/cc/main.scm b/cc/main.scm @@ -30,20 +30,45 @@ (close p) 0))) +;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan +;; argv for a sentinel "--cc-debug" flag. When present, debug-log +;; prints heap usage between phases to fd 2. +(define (%cc-flag? args flag) + (cond ((null? args) #f) + ((bv= (car args) flag) #t) + (else (%cc-flag? (cdr args) flag)))) + +(define (%cc-strip-flag args flag) + (cond ((null? args) '()) + ((bv= (car args) flag) (cdr args)) + (else (cons (car args) (%cc-strip-flag (cdr args) flag))))) + (define (cc-main av) - (let ((args (cdr (cdr av)))) + (let* ((raw (cdr (cdr av))) + (dbg (%cc-flag? raw "--cc-debug")) + (args (%cc-strip-flag raw "--cc-debug"))) + (cond (dbg (debug-log-on!))) (cond ((or (null? args) (null? (cdr args))) - (die #f "usage: cc <input.c> <output.P1pp>"))) + (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>"))) (let* ((in-path (car args)) - (out-path (car (cdr args))) - (src (%cc-slurp in-path)) - (toks (lex-tokenize src in-path)) - (expanded (pp-expand toks '())) - (cg (cg-init)) - (ps (make-pstate expanded cg))) - (parse-translation-unit ps) - (%cc-write out-path (cg-finish cg)) - 0))) + (out-path (car (cdr args)))) + (debug-log "phase=start" "heap" (heap-usage)) + (let* ((src (%cc-slurp in-path)) + (_1 (debug-log "phase=slurp" "heap" (heap-usage) + "src-bytes" (bytevector-length src))) + (toks (lex-tokenize src in-path)) + (_2 (debug-log "phase=lex" "heap" (heap-usage))) + (expanded (pp-expand toks '())) + (_3 (debug-log "phase=pp" "heap" (heap-usage))) + (cg (cg-init)) + (ps (make-pstate expanded cg))) + (parse-translation-unit ps) + (debug-log "phase=parse" "heap" (heap-usage)) + (let ((out (cg-finish cg))) + (debug-log "phase=cg-finish" "heap" (heap-usage) + "out-bytes" (bytevector-length out)) + (%cc-write out-path out)) + 0)))) (sys-exit (cc-main (argv))) diff --git a/cc/util.scm b/cc/util.scm @@ -190,6 +190,38 @@ (else (loop (+ off (cdr r)))))))))) ;; -------------------------------------------------------------------- +;; debug logging +;; +;; Cheap sticky on/off: the cc compiler is single-threaded and short- +;; lived, so a top-level mutable flag is fine. Toggle via +;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr) +;; writes one line to fd 2 in the same display-style format as `die`, +;; but doesn't abort. The intent is to trace heap usage between cc +;; phases (lex/pp/parse/cg-finish) without compile-time conditionals. +;; -------------------------------------------------------------------- +(define %debug-log-enabled #f) +(define (debug-log-on!) (set! %debug-log-enabled #t)) +(define (debug-log-off!) (set! %debug-log-enabled #f)) +(define (debug-log? ) %debug-log-enabled) + +(define (debug-log msg . irritants) + (cond + (%debug-log-enabled + (let* ((head (bytevector-append "[cc] " (format "~a" msg))) + (tail (if (null? irritants) + (list NL-BV) + (let walk ((xs irritants) (sep ": ") (acc '())) + (if (null? xs) + (reverse (cons NL-BV acc)) + (walk (cdr xs) + " " + (cons (format "~a" (car xs)) + (cons sep acc))))))) + (out (bv-cat (cons head tail)))) + (write-bv-fd 2 out))) + (else #t))) + +;; -------------------------------------------------------------------- ;; fresh-name generator (used for cg label counters, etc.) ;; -------------------------------------------------------------------- (define (make-namer prefix)