commit dc6191087efce80273772703215890a2fcf6193e
parent 3df97ae35fed2a45793173fd0735a647df41e514
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 09:01:58 -0700
cc: pre-allocate global outputs, use mark+reset per fn
Diffstat:
| M | cc/cg.scm | | | 117 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
| M | cc/data.scm | | | 29 | +++++++++++++++++++++++++---- |
| M | cc/parse.scm | | | 56 | ++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
| M | cc/util.scm | | | 70 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- |
4 files changed, 214 insertions(+), 58 deletions(-)
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -14,7 +14,7 @@
;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block.
(define (%cg-emit-buf cg)
- (let ((fb (cg-fn-buf cg))) (if fb fb (cg-text cg))))
+ (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg))))
(define (%cg-emit cg bv)
(buf-push! (%cg-emit-buf cg) bv))
@@ -24,12 +24,14 @@
(define (%n n) (number->string n 10))
-;; Per-fn metadata (name, ret-slot, ret-type) is stashed on cg-globals
-;; under symbol keys that don't collide with bv name keys.
+;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...)
+;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off
+;; cg-globals means cg-globals only mutates when the user emits a real
+;; global, which is what parse-fn-body's rewind-safety check needs.
(define (%cg-fn-set! cg key val)
- (cg-globals-set! cg (alist-update key (lambda (_) val) (cg-globals cg))))
+ (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg))))
-(define (%cg-fn-get cg key) (alist-ref/eq key (cg-globals cg)))
+(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg)))
(define (%cg-fresh-label cg prefix)
(let* ((n (cg-label-ctr cg))
@@ -234,7 +236,19 @@
;; --------------------------------------------------------------------
(define (cg-init)
- (%cg (make-buf) (make-buf) (make-buf) '() 0 0 '() '() #f #f 0))
+ (%cg (make-buf/cap %BUF-CAP-TEXT) ; text
+ (make-buf/cap %BUF-CAP-DATA) ; data
+ (make-buf/cap %BUF-CAP-BSS) ; bss
+ '() ; vstack
+ 0 ; frame-hi
+ 0 ; label-ctr
+ '() ; str-pool
+ '() ; globals
+ '() ; fn-meta
+ (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn)
+ (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn)
+ 0 ; max-outgoing
+ #f)) ; in-fn?
(define (cg-finish cg)
;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry)
@@ -243,12 +257,11 @@
;; them unchanged. The 16-byte frame is just enough for %enter's
;; saved-fp/lr to fit; cc__main builds its own frame on top.
;; (CC-CONTRACTS §J.1, §5.4.)
- (let ((stub (bv-cat (list
- "# entry stub: forwards argc=a0, argv=a1 to cc__main\n"
- "%fn(p1_main, 16, {\n"
- "%call(&cc__main)\n"
- "})\n"))))
- (buf-push! (cg-text cg) stub))
+ (let ((tb (cg-text cg)))
+ (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n")
+ (buf-push! tb "%fn(p1_main, 16, {\n")
+ (buf-push! tb "%call(&cc__main)\n")
+ (buf-push! tb "})\n"))
;; Every P1pp translation unit must end with :ELF_end so the ELF
;; header can compute file-size and ph_memsz boundaries.
(bv-cat (list (buf-flush (cg-text cg))
@@ -269,12 +282,14 @@
;; are read. Limit of 15 variadic args (after named) is enough for
;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more.
(define (cg-fn-begin/v cg name params return-type variadic?)
- (cg-fn-buf-set! cg (make-buf))
- (cg-prologue-buf-set! cg (make-buf))
+ (buf-reset! (cg-fn-buf cg))
+ (buf-reset! (cg-prologue-buf cg))
+ (cg-in-fn?-set! cg #t)
(cg-vstack-set! cg '())
(cg-frame-hi-set! cg 0)
(cg-label-ctr-set! cg 0)
(cg-max-outgoing-set! cg 0)
+ (cg-fn-meta-set! cg '())
(%cg-fn-set! cg '%fn-name name)
(%cg-fn-set! cg '%fn-ret-type return-type)
(%cg-fn-set! cg '%indirect-slots '())
@@ -345,34 +360,50 @@
(or first-slot off)))))))
(define (cg-fn-end cg)
- (let* ((name (%cg-fn-get cg '%fn-name))
- (ret-slot (%cg-fn-get cg '%fn-ret-slot))
- (ret-type (%cg-fn-get cg '%fn-ret-type))
- (locals-hi (cg-frame-hi cg))
+ ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain!
+ ;; (memcpy, no allocation). Header/footer pieces go through buf-push!
+ ;; on cg-text — also memcpy. Net result: zero net heap allocation in
+ ;; cg-fn-end other than the small (%n N) bvs for staging-bytes /
+ ;; frame-size, which the surrounding parse-fn-body's heap-rewind!
+ ;; reclaims.
+ (let* ((name (%cg-fn-get cg '%fn-name))
+ (ret-slot (%cg-fn-get cg '%fn-ret-slot))
+ (ret-type (%cg-fn-get cg '%fn-ret-type))
+ (locals-hi (cg-frame-hi cg))
(staging-bytes (* 8 (cg-max-outgoing cg)))
- (raw-size (+ staging-bytes locals-hi))
- (frame-size (align-up raw-size 16))
- (ret-block
- (cond
- ((eq? (ctype-kind ret-type) 'void)
- (bv-cat (list "::ret\n%li(a0, 0)\n")))
- (else
- (bv-cat (list "::ret\n%ld(a0, sp, "
- (%cg-slot-expr cg ret-slot) ")\n")))))
- (so-macro
- (bv-cat (list "%macro " name "__SO()\n"
- (%n staging-bytes) "\n%endm\n")))
- (prologue (buf-flush (cg-prologue-buf cg)))
- (body (buf-flush (cg-fn-buf cg)))
- (mangled (%cg-mangle-global name))
- (fn-block (bv-cat (list
- so-macro
- "%fn(" mangled ", " (%n frame-size) ", {\n"
- prologue body ret-block
- "})\n"))))
- (buf-push! (cg-text cg) fn-block)
- (cg-fn-buf-set! cg #f)
- (cg-prologue-buf-set! cg #f)
+ (raw-size (+ staging-bytes locals-hi))
+ (frame-size (align-up raw-size 16))
+ (mangled (%cg-mangle-global name))
+ (tb (cg-text cg)))
+ ;; Now that the body is fully emitted, leave fn dispatch so any
+ ;; trailing emits in this function (including the ret-block below)
+ ;; route to cg-text directly.
+ (cg-in-fn?-set! cg #f)
+ ;; staging-size macro
+ (buf-push! tb "%macro ")
+ (buf-push! tb name)
+ (buf-push! tb "__SO()\n")
+ (buf-push! tb (%n staging-bytes))
+ (buf-push! tb "\n%endm\n")
+ ;; %fn header
+ (buf-push! tb "%fn(")
+ (buf-push! tb mangled)
+ (buf-push! tb ", ")
+ (buf-push! tb (%n frame-size))
+ (buf-push! tb ", {\n")
+ ;; prologue + body, drained byte-for-byte
+ (buf-drain! tb (cg-prologue-buf cg))
+ (buf-drain! tb (cg-fn-buf cg))
+ ;; ret block
+ (buf-push! tb "::ret\n")
+ (cond
+ ((eq? (ctype-kind ret-type) 'void)
+ (buf-push! tb "%li(a0, 0)\n"))
+ (else
+ (buf-push! tb "%ld(a0, sp, ")
+ (buf-push! tb (%cg-slot-expr cg ret-slot))
+ (buf-push! tb ")\n")))
+ (buf-push! tb "})\n")
(cg-vstack-set! cg '())
(cg-frame-hi-set! cg 0)
(cg-max-outgoing-set! cg 0)
@@ -1100,7 +1131,7 @@
(let* ((lbl (%cg-fresh-lbl cg))
(key (string->symbol
(bytevector-append "%sw_cases__" (swctx-end-tag sw))))
- (cur (or (alist-ref/eq key (cg-globals cg)) '()))
+ (cur (or (%cg-fn-get cg key) '()))
(entry (cons const-int lbl)))
(%cg-fn-set! cg key (cons entry cur))
(%cg-emit-many cg (list "::" lbl "\n"))))
@@ -1113,7 +1144,7 @@
(define (cg-switch-end cg sw)
(let* ((tag (swctx-end-tag sw))
(key (string->symbol (bytevector-append "%sw_cases__" tag)))
- (cases (reverse (or (alist-ref/eq key (cg-globals cg)) '())))
+ (cases (reverse (or (%cg-fn-get cg key) '())))
(default-lbl (swctx-default-lbl sw))
(disp-lbl (bytevector-append "sw_disp_" tag)))
(%cg-emit-many cg (list "%break(" tag ")\n"
diff --git a/cc/data.scm b/cc/data.scm
@@ -137,8 +137,27 @@
;; --------------------------------------------------------------------
;; cg — codegen state. Owned by cg.scm.
;; --------------------------------------------------------------------
+;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across
+;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains
+;; them into cg-text via buf-drain!. No per-fn allocation, which is what
+;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely:
+;; the destination buf storage lives pre-mark, byte writes are stable
+;; across rewind, and the parse/cg scratch dies cleanly.
+;;
+;; in-fn? discriminates "currently inside a function body" so
+;; %cg-emit-buf can route emits to fn-buf during the body and cg-text
+;; outside it (entry stub, etc.).
+;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern).
+;; Stable except when user code adds a global — which is exactly what the
+;; parse-fn-body rewind-safety check probes.
+;;
+;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type,
+;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on
+;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept
+;; out of cg-globals so rewind-safety checks on cg-globals aren't
+;; tripped by every fn-begin.
(define-record-type cg
- (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-buf prologue-buf max-outgoing)
+ (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?)
cg?
(text cg-text)
(data cg-data)
@@ -148,9 +167,11 @@
(label-ctr cg-label-ctr cg-label-ctr-set!)
(str-pool cg-str-pool cg-str-pool-set!)
(globals cg-globals cg-globals-set!)
- (fn-buf cg-fn-buf cg-fn-buf-set!)
- (prologue-buf cg-prologue-buf cg-prologue-buf-set!)
- (max-outgoing cg-max-outgoing cg-max-outgoing-set!))
+ (fn-meta cg-fn-meta cg-fn-meta-set!)
+ (fn-buf cg-fn-buf)
+ (prologue-buf cg-prologue-buf)
+ (max-outgoing cg-max-outgoing cg-max-outgoing-set!)
+ (in-fn? cg-in-fn? cg-in-fn?-set!))
;; --------------------------------------------------------------------
;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1.
diff --git a/cc/parse.scm b/cc/parse.scm
@@ -867,13 +867,61 @@
(else (cdr rest))))))))))
+;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm:
+;;
+;; A = parse-decl-or-fn (caller, arena-unaware)
+;; B = parse-fn-body (this fn — arena boundary)
+;; C = %parse-fn-body-inner (the real per-fn parse + cg work)
+;;
+;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they
+;; were allocated at cg-init (pre-mark) and only mutate via byte writes,
+;; so byte-level work survives heap-rewind!. Everything C allocates —
+;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch
+;; case alists — is post-mark and discarded.
+;;
+;; The fn-name binding into the surrounding scope (used so recursive
+;; calls can resolve the name during the body) is done BEFORE the mark
+;; so it survives. Inner scope frames are popped via scope-leave! before
+;; the rewind, so their cells become unreachable; rewind reclaims them.
+;;
+;; Rewind-safety guard: the body might add user-visible globals
+;; (block-statics), strings (literals), tags, or typedefs. Those entries
+;; are post-mark and would dangle on rewind. We snapshot the relevant
+;; alists before parsing and skip the rewind if any changed — paying
+;; full heap cost only for functions that genuinely mutate global state.
(define (parse-fn-body ps name dt)
+ ;; Hoist the recursive-binding scope-bind! out of the marked region
+ ;; so the fn-sym cons survives rewind.
+ (cond ((not (scope-lookup ps name))
+ (scope-bind! ps name
+ (%sym name 'fn 'extern dt
+ (bytevector-append "cc__" name)))))
+ (let* ((cg (ps-cg ps))
+ (mark (heap-mark))
+ (globals-before (cg-globals cg))
+ (str-pool-before (cg-str-pool cg))
+ (typedefs-before (ps-typedefs ps))
+ (tags-before (ps-tags ps)))
+ (%parse-fn-body-inner ps name dt)
+ (cond
+ ((and (eq? globals-before (cg-globals cg))
+ (eq? str-pool-before (cg-str-pool cg))
+ (eq? typedefs-before (ps-typedefs ps))
+ (eq? tags-before (ps-tags ps)))
+ ;; cg-fn-meta points at post-mark alist conses (fn metadata,
+ ;; switch-case lists, indirect-slots). Drop the reference before
+ ;; rewinding so the cg record holds no dangling pointers — the
+ ;; next cg-fn-begin/v would reset it anyway, but if this is the
+ ;; last fn, leaving it set leaves a latent landmine.
+ (cg-fn-meta-set! cg '())
+ (heap-rewind! mark)
+ (debug-log "fn-rewound" name "heap" (heap-usage)))
+ (else
+ (debug-log "fn-kept" name "heap" (heap-usage))))))
+
+(define (%parse-fn-body-inner ps name dt)
(let* ((e (ctype-ext dt)) (ret (car e))
(par (cadr e)) (var (car (cddr e))))
- (cond ((not (scope-lookup ps name))
- (scope-bind! ps name
- (%sym name 'fn 'extern dt
- (bytevector-append "cc__" name)))))
(let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var)))
(ps-fn-ctx-set! ps
(%fn-ctx name ret (map cdr psyms) var '()))
diff --git a/cc/util.scm b/cc/util.scm
@@ -108,21 +108,77 @@
(bit-and (+ n mask) (bit-not mask))))
;; --------------------------------------------------------------------
-;; output buffer (reversed list of bv chunks; flush concats once)
+;; output buffer (fixed-size pre-allocated byte storage)
+;;
+;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`.
+;; buf-push! is bytevector-copy! into storage — zero allocation per
+;; push, no chunks list to chase. This is what makes per-function
+;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed-
+;; storage (allocated once, lives pre-mark), so byte-level mutations
+;; survive a rewind that discards the parse/cg scratch.
+;;
+;; Sizing knobs live in one place so they're easy to tune as inputs
+;; grow. cg-init picks per-buf caps; the per-fn bufs are reused
+;; across functions (reset, not re-allocated).
;; --------------------------------------------------------------------
+
+;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a
+;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow
+;; die() reports off/len/cap so misses are easy to diagnose.
+;;
+;; Each cap is one less than a power of two: scheme1's bv_capacity_for
+;; rounds the requested length up to the smallest power of two STRICTLY
+;; GREATER than it, so asking for 2^k bytes actually consumes 2^(k+1)
+;; of heap. Using 2^k - 1 gives the next-pow2 step EQUAL to 2^k, which
+;; is the storage we actually intended.
+(define %BUF-CAP-TEXT 8388607) ; ≈8 MiB: .text + entry stub
+(define %BUF-CAP-DATA 2097151) ; ≈2 MiB: .data (strings, globals)
+(define %BUF-CAP-BSS 2097151) ; ≈2 MiB: .bss
+(define %BUF-CAP-FN 262143) ; ≈256 KiB: per-fn body asm
+(define %BUF-CAP-PROLOGUE 16383) ; ≈16 KiB: per-fn prologue
+(define %BUF-CAP-DEFAULT 65535) ; ≈64 KiB: make-buf fallback
+
(define-record-type buf
- (%buf chunks)
+ (%buf storage offset cap)
buf?
- (chunks buf-chunks buf-chunks-set!))
+ (storage buf-storage) ; bv: pre-allocated, never resized
+ (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far
+ (cap buf-cap)) ; fixnum: storage capacity
+
+(define (make-buf/cap cap)
+ (%buf (make-bytevector cap 0) 0 cap))
-(define (make-buf) (%buf '()))
+(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT))
(define (buf-push! b bv)
- (buf-chunks-set! b (cons bv (buf-chunks b))))
+ (let* ((n (bytevector-length bv))
+ (off (buf-offset b))
+ (newoff (+ off n)))
+ (cond
+ ((> newoff (buf-cap b))
+ (die #f "buf overflow" off n (buf-cap b))))
+ (bytevector-copy! (buf-storage b) off bv 0 n)
+ (buf-offset-set! b newoff)))
(define (buf-flush b)
- ;; Reverse the chunk list once, then concat in one allocation.
- (bv-cat (reverse (buf-chunks b))))
+ ;; Snapshot the used prefix as a fresh bv. One allocation; the
+ ;; underlying storage is unchanged.
+ (bytevector-copy (buf-storage b) 0 (buf-offset b)))
+
+(define (buf-reset! b) (buf-offset-set! b 0))
+
+(define (buf-drain! dst src)
+ ;; Copy src's used bytes into dst at dst's current write head; reset
+ ;; src to empty. dst and src must be distinct bufs.
+ (let* ((slen (buf-offset src))
+ (doff (buf-offset dst))
+ (newoff (+ doff slen)))
+ (cond
+ ((> newoff (buf-cap dst))
+ (die #f "buf-drain overflow" doff slen (buf-cap dst))))
+ (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen)
+ (buf-offset-set! dst newoff)
+ (buf-offset-set! src 0)))
;; --------------------------------------------------------------------
;; diagnostics + I/O