boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit dc6191087efce80273772703215890a2fcf6193e
parent 3df97ae35fed2a45793173fd0735a647df41e514
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 09:01:58 -0700

cc: pre-allocate global outputs, use mark+reset per fn

Diffstat:
Mcc/cg.scm | 117++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mcc/data.scm | 29+++++++++++++++++++++++++----
Mcc/parse.scm | 56++++++++++++++++++++++++++++++++++++++++++++++++++++----
Mcc/util.scm | 70+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
4 files changed, 214 insertions(+), 58 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -14,7 +14,7 @@ ;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block. (define (%cg-emit-buf cg) - (let ((fb (cg-fn-buf cg))) (if fb fb (cg-text cg)))) + (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg)))) (define (%cg-emit cg bv) (buf-push! (%cg-emit-buf cg) bv)) @@ -24,12 +24,14 @@ (define (%n n) (number->string n 10)) -;; Per-fn metadata (name, ret-slot, ret-type) is stashed on cg-globals -;; under symbol keys that don't collide with bv name keys. +;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...) +;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off +;; cg-globals means cg-globals only mutates when the user emits a real +;; global, which is what parse-fn-body's rewind-safety check needs. (define (%cg-fn-set! cg key val) - (cg-globals-set! cg (alist-update key (lambda (_) val) (cg-globals cg)))) + (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg)))) -(define (%cg-fn-get cg key) (alist-ref/eq key (cg-globals cg))) +(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg))) (define (%cg-fresh-label cg prefix) (let* ((n (cg-label-ctr cg)) @@ -234,7 +236,19 @@ ;; -------------------------------------------------------------------- (define (cg-init) - (%cg (make-buf) (make-buf) (make-buf) '() 0 0 '() '() #f #f 0)) + (%cg (make-buf/cap %BUF-CAP-TEXT) ; text + (make-buf/cap %BUF-CAP-DATA) ; data + (make-buf/cap %BUF-CAP-BSS) ; bss + '() ; vstack + 0 ; frame-hi + 0 ; label-ctr + '() ; str-pool + '() ; globals + '() ; fn-meta + (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn) + (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn) + 0 ; max-outgoing + #f)) ; in-fn? (define (cg-finish cg) ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry) @@ -243,12 +257,11 @@ ;; them unchanged. The 16-byte frame is just enough for %enter's ;; saved-fp/lr to fit; cc__main builds its own frame on top. ;; (CC-CONTRACTS §J.1, §5.4.) - (let ((stub (bv-cat (list - "# entry stub: forwards argc=a0, argv=a1 to cc__main\n" - "%fn(p1_main, 16, {\n" - "%call(&cc__main)\n" - "})\n")))) - (buf-push! (cg-text cg) stub)) + (let ((tb (cg-text cg))) + (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n") + (buf-push! tb "%fn(p1_main, 16, {\n") + (buf-push! tb "%call(&cc__main)\n") + (buf-push! tb "})\n")) ;; Every P1pp translation unit must end with :ELF_end so the ELF ;; header can compute file-size and ph_memsz boundaries. (bv-cat (list (buf-flush (cg-text cg)) @@ -269,12 +282,14 @@ ;; are read. Limit of 15 variadic args (after named) is enough for ;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more. (define (cg-fn-begin/v cg name params return-type variadic?) - (cg-fn-buf-set! cg (make-buf)) - (cg-prologue-buf-set! cg (make-buf)) + (buf-reset! (cg-fn-buf cg)) + (buf-reset! (cg-prologue-buf cg)) + (cg-in-fn?-set! cg #t) (cg-vstack-set! cg '()) (cg-frame-hi-set! cg 0) (cg-label-ctr-set! cg 0) (cg-max-outgoing-set! cg 0) + (cg-fn-meta-set! cg '()) (%cg-fn-set! cg '%fn-name name) (%cg-fn-set! cg '%fn-ret-type return-type) (%cg-fn-set! cg '%indirect-slots '()) @@ -345,34 +360,50 @@ (or first-slot off))))))) (define (cg-fn-end cg) - (let* ((name (%cg-fn-get cg '%fn-name)) - (ret-slot (%cg-fn-get cg '%fn-ret-slot)) - (ret-type (%cg-fn-get cg '%fn-ret-type)) - (locals-hi (cg-frame-hi cg)) + ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! + ;; (memcpy, no allocation). Header/footer pieces go through buf-push! + ;; on cg-text — also memcpy. Net result: zero net heap allocation in + ;; cg-fn-end other than the small (%n N) bvs for staging-bytes / + ;; frame-size, which the surrounding parse-fn-body's heap-rewind! + ;; reclaims. + (let* ((name (%cg-fn-get cg '%fn-name)) + (ret-slot (%cg-fn-get cg '%fn-ret-slot)) + (ret-type (%cg-fn-get cg '%fn-ret-type)) + (locals-hi (cg-frame-hi cg)) (staging-bytes (* 8 (cg-max-outgoing cg))) - (raw-size (+ staging-bytes locals-hi)) - (frame-size (align-up raw-size 16)) - (ret-block - (cond - ((eq? (ctype-kind ret-type) 'void) - (bv-cat (list "::ret\n%li(a0, 0)\n"))) - (else - (bv-cat (list "::ret\n%ld(a0, sp, " - (%cg-slot-expr cg ret-slot) ")\n"))))) - (so-macro - (bv-cat (list "%macro " name "__SO()\n" - (%n staging-bytes) "\n%endm\n"))) - (prologue (buf-flush (cg-prologue-buf cg))) - (body (buf-flush (cg-fn-buf cg))) - (mangled (%cg-mangle-global name)) - (fn-block (bv-cat (list - so-macro - "%fn(" mangled ", " (%n frame-size) ", {\n" - prologue body ret-block - "})\n")))) - (buf-push! (cg-text cg) fn-block) - (cg-fn-buf-set! cg #f) - (cg-prologue-buf-set! cg #f) + (raw-size (+ staging-bytes locals-hi)) + (frame-size (align-up raw-size 16)) + (mangled (%cg-mangle-global name)) + (tb (cg-text cg))) + ;; Now that the body is fully emitted, leave fn dispatch so any + ;; trailing emits in this function (including the ret-block below) + ;; route to cg-text directly. + (cg-in-fn?-set! cg #f) + ;; staging-size macro + (buf-push! tb "%macro ") + (buf-push! tb name) + (buf-push! tb "__SO()\n") + (buf-push! tb (%n staging-bytes)) + (buf-push! tb "\n%endm\n") + ;; %fn header + (buf-push! tb "%fn(") + (buf-push! tb mangled) + (buf-push! tb ", ") + (buf-push! tb (%n frame-size)) + (buf-push! tb ", {\n") + ;; prologue + body, drained byte-for-byte + (buf-drain! tb (cg-prologue-buf cg)) + (buf-drain! tb (cg-fn-buf cg)) + ;; ret block + (buf-push! tb "::ret\n") + (cond + ((eq? (ctype-kind ret-type) 'void) + (buf-push! tb "%li(a0, 0)\n")) + (else + (buf-push! tb "%ld(a0, sp, ") + (buf-push! tb (%cg-slot-expr cg ret-slot)) + (buf-push! tb ")\n"))) + (buf-push! tb "})\n") (cg-vstack-set! cg '()) (cg-frame-hi-set! cg 0) (cg-max-outgoing-set! cg 0) @@ -1100,7 +1131,7 @@ (let* ((lbl (%cg-fresh-lbl cg)) (key (string->symbol (bytevector-append "%sw_cases__" (swctx-end-tag sw)))) - (cur (or (alist-ref/eq key (cg-globals cg)) '())) + (cur (or (%cg-fn-get cg key) '())) (entry (cons const-int lbl))) (%cg-fn-set! cg key (cons entry cur)) (%cg-emit-many cg (list "::" lbl "\n")))) @@ -1113,7 +1144,7 @@ (define (cg-switch-end cg sw) (let* ((tag (swctx-end-tag sw)) (key (string->symbol (bytevector-append "%sw_cases__" tag))) - (cases (reverse (or (alist-ref/eq key (cg-globals cg)) '()))) + (cases (reverse (or (%cg-fn-get cg key) '()))) (default-lbl (swctx-default-lbl sw)) (disp-lbl (bytevector-append "sw_disp_" tag))) (%cg-emit-many cg (list "%break(" tag ")\n" diff --git a/cc/data.scm b/cc/data.scm @@ -137,8 +137,27 @@ ;; -------------------------------------------------------------------- ;; cg — codegen state. Owned by cg.scm. ;; -------------------------------------------------------------------- +;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across +;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains +;; them into cg-text via buf-drain!. No per-fn allocation, which is what +;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely: +;; the destination buf storage lives pre-mark, byte writes are stable +;; across rewind, and the parse/cg scratch dies cleanly. +;; +;; in-fn? discriminates "currently inside a function body" so +;; %cg-emit-buf can route emits to fn-buf during the body and cg-text +;; outside it (entry stub, etc.). +;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern). +;; Stable except when user code adds a global — which is exactly what the +;; parse-fn-body rewind-safety check probes. +;; +;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type, +;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on +;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept +;; out of cg-globals so rewind-safety checks on cg-globals aren't +;; tripped by every fn-begin. (define-record-type cg - (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-buf prologue-buf max-outgoing) + (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?) cg? (text cg-text) (data cg-data) @@ -148,9 +167,11 @@ (label-ctr cg-label-ctr cg-label-ctr-set!) (str-pool cg-str-pool cg-str-pool-set!) (globals cg-globals cg-globals-set!) - (fn-buf cg-fn-buf cg-fn-buf-set!) - (prologue-buf cg-prologue-buf cg-prologue-buf-set!) - (max-outgoing cg-max-outgoing cg-max-outgoing-set!)) + (fn-meta cg-fn-meta cg-fn-meta-set!) + (fn-buf cg-fn-buf) + (prologue-buf cg-prologue-buf) + (max-outgoing cg-max-outgoing cg-max-outgoing-set!) + (in-fn? cg-in-fn? cg-in-fn?-set!)) ;; -------------------------------------------------------------------- ;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1. diff --git a/cc/parse.scm b/cc/parse.scm @@ -867,13 +867,61 @@ (else (cdr rest)))))))))) +;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm: +;; +;; A = parse-decl-or-fn (caller, arena-unaware) +;; B = parse-fn-body (this fn — arena boundary) +;; C = %parse-fn-body-inner (the real per-fn parse + cg work) +;; +;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they +;; were allocated at cg-init (pre-mark) and only mutate via byte writes, +;; so byte-level work survives heap-rewind!. Everything C allocates — +;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch +;; case alists — is post-mark and discarded. +;; +;; The fn-name binding into the surrounding scope (used so recursive +;; calls can resolve the name during the body) is done BEFORE the mark +;; so it survives. Inner scope frames are popped via scope-leave! before +;; the rewind, so their cells become unreachable; rewind reclaims them. +;; +;; Rewind-safety guard: the body might add user-visible globals +;; (block-statics), strings (literals), tags, or typedefs. Those entries +;; are post-mark and would dangle on rewind. We snapshot the relevant +;; alists before parsing and skip the rewind if any changed — paying +;; full heap cost only for functions that genuinely mutate global state. (define (parse-fn-body ps name dt) + ;; Hoist the recursive-binding scope-bind! out of the marked region + ;; so the fn-sym cons survives rewind. + (cond ((not (scope-lookup ps name)) + (scope-bind! ps name + (%sym name 'fn 'extern dt + (bytevector-append "cc__" name))))) + (let* ((cg (ps-cg ps)) + (mark (heap-mark)) + (globals-before (cg-globals cg)) + (str-pool-before (cg-str-pool cg)) + (typedefs-before (ps-typedefs ps)) + (tags-before (ps-tags ps))) + (%parse-fn-body-inner ps name dt) + (cond + ((and (eq? globals-before (cg-globals cg)) + (eq? str-pool-before (cg-str-pool cg)) + (eq? typedefs-before (ps-typedefs ps)) + (eq? tags-before (ps-tags ps))) + ;; cg-fn-meta points at post-mark alist conses (fn metadata, + ;; switch-case lists, indirect-slots). Drop the reference before + ;; rewinding so the cg record holds no dangling pointers — the + ;; next cg-fn-begin/v would reset it anyway, but if this is the + ;; last fn, leaving it set leaves a latent landmine. + (cg-fn-meta-set! cg '()) + (heap-rewind! mark) + (debug-log "fn-rewound" name "heap" (heap-usage))) + (else + (debug-log "fn-kept" name "heap" (heap-usage)))))) + +(define (%parse-fn-body-inner ps name dt) (let* ((e (ctype-ext dt)) (ret (car e)) (par (cadr e)) (var (car (cddr e)))) - (cond ((not (scope-lookup ps name)) - (scope-bind! ps name - (%sym name 'fn 'extern dt - (bytevector-append "cc__" name))))) (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var))) (ps-fn-ctx-set! ps (%fn-ctx name ret (map cdr psyms) var '())) diff --git a/cc/util.scm b/cc/util.scm @@ -108,21 +108,77 @@ (bit-and (+ n mask) (bit-not mask)))) ;; -------------------------------------------------------------------- -;; output buffer (reversed list of bv chunks; flush concats once) +;; output buffer (fixed-size pre-allocated byte storage) +;; +;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`. +;; buf-push! is bytevector-copy! into storage — zero allocation per +;; push, no chunks list to chase. This is what makes per-function +;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed- +;; storage (allocated once, lives pre-mark), so byte-level mutations +;; survive a rewind that discards the parse/cg scratch. +;; +;; Sizing knobs live in one place so they're easy to tune as inputs +;; grow. cg-init picks per-buf caps; the per-fn bufs are reused +;; across functions (reset, not re-allocated). ;; -------------------------------------------------------------------- + +;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a +;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow +;; die() reports off/len/cap so misses are easy to diagnose. +;; +;; Each cap is one less than a power of two: scheme1's bv_capacity_for +;; rounds the requested length up to the smallest power of two STRICTLY +;; GREATER than it, so asking for 2^k bytes actually consumes 2^(k+1) +;; of heap. Using 2^k - 1 gives the next-pow2 step EQUAL to 2^k, which +;; is the storage we actually intended. +(define %BUF-CAP-TEXT 8388607) ; ≈8 MiB: .text + entry stub +(define %BUF-CAP-DATA 2097151) ; ≈2 MiB: .data (strings, globals) +(define %BUF-CAP-BSS 2097151) ; ≈2 MiB: .bss +(define %BUF-CAP-FN 262143) ; ≈256 KiB: per-fn body asm +(define %BUF-CAP-PROLOGUE 16383) ; ≈16 KiB: per-fn prologue +(define %BUF-CAP-DEFAULT 65535) ; ≈64 KiB: make-buf fallback + (define-record-type buf - (%buf chunks) + (%buf storage offset cap) buf? - (chunks buf-chunks buf-chunks-set!)) + (storage buf-storage) ; bv: pre-allocated, never resized + (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far + (cap buf-cap)) ; fixnum: storage capacity + +(define (make-buf/cap cap) + (%buf (make-bytevector cap 0) 0 cap)) -(define (make-buf) (%buf '())) +(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT)) (define (buf-push! b bv) - (buf-chunks-set! b (cons bv (buf-chunks b)))) + (let* ((n (bytevector-length bv)) + (off (buf-offset b)) + (newoff (+ off n))) + (cond + ((> newoff (buf-cap b)) + (die #f "buf overflow" off n (buf-cap b)))) + (bytevector-copy! (buf-storage b) off bv 0 n) + (buf-offset-set! b newoff))) (define (buf-flush b) - ;; Reverse the chunk list once, then concat in one allocation. - (bv-cat (reverse (buf-chunks b)))) + ;; Snapshot the used prefix as a fresh bv. One allocation; the + ;; underlying storage is unchanged. + (bytevector-copy (buf-storage b) 0 (buf-offset b))) + +(define (buf-reset! b) (buf-offset-set! b 0)) + +(define (buf-drain! dst src) + ;; Copy src's used bytes into dst at dst's current write head; reset + ;; src to empty. dst and src must be distinct bufs. + (let* ((slen (buf-offset src)) + (doff (buf-offset dst)) + (newoff (+ doff slen))) + (cond + ((> newoff (buf-cap dst)) + (die #f "buf-drain overflow" doff slen (buf-cap dst)))) + (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen) + (buf-offset-set! dst newoff) + (buf-offset-set! src 0))) ;; -------------------------------------------------------------------- ;; diagnostics + I/O