boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit d1195e5542db523f480175d9e965855cd9dd62e6
parent 413966a3ad56085de31311652ee172f73d7ed787
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 14:48:49 -0700

scheme1: prelude wrappers for the two heap-arena patterns

call-with-heap-rewind, call-with-scratch-deep-copy, and
call-with-scratch-cycle replace the boilerplate around heap-mark /
heap-rewind! and the use/use/reset triple. Callers stop reaching for
the raw primitives: cc.scm's lex-iter-step, lex-read-ident,
lex-read-string, pp-eval-cexpr, and parse-translation-unit all switch
to the wrappers. Tests 093/115/121-124 stay on the primitives as
contract coverage; 125-heap-wrappers exercises the new surface.

Diffstat:
Mcc/cc.scm | 199++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mdocs/SCHEME1.md | 5++++-
Mscheme1/prelude.scm | 45+++++++++++++++++++++++++++++++++++++++++++++
Atests/scheme1/125-heap-wrappers.expected-exit | 1+
Atests/scheme1/125-heap-wrappers.scm | 109+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 259 insertions(+), 100 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -551,23 +551,24 @@ ;; same level as whitespace. NL tokens are emitted at every physical ;; newline so pp can use them to terminate directives. ;; -;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm): -;; token-producing helpers wrap their inner work in a heap-mark / -;; heap-rewind! arena. Slots that must survive the rewind (start-loc -;; and the integer holders for npos/nline/ncol) are bound *before* -;; the (set! mark (heap-mark)) so the let's env extensions live below -;; the mark. The byte-run scanners' tail-call env frames and any -;; %lex-peek 4-lists are above the mark and get reclaimed. For -;; helpers that produce a fresh bytevector (ident, string), the bv is -;; allocated post-rewind so it persists into the parent arena. -;; Numeric digit runs accumulate inline via %accum-int-while. +;; Heap discipline (per tests/scheme1/093-heap-mark-rewind.scm): +;; token-producing helpers wrap their inner work in call-with-heap- +;; rewind. Slots that must survive the rewind (start-loc and the +;; integer holders for npos/nline/ncol) are bound by an outer let +;; *before* the call-with-heap-rewind invocation, so the let's env +;; extensions live below the mark. The byte-run scanners' tail-call +;; env frames and any %lex-peek 4-lists are above the mark and get +;; reclaimed. For helpers that produce a fresh bytevector (ident, +;; string), the bv is allocated between the two calls so it persists +;; into the parent arena. Numeric digit runs accumulate inline via +;; %accum-int-while. ;; ;; %lex-iter-pull wraps each token-emitting iteration in an outer -;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv +;; call-with-heap-rewind. The helper allocates its own tok+loc+bv ;; above this outer mark; the driver reads the scalar fields and ;; copies any bv contents into %lex-scratch (sticky, pre-mark) before -;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv -;; (for IDENT/STR) sized to the actual content. +;; the wrapper rewinds. Post-rewind it rebuilds a fresh tok+loc and a +;; fresh bv (for IDENT/STR) sized to the actual content. ;; -------------------------------------------------------------------- ;; Cross-rewind transport for IDENT / STR bv values. @@ -925,10 +926,10 @@ ;; Returns (tok npos nline ncol). Caller has already verified that the ;; first byte at `pos` satisfies %ident-start?. ;; -;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run, -;; then we rewind, allocate `name` bv post-rewind so it survives, then -;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The -;; integer slots count/npos/nline/ncol are bound *before* the mark so +;; Two-pass with call-with-heap-rewind: pass 1 (%scan-while) sizes the +;; run, then between the two calls we allocate `name` bv so it survives +;; the second rewind, then pass 2 (%fill-while-bv) writes into it. The +;; integer slots count/npos/nline/ncol are bound by the outer let so ;; they survive both rewinds. ;; -------------------------------------------------------------------- (define (lex-read-ident src pos file) @@ -937,20 +938,18 @@ (define (%lex-read-ident src pos line col file) (let ((start-loc (%loc file line col)) - (count 0) (npos 0) (nline 0) (ncol 0) - (mark 0)) - (set! mark (heap-mark)) - (let ((sres (%scan-while %ident-cont? src pos line col))) - (set! count (car sres)) - (set! npos (car (cdr sres))) - (set! nline (car (cdr (cdr sres)))) - (set! ncol (car (cdr (cdr (cdr sres)))))) - (heap-rewind! mark) - (let ((name (make-bytevector count 0)) - (mark2 0)) - (set! mark2 (heap-mark)) - (%fill-while-bv %ident-cont? src pos line col name 0) - (heap-rewind! mark2) + (count 0) (npos 0) (nline 0) (ncol 0)) + (call-with-heap-rewind + (lambda () + (let ((sres (%scan-while %ident-cont? src pos line col))) + (set! count (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))))) + (let ((name (make-bytevector count 0))) + (call-with-heap-rewind + (lambda () + (%fill-while-bv %ident-cont? src pos line col name 0))) (let ((kw (alist-ref name %keyword-alist))) (cons (if kw (make-tok 'KW kw start-loc) @@ -1147,8 +1146,7 @@ (define (%lex-read-string src pos line col file) (let ((start-loc (%loc file line col)) - (cnt 0) (npos 0) (nline 0) (ncol 0) - (mark 0)) + (cnt 0) (npos 0) (nline 0) (ncol 0)) ;; '"' (34) is a fast-byte and never a trigraph result, so the ;; physical byte at `pos` is exactly the opening quote. (cond @@ -1156,19 +1154,18 @@ (not (= (bytevector-u8-ref src pos) 34))) (die start-loc "internal: string reader on non-quote")) (else - (set! mark (heap-mark)) - (let ((sres (%string-pass src (+ pos 1) line (+ col 1) - file start-loc #f))) - (set! cnt (car sres)) - (set! npos (car (cdr sres))) - (set! nline (car (cdr (cdr sres)))) - (set! ncol (car (cdr (cdr (cdr sres)))))) - (heap-rewind! mark) - (let ((bv (make-bytevector cnt 0)) - (mark2 0)) - (set! mark2 (heap-mark)) - (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv) - (heap-rewind! mark2) + (call-with-heap-rewind + (lambda () + (let ((sres (%string-pass src (+ pos 1) line (+ col 1) + file start-loc #f))) + (set! cnt (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))))) + (let ((bv (make-bytevector cnt 0))) + (call-with-heap-rewind + (lambda () + (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv))) (cons (make-tok 'STR bv start-loc) (list npos nline ncol))))))) @@ -1462,13 +1459,13 @@ ;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`. ;; ;; Heap discipline: each call to %lex-iter-pull is wrapped in a -;; heap-mark / heap-rewind!. All scratch the helper allocates (the +;; call-with-heap-rewind. All scratch the helper allocates (the ;; helper's own tok/loc, the `(cons tok 4-list)` it returns, every ;; bind_params / let* / eval_args env-cons consumed getting in and out) ;; lives above the mark and is reclaimed before returning. Per-token ;; scratch (kind/val/vlen/loc-line/loc-col/npos/nline/ncol/nbol?) is -;; allocated in the outer `let` BEFORE the mark — set! mutates those -;; cells in place across the rewind. Bv contents survive via the +;; allocated in the outer `let` BEFORE the wrapper call — set! mutates +;; those cells in place across the rewind. Bv contents survive via the ;; sticky %lex-scratch buffer + %lex-scratch->bv (allocated post-rewind). ;; The survivors per token are tok (48 B) + loc (40 B) + bv if any. (define-record-type lex-state @@ -1504,14 +1501,15 @@ (line (lex-state-line st)) (col (lex-state-col st)) (bol? (lex-state-bol? st)) - ;; Per-iteration scratch — must be allocated BEFORE the mark - ;; so that set!s after heap-rewind! still find live cells. + ;; Per-iteration scratch — must be allocated BEFORE the call to + ;; call-with-heap-rewind so that set!s issued from inside the + ;; thunk still find live cells after the rewind. (kind #f) (val #f) (vlen 0) (loc-line 1) (loc-col 1) - (npos 0) (nline 1) (ncol 1) (nbol? #f) - (mark 0)) - (set! mark (heap-mark)) - (let* ((sw (%skip-ws-and-comments src pos line col file)) + (npos 0) (nline 1) (ncol 1) (nbol? #f)) + (call-with-heap-rewind + (lambda () + (let* ((sw (%skip-ws-and-comments src pos line col file)) (pos1 (car sw)) (line1 (car (cdr sw))) (col1 (car (cdr (cdr sw)))) @@ -1613,8 +1611,7 @@ (set! kind 'PUNCT) (set! val (tok-value tok)) (set! npos (car rest)) (set! nline (car (cdr rest))) - (set! ncol (car (cdr (cdr rest)))))))) - (heap-rewind! mark) + (set! ncol (car (cdr (cdr rest)))))))))) ;; Reconstruct the survivor below the mark and advance state. (cond ((eq? kind 'EOF) @@ -2393,24 +2390,25 @@ ;; Steps: resolve `defined NAME`, macro-expand the rest, treat any ;; remaining IDENT as 0, parse with recursive descent. ;; -;; Arena boundary (test_93 A→B→C pattern). Everything between the mark -;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list, -;; where `s2` runs the full macro-expansion engine), plus the recursive -;; parser's (val . rest) cons cell at every level. The result is a -;; fixnum, so no pre-allocated out cell is needed — `val` survives the -;; rewind by virtue of being an immediate. The error path goes through -;; `die` (which sys-exits), so no rewind there. +;; Arena boundary (test 093 A→B→C pattern). Everything inside the +;; call-with-heap-rewind thunk is scratch: `s1`/`s2`/`s3` (each a fresh +;; token list, where `s2` runs the full macro-expansion engine), plus +;; the recursive parser's (val . rest) cons cell at every level. The +;; result is a fixnum, so no pre-allocated out cell is needed — `val` +;; survives the rewind by virtue of being an immediate. The error path +;; goes through `die` (which sys-exits), so no rewind there. (define (pp-eval-cexpr toks macros) - (let ((mark (heap-mark))) - (let* ((state (%pp-state macros '() #f 0 #f '() '())) - (s1 (%pp-resolve-defined toks state)) - (s2 (%pp-expand-line s1 state)) - (s3 (%pp-idents-as-zero s2))) - (let-values (((val rest) (%pp-cx-expr s3))) - (cond - ((null? rest) (heap-rewind! mark) val) - (else (die (tok-loc (car rest)) "#if: garbage at end of expression" - (tok-kind (car rest))))))))) + (call-with-heap-rewind + (lambda () + (let* ((state (%pp-state macros '() #f 0 #f '() '())) + (s1 (%pp-resolve-defined toks state)) + (s2 (%pp-expand-line s1 state)) + (s3 (%pp-idents-as-zero s2))) + (let-values (((val rest) (%pp-cx-expr s3))) + (cond + ((null? rest) val) + (else (die (tok-loc (car rest)) "#if: garbage at end of expression" + (tok-kind (car rest)))))))))) (define (%pp-expand-line toks state) (let ((out (make-buf-list))) @@ -4823,9 +4821,9 @@ ;; Phase 3: parse-decl-or-fn boundary — scratch by default, promote ;; surviving roots into main and reset scratch at each top-level decl. ;; -;; Promotion uses the prelude's generic deep-copy (see -;; docs/DEEP-COPY.md); the per-decl identity-preserving map is folded -;; into the deep-copy context. +;; Promotion uses the prelude's generic deep-copy via +;; call-with-scratch-cycle; the per-decl identity-preserving map is +;; folded into a single deep-copy context shared across all roots. ;; ;; Per-decl mutable state retained on the cc side: ;; %promote-pending-completions: list of struct/union ctypes that @@ -4877,28 +4875,31 @@ (define (parse-translation-unit ps) (let loop () - (use-scratch-heap!) - (cond - ((eq? (tok-kind (peek ps)) 'EOF) #t) - (else - (cond - ((debug-log?) - (let ((loc (tok-loc (peek ps)))) - (debug-log "decl" "line" (loc-line loc) - "heap" (heap-usage))))) - (parse-decl-or-fn ps) - (use-main-heap!) - (let ((ctx (make-deep-copy-context))) - (promote-roots! (ps-world ps) ctx) - (promote-iter-buffers! (ps-iter ps) ctx)) - ;; cg-fn-meta may hold scratch alist conses left over from the - ;; just-finished function; cg-fn-begin would reset it, but a - ;; trailing fn means it'd dangle past reset-scratch-heap!. - (cg-fn-meta-set! (ps-cg ps) '()) - (reset-scratch-heap!) - (loop)))) - (use-main-heap!) - #t) + (let ((at-eof? #f)) + (call-with-scratch-cycle + (lambda () + (cond + ((eq? (tok-kind (peek ps)) 'EOF) (set! at-eof? #t)) + (else + (cond + ((debug-log?) + (let ((loc (tok-loc (peek ps)))) + (debug-log "decl" "line" (loc-line loc) + "heap" (heap-usage))))) + (parse-decl-or-fn ps)))) + (lambda () + (cond + ((not at-eof?) + (let ((ctx (make-deep-copy-context))) + (promote-roots! (ps-world ps) ctx) + (promote-iter-buffers! (ps-iter ps) ctx)) + ;; cg-fn-meta may hold scratch alist conses left over from + ;; the just-finished function; cg-fn-begin would reset it, + ;; but a trailing fn means it'd dangle past the reset. + (cg-fn-meta-set! (ps-cg ps) '()))))) + (cond + (at-eof? #t) + (else (loop)))))) (define (parse-decl-or-fn ps) (let-values (((sto b) (parse-decl-spec ps))) diff --git a/docs/SCHEME1.md b/docs/SCHEME1.md @@ -209,7 +209,10 @@ single-value context; 0 or 2+ args produce an MV-pack consumable by `heap-mark` / `heap-rewind!` discard everything allocated after the mark on whichever heap is current; the scratch heap can be reset wholesale. UNSAFE: the runtime does not track liveness, so any -surviving reference into a freed region becomes dangling. +surviving reference into a freed region becomes dangling. Most callers +should reach for the prelude wrappers `call-with-heap-rewind`, +`call-with-scratch-deep-copy`, and `call-with-scratch-cycle` rather +than driving these primitives directly. ## Error semantics diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm @@ -425,6 +425,51 @@ (error "deep-copy: cannot copy procedure" obj)) (else obj))) +;; --- Heap arena wrappers ------------------------------------------- +;; Two-pattern API on top of the raw heap-mark / heap-rewind! / scratch +;; primitives. Most callers should reach for these instead of driving +;; the primitives directly. See tests/scheme1/093-heap-mark-rewind.scm +;; and tests/scheme1/115-two-heap.scm for the underlying contract. + +;; Pattern 1 — mark/rewind. Run thunk inside a heap-mark/rewind arena +;; on the current heap. All heap allocations performed by thunk are +;; reclaimed on return; thunk's return value MUST be either an immediate +;; (fixnum, boolean, symbol, '()) or a cell allocated by the caller +;; *before* call-with-heap-rewind ran. The classic A→B→C shape pre- +;; allocates an `out` cell, calls this with a thunk that mutates `out`, +;; and returns `out` to its own caller. +(define (call-with-heap-rewind thunk) + (let ((mark (heap-mark))) + (let ((r (thunk))) + (heap-rewind! mark) + r))) + +;; Pattern 2a — scratch + deep-copy of a single root. Run thunk with +;; the scratch heap selected, switch back to main, deep-copy thunk's +;; result into main, reset scratch, return the main-heap copy. Use for +;; the common case of "build a graph in scratch, hand the caller a +;; main-heap clone, reclaim scratch". +(define (call-with-scratch-deep-copy thunk) + (use-scratch-heap!) + (let ((s (thunk))) + (use-main-heap!) + (let ((m (deep-copy (make-deep-copy-context) s))) + (reset-scratch-heap!) + m))) + +;; Pattern 2b — scratch + multi-root promote. Lower-level cycle: select +;; scratch, run (in-scratch), select main, run (promote), reset scratch. +;; The (promote) thunk is responsible for deep-copying every survivor +;; root from scratch into main (typically across several caller-owned +;; slots, sharing a single deep-copy context). Returns unspec; survivors +;; must reach the caller via slots that promote rewrites in place. +(define (call-with-scratch-cycle in-scratch promote) + (use-scratch-heap!) + (in-scratch) + (use-main-heap!) + (promote) + (reset-scratch-heap!)) + ;; --- Vector <-> list -- need make-vector / vector-ref / vector-set! / ;; vector-length, none of which are yet primitives. ------------------ ; (define (vector->list-helper v i acc) diff --git a/tests/scheme1/125-heap-wrappers.expected-exit b/tests/scheme1/125-heap-wrappers.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/125-heap-wrappers.scm b/tests/scheme1/125-heap-wrappers.scm @@ -0,0 +1,109 @@ +; Prelude heap-arena wrappers built on top of heap-mark / heap-rewind! +; and the two-heap primitives. See scheme1/prelude.scm for the contract. + +(define-record-type cell + (mk-cell head tail) + cell? + (head cell-head) + (tail cell-tail)) + +;; ---- call-with-heap-rewind: A→B→C with the wrapper ---------------- +;; B pre-allocates `out`, calls the wrapper to run C in scratch, returns +;; out. The thunk's return value is dropped — survivors travel via +;; mutation of the pre-mark `out` cell. +(define (sum-and-count-into out xs) + (let loop ((xs xs) (sum 0) (count 0)) + (cond + ((null? xs) + (set-car! out sum) + (set-cdr! out count)) + (else (loop (cdr xs) (+ sum (car xs)) (+ count 1)))))) + +(define (sum-and-count xs) + (let ((out (cons 0 0))) + (call-with-heap-rewind + (lambda () (sum-and-count-into out xs))) + out)) + +(define input (cons 1 (cons 2 (cons 3 (cons 4 (cons 5 '())))))) + +;; Wrapper version: scratch reclaimed. +(define m0 (heap-mark)) +(define r (sum-and-count input)) +(define m1 (heap-mark)) + +;; No-rewind sibling: same call, no wrapper. Recursion-loop env frames +;; leak. +(define out2 (cons 0 0)) +(define m2-before (heap-mark)) +(sum-and-count-into out2 input) +(define m2-after (heap-mark)) + +(if (= 15 (car r)) 0 (sys-exit 1)) +(if (= 5 (cdr r)) 0 (sys-exit 2)) +;; Wrapped delta must be strictly less than the raw delta. +(if (< (- m1 m0) (- m2-after m2-before)) 0 (sys-exit 3)) + +;; Immediate return value flows back through the wrapper. +(define v + (call-with-heap-rewind + (lambda () + ;; Allocate transient garbage that must be reclaimed. + (let* ((tmp1 (cons 1 2)) + (tmp2 (cons tmp1 tmp1))) + 99)))) +(if (= 99 v) 0 (sys-exit 4)) + +;; ---- call-with-scratch-deep-copy: scratch-build, main-clone, reset -- +(define (build-list-in-current n) + (let loop ((i n) (acc '())) + (if (= i 0) acc (loop (- i 1) (cons i acc))))) + +(define m1 (call-with-scratch-deep-copy + (lambda () (build-list-in-current 4)))) +(if (heap-in-main? m1) 0 (sys-exit 10)) +(if (equal? m1 '(1 2 3 4)) 0 (sys-exit 11)) + +;; Survives even after a fresh scratch cycle stomps the original arena. +(define m2 (call-with-scratch-deep-copy + (lambda () (mk-cell 'tag (build-list-in-current 3))))) +(if (cell? m2) 0 (sys-exit 12)) +(if (eq? 'tag (cell-head m2)) 0 (sys-exit 13)) +(if (equal? '(1 2 3) (cell-tail m2)) 0 (sys-exit 14)) +(if (heap-in-main? m2) 0 (sys-exit 15)) +(if (heap-in-main? (cell-tail m2)) 0 (sys-exit 16)) +;; m1 still readable after the second cycle's reset. +(if (equal? m1 '(1 2 3 4)) 0 (sys-exit 17)) + +;; ---- call-with-scratch-cycle: multi-root promote ------------------- +;; Caller-owned slots; each cycle parses garbage in scratch and rewrites +;; the slots in main via a shared deep-copy context. +(define slot-a #f) +(define slot-b #f) + +(define (one-cycle) + (call-with-scratch-cycle + (lambda () + ;; "Parse" output — both slots end up scratch-resident. + (set! slot-a (cons 'a (cons 1 (cons 2 '())))) + (set! slot-b (mk-cell 'b slot-a))) ; b shares slot-a's tail + (lambda () + (let ((ctx (make-deep-copy-context))) + (set! slot-a (deep-copy ctx slot-a)) + (set! slot-b (deep-copy ctx slot-b)))))) + +(one-cycle) +(if (heap-in-main? slot-a) 0 (sys-exit 20)) +(if (heap-in-main? slot-b) 0 (sys-exit 21)) +(if (eq? slot-a (cell-tail slot-b)) 0 (sys-exit 22)) ; sharing preserved +(if (equal? slot-a '(a 1 2)) 0 (sys-exit 23)) +(if (eq? 'b (cell-head slot-b)) 0 (sys-exit 24)) + +;; A second cycle resets scratch; prior slot values must remain valid. +(define old-a slot-a) +(define old-b slot-b) +(one-cycle) +(if (equal? old-a '(a 1 2)) 0 (sys-exit 25)) +(if (eq? 'b (cell-head old-b)) 0 (sys-exit 26)) + +(sys-exit 42)