commit d1195e5542db523f480175d9e965855cd9dd62e6
parent 413966a3ad56085de31311652ee172f73d7ed787
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 14:48:49 -0700
scheme1: prelude wrappers for the two heap-arena patterns
call-with-heap-rewind, call-with-scratch-deep-copy, and
call-with-scratch-cycle replace the boilerplate around heap-mark /
heap-rewind! and the use/use/reset triple. Callers stop reaching for
the raw primitives: cc.scm's lex-iter-step, lex-read-ident,
lex-read-string, pp-eval-cexpr, and parse-translation-unit all switch
to the wrappers. Tests 093/115/121-124 stay on the primitives as
contract coverage; 125-heap-wrappers exercises the new surface.
Diffstat:
5 files changed, 259 insertions(+), 100 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -551,23 +551,24 @@
;; same level as whitespace. NL tokens are emitted at every physical
;; newline so pp can use them to terminate directives.
;;
-;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm):
-;; token-producing helpers wrap their inner work in a heap-mark /
-;; heap-rewind! arena. Slots that must survive the rewind (start-loc
-;; and the integer holders for npos/nline/ncol) are bound *before*
-;; the (set! mark (heap-mark)) so the let's env extensions live below
-;; the mark. The byte-run scanners' tail-call env frames and any
-;; %lex-peek 4-lists are above the mark and get reclaimed. For
-;; helpers that produce a fresh bytevector (ident, string), the bv is
-;; allocated post-rewind so it persists into the parent arena.
-;; Numeric digit runs accumulate inline via %accum-int-while.
+;; Heap discipline (per tests/scheme1/093-heap-mark-rewind.scm):
+;; token-producing helpers wrap their inner work in call-with-heap-
+;; rewind. Slots that must survive the rewind (start-loc and the
+;; integer holders for npos/nline/ncol) are bound by an outer let
+;; *before* the call-with-heap-rewind invocation, so the let's env
+;; extensions live below the mark. The byte-run scanners' tail-call
+;; env frames and any %lex-peek 4-lists are above the mark and get
+;; reclaimed. For helpers that produce a fresh bytevector (ident,
+;; string), the bv is allocated between the two calls so it persists
+;; into the parent arena. Numeric digit runs accumulate inline via
+;; %accum-int-while.
;;
;; %lex-iter-pull wraps each token-emitting iteration in an outer
-;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv
+;; call-with-heap-rewind. The helper allocates its own tok+loc+bv
;; above this outer mark; the driver reads the scalar fields and
;; copies any bv contents into %lex-scratch (sticky, pre-mark) before
-;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv
-;; (for IDENT/STR) sized to the actual content.
+;; the wrapper rewinds. Post-rewind it rebuilds a fresh tok+loc and a
+;; fresh bv (for IDENT/STR) sized to the actual content.
;; --------------------------------------------------------------------
;; Cross-rewind transport for IDENT / STR bv values.
@@ -925,10 +926,10 @@
;; Returns (tok npos nline ncol). Caller has already verified that the
;; first byte at `pos` satisfies %ident-start?.
;;
-;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run,
-;; then we rewind, allocate `name` bv post-rewind so it survives, then
-;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The
-;; integer slots count/npos/nline/ncol are bound *before* the mark so
+;; Two-pass with call-with-heap-rewind: pass 1 (%scan-while) sizes the
+;; run, then between the two calls we allocate `name` bv so it survives
+;; the second rewind, then pass 2 (%fill-while-bv) writes into it. The
+;; integer slots count/npos/nline/ncol are bound by the outer let so
;; they survive both rewinds.
;; --------------------------------------------------------------------
(define (lex-read-ident src pos file)
@@ -937,20 +938,18 @@
(define (%lex-read-ident src pos line col file)
(let ((start-loc (%loc file line col))
- (count 0) (npos 0) (nline 0) (ncol 0)
- (mark 0))
- (set! mark (heap-mark))
- (let ((sres (%scan-while %ident-cont? src pos line col)))
- (set! count (car sres))
- (set! npos (car (cdr sres)))
- (set! nline (car (cdr (cdr sres))))
- (set! ncol (car (cdr (cdr (cdr sres))))))
- (heap-rewind! mark)
- (let ((name (make-bytevector count 0))
- (mark2 0))
- (set! mark2 (heap-mark))
- (%fill-while-bv %ident-cont? src pos line col name 0)
- (heap-rewind! mark2)
+ (count 0) (npos 0) (nline 0) (ncol 0))
+ (call-with-heap-rewind
+ (lambda ()
+ (let ((sres (%scan-while %ident-cont? src pos line col)))
+ (set! count (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))))
+ (let ((name (make-bytevector count 0)))
+ (call-with-heap-rewind
+ (lambda ()
+ (%fill-while-bv %ident-cont? src pos line col name 0)))
(let ((kw (alist-ref name %keyword-alist)))
(cons (if kw
(make-tok 'KW kw start-loc)
@@ -1147,8 +1146,7 @@
(define (%lex-read-string src pos line col file)
(let ((start-loc (%loc file line col))
- (cnt 0) (npos 0) (nline 0) (ncol 0)
- (mark 0))
+ (cnt 0) (npos 0) (nline 0) (ncol 0))
;; '"' (34) is a fast-byte and never a trigraph result, so the
;; physical byte at `pos` is exactly the opening quote.
(cond
@@ -1156,19 +1154,18 @@
(not (= (bytevector-u8-ref src pos) 34)))
(die start-loc "internal: string reader on non-quote"))
(else
- (set! mark (heap-mark))
- (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
- file start-loc #f)))
- (set! cnt (car sres))
- (set! npos (car (cdr sres)))
- (set! nline (car (cdr (cdr sres))))
- (set! ncol (car (cdr (cdr (cdr sres))))))
- (heap-rewind! mark)
- (let ((bv (make-bytevector cnt 0))
- (mark2 0))
- (set! mark2 (heap-mark))
- (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)
- (heap-rewind! mark2)
+ (call-with-heap-rewind
+ (lambda ()
+ (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
+ file start-loc #f)))
+ (set! cnt (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))))
+ (let ((bv (make-bytevector cnt 0)))
+ (call-with-heap-rewind
+ (lambda ()
+ (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)))
(cons (make-tok 'STR bv start-loc)
(list npos nline ncol)))))))
@@ -1462,13 +1459,13 @@
;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`.
;;
;; Heap discipline: each call to %lex-iter-pull is wrapped in a
-;; heap-mark / heap-rewind!. All scratch the helper allocates (the
+;; call-with-heap-rewind. All scratch the helper allocates (the
;; helper's own tok/loc, the `(cons tok 4-list)` it returns, every
;; bind_params / let* / eval_args env-cons consumed getting in and out)
;; lives above the mark and is reclaimed before returning. Per-token
;; scratch (kind/val/vlen/loc-line/loc-col/npos/nline/ncol/nbol?) is
-;; allocated in the outer `let` BEFORE the mark — set! mutates those
-;; cells in place across the rewind. Bv contents survive via the
+;; allocated in the outer `let` BEFORE the wrapper call — set! mutates
+;; those cells in place across the rewind. Bv contents survive via the
;; sticky %lex-scratch buffer + %lex-scratch->bv (allocated post-rewind).
;; The survivors per token are tok (48 B) + loc (40 B) + bv if any.
(define-record-type lex-state
@@ -1504,14 +1501,15 @@
(line (lex-state-line st))
(col (lex-state-col st))
(bol? (lex-state-bol? st))
- ;; Per-iteration scratch — must be allocated BEFORE the mark
- ;; so that set!s after heap-rewind! still find live cells.
+ ;; Per-iteration scratch — must be allocated BEFORE the call to
+ ;; call-with-heap-rewind so that set!s issued from inside the
+ ;; thunk still find live cells after the rewind.
(kind #f) (val #f) (vlen 0)
(loc-line 1) (loc-col 1)
- (npos 0) (nline 1) (ncol 1) (nbol? #f)
- (mark 0))
- (set! mark (heap-mark))
- (let* ((sw (%skip-ws-and-comments src pos line col file))
+ (npos 0) (nline 1) (ncol 1) (nbol? #f))
+ (call-with-heap-rewind
+ (lambda ()
+ (let* ((sw (%skip-ws-and-comments src pos line col file))
(pos1 (car sw))
(line1 (car (cdr sw)))
(col1 (car (cdr (cdr sw))))
@@ -1613,8 +1611,7 @@
(set! kind 'PUNCT) (set! val (tok-value tok))
(set! npos (car rest))
(set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))))
- (heap-rewind! mark)
+ (set! ncol (car (cdr (cdr rest))))))))))
;; Reconstruct the survivor below the mark and advance state.
(cond
((eq? kind 'EOF)
@@ -2393,24 +2390,25 @@
;; Steps: resolve `defined NAME`, macro-expand the rest, treat any
;; remaining IDENT as 0, parse with recursive descent.
;;
-;; Arena boundary (test_93 A→B→C pattern). Everything between the mark
-;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list,
-;; where `s2` runs the full macro-expansion engine), plus the recursive
-;; parser's (val . rest) cons cell at every level. The result is a
-;; fixnum, so no pre-allocated out cell is needed — `val` survives the
-;; rewind by virtue of being an immediate. The error path goes through
-;; `die` (which sys-exits), so no rewind there.
+;; Arena boundary (test 093 A→B→C pattern). Everything inside the
+;; call-with-heap-rewind thunk is scratch: `s1`/`s2`/`s3` (each a fresh
+;; token list, where `s2` runs the full macro-expansion engine), plus
+;; the recursive parser's (val . rest) cons cell at every level. The
+;; result is a fixnum, so no pre-allocated out cell is needed — `val`
+;; survives the rewind by virtue of being an immediate. The error path
+;; goes through `die` (which sys-exits), so no rewind there.
(define (pp-eval-cexpr toks macros)
- (let ((mark (heap-mark)))
- (let* ((state (%pp-state macros '() #f 0 #f '() '()))
- (s1 (%pp-resolve-defined toks state))
- (s2 (%pp-expand-line s1 state))
- (s3 (%pp-idents-as-zero s2)))
- (let-values (((val rest) (%pp-cx-expr s3)))
- (cond
- ((null? rest) (heap-rewind! mark) val)
- (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
- (tok-kind (car rest)))))))))
+ (call-with-heap-rewind
+ (lambda ()
+ (let* ((state (%pp-state macros '() #f 0 #f '() '()))
+ (s1 (%pp-resolve-defined toks state))
+ (s2 (%pp-expand-line s1 state))
+ (s3 (%pp-idents-as-zero s2)))
+ (let-values (((val rest) (%pp-cx-expr s3)))
+ (cond
+ ((null? rest) val)
+ (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
+ (tok-kind (car rest))))))))))
(define (%pp-expand-line toks state)
(let ((out (make-buf-list)))
@@ -4823,9 +4821,9 @@
;; Phase 3: parse-decl-or-fn boundary — scratch by default, promote
;; surviving roots into main and reset scratch at each top-level decl.
;;
-;; Promotion uses the prelude's generic deep-copy (see
-;; docs/DEEP-COPY.md); the per-decl identity-preserving map is folded
-;; into the deep-copy context.
+;; Promotion uses the prelude's generic deep-copy via
+;; call-with-scratch-cycle; the per-decl identity-preserving map is
+;; folded into a single deep-copy context shared across all roots.
;;
;; Per-decl mutable state retained on the cc side:
;; %promote-pending-completions: list of struct/union ctypes that
@@ -4877,28 +4875,31 @@
(define (parse-translation-unit ps)
(let loop ()
- (use-scratch-heap!)
- (cond
- ((eq? (tok-kind (peek ps)) 'EOF) #t)
- (else
- (cond
- ((debug-log?)
- (let ((loc (tok-loc (peek ps))))
- (debug-log "decl" "line" (loc-line loc)
- "heap" (heap-usage)))))
- (parse-decl-or-fn ps)
- (use-main-heap!)
- (let ((ctx (make-deep-copy-context)))
- (promote-roots! (ps-world ps) ctx)
- (promote-iter-buffers! (ps-iter ps) ctx))
- ;; cg-fn-meta may hold scratch alist conses left over from the
- ;; just-finished function; cg-fn-begin would reset it, but a
- ;; trailing fn means it'd dangle past reset-scratch-heap!.
- (cg-fn-meta-set! (ps-cg ps) '())
- (reset-scratch-heap!)
- (loop))))
- (use-main-heap!)
- #t)
+ (let ((at-eof? #f))
+ (call-with-scratch-cycle
+ (lambda ()
+ (cond
+ ((eq? (tok-kind (peek ps)) 'EOF) (set! at-eof? #t))
+ (else
+ (cond
+ ((debug-log?)
+ (let ((loc (tok-loc (peek ps))))
+ (debug-log "decl" "line" (loc-line loc)
+ "heap" (heap-usage)))))
+ (parse-decl-or-fn ps))))
+ (lambda ()
+ (cond
+ ((not at-eof?)
+ (let ((ctx (make-deep-copy-context)))
+ (promote-roots! (ps-world ps) ctx)
+ (promote-iter-buffers! (ps-iter ps) ctx))
+ ;; cg-fn-meta may hold scratch alist conses left over from
+ ;; the just-finished function; cg-fn-begin would reset it,
+ ;; but a trailing fn means it'd dangle past the reset.
+ (cg-fn-meta-set! (ps-cg ps) '())))))
+ (cond
+ (at-eof? #t)
+ (else (loop))))))
(define (parse-decl-or-fn ps)
(let-values (((sto b) (parse-decl-spec ps)))
diff --git a/docs/SCHEME1.md b/docs/SCHEME1.md
@@ -209,7 +209,10 @@ single-value context; 0 or 2+ args produce an MV-pack consumable by
`heap-mark` / `heap-rewind!` discard everything allocated after the
mark on whichever heap is current; the scratch heap can be reset
wholesale. UNSAFE: the runtime does not track liveness, so any
-surviving reference into a freed region becomes dangling.
+surviving reference into a freed region becomes dangling. Most callers
+should reach for the prelude wrappers `call-with-heap-rewind`,
+`call-with-scratch-deep-copy`, and `call-with-scratch-cycle` rather
+than driving these primitives directly.
## Error semantics
diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm
@@ -425,6 +425,51 @@
(error "deep-copy: cannot copy procedure" obj))
(else obj)))
+;; --- Heap arena wrappers -------------------------------------------
+;; Two-pattern API on top of the raw heap-mark / heap-rewind! / scratch
+;; primitives. Most callers should reach for these instead of driving
+;; the primitives directly. See tests/scheme1/093-heap-mark-rewind.scm
+;; and tests/scheme1/115-two-heap.scm for the underlying contract.
+
+;; Pattern 1 — mark/rewind. Run thunk inside a heap-mark/rewind arena
+;; on the current heap. All heap allocations performed by thunk are
+;; reclaimed on return; thunk's return value MUST be either an immediate
+;; (fixnum, boolean, symbol, '()) or a cell allocated by the caller
+;; *before* call-with-heap-rewind ran. The classic A→B→C shape pre-
+;; allocates an `out` cell, calls this with a thunk that mutates `out`,
+;; and returns `out` to its own caller.
+(define (call-with-heap-rewind thunk)
+ (let ((mark (heap-mark)))
+ (let ((r (thunk)))
+ (heap-rewind! mark)
+ r)))
+
+;; Pattern 2a — scratch + deep-copy of a single root. Run thunk with
+;; the scratch heap selected, switch back to main, deep-copy thunk's
+;; result into main, reset scratch, return the main-heap copy. Use for
+;; the common case of "build a graph in scratch, hand the caller a
+;; main-heap clone, reclaim scratch".
+(define (call-with-scratch-deep-copy thunk)
+ (use-scratch-heap!)
+ (let ((s (thunk)))
+ (use-main-heap!)
+ (let ((m (deep-copy (make-deep-copy-context) s)))
+ (reset-scratch-heap!)
+ m)))
+
+;; Pattern 2b — scratch + multi-root promote. Lower-level cycle: select
+;; scratch, run (in-scratch), select main, run (promote), reset scratch.
+;; The (promote) thunk is responsible for deep-copying every survivor
+;; root from scratch into main (typically across several caller-owned
+;; slots, sharing a single deep-copy context). Returns unspec; survivors
+;; must reach the caller via slots that promote rewrites in place.
+(define (call-with-scratch-cycle in-scratch promote)
+ (use-scratch-heap!)
+ (in-scratch)
+ (use-main-heap!)
+ (promote)
+ (reset-scratch-heap!))
+
;; --- Vector <-> list -- need make-vector / vector-ref / vector-set! /
;; vector-length, none of which are yet primitives. ------------------
; (define (vector->list-helper v i acc)
diff --git a/tests/scheme1/125-heap-wrappers.expected-exit b/tests/scheme1/125-heap-wrappers.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/125-heap-wrappers.scm b/tests/scheme1/125-heap-wrappers.scm
@@ -0,0 +1,109 @@
+; Prelude heap-arena wrappers built on top of heap-mark / heap-rewind!
+; and the two-heap primitives. See scheme1/prelude.scm for the contract.
+
+(define-record-type cell
+ (mk-cell head tail)
+ cell?
+ (head cell-head)
+ (tail cell-tail))
+
+;; ---- call-with-heap-rewind: A→B→C with the wrapper ----------------
+;; B pre-allocates `out`, calls the wrapper to run C in scratch, returns
+;; out. The thunk's return value is dropped — survivors travel via
+;; mutation of the pre-mark `out` cell.
+(define (sum-and-count-into out xs)
+ (let loop ((xs xs) (sum 0) (count 0))
+ (cond
+ ((null? xs)
+ (set-car! out sum)
+ (set-cdr! out count))
+ (else (loop (cdr xs) (+ sum (car xs)) (+ count 1))))))
+
+(define (sum-and-count xs)
+ (let ((out (cons 0 0)))
+ (call-with-heap-rewind
+ (lambda () (sum-and-count-into out xs)))
+ out))
+
+(define input (cons 1 (cons 2 (cons 3 (cons 4 (cons 5 '()))))))
+
+;; Wrapper version: scratch reclaimed.
+(define m0 (heap-mark))
+(define r (sum-and-count input))
+(define m1 (heap-mark))
+
+;; No-rewind sibling: same call, no wrapper. Recursion-loop env frames
+;; leak.
+(define out2 (cons 0 0))
+(define m2-before (heap-mark))
+(sum-and-count-into out2 input)
+(define m2-after (heap-mark))
+
+(if (= 15 (car r)) 0 (sys-exit 1))
+(if (= 5 (cdr r)) 0 (sys-exit 2))
+;; Wrapped delta must be strictly less than the raw delta.
+(if (< (- m1 m0) (- m2-after m2-before)) 0 (sys-exit 3))
+
+;; Immediate return value flows back through the wrapper.
+(define v
+ (call-with-heap-rewind
+ (lambda ()
+ ;; Allocate transient garbage that must be reclaimed.
+ (let* ((tmp1 (cons 1 2))
+ (tmp2 (cons tmp1 tmp1)))
+ 99))))
+(if (= 99 v) 0 (sys-exit 4))
+
+;; ---- call-with-scratch-deep-copy: scratch-build, main-clone, reset --
+(define (build-list-in-current n)
+ (let loop ((i n) (acc '()))
+ (if (= i 0) acc (loop (- i 1) (cons i acc)))))
+
+(define m1 (call-with-scratch-deep-copy
+ (lambda () (build-list-in-current 4))))
+(if (heap-in-main? m1) 0 (sys-exit 10))
+(if (equal? m1 '(1 2 3 4)) 0 (sys-exit 11))
+
+;; Survives even after a fresh scratch cycle stomps the original arena.
+(define m2 (call-with-scratch-deep-copy
+ (lambda () (mk-cell 'tag (build-list-in-current 3)))))
+(if (cell? m2) 0 (sys-exit 12))
+(if (eq? 'tag (cell-head m2)) 0 (sys-exit 13))
+(if (equal? '(1 2 3) (cell-tail m2)) 0 (sys-exit 14))
+(if (heap-in-main? m2) 0 (sys-exit 15))
+(if (heap-in-main? (cell-tail m2)) 0 (sys-exit 16))
+;; m1 still readable after the second cycle's reset.
+(if (equal? m1 '(1 2 3 4)) 0 (sys-exit 17))
+
+;; ---- call-with-scratch-cycle: multi-root promote -------------------
+;; Caller-owned slots; each cycle parses garbage in scratch and rewrites
+;; the slots in main via a shared deep-copy context.
+(define slot-a #f)
+(define slot-b #f)
+
+(define (one-cycle)
+ (call-with-scratch-cycle
+ (lambda ()
+ ;; "Parse" output — both slots end up scratch-resident.
+ (set! slot-a (cons 'a (cons 1 (cons 2 '()))))
+ (set! slot-b (mk-cell 'b slot-a))) ; b shares slot-a's tail
+ (lambda ()
+ (let ((ctx (make-deep-copy-context)))
+ (set! slot-a (deep-copy ctx slot-a))
+ (set! slot-b (deep-copy ctx slot-b))))))
+
+(one-cycle)
+(if (heap-in-main? slot-a) 0 (sys-exit 20))
+(if (heap-in-main? slot-b) 0 (sys-exit 21))
+(if (eq? slot-a (cell-tail slot-b)) 0 (sys-exit 22)) ; sharing preserved
+(if (equal? slot-a '(a 1 2)) 0 (sys-exit 23))
+(if (eq? 'b (cell-head slot-b)) 0 (sys-exit 24))
+
+;; A second cycle resets scratch; prior slot values must remain valid.
+(define old-a slot-a)
+(define old-b slot-b)
+(one-cycle)
+(if (equal? old-a '(a 1 2)) 0 (sys-exit 25))
+(if (eq? 'b (cell-head old-b)) 0 (sys-exit 26))
+
+(sys-exit 42)