boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 65d998517f0acfba3f6722061c65af87d1f5ab4a
parent e7c67d645ce2b03ace2833ff40b259f849b1c4a3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 13:13:56 -0700

cc: replace per-type promote walkers with deep-copy

The parse-decl-or-fn boundary now uses scheme1's generic deep-copy for
all root promotion: tags, scope, str-pool, and the iter-buffer carry.
heap-in-current? short-circuits prior-decl entries automatically, so the
snapshot-tail and per-walker identity map are no longer needed. Only
%promote-pending-completions stays cc-side, since it mutates pre-existing
main-heap ctypes rather than copying.

Drop docs/DEEP-COPY.md now that the rewrite is in.

Diffstat:
Mcc/cc.scm | 338++++++++++++-------------------------------------------------------------------
Ddocs/DEEP-COPY.md | 120-------------------------------------------------------------------------------
2 files changed, 51 insertions(+), 407 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -4711,290 +4711,56 @@ ;; Phase 3: parse-decl-or-fn boundary — scratch by default, promote ;; surviving roots into main and reset scratch at each top-level decl. ;; -;; Per-decl mutable state: -;; %promote-map: alist mapping scratch-ptr -> main-ptr, cleared at -;; each boundary. Preserves eq? identity for objects that appear -;; multiple times (one ctype referenced from several syms; one -;; file bv shared by many tok locs). +;; Promotion uses the prelude's generic deep-copy (see +;; docs/DEEP-COPY.md); the per-decl identity-preserving map is folded +;; into the deep-copy context. +;; +;; Per-decl mutable state retained on the cc side: ;; %promote-pending-completions: list of struct/union ctypes that -;; complete-agg! mutated during the current decl. Each main-heap -;; entry needs its now-scratch ext rewritten to main before the -;; scratch reset. +;; complete-agg! mutated during the current decl. These are +;; pre-existing main-heap records whose ext now points at scratch; +;; ext is rewritten via deep-copy before the scratch reset. ;; ==================================================================== -(define %promote-map '()) (define %promote-pending-completions '()) -(define (%promote-map-ref obj) - (alist-ref/eq obj %promote-map)) - -(define (%promote-map-add! obj cloned) - (set! %promote-map (cons (cons obj cloned) %promote-map)) - cloned) - -(define (%promote-clear!) - (set! %promote-map '()) +(define (rewrite-pending-completions! ctx) + (for-each + (lambda (c) + (cond ((heap-in-main? c) + (ctype-ext-set! c (deep-copy ctx (ctype-ext c)))))) + %promote-pending-completions) (set! %promote-pending-completions '())) -;; Deep-copy a bytevector into the current heap. Idempotent on -;; already-promoted bvs via the promote-map (same content but a fresh -;; bv each scratch instance, so eq? matters). #f passes through (used -;; for anonymous fields, missing tags, missing param names). -(define (promote-bv bv) - (cond - ((not bv) bv) - ((heap-in-main? bv) bv) - (else - (let ((c (%promote-map-ref bv))) - (cond - (c c) - (else - (let ((n (bytevector-length bv))) - (%promote-map-add! bv (bytevector-copy bv 0 n))))))))) - -(define (promote-loc l) - (cond - ((not l) l) - ((heap-in-main? l) l) - (else - (let ((c (%promote-map-ref l))) - (cond - (c c) - (else - (%promote-map-add! l - (%loc (promote-bv (loc-file l)) - (loc-line l) (loc-col l))))))))) - -;; A list of bvs. Each element is a bv (no recursion). -(define (promote-bv-list xs) - (cond ((null? xs) '()) - (else (cons (promote-bv (car xs)) - (promote-bv-list (cdr xs)))))) - -(define (promote-tok t) - (cond - ((not t) t) - ((heap-in-main? t) t) - (else - (let ((c (%promote-map-ref t))) - (cond - (c c) - (else - (let* ((v (tok-value t)) - (v* (cond ((bytevector? v) (promote-bv v)) - (else v))) ; symbol / fixnum / #f - (loc* (promote-loc (tok-loc t))) - (hide* (promote-bv-list (tok-hide t)))) - (%promote-map-add! t (%tok (tok-kind t) v* loc* hide*))))))))) - -(define (promote-tok-list xs) - (cond ((null? xs) '()) - (else (cons (promote-tok (car xs)) - (promote-tok-list (cdr xs)))))) - -;; Field list element: (name-bv ctype offset). -(define (promote-fields fs) - (cond ((null? fs) '()) - (else - (let ((f (car fs))) - (cons (list (promote-bv (car f)) - (promote-ctype (cadr f)) - (car (cddr f))) ; offset is fixnum - (promote-fields (cdr fs))))))) - -;; Fn-param list element: (name-bv-or-#f . ctype). -(define (promote-fn-params ps) - (cond ((null? ps) '()) - (else - (let* ((p (car ps)) - (nm (car p))) - (cons (cons (cond ((not nm) #f) (else (promote-bv nm))) - (promote-ctype (cdr p))) - (promote-fn-params (cdr ps))))))) - -;; Enum member element: (name-bv . fixnum). -(define (promote-enum-members ms) - (cond ((null? ms) '()) - (else - (let ((m (car ms))) - (cons (cons (promote-bv (car m)) (cdr m)) - (promote-enum-members (cdr ms))))))) - -;; ctype ext walker, dispatched by kind. -(define (promote-ctype-ext k ext) - (cond - ((or (eq? k 'void) (eq? k 'i8) (eq? k 'u8) - (eq? k 'i16) (eq? k 'u16) (eq? k 'i32) (eq? k 'u32) - (eq? k 'i64) (eq? k 'u64) (eq? k 'bool) - (eq? k 'flt) (eq? k 'dbl) (eq? k 'ldbl)) - ext) ; #f - ((eq? k 'ptr) (promote-ctype ext)) - ((eq? k 'arr) - (cons (promote-ctype (car ext)) (cdr ext))) ; (ctype . n) - ((eq? k 'fn) - (let ((ret (car ext)) (par (cadr ext)) (var (car (cddr ext)))) - (list (promote-ctype ret) - (promote-fn-params par) - var))) ; variadic? is bool - ((or (eq? k 'struct) (eq? k 'union)) - (let ((tag (car ext)) (done? (cadr ext)) (fs (car (cddr ext)))) - (list (cond ((not tag) #f) (else (promote-bv tag))) - done? - (promote-fields fs)))) - ((eq? k 'enum) - (let ((tag (car ext)) (ms (cadr ext))) - (list (cond ((not tag) #f) (else (promote-bv tag))) - (promote-enum-members ms)))) - (else (die #f "promote-ctype-ext: unknown kind" k)))) - -(define (promote-ctype c) - (cond - ((not c) c) - ((heap-in-main? c) c) ; interned prims; any prior-decl ctype - (else - (let ((cached (%promote-map-ref c))) - (cond - (cached cached) - (else - ;; Allocate a stand-in eagerly and register it before recursing, - ;; so a self-referential ctype (struct with a pointer to itself) - ;; resolves the back-edge to the same main copy. - (let ((m (%ctype (ctype-kind c) - (ctype-size c) (ctype-align c) - #f))) - (%promote-map-add! c m) - (ctype-ext-set! m (promote-ctype-ext (ctype-kind c) - (ctype-ext c))) - m))))))) - -(define (promote-sym s) - (cond - ((not s) s) - ((heap-in-main? s) s) - (else - (let ((cached (%promote-map-ref s))) - (cond - (cached cached) - (else - (let* ((nm (sym-name s)) - (sl (sym-slot s)) - (sl* (cond ((bytevector? sl) (promote-bv sl)) - (else sl)))) ; fixnum or #f - (%promote-map-add! s - (%sym (promote-bv nm) - (sym-kind s) ; symbol - (sym-storage s) ; symbol or #f - (promote-ctype (sym-type s)) - sl* - (sym-defined? s)))))))))) - -;; --- Alist promotion: head -> snapshot-tail. -;; Walks new entries (head ... snapshot-tail) and deep-copies each (k . v) -;; cons in the current heap. Tail (snapshot and below) is already in main -;; and shared. -(define (%promote-alist-frame head tail key-fn val-fn) - (cond - ((eq? head tail) tail) - (else - (let* ((entry (car head)) - (k* (key-fn (car entry))) - (v* (val-fn (cdr entry)))) - (cons (cons k* v*) - (%promote-alist-frame (cdr head) tail key-fn val-fn)))))) - -;; A snapshot captures the eq?-heads of the three world alists so -;; promote-roots! knows where the new entries stop. -(define-record-type world-snap - (%world-snap scope-top tags-top str-pool) - world-snap? - (scope-top wsnap-scope-top) - (tags-top wsnap-tags-top) - (str-pool wsnap-str-pool)) - -(define (snapshot-roots w) - (%world-snap (car (world-scope w)) - (car (world-tags w)) - (world-str-pool w))) - -(define (promote-roots! w snap) - ;; Step 1: fix forward-completion ext on pre-existing main-heap ctypes. - ;; Their ext was set to a scratch-allocated (list tag #t fs) by - ;; complete-agg!; rewrite into main now. Scratch-resident ctypes in the - ;; pending list get promoted via the tag walker below — skip here. - (let pc ((xs %promote-pending-completions)) - (cond - ((null? xs) #t) - (else - (let ((c (car xs))) - (cond - ((heap-in-main? c) - (ctype-ext-set! c - (promote-ctype-ext (ctype-kind c) (ctype-ext c)))))) - (pc (cdr xs))))) - ;; Step 2: tags first (struct/union/enum identity anchors). The tags - ;; and scope worlds are stacks of frames; after parse-decl-or-fn only - ;; the file-scope top frame is live (nested frames popped via - ;; scope-leave!). Rebuild that frame; the rest (always '()) is - ;; preserved. - (let* ((tn (world-tags w)) - (nf (%promote-alist-frame (car tn) (wsnap-tags-top snap) - promote-bv promote-ctype))) - (world-tags-set! w (cons nf (cdr tn)))) - ;; Step 3: scope (var/typedef bindings). - (let* ((sn (world-scope w)) - (nf (%promote-alist-frame (car sn) (wsnap-scope-top snap) - promote-bv promote-sym))) - (world-scope-set! w (cons nf (cdr sn)))) - ;; Step 4: string-pool (bv-content . label-bv). Flat alist, no frames. - (world-str-pool-set! w - (%promote-alist-frame (world-str-pool w) (wsnap-str-pool snap) - promote-bv promote-bv))) - -;; Iter-buffer carryover: any tok already pulled from upstream and held -;; in tok-iter-buf / pps-up-pending / pps-out-buf is scratch-allocated by -;; lex; promote each. pps-cur-file is a bv installed by `#line "file"`. -;; pps-macros entries new this decl get promoted; the snapshot tail -;; carries the prior-decl macros (already in main). - -(define-record-type iter-snap - (%iter-snap macros) - iter-snap? - (macros isnap-macros)) - -(define (snapshot-iters ps-iter) - ;; ps-iter is the pp-iter; its state is a pp-state. - (let ((st (tok-iter-state ps-iter))) - (%iter-snap (pps-macros st)))) - -(define (%promote-tok-buf it) - (tok-iter-buf-set! it (promote-tok-list (tok-iter-buf it)))) - -(define (promote-macro m) - (cond - ((not m) m) - ((heap-in-main? m) m) - (else - (let ((cached (%promote-map-ref m))) - (cond - (cached cached) - (else - (%promote-map-add! m - (%macro (macro-kind m) - (promote-bv-list (macro-params m)) - (promote-tok-list (macro-body m)))))))))) - -(define (promote-iter-buffers! pp-it isnap) +;; Deep-copy each top-level world alist into main. The world-tags / +;; world-scope stacks contain only the file-scope frame at decl +;; boundaries (nested frames popped by scope-leave!); rebuild that head +;; frame and preserve the (always-empty) tail. world-str-pool is a flat +;; alist. deep-copy short-circuits prior-decl entries via heap-in-current?, +;; so this is linear in the new entries only. +(define (promote-roots! w ctx) + (rewrite-pending-completions! ctx) + (let ((tn (world-tags w))) + (world-tags-set! w (cons (deep-copy ctx (car tn)) (cdr tn)))) + (let ((sn (world-scope w))) + (world-scope-set! w (cons (deep-copy ctx (car sn)) (cdr sn)))) + (world-str-pool-set! w (deep-copy ctx (world-str-pool w)))) + +;; Iter-buffer carryover. The pp-iter / lex-iter records themselves +;; live in main from cc-init; only their tok-iter-buf slots and the +;; pp-state's pending / out / cur-file / macros slots can hold +;; scratch-allocated content. Rewrite each in place via deep-copy. +(define (promote-iter-buffers! pp-it ctx) (let* ((st (tok-iter-state pp-it)) (lex-it (pps-lex-iter st))) - (%promote-tok-buf pp-it) - (cond (lex-it (%promote-tok-buf lex-it))) - (pps-up-pending-set! st (promote-tok-list (pps-up-pending st))) - (pps-out-buf-set! st (promote-tok-list (pps-out-buf st))) - (let ((cf (pps-cur-file st))) - (cond (cf (pps-cur-file-set! st (promote-bv cf))))) - (pps-macros-set! st - (%promote-alist-frame (pps-macros st) (isnap-macros isnap) - promote-bv promote-macro)))) + (tok-iter-buf-set! pp-it (deep-copy ctx (tok-iter-buf pp-it))) + (cond (lex-it + (tok-iter-buf-set! lex-it + (deep-copy ctx (tok-iter-buf lex-it))))) + (pps-up-pending-set! st (deep-copy ctx (pps-up-pending st))) + (pps-out-buf-set! st (deep-copy ctx (pps-out-buf st))) + (pps-cur-file-set! st (deep-copy ctx (pps-cur-file st))) + (pps-macros-set! st (deep-copy ctx (pps-macros st))))) (define (parse-translation-unit ps) (let loop () @@ -5002,18 +4768,16 @@ (cond ((eq? (tok-kind (peek ps)) 'EOF) #t) (else - (let ((wsnap (snapshot-roots (ps-world ps))) - (isnap (snapshot-iters (ps-iter ps)))) - (parse-decl-or-fn ps) - (use-main-heap!) - (promote-roots! (ps-world ps) wsnap) - (promote-iter-buffers! (ps-iter ps) isnap) - ;; cg-fn-meta may hold scratch alist conses left over from the - ;; just-finished function; cg-fn-begin would reset it, but a - ;; trailing fn means it'd dangle past reset-scratch-heap!. - (cg-fn-meta-set! (ps-cg ps) '()) - (%promote-clear!) - (reset-scratch-heap!)) + (parse-decl-or-fn ps) + (use-main-heap!) + (let ((ctx (make-deep-copy-context))) + (promote-roots! (ps-world ps) ctx) + (promote-iter-buffers! (ps-iter ps) ctx)) + ;; cg-fn-meta may hold scratch alist conses left over from the + ;; just-finished function; cg-fn-begin would reset it, but a + ;; trailing fn means it'd dangle past reset-scratch-heap!. + (cg-fn-meta-set! (ps-cg ps) '()) + (reset-scratch-heap!) (loop)))) (use-main-heap!) #t) diff --git a/docs/DEEP-COPY.md b/docs/DEEP-COPY.md @@ -1,120 +0,0 @@ -# Generic two-heap promotion - -`deep-copy` is the generic Scheme-level copier used to promote object -graphs from the scratch heap into the currently selected heap, normally -main. It was added to avoid writing one promotion walker per cc.scm -record type. - -## Status - -Done: - -- scheme1 record-introspection primitives: - `record?`, `record-td`, `record-ref`, `record-set!`, - `make-record/td`, `td-nfields`, `td-name`, and `heap-in-current?`. -- prelude `deep-copy` and `make-deep-copy-context`. -- Tests: - `tests/scheme1/121-record-introspection.scm`, - `tests/scheme1/122-deep-copy.scm`, - `tests/scheme1/123-scratch-reset-intern.scm`, and - `tests/scheme1/124-scratch-record-type.scm`. -- Main-only allocation for interpreter-global metadata that must - survive scratch resets: - symtab name bytes, record type descriptors, generated record - primitives, and TD field-name lists. - -Still open: - -- cc.scm still uses its hand-written `promote-*` walkers at the - parse-decl-or-fn boundary. The remaining migration is to replace - those walkers with `deep-copy`. - -## Current API - -```scheme -(define ctx (make-deep-copy-context)) -(define promoted (deep-copy ctx scratch-root)) -``` - -The destination is whichever heap is current when `deep-copy` allocates. -For scratch-to-main promotion, switch to main before calling it. - -The context is a boxed alist mapping original object identity to copied -object identity. It preserves shared substructure and breaks cycles by -registering an eager stand-in before recursively filling fields. - -## Copy Semantics - -`deep-copy` handles: - -- symbols, fixnums, immediates: returned as-is -- objects already in the current heap: returned as-is -- pairs: recursively copies car and cdr -- bytevectors: copies bytes with `bytevector-copy` -- records: allocates `make-record/td`, registers the stand-in, then - recursively copies each record slot -- procedures: rejected with an error - -This is deliberately suited to cc.scm parse graphs, which are made of -pairs, bytevectors, records, symbols, fixnums, booleans, and `#f`. - -## Record Introspection Primitives - -| primitive | semantics | -|---|---| -| `(record? obj)` | true iff `obj` is HEAP-tagged with `HDR.REC` | -| `(record-td obj)` | returns the TD pointer stored in the record | -| `(record-ref obj i)` | reads field `i`; no bounds check | -| `(record-set! obj i v)` | writes field `i`; no bounds check | -| `(make-record/td td)` | allocates a record in the current heap and fills slots with `UNSPEC` | -| `(td-nfields td)` | returns the TD field count | -| `(td-name td)` | returns the record type name symbol | -| `(heap-in-current? obj)` | true iff a pointer object is inside the selected heap arena | - -These primitives are unsafe by convention, like `heap-rewind!` and -`reset-scratch-heap!`: callers must provide valid record values, TDs, -and field indices. - -## Remaining cc.scm Rewrite - -The current cc.scm promotion layer has a per-type map and walkers: -`promote-bv`, `promote-loc`, `promote-tok`, `promote-ctype`, -`promote-sym`, `promote-macro`, and helper list walkers. These can -collapse to a single `deep-copy` context per parse boundary. - -The pending-completions step remains distinct: it mutates pre-existing -main-heap ctypes whose `ext` field was temporarily pointed at scratch. -That slot should be rewritten with `deep-copy` before the scratch reset. - -Sketch: - -```scheme -(define (rewrite-pending-completions! ctx) - (for-each - (lambda (c) - (cond ((heap-in-main? c) - (ctype-ext-set! c (deep-copy ctx (ctype-ext c)))))) - %promote-pending-completions)) - -(define (promote-roots! w ctx) - (rewrite-pending-completions! ctx) - (let ((tn (world-tags w))) - (world-tags-set! w (cons (deep-copy ctx (car tn)) (cdr tn)))) - (let ((sn (world-scope w))) - (world-scope-set! w (cons (deep-copy ctx (car sn)) (cdr sn)))) - (world-str-pool-set! w (deep-copy ctx (world-str-pool w)))) -``` - -Iterator buffers and macro tables should use the same context so shared -objects stay shared across all promoted roots in one boundary. - -## Notes - -- The context alist currently lives wherever it is created. For cc.scm - promotion this should be main, matching the existing `%promote-map` - behavior. -- `heap-in-current?` short-circuits before the context lookup, so - re-copying an already promoted object returns immediately. -- `make-record/td` zero-fills fields with `UNSPEC`. This is important - for cyclic records: a back-edge can see the stand-in before all fields - are filled, but the object is always well-formed.