boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit b25bf2dc25aeea993c644a0ec47dad2e7887a58d
parent e211db47f0bb08f759b81b96a7cdf3da315eff4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 12:27:53 -0700

scheme1: record introspection, deep-copy

Diffstat:
Mscheme1/prelude.scm | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mscheme1/scheme1.P1pp | 201+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/scheme1/121-record-introspection.expected-exit | 1+
Atests/scheme1/121-record-introspection.scm | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/scheme1/122-deep-copy.expected-exit | 1+
Atests/scheme1/122-deep-copy.scm | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 479 insertions(+), 0 deletions(-)

diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm @@ -361,6 +361,70 @@ (bytevector-u8-set! bv i (car xs)) (loop (cdr xs) (+ i 1))))))) +;; --- Generic deep-copy --------------------------------------------- +;; Structural clone of pair / bytevector / record graphs in the +;; currently-selected heap. Preserves eq? identity across shared +;; substructure and tolerates cycles via an eager stand-in registered +;; before recursion. +;; +;; The ctx is a one-cell box around an (orig . copy) alist; lookups +;; key off pointer identity (assq) so two structurally-equal but +;; physically-distinct objects are treated separately. Cells leak into +;; whichever heap is current when ctx is created — typically main +;; during cc.scm's parse-decl-or-fn promotion. +;; +;; Strict positive-list dispatch: pair / bytevector / record. Anything +;; else that masquerades as heap-allocated (closures, prims, MV-packs) +;; surfaces as an error rather than silently dangling. +(define (make-deep-copy-context) (cons '() #f)) + +(define (%dcc-lookup ctx obj) + (let ((p (assq obj (car ctx)))) + (if p (cdr p) #f))) + +(define (%dcc-register! ctx obj copy) + (set-car! ctx (cons (cons obj copy) (car ctx))) + copy) + +(define (deep-copy ctx obj) + (cond + ((symbol? obj) obj) + ((heap-in-current? obj) obj) + ((pair? obj) + (let ((c (%dcc-lookup ctx obj))) + (cond + (c c) + (else + (let ((p (cons #f #f))) + (%dcc-register! ctx obj p) + (set-car! p (deep-copy ctx (car obj))) + (set-cdr! p (deep-copy ctx (cdr obj))) + p))))) + ((bytevector? obj) + (let ((c (%dcc-lookup ctx obj))) + (cond + (c c) + (else + (%dcc-register! ctx obj + (bytevector-copy obj 0 (bytevector-length obj))))))) + ((record? obj) + (let ((c (%dcc-lookup ctx obj))) + (cond + (c c) + (else + (let* ((td (record-td obj)) + (n (td-nfields td)) + (s (make-record/td td))) + (%dcc-register! ctx obj s) + (let fill ((i 0)) + (cond ((= i n) s) + (else + (record-set! s i (deep-copy ctx (record-ref obj i))) + (fill (+ i 1)))))))))) + ((procedure? obj) + (error "deep-copy: cannot copy procedure" obj)) + (else obj))) + ;; --- Vector <-> list -- need make-vector / vector-ref / vector-set! / ;; vector-length, none of which are yet primitives. ------------------ ; (define (vector->list-helper v i acc) diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -5973,6 +5973,185 @@ %ret %endscope +# (heap-in-current? obj) -> bool. True iff obj's masked pointer falls +# inside whichever heap is currently selected (main or scratch). +# Generalizes heap-in-main? -- the two agree when main is current, and +# heap-in-current? returns #t for scratch-resident objects iff scratch +# is current. Tag bits are masked off; non-pointer values yield #f. +# Used by deep-copy as the "already in target arena" short-circuit. +:prim_heap_in_current_q_entry +%scope prim_heap_in_current_q + %car(t0, a0) + %li(t1, -8) + %and(t0, t0, t1) + %ld_global(t1, &current_heap_next_ptr) + %la(t2, &heap_next) + %bne(t1, t2, &::scratch) + %ld_global(t1, &heap_buf_ptr) + %bltu(t0, t1, &::false) + %li(t2, %HEAP_CAP_BYTES) + %add(t1, t1, t2) + %bltu(t0, t1, &::true) + %b(&::false) + ::scratch + %ld_global(t1, &scratch_buf_ptr) + %bltu(t0, t1, &::false) + %li(t2, %SCRATCH_CAP_BYTES) + %add(t1, t1, t2) + %bltu(t0, t1, &::true) + ::false + %li(a0, %imm_val(%IMM.FALSE)) + %ret + ::true + %li(a0, %imm_val(%IMM.TRUE)) + %ret +%endscope + +# Record introspection. Surfaces the unsafe %record-* helpers (heap +# layout: [HDR.REC][td][f0..fN-1], field i at tagged + 13 + 8*i; +# nfields lives at TD's offset 13 raw). All primitives below trust +# their inputs -- no bounds check, no kind check on record-ref / +# record-set! / record-td. Same unsafe-by-convention status as +# heap-rewind! / reset-scratch-heap!. See docs/DEEP-COPY.md. + +# (record? obj) -> bool. True iff obj is HEAP-tagged with HDR.REC. +:prim_recordq_entry +%scope prim_recordq + %car(t0, a0) + %li(a0, %imm_val(%IMM.FALSE)) + %tagof(t1, t0) + %li(t2, %TAG.HEAP) + %bne(t1, t2, &::end) + %hdr_type(t1, t0) + %li(t2, %HDR.REC) + %bne(t1, t2, &::end) + %li(a0, %imm_val(%IMM.TRUE)) + ::end + %ret +%endscope + +# (record-td rec) -> td. Reads the TD slot from the record header. No +# kind check; caller is expected to gate with record? if needed. +:prim_record_td_entry + %car(t0, a0) + %heap_ld(a0, t0, %REC.td) + %ret + +# (record-ref rec idx) -> field value. idx is a tagged fixnum; since +# tagged_fixnum = raw_idx * 8 (fixnum tag bits are 0), the byte offset +# is exactly idx + 13 from the tagged record pointer. No bounds check. +:prim_record_ref_entry + %args2(t0, t1, a0) ; t0=rec, t1=idx (tagged fixnum = raw*8) + %addi(t0, t0, 13) + %add(t0, t0, t1) + %ld(a0, t0, 0) + %ret + +# (record-set! rec idx val) -> unspec. In-place store at slot idx. +:prim_record_set_bang_entry +%scope prim_record_set_bang + %car(t0, a0) ; rec + %cdr(a0, a0) + %car(t1, a0) ; idx (tagged fixnum) + %cdr(a0, a0) + %car(t2, a0) ; val + %addi(t0, t0, 13) + %add(t0, t0, t1) + %st(t2, t0, 0) + %li(a0, %imm_val(%IMM.UNSPEC)) + %ret +%endscope + +# (make-record/td td) -> fresh record allocated in the current heap. +# Reads td.nfields, allocates 16 + nfields*8 bytes with HDR.REC, sets +# the td slot, and zero-fills field slots to IMM.UNSPEC. Mirrors +# eval_define_record_type's ctor allocation but driven by the TD's +# nfields rather than a runtime args list. Used by deep-copy as a +# pre-fill stand-in before recursive slot promotion. +# +# Locals: +# td (the TD pointer; saved across alloc_hdr) +# record (the new record pointer) +%fn2(prim_make_record_td_entry, {td record}, { + %car(t0, a0) ; td + %stl(t0, td) + + %heap_ld(a0, t0, %TD.nfields) ; raw nfields + %shli(a0, a0, 3) ; nfields * 8 + %addi(a0, a0, 16) ; + REC header (hdr + td slot) + %li(a1, %HDR.REC) + %call(&alloc_hdr) + %stl(a0, record) + + %ldl(t0, td) + %heap_st(t0, a0, %REC.td) + + # Zero-fill field slots to IMM.UNSPEC. Cursor starts at first slot + # (tagged + 13); count = nfields read again from the TD. + %heap_ld(t0, t0, %TD.nfields) + %addi(t1, a0, 13) + %li(t2, %imm_val(%IMM.UNSPEC)) + + ::fill_loop + %beqz(t0, &::fill_done) + %st(t2, t1, 0) + %addi(t1, t1, 8) + %addi(t0, t0, -1) + %b(&::fill_loop) + + ::fill_done + %ldl(a0, record) +}) + +# (td-nfields td) -> tagged fixnum count of fields. +:prim_td_nfields_entry + %car(t0, a0) + %heap_ld(a0, t0, %TD.nfields) ; raw count + %mkfix(a0, a0) + %ret + +# (td-name td) -> symbol bound at define-record-type time. +:prim_td_name_entry + %car(t0, a0) + %heap_ld(a0, t0, %TD.name) + %ret + +# Debug primitives. UNSAFE: peek-u8 dereferences arbitrary addresses. +# Intended for diagnosing heap-layout bugs from scheme1 user code; not +# part of the surface contract. + +# (tagged-value obj) -> fixnum. Returns the raw byte address of obj +# with tag bits masked off, encoded as a tagged fixnum so format / +# display can print it. Pass the result back into peek-u8 to read raw +# bytes. For non-pointer values (fixnums, immediates, syms) the masked +# value is small but still encodable; the result is meaningful only for +# heap-tagged inputs. +:prim_tagged_value_entry + %car(t0, a0) + %li(t1, -8) + %and(t0, t0, t1) + %mkfix(a0, t0) + %ret + +# (peek-u8 addr) -> fixnum. Reads one byte at the given raw byte +# address (tagged fixnum input, untagged inside). UNSAFE: no bounds +# check; a wild address segfaults the process. +:prim_peek_u8_entry + %car(t0, a0) + %sari(t0, t0, 3) + %lb(a0, t0, 0) + %mkfix(a0, a0) + %ret + +# (current-heap-next) -> fixnum. Returns the current heap's bump +# pointer (raw byte address) as a tagged fixnum. Used to inspect where +# the next allocation will land. +:prim_current_heap_next_entry + %ld_global(t0, &current_heap_next_ptr) + %ld(t0, t0, 0) + %mkfix(a0, t0) + %ret + # heap_oom_die() -> never returns. Reached from cons / alloc_hdr / # alloc_bytes when current_*_next would pass current_*_end. Selects # msg_heap_full vs msg_scratch_full by comparing current_heap_next_ptr @@ -6126,6 +6305,17 @@ :name_use_main_heap_bang "use-main-heap!" '00' :name_reset_scratch_heap_bang "reset-scratch-heap!" '00000000' :name_heap_in_main_q "heap-in-main?" '0000' +:name_heap_in_current_q "heap-in-current?" '00000000000000' +:name_recordq "record?" +:name_record_td "record-td" '000000000000' +:name_record_ref "record-ref" '0000000000' +:name_record_set_bang "record-set!" '00000000' +:name_make_record_td "make-record/td" '00' +:name_td_nfields "td-nfields" '0000000000' +:name_td_name "td-name" +:name_tagged_value "tagged-value" '000000' +:name_peek_u8 "peek-u8" +:name_current_heap_next "current-heap-next" '000000000000' # Writer string constants. Lengths are hard-coded at the str_putn call # sites (write_to_bv branches). No NUL needed in the source bytes -- @@ -6215,6 +6405,17 @@ &name_use_main_heap_bang %(0) $(14) &prim_use_main_heap_bang_entry %(0) &name_reset_scratch_heap_bang %(0) $(19) &prim_reset_scratch_heap_bang_entry %(0) &name_heap_in_main_q %(0) $(13) &prim_heap_in_main_q_entry %(0) +&name_heap_in_current_q %(0) $(16) &prim_heap_in_current_q_entry %(0) +&name_recordq %(0) $(7) &prim_recordq_entry %(0) +&name_record_td %(0) $(9) &prim_record_td_entry %(0) +&name_record_ref %(0) $(10) &prim_record_ref_entry %(0) +&name_record_set_bang %(0) $(11) &prim_record_set_bang_entry %(0) +&name_make_record_td %(0) $(14) &prim_make_record_td_entry %(0) +&name_td_nfields %(0) $(10) &prim_td_nfields_entry %(0) +&name_td_name %(0) $(7) &prim_td_name_entry %(0) +&name_tagged_value %(0) $(12) &prim_tagged_value_entry %(0) +&name_peek_u8 %(0) $(7) &prim_peek_u8_entry %(0) +&name_current_heap_next %(0) $(17) &prim_current_heap_next_entry %(0) &name_values %(0) $(6) &prim_values_entry %(0) &name_call_with_values %(0) $(16) &prim_call_with_values_entry %(0) :prim_table_end diff --git a/tests/scheme1/121-record-introspection.expected-exit b/tests/scheme1/121-record-introspection.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/121-record-introspection.scm b/tests/scheme1/121-record-introspection.scm @@ -0,0 +1,87 @@ +; Record introspection primitives: record?, record-td, record-ref, +; record-set!, make-record/td, td-nfields, td-name, heap-in-current?. +; Backs the generic deep-copy walker (see docs/DEEP-COPY.md). + +(define-record-type point + (mk-point x y) + point? + (x point-x) + (y point-y point-y-set!)) + +(define-record-type box + (mk-box val) + box? + (val box-val box-val-set!)) + +(define p (mk-point 3 4)) +(define b (mk-box 7)) + +;; ---- record? ---- +(if (record? p) 0 (sys-exit 1)) +(if (record? b) 0 (sys-exit 2)) +(if (not (record? 5)) 0 (sys-exit 3)) +(if (not (record? "bv")) 0 (sys-exit 4)) +(if (not (record? '())) 0 (sys-exit 5)) +(if (not (record? '(a))) 0 (sys-exit 6)) +(if (not (record? 'sym)) 0 (sys-exit 7)) +(if (not (record? #f)) 0 (sys-exit 8)) + +;; ---- record-td: identity for instances of the same type ---- +(define td-p (record-td p)) +(define p2 (mk-point 9 10)) +(if (eq? td-p (record-td p2)) 0 (sys-exit 10)) +;; Different types yield different TDs +(if (not (eq? td-p (record-td b))) 0 (sys-exit 11)) + +;; ---- td-nfields ---- +(if (= 2 (td-nfields td-p)) 0 (sys-exit 20)) +(if (= 1 (td-nfields (record-td b))) 0 (sys-exit 21)) + +;; ---- td-name ---- +(if (eq? 'point (td-name td-p)) 0 (sys-exit 30)) +(if (eq? 'box (td-name (record-td b))) 0 (sys-exit 31)) + +;; ---- record-ref reads slots by index ---- +(if (= 3 (record-ref p 0)) 0 (sys-exit 40)) +(if (= 4 (record-ref p 1)) 0 (sys-exit 41)) +(if (= 7 (record-ref b 0)) 0 (sys-exit 42)) + +;; ---- record-set! writes slots by index ---- +(record-set! p 1 42) +(if (= 42 (point-y p)) 0 (sys-exit 50)) +(if (= 42 (record-ref p 1)) 0 (sys-exit 51)) +(record-set! b 0 'tagged) +(if (eq? 'tagged (box-val b)) 0 (sys-exit 52)) + +;; ---- make-record/td: zero-filled to UNSPEC, then fillable ---- +(define p3 (make-record/td td-p)) +(if (record? p3) 0 (sys-exit 60)) +(if (eq? td-p (record-td p3)) 0 (sys-exit 61)) +(if (point? p3) 0 (sys-exit 62)) +(record-set! p3 0 100) +(record-set! p3 1 200) +(if (= 100 (point-x p3)) 0 (sys-exit 63)) +(if (= 200 (point-y p3)) 0 (sys-exit 64)) + +;; ---- heap-in-current? generalizes heap-in-main? ---- +;; Default heap is main. +(if (heap-in-current? p) 0 (sys-exit 70)) +(if (heap-in-current? "bv-in-main") 0 (sys-exit 71)) +;; Tagged non-pointers are #f. +(if (not (heap-in-current? 5)) 0 (sys-exit 72)) +(if (not (heap-in-current? 'sym)) 0 (sys-exit 73)) +(if (not (heap-in-current? #f)) 0 (sys-exit 74)) +(if (not (heap-in-current? '())) 0 (sys-exit 75)) + +;; Allocate a record in scratch, then in main; each is in-current within +;; its arena and not in-current after switching. +(use-scratch-heap!) +(define s-rec (mk-point 1 2)) +(if (heap-in-current? s-rec) 0 (sys-exit 80)) +(if (not (heap-in-main? s-rec)) 0 (sys-exit 81)) +(use-main-heap!) +(if (not (heap-in-current? s-rec)) 0 (sys-exit 82)) +(if (heap-in-current? p) 0 (sys-exit 83)) +(reset-scratch-heap!) + +(sys-exit 42) diff --git a/tests/scheme1/122-deep-copy.expected-exit b/tests/scheme1/122-deep-copy.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/122-deep-copy.scm b/tests/scheme1/122-deep-copy.scm @@ -0,0 +1,125 @@ +; Generic deep-copy: structural clone of pairs / bytevectors / records, +; with identity preservation across shared substructure and cycle +; tolerance via an eager stand-in registered before slot fill. Used by +; cc.scm to promote scratch-allocated parse output into main; mirror +; here in scheme1 itself to lock down the contract. See +; docs/DEEP-COPY.md. +; +; By design, deep-copy short-circuits when an object is already in the +; current heap (so re-promotion is O(1)). The structural tests below +; therefore allocate sources in scratch and copy with main current. + +(define-record-type cell + (mk-cell head tail) + cell? + (head cell-head cell-head-set!) + (tail cell-tail cell-tail-set!)) + +;; ---- Already-in-target short-circuit returns the same object ---- +(define m-pair (cons 1 2)) +(define ctx0 (make-deep-copy-context)) +(if (eq? m-pair (deep-copy ctx0 m-pair)) 0 (sys-exit 1)) + +;; ---- Symbols pass through untouched (interned) ---- +(if (eq? 'foo (deep-copy ctx0 'foo)) 0 (sys-exit 2)) + +;; ---- Fixnums / immediates pass through ---- +(if (= 42 (deep-copy ctx0 42)) 0 (sys-exit 3)) +(if (eq? #t (deep-copy ctx0 #t)) 0 (sys-exit 4)) +(if (eq? '() (deep-copy ctx0 '())) 0 (sys-exit 5)) + +;; ---- Pair deep-copy from scratch -> main ---- +(use-scratch-heap!) +(define s-list (cons 1 (cons 2 (cons 3 '())))) +(use-main-heap!) +(define ctx1 (make-deep-copy-context)) +(define m-list (deep-copy ctx1 s-list)) +(if (equal? s-list m-list) 0 (sys-exit 10)) +(if (not (eq? s-list m-list)) 0 (sys-exit 11)) +(if (heap-in-main? m-list) 0 (sys-exit 12)) +(if (heap-in-main? (cdr m-list)) 0 (sys-exit 13)) +(reset-scratch-heap!) +;; m-list survives scratch reset +(if (equal? m-list (cons 1 (cons 2 (cons 3 '())))) 0 (sys-exit 14)) + +;; ---- Bytevector deep-copy from scratch -> main ---- +(use-scratch-heap!) +(define s-bv (bytevector 1 2 3 4 5)) +(use-main-heap!) +(define ctx2 (make-deep-copy-context)) +(define m-bv (deep-copy ctx2 s-bv)) +(if (bytevector=? s-bv m-bv) 0 (sys-exit 20)) +(if (not (eq? s-bv m-bv)) 0 (sys-exit 21)) +(if (heap-in-main? m-bv) 0 (sys-exit 22)) +(reset-scratch-heap!) +;; main-heap copy survives scratch reset +(if (= 5 (bytevector-length m-bv)) 0 (sys-exit 23)) +(if (= 1 (bytevector-u8-ref m-bv 0)) 0 (sys-exit 24)) +(if (= 5 (bytevector-u8-ref m-bv 4)) 0 (sys-exit 25)) + +;; ---- Record deep-copy from scratch -> main ---- +(use-scratch-heap!) +(define s-cell (mk-cell 10 20)) +(use-main-heap!) +(define ctx3 (make-deep-copy-context)) +(define m-cell (deep-copy ctx3 s-cell)) +(if (cell? m-cell) 0 (sys-exit 30)) +(if (not (eq? s-cell m-cell)) 0 (sys-exit 31)) +(if (= 10 (cell-head m-cell)) 0 (sys-exit 32)) +(if (= 20 (cell-tail m-cell)) 0 (sys-exit 33)) +;; Same TD: TDs are persistent and not copied +(if (eq? (record-td s-cell) (record-td m-cell)) 0 (sys-exit 34)) +(if (heap-in-main? m-cell) 0 (sys-exit 35)) +(reset-scratch-heap!) +(if (= 10 (cell-head m-cell)) 0 (sys-exit 36)) + +;; ---- Identity preservation across shared subobjects ---- +(use-scratch-heap!) +(define s-shared (cons 'a 'b)) +(define s-x (cons s-shared s-shared)) +(use-main-heap!) +(define ctx4 (make-deep-copy-context)) +(define m-x (deep-copy ctx4 s-x)) +;; Both halves of the result reference one fresh shared cons in main. +(if (eq? (car m-x) (cdr m-x)) 0 (sys-exit 40)) +(if (not (eq? (car m-x) s-shared)) 0 (sys-exit 41)) +(if (eq? 'a (car (car m-x))) 0 (sys-exit 42)) +(if (eq? 'b (cdr (car m-x))) 0 (sys-exit 43)) +(reset-scratch-heap!) + +;; ---- Cycle handling (record points to itself) ---- +(use-scratch-heap!) +(define s-cyc (mk-cell 1 #f)) +(cell-tail-set! s-cyc s-cyc) +(use-main-heap!) +(define ctx5 (make-deep-copy-context)) +(define m-cyc (deep-copy ctx5 s-cyc)) +(if (not (eq? s-cyc m-cyc)) 0 (sys-exit 50)) +(if (= 1 (cell-head m-cyc)) 0 (sys-exit 51)) +(if (eq? m-cyc (cell-tail m-cyc)) 0 (sys-exit 52)) +(reset-scratch-heap!) +;; m-cyc still its own tail after scratch reset +(if (eq? m-cyc (cell-tail m-cyc)) 0 (sys-exit 53)) + +;; ---- Mixed pair-of-record graph ---- +(use-scratch-heap!) +(define s-rec (mk-cell 9 (cons 1 (cons 2 '())))) +(use-main-heap!) +(define ctx6 (make-deep-copy-context)) +(define m-rec (deep-copy ctx6 s-rec)) +(if (not (eq? s-rec m-rec)) 0 (sys-exit 60)) +(if (= 9 (cell-head m-rec)) 0 (sys-exit 61)) +(if (equal? (cell-tail m-rec) '(1 2)) 0 (sys-exit 62)) +(if (heap-in-main? m-rec) 0 (sys-exit 63)) +(if (heap-in-main? (cell-tail m-rec)) 0 (sys-exit 64)) +(reset-scratch-heap!) +(if (equal? (cell-tail m-rec) '(1 2)) 0 (sys-exit 65)) + +;; ---- ctx reuse: second pass over the same already-copied root is O(1) +;; via heap-in-current?, returning the main-heap object directly. +(define ctx7 (make-deep-copy-context)) +(define copy-once (deep-copy ctx7 (cons 'a (cons 'b '())))) +(define copy-again (deep-copy ctx7 copy-once)) +(if (eq? copy-once copy-again) 0 (sys-exit 70)) + +(sys-exit 42)