commit b25bf2dc25aeea993c644a0ec47dad2e7887a58d
parent e211db47f0bb08f759b81b96a7cdf3da315eff4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 12:27:53 -0700
scheme1: record introspection, deep-copy
Diffstat:
6 files changed, 479 insertions(+), 0 deletions(-)
diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm
@@ -361,6 +361,70 @@
(bytevector-u8-set! bv i (car xs))
(loop (cdr xs) (+ i 1)))))))
+;; --- Generic deep-copy ---------------------------------------------
+;; Structural clone of pair / bytevector / record graphs in the
+;; currently-selected heap. Preserves eq? identity across shared
+;; substructure and tolerates cycles via an eager stand-in registered
+;; before recursion.
+;;
+;; The ctx is a one-cell box around an (orig . copy) alist; lookups
+;; key off pointer identity (assq) so two structurally-equal but
+;; physically-distinct objects are treated separately. Cells leak into
+;; whichever heap is current when ctx is created — typically main
+;; during cc.scm's parse-decl-or-fn promotion.
+;;
+;; Strict positive-list dispatch: pair / bytevector / record. Anything
+;; else that masquerades as heap-allocated (closures, prims, MV-packs)
+;; surfaces as an error rather than silently dangling.
+(define (make-deep-copy-context) (cons '() #f))
+
+(define (%dcc-lookup ctx obj)
+ (let ((p (assq obj (car ctx))))
+ (if p (cdr p) #f)))
+
+(define (%dcc-register! ctx obj copy)
+ (set-car! ctx (cons (cons obj copy) (car ctx)))
+ copy)
+
+(define (deep-copy ctx obj)
+ (cond
+ ((symbol? obj) obj)
+ ((heap-in-current? obj) obj)
+ ((pair? obj)
+ (let ((c (%dcc-lookup ctx obj)))
+ (cond
+ (c c)
+ (else
+ (let ((p (cons #f #f)))
+ (%dcc-register! ctx obj p)
+ (set-car! p (deep-copy ctx (car obj)))
+ (set-cdr! p (deep-copy ctx (cdr obj)))
+ p)))))
+ ((bytevector? obj)
+ (let ((c (%dcc-lookup ctx obj)))
+ (cond
+ (c c)
+ (else
+ (%dcc-register! ctx obj
+ (bytevector-copy obj 0 (bytevector-length obj)))))))
+ ((record? obj)
+ (let ((c (%dcc-lookup ctx obj)))
+ (cond
+ (c c)
+ (else
+ (let* ((td (record-td obj))
+ (n (td-nfields td))
+ (s (make-record/td td)))
+ (%dcc-register! ctx obj s)
+ (let fill ((i 0))
+ (cond ((= i n) s)
+ (else
+ (record-set! s i (deep-copy ctx (record-ref obj i)))
+ (fill (+ i 1))))))))))
+ ((procedure? obj)
+ (error "deep-copy: cannot copy procedure" obj))
+ (else obj)))
+
;; --- Vector <-> list -- need make-vector / vector-ref / vector-set! /
;; vector-length, none of which are yet primitives. ------------------
; (define (vector->list-helper v i acc)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -5973,6 +5973,185 @@
%ret
%endscope
+# (heap-in-current? obj) -> bool. True iff obj's masked pointer falls
+# inside whichever heap is currently selected (main or scratch).
+# Generalizes heap-in-main? -- the two agree when main is current, and
+# heap-in-current? returns #t for scratch-resident objects iff scratch
+# is current. Tag bits are masked off; non-pointer values yield #f.
+# Used by deep-copy as the "already in target arena" short-circuit.
+:prim_heap_in_current_q_entry
+%scope prim_heap_in_current_q
+ %car(t0, a0)
+ %li(t1, -8)
+ %and(t0, t0, t1)
+ %ld_global(t1, ¤t_heap_next_ptr)
+ %la(t2, &heap_next)
+ %bne(t1, t2, &::scratch)
+ %ld_global(t1, &heap_buf_ptr)
+ %bltu(t0, t1, &::false)
+ %li(t2, %HEAP_CAP_BYTES)
+ %add(t1, t1, t2)
+ %bltu(t0, t1, &::true)
+ %b(&::false)
+ ::scratch
+ %ld_global(t1, &scratch_buf_ptr)
+ %bltu(t0, t1, &::false)
+ %li(t2, %SCRATCH_CAP_BYTES)
+ %add(t1, t1, t2)
+ %bltu(t0, t1, &::true)
+ ::false
+ %li(a0, %imm_val(%IMM.FALSE))
+ %ret
+ ::true
+ %li(a0, %imm_val(%IMM.TRUE))
+ %ret
+%endscope
+
+# Record introspection. Surfaces the unsafe %record-* helpers (heap
+# layout: [HDR.REC][td][f0..fN-1], field i at tagged + 13 + 8*i;
+# nfields lives at TD's offset 13 raw). All primitives below trust
+# their inputs -- no bounds check, no kind check on record-ref /
+# record-set! / record-td. Same unsafe-by-convention status as
+# heap-rewind! / reset-scratch-heap!. See docs/DEEP-COPY.md.
+
+# (record? obj) -> bool. True iff obj is HEAP-tagged with HDR.REC.
+:prim_recordq_entry
+%scope prim_recordq
+ %car(t0, a0)
+ %li(a0, %imm_val(%IMM.FALSE))
+ %tagof(t1, t0)
+ %li(t2, %TAG.HEAP)
+ %bne(t1, t2, &::end)
+ %hdr_type(t1, t0)
+ %li(t2, %HDR.REC)
+ %bne(t1, t2, &::end)
+ %li(a0, %imm_val(%IMM.TRUE))
+ ::end
+ %ret
+%endscope
+
+# (record-td rec) -> td. Reads the TD slot from the record header. No
+# kind check; caller is expected to gate with record? if needed.
+:prim_record_td_entry
+ %car(t0, a0)
+ %heap_ld(a0, t0, %REC.td)
+ %ret
+
+# (record-ref rec idx) -> field value. idx is a tagged fixnum; since
+# tagged_fixnum = raw_idx * 8 (fixnum tag bits are 0), the byte offset
+# is exactly idx + 13 from the tagged record pointer. No bounds check.
+:prim_record_ref_entry
+ %args2(t0, t1, a0) ; t0=rec, t1=idx (tagged fixnum = raw*8)
+ %addi(t0, t0, 13)
+ %add(t0, t0, t1)
+ %ld(a0, t0, 0)
+ %ret
+
+# (record-set! rec idx val) -> unspec. In-place store at slot idx.
+:prim_record_set_bang_entry
+%scope prim_record_set_bang
+ %car(t0, a0) ; rec
+ %cdr(a0, a0)
+ %car(t1, a0) ; idx (tagged fixnum)
+ %cdr(a0, a0)
+ %car(t2, a0) ; val
+ %addi(t0, t0, 13)
+ %add(t0, t0, t1)
+ %st(t2, t0, 0)
+ %li(a0, %imm_val(%IMM.UNSPEC))
+ %ret
+%endscope
+
+# (make-record/td td) -> fresh record allocated in the current heap.
+# Reads td.nfields, allocates 16 + nfields*8 bytes with HDR.REC, sets
+# the td slot, and zero-fills field slots to IMM.UNSPEC. Mirrors
+# eval_define_record_type's ctor allocation but driven by the TD's
+# nfields rather than a runtime args list. Used by deep-copy as a
+# pre-fill stand-in before recursive slot promotion.
+#
+# Locals:
+# td (the TD pointer; saved across alloc_hdr)
+# record (the new record pointer)
+%fn2(prim_make_record_td_entry, {td record}, {
+ %car(t0, a0) ; td
+ %stl(t0, td)
+
+ %heap_ld(a0, t0, %TD.nfields) ; raw nfields
+ %shli(a0, a0, 3) ; nfields * 8
+ %addi(a0, a0, 16) ; + REC header (hdr + td slot)
+ %li(a1, %HDR.REC)
+ %call(&alloc_hdr)
+ %stl(a0, record)
+
+ %ldl(t0, td)
+ %heap_st(t0, a0, %REC.td)
+
+ # Zero-fill field slots to IMM.UNSPEC. Cursor starts at first slot
+ # (tagged + 13); count = nfields read again from the TD.
+ %heap_ld(t0, t0, %TD.nfields)
+ %addi(t1, a0, 13)
+ %li(t2, %imm_val(%IMM.UNSPEC))
+
+ ::fill_loop
+ %beqz(t0, &::fill_done)
+ %st(t2, t1, 0)
+ %addi(t1, t1, 8)
+ %addi(t0, t0, -1)
+ %b(&::fill_loop)
+
+ ::fill_done
+ %ldl(a0, record)
+})
+
+# (td-nfields td) -> tagged fixnum count of fields.
+:prim_td_nfields_entry
+ %car(t0, a0)
+ %heap_ld(a0, t0, %TD.nfields) ; raw count
+ %mkfix(a0, a0)
+ %ret
+
+# (td-name td) -> symbol bound at define-record-type time.
+:prim_td_name_entry
+ %car(t0, a0)
+ %heap_ld(a0, t0, %TD.name)
+ %ret
+
+# Debug primitives. UNSAFE: peek-u8 dereferences arbitrary addresses.
+# Intended for diagnosing heap-layout bugs from scheme1 user code; not
+# part of the surface contract.
+
+# (tagged-value obj) -> fixnum. Returns the raw byte address of obj
+# with tag bits masked off, encoded as a tagged fixnum so format /
+# display can print it. Pass the result back into peek-u8 to read raw
+# bytes. For non-pointer values (fixnums, immediates, syms) the masked
+# value is small but still encodable; the result is meaningful only for
+# heap-tagged inputs.
+:prim_tagged_value_entry
+ %car(t0, a0)
+ %li(t1, -8)
+ %and(t0, t0, t1)
+ %mkfix(a0, t0)
+ %ret
+
+# (peek-u8 addr) -> fixnum. Reads one byte at the given raw byte
+# address (tagged fixnum input, untagged inside). UNSAFE: no bounds
+# check; a wild address segfaults the process.
+:prim_peek_u8_entry
+ %car(t0, a0)
+ %sari(t0, t0, 3)
+ %lb(a0, t0, 0)
+ %mkfix(a0, a0)
+ %ret
+
+# (current-heap-next) -> fixnum. Returns the current heap's bump
+# pointer (raw byte address) as a tagged fixnum. Used to inspect where
+# the next allocation will land.
+:prim_current_heap_next_entry
+ %ld_global(t0, ¤t_heap_next_ptr)
+ %ld(t0, t0, 0)
+ %mkfix(a0, t0)
+ %ret
+
# heap_oom_die() -> never returns. Reached from cons / alloc_hdr /
# alloc_bytes when current_*_next would pass current_*_end. Selects
# msg_heap_full vs msg_scratch_full by comparing current_heap_next_ptr
@@ -6126,6 +6305,17 @@
:name_use_main_heap_bang "use-main-heap!" '00'
:name_reset_scratch_heap_bang "reset-scratch-heap!" '00000000'
:name_heap_in_main_q "heap-in-main?" '0000'
+:name_heap_in_current_q "heap-in-current?" '00000000000000'
+:name_recordq "record?"
+:name_record_td "record-td" '000000000000'
+:name_record_ref "record-ref" '0000000000'
+:name_record_set_bang "record-set!" '00000000'
+:name_make_record_td "make-record/td" '00'
+:name_td_nfields "td-nfields" '0000000000'
+:name_td_name "td-name"
+:name_tagged_value "tagged-value" '000000'
+:name_peek_u8 "peek-u8"
+:name_current_heap_next "current-heap-next" '000000000000'
# Writer string constants. Lengths are hard-coded at the str_putn call
# sites (write_to_bv branches). No NUL needed in the source bytes --
@@ -6215,6 +6405,17 @@
&name_use_main_heap_bang %(0) $(14) &prim_use_main_heap_bang_entry %(0)
&name_reset_scratch_heap_bang %(0) $(19) &prim_reset_scratch_heap_bang_entry %(0)
&name_heap_in_main_q %(0) $(13) &prim_heap_in_main_q_entry %(0)
+&name_heap_in_current_q %(0) $(16) &prim_heap_in_current_q_entry %(0)
+&name_recordq %(0) $(7) &prim_recordq_entry %(0)
+&name_record_td %(0) $(9) &prim_record_td_entry %(0)
+&name_record_ref %(0) $(10) &prim_record_ref_entry %(0)
+&name_record_set_bang %(0) $(11) &prim_record_set_bang_entry %(0)
+&name_make_record_td %(0) $(14) &prim_make_record_td_entry %(0)
+&name_td_nfields %(0) $(10) &prim_td_nfields_entry %(0)
+&name_td_name %(0) $(7) &prim_td_name_entry %(0)
+&name_tagged_value %(0) $(12) &prim_tagged_value_entry %(0)
+&name_peek_u8 %(0) $(7) &prim_peek_u8_entry %(0)
+&name_current_heap_next %(0) $(17) &prim_current_heap_next_entry %(0)
&name_values %(0) $(6) &prim_values_entry %(0)
&name_call_with_values %(0) $(16) &prim_call_with_values_entry %(0)
:prim_table_end
diff --git a/tests/scheme1/121-record-introspection.expected-exit b/tests/scheme1/121-record-introspection.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/121-record-introspection.scm b/tests/scheme1/121-record-introspection.scm
@@ -0,0 +1,87 @@
+; Record introspection primitives: record?, record-td, record-ref,
+; record-set!, make-record/td, td-nfields, td-name, heap-in-current?.
+; Backs the generic deep-copy walker (see docs/DEEP-COPY.md).
+
+(define-record-type point
+ (mk-point x y)
+ point?
+ (x point-x)
+ (y point-y point-y-set!))
+
+(define-record-type box
+ (mk-box val)
+ box?
+ (val box-val box-val-set!))
+
+(define p (mk-point 3 4))
+(define b (mk-box 7))
+
+;; ---- record? ----
+(if (record? p) 0 (sys-exit 1))
+(if (record? b) 0 (sys-exit 2))
+(if (not (record? 5)) 0 (sys-exit 3))
+(if (not (record? "bv")) 0 (sys-exit 4))
+(if (not (record? '())) 0 (sys-exit 5))
+(if (not (record? '(a))) 0 (sys-exit 6))
+(if (not (record? 'sym)) 0 (sys-exit 7))
+(if (not (record? #f)) 0 (sys-exit 8))
+
+;; ---- record-td: identity for instances of the same type ----
+(define td-p (record-td p))
+(define p2 (mk-point 9 10))
+(if (eq? td-p (record-td p2)) 0 (sys-exit 10))
+;; Different types yield different TDs
+(if (not (eq? td-p (record-td b))) 0 (sys-exit 11))
+
+;; ---- td-nfields ----
+(if (= 2 (td-nfields td-p)) 0 (sys-exit 20))
+(if (= 1 (td-nfields (record-td b))) 0 (sys-exit 21))
+
+;; ---- td-name ----
+(if (eq? 'point (td-name td-p)) 0 (sys-exit 30))
+(if (eq? 'box (td-name (record-td b))) 0 (sys-exit 31))
+
+;; ---- record-ref reads slots by index ----
+(if (= 3 (record-ref p 0)) 0 (sys-exit 40))
+(if (= 4 (record-ref p 1)) 0 (sys-exit 41))
+(if (= 7 (record-ref b 0)) 0 (sys-exit 42))
+
+;; ---- record-set! writes slots by index ----
+(record-set! p 1 42)
+(if (= 42 (point-y p)) 0 (sys-exit 50))
+(if (= 42 (record-ref p 1)) 0 (sys-exit 51))
+(record-set! b 0 'tagged)
+(if (eq? 'tagged (box-val b)) 0 (sys-exit 52))
+
+;; ---- make-record/td: zero-filled to UNSPEC, then fillable ----
+(define p3 (make-record/td td-p))
+(if (record? p3) 0 (sys-exit 60))
+(if (eq? td-p (record-td p3)) 0 (sys-exit 61))
+(if (point? p3) 0 (sys-exit 62))
+(record-set! p3 0 100)
+(record-set! p3 1 200)
+(if (= 100 (point-x p3)) 0 (sys-exit 63))
+(if (= 200 (point-y p3)) 0 (sys-exit 64))
+
+;; ---- heap-in-current? generalizes heap-in-main? ----
+;; Default heap is main.
+(if (heap-in-current? p) 0 (sys-exit 70))
+(if (heap-in-current? "bv-in-main") 0 (sys-exit 71))
+;; Tagged non-pointers are #f.
+(if (not (heap-in-current? 5)) 0 (sys-exit 72))
+(if (not (heap-in-current? 'sym)) 0 (sys-exit 73))
+(if (not (heap-in-current? #f)) 0 (sys-exit 74))
+(if (not (heap-in-current? '())) 0 (sys-exit 75))
+
+;; Allocate a record in scratch, then in main; each is in-current within
+;; its arena and not in-current after switching.
+(use-scratch-heap!)
+(define s-rec (mk-point 1 2))
+(if (heap-in-current? s-rec) 0 (sys-exit 80))
+(if (not (heap-in-main? s-rec)) 0 (sys-exit 81))
+(use-main-heap!)
+(if (not (heap-in-current? s-rec)) 0 (sys-exit 82))
+(if (heap-in-current? p) 0 (sys-exit 83))
+(reset-scratch-heap!)
+
+(sys-exit 42)
diff --git a/tests/scheme1/122-deep-copy.expected-exit b/tests/scheme1/122-deep-copy.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/122-deep-copy.scm b/tests/scheme1/122-deep-copy.scm
@@ -0,0 +1,125 @@
+; Generic deep-copy: structural clone of pairs / bytevectors / records,
+; with identity preservation across shared substructure and cycle
+; tolerance via an eager stand-in registered before slot fill. Used by
+; cc.scm to promote scratch-allocated parse output into main; mirror
+; here in scheme1 itself to lock down the contract. See
+; docs/DEEP-COPY.md.
+;
+; By design, deep-copy short-circuits when an object is already in the
+; current heap (so re-promotion is O(1)). The structural tests below
+; therefore allocate sources in scratch and copy with main current.
+
+(define-record-type cell
+ (mk-cell head tail)
+ cell?
+ (head cell-head cell-head-set!)
+ (tail cell-tail cell-tail-set!))
+
+;; ---- Already-in-target short-circuit returns the same object ----
+(define m-pair (cons 1 2))
+(define ctx0 (make-deep-copy-context))
+(if (eq? m-pair (deep-copy ctx0 m-pair)) 0 (sys-exit 1))
+
+;; ---- Symbols pass through untouched (interned) ----
+(if (eq? 'foo (deep-copy ctx0 'foo)) 0 (sys-exit 2))
+
+;; ---- Fixnums / immediates pass through ----
+(if (= 42 (deep-copy ctx0 42)) 0 (sys-exit 3))
+(if (eq? #t (deep-copy ctx0 #t)) 0 (sys-exit 4))
+(if (eq? '() (deep-copy ctx0 '())) 0 (sys-exit 5))
+
+;; ---- Pair deep-copy from scratch -> main ----
+(use-scratch-heap!)
+(define s-list (cons 1 (cons 2 (cons 3 '()))))
+(use-main-heap!)
+(define ctx1 (make-deep-copy-context))
+(define m-list (deep-copy ctx1 s-list))
+(if (equal? s-list m-list) 0 (sys-exit 10))
+(if (not (eq? s-list m-list)) 0 (sys-exit 11))
+(if (heap-in-main? m-list) 0 (sys-exit 12))
+(if (heap-in-main? (cdr m-list)) 0 (sys-exit 13))
+(reset-scratch-heap!)
+;; m-list survives scratch reset
+(if (equal? m-list (cons 1 (cons 2 (cons 3 '())))) 0 (sys-exit 14))
+
+;; ---- Bytevector deep-copy from scratch -> main ----
+(use-scratch-heap!)
+(define s-bv (bytevector 1 2 3 4 5))
+(use-main-heap!)
+(define ctx2 (make-deep-copy-context))
+(define m-bv (deep-copy ctx2 s-bv))
+(if (bytevector=? s-bv m-bv) 0 (sys-exit 20))
+(if (not (eq? s-bv m-bv)) 0 (sys-exit 21))
+(if (heap-in-main? m-bv) 0 (sys-exit 22))
+(reset-scratch-heap!)
+;; main-heap copy survives scratch reset
+(if (= 5 (bytevector-length m-bv)) 0 (sys-exit 23))
+(if (= 1 (bytevector-u8-ref m-bv 0)) 0 (sys-exit 24))
+(if (= 5 (bytevector-u8-ref m-bv 4)) 0 (sys-exit 25))
+
+;; ---- Record deep-copy from scratch -> main ----
+(use-scratch-heap!)
+(define s-cell (mk-cell 10 20))
+(use-main-heap!)
+(define ctx3 (make-deep-copy-context))
+(define m-cell (deep-copy ctx3 s-cell))
+(if (cell? m-cell) 0 (sys-exit 30))
+(if (not (eq? s-cell m-cell)) 0 (sys-exit 31))
+(if (= 10 (cell-head m-cell)) 0 (sys-exit 32))
+(if (= 20 (cell-tail m-cell)) 0 (sys-exit 33))
+;; Same TD: TDs are persistent and not copied
+(if (eq? (record-td s-cell) (record-td m-cell)) 0 (sys-exit 34))
+(if (heap-in-main? m-cell) 0 (sys-exit 35))
+(reset-scratch-heap!)
+(if (= 10 (cell-head m-cell)) 0 (sys-exit 36))
+
+;; ---- Identity preservation across shared subobjects ----
+(use-scratch-heap!)
+(define s-shared (cons 'a 'b))
+(define s-x (cons s-shared s-shared))
+(use-main-heap!)
+(define ctx4 (make-deep-copy-context))
+(define m-x (deep-copy ctx4 s-x))
+;; Both halves of the result reference one fresh shared cons in main.
+(if (eq? (car m-x) (cdr m-x)) 0 (sys-exit 40))
+(if (not (eq? (car m-x) s-shared)) 0 (sys-exit 41))
+(if (eq? 'a (car (car m-x))) 0 (sys-exit 42))
+(if (eq? 'b (cdr (car m-x))) 0 (sys-exit 43))
+(reset-scratch-heap!)
+
+;; ---- Cycle handling (record points to itself) ----
+(use-scratch-heap!)
+(define s-cyc (mk-cell 1 #f))
+(cell-tail-set! s-cyc s-cyc)
+(use-main-heap!)
+(define ctx5 (make-deep-copy-context))
+(define m-cyc (deep-copy ctx5 s-cyc))
+(if (not (eq? s-cyc m-cyc)) 0 (sys-exit 50))
+(if (= 1 (cell-head m-cyc)) 0 (sys-exit 51))
+(if (eq? m-cyc (cell-tail m-cyc)) 0 (sys-exit 52))
+(reset-scratch-heap!)
+;; m-cyc still its own tail after scratch reset
+(if (eq? m-cyc (cell-tail m-cyc)) 0 (sys-exit 53))
+
+;; ---- Mixed pair-of-record graph ----
+(use-scratch-heap!)
+(define s-rec (mk-cell 9 (cons 1 (cons 2 '()))))
+(use-main-heap!)
+(define ctx6 (make-deep-copy-context))
+(define m-rec (deep-copy ctx6 s-rec))
+(if (not (eq? s-rec m-rec)) 0 (sys-exit 60))
+(if (= 9 (cell-head m-rec)) 0 (sys-exit 61))
+(if (equal? (cell-tail m-rec) '(1 2)) 0 (sys-exit 62))
+(if (heap-in-main? m-rec) 0 (sys-exit 63))
+(if (heap-in-main? (cell-tail m-rec)) 0 (sys-exit 64))
+(reset-scratch-heap!)
+(if (equal? (cell-tail m-rec) '(1 2)) 0 (sys-exit 65))
+
+;; ---- ctx reuse: second pass over the same already-copied root is O(1)
+;; via heap-in-current?, returning the main-heap object directly.
+(define ctx7 (make-deep-copy-context))
+(define copy-once (deep-copy ctx7 (cons 'a (cons 'b '()))))
+(define copy-again (deep-copy ctx7 copy-once))
+(if (eq? copy-once copy-again) 0 (sys-exit 70))
+
+(sys-exit 42)