commit 25cd91ac28bfcead03871cd6a55f14c78ba3ff58
parent e43969a17afb5202cc1ec0f5752c34350a6f4475
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 09:25:20 -0700
scheme1: CC-SCRATCH Phase 2 — two-heap primitives
Adds a 16 MiB scratch arena alongside the 256 MiB main heap and
routes cons / alloc_hdr / alloc_bytes through current_heap_next_ptr /
current_heap_end_ptr indirection, defaulting to main at heap_init.
New primitives:
(use-scratch-heap!) repoint current_heap_*_ptr at scratch_*
(use-main-heap!) repoint current_heap_*_ptr at heap_*
(reset-scratch-heap!) scratch_next = scratch_buf_ptr (aligned)
(heap-in-main? obj) bool — masked obj pointer in [heap_buf,
heap_buf + HEAP_CAP_BYTES)
heap-mark / heap-rewind! now operate on the current heap (absolute
pointer as fixnum) so mark/rewind pairs work in either context.
Scratch-overflow OOM is dispatched separately ("scratch exhausted")
via a shared heap_oom_die helper. Existing 512 MB p_memsz already
covers the new arena (1 + 0.25 + 256 + 16 MiB ≈ 273 MiB).
tests/scheme1/115-two-heap.scm exercises the A → B → C clone
pattern Phase 3 will use at the parse-decl-or-fn boundary: B
switches to scratch, C allocates freely, B switches back to main,
clones C's survivor, resets scratch, and returns the main-heap
clone to the heap-unaware caller.
Diffstat:
3 files changed, 224 insertions(+), 36 deletions(-)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -33,10 +33,11 @@
# BSS arenas anchored past :ELF_end. readbuf is 1 MiB (sized to fit
# the catm'd cc compiler source incl. prelude — see READBUF_CAP_BYTES),
-# then symtab, then the heap last so it can use the full remainder of
-# the ELF p_memsz reservation (currently 512 MiB) declared in
-# vendor/seed/<arch>/ELF.hex2. HEAP_CAP_BYTES (currently 256 MiB) is
-# the explicit cap; total bss usage = readbuf + symtab + heap and must
+# then symtab, then the main heap, then the scratch heap. The ELF
+# p_memsz reservation (currently 512 MiB, declared in
+# vendor/seed/<arch>/ELF.hex2) covers all of them with headroom.
+# HEAP_CAP_BYTES (256 MiB) and SCRATCH_CAP_BYTES (16 MiB) are explicit
+# caps; total bss usage = readbuf + symtab + heap + scratch and must
# fit inside p_memsz. p1_main calls libp1pp's init_arenas, which walks
# arena_table and writes &ELF_end + sum of prior sizes into each
# pointer slot.
@@ -44,6 +45,7 @@
%macro SYMTAB_CAP_SLOTS() 8192 %endm
%macro READBUF_CAP_BYTES() 1048576 %endm
%macro HEAP_CAP_BYTES() 0x10000000 %endm
+%macro SCRATCH_CAP_BYTES() 0x1000000 %endm
# =========================================================================
# Tag idioms
@@ -3422,19 +3424,24 @@
# Heap: cons (leaf) and alloc_hdr (leaf)
# =========================================================================
#
-# Both are call-free leaves: bump heap_next, write fields, return tagged
-# pointer. Each allocation tests (new_next <= heap_end) and aborts via
-# runtime_error if the bump would overflow the heap arena. heap_next is
-# kept 8-byte aligned so every PAIR/HEAP tag bit is exact: cons always
-# bumps by a multiple of 8 (16); alloc_hdr / alloc_bytes round their
-# argument up via %alignup(_,_,8,_).
+# Both are call-free leaves: bump *current_heap_next_ptr, write fields,
+# return tagged pointer. Each allocation tests (new_next <=
+# *current_heap_end_ptr) and aborts via runtime_error if the bump would
+# overflow the current heap. The current heap is selected by
+# (use-scratch-heap!) / (use-main-heap!); both pointer-of-pointer slots
+# default to &heap_next / &heap_end at heap_init. *_next is kept 8-byte
+# aligned so every PAIR/HEAP tag bit is exact: cons always bumps by a
+# multiple of 8 (16); alloc_hdr / alloc_bytes round their argument up
+# via %alignup(_,_,8,_).
# cons(car=a0, cdr=a1) -> tagged pair (a0). Allocates 16 bytes.
:cons
%scope cons
- %lda_global(t0, t2, &heap_next)
+ %ld_global(t2, ¤t_heap_next_ptr)
+ %ld(t0, t2, 0)
%addi(t1, t0, %PAIR.SIZE)
- %ld_global(a3, &heap_end)
+ %ld_global(a3, ¤t_heap_end_ptr)
+ %ld(a3, a3, 0)
%bltu(a3, t1, &::oom)
%st(a0, t0, %PAIR.car)
%st(a1, t0, %PAIR.cdr)
@@ -3442,7 +3449,7 @@
%addi(a0, t0, %TAG.PAIR)
%ret
::oom
- %die(msg_heap_full)
+ %b(&heap_oom_die)
%endscope
# alloc_hdr(bytes=a0, hdr_word=a1) -> tagged heap obj (a0)
@@ -3450,16 +3457,18 @@
:alloc_hdr
%scope alloc_hdr
%alignup(a0, a0, 8, t0)
- %lda_global(t0, t2, &heap_next)
+ %ld_global(t2, ¤t_heap_next_ptr)
+ %ld(t0, t2, 0)
%add(t1, t0, a0)
- %ld_global(a3, &heap_end)
+ %ld_global(a3, ¤t_heap_end_ptr)
+ %ld(a3, a3, 0)
%bltu(a3, t1, &::oom)
%st(t1, t2, 0)
%st(a1, t0, 0)
%addi(a0, t0, 3)
%ret
::oom
- %die(msg_heap_full)
+ %b(&heap_oom_die)
%endscope
# list_length(list=a0) -> count (a0). Linear walk; clobbers a0 (used as
@@ -4331,15 +4340,17 @@
:alloc_bytes
%scope alloc_bytes
%alignup(a0, a0, 8, t0)
- %lda_global(t1, t2, &heap_next)
+ %ld_global(t2, ¤t_heap_next_ptr)
+ %ld(t1, t2, 0)
%add(t0, t1, a0)
- %ld_global(a3, &heap_end)
+ %ld_global(a3, ¤t_heap_end_ptr)
+ %ld(a3, a3, 0)
%bltu(a3, t0, &::oom)
%st(t0, t2, 0)
%mov(a0, t1)
%ret
::oom
- %die(msg_heap_full)
+ %b(&heap_oom_die)
%endscope
# bv_capacity_for(n=a0) -> smallest power-of-two ≥ n, minimum 16. Pure
@@ -6195,32 +6206,103 @@
%mkfix(a0, a0)
%ret
-# (heap-mark) -> tagged fixnum: bytes consumed since heap_init.
+# (heap-mark) -> tagged fixnum: absolute current_*_next pointer.
# Capture before transient allocations; pass to (heap-rewind! m) to
# discard everything allocated after the mark. UNSAFE: any pointer
# referencing the rewound region becomes dangling. Caller must keep only
# fixnums / symbols / pointers into surviving (pre-mark) heap across the
-# rewind. Same body as heap-usage; kept distinct for callsite clarity.
+# rewind. Operates on whichever heap is current; mark and rewind must
+# pair within the same heap context.
:prim_heap_mark_entry
- %ld_global(t0, &heap_next)
- %ld_global(t1, &heap_buf_ptr)
- %sub(a0, t0, t1)
- %mkfix(a0, a0)
+ %ld_global(t0, ¤t_heap_next_ptr)
+ %ld(t0, t0, 0)
+ %mkfix(a0, t0)
%ret
-# (heap-rewind! mark) -> unspec. Resets heap_next to (heap_buf_ptr +
-# mark). UNSAFE: subsequent allocations overwrite the freed region; any
+# (heap-rewind! mark) -> unspec. Restores current_*_next to mark.
+# UNSAFE: subsequent allocations overwrite the freed region; any
# surviving reference into it is dangling. No bounds check -- caller
-# passes a value previously returned by (heap-mark).
+# passes a value previously returned by (heap-mark) on the same heap.
:prim_heap_rewind_bang_entry
%car(t0, a0)
%untag_fix(t0, t0)
- %ld_global(t1, &heap_buf_ptr)
- %add(t0, t0, t1)
- %st_global(t0, &heap_next, t1)
+ %ld_global(t1, ¤t_heap_next_ptr)
+ %st(t0, t1, 0)
+ %li(a0, %imm_val(%IMM.UNSPEC))
+ %ret
+
+# (use-scratch-heap!) -> unspec. Repoints current_heap_*_ptr at the
+# scratch heap's next/end slots. Subsequent cons / alloc_hdr /
+# alloc_bytes bump scratch_next; alloc that would cross scratch_end
+# dies via heap_oom_die ("scratch exhausted").
+:prim_use_scratch_heap_bang_entry
+ %la(t0, &scratch_next)
+ %st_global(t0, ¤t_heap_next_ptr, t1)
+ %la(t0, &scratch_end)
+ %st_global(t0, ¤t_heap_end_ptr, t1)
+ %li(a0, %imm_val(%IMM.UNSPEC))
+ %ret
+
+# (use-main-heap!) -> unspec. Repoints current_heap_*_ptr at the main
+# heap's next/end slots. Default at heap_init.
+:prim_use_main_heap_bang_entry
+ %la(t0, &heap_next)
+ %st_global(t0, ¤t_heap_next_ptr, t1)
+ %la(t0, &heap_end)
+ %st_global(t0, ¤t_heap_end_ptr, t1)
+ %li(a0, %imm_val(%IMM.UNSPEC))
+ %ret
+
+# (reset-scratch-heap!) -> unspec. Resets scratch_next to the start of
+# the scratch arena (8-byte aligned). UNSAFE: any reference into scratch
+# becomes dangling. Caller is responsible for having promoted survivors
+# to main first.
+:prim_reset_scratch_heap_bang_entry
+ %ld_global(t0, &scratch_buf_ptr)
+ %alignup(t0, t0, 8, t1)
+ %st_global(t0, &scratch_next, t1)
%li(a0, %imm_val(%IMM.UNSPEC))
%ret
+# (heap-in-main? obj) -> bool. True iff obj's masked pointer falls
+# inside the main heap arena [heap_buf_ptr, heap_buf_ptr +
+# HEAP_CAP_BYTES). Used by promote walkers to skip already-promoted /
+# scratch-resident objects. Tag bits are masked off so callers can pass
+# tagged pointers directly. Non-pointer objects (fixnums, immediates,
+# small sym indices) yield false because their masked values are far
+# below heap_buf_ptr.
+:prim_heap_in_main_q_entry
+%scope prim_heap_in_main_q
+ %car(t0, a0)
+ %li(t1, -8)
+ %and(t0, t0, t1)
+ %ld_global(t1, &heap_buf_ptr)
+ %bltu(t0, t1, &::false)
+ %li(t2, %HEAP_CAP_BYTES)
+ %add(t1, t1, t2)
+ %bltu(t0, t1, &::true)
+ ::false
+ %li(a0, %imm_val(%IMM.FALSE))
+ %ret
+ ::true
+ %li(a0, %imm_val(%IMM.TRUE))
+ %ret
+%endscope
+
+# heap_oom_die() -> never returns. Reached from cons / alloc_hdr /
+# alloc_bytes when current_*_next would pass current_*_end. Selects
+# msg_heap_full vs msg_scratch_full by comparing current_heap_next_ptr
+# against &heap_next, then tails into runtime_error via %die.
+:heap_oom_die
+%scope heap_oom_die
+ %ld_global(t0, ¤t_heap_next_ptr)
+ %la(t1, &heap_next)
+ %beq(t0, t1, &::main)
+ %die(msg_scratch_full)
+ ::main
+ %die(msg_heap_full)
+%endscope
+
# (values . xs) -- multiple-values producer. Single-arg case returns the
# arg unchanged so (values x) is interchangeable with x in any 1-value
# context; 0 or 2+ args materialize an MV-pack.
@@ -6367,9 +6449,17 @@
;; "heap-mark" + NUL = 10 bytes; pad with 6 NUL to reach 16.
;; "heap-rewind!"+ NUL = 13 bytes; pad with 3 NUL to reach 16.
;; "heap-usage" + NUL = 11 bytes; pad with 5 NUL to reach 16.
+;; "use-scratch-heap!" + NUL = 18 bytes; pad with 6 NUL to reach 24.
+;; "use-main-heap!" + NUL = 15 bytes; pad with 1 NUL to reach 16.
+;; "reset-scratch-heap!"+ NUL = 20 bytes; pad with 4 NUL to reach 24.
+;; "heap-in-main?" + NUL = 14 bytes; pad with 2 NUL to reach 16.
:name_heap_mark "heap-mark" '000000000000'
:name_heap_rewind_bang "heap-rewind!" '000000'
:name_heap_usage "heap-usage" '0000000000'
+:name_use_scratch_heap_bang "use-scratch-heap!" '000000000000'
+:name_use_main_heap_bang "use-main-heap!" '00'
+:name_reset_scratch_heap_bang "reset-scratch-heap!" '00000000'
+:name_heap_in_main_q "heap-in-main?" '0000'
# Writer string constants. Lengths are hard-coded at the str_putn call
# sites (write_to_bv branches). No NUL needed in the source bytes --
@@ -6453,6 +6543,10 @@
&name_heap_usage %(0) $(10) &prim_heap_usage_entry %(0)
&name_heap_mark %(0) $(9) &prim_heap_mark_entry %(0)
&name_heap_rewind_bang %(0) $(12) &prim_heap_rewind_bang_entry %(0)
+&name_use_scratch_heap_bang %(0) $(17) &prim_use_scratch_heap_bang_entry %(0)
+&name_use_main_heap_bang %(0) $(14) &prim_use_main_heap_bang_entry %(0)
+&name_reset_scratch_heap_bang %(0) $(19) &prim_reset_scratch_heap_bang_entry %(0)
+&name_heap_in_main_q %(0) $(13) &prim_heap_in_main_q_entry %(0)
&name_values %(0) $(6) &prim_values_entry %(0)
&name_call_with_values %(0) $(16) &prim_call_with_values_entry %(0)
:prim_table_end
@@ -6467,6 +6561,10 @@
:msg_unbound "scheme1: unbound variable" '0a' '00'
:msg_not_proc "scheme1: not a procedure" '0a' '00'
:msg_heap_full "scheme1: heap exhausted" '0a' '00'
+;; "scheme1: scratch exhausted" + auto-NUL = 27 bytes; '0a' '00' = 2;
+;; pad to 32 (multiple of 4) so the code that follows stays 4-aligned
+;; — aarch64 instruction fetch SIGBUSes on a misaligned PC.
+:msg_scratch_full "scheme1: scratch exhausted" '0a' '00000000'
:msg_readbuf_full "scheme1: source buffer overflow" '0a' '00'
:msg_bv_oob "scheme1: bytevector index out of range" '0a' '00'
:msg_unterm_string "scheme1: unterminated string literal" '0a' '00'
@@ -6489,11 +6587,13 @@
# Startup -- heap_init
# =========================================================================
-# heap_init() -> none. Sets heap_next (&heap_buf rounded up to 8-byte
-# alignment) and heap_end (heap_buf + HEAP_CAP_BYTES). cons assumes
-# 8-byte-aligned heap_next so every pair pointer's low 3 bits are exactly
-# the PAIR tag; &ELF_end's alignment depends on the data section above
-# it. cons / alloc_hdr / alloc_bytes test (heap_next + bytes <= heap_end)
+# heap_init() -> none. Initializes the main heap (heap_next /
+# heap_end), the scratch heap (scratch_next / scratch_end), and points
+# current_heap_*_ptr at the main slots so cons / alloc_hdr /
+# alloc_bytes default to allocating in main. Both _next slots are
+# rounded up to 8-byte alignment so every PAIR/HEAP tag bit is exact;
+# &ELF_end's alignment depends on the data section above it. cons /
+# alloc_hdr / alloc_bytes test (*current_next + bytes <= *current_end)
# on every allocation and abort via runtime_error on overflow. Leaf.
:heap_init
%ld_global(t0, &heap_buf_ptr)
@@ -6505,6 +6605,20 @@
%add(t0, t0, t1)
%st_global(t0, &heap_end, t1)
+ %ld_global(t0, &scratch_buf_ptr)
+ %alignup(t0, t0, 8, t1)
+ %st_global(t0, &scratch_next, t1)
+
+ %ld_global(t0, &scratch_buf_ptr)
+ %li(t1, %SCRATCH_CAP_BYTES)
+ %add(t0, t0, t1)
+ %st_global(t0, &scratch_end, t1)
+
+ %la(t0, &heap_next)
+ %st_global(t0, ¤t_heap_next_ptr, t1)
+ %la(t0, &heap_end)
+ %st_global(t0, ¤t_heap_end_ptr, t1)
+
%ret
# =========================================================================
@@ -6518,6 +6632,7 @@
%arena_entry(&readbuf_buf_ptr, %READBUF_CAP_BYTES)
%arena_entry(&symtab_buf_ptr, (* %SYMTAB_CAP_SLOTS %SYMENT.SIZE))
%arena_entry(&heap_buf_ptr, %HEAP_CAP_BYTES)
+%arena_entry(&scratch_buf_ptr, %SCRATCH_CAP_BYTES)
:arena_table_end
# =========================================================================
@@ -6531,6 +6646,21 @@
# HEAP_CAP_BYTES). Read on every allocation.
:heap_end $(0)
+# scratch_next / scratch_end: bump pointer and limit for the scratch
+# heap (= scratch_buf_ptr .. + SCRATCH_CAP_BYTES). Selected via
+# (use-scratch-heap!); reset to scratch_buf_ptr by
+# (reset-scratch-heap!).
+:scratch_next $(0)
+:scratch_end $(0)
+
+# current_heap_next_ptr / current_heap_end_ptr: pointer-of-pointer
+# slots holding either &heap_next/&heap_end or
+# &scratch_next/&scratch_end. cons / alloc_hdr / alloc_bytes /
+# heap-mark / heap-rewind! double-deref through these slots so the
+# heap selection is a single store of two addresses.
+:current_heap_next_ptr $(0)
+:current_heap_end_ptr $(0)
+
# Source-buffer cursor and slurped length.
:readbuf_pos $(0)
:readbuf_len $(0)
@@ -6585,5 +6715,6 @@
:readbuf_buf_ptr $(0)
:heap_buf_ptr $(0)
:symtab_buf_ptr $(0)
+:scratch_buf_ptr $(0)
:ELF_end
diff --git a/tests/scheme1/115-two-heap.expected-exit b/tests/scheme1/115-two-heap.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/115-two-heap.scm b/tests/scheme1/115-two-heap.scm
@@ -0,0 +1,56 @@
+; Two-heap primitives, A → B → C ergonomics. The shape Phase 3 of
+; CC-SCRATCH.md will use at the parse-decl-or-fn boundary.
+;
+; A: caller. Heap-unaware; just calls (B input) and uses the result.
+; B: boundary. Switches to scratch, runs C in scratch, switches back
+; to main, *clones* C's survivor into the main heap (the "promote"
+; walker), resets scratch, returns the main-heap clone.
+; C: worker. Heap-unaware; allocates freely in whatever heap is
+; current. Returns a freshly-allocated (sum . count) pair.
+;
+; The clone runs while main is current and reads from scratch via
+; ordinary pointer access — neither heap selection affects loads, only
+; allocations. After the clone, scratch can be wholesale reset because
+; nothing in main points into it.
+
+(define (C input)
+ (let loop ((xs input) (sum 0) (count 0))
+ (if (null? xs)
+ (cons sum count)
+ (loop (cdr xs) (+ sum (car xs)) (+ count 1)))))
+
+; Promote: read from scratch-allocated pair, allocate the clone in
+; the currently-selected heap. C's result has fixnum car/cdr so a
+; one-level cons is enough; real promote walkers recurse on heap-
+; allocated children.
+(define (promote-pair p)
+ (cons (car p) (cdr p)))
+
+(define (B input)
+ (use-scratch-heap!)
+ (let ((scratch-result (C input)))
+ (use-main-heap!)
+ (let ((main-result (promote-pair scratch-result)))
+ (reset-scratch-heap!)
+ main-result)))
+
+(define (A)
+ (let ((input (cons 1 (cons 2 (cons 3 (cons 4 (cons 5 '())))))))
+ (B input)))
+
+; Drive B twice. A scratch leak would push the second call past the
+; cap; a missed promotion would leave r1 dangling once r2's run resets.
+(define r1 (A))
+(define r2 (A))
+(define r1-in-main? (heap-in-main? r1))
+(define r2-in-main? (heap-in-main? r2))
+
+(sys-exit
+ (cond
+ ((not r1-in-main?) 1) ; clone landed in main
+ ((not r2-in-main?) 2)
+ ((not (= (car r1) 15)) 3) ; r1 still readable after r2's reset
+ ((not (= (cdr r1) 5)) 4)
+ ((not (= (car r2) 15)) 5)
+ ((not (= (cdr r2) 5)) 6)
+ (else 42)))