boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 25cd91ac28bfcead03871cd6a55f14c78ba3ff58
parent e43969a17afb5202cc1ec0f5752c34350a6f4475
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 09:25:20 -0700

scheme1: CC-SCRATCH Phase 2 — two-heap primitives

Adds a 16 MiB scratch arena alongside the 256 MiB main heap and
routes cons / alloc_hdr / alloc_bytes through current_heap_next_ptr /
current_heap_end_ptr indirection, defaulting to main at heap_init.

New primitives:
  (use-scratch-heap!)     repoint current_heap_*_ptr at scratch_*
  (use-main-heap!)        repoint current_heap_*_ptr at heap_*
  (reset-scratch-heap!)   scratch_next = scratch_buf_ptr (aligned)
  (heap-in-main? obj)     bool — masked obj pointer in [heap_buf,
                          heap_buf + HEAP_CAP_BYTES)

heap-mark / heap-rewind! now operate on the current heap (absolute
pointer as fixnum) so mark/rewind pairs work in either context.
Scratch-overflow OOM is dispatched separately ("scratch exhausted")
via a shared heap_oom_die helper. Existing 512 MB p_memsz already
covers the new arena (1 + 0.25 + 256 + 16 MiB ≈ 273 MiB).

tests/scheme1/115-two-heap.scm exercises the A → B → C clone
pattern Phase 3 will use at the parse-decl-or-fn boundary: B
switches to scratch, C allocates freely, B switches back to main,
clones C's survivor, resets scratch, and returns the main-heap
clone to the heap-unaware caller.

Diffstat:
Mscheme1/scheme1.P1pp | 203+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Atests/scheme1/115-two-heap.expected-exit | 1+
Atests/scheme1/115-two-heap.scm | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 224 insertions(+), 36 deletions(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -33,10 +33,11 @@ # BSS arenas anchored past :ELF_end. readbuf is 1 MiB (sized to fit # the catm'd cc compiler source incl. prelude — see READBUF_CAP_BYTES), -# then symtab, then the heap last so it can use the full remainder of -# the ELF p_memsz reservation (currently 512 MiB) declared in -# vendor/seed/<arch>/ELF.hex2. HEAP_CAP_BYTES (currently 256 MiB) is -# the explicit cap; total bss usage = readbuf + symtab + heap and must +# then symtab, then the main heap, then the scratch heap. The ELF +# p_memsz reservation (currently 512 MiB, declared in +# vendor/seed/<arch>/ELF.hex2) covers all of them with headroom. +# HEAP_CAP_BYTES (256 MiB) and SCRATCH_CAP_BYTES (16 MiB) are explicit +# caps; total bss usage = readbuf + symtab + heap + scratch and must # fit inside p_memsz. p1_main calls libp1pp's init_arenas, which walks # arena_table and writes &ELF_end + sum of prior sizes into each # pointer slot. @@ -44,6 +45,7 @@ %macro SYMTAB_CAP_SLOTS() 8192 %endm %macro READBUF_CAP_BYTES() 1048576 %endm %macro HEAP_CAP_BYTES() 0x10000000 %endm +%macro SCRATCH_CAP_BYTES() 0x1000000 %endm # ========================================================================= # Tag idioms @@ -3422,19 +3424,24 @@ # Heap: cons (leaf) and alloc_hdr (leaf) # ========================================================================= # -# Both are call-free leaves: bump heap_next, write fields, return tagged -# pointer. Each allocation tests (new_next <= heap_end) and aborts via -# runtime_error if the bump would overflow the heap arena. heap_next is -# kept 8-byte aligned so every PAIR/HEAP tag bit is exact: cons always -# bumps by a multiple of 8 (16); alloc_hdr / alloc_bytes round their -# argument up via %alignup(_,_,8,_). +# Both are call-free leaves: bump *current_heap_next_ptr, write fields, +# return tagged pointer. Each allocation tests (new_next <= +# *current_heap_end_ptr) and aborts via runtime_error if the bump would +# overflow the current heap. The current heap is selected by +# (use-scratch-heap!) / (use-main-heap!); both pointer-of-pointer slots +# default to &heap_next / &heap_end at heap_init. *_next is kept 8-byte +# aligned so every PAIR/HEAP tag bit is exact: cons always bumps by a +# multiple of 8 (16); alloc_hdr / alloc_bytes round their argument up +# via %alignup(_,_,8,_). # cons(car=a0, cdr=a1) -> tagged pair (a0). Allocates 16 bytes. :cons %scope cons - %lda_global(t0, t2, &heap_next) + %ld_global(t2, &current_heap_next_ptr) + %ld(t0, t2, 0) %addi(t1, t0, %PAIR.SIZE) - %ld_global(a3, &heap_end) + %ld_global(a3, &current_heap_end_ptr) + %ld(a3, a3, 0) %bltu(a3, t1, &::oom) %st(a0, t0, %PAIR.car) %st(a1, t0, %PAIR.cdr) @@ -3442,7 +3449,7 @@ %addi(a0, t0, %TAG.PAIR) %ret ::oom - %die(msg_heap_full) + %b(&heap_oom_die) %endscope # alloc_hdr(bytes=a0, hdr_word=a1) -> tagged heap obj (a0) @@ -3450,16 +3457,18 @@ :alloc_hdr %scope alloc_hdr %alignup(a0, a0, 8, t0) - %lda_global(t0, t2, &heap_next) + %ld_global(t2, &current_heap_next_ptr) + %ld(t0, t2, 0) %add(t1, t0, a0) - %ld_global(a3, &heap_end) + %ld_global(a3, &current_heap_end_ptr) + %ld(a3, a3, 0) %bltu(a3, t1, &::oom) %st(t1, t2, 0) %st(a1, t0, 0) %addi(a0, t0, 3) %ret ::oom - %die(msg_heap_full) + %b(&heap_oom_die) %endscope # list_length(list=a0) -> count (a0). Linear walk; clobbers a0 (used as @@ -4331,15 +4340,17 @@ :alloc_bytes %scope alloc_bytes %alignup(a0, a0, 8, t0) - %lda_global(t1, t2, &heap_next) + %ld_global(t2, &current_heap_next_ptr) + %ld(t1, t2, 0) %add(t0, t1, a0) - %ld_global(a3, &heap_end) + %ld_global(a3, &current_heap_end_ptr) + %ld(a3, a3, 0) %bltu(a3, t0, &::oom) %st(t0, t2, 0) %mov(a0, t1) %ret ::oom - %die(msg_heap_full) + %b(&heap_oom_die) %endscope # bv_capacity_for(n=a0) -> smallest power-of-two ≥ n, minimum 16. Pure @@ -6195,32 +6206,103 @@ %mkfix(a0, a0) %ret -# (heap-mark) -> tagged fixnum: bytes consumed since heap_init. +# (heap-mark) -> tagged fixnum: absolute current_*_next pointer. # Capture before transient allocations; pass to (heap-rewind! m) to # discard everything allocated after the mark. UNSAFE: any pointer # referencing the rewound region becomes dangling. Caller must keep only # fixnums / symbols / pointers into surviving (pre-mark) heap across the -# rewind. Same body as heap-usage; kept distinct for callsite clarity. +# rewind. Operates on whichever heap is current; mark and rewind must +# pair within the same heap context. :prim_heap_mark_entry - %ld_global(t0, &heap_next) - %ld_global(t1, &heap_buf_ptr) - %sub(a0, t0, t1) - %mkfix(a0, a0) + %ld_global(t0, &current_heap_next_ptr) + %ld(t0, t0, 0) + %mkfix(a0, t0) %ret -# (heap-rewind! mark) -> unspec. Resets heap_next to (heap_buf_ptr + -# mark). UNSAFE: subsequent allocations overwrite the freed region; any +# (heap-rewind! mark) -> unspec. Restores current_*_next to mark. +# UNSAFE: subsequent allocations overwrite the freed region; any # surviving reference into it is dangling. No bounds check -- caller -# passes a value previously returned by (heap-mark). +# passes a value previously returned by (heap-mark) on the same heap. :prim_heap_rewind_bang_entry %car(t0, a0) %untag_fix(t0, t0) - %ld_global(t1, &heap_buf_ptr) - %add(t0, t0, t1) - %st_global(t0, &heap_next, t1) + %ld_global(t1, &current_heap_next_ptr) + %st(t0, t1, 0) + %li(a0, %imm_val(%IMM.UNSPEC)) + %ret + +# (use-scratch-heap!) -> unspec. Repoints current_heap_*_ptr at the +# scratch heap's next/end slots. Subsequent cons / alloc_hdr / +# alloc_bytes bump scratch_next; alloc that would cross scratch_end +# dies via heap_oom_die ("scratch exhausted"). +:prim_use_scratch_heap_bang_entry + %la(t0, &scratch_next) + %st_global(t0, &current_heap_next_ptr, t1) + %la(t0, &scratch_end) + %st_global(t0, &current_heap_end_ptr, t1) + %li(a0, %imm_val(%IMM.UNSPEC)) + %ret + +# (use-main-heap!) -> unspec. Repoints current_heap_*_ptr at the main +# heap's next/end slots. Default at heap_init. +:prim_use_main_heap_bang_entry + %la(t0, &heap_next) + %st_global(t0, &current_heap_next_ptr, t1) + %la(t0, &heap_end) + %st_global(t0, &current_heap_end_ptr, t1) + %li(a0, %imm_val(%IMM.UNSPEC)) + %ret + +# (reset-scratch-heap!) -> unspec. Resets scratch_next to the start of +# the scratch arena (8-byte aligned). UNSAFE: any reference into scratch +# becomes dangling. Caller is responsible for having promoted survivors +# to main first. +:prim_reset_scratch_heap_bang_entry + %ld_global(t0, &scratch_buf_ptr) + %alignup(t0, t0, 8, t1) + %st_global(t0, &scratch_next, t1) %li(a0, %imm_val(%IMM.UNSPEC)) %ret +# (heap-in-main? obj) -> bool. True iff obj's masked pointer falls +# inside the main heap arena [heap_buf_ptr, heap_buf_ptr + +# HEAP_CAP_BYTES). Used by promote walkers to skip already-promoted / +# scratch-resident objects. Tag bits are masked off so callers can pass +# tagged pointers directly. Non-pointer objects (fixnums, immediates, +# small sym indices) yield false because their masked values are far +# below heap_buf_ptr. +:prim_heap_in_main_q_entry +%scope prim_heap_in_main_q + %car(t0, a0) + %li(t1, -8) + %and(t0, t0, t1) + %ld_global(t1, &heap_buf_ptr) + %bltu(t0, t1, &::false) + %li(t2, %HEAP_CAP_BYTES) + %add(t1, t1, t2) + %bltu(t0, t1, &::true) + ::false + %li(a0, %imm_val(%IMM.FALSE)) + %ret + ::true + %li(a0, %imm_val(%IMM.TRUE)) + %ret +%endscope + +# heap_oom_die() -> never returns. Reached from cons / alloc_hdr / +# alloc_bytes when current_*_next would pass current_*_end. Selects +# msg_heap_full vs msg_scratch_full by comparing current_heap_next_ptr +# against &heap_next, then tails into runtime_error via %die. +:heap_oom_die +%scope heap_oom_die + %ld_global(t0, &current_heap_next_ptr) + %la(t1, &heap_next) + %beq(t0, t1, &::main) + %die(msg_scratch_full) + ::main + %die(msg_heap_full) +%endscope + # (values . xs) -- multiple-values producer. Single-arg case returns the # arg unchanged so (values x) is interchangeable with x in any 1-value # context; 0 or 2+ args materialize an MV-pack. @@ -6367,9 +6449,17 @@ ;; "heap-mark" + NUL = 10 bytes; pad with 6 NUL to reach 16. ;; "heap-rewind!"+ NUL = 13 bytes; pad with 3 NUL to reach 16. ;; "heap-usage" + NUL = 11 bytes; pad with 5 NUL to reach 16. +;; "use-scratch-heap!" + NUL = 18 bytes; pad with 6 NUL to reach 24. +;; "use-main-heap!" + NUL = 15 bytes; pad with 1 NUL to reach 16. +;; "reset-scratch-heap!"+ NUL = 20 bytes; pad with 4 NUL to reach 24. +;; "heap-in-main?" + NUL = 14 bytes; pad with 2 NUL to reach 16. :name_heap_mark "heap-mark" '000000000000' :name_heap_rewind_bang "heap-rewind!" '000000' :name_heap_usage "heap-usage" '0000000000' +:name_use_scratch_heap_bang "use-scratch-heap!" '000000000000' +:name_use_main_heap_bang "use-main-heap!" '00' +:name_reset_scratch_heap_bang "reset-scratch-heap!" '00000000' +:name_heap_in_main_q "heap-in-main?" '0000' # Writer string constants. Lengths are hard-coded at the str_putn call # sites (write_to_bv branches). No NUL needed in the source bytes -- @@ -6453,6 +6543,10 @@ &name_heap_usage %(0) $(10) &prim_heap_usage_entry %(0) &name_heap_mark %(0) $(9) &prim_heap_mark_entry %(0) &name_heap_rewind_bang %(0) $(12) &prim_heap_rewind_bang_entry %(0) +&name_use_scratch_heap_bang %(0) $(17) &prim_use_scratch_heap_bang_entry %(0) +&name_use_main_heap_bang %(0) $(14) &prim_use_main_heap_bang_entry %(0) +&name_reset_scratch_heap_bang %(0) $(19) &prim_reset_scratch_heap_bang_entry %(0) +&name_heap_in_main_q %(0) $(13) &prim_heap_in_main_q_entry %(0) &name_values %(0) $(6) &prim_values_entry %(0) &name_call_with_values %(0) $(16) &prim_call_with_values_entry %(0) :prim_table_end @@ -6467,6 +6561,10 @@ :msg_unbound "scheme1: unbound variable" '0a' '00' :msg_not_proc "scheme1: not a procedure" '0a' '00' :msg_heap_full "scheme1: heap exhausted" '0a' '00' +;; "scheme1: scratch exhausted" + auto-NUL = 27 bytes; '0a' '00' = 2; +;; pad to 32 (multiple of 4) so the code that follows stays 4-aligned +;; — aarch64 instruction fetch SIGBUSes on a misaligned PC. +:msg_scratch_full "scheme1: scratch exhausted" '0a' '00000000' :msg_readbuf_full "scheme1: source buffer overflow" '0a' '00' :msg_bv_oob "scheme1: bytevector index out of range" '0a' '00' :msg_unterm_string "scheme1: unterminated string literal" '0a' '00' @@ -6489,11 +6587,13 @@ # Startup -- heap_init # ========================================================================= -# heap_init() -> none. Sets heap_next (&heap_buf rounded up to 8-byte -# alignment) and heap_end (heap_buf + HEAP_CAP_BYTES). cons assumes -# 8-byte-aligned heap_next so every pair pointer's low 3 bits are exactly -# the PAIR tag; &ELF_end's alignment depends on the data section above -# it. cons / alloc_hdr / alloc_bytes test (heap_next + bytes <= heap_end) +# heap_init() -> none. Initializes the main heap (heap_next / +# heap_end), the scratch heap (scratch_next / scratch_end), and points +# current_heap_*_ptr at the main slots so cons / alloc_hdr / +# alloc_bytes default to allocating in main. Both _next slots are +# rounded up to 8-byte alignment so every PAIR/HEAP tag bit is exact; +# &ELF_end's alignment depends on the data section above it. cons / +# alloc_hdr / alloc_bytes test (*current_next + bytes <= *current_end) # on every allocation and abort via runtime_error on overflow. Leaf. :heap_init %ld_global(t0, &heap_buf_ptr) @@ -6505,6 +6605,20 @@ %add(t0, t0, t1) %st_global(t0, &heap_end, t1) + %ld_global(t0, &scratch_buf_ptr) + %alignup(t0, t0, 8, t1) + %st_global(t0, &scratch_next, t1) + + %ld_global(t0, &scratch_buf_ptr) + %li(t1, %SCRATCH_CAP_BYTES) + %add(t0, t0, t1) + %st_global(t0, &scratch_end, t1) + + %la(t0, &heap_next) + %st_global(t0, &current_heap_next_ptr, t1) + %la(t0, &heap_end) + %st_global(t0, &current_heap_end_ptr, t1) + %ret # ========================================================================= @@ -6518,6 +6632,7 @@ %arena_entry(&readbuf_buf_ptr, %READBUF_CAP_BYTES) %arena_entry(&symtab_buf_ptr, (* %SYMTAB_CAP_SLOTS %SYMENT.SIZE)) %arena_entry(&heap_buf_ptr, %HEAP_CAP_BYTES) +%arena_entry(&scratch_buf_ptr, %SCRATCH_CAP_BYTES) :arena_table_end # ========================================================================= @@ -6531,6 +6646,21 @@ # HEAP_CAP_BYTES). Read on every allocation. :heap_end $(0) +# scratch_next / scratch_end: bump pointer and limit for the scratch +# heap (= scratch_buf_ptr .. + SCRATCH_CAP_BYTES). Selected via +# (use-scratch-heap!); reset to scratch_buf_ptr by +# (reset-scratch-heap!). +:scratch_next $(0) +:scratch_end $(0) + +# current_heap_next_ptr / current_heap_end_ptr: pointer-of-pointer +# slots holding either &heap_next/&heap_end or +# &scratch_next/&scratch_end. cons / alloc_hdr / alloc_bytes / +# heap-mark / heap-rewind! double-deref through these slots so the +# heap selection is a single store of two addresses. +:current_heap_next_ptr $(0) +:current_heap_end_ptr $(0) + # Source-buffer cursor and slurped length. :readbuf_pos $(0) :readbuf_len $(0) @@ -6585,5 +6715,6 @@ :readbuf_buf_ptr $(0) :heap_buf_ptr $(0) :symtab_buf_ptr $(0) +:scratch_buf_ptr $(0) :ELF_end diff --git a/tests/scheme1/115-two-heap.expected-exit b/tests/scheme1/115-two-heap.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/115-two-heap.scm b/tests/scheme1/115-two-heap.scm @@ -0,0 +1,56 @@ +; Two-heap primitives, A → B → C ergonomics. The shape Phase 3 of +; CC-SCRATCH.md will use at the parse-decl-or-fn boundary. +; +; A: caller. Heap-unaware; just calls (B input) and uses the result. +; B: boundary. Switches to scratch, runs C in scratch, switches back +; to main, *clones* C's survivor into the main heap (the "promote" +; walker), resets scratch, returns the main-heap clone. +; C: worker. Heap-unaware; allocates freely in whatever heap is +; current. Returns a freshly-allocated (sum . count) pair. +; +; The clone runs while main is current and reads from scratch via +; ordinary pointer access — neither heap selection affects loads, only +; allocations. After the clone, scratch can be wholesale reset because +; nothing in main points into it. + +(define (C input) + (let loop ((xs input) (sum 0) (count 0)) + (if (null? xs) + (cons sum count) + (loop (cdr xs) (+ sum (car xs)) (+ count 1))))) + +; Promote: read from scratch-allocated pair, allocate the clone in +; the currently-selected heap. C's result has fixnum car/cdr so a +; one-level cons is enough; real promote walkers recurse on heap- +; allocated children. +(define (promote-pair p) + (cons (car p) (cdr p))) + +(define (B input) + (use-scratch-heap!) + (let ((scratch-result (C input))) + (use-main-heap!) + (let ((main-result (promote-pair scratch-result))) + (reset-scratch-heap!) + main-result))) + +(define (A) + (let ((input (cons 1 (cons 2 (cons 3 (cons 4 (cons 5 '()))))))) + (B input))) + +; Drive B twice. A scratch leak would push the second call past the +; cap; a missed promotion would leave r1 dangling once r2's run resets. +(define r1 (A)) +(define r2 (A)) +(define r1-in-main? (heap-in-main? r1)) +(define r2-in-main? (heap-in-main? r2)) + +(sys-exit + (cond + ((not r1-in-main?) 1) ; clone landed in main + ((not r2-in-main?) 2) + ((not (= (car r1) 15)) 3) ; r1 still readable after r2's reset + ((not (= (cdr r1) 5)) 4) + ((not (= (car r2) 15)) 5) + ((not (= (cdr r2) 5)) 6) + (else 42)))