boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 66ab01573550a19083f93d3e6588272df7cadc7f
parent dd05e292cf67df0b1d60b89a27a4e3b0182fd508
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 02:04:02 -0700

cc: resolve cg/parse contract drift; bump scheme1 readbuf to 256K

Three coordinated fixes against the contract gaps surfaced by the
parallel-agent integration:

1. cg-loop body-thunk receives the tag.

   Previously cg-loop allocated and returned a tag, but parse needed
   the tag *before* body-thunk ran (for break/continue calls inside
   the body). Parse worked around this by pre-allocating its own tag
   and ignoring cg's — producing %break/%continue calls that
   referenced labels cg never emitted.

   Fix: cg-loop now invokes (body-thunk tag), threading cg's actual
   loop tag into body emission. Parse drops %parse-tag-namer and uses
   the tag arg directly. Switch follows suit: parse reads
   (swctx-end-tag sw) instead of allocating its own switch tag.

2. cg-assign auto-casts rhs to lhs's type.

   Per CC-CONTRACTS §4.1, the parser was supposed to cg-cast rhs to
   lhs's type before cg-assign — but parse only sees the vstack top
   and can't peek beneath to find lhs's type. cg-assign already pops
   both opnds, so it owns the cast naturally.

   Fix: cg-assign now repushes rhs and calls cg-cast internally
   before storing. CC-CONTRACTS §4.1 updated; CC-INTERNALS notes the
   ownership shift.

3. scheme1 READBUF_CAP_BYTES bumped 64 KiB -> 256 KiB.

   The integrated cc compiler source (prelude + util + data + lex +
   pp + cg + parse + main) is ~155 KB. Tests for the parser had to
   ship a 2 KB mini-prelude.scm to fit under the old 64 KB cap. With
   the new cap, the full prelude can be used everywhere; mini-prelude
   stays for now since parse tests already work and the integration
   pass will switch back to the full prelude.

   The 32 MiB ELF p_memsz reservation easily absorbs the +192 KB
   readbuf bump.

Tests: scheme1's own suite (94 fixtures), cc-util (14), cc-lex (16),
cc-pp (22), cc-cg (15), cc-parse (15) all pass on aarch64 against the
rebuilt scheme1 binary. cg's 08-while-break-continue fixture and
expected output adjusted to match the new contract; parse's 05 and 13
expected traces re-tagged Lp0 -> L0.

CC-CONTRACTS adds §3.3 documenting the loop-tag protocol; older §3.3
(staging) and §3.4 (alloc-slot) renumbered to §3.4 / §3.5.

Diffstat:
Mcc/cg.scm | 20+++++++++++++++-----
Mcc/parse.scm | 94++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mdocs/CC-CONTRACTS.md | 48+++++++++++++++++++++++++++++++++++++++++-------
Mdocs/CC-INTERNALS.md | 6+++---
Mscheme1/scheme1.P1pp | 7++++---
Mtests/cc-cg/08-while-break-continue.expected | 2+-
Mtests/cc-cg/08-while-break-continue.scm | 11++++++-----
Mtests/cc-parse/05-while-break.expected-trace | 2+-
Mtests/cc-parse/13-while-continue.expected-trace | 2+-
Mtests/cc-parse/cg-trace.scm | 28+++++++++++++++++++++++-----
10 files changed, 145 insertions(+), 75 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -482,10 +482,17 @@ (else (die #f "cg-unop: unknown op" op))))) (define (cg-assign cg) - (let* ((rhs (cg-pop cg)) - (lhs (cg-pop cg)) - (ty (opnd-type lhs))) + ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek + ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2), + ;; emits the store, pushes the assigned value as the result rval. + (let* ((rhs0 (cg-pop cg)) + (lhs (cg-pop cg)) + (ty (opnd-type lhs))) (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue"))) + ;; Cast rhs to lhs's type (no-op when the types already match). + (cg-push cg rhs0) + (cg-cast cg ty) + (let ((rhs (cg-pop cg))) (%cg-load-opnd-into cg rhs 'a0) (cond ((eq? (opnd-kind lhs) 'frame) @@ -499,7 +506,7 @@ (%cg-emit-la cg 't0 (opnd-ext lhs)) (%cg-emit-st cg 'a0 't0 0)) (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs)))) - (%cg-spill-reg cg 'a0 ty))) + (%cg-spill-reg cg 'a0 ty)))) ;; -------------------------------------------------------------------- ;; Calls @@ -573,6 +580,9 @@ (%cg-emit-many cg (list "})\n")))) (define (cg-loop cg head-thunk body-thunk) + ;; body-thunk receives the loop tag as its argument; parser uses + ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS + ;; §1.9 / §3.3. (let ((tag (%cg-fresh-loop-tag cg))) (%cg-emit-many cg (list "%loop_tag(" tag ", {\n")) (head-thunk) @@ -582,7 +592,7 @@ (let ((c (cg-pop cg))) (%cg-load-opnd-into cg c 't0) (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n"))))) - (body-thunk) + (body-thunk tag) (%cg-emit-many cg (list "})\n")) tag)) diff --git a/cc/parse.scm b/cc/parse.scm @@ -512,35 +512,38 @@ (advance ps) (parse-stmt ps)) (else #t))))) -(define %parse-tag-namer (make-namer "Lp")) +;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS +;; §3.3); the parser threads it into break/continue via loop-ctx. (define (parse-while-stmt ps) (expect-kw ps 'while) (expect-punct ps 'lparen) - (let ((tag (%parse-tag-namer))) - (cg-loop (ps-cg ps) - (lambda () (parse-expr ps) (rval! ps)) - (lambda () - (expect-punct ps 'rparen) - (push-loop-ctx! ps 'while tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps))) #t)) + (cg-loop (ps-cg ps) + (lambda () (parse-expr ps) (rval! ps)) + (lambda (tag) + (expect-punct ps 'rparen) + (push-loop-ctx! ps 'while tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps))) #t) (define (parse-do-stmt ps) (expect-kw ps 'do) - (let ((tag (%parse-tag-namer))) - (push-loop-ctx! ps 'do tag #t) - (cg-loop (ps-cg ps) - (lambda () #t) - (lambda () - (parse-stmt ps) - (expect-kw ps 'while) (expect-punct ps 'lparen) - (parse-expr ps) (rval! ps) - (expect-punct ps 'rparen) (expect-punct ps 'semi) - (cg-unop (ps-cg ps) 'lnot) - (cg-if (ps-cg ps) - (lambda () (cg-break (ps-cg ps) tag))))) - (pop-loop-ctx! ps) #t)) + ;; do-while needs its tag known *before* the body parses, so we + ;; capture it inside the body-thunk and stash it for pop-loop-ctx + ;; via a side cell. + (cg-loop (ps-cg ps) + (lambda () #t) + (lambda (tag) + (push-loop-ctx! ps 'do tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps) + (expect-kw ps 'while) (expect-punct ps 'lparen) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rparen) (expect-punct ps 'semi) + (cg-unop (ps-cg ps) 'lnot) + (cg-if (ps-cg ps) + (lambda () (cg-break (ps-cg ps) tag))))) + #t) (define (parse-for-stmt ps) (expect-kw ps 'for) (expect-punct ps 'lparen) @@ -550,27 +553,26 @@ ((stmt-starts-decl? ps) (parse-local-decl ps)) (else (parse-expr ps) (cg-pop (ps-cg ps)) (expect-punct ps 'semi))) - (let ((tag (%parse-tag-namer))) - (cg-loop (ps-cg ps) - (lambda () - (cond ((at-punct? ps 'semi) - (cg-push-imm (ps-cg ps) %t-i32 1)) - (else (parse-expr ps) (rval! ps))) - (expect-punct ps 'semi)) - (lambda () - (let ((stk (collect-til-rparen ps))) - (expect-punct ps 'rparen) - (push-loop-ctx! ps 'for tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps) - (cond - ((null? stk) #t) - (else - (let ((sv (ps-toks ps))) - (ps-toks-set! ps - (append stk (list (make-tok 'EOF #f #f)))) - (parse-expr ps) (cg-pop (ps-cg ps)) - (ps-toks-set! ps sv)))))))) + (cg-loop (ps-cg ps) + (lambda () + (cond ((at-punct? ps 'semi) + (cg-push-imm (ps-cg ps) %t-i32 1)) + (else (parse-expr ps) (rval! ps))) + (expect-punct ps 'semi)) + (lambda (tag) + (let ((stk (collect-til-rparen ps))) + (expect-punct ps 'rparen) + (push-loop-ctx! ps 'for tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps) + (cond + ((null? stk) #t) + (else + (let ((sv (ps-toks ps))) + (ps-toks-set! ps + (append stk (list (make-tok 'EOF #f #f)))) + (parse-expr ps) (cg-pop (ps-cg ps)) + (ps-toks-set! ps sv))))))) (scope-leave! ps) #t) (define (collect-til-rparen ps) @@ -593,7 +595,11 @@ (expect-kw ps 'switch) (expect-punct ps 'lparen) (parse-expr ps) (rval! ps) (expect-punct ps 'rparen) - (let ((sw (cg-switch-begin (ps-cg ps))) (tg (%parse-tag-namer))) + ;; Switch's break-target tag is the swctx's end-tag — cg owns it, + ;; and we read it back so cg-break inside the switch body emits a + ;; tag cg actually labels. + (let* ((sw (cg-switch-begin (ps-cg ps))) + (tg (swctx-end-tag sw))) (push-loop-ctx-sw! ps 'switch tg sw) (parse-stmt ps) (pop-loop-ctx! ps) diff --git a/docs/CC-CONTRACTS.md b/docs/CC-CONTRACTS.md @@ -246,13 +246,18 @@ between: ...else-trace... (ifelse-end) -(loop-begin <tag>) - ...body trace... -(loop-end <tag>) +(loop-begin) + ...head trace... + ...body trace, with (break <tag>) / (continue <tag>) using the + tag cg passed to body-thunk... +(loop-end) ``` This is the canonical surface — `cg-if` *internally* uses a thunk, but the trace exposes begin/mid/end markers so tests can read top-down. +The loop tag is allocated by cg (CC-CONTRACTS §3.3) and identifies +which loop a break/continue refers to; it does not appear in the +loop-begin / loop-end markers themselves. ### 2.3 Diagnostic format @@ -325,7 +330,36 @@ cg: The frame size is rounded up to 16 to satisfy the P1 stack-align contract. -### 3.3 Outgoing-arg staging +### 3.3 Loop tag protocol + +```scheme +(cg-loop cg head-thunk body-thunk) -> tag +``` + +`cg-loop` allocates a fresh per-function tag (`L0`, `L1`, …), emits +the libp1pp `%loop_tag(<tag>, { … })` wrapper, runs `head-thunk` +inside the loop head (it is expected to leave the condition opnd on +the vstack), pops the condition and emits an `%if_eqz(t0, %break(tag))`, +then invokes `body-thunk` **with the tag as its single argument**: + +```scheme +(cg-loop cg + (lambda () (cg-push-imm cg %t-i32 1)) + (lambda (tag) + (cg-continue cg tag) + (cg-break cg tag))) +``` + +The parser uses the same tag for any `cg-break` / `cg-continue` calls +made during body emission. `cg-loop` returns the tag to its caller as +well, so post-loop teardown code may reference it; `cg-loop-end` is a +no-op kept for symmetry. + +Switch dispatch follows the same pattern: `cg-switch-begin` returns a +`swctx` whose `swctx-end-tag` accessor exposes cg's break-target tag +to the parser. + +### 3.4 Outgoing-arg staging When `cg-call` is asked to emit a call with arity > 4, it stages args 4..(N-1) into the *low-addressed* prefix of the current frame @@ -336,7 +370,7 @@ reserves that prefix at fn-end before any other slots — i.e., The accounting is internal to cg. Parse never sees staging slots. -### 3.4 cg-alloc-slot contract +### 3.5 cg-alloc-slot contract ```scheme (cg-alloc-slot cg bytes align) -> offset @@ -368,8 +402,8 @@ The parser **must** call cg in this order around each operation: | `&x` | parse x → must be lval; then `cg-take-addr` | | `(T)e` | parse e → if lval, `cg-load` (unless casting to a pointer); then `cg-cast T` | | `f(a, b, ...)` | parse f → if lval and `f` not a function-typed identifier, `cg-load`; parse each arg → `cg-load` if lval, then `cg-cast` to param type (or default-promote for variadic args); then `cg-call` | -| `lhs = rhs` | parse lhs → must be lval (no load); parse rhs → `cg-load` if lval; `cg-cast` to lhs type; `cg-assign` | -| `lhs += rhs` | parse lhs (lval) → duplicate via `cg-take-addr` then `cg-push-deref`; parse rhs; `cg-arith-conv`; `cg-binop add`; `cg-cast` to lhs type; `cg-assign` | +| `lhs = rhs` | parse lhs → must be lval (no load); parse rhs → `cg-load` if lval; `cg-assign` (cg internally casts rhs to lhs type — parse cannot peek beneath vstack top) | +| `lhs += rhs` | parse lhs (lval) → duplicate via `cg-take-addr` then `cg-push-deref`; parse rhs; `cg-arith-conv`; `cg-binop add`; `cg-assign` (cg casts internally) | | `return e` | parse e → `cg-load` if lval; `cg-cast` to fn return type; `cg-return` | | `if (e) ...` | parse e → `cg-load` if lval; `cg-cast bool` if not already int-shaped; `cg-if` | diff --git a/docs/CC-INTERNALS.md b/docs/CC-INTERNALS.md @@ -482,7 +482,7 @@ live outside. ```scheme (cg-binop cg op) -> opnd ; pop b, pop a, push (a op b) (cg-unop cg op) -> opnd ; pop a, push (op a) -(cg-assign cg) -> opnd ; pop rval, pop lval, store, push rval (assignment yields the value) +(cg-assign cg) -> opnd ; pop rval, pop lval, cast rval to lval-type, store, push assigned value (cg-assign owns the cast since parse cannot peek beneath vstack top) ``` `op` for binop is a symbol from: @@ -509,8 +509,8 @@ These take thunks so the parser can recursively emit the body: ```scheme (cg-if cg then-thunk) ; pop cond; emit %if_nez { (then-thunk) } (cg-ifelse cg then-thunk else-thunk) ; pop cond; emit %ifelse_nez { ... }{ ... } -(cg-loop cg head-thunk body-thunk) -> tag ; head-thunk emits the cond test; tag returned to parser -(cg-loop-end cg tag) ; closes a %loop_tag +(cg-loop cg head-thunk body-thunk) -> tag ; head-thunk emits the cond; body-thunk is invoked as (body-thunk tag) so parse can use the same tag for cg-break / cg-continue inside the body; tag is also returned to the caller +(cg-loop-end cg tag) ; no-op; reserved for future per-loop teardown (cg-break cg tag) (cg-continue cg tag) ``` diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -55,8 +55,9 @@ %struct TD { hdr name nfields } # .SIZE = 24 # Records are variable width: header + td slot + N field slots. -# BSS arena offsets from :ELF_end. readbuf is 64 KiB, then the heap -# (HEAP_CAP_BYTES, currently 16 MiB), then symtab. The three are packed +# BSS arena offsets from :ELF_end. readbuf is 256 KiB (sized to fit +# the catm'd cc compiler source incl. prelude — see READBUF_CAP_BYTES), +# then the heap (HEAP_CAP_BYTES, currently 16 MiB), then symtab. The three are packed # back-to-back; everything lives within the ELF p_memsz reservation # (currently 32 MiB) declared in vendor/seed/<arch>/ELF.hex2. p1_main's # startup loop materializes &ELF_end + OFF_X into the matching pointer @@ -67,7 +68,7 @@ %endm %macro READBUF_CAP_BYTES() -65536 +262144 %endm %macro HEAP_CAP_BYTES() diff --git a/tests/cc-cg/08-while-break-continue.expected b/tests/cc-cg/08-while-break-continue.expected @@ -5,9 +5,9 @@ %loop_tag(L0, { %li(t0, 1) %if_eqz(t0, { %break(L0) }) -}) %continue(L0) %break(L0) +}) %li(a0, 0) %st(a0, sp, (+ %main__SO 0)) %b(&::ret) diff --git a/tests/cc-cg/08-while-break-continue.scm b/tests/cc-cg/08-while-break-continue.scm @@ -4,15 +4,16 @@ (let ((cg (cg-init))) (cg-fn-begin cg "main" '() %t-i32) + ;; cg-loop's body-thunk receives the tag (CC-CONTRACTS §3.3); we use + ;; it to issue one continue and one break. (let ((tag (cg-loop cg ;; head: condition is a literal 1 (infinite-ish loop). (lambda () (cg-push-imm cg %t-i32 1)) - ;; body: continue and break referenced by tag below. - (lambda () 0)))) - ;; Demonstrate the helpers — emit one of each then loop-end. - (cg-continue cg tag) - (cg-break cg tag) + ;; body: emit one continue + one break inline. + (lambda (tag) + (cg-continue cg tag) + (cg-break cg tag))))) (cg-loop-end cg tag)) (cg-push-imm cg %t-i32 0) (cg-return cg) diff --git a/tests/cc-parse/05-while-break.expected-trace b/tests/cc-parse/05-while-break.expected-trace @@ -2,7 +2,7 @@ (loop-begin) (push-sym ("x" param)) (load) -(break "Lp0") +(break "L0") (loop-end) (push-sym ("x" param)) (load) diff --git a/tests/cc-parse/13-while-continue.expected-trace b/tests/cc-parse/13-while-continue.expected-trace @@ -2,7 +2,7 @@ (loop-begin) (push-sym ("x" param)) (load) -(continue "Lp0") +(continue "L0") (loop-end) (push-imm i32 0) (cast i32) diff --git a/tests/cc-parse/cg-trace.scm b/tests/cc-parse/cg-trace.scm @@ -117,16 +117,34 @@ (%cg-emit! '(ifelse-mid)) (eh) (%cg-emit! '(ifelse-end)) #t) +;; Mock-cg's loop tag counter mirrors real cg's `%cg-fresh-loop-tag` — +;; a fresh `Lk` per cg-loop / cg-switch-begin call. body receives the +;; tag (CC-CONTRACTS §3.3). +(define %mock-loop-namer (make-namer "L")) (define (cg-loop cg head body) - (%cg-emit! '(loop-begin)) - (head) (body) - (%cg-emit! '(loop-end)) - "L0") + (let ((tag (%mock-loop-namer))) + (%cg-emit! '(loop-begin)) + (head) (body tag) + (%cg-emit! '(loop-end)) + tag)) (define (cg-loop-end cg tag) (%cg-emit! (list 'loop-close tag)) #t) (define (cg-break cg tag) (%cg-emit! (list 'break tag)) #t) (define (cg-continue cg tag) (%cg-emit! (list 'continue tag)) #t) -(define (cg-switch-begin cg) (%cg-emit! '(switch-begin)) 'mock-sw) +;; Switch returns a swctx record so parse can read swctx-end-tag. +;; We import the real record def from cg.scm? No — cg.scm isn't +;; loaded under the trace mock. Recreate a minimal compatible record +;; here under the same name. +(define-record-type swctx + (%swctx ctrl-slot end-tag default-lbl) + swctx? + (ctrl-slot swctx-ctrl-slot) + (end-tag swctx-end-tag) + (default-lbl swctx-default-lbl swctx-default-lbl-set!)) + +(define (cg-switch-begin cg) + (%cg-emit! '(switch-begin)) + (%swctx 0 (%mock-loop-namer) #f)) (define (cg-switch-case cg sw v) (%cg-emit! (list 'switch-case v)) #t) (define (cg-switch-default cg sw) (%cg-emit! '(switch-default)) #t) (define (cg-switch-end cg sw) (%cg-emit! '(switch-end)) #t)