commit 66ab01573550a19083f93d3e6588272df7cadc7f
parent dd05e292cf67df0b1d60b89a27a4e3b0182fd508
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 02:04:02 -0700
cc: resolve cg/parse contract drift; bump scheme1 readbuf to 256K
Three coordinated fixes against the contract gaps surfaced by the
parallel-agent integration:
1. cg-loop body-thunk receives the tag.
Previously cg-loop allocated and returned a tag, but parse needed
the tag *before* body-thunk ran (for break/continue calls inside
the body). Parse worked around this by pre-allocating its own tag
and ignoring cg's — producing %break/%continue calls that
referenced labels cg never emitted.
Fix: cg-loop now invokes (body-thunk tag), threading cg's actual
loop tag into body emission. Parse drops %parse-tag-namer and uses
the tag arg directly. Switch follows suit: parse reads
(swctx-end-tag sw) instead of allocating its own switch tag.
2. cg-assign auto-casts rhs to lhs's type.
Per CC-CONTRACTS §4.1, the parser was supposed to cg-cast rhs to
lhs's type before cg-assign — but parse only sees the vstack top
and can't peek beneath to find lhs's type. cg-assign already pops
both opnds, so it owns the cast naturally.
Fix: cg-assign now repushes rhs and calls cg-cast internally
before storing. CC-CONTRACTS §4.1 updated; CC-INTERNALS notes the
ownership shift.
3. scheme1 READBUF_CAP_BYTES bumped 64 KiB -> 256 KiB.
The integrated cc compiler source (prelude + util + data + lex +
pp + cg + parse + main) is ~155 KB. Tests for the parser had to
ship a 2 KB mini-prelude.scm to fit under the old 64 KB cap. With
the new cap, the full prelude can be used everywhere; mini-prelude
stays for now since parse tests already work and the integration
pass will switch back to the full prelude.
The 32 MiB ELF p_memsz reservation easily absorbs the +192 KB
readbuf bump.
Tests: scheme1's own suite (94 fixtures), cc-util (14), cc-lex (16),
cc-pp (22), cc-cg (15), cc-parse (15) all pass on aarch64 against the
rebuilt scheme1 binary. cg's 08-while-break-continue fixture and
expected output adjusted to match the new contract; parse's 05 and 13
expected traces re-tagged Lp0 -> L0.
CC-CONTRACTS adds §3.3 documenting the loop-tag protocol; older §3.3
(staging) and §3.4 (alloc-slot) renumbered to §3.4 / §3.5.
Diffstat:
10 files changed, 145 insertions(+), 75 deletions(-)
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -482,10 +482,17 @@
(else (die #f "cg-unop: unknown op" op)))))
(define (cg-assign cg)
- (let* ((rhs (cg-pop cg))
- (lhs (cg-pop cg))
- (ty (opnd-type lhs)))
+ ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek
+ ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2),
+ ;; emits the store, pushes the assigned value as the result rval.
+ (let* ((rhs0 (cg-pop cg))
+ (lhs (cg-pop cg))
+ (ty (opnd-type lhs)))
(cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue")))
+ ;; Cast rhs to lhs's type (no-op when the types already match).
+ (cg-push cg rhs0)
+ (cg-cast cg ty)
+ (let ((rhs (cg-pop cg)))
(%cg-load-opnd-into cg rhs 'a0)
(cond
((eq? (opnd-kind lhs) 'frame)
@@ -499,7 +506,7 @@
(%cg-emit-la cg 't0 (opnd-ext lhs))
(%cg-emit-st cg 'a0 't0 0))
(else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs))))
- (%cg-spill-reg cg 'a0 ty)))
+ (%cg-spill-reg cg 'a0 ty))))
;; --------------------------------------------------------------------
;; Calls
@@ -573,6 +580,9 @@
(%cg-emit-many cg (list "})\n"))))
(define (cg-loop cg head-thunk body-thunk)
+ ;; body-thunk receives the loop tag as its argument; parser uses
+ ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS
+ ;; §1.9 / §3.3.
(let ((tag (%cg-fresh-loop-tag cg)))
(%cg-emit-many cg (list "%loop_tag(" tag ", {\n"))
(head-thunk)
@@ -582,7 +592,7 @@
(let ((c (cg-pop cg)))
(%cg-load-opnd-into cg c 't0)
(%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n")))))
- (body-thunk)
+ (body-thunk tag)
(%cg-emit-many cg (list "})\n"))
tag))
diff --git a/cc/parse.scm b/cc/parse.scm
@@ -512,35 +512,38 @@
(advance ps) (parse-stmt ps))
(else #t)))))
-(define %parse-tag-namer (make-namer "Lp"))
+;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS
+;; §3.3); the parser threads it into break/continue via loop-ctx.
(define (parse-while-stmt ps)
(expect-kw ps 'while)
(expect-punct ps 'lparen)
- (let ((tag (%parse-tag-namer)))
- (cg-loop (ps-cg ps)
- (lambda () (parse-expr ps) (rval! ps))
- (lambda ()
- (expect-punct ps 'rparen)
- (push-loop-ctx! ps 'while tag #t)
- (parse-stmt ps)
- (pop-loop-ctx! ps))) #t))
+ (cg-loop (ps-cg ps)
+ (lambda () (parse-expr ps) (rval! ps))
+ (lambda (tag)
+ (expect-punct ps 'rparen)
+ (push-loop-ctx! ps 'while tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps))) #t)
(define (parse-do-stmt ps)
(expect-kw ps 'do)
- (let ((tag (%parse-tag-namer)))
- (push-loop-ctx! ps 'do tag #t)
- (cg-loop (ps-cg ps)
- (lambda () #t)
- (lambda ()
- (parse-stmt ps)
- (expect-kw ps 'while) (expect-punct ps 'lparen)
- (parse-expr ps) (rval! ps)
- (expect-punct ps 'rparen) (expect-punct ps 'semi)
- (cg-unop (ps-cg ps) 'lnot)
- (cg-if (ps-cg ps)
- (lambda () (cg-break (ps-cg ps) tag)))))
- (pop-loop-ctx! ps) #t))
+ ;; do-while needs its tag known *before* the body parses, so we
+ ;; capture it inside the body-thunk and stash it for pop-loop-ctx
+ ;; via a side cell.
+ (cg-loop (ps-cg ps)
+ (lambda () #t)
+ (lambda (tag)
+ (push-loop-ctx! ps 'do tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps)
+ (expect-kw ps 'while) (expect-punct ps 'lparen)
+ (parse-expr ps) (rval! ps)
+ (expect-punct ps 'rparen) (expect-punct ps 'semi)
+ (cg-unop (ps-cg ps) 'lnot)
+ (cg-if (ps-cg ps)
+ (lambda () (cg-break (ps-cg ps) tag)))))
+ #t)
(define (parse-for-stmt ps)
(expect-kw ps 'for) (expect-punct ps 'lparen)
@@ -550,27 +553,26 @@
((stmt-starts-decl? ps) (parse-local-decl ps))
(else (parse-expr ps) (cg-pop (ps-cg ps))
(expect-punct ps 'semi)))
- (let ((tag (%parse-tag-namer)))
- (cg-loop (ps-cg ps)
- (lambda ()
- (cond ((at-punct? ps 'semi)
- (cg-push-imm (ps-cg ps) %t-i32 1))
- (else (parse-expr ps) (rval! ps)))
- (expect-punct ps 'semi))
- (lambda ()
- (let ((stk (collect-til-rparen ps)))
- (expect-punct ps 'rparen)
- (push-loop-ctx! ps 'for tag #t)
- (parse-stmt ps)
- (pop-loop-ctx! ps)
- (cond
- ((null? stk) #t)
- (else
- (let ((sv (ps-toks ps)))
- (ps-toks-set! ps
- (append stk (list (make-tok 'EOF #f #f))))
- (parse-expr ps) (cg-pop (ps-cg ps))
- (ps-toks-set! ps sv))))))))
+ (cg-loop (ps-cg ps)
+ (lambda ()
+ (cond ((at-punct? ps 'semi)
+ (cg-push-imm (ps-cg ps) %t-i32 1))
+ (else (parse-expr ps) (rval! ps)))
+ (expect-punct ps 'semi))
+ (lambda (tag)
+ (let ((stk (collect-til-rparen ps)))
+ (expect-punct ps 'rparen)
+ (push-loop-ctx! ps 'for tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps)
+ (cond
+ ((null? stk) #t)
+ (else
+ (let ((sv (ps-toks ps)))
+ (ps-toks-set! ps
+ (append stk (list (make-tok 'EOF #f #f))))
+ (parse-expr ps) (cg-pop (ps-cg ps))
+ (ps-toks-set! ps sv)))))))
(scope-leave! ps) #t)
(define (collect-til-rparen ps)
@@ -593,7 +595,11 @@
(expect-kw ps 'switch) (expect-punct ps 'lparen)
(parse-expr ps) (rval! ps)
(expect-punct ps 'rparen)
- (let ((sw (cg-switch-begin (ps-cg ps))) (tg (%parse-tag-namer)))
+ ;; Switch's break-target tag is the swctx's end-tag — cg owns it,
+ ;; and we read it back so cg-break inside the switch body emits a
+ ;; tag cg actually labels.
+ (let* ((sw (cg-switch-begin (ps-cg ps)))
+ (tg (swctx-end-tag sw)))
(push-loop-ctx-sw! ps 'switch tg sw)
(parse-stmt ps)
(pop-loop-ctx! ps)
diff --git a/docs/CC-CONTRACTS.md b/docs/CC-CONTRACTS.md
@@ -246,13 +246,18 @@ between:
...else-trace...
(ifelse-end)
-(loop-begin <tag>)
- ...body trace...
-(loop-end <tag>)
+(loop-begin)
+ ...head trace...
+ ...body trace, with (break <tag>) / (continue <tag>) using the
+ tag cg passed to body-thunk...
+(loop-end)
```
This is the canonical surface — `cg-if` *internally* uses a thunk,
but the trace exposes begin/mid/end markers so tests can read top-down.
+The loop tag is allocated by cg (CC-CONTRACTS §3.3) and identifies
+which loop a break/continue refers to; it does not appear in the
+loop-begin / loop-end markers themselves.
### 2.3 Diagnostic format
@@ -325,7 +330,36 @@ cg:
The frame size is rounded up to 16 to satisfy the P1 stack-align
contract.
-### 3.3 Outgoing-arg staging
+### 3.3 Loop tag protocol
+
+```scheme
+(cg-loop cg head-thunk body-thunk) -> tag
+```
+
+`cg-loop` allocates a fresh per-function tag (`L0`, `L1`, …), emits
+the libp1pp `%loop_tag(<tag>, { … })` wrapper, runs `head-thunk`
+inside the loop head (it is expected to leave the condition opnd on
+the vstack), pops the condition and emits an `%if_eqz(t0, %break(tag))`,
+then invokes `body-thunk` **with the tag as its single argument**:
+
+```scheme
+(cg-loop cg
+ (lambda () (cg-push-imm cg %t-i32 1))
+ (lambda (tag)
+ (cg-continue cg tag)
+ (cg-break cg tag)))
+```
+
+The parser uses the same tag for any `cg-break` / `cg-continue` calls
+made during body emission. `cg-loop` returns the tag to its caller as
+well, so post-loop teardown code may reference it; `cg-loop-end` is a
+no-op kept for symmetry.
+
+Switch dispatch follows the same pattern: `cg-switch-begin` returns a
+`swctx` whose `swctx-end-tag` accessor exposes cg's break-target tag
+to the parser.
+
+### 3.4 Outgoing-arg staging
When `cg-call` is asked to emit a call with arity > 4, it stages
args 4..(N-1) into the *low-addressed* prefix of the current frame
@@ -336,7 +370,7 @@ reserves that prefix at fn-end before any other slots — i.e.,
The accounting is internal to cg. Parse never sees staging slots.
-### 3.4 cg-alloc-slot contract
+### 3.5 cg-alloc-slot contract
```scheme
(cg-alloc-slot cg bytes align) -> offset
@@ -368,8 +402,8 @@ The parser **must** call cg in this order around each operation:
| `&x` | parse x → must be lval; then `cg-take-addr` |
| `(T)e` | parse e → if lval, `cg-load` (unless casting to a pointer); then `cg-cast T` |
| `f(a, b, ...)` | parse f → if lval and `f` not a function-typed identifier, `cg-load`; parse each arg → `cg-load` if lval, then `cg-cast` to param type (or default-promote for variadic args); then `cg-call` |
-| `lhs = rhs` | parse lhs → must be lval (no load); parse rhs → `cg-load` if lval; `cg-cast` to lhs type; `cg-assign` |
-| `lhs += rhs` | parse lhs (lval) → duplicate via `cg-take-addr` then `cg-push-deref`; parse rhs; `cg-arith-conv`; `cg-binop add`; `cg-cast` to lhs type; `cg-assign` |
+| `lhs = rhs` | parse lhs → must be lval (no load); parse rhs → `cg-load` if lval; `cg-assign` (cg internally casts rhs to lhs type — parse cannot peek beneath vstack top) |
+| `lhs += rhs` | parse lhs (lval) → duplicate via `cg-take-addr` then `cg-push-deref`; parse rhs; `cg-arith-conv`; `cg-binop add`; `cg-assign` (cg casts internally) |
| `return e` | parse e → `cg-load` if lval; `cg-cast` to fn return type; `cg-return` |
| `if (e) ...` | parse e → `cg-load` if lval; `cg-cast bool` if not already int-shaped; `cg-if` |
diff --git a/docs/CC-INTERNALS.md b/docs/CC-INTERNALS.md
@@ -482,7 +482,7 @@ live outside.
```scheme
(cg-binop cg op) -> opnd ; pop b, pop a, push (a op b)
(cg-unop cg op) -> opnd ; pop a, push (op a)
-(cg-assign cg) -> opnd ; pop rval, pop lval, store, push rval (assignment yields the value)
+(cg-assign cg) -> opnd ; pop rval, pop lval, cast rval to lval-type, store, push assigned value (cg-assign owns the cast since parse cannot peek beneath vstack top)
```
`op` for binop is a symbol from:
@@ -509,8 +509,8 @@ These take thunks so the parser can recursively emit the body:
```scheme
(cg-if cg then-thunk) ; pop cond; emit %if_nez { (then-thunk) }
(cg-ifelse cg then-thunk else-thunk) ; pop cond; emit %ifelse_nez { ... }{ ... }
-(cg-loop cg head-thunk body-thunk) -> tag ; head-thunk emits the cond test; tag returned to parser
-(cg-loop-end cg tag) ; closes a %loop_tag
+(cg-loop cg head-thunk body-thunk) -> tag ; head-thunk emits the cond; body-thunk is invoked as (body-thunk tag) so parse can use the same tag for cg-break / cg-continue inside the body; tag is also returned to the caller
+(cg-loop-end cg tag) ; no-op; reserved for future per-loop teardown
(cg-break cg tag)
(cg-continue cg tag)
```
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -55,8 +55,9 @@
%struct TD { hdr name nfields } # .SIZE = 24
# Records are variable width: header + td slot + N field slots.
-# BSS arena offsets from :ELF_end. readbuf is 64 KiB, then the heap
-# (HEAP_CAP_BYTES, currently 16 MiB), then symtab. The three are packed
+# BSS arena offsets from :ELF_end. readbuf is 256 KiB (sized to fit
+# the catm'd cc compiler source incl. prelude — see READBUF_CAP_BYTES),
+# then the heap (HEAP_CAP_BYTES, currently 16 MiB), then symtab. The three are packed
# back-to-back; everything lives within the ELF p_memsz reservation
# (currently 32 MiB) declared in vendor/seed/<arch>/ELF.hex2. p1_main's
# startup loop materializes &ELF_end + OFF_X into the matching pointer
@@ -67,7 +68,7 @@
%endm
%macro READBUF_CAP_BYTES()
-65536
+262144
%endm
%macro HEAP_CAP_BYTES()
diff --git a/tests/cc-cg/08-while-break-continue.expected b/tests/cc-cg/08-while-break-continue.expected
@@ -5,9 +5,9 @@
%loop_tag(L0, {
%li(t0, 1)
%if_eqz(t0, { %break(L0) })
-})
%continue(L0)
%break(L0)
+})
%li(a0, 0)
%st(a0, sp, (+ %main__SO 0))
%b(&::ret)
diff --git a/tests/cc-cg/08-while-break-continue.scm b/tests/cc-cg/08-while-break-continue.scm
@@ -4,15 +4,16 @@
(let ((cg (cg-init)))
(cg-fn-begin cg "main" '() %t-i32)
+ ;; cg-loop's body-thunk receives the tag (CC-CONTRACTS §3.3); we use
+ ;; it to issue one continue and one break.
(let ((tag (cg-loop
cg
;; head: condition is a literal 1 (infinite-ish loop).
(lambda () (cg-push-imm cg %t-i32 1))
- ;; body: continue and break referenced by tag below.
- (lambda () 0))))
- ;; Demonstrate the helpers — emit one of each then loop-end.
- (cg-continue cg tag)
- (cg-break cg tag)
+ ;; body: emit one continue + one break inline.
+ (lambda (tag)
+ (cg-continue cg tag)
+ (cg-break cg tag)))))
(cg-loop-end cg tag))
(cg-push-imm cg %t-i32 0)
(cg-return cg)
diff --git a/tests/cc-parse/05-while-break.expected-trace b/tests/cc-parse/05-while-break.expected-trace
@@ -2,7 +2,7 @@
(loop-begin)
(push-sym ("x" param))
(load)
-(break "Lp0")
+(break "L0")
(loop-end)
(push-sym ("x" param))
(load)
diff --git a/tests/cc-parse/13-while-continue.expected-trace b/tests/cc-parse/13-while-continue.expected-trace
@@ -2,7 +2,7 @@
(loop-begin)
(push-sym ("x" param))
(load)
-(continue "Lp0")
+(continue "L0")
(loop-end)
(push-imm i32 0)
(cast i32)
diff --git a/tests/cc-parse/cg-trace.scm b/tests/cc-parse/cg-trace.scm
@@ -117,16 +117,34 @@
(%cg-emit! '(ifelse-mid)) (eh)
(%cg-emit! '(ifelse-end)) #t)
+;; Mock-cg's loop tag counter mirrors real cg's `%cg-fresh-loop-tag` —
+;; a fresh `Lk` per cg-loop / cg-switch-begin call. body receives the
+;; tag (CC-CONTRACTS §3.3).
+(define %mock-loop-namer (make-namer "L"))
(define (cg-loop cg head body)
- (%cg-emit! '(loop-begin))
- (head) (body)
- (%cg-emit! '(loop-end))
- "L0")
+ (let ((tag (%mock-loop-namer)))
+ (%cg-emit! '(loop-begin))
+ (head) (body tag)
+ (%cg-emit! '(loop-end))
+ tag))
(define (cg-loop-end cg tag) (%cg-emit! (list 'loop-close tag)) #t)
(define (cg-break cg tag) (%cg-emit! (list 'break tag)) #t)
(define (cg-continue cg tag) (%cg-emit! (list 'continue tag)) #t)
-(define (cg-switch-begin cg) (%cg-emit! '(switch-begin)) 'mock-sw)
+;; Switch returns a swctx record so parse can read swctx-end-tag.
+;; We import the real record def from cg.scm? No — cg.scm isn't
+;; loaded under the trace mock. Recreate a minimal compatible record
+;; here under the same name.
+(define-record-type swctx
+ (%swctx ctrl-slot end-tag default-lbl)
+ swctx?
+ (ctrl-slot swctx-ctrl-slot)
+ (end-tag swctx-end-tag)
+ (default-lbl swctx-default-lbl swctx-default-lbl-set!))
+
+(define (cg-switch-begin cg)
+ (%cg-emit! '(switch-begin))
+ (%swctx 0 (%mock-loop-namer) #f))
(define (cg-switch-case cg sw v) (%cg-emit! (list 'switch-case v)) #t)
(define (cg-switch-default cg sw) (%cg-emit! '(switch-default)) #t)
(define (cg-switch-end cg sw) (%cg-emit! '(switch-end)) #t)