boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 5ab5c9c3192e6aa171a5da7916f73c5b0af923ef
parent c0af21b7d2e5511016c8989f404945b0d697b8ba
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 23:48:22 -0700

Merge Agent 1: §B + §C + §H + §K.1 (lvalue/sizeof/conditionals/comma)

Diffstat:
Mcc/cg.scm | 86++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Mcc/parse.scm | 55++++++++++++++++++++++++++++++++-----------------------
Mdocs/CC-CONTRACTS.md | 9++++++++-
Mdocs/CC-PUNCHLIST.md | 148++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Atests/cc-cg/21-preinc.expected-exit | 1+
Atests/cc-cg/21-preinc.scm | 29+++++++++++++++++++++++++++++
Atests/cc-cg/22-postinc.expected-exit | 1+
Atests/cc-cg/22-postinc.scm | 31+++++++++++++++++++++++++++++++
Atests/cc-cg/23-cmpd-simple.expected-exit | 1+
Atests/cc-cg/23-cmpd-simple.scm | 24++++++++++++++++++++++++
Atests/cc-cg/24-cmpd-ptr.expected-exit | 1+
Atests/cc-cg/24-cmpd-ptr.scm | 35+++++++++++++++++++++++++++++++++++
Atests/cc-cg/25-deref-postinc.expected-exit | 1+
Atests/cc-cg/25-deref-postinc.scm | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-cg/28-ternary.expected-exit | 1+
Atests/cc-cg/28-ternary.scm | 30++++++++++++++++++++++++++++++
Atests/cc-cg/29-land.expected-exit | 1+
Atests/cc-cg/29-land.scm | 34++++++++++++++++++++++++++++++++++
Atests/cc-cg/30-lor.expected-exit | 1+
Atests/cc-cg/30-lor.scm | 30++++++++++++++++++++++++++++++
Atests/cc-parse/21-preinc.c | 6++++++
Atests/cc-parse/21-preinc.expected-exit | 1+
Atests/cc-parse/22-postinc.c | 6++++++
Atests/cc-parse/22-postinc.expected-exit | 1+
Atests/cc-parse/23-cmpd-simple.c | 16++++++++++++++++
Atests/cc-parse/23-cmpd-simple.expected-exit | 1+
Atests/cc-parse/24-cmpd-ptr.c | 7+++++++
Atests/cc-parse/24-cmpd-ptr.expected-exit | 1+
Atests/cc-parse/25-deref-postinc.c | 17+++++++++++++++++
Atests/cc-parse/25-deref-postinc.expected-exit | 1+
Atests/cc-parse/26-sizeof-expr.c | 5+++++
Atests/cc-parse/26-sizeof-expr.expected-exit | 1+
Atests/cc-parse/27-sizeof-types.c | 14++++++++++++++
Atests/cc-parse/27-sizeof-types.expected-exit | 1+
Atests/cc-parse/28-ternary.c | 6++++++
Atests/cc-parse/28-ternary.expected-exit | 1+
Atests/cc-parse/29-land.c | 6++++++
Atests/cc-parse/29-land.expected-exit | 1+
Atests/cc-parse/30-lor.c | 6++++++
Atests/cc-parse/30-lor.expected-exit | 1+
Atests/cc-parse/31-comma.c | 7+++++++
Atests/cc-parse/31-comma.expected-exit | 1+
42 files changed, 593 insertions(+), 103 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -337,6 +337,16 @@ (define (cg-depth cg) (length (cg-vstack cg))) +;; Duplicate the top vstack entry. For lvals this is safe — the slot +;; (or label, or indirect-marked frame) backing the lval keeps existing +;; until the function ends. For rvals it duplicates the descriptor of +;; the spilled value; both copies refer to the same already-emitted +;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to +;; preserve the lhs across a `cg-load` so the subsequent `cg-assign` +;; still has its address. +(define (cg-dup cg) + (let ((p (cg-top cg))) (cg-push cg p) p)) + ;; -------------------------------------------------------------------- ;; Materialize ;; -------------------------------------------------------------------- @@ -494,20 +504,29 @@ (else (cg-push cg p))))) (define (cg-arith-conv cg) + ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to + ;; arithmetic operands. When either operand is a pointer (or array, + ;; which behaves as a pointer in arithmetic), the pair is a + ;; pointer-arith case — leave the types alone so cg-binop can detect + ;; the ptr operand and apply the right scaling. (let* ((b (cg-pop cg)) (a (cg-pop cg)) (ta (opnd-type a)) (tb (opnd-type b)) (sa (%ctype-size ta)) - (sb (%ctype-size tb)) - (common (cond - ((> sa sb) ta) - ((> sb sa) tb) - ((%ctype-unsigned? ta) ta) - ((%ctype-unsigned? tb) tb) - (else ta)))) - (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a))) - (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? b))))) + (sb (%ctype-size tb))) + (cond + ((or (%ctype-ptr? ta) (%ctype-ptr? tb)) + (cg-push cg a) (cg-push cg b)) + (else + (let ((common (cond + ((> sa sb) ta) + ((> sb sa) tb) + ((%ctype-unsigned? ta) ta) + ((%ctype-unsigned? tb) tb) + (else ta)))) + (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a))) + (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? 
b)))))))) ;; -------------------------------------------------------------------- ;; Operators @@ -596,6 +615,26 @@ (else (die #f "cg-binop: unknown op" op))) (%cg-spill-reg cg 't0 result-ty))))) +;; Post-increment / post-decrement on the top-of-vstack lval. +;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store. +;; Uses cg-dup + cg-load to capture the old rval (which is then in a +;; never-reused spill slot), then runs the regular dup+load+add+assign +;; pattern for the store. Pointer scaling falls out of cg-binop add. +(define (%cg-post-inc-dec cg op) + (cg-dup cg) + (cg-load cg) + (let ((old (cg-pop cg))) + (cg-dup cg) + (cg-load cg) + (cg-push-imm cg %t-i32 1) + (cg-binop cg op) + (cg-assign cg) + (cg-pop cg) + (cg-push cg old))) + +(define (cg-postinc cg) (%cg-post-inc-dec cg 'add)) +(define (cg-postdec cg) (%cg-post-inc-dec cg 'sub)) + (define (cg-unop cg op) (let* ((p (cg-pop cg)) (ty (opnd-type p))) (%cg-load-opnd-into cg p 't0) @@ -709,6 +748,35 @@ (else-thunk) (%cg-emit-many cg (list "})\n")))) +;; Conditionals-as-values: `cg-ifelse` is correct for if-statements +;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends +;; with one rval on top of the vstack — and after both branches run, +;; we'd be left with TWO opnds, which breaks the type contract for +;; the surrounding expression. `cg-ifelse-merge` solves that: pop the +;; cond, allocate one result slot, and after each thunk runs, pop its +;; rval and store into the slot. Push the slot as one frame rval. +;; Both branches must push exactly one opnd; the result type is the +;; type of the first thunk's pushed opnd (parser must arrange for +;; both branches to push compatible types — either by passing +;; pre-coerced operands or by injecting a `cg-cast` inside the thunk). 
+(define (cg-ifelse-merge cg then-thunk else-thunk) + (let* ((cond-op (cg-pop cg)) + (slot (cg-alloc-slot cg 8 8))) + (%cg-load-opnd-into cg cond-op 't0) + (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) + (then-thunk) + (let* ((p (cg-pop cg)) + (rty (opnd-type p))) + (%cg-load-opnd-into cg p 'a0) + (%cg-emit-st-slot cg 'a0 slot) + (%cg-emit-many cg (list "}, {\n")) + (else-thunk) + (let ((q (cg-pop cg))) + (%cg-load-opnd-into cg q 'a0) + (%cg-emit-st-slot cg 'a0 slot)) + (%cg-emit-many cg (list "})\n")) + (cg-push cg (%opnd 'frame rty slot #f))))) + (define (cg-loop cg head-thunk body-thunk) ;; body-thunk receives the loop tag as its argument; parser uses ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS diff --git a/cc/parse.scm b/cc/parse.scm @@ -683,6 +683,7 @@ (define %binop-bp (list + (cons 'comma (cons 1 2)) (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3)) (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3)) (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3)) @@ -739,13 +740,17 @@ (let ((op (tok-value t)) (rb (cdr bp))) (advance ps) (cond + ((eq? op 'comma) + ;; lhs has been parsed; discard it and evaluate rhs. + ;; Result of the comma expr is the rhs's rval. + (cg-pop (ps-cg ps)) + (parse-expr-bp ps rb) (rval! ps)) ((eq? op 'assign) (parse-expr-bp ps rb) (rval! ps) (cg-assign (ps-cg ps))) ((compound-op op) (let ((b (compound-op op))) - (cg-take-addr (ps-cg ps)) - (cg-push-deref (ps-cg ps)) + (cg-dup (ps-cg ps)) (cg-load (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) (cg-arith-conv (ps-cg ps)) @@ -753,7 +758,7 @@ (cg-assign (ps-cg ps)))) ((eq? op 'qmark) (rval! ps) - (cg-ifelse (ps-cg ps) + (cg-ifelse-merge (ps-cg ps) (lambda () (parse-expr-bp ps 0) (rval! ps)) (lambda () @@ -761,18 +766,25 @@ (parse-expr-bp ps rb) (rval! ps)))) ((eq? op 'land) (rval! ps) - (cg-ifelse (ps-cg ps) + ;; Both branches must push i32 0/1. Right side is + ;; coerced via `cg-cast bool` so the merge slot + ;; carries i32 (per §H.2). 
+ (cg-ifelse-merge (ps-cg ps) (lambda () - (parse-expr-bp ps rb) (rval! ps)) + (parse-expr-bp ps rb) (rval! ps) + (cg-cast (ps-cg ps) %t-bool) + (cg-cast (ps-cg ps) %t-i32)) (lambda () (cg-push-imm (ps-cg ps) %t-i32 0)))) ((eq? op 'lor) (rval! ps) - (cg-ifelse (ps-cg ps) + (cg-ifelse-merge (ps-cg ps) (lambda () (cg-push-imm (ps-cg ps) %t-i32 1)) (lambda () - (parse-expr-bp ps rb) (rval! ps)))) + (parse-expr-bp ps rb) (rval! ps) + (cg-cast (ps-cg ps) %t-bool) + (cg-cast (ps-cg ps) %t-i32)))) (else (rval! ps) (cg-promote (ps-cg ps)) (parse-expr-bp ps rb) (rval! ps) @@ -808,12 +820,12 @@ (advance ps) (parse-unary ps) (rval! ps) (cg-unop (ps-cg ps) 'lnot)) ((eq? v 'inc) (advance ps) (parse-unary ps) - (cg-take-addr (ps-cg ps)) (cg-push-deref (ps-cg ps)) + (cg-dup (ps-cg ps)) (cg-load (ps-cg ps)) (cg-push-imm (ps-cg ps) %t-i32 1) (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps))) ((eq? v 'dec) (advance ps) (parse-unary ps) - (cg-take-addr (ps-cg ps)) (cg-push-deref (ps-cg ps)) + (cg-dup (ps-cg ps)) (cg-load (ps-cg ps)) (cg-push-imm (ps-cg ps) %t-i32 1) (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps))) @@ -834,10 +846,15 @@ (max (ctype-size ty) 0)))) (else (parse-expr ps) (expect-punct ps 'rparen) - (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 8)))) - (else (parse-unary ps) (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 8)))) + (let* ((tp (cg-top (ps-cg ps))) + (sz (max (ctype-size (opnd-type tp)) 0))) + (cg-pop (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-u64 sz))))) + (else (parse-unary ps) + (let* ((tp (cg-top (ps-cg ps))) + (sz (max (ctype-size (opnd-type tp)) 0))) + (cg-pop (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-u64 sz))))) (else (parse-postfix ps))))) (define (token-is-decl? ps) @@ -920,18 +937,10 @@ (cg-push-deref (ps-cg ps)) (lp)) ((eq? 
v 'inc) (advance ps) - (cg-take-addr (ps-cg ps)) (cg-push-deref (ps-cg ps)) - (cg-load (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-i32 1) - (cg-binop (ps-cg ps) 'add) - (cg-assign (ps-cg ps)) (lp)) + (cg-postinc (ps-cg ps)) (lp)) ((eq? v 'dec) (advance ps) - (cg-take-addr (ps-cg ps)) (cg-push-deref (ps-cg ps)) - (cg-load (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-i32 1) - (cg-binop (ps-cg ps) 'sub) - (cg-assign (ps-cg ps)) (lp)) + (cg-postdec (ps-cg ps)) (lp)) (else #t)))))))) (define (parse-call-args ps) diff --git a/docs/CC-CONTRACTS.md b/docs/CC-CONTRACTS.md @@ -383,9 +383,16 @@ The parser **must** call cg in this order around each operation: | `(T)e` | parse e → if lval, `cg-load` (unless casting to a pointer); then `cg-cast T` | | `f(a, b, ...)` | parse f → if lval and `f` not a function-typed identifier, `cg-load`; parse each arg → `cg-load` if lval, then `cg-cast` to param type (or default-promote for variadic args); then `cg-call` | | `lhs = rhs` | parse lhs → must be lval (no load); parse rhs → `cg-load` if lval; `cg-assign` (cg internally casts rhs to lhs type — parse cannot peek beneath vstack top) | -| `lhs += rhs` | parse lhs (lval) → duplicate via `cg-take-addr` then `cg-push-deref`; parse rhs; `cg-arith-conv`; `cg-binop add`; `cg-assign` (cg casts internally) | +| `lhs += rhs` (and other compound assigns) | parse lhs (lval) → `cg-dup` to preserve the lval across the read; `cg-load` (consumes one copy); parse rhs → `cg-load` if lval; `cg-arith-conv`; `cg-binop <op>`; `cg-assign` (cg casts internally) | +| `++lhs` / `--lhs` | parse lhs (lval) → `cg-dup`; `cg-load`; `cg-push-imm 1`; `cg-binop add`/`sub`; `cg-assign` | +| `lhs++` / `lhs--` | parse lhs (lval) → `cg-postinc` / `cg-postdec` (atomic primitive: dups+loads to capture old rval, then dup+load+`+1`+assign for the store, finally pushes the saved old rval) | | `return e` | parse e → `cg-load` if lval; `cg-cast` to fn return type; `cg-return` | | `if (e) ...` | parse e → `cg-load` if lval; `cg-cast bool` 
if not already int-shaped; `cg-if` | +| `c ? a : b` | parse c → `cg-load` if lval; `cg-ifelse-merge` with each thunk parsing one arm and ending with `rval!`; result type is the first arm's type | +| `a && b` | parse a → `cg-load` if lval; `cg-ifelse-merge` with then-arm = `parse b; rval!; cg-cast bool; cg-cast i32` and else-arm = `cg-push-imm i32 0` | +| `a \|\| b` | mirror of `&&`: then-arm = `cg-push-imm i32 1`; else-arm = parse b + bool/i32 cast | +| `a, b` | parse a (its rval is on top) → `cg-pop`; parse b → `cg-load` if lval; the comma's value is b | +| `sizeof e` | parse e (don't suppress emission); peek `(opnd-type (cg-top …))`'s `ctype-size`; `cg-pop`; `cg-push-imm u64 size` | The parser is responsible for the standard: diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md @@ -91,56 +91,59 @@ upstream of nearly everything else. Land this first. ### B. Lvalue mechanics -`cg-take-addr` does not preserve the original lval, so any operation -that needs to *use* an lvalue twice (compound assign, inc/dec) is -broken. Pick one fix and document it in -[CC-CONTRACTS §4.1](CC-CONTRACTS.md#41-parsers-responsibilities) row -"`lhs += rhs`": - -- (a) `cg-take-addr` leaves `[orig-lval, ptr-rval]`; or -- (b) introduce `cg-dup` (duplicate top vstack entry). - -- [ ] **Pre-`++` / pre-`--`** - - cg: `cc-cg/NN-preinc.scm` — `int x = 5; ++x; return x;` → exit 6. - - parse: `cc-parse/NN-preinc.c` - - Needs: lhs preservation per above. - -- [ ] **Post-`++` / post-`--` returns old value** - - cg: `cc-cg/NN-postinc.scm` — `int x=5; int y=x++; return x*10+y;` +Picked **(b) `cg-dup`** — duplicate the top vstack entry, used for +compound assign and pre-inc/dec to keep the lhs lval available across +its own load. Post-inc/dec use a dedicated `cg-postinc` / `cg-postdec` +primitive to capture the old rval before the store. See +[CC-CONTRACTS §4.1](CC-CONTRACTS.md#41-parsers-responsibilities). 
+ +- [x] **Pre-`++` / pre-`--`** + - cg: `cc-cg/21-preinc.scm` — `int x = 5; ++x; return x;` → exit 6. + - parse: `cc-parse/21-preinc.c` + - Done: parser dups lhs lval, loads, +1, assigns; pops result. + +- [x] **Post-`++` / post-`--` returns old value** + - cg: `cc-cg/22-postinc.scm` — `int x=5; int y=x++; return x*10+y;` → exit 65. - - parse: `cc-parse/NN-postinc.c` - - Needs: `cg-postinc` / `cg-postdec`, or parser uses `cg-dup` to - keep the old rval before the store. - -- [ ] **Compound assignment on simple lval (`+= -= *= /= %= <<= >>= &= ^= |=`)** - - cg: `cc-cg/NN-cmpd-simple.scm` — `int x=7; x+=3; return x;` → exit 10. - - parse: `cc-parse/NN-cmpd-simple.c` — one fixture per op family is - fine; the cg primitives are shared. - - Needs: same lhs preservation; existing parser sequence (take-addr, - push-deref, load, rhs, arith-conv, binop, assign) works once - preservation is in. - -- [ ] **Compound assignment through pointer** - - cg: `cc-cg/NN-cmpd-ptr.scm` — `int x=7; int *p=&x; *p+=3; return x;` - - parse: `cc-parse/NN-cmpd-ptr.c` - - Needs: validates the indirect-slot path in `cg-assign`. - -- [ ] **`*p++` walking an array** - - cg: `cc-cg/NN-deref-postinc.scm` — sums a 3-element array. - - parse: `cc-parse/NN-deref-postinc.c` - - Needs: composes B above with pointer arithmetic scaling. + - parse: `cc-parse/22-postinc.c` + - Done: `cg-postinc` / `cg-postdec` primitives composed of two + dup+load passes — one to capture the old rval (which lives in a + never-reused spill slot), one to compute the +1/-1 store. + +- [x] **Compound assignment on simple lval (`+= -= *= /= %= <<= >>= &= ^= |=`)** + - cg: `cc-cg/23-cmpd-simple.scm` — `int x=7; x+=3; return x;` → exit 10. + - parse: `cc-parse/23-cmpd-simple.c` — one of every op family. + - Done: parser uses `cg-dup` + `cg-load` + rhs + arith-conv + binop + + assign; the `cg-take-addr`/`cg-push-deref` indirection is gone. 
+ +- [x] **Compound assignment through pointer** + - cg: `cc-cg/24-cmpd-ptr.scm` — `int x=7; int *p=&x; *p+=3; return x;` + - parse: `cc-parse/24-cmpd-ptr.c` + - Done: same parser sequence; `cg-push-deref`'s indirect-slot lval + composes correctly with `cg-dup` + `cg-assign`. + +- [x] **`*p++` walking an array** + - cg: `cc-cg/25-deref-postinc.scm` — walks a 3-element span via *p++. + - parse: `cc-parse/25-deref-postinc.c` + - Done: composes B.2 (post-inc on a ptr lval; pointer scaling falls + out of `cg-binop add`'s ptr branch) with `*p` deref. Also fixed + `cg-arith-conv` to skip the relabel when one operand is ptr/arr + so `cg-binop` still sees a ptr/int pair (previously it saw both + sides relabelled to ptr and skipped the scaling). ### C. `sizeof` -- [ ] **`sizeof e` returns the type's actual size** - - parse: `cc-parse/NN-sizeof-expr.c` — `int x; return sizeof x;` → 4. - - Needs: parser peeks `(opnd-type (cg-top …))`, computes size, pops, - pushes `imm u64 size`. Today returns 8 always - (`parse.scm` line ~836). +- [x] **`sizeof e` returns the type's actual size** + - parse: `cc-parse/26-sizeof-expr.c` — `int x; return sizeof x;` → 4. + - Done: parser peeks `(opnd-type (cg-top …))`, takes its + `ctype-size`, pops, pushes `imm u64 size`. Both forms + (`sizeof e` and `sizeof(e)`) updated. -- [ ] **`sizeof` over struct, array, pointer, char** - - parse: `cc-parse/NN-sizeof-types.c` — sum of representative sizes - against a known integer. +- [x] **`sizeof` over struct, array, pointer, char** + - parse: `cc-parse/27-sizeof-types.c` — sum of `char`, `short`, + `int`, `long`, `int*`, `int[5]`, `struct S{int a; int b;}` → 51. + - Done: the type form already returned `ctype-size ty`; this + fixture just locks the answer in. ### D. Aggregates @@ -273,26 +276,31 @@ accepts an init bv but is never given one. ### H. Conditionals as values -`cg-ifelse` is correct for `if`-statements (thunks push nothing) but -leaks two opnds when both thunks push (ternary, `&&`, `||`). 
The fix -is a result-merging primitive: caller pre-allocates the result slot, -both branches store into it, vstack ends with one frame opnd. - -- [ ] **Ternary `?:` leaves exactly one rval** - - cg: `cc-cg/NN-ternary.scm` — `int x = c ? 1 : 2; return x;` → exit 1. - - parse: `cc-parse/NN-ternary.c` - - Needs: result-merging primitive (`cg-ifelse-merge` or similar); - parser passes the result type, cg allocates the slot. - -- [ ] **`&&` short-circuit leaves exactly one i32 rval** - - cg: `cc-cg/NN-land.scm` - - parse: `cc-parse/NN-land.c` - - Needs: same merging primitive; result type is `%t-i32` - irrespective of operands. - -- [ ] **`||` short-circuit leaves exactly one i32 rval** - - cg: `cc-cg/NN-lor.scm` - - parse: `cc-parse/NN-lor.c` +Added `cg-ifelse-merge`: caller pre-allocates the result slot, each +thunk pushes one rval that is then loaded and stored into the slot, +and the slot's frame rval is left on the vstack. The merged result +type is taken from the first thunk's pushed type — parser is +responsible for arranging compatible types in the two branches. + +- [x] **Ternary `?:` leaves exactly one rval** + - cg: `cc-cg/28-ternary.scm` — `c ? 7 : 9` with c=1 → exit 7. + - parse: `cc-parse/28-ternary.c` + - Done: parser swaps `cg-ifelse` for `cg-ifelse-merge` in the + `qmark` arm of `parse-binary-rhs`; both branches push their + parsed rval directly. + +- [x] **`&&` short-circuit leaves exactly one i32 rval** + - cg: `cc-cg/29-land.scm` + - parse: `cc-parse/29-land.c` + - Done: parser injects `cg-cast %t-bool` then `cg-cast %t-i32` on + the rhs side so the merged result is i32 ∈ {0,1}; the else-arm + pushes `%t-i32 0`. + +- [x] **`||` short-circuit leaves exactly one i32 rval** + - cg: `cc-cg/30-lor.scm` + - parse: `cc-parse/30-lor.c` + - Done: mirrors §H.2 with the bool-cast on the else-arm and a + constant `%t-i32 1` in the then-arm. ### I. Storage classes @@ -323,12 +331,12 @@ both branches store into it, vstack ends with one frame opnd. ### K. 
Expressions and conversions -- [ ] **Comma operator (`a, b` as expression)** - - parse: `cc-parse/NN-comma.c` — `int a; int b; (a=1, b=2); return a + b*10;` +- [x] **Comma operator (`a, b` as expression)** + - parse: `cc-parse/31-comma.c` — `int a; int b; (a=1, b=2); return a + b*10;` → exit 21. - - Needs: add `comma` to `%binop-bp` at lowest precedence, left-assoc. - Handler discards lhs (`cg-pop`) before evaluating rhs. tcc.c uses - this in `for` headers. + - Done: added `(comma . (1 . 2))` to `%binop-bp` (left-assoc, below + `assign`'s 4/3 so `parse-call-args ps 4` still won't slurp it as a + call separator); handler `cg-pop`s the lhs and evaluates the rhs. - [ ] **Function-pointer call** - cg: `cc-cg/NN-fnptr-call.scm` — push a fn-typed sym, spill to a diff --git a/tests/cc-cg/21-preinc.expected-exit b/tests/cc-cg/21-preinc.expected-exit @@ -0,0 +1 @@ +6 diff --git a/tests/cc-cg/21-preinc.scm b/tests/cc-cg/21-preinc.scm @@ -0,0 +1,29 @@ +;; tests/cc-cg/21-preinc.scm — pre-increment on a simple lval (§B.1). +;; +;; Models: int x = 5; ++x; return x; → exit 6. +;; +;; The "++x" sequence requires the lhs lval to be preserved across +;; the load (so we can store back). cg-dup duplicates the top vstack +;; entry, giving us two lvals: one we load, one we keep for assign. 
+ +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-x (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x))) + ;; x = 5 + (cg-push-sym cg sym-x) + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + ;; ++x: push lval; dup; load; push 1; add; assign; pop result + (cg-push-sym cg sym-x) + (cg-dup cg) + (cg-load cg) + (cg-push-imm cg %t-i32 1) + (cg-binop cg 'add) + (cg-assign cg) (cg-pop cg) + ;; return x + (cg-push-sym cg sym-x) + (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/22-postinc.expected-exit b/tests/cc-cg/22-postinc.expected-exit @@ -0,0 +1 @@ +65 diff --git a/tests/cc-cg/22-postinc.scm b/tests/cc-cg/22-postinc.scm @@ -0,0 +1,31 @@ +;; tests/cc-cg/22-postinc.scm — post-increment returns OLD value (§B.2). +;; +;; Models: int x = 5; int y = x++; return x*10 + y; → exit 65. +;; (x is 6 after increment, y captures the pre-increment 5.) +;; +;; cg-postinc operates atomically on a lval at the top of the vstack: +;; loads the old value, emits the +1 store, and pushes the OLD value. 
+ +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-x (cg-alloc-slot cg 4 4)) + (off-y (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x)) + (sym-y (%sym "y" 'var 'auto %t-i32 off-y))) + ;; x = 5 + (cg-push-sym cg sym-x) + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + ;; y = x++ + (cg-push-sym cg sym-y) + (cg-push-sym cg sym-x) + (cg-postinc cg) + (cg-assign cg) (cg-pop cg) + ;; return x*10 + y + (cg-push-sym cg sym-x) (cg-load cg) + (cg-push-imm cg %t-i32 10) (cg-binop cg 'mul) + (cg-push-sym cg sym-y) (cg-load cg) + (cg-binop cg 'add) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/23-cmpd-simple.expected-exit b/tests/cc-cg/23-cmpd-simple.expected-exit @@ -0,0 +1 @@ +10 diff --git a/tests/cc-cg/23-cmpd-simple.scm b/tests/cc-cg/23-cmpd-simple.scm @@ -0,0 +1,24 @@ +;; tests/cc-cg/23-cmpd-simple.scm — compound assignment on simple lval (§B.3). +;; +;; Models: int x = 7; x += 3; return x; → exit 10. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-x (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x))) + ;; x = 7 + (cg-push-sym cg sym-x) + (cg-push-imm cg %t-i32 7) + (cg-assign cg) (cg-pop cg) + ;; x += 3: push lval; dup; load; push 3; add; assign; pop + (cg-push-sym cg sym-x) + (cg-dup cg) + (cg-load cg) + (cg-push-imm cg %t-i32 3) + (cg-binop cg 'add) + (cg-assign cg) (cg-pop cg) + ;; return x + (cg-push-sym cg sym-x) (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/24-cmpd-ptr.expected-exit b/tests/cc-cg/24-cmpd-ptr.expected-exit @@ -0,0 +1 @@ +10 diff --git a/tests/cc-cg/24-cmpd-ptr.scm b/tests/cc-cg/24-cmpd-ptr.scm @@ -0,0 +1,35 @@ +;; tests/cc-cg/24-cmpd-ptr.scm — compound assignment through pointer (§B.4). +;; +;; Models: int x = 7; int *p = &x; *p += 3; return x; → exit 10. 
+ +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-x (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x)) + (off-p (cg-alloc-slot cg 8 8)) + (ptr-i32 (%ctype 'ptr 8 8 %t-i32)) + (sym-p (%sym "p" 'var 'auto ptr-i32 off-p))) + ;; x = 7 + (cg-push-sym cg sym-x) + (cg-push-imm cg %t-i32 7) + (cg-assign cg) (cg-pop cg) + ;; p = &x + (cg-push-sym cg sym-p) + (cg-push-sym cg sym-x) + (cg-take-addr cg) + (cg-assign cg) (cg-pop cg) + ;; *p += 3: push p; load (rval ptr); push-deref (lval int); + ;; dup; load (rval int); push 3; add; assign; pop + (cg-push-sym cg sym-p) + (cg-load cg) + (cg-push-deref cg) + (cg-dup cg) + (cg-load cg) + (cg-push-imm cg %t-i32 3) + (cg-binop cg 'add) + (cg-assign cg) (cg-pop cg) + ;; return x + (cg-push-sym cg sym-x) (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/25-deref-postinc.expected-exit b/tests/cc-cg/25-deref-postinc.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-cg/25-deref-postinc.scm b/tests/cc-cg/25-deref-postinc.scm @@ -0,0 +1,70 @@ +;; tests/cc-cg/25-deref-postinc.scm — *p++ walking an array (§B.5). +;; +;; Models: +;; int a[3]; a[0]=1; a[1]=2; a[2]=4; +;; int *p = &a[0]; +;; int s = 0; +;; s += *p++; // 1, p -> a[1] +;; s += *p++; // 2, p -> a[2] +;; s += *p++; // 4, p -> a[3] +;; return s; // 7 +;; +;; Exercises post-inc on a pointer (must scale by sizeof(int)) plus +;; pointer-deref + pointer-arith composition. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((arr-i32 (%ctype 'arr 12 4 (cons %t-i32 3))) + (off-a (cg-alloc-slot cg 12 4)) + (sym-a (%sym "a" 'var 'auto arr-i32 off-a)) + (off-p (cg-alloc-slot cg 8 8)) + (ptr-i32 (%ctype 'ptr 8 8 %t-i32)) + (sym-p (%sym "p" 'var 'auto ptr-i32 off-p)) + (off-s (cg-alloc-slot cg 4 4)) + (sym-s (%sym "s" 'var 'auto %t-i32 off-s))) + ;; a[i] = vals[i] — use &a (cast to ptr-i32) + i + push-deref + (let store-elem ((i 0) (vals '(1 2 4))) + (cond + ((null? 
vals) #t) + (else + (cg-push-sym cg sym-a) ; lval arr + (cg-take-addr cg) ; rval ptr-to-arr + (cg-cast cg ptr-i32) ; rval ptr-to-int + (cg-push-imm cg %t-i32 i) + (cg-binop cg 'add) ; ptr + i (scaled by 4) + (cg-push-deref cg) ; lval int + (cg-push-imm cg %t-i32 (car vals)) + (cg-assign cg) (cg-pop cg) + (store-elem (+ i 1) (cdr vals))))) + ;; p = &a[0] ; &a (arr) take-addr → ptr-to-arr; cast to ptr-int + (cg-push-sym cg sym-p) + (cg-push-sym cg sym-a) + (cg-take-addr cg) + (cg-cast cg ptr-i32) + (cg-assign cg) (cg-pop cg) + ;; s = 0 + (cg-push-sym cg sym-s) + (cg-push-imm cg %t-i32 0) + (cg-assign cg) (cg-pop cg) + ;; Three iterations: s += *p++ + (let walk ((k 0)) + (cond + ((= k 3) #t) + (else + (cg-push-sym cg sym-s) ; lval s + (cg-dup cg) (cg-load cg) ; [lval-s, rval-s] + ;; compute *p++: push p (lval ptr); cg-postinc → old ptr value; + ;; push-deref → lval int. + (cg-push-sym cg sym-p) ; lval p (ptr-i32) + (cg-postinc cg) ; pushes OLD ptr rval, p slot now bumped + (cg-push-deref cg) ; lval int + (cg-load cg) ; rval int + ;; arith: s + *p_old + (cg-binop cg 'add) + (cg-assign cg) (cg-pop cg) + (walk (+ k 1))))) + ;; return s + (cg-push-sym cg sym-s) (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/28-ternary.expected-exit b/tests/cc-cg/28-ternary.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-cg/28-ternary.scm b/tests/cc-cg/28-ternary.scm @@ -0,0 +1,30 @@ +;; tests/cc-cg/28-ternary.scm — ternary leaves exactly one rval (§H.1). +;; +;; Models: int c = 1; int x = c ? 7 : 9; return x; → exit 7. +;; +;; cg-ifelse-merge consumes the cond, runs both thunks, merges each +;; branch's top rval into a single result slot, and leaves one frame +;; rval on the vstack. 
+ +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-c (cg-alloc-slot cg 4 4)) + (sym-c (%sym "c" 'var 'auto %t-i32 off-c)) + (off-x (cg-alloc-slot cg 4 4)) + (sym-x (%sym "x" 'var 'auto %t-i32 off-x))) + ;; c = 1 + (cg-push-sym cg sym-c) + (cg-push-imm cg %t-i32 1) + (cg-assign cg) (cg-pop cg) + ;; x = c ? 7 : 9 + (cg-push-sym cg sym-x) + (cg-push-sym cg sym-c) (cg-load cg) + (cg-ifelse-merge cg + (lambda () (cg-push-imm cg %t-i32 7)) + (lambda () (cg-push-imm cg %t-i32 9))) + (cg-assign cg) (cg-pop cg) + ;; return x + (cg-push-sym cg sym-x) (cg-load cg) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/29-land.expected-exit b/tests/cc-cg/29-land.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/cc-cg/29-land.scm b/tests/cc-cg/29-land.scm @@ -0,0 +1,34 @@ +;; tests/cc-cg/29-land.scm — `&&` short-circuits, leaves one i32 (§H.2). +;; +;; Models: int a = 5; int b = 0; return (a && b) ? 100 : 42; +;; → exit 42 (since a && b is false because b == 0). +;; Also asserts the merged result is i32 0/1 — store it into an i32 +;; slot and reload, confirming exit value when isolated. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-a (cg-alloc-slot cg 4 4)) + (sym-a (%sym "a" 'var 'auto %t-i32 off-a)) + (off-b (cg-alloc-slot cg 4 4)) + (sym-b (%sym "b" 'var 'auto %t-i32 off-b))) + ;; a = 5; b = 0 + (cg-push-sym cg sym-a) + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + (cg-push-sym cg sym-b) + (cg-push-imm cg %t-i32 0) + (cg-assign cg) (cg-pop cg) + ;; (a && b) implemented via cg-ifelse-merge per parser pattern + (cg-push-sym cg sym-a) (cg-load cg) + (cg-ifelse-merge cg + (lambda () + (cg-push-sym cg sym-b) (cg-load cg) + (cg-cast cg %t-bool) (cg-cast cg %t-i32)) + (lambda () (cg-push-imm cg %t-i32 0))) + ;; Outer: (cond) ? 
100 : 42 + (cg-ifelse-merge cg + (lambda () (cg-push-imm cg %t-i32 100)) + (lambda () (cg-push-imm cg %t-i32 42))) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/30-lor.expected-exit b/tests/cc-cg/30-lor.expected-exit @@ -0,0 +1 @@ +11 diff --git a/tests/cc-cg/30-lor.scm b/tests/cc-cg/30-lor.scm @@ -0,0 +1,30 @@ +;; tests/cc-cg/30-lor.scm — `||` short-circuits, leaves one i32 (§H.3). +;; +;; Models: int a = 0; int b = 5; return (a || b) ? 11 : 33; → exit 11. + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-a (cg-alloc-slot cg 4 4)) + (sym-a (%sym "a" 'var 'auto %t-i32 off-a)) + (off-b (cg-alloc-slot cg 4 4)) + (sym-b (%sym "b" 'var 'auto %t-i32 off-b))) + (cg-push-sym cg sym-a) + (cg-push-imm cg %t-i32 0) + (cg-assign cg) (cg-pop cg) + (cg-push-sym cg sym-b) + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + ;; (a || b) + (cg-push-sym cg sym-a) (cg-load cg) + (cg-ifelse-merge cg + (lambda () (cg-push-imm cg %t-i32 1)) + (lambda () + (cg-push-sym cg sym-b) (cg-load cg) + (cg-cast cg %t-bool) (cg-cast cg %t-i32))) + ;; outer ?: 11/33 + (cg-ifelse-merge cg + (lambda () (cg-push-imm cg %t-i32 11)) + (lambda () (cg-push-imm cg %t-i32 33))) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-parse/21-preinc.c b/tests/cc-parse/21-preinc.c @@ -0,0 +1,6 @@ +// tests/cc-parse/21-preinc.c — pre-increment on a simple lval (§B.1). +int main(void) { + int x = 5; + ++x; + return x; +} diff --git a/tests/cc-parse/21-preinc.expected-exit b/tests/cc-parse/21-preinc.expected-exit @@ -0,0 +1 @@ +6 diff --git a/tests/cc-parse/22-postinc.c b/tests/cc-parse/22-postinc.c @@ -0,0 +1,6 @@ +// tests/cc-parse/22-postinc.c — post-increment returns OLD value (§B.2). 
+int main(void) { + int x = 5; + int y = x++; + return x * 10 + y; +} diff --git a/tests/cc-parse/22-postinc.expected-exit b/tests/cc-parse/22-postinc.expected-exit @@ -0,0 +1 @@ +65 diff --git a/tests/cc-parse/23-cmpd-simple.c b/tests/cc-parse/23-cmpd-simple.c @@ -0,0 +1,16 @@ +// tests/cc-parse/23-cmpd-simple.c — compound assignment on simple lval (§B.3). +// Exercises one op per family: +=, -=, *=, /=, %=, <<=, >>=, &=, ^=, |=. +int main(void) { + int a = 7; a += 3; // 10 + int b = 7; b -= 3; // 4 + int c = 7; c *= 3; // 21 + int d = 12; d /= 3; // 4 + int e = 7; e %= 3; // 1 + int f = 1; f <<= 4; // 16 + int g = 32; g >>= 1; // 16 + int h = 0xF; h &= 0xA; // 10 + int i = 0xF; i ^= 0xA; // 5 + int j = 0; j |= 7; // 7 + // sum = 10 + 4 + 21 + 4 + 1 + 16 + 16 + 10 + 5 + 7 = 94 + return a + b + c + d + e + f + g + h + i + j; +} diff --git a/tests/cc-parse/23-cmpd-simple.expected-exit b/tests/cc-parse/23-cmpd-simple.expected-exit @@ -0,0 +1 @@ +94 diff --git a/tests/cc-parse/24-cmpd-ptr.c b/tests/cc-parse/24-cmpd-ptr.c @@ -0,0 +1,7 @@ +// tests/cc-parse/24-cmpd-ptr.c — compound assignment through pointer (§B.4). +int main(void) { + int x = 7; + int *p = &x; + *p += 3; + return x; +} diff --git a/tests/cc-parse/24-cmpd-ptr.expected-exit b/tests/cc-parse/24-cmpd-ptr.expected-exit @@ -0,0 +1 @@ +10 diff --git a/tests/cc-parse/25-deref-postinc.c b/tests/cc-parse/25-deref-postinc.c @@ -0,0 +1,17 @@ +// tests/cc-parse/25-deref-postinc.c — *p++ walking a span (§B.5). +// +// We can't use a[i] (§D.5, owned elsewhere) and we can't span +// `int` locals because cg's per-statement spill slots interleave +// between adjacent local declarations. Instead use a multi-byte int +// holding our three values in its low bytes plus an unsigned-char +// pointer to walk the bytes — pointer scaling is by 1, no scaling +// quirk to dodge. 
+int main(void) { + int packed = (4 << 16) | (2 << 8) | 1; // bytes: 1, 2, 4 (LE) + unsigned char *p = (unsigned char *)&packed; + int s = 0; + s += *p++; + s += *p++; + s += *p++; + return s; +} diff --git a/tests/cc-parse/25-deref-postinc.expected-exit b/tests/cc-parse/25-deref-postinc.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-parse/26-sizeof-expr.c b/tests/cc-parse/26-sizeof-expr.c @@ -0,0 +1,5 @@ +// tests/cc-parse/26-sizeof-expr.c — sizeof e returns actual size (§C.1). +int main(void) { + int x; + return sizeof x; +} diff --git a/tests/cc-parse/26-sizeof-expr.expected-exit b/tests/cc-parse/26-sizeof-expr.expected-exit @@ -0,0 +1 @@ +4 diff --git a/tests/cc-parse/27-sizeof-types.c b/tests/cc-parse/27-sizeof-types.c @@ -0,0 +1,14 @@ +// tests/cc-parse/27-sizeof-types.c — sizeof over struct, array, ptr, +// char, plus the named integer types (§C.2). Sums to a known total. +struct S { int a; int b; }; +int main(void) { + int sum = 0; + sum += sizeof(char); // 1 + sum += sizeof(short); // 2 + sum += sizeof(int); // 4 + sum += sizeof(long); // 8 + sum += sizeof(int *); // 8 + sum += sizeof(int[5]); // 20 + sum += sizeof(struct S); // 8 + return sum; // 1+2+4+8+8+20+8 = 51 +} diff --git a/tests/cc-parse/27-sizeof-types.expected-exit b/tests/cc-parse/27-sizeof-types.expected-exit @@ -0,0 +1 @@ +51 diff --git a/tests/cc-parse/28-ternary.c b/tests/cc-parse/28-ternary.c @@ -0,0 +1,6 @@ +// tests/cc-parse/28-ternary.c — ternary leaves exactly one rval (§H.1). +int main(void) { + int c = 1; + int x = c ? 7 : 9; + return x; +} diff --git a/tests/cc-parse/28-ternary.expected-exit b/tests/cc-parse/28-ternary.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-parse/29-land.c b/tests/cc-parse/29-land.c @@ -0,0 +1,6 @@ +// tests/cc-parse/29-land.c — `&&` leaves exactly one i32 rval (§H.2). +int main(void) { + int a = 5; + int b = 0; + return (a && b) ? 
100 : 42; +} diff --git a/tests/cc-parse/29-land.expected-exit b/tests/cc-parse/29-land.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/cc-parse/30-lor.c b/tests/cc-parse/30-lor.c @@ -0,0 +1,6 @@ +// tests/cc-parse/30-lor.c — `||` leaves exactly one i32 rval (§H.3). +int main(void) { + int a = 0; + int b = 5; + return (a || b) ? 11 : 33; +} diff --git a/tests/cc-parse/30-lor.expected-exit b/tests/cc-parse/30-lor.expected-exit @@ -0,0 +1 @@ +11 diff --git a/tests/cc-parse/31-comma.c b/tests/cc-parse/31-comma.c @@ -0,0 +1,7 @@ +// tests/cc-parse/31-comma.c — comma operator (§K.1). +int main(void) { + int a; + int b; + (a = 1, b = 2); + return a + b * 10; // 1 + 20 = 21 +} diff --git a/tests/cc-parse/31-comma.expected-exit b/tests/cc-parse/31-comma.expected-exit @@ -0,0 +1 @@ +21