boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 5fad95766a76391e0a2df9cbfe37ddf6daddf8eb
parent 5ab5c9c3192e6aa171a5da7916f73c5b0af923ef
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 23:49:18 -0700

Merge Agent 2: §D + §L.2 (aggregates + array-param decay)

Diffstat:
Mcc/cg.scm | 114++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mcc/parse.scm | 35+++++++++++++++++++++++++----------
Mdocs/CC-PUNCHLIST.md | 107++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Atests/cc-cg/36-struct-load.expected-exit | 1+
Atests/cc-cg/36-struct-load.scm | 45+++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-cg/37-struct-store.expected-exit | 1+
Atests/cc-cg/37-struct-store.scm | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-cg/38-arrow.expected-exit | 1+
Atests/cc-cg/38-arrow.scm | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-cg/40-array-index.expected-exit | 1+
Atests/cc-cg/40-array-index.scm | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-parse/36-struct-load.c | 14++++++++++++++
Atests/cc-parse/36-struct-load.expected-exit | 1+
Atests/cc-parse/37-struct-store.c | 15+++++++++++++++
Atests/cc-parse/37-struct-store.expected-exit | 1+
Atests/cc-parse/38-arrow.c | 14++++++++++++++
Atests/cc-parse/38-arrow.expected-exit | 1+
Atests/cc-parse/39-struct-nested.c | 20++++++++++++++++++++
Atests/cc-parse/39-struct-nested.expected-exit | 1+
Atests/cc-parse/40-array-index.c | 12++++++++++++
Atests/cc-parse/40-array-index.expected-exit | 1+
Atests/cc-parse/41-array-2d.c | 17+++++++++++++++++
Atests/cc-parse/41-array-2d.expected-exit | 1+
Atests/cc-parse/42-struct-fn-arg.c | 18++++++++++++++++++
Atests/cc-parse/42-struct-fn-arg.expected-exit | 1+
Atests/cc-parse/43-array-param-decay.c | 26++++++++++++++++++++++++++
Atests/cc-parse/43-array-param-decay.expected-exit | 1+
27 files changed, 563 insertions(+), 56 deletions(-)

diff --git a/cc/cg.scm b/cc/cg.scm @@ -408,12 +408,119 @@ (cg-push cg (%opnd 'frame pe off #t))))))) ;; -------------------------------------------------------------------- +;; Aggregate field access (§D.1–D.4) +;; -------------------------------------------------------------------- +;; cg-push-field cg fname: +;; pop a struct/union lval; look up `fname` in the struct's fields +;; list (data.scm: ext = (tag complete? fields), where each field +;; is (name-bv ctype offset)); push a new lval at the field's +;; offset with the field's ctype. +;; +;; Three input cases: +;; - direct frame lval at slot `off` -> frame lval at off+fo +;; - indirect frame lval (slot holds addr) -> new indirect slot for +;; addr+fo +;; - global lval at label L -> indirect slot for +;; la(L)+fo +;; In all cases the resulting lval has the field's ctype. + +(define (%cg-find-field fields fname) + (let loop ((xs fields)) + (cond + ((null? xs) #f) + ((bv= (car (car xs)) fname) (car xs)) + (else (loop (cdr xs)))))) + +(define (cg-push-field cg fname) + (let* ((s (cg-pop cg)) + (sty (opnd-type s)) + (k (ctype-kind sty))) + (cond + ((not (or (eq? k 'struct) (eq? k 'union))) + (die #f "cg-push-field: not a struct/union" k)) + ((not (opnd-lval? s)) + (die #f "cg-push-field: not an lvalue" k)) + (else + (let* ((fields (car (cddr (ctype-ext sty)))) + (f (%cg-find-field fields fname))) + (cond + ((not f) (die #f "cg-push-field: no such field" fname)) + (else + (let* ((fty (cadr f)) (fo (car (cddr f)))) + (cond + ;; direct frame lval: just shift the slot offset. + ((and (eq? (opnd-kind s) 'frame) + (not (%cg-indirect? cg (opnd-ext s)))) + (cg-push cg (%opnd 'frame fty (+ (opnd-ext s) fo) #t))) + ;; indirect frame lval: addr lives in the slot. Compute + ;; addr+fo into a new indirect slot. + ((eq? 
(opnd-kind s) 'frame) + (%cg-emit-ld-slot cg 't0 (opnd-ext s)) + (cond + ((> fo 0) + (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) + (let ((no (cg-alloc-slot cg 8 8))) + (%cg-emit-st-slot cg 't0 no) + (%cg-mark-indirect! cg no) + (cg-push cg (%opnd 'frame fty no #t)))) + ;; global lval: load addr, add offset, indirect slot. + ((eq? (opnd-kind s) 'global) + (%cg-emit-la cg 't0 (opnd-ext s)) + (cond + ((> fo 0) + (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) + (let ((no (cg-alloc-slot cg 8 8))) + (%cg-emit-st-slot cg 't0 no) + (%cg-mark-indirect! cg no) + (cg-push cg (%opnd 'frame fty no #t)))) + (else + (die #f "cg-push-field: unsupported lval kind" + (opnd-kind s)))))))))))) + +;; cg-decay-array: +;; if top of vstack is an arr-typed lval, replace it with a ptr-rval +;; to the first element. C arrays decay to T* in most contexts; +;; parse calls this before rval-style operations. No-op otherwise. +(define (cg-decay-array cg) + (let ((tp (cg-top cg))) + (cond + ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr)) + (let* ((p (cg-pop cg)) + (et (car (ctype-ext (opnd-type p)))) + (pty (%ctype 'ptr 8 8 et))) + (cond + ;; direct frame lval: address is sp+off. + ((and (eq? (opnd-kind p) 'frame) + (not (%cg-indirect? cg (opnd-ext p)))) + (%cg-emit-many cg (list "%mov(t0, sp)\n" + "%addi(t0, t0, " + (%cg-slot-expr cg (opnd-ext p)) ")\n")) + (%cg-spill-reg cg 't0 pty)) + ;; indirect frame lval (rare for arrays, but support it): + ;; the slot holds the address already. + ((eq? (opnd-kind p) 'frame) + (%cg-emit-ld-slot cg 't0 (opnd-ext p)) + (%cg-spill-reg cg 't0 pty)) + ;; global array: la(label) is the address. + ((eq? 
(opnd-kind p) 'global) + (%cg-emit-la cg 't0 (opnd-ext p)) + (%cg-spill-reg cg 't0 pty)) + (else (die #f "cg-decay-array: unsupported lval kind" + (opnd-kind p)))))) + (else tp)))) + +;; -------------------------------------------------------------------- ;; Address & deref ;; -------------------------------------------------------------------- (define (cg-take-addr cg) (let* ((p (cg-pop cg)) (ty (opnd-type p)) - (pty (%ctype 'ptr 8 8 ty))) + ;; &arr yields T*, not (T[N])*. Result type is ptr-to-elem + ;; for arrays so subsequent pointer arithmetic scales by + ;; element size, not by array size. + (pty (cond ((eq? (ctype-kind ty) 'arr) + (%ctype 'ptr 8 8 (car (ctype-ext ty)))) + (else (%ctype 'ptr 8 8 ty))))) (cond ((not (opnd-lval? p)) (die #f "cg-take-addr: not an lvalue")) @@ -439,6 +546,11 @@ (let* ((p (cg-pop cg)) (ty (opnd-type p))) (cond ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue")) + ;; Array lvalues decay to a ptr-rval addressing the first + ;; element (C array-to-pointer decay). We push the lval back + ;; and route through cg-decay-array for a single source of truth. + ((eq? (ctype-kind ty) 'arr) + (cg-push cg p) (cg-decay-array cg)) ((and (eq? (opnd-kind p) 'frame) (%cg-indirect? cg (opnd-ext p))) ;; Indirect frame-lval: slot holds the address. Stage the diff --git a/cc/parse.scm b/cc/parse.scm @@ -263,6 +263,11 @@ (cons #f (lambda (b k) (k #f (s b)))))))) (define (parse-decl-suf-cont ps) + ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first): + ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2) + ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as + ;; outermost). The recursive structure builds the inner suffix's + ;; result first, then this level wraps. (cond ((at-punct? ps 'lbrack) (advance ps) @@ -270,14 +275,14 @@ (else (parse-const-int ps)))) (_ (expect-punct ps 'rbrack)) (r (parse-decl-suf-cont ps))) - (lambda (b) (r (%mk-arr b ln))))) + (lambda (b) (%mk-arr (r b) ln)))) ((at-punct? 
ps 'lparen) (advance ps) (let* ((res (parse-fn-params ps)) (p (car res)) (v (cdr res))) (expect-punct ps 'rparen) (let ((r (parse-decl-suf-cont ps))) - (lambda (b) (r (%mk-fn b p v)))))) + (lambda (b) (%mk-fn (r b) p v))))) (else (lambda (b) b)))) (define (paren-is-group? ps) @@ -926,15 +931,25 @@ (cg-call (ps-cg ps) n #t) (lp))) ((eq? v 'dot) - (advance ps) (advance ps) - (cg-push-imm (ps-cg ps) %t-i64 0) - (cg-binop (ps-cg ps) 'add) - (cg-push-deref (ps-cg ps)) (lp)) + (advance ps) + (let ((nt (advance ps))) + (cond + ((not (eq? (tok-kind nt) 'IDENT)) + (die (tok-loc nt) "expected field name")) + (else + (cg-push-field (ps-cg ps) (tok-value nt)) (lp))))) ((eq? v 'arrow) - (advance ps) (advance ps) (rval! ps) - (cg-push-imm (ps-cg ps) %t-i64 0) - (cg-binop (ps-cg ps) 'add) - (cg-push-deref (ps-cg ps)) (lp)) + (advance ps) + (let ((nt (advance ps))) + (cond + ((not (eq? (tok-kind nt) 'IDENT)) + (die (tok-loc nt) "expected field name")) + (else + ;; ptr -> field: load the pointer to rval, deref to + ;; reach the struct lval, then push the field. + (rval! ps) + (cg-push-deref (ps-cg ps)) + (cg-push-field (ps-cg ps) (tok-value nt)) (lp))))) ((eq? v 'inc) (advance ps) (cg-postinc (ps-cg ps)) (lp)) diff --git a/docs/CC-PUNCHLIST.md b/docs/CC-PUNCHLIST.md @@ -147,45 +147,61 @@ primitive to capture the old rval before the store. See ### D. Aggregates -- [ ] **Struct member load** - - cg: `cc-cg/NN-struct-load.scm` — pushes a struct frame lval at - offset, loads field-typed value. - - parse: `cc-parse/NN-struct-load.c` — `struct S {int a; int b;}; struct S s; - s.a=1; s.b=2; return s.a + s.b*10;` → exit 21. - - Needs: `cg-push-field cg fname` — pop struct/union lval, look up - `fname` in `ctype-ext`'s `(tag complete? fields)`, push frame - lval at the right offset with the field's ctype. Replaces the - parser stub at `parse.scm` lines 947–960 that ignores the field - name and uses offset 0. 
- -- [ ] **Struct member store** - - cg: `cc-cg/NN-struct-store.scm` - - parse: `cc-parse/NN-struct-store.c` - - Needs: same primitive plus width-correct stores from §A. - -- [ ] **Pointer-to-struct (`p->x`)** - - cg: `cc-cg/NN-arrow.scm` - - parse: `cc-parse/NN-arrow.c` - - Needs: parser does ptr → deref → field via `cg-push-field`. - -- [ ] **Nested struct access (`s.inner.x`, `s->inner.x`)** - - parse: `cc-parse/NN-struct-nested.c` - -- [ ] **Array element access at non-zero index** - - cg: `cc-cg/NN-array-index.scm` — `int a[3]; a[0]=1; a[1]=2; a[2]=4; - return a[0]+a[1]+a[2];` → exit 7. - - parse: `cc-parse/NN-array-index.c` - - Needs: array lval decays to ptr-rval (in `cg-push-sym` or via a - new `cg-decay-array`); verify scaling for `arr` types in - `cg-binop add`. - -- [ ] **Multi-dim arrays** - - parse: `cc-parse/NN-array-2d.c` - - Needs: derived `arr (arr T N) M`; verify size/align/decay. - -- [ ] **Struct passed by pointer to a function** - - parse: `cc-parse/NN-struct-fn-arg.c` — passes `&s`. - - Needs: nothing new; smoke-tests §D primitives. +- [x] **Struct member load** + - cg: `cc-cg/36-struct-load.scm` — two-int struct, fields at 0 and 4. + - parse: `cc-parse/36-struct-load.c` + - Done: added `cg-push-field cg fname` (cg.scm). Pops struct/union + lval, looks up `fname` in `ctype-ext`'s `(tag complete? fields)`. + Three input cases: direct frame lval shifts the slot offset; + indirect frame lval loads addr+fo into a new indirect slot; + global lval `la`'s the label, adds `fo`, stashes via indirect + slot. Parser `dot` arm replaced. + +- [x] **Struct member store** + - cg: `cc-cg/37-struct-store.scm` — three u8 fields, distinct + multipliers in the readback to isolate per-field width. + - parse: `cc-parse/37-struct-store.c` + - Done: cg-push-field from §D.1 plus the width-aware store path + from §A.1. No new primitive. 
+ +- [x] **Pointer-to-struct (`p->x`)** + - cg: `cc-cg/38-arrow.scm` + - parse: `cc-parse/38-arrow.c` + - Done: arrow arm in `parse-postfix-rest` calls rval! (loads ptr), + cg-push-deref (struct lval through ptr), then cg-push-field. + Indirect-frame branch of cg-push-field (added in §D.1) handles + the deref-result struct lval correctly. + +- [x] **Nested struct access (`s.inner.x`, `s->inner.x`)** + - parse: `cc-parse/39-struct-nested.c` + - Done: cg-push-field pushes a new lval whose ctype is the field's + type; if that's a struct, a subsequent `.x` chains naturally. + The fixture exercises both s.inner.x (direct frame) and + p->inner.x (indirect frame, via the §D.1 indirect path). + +- [x] **Array element access at non-zero index** + - cg: `cc-cg/40-array-index.scm` + - parse: `cc-parse/40-array-index.c` + - Done: cg-load on an arr-typed lval delegates to cg-decay-array, + pushing a ptr-rval to the first element. Existing `cg-binop add` + pointer-arithmetic path scales by the pointee size, so `a + i` + yields `&a[i]`, and cg-push-deref turns that into the element + lval. cg-take-addr on an arr lval was also adjusted to yield + T* (not (T[N])*) so `&a[0]` stays consistent. + +- [x] **Multi-dim arrays** + - parse: `cc-parse/41-array-2d.c` + - Done: fixed `parse-decl-suf-cont` to apply suffixes + right-to-left (innermost first) so `int a[2][3]` produces + `arr (arr int 3) 2`, not `arr (arr int 2) 3`. Same fix in the + fn-suffix arm so `T (...)(...)` chains compose correctly. Decay + + ptr arithmetic from §D.5 then handles the rest. + +- [x] **Struct passed by pointer to a function** + - parse: `cc-parse/42-struct-fn-arg.c` — passes `&s` to `sum2`, + callee returns `p->x + p->y`. + - Done: composes §D.1/§D.3 (cg-push-field, arrow access) and the + pre-existing param/call/return wiring. No new primitive. 
*Pass-by-value of structs is outside CC.md's accepted set; tcc.c doesn't use it.* @@ -385,12 +401,13 @@ responsible for arranging compatible types in the two branches. extent); `cg-push-field` for the flex member returns an `arr`- typed lval that decays to `ptr` on use. -- [ ] **`T[]` in parameter position decays to `T *`** - - parse: `cc-parse/NN-array-param-decay.c` — `int sum(int a[], int n) - { int s=0; for(int i=0;i<n;i++) s+=a[i]; return s; }` → known sum. - - Needs: parser detects `arr` ctype in fn-param position and - rewrites to `ptr` before slot allocation. cg sees a pointer and - needs no special handling. +- [x] **`T[]` in parameter position decays to `T *`** + - parse: `cc-parse/43-array-param-decay.c` — `int sum(int a[], int n) + { ... s+=a[i]; ... } sum(xs,4)` over `{1,2,3,4}` → exit 10. + - Done: `parse-fn-params` rewrites arr→ptr (and fn→ptr) before + slot allocation, so cg sees an 8-byte ptr slot and the callee's + `a[i]` decays the param's loaded ptr-rval and scales the index + correctly. - [ ] **Array of function pointers initialized with named functions** - parse: `cc-parse/NN-fnptr-tab.c` — `int f1(){return 1;} diff --git a/tests/cc-cg/36-struct-load.expected-exit b/tests/cc-cg/36-struct-load.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/36-struct-load.scm b/tests/cc-cg/36-struct-load.scm @@ -0,0 +1,45 @@ +;; tests/cc-cg/36-struct-load.scm — struct member load via cg-push-field +;; (§D.1 of docs/CC-PUNCHLIST.md). +;; +;; Models: +;; struct S { int a; int b; }; +;; struct S s; +;; s.a = 5; s.b = 7; <- using direct cg-push-field + cg-assign +;; return s.b - s.a; <- 2; assert (s.b - s.a == 2) -> exit 1 +;; +;; If cg-push-field is the broken stub it would access offset 0 for +;; both fields, so loading s.b would also yield 5 and the equality +;; check would fail (exit 0). With correct field-offset arithmetic +;; the result is 2 and exit is 1. 
+ +(let* ((cg (cg-init)) + (st-ty (%ctype 'struct 8 4 + (list "S" #t + (list (list "a" %t-i32 0) + (list "b" %t-i32 4)))))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-s (cg-alloc-slot cg 8 4)) + (sym-s (%sym "s" 'var 'auto st-ty off-s))) + ;; s.a = 5 + (cg-push-sym cg sym-s) + (cg-push-field cg "a") + (cg-push-imm cg %t-i32 5) + (cg-assign cg) (cg-pop cg) + ;; s.b = 7 + (cg-push-sym cg sym-s) + (cg-push-field cg "b") + (cg-push-imm cg %t-i32 7) + (cg-assign cg) (cg-pop cg) + ;; return (s.b - s.a) == 2 + (cg-push-sym cg sym-s) + (cg-push-field cg "b") + (cg-load cg) + (cg-push-sym cg sym-s) + (cg-push-field cg "a") + (cg-load cg) + (cg-binop cg 'sub) + (cg-push-imm cg %t-i32 2) + (cg-binop cg 'eq) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/37-struct-store.expected-exit b/tests/cc-cg/37-struct-store.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-cg/37-struct-store.scm b/tests/cc-cg/37-struct-store.scm @@ -0,0 +1,56 @@ +;; tests/cc-cg/37-struct-store.scm — struct member store via cg-push-field +;; (§D.2 of docs/CC-PUNCHLIST.md). Uses char-typed fields to ensure the +;; width-correct store path from §A.1 cooperates with field offsets. +;; +;; struct B { unsigned char a; unsigned char b; unsigned char c; }; +;; b.a = 3; b.b = 5; b.c = 7; +;; If field stores ignored offsets (or used 8-byte writes), adjacent +;; bytes would clobber each other. Reading back a*1 + b*10 + c*100 +;; isolates each field's contribution: 3 + 50 + 700 = 753. (Truncated +;; to a u8 by the exit-code path: 753 & 255 = 241.) 
+ +(let* ((cg (cg-init)) + (st-ty (%ctype 'struct 3 1 + (list "B" #t + (list (list "a" %t-u8 0) + (list "b" %t-u8 1) + (list "c" %t-u8 2)))))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-b (cg-alloc-slot cg 3 1)) + (sym-b (%sym "b" 'var 'auto st-ty off-b))) + ;; b.a = 3 + (cg-push-sym cg sym-b) + (cg-push-field cg "a") + (cg-push-imm cg %t-u8 3) + (cg-assign cg) (cg-pop cg) + ;; b.b = 5 + (cg-push-sym cg sym-b) + (cg-push-field cg "b") + (cg-push-imm cg %t-u8 5) + (cg-assign cg) (cg-pop cg) + ;; b.c = 7 + (cg-push-sym cg sym-b) + (cg-push-field cg "c") + (cg-push-imm cg %t-u8 7) + (cg-assign cg) (cg-pop cg) + ;; return (b.a + b.b*10 + b.c*100) == 753 + (cg-push-sym cg sym-b) + (cg-push-field cg "a") + (cg-load cg) + (cg-push-sym cg sym-b) + (cg-push-field cg "b") + (cg-load cg) + (cg-push-imm cg %t-i32 10) + (cg-binop cg 'mul) + (cg-binop cg 'add) + (cg-push-sym cg sym-b) + (cg-push-field cg "c") + (cg-load cg) + (cg-push-imm cg %t-i32 100) + (cg-binop cg 'mul) + (cg-binop cg 'add) + (cg-push-imm cg %t-i32 753) + (cg-binop cg 'eq) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/38-arrow.expected-exit b/tests/cc-cg/38-arrow.expected-exit @@ -0,0 +1 @@ +5 diff --git a/tests/cc-cg/38-arrow.scm b/tests/cc-cg/38-arrow.scm @@ -0,0 +1,59 @@ +;; tests/cc-cg/38-arrow.scm — pointer-to-struct field access via cg +;; (§D.3 of docs/CC-PUNCHLIST.md). Models p->a / p->b through: +;; cg-push-sym sym-p ; lval (frame), holds ptr-value +;; cg-load ; rval ptr +;; cg-push-deref ; lval struct (indirect) +;; cg-push-field "a" ; lval int at p+0 +;; +;; struct S { int a; int b; }; struct S s; +;; struct S *p = &s; p->a = 4; p->b = 9; return p->b - p->a; +;; (The cg fixture takes &s into a frame slot directly so we don't +;; depend on parser-only address-of plumbing.) +;; +;; Expected: 9 - 4 == 5. 
+ +(let* ((cg (cg-init)) + (st-ty (%ctype 'struct 8 4 + (list "S" #t + (list (list "a" %t-i32 0) + (list "b" %t-i32 4))))) + (pt-ty (%ctype 'ptr 8 8 st-ty))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-s (cg-alloc-slot cg 8 4)) + (off-p (cg-alloc-slot cg 8 8)) + (sym-s (%sym "s" 'var 'auto st-ty off-s)) + (sym-p (%sym "p" 'var 'auto pt-ty off-p))) + ;; p = &s + (cg-push-sym cg sym-p) + (cg-push-sym cg sym-s) + (cg-take-addr cg) + (cg-assign cg) (cg-pop cg) + ;; p->a = 4 + (cg-push-sym cg sym-p) + (cg-load cg) + (cg-push-deref cg) + (cg-push-field cg "a") + (cg-push-imm cg %t-i32 4) + (cg-assign cg) (cg-pop cg) + ;; p->b = 9 + (cg-push-sym cg sym-p) + (cg-load cg) + (cg-push-deref cg) + (cg-push-field cg "b") + (cg-push-imm cg %t-i32 9) + (cg-assign cg) (cg-pop cg) + ;; return p->b - p->a + (cg-push-sym cg sym-p) + (cg-load cg) + (cg-push-deref cg) + (cg-push-field cg "b") + (cg-load cg) + (cg-push-sym cg sym-p) + (cg-load cg) + (cg-push-deref cg) + (cg-push-field cg "a") + (cg-load cg) + (cg-binop cg 'sub) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-cg/40-array-index.expected-exit b/tests/cc-cg/40-array-index.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-cg/40-array-index.scm b/tests/cc-cg/40-array-index.scm @@ -0,0 +1,55 @@ +;; tests/cc-cg/40-array-index.scm — array element access at non-zero +;; index (§D.5 of docs/CC-PUNCHLIST.md). Models: +;; int a[3]; a[0]=1; a[1]=2; a[2]=4; return a[0]+a[1]+a[2]; -> 7 +;; +;; The cg API does the same dance the parser uses for a[i]: +;; cg-push-sym sym-a ; arr-typed lval +;; cg-push-imm i ; index rval +;; cg-binop add ; ptr arithmetic — `cg-load` decays array to +;; ; ptr, then add scales by elem size +;; cg-push-deref ; lval int through the computed pointer +;; +;; Wait — cg-binop pops both ops; for the lhs we need rval-of-arr, +;; not the array lval. We therefore call cg-load BEFORE the index +;; push, so the arr lval decays to a ptr-rval first. 
+ +(let* ((cg (cg-init)) + (arr-ty (%ctype 'arr 12 4 (cons %t-i32 3)))) + (cg-fn-begin cg "main" '() %t-i32) + (let* ((off-a (cg-alloc-slot cg 12 4)) + (sym-a (%sym "a" 'var 'auto arr-ty off-a))) + ;; a[0] = 1 + (cg-push-sym cg sym-a) + (cg-load cg) ; arr lval -> ptr rval (decay) + (cg-push-imm cg %t-i32 0) + (cg-binop cg 'add) ; ptr + 0 + (cg-push-deref cg) + (cg-push-imm cg %t-i32 1) + (cg-assign cg) (cg-pop cg) + ;; a[1] = 2 + (cg-push-sym cg sym-a) (cg-load cg) + (cg-push-imm cg %t-i32 1) (cg-binop cg 'add) + (cg-push-deref cg) + (cg-push-imm cg %t-i32 2) + (cg-assign cg) (cg-pop cg) + ;; a[2] = 4 + (cg-push-sym cg sym-a) (cg-load cg) + (cg-push-imm cg %t-i32 2) (cg-binop cg 'add) + (cg-push-deref cg) + (cg-push-imm cg %t-i32 4) + (cg-assign cg) (cg-pop cg) + ;; return a[0] + a[1] + a[2] + (cg-push-sym cg sym-a) (cg-load cg) + (cg-push-imm cg %t-i32 0) (cg-binop cg 'add) + (cg-push-deref cg) (cg-load cg) + (cg-push-sym cg sym-a) (cg-load cg) + (cg-push-imm cg %t-i32 1) (cg-binop cg 'add) + (cg-push-deref cg) (cg-load cg) + (cg-binop cg 'add) + (cg-push-sym cg sym-a) (cg-load cg) + (cg-push-imm cg %t-i32 2) (cg-binop cg 'add) + (cg-push-deref cg) (cg-load cg) + (cg-binop cg 'add) + (cg-return cg)) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-parse/36-struct-load.c b/tests/cc-parse/36-struct-load.c @@ -0,0 +1,14 @@ +// tests/cc-parse/36-struct-load.c — struct member load via real C +// (§D.1 of docs/CC-PUNCHLIST.md). Two int fields at distinct offsets; +// reading both back in distinct positions of the result confirms the +// parser uses the field offset, not 0 for both. +// +// s.a=1; s.b=2; return s.a + s.b*10; => exit 21. 
+ +int main() { + struct S { int a; int b; }; + struct S s; + s.a = 1; + s.b = 2; + return s.a + s.b * 10; +} diff --git a/tests/cc-parse/36-struct-load.expected-exit b/tests/cc-parse/36-struct-load.expected-exit @@ -0,0 +1 @@ +21 diff --git a/tests/cc-parse/37-struct-store.c b/tests/cc-parse/37-struct-store.c @@ -0,0 +1,15 @@ +// tests/cc-parse/37-struct-store.c — struct member store via real C +// (§D.2 of docs/CC-PUNCHLIST.md). char-typed fields exercise the +// width-correct store path; distinct multipliers in the readback +// isolate each field, so any offset/width bug yields a wrong sum. +// +// b.a=3; b.b=5; b.c=7; return (b.a + b.b*10 + b.c*100) == 753; -> 1. + +int main() { + struct B { unsigned char a; unsigned char b; unsigned char c; }; + struct B b; + b.a = 3; + b.b = 5; + b.c = 7; + return (b.a + b.b * 10 + b.c * 100) == 753; +} diff --git a/tests/cc-parse/37-struct-store.expected-exit b/tests/cc-parse/37-struct-store.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-parse/38-arrow.c b/tests/cc-parse/38-arrow.c @@ -0,0 +1,14 @@ +// tests/cc-parse/38-arrow.c — pointer-to-struct field access via real C +// (§D.3 of docs/CC-PUNCHLIST.md). Validates the arrow arm: rval the +// pointer, deref to reach the struct, push-field at the right offset. +// +// p->a=4; p->b=9; return p->b - p->a; => exit 5. + +int main() { + struct S { int a; int b; }; + struct S s; + struct S *p = &s; + p->a = 4; + p->b = 9; + return p->b - p->a; +} diff --git a/tests/cc-parse/38-arrow.expected-exit b/tests/cc-parse/38-arrow.expected-exit @@ -0,0 +1 @@ +5 diff --git a/tests/cc-parse/39-struct-nested.c b/tests/cc-parse/39-struct-nested.c @@ -0,0 +1,20 @@ +// tests/cc-parse/39-struct-nested.c — nested struct field access via +// real C (§D.4 of docs/CC-PUNCHLIST.md). Tests both `s.inner.x` and +// `p->inner.x` chains. Padding ensures the inner struct is at a +// non-zero offset, so each step's offset must be summed correctly. 
+// +// outer.lead = 0; outer.inner.x=2; outer.inner.y=3; +// return p->inner.x + p->inner.y*10 + s.inner.x*100 + s.inner.y*1000; +// = 2 + 30 + 200 + 3000 = 3232. We compare to 3232 -> exit 1. + +int main() { + struct Inner { int x; int y; }; + struct Outer { int lead; struct Inner inner; }; + struct Outer s; + struct Outer *p = &s; + s.lead = 99; + s.inner.x = 2; + s.inner.y = 3; + return (p->inner.x + p->inner.y * 10 + s.inner.x * 100 + s.inner.y * 1000) + == 3232; +} diff --git a/tests/cc-parse/39-struct-nested.expected-exit b/tests/cc-parse/39-struct-nested.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-parse/40-array-index.c b/tests/cc-parse/40-array-index.c @@ -0,0 +1,12 @@ +// tests/cc-parse/40-array-index.c — array element access at non-zero +// index via real C (§D.5 of docs/CC-PUNCHLIST.md). a[0]+a[1]+a[2] +// = 7. Distinct values 1,2,4 ensure each index is read independently +// (sum 7 forms a unique bit-pattern in 3 bits). + +int main() { + int a[3]; + a[0] = 1; + a[1] = 2; + a[2] = 4; + return a[0] + a[1] + a[2]; +} diff --git a/tests/cc-parse/40-array-index.expected-exit b/tests/cc-parse/40-array-index.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/cc-parse/41-array-2d.c b/tests/cc-parse/41-array-2d.c @@ -0,0 +1,17 @@ +// tests/cc-parse/41-array-2d.c — multi-dim array indexing (§D.6 of +// docs/CC-PUNCHLIST.md). int a[2][3]; row-major, so &a[1][2] is at +// byte offset (1*3 + 2)*4 = 20 from a's base. We write distinct +// values into a[0][0]..a[1][2] and read them back via a known sum. +// +// Sum is 0 + 1 + 2 + 10 + 11 + 12 = 36. 
+ +int main() { + int a[2][3]; + a[0][0] = 0; + a[0][1] = 1; + a[0][2] = 2; + a[1][0] = 10; + a[1][1] = 11; + a[1][2] = 12; + return a[0][0] + a[0][1] + a[0][2] + a[1][0] + a[1][1] + a[1][2]; +} diff --git a/tests/cc-parse/41-array-2d.expected-exit b/tests/cc-parse/41-array-2d.expected-exit @@ -0,0 +1 @@ +36 diff --git a/tests/cc-parse/42-struct-fn-arg.c b/tests/cc-parse/42-struct-fn-arg.c @@ -0,0 +1,18 @@ +// tests/cc-parse/42-struct-fn-arg.c — struct passed by pointer to a +// function (§D.7 of docs/CC-PUNCHLIST.md). Smoke-tests §D primitives: +// caller takes &s, callee dereferences via -> on the pointer arg. +// +// sum2(&s) where s={3,5} returns 8. + +struct P { int x; int y; }; + +int sum2(struct P *p) { + return p->x + p->y; +} + +int main() { + struct P s; + s.x = 3; + s.y = 5; + return sum2(&s); +} diff --git a/tests/cc-parse/42-struct-fn-arg.expected-exit b/tests/cc-parse/42-struct-fn-arg.expected-exit @@ -0,0 +1 @@ +8 diff --git a/tests/cc-parse/43-array-param-decay.c b/tests/cc-parse/43-array-param-decay.c @@ -0,0 +1,26 @@ +// tests/cc-parse/43-array-param-decay.c — T[] in fn-param position +// decays to T* (§L.2 of docs/CC-PUNCHLIST.md). The callee declares +// `int a[]`; the parser must rewrite it to `int *` before slot +// allocation so `a[i]` indexes through a real pointer (not via an +// array slot, which would be wrong for an actual int* argument). +// +// sum(a, 4) over {1,2,3,4} = 10. + +int sum(int a[], int n) { + int s = 0; + int i = 0; + while (i < n) { + s = s + a[i]; + i = i + 1; + } + return s; +} + +int main() { + int xs[4]; + xs[0] = 1; + xs[1] = 2; + xs[2] = 3; + xs[3] = 4; + return sum(xs, 4); +} diff --git a/tests/cc-parse/43-array-param-decay.expected-exit b/tests/cc-parse/43-array-param-decay.expected-exit @@ -0,0 +1 @@ +10