commit cdadeb225ec44876ca034f3552b7a0675a1598f3
parent ebb6ead5854a278ec3e5f5c72a7a536a114e67d5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 21:52:11 -0700
cc: deduplicate codegen and parser helpers
Cleanup pass over cc.scm. No behavior change; cc-cg / cc / cc-libc /
tcc-cc all unchanged. ~190 lines net.
- %cg-canonicalize: one kind-driven sext/zext helper used by frame-rval
load (%cg-load-opnd-into), narrowing cast (cg-cast), and narrow-typed
binop result (cg-binop), replacing three near-identical ladders.
- %cg-emit-{ld,st}-bv: shared width-dispatch core; the typed and
slot-typed variants become one-line trampolines.
- %cg-emit-addr-of: merged with %cg-emit-addr-of-any (the latter was a
superset). cg-va-start / cg-va-arg route ap-lvalue load/store through
it instead of open-coding the frame/global pmatch.
- %punct-scan: generic top-level token walker (paren/bracket depth +
optional ternary-? tracking). Used for %const-skip-dead-arm (one
helper for both ternary arms — the previous mid/rhs split was
behaviorally identical) and %const-skip-{land,lor}-rhs.
- %const-shift-op: shift combiner so parse-const-shift plugs into
%const-binl like every other binary level.
- %tok-decl-start?: single canonical "does TOK begin a type-name?"
predicate, used by %const-tok-is-decl?, %const-paren-is-cast?,
token-is-decl?, stmt-starts-decl? (adds storage classes), and the
parse-cast-or-unary disambiguator (adds __attribute__).
- %init-drop-thru-field, %global-init-elem, %local-init-elem: shared
designator-drop and brace-vs-elision dispatch for the four
parse-init-*-list/mode walkers.
- Removed dead %emit-local-elem-store.
cc.scm.md refreshed to match.
Diffstat:
| M | cc/cc.scm | | | 640 | ++++++++++++++++++++++++++++--------------------------------------------------- |
| M | cc/cc.scm.md | | | 96 | +++++++++++++++++++++++++++++++++++++++++++------------------------------------ |
2 files changed, 279 insertions(+), 457 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -2749,57 +2749,66 @@
(rb (%cg-reg->bv reg)))
(%cg-emit-many cg (list "%sext" width "(" rb ", " rb ")\n"))))
-(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
- (%cg-fp-reject! 'ld-slot ctype)
- (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
- (off-bv (%cg-slot-expr cg logical-off)))
+;; Canonicalize REG against CTYPE's kind: signed narrow types sign-extend,
+;; unsigned narrow types zero-extend, anything else is left alone (the
+;; full 64-bit value is already canonical). Used after operations that
+;; may have left a non-canonical bit pattern in reg — frame-rval load,
+;; narrowing cast, narrow-typed binop result.
+(define (%cg-canonicalize cg reg ctype)
+ (let* ((rb (%cg-reg->bv reg))
+ (k (ctype-kind ctype)))
(cond
- ((= sz 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
- off-bv ")\n"))
- (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
- ((= sz 2) (%cg-emit-ld-sub cg reg "sp" off-bv (eq? kind 'i16) 2))
- ((= sz 4) (%cg-emit-ld-sub cg reg "sp" off-bv (eq? kind 'i32) 4))
- (else (%cg-emit-ld-slot cg reg logical-off)))))
-
-(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
- (%cg-fp-reject! 'st-slot ctype)
- (let* ((sz (ctype-size ctype))
- (off-bv (%cg-slot-expr cg logical-off)))
- (cond
- ((= sz 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
- off-bv ")\n")))
- ((= sz 2) (%cg-emit-st-sub cg reg "sp" off-bv 2))
- ((= sz 4) (%cg-emit-st-sub cg reg "sp" off-bv 4))
- (else (%cg-emit-st-slot cg reg logical-off)))))
-
-(define (%cg-emit-ld-typed cg reg ctype base off)
+ ((eq? k 'i8) (%cg-emit-sext cg reg 56))
+ ((eq? k 'i16) (%cg-emit-sext cg reg 48))
+ ((eq? k 'i32) (%cg-emit-sext cg reg 32))
+ ((or (eq? k 'u8) (eq? k 'bool))
+ (%cg-emit-many cg (list "%zext8(" rb ", " rb ")\n")))
+ ((eq? k 'u16)
+ (%cg-emit-many cg (list "%zext16(" rb ", " rb ")\n")))
+ ((eq? k 'u32)
+ (%cg-emit-many cg (list "%zext32(" rb ", " rb ", t1)\n")))
+ (else 0))))
+
+;; Width-aware load/store core. BASE-BV / OFF-BV are pre-built (so the
+;; same body serves both the slot variants — base = "sp", off rendered
+;; through %cg-slot-expr — and the typed variants, where base is a
+;; register and off is a raw integer rendered via %n). 1-byte uses
+;; %lb/%sb (with i8 sext); 2- and 4-byte use the sub-word helpers; the
+;; 8-byte fallback emits a plain %ld/%st against the same base/off.
+(define (%cg-emit-ld-bv cg reg ctype base-bv off-bv)
(%cg-fp-reject! 'ld ctype)
(let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
- (base-bv (%cg-reg->bv base))
- (off-bv (%n off)))
+ (rb (%cg-reg->bv reg)))
(cond
((= sz 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
- base-bv ", " off-bv ")\n"))
+ (%cg-emit-many cg (list "%lb(" rb ", " base-bv ", " off-bv ")\n"))
(cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
((= sz 2) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i16) 2))
((= sz 4) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i32) 4))
- (else (%cg-emit-ld cg reg base off)))))
+ (else
+ (%cg-emit-many cg (list "%ld(" rb ", " base-bv ", " off-bv ")\n"))))))
-(define (%cg-emit-st-typed cg reg ctype base off)
+(define (%cg-emit-st-bv cg reg ctype base-bv off-bv)
(%cg-fp-reject! 'st ctype)
- (let* ((sz (ctype-size ctype))
- (base-bv (%cg-reg->bv base))
- (off-bv (%n off)))
+ (let ((sz (ctype-size ctype))
+ (rb (%cg-reg->bv reg)))
(cond
((= sz 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
- base-bv ", " off-bv ")\n")))
+ (%cg-emit-many cg (list "%sb(" rb ", " base-bv ", " off-bv ")\n")))
((= sz 2) (%cg-emit-st-sub cg reg base-bv off-bv 2))
((= sz 4) (%cg-emit-st-sub cg reg base-bv off-bv 4))
- (else (%cg-emit-st cg reg base off)))))
+ (else
+ (%cg-emit-many cg (list "%st(" rb ", " base-bv ", " off-bv ")\n"))))))
+
+(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
+ (%cg-emit-ld-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off)))
+(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
+ (%cg-emit-st-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off)))
+
+(define (%cg-emit-ld-typed cg reg ctype base off)
+ (%cg-emit-ld-bv cg reg ctype (%cg-reg->bv base) (%n off)))
+(define (%cg-emit-st-typed cg reg ctype base off)
+ (%cg-emit-st-bv cg reg ctype (%cg-reg->bv base) (%n off)))
(define (%cg-load-opnd-into cg op reg)
;; frame lval: load at type width. frame rval is a spilled word
@@ -2819,24 +2828,7 @@
;; on load so downstream 64-bit ALU/compare ops see the C-semantic
;; value.
(%cg-emit-ld-slot cg reg off)
- (let ((k (ctype-kind ty)))
- (cond
- ((eq? k 'i8) (%cg-emit-sext cg reg 56))
- ((eq? k 'i16) (%cg-emit-sext cg reg 48))
- ((eq? k 'i32) (%cg-emit-sext cg reg 32))
- ((eq? k 'u8)
- (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ")\n")))
- ((eq? k 'u16)
- (%cg-emit-many cg (list "%zext16(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ")\n")))
- ((eq? k 'u32)
- (%cg-emit-many cg (list "%zext32(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ", t1)\n")))
- ((eq? k 'bool)
- (%cg-emit-many cg (list "%zext8(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv reg) ")\n")))
- (else 0))))
+ (%cg-canonicalize cg reg ty))
(($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off))
(($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl))
(($ opnd? (kind global) (type ,ty) (ext ,lbl))
@@ -3352,23 +3344,27 @@
;; Materialize the address of an lval `op` directly into `reg`.
;; Variant of cg-take-addr that doesn't spill — used by struct copy
-;; primitives (cg-return on struct, cg-call's struct receive). Caller
-;; owns the lval (already popped).
+;; primitives (cg-return on struct, cg-copy-struct, cg-assign-struct,
+;; cg-call's struct receive). Caller owns the opnd (already popped).
+;;
+;; A frame opnd is treated as a slot whose address we want: if it's a
+;; flagged-indirect lval (slot holds a pointer to the real storage),
+;; load the pointer; otherwise the slot itself IS the storage and we
+;; lea its address. Frame rvals are temp spills — address = &slot. A
+;; global opnd's label is the address. Callers that require an lval
+;; check it before calling.
(define (%cg-emit-addr-of cg op reg)
- (cond
- ((not (opnd-lval? op)) (die #f "cg-emit-addr-of: not an lvalue"))
- (else
- (let ((reg-bv (%cg-reg->bv reg)))
- (pmatch op
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg reg off))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off)))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg reg lbl))
- (else (die #f "cg-emit-addr-of: unsupported lval kind"
- (opnd-kind op))))))))
+ (let ((reg-bv (%cg-reg->bv reg)))
+ (pmatch op
+ (($ opnd? (kind frame) (lval? #t) (ext ,off))
+ (guard (%cg-indirect? cg off))
+ (%cg-emit-ld-slot cg reg off))
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off)))
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg reg lbl))
+ (else (die #f "cg-emit-addr-of: unsupported opnd"
+ (opnd-kind op) (opnd-lval? op))))))
;; cg-copy-struct: pop src lval, pop dst lval, emit per-byte copy
;; from src to dst (both must be lvals of the same struct/union type).
@@ -3391,35 +3387,22 @@
;; the surrounding parser to consume (parse-expr-stmt's trailing
;; cg-pop, etc.). Distinct from cg-copy-struct because the
;; initializer caller needs no result on the vstack.
+;;
+;; The src may be either a frame lvalue (named local slot, *p deref,
+;; callee return-slot) or a frame rvalue (anonymous slot from a temp
+;; spill); %cg-emit-addr-of handles both shapes by treating the slot
+;; itself as the address whenever the lval indirection flag isn't set.
(define (cg-assign-struct cg)
(let* ((src (cg-pop cg))
(dst (cg-pop cg))
(sty (opnd-type dst))
(sz (ctype-size sty)))
(cond ((not (opnd-lval? dst)) (die #f "cg-assign-struct: dst not lvalue")))
- ;; A struct rhs may show up as either a frame lvalue (named local
- ;; slot, *p deref, callee return-slot) or a frame rvalue (anonymous
- ;; slot from a temp spill). Either way, it sits in a frame slot
- ;; whose address is the source. Compute addresses directly so we
- ;; don't have to fight %cg-emit-addr-of's lval-only contract.
- (%cg-emit-addr-of-any cg src 't0)
+ (%cg-emit-addr-of cg src 't0)
(%cg-emit-addr-of cg dst 't2)
(%cg-emit-byte-copy cg 't2 't0 't1 sz)
(cg-push cg dst)))
-(define (%cg-emit-addr-of-any cg op reg)
- (let ((reg-bv (%cg-reg->bv reg)))
- (pmatch op
- (($ opnd? (kind frame) (lval? #t) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg reg off))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off)))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg reg lbl))
- (else (die #f "cg-emit-addr-of-any: unsupported opnd"
- (opnd-kind op) (opnd-lval? op))))))
-
;; Struct copy: defer to libp1pp memcpy via %memcpy_call. dst-reg and
;; src-reg hold the addresses; size is the byte count. tmp-reg is no
;; longer needed by this helper (kept in the signature so existing
@@ -3528,14 +3511,7 @@
;; relabel-only) restores the value. Unsigned targets mask
;; off high bits to zero-extend.
(%cg-load-opnd-into cg p 't0)
- (cond
- ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56))
- ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48))
- ((eq? to-kind 'i32) (%cg-emit-sext cg 't0 32))
- ((= to-sz 1) (%cg-emit-many cg (list "%zext8(t0, t0)\n")))
- ((= to-sz 2) (%cg-emit-many cg (list "%zext16(t0, t0)\n")))
- ((= to-sz 4) (%cg-emit-many cg (list "%zext32(t0, t0, t1)\n")))
- (else 0))
+ (%cg-canonicalize cg 't0 to-type)
(%cg-spill-reg cg 't0 to-type)))))
(define (cg-promote cg)
@@ -3680,21 +3656,7 @@
(cond
((or (eq? op 'eq) (eq? op 'ne)
(eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) 0)
- (else
- (let ((k (ctype-kind result-ty)))
- (cond
- ((eq? k 'i8) (%cg-emit-sext cg 't0 56))
- ((eq? k 'i16) (%cg-emit-sext cg 't0 48))
- ((eq? k 'i32) (%cg-emit-sext cg 't0 32))
- ((eq? k 'u8)
- (%cg-emit-many cg (list "%zext8(t0, t0)\n")))
- ((eq? k 'u16)
- (%cg-emit-many cg (list "%zext16(t0, t0)\n")))
- ((eq? k 'u32)
- (%cg-emit-many cg (list "%zext32(t0, t0, t1)\n")))
- ((eq? k 'bool)
- (%cg-emit-many cg (list "%zext8(t0, t0)\n")))
- (else 0)))))
+ (else (%cg-canonicalize cg 't0 result-ty)))
(%cg-spill-reg cg 't0 result-ty)))))
;; Post-increment / post-decrement on the top-of-vstack lval.
@@ -4003,20 +3965,9 @@
(vsl (%cg-vararg-first-slot cg)))
(cond ((not (opnd-lval? ap-lv))
(die #f "cg-va-start: ap not lvalue")))
- ;; Compute address into a0.
(%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg vsl))
- ;; Store a0 at ap-lval.
- (cond
- ((eq? (opnd-kind ap-lv) 'frame)
- (cond
- ((%cg-indirect? cg (opnd-ext ap-lv))
- (%cg-emit-ld-slot cg 't0 (opnd-ext ap-lv))
- (%cg-emit-st cg 'a0 't0 0))
- (else (%cg-emit-st-slot cg 'a0 (opnd-ext ap-lv)))))
- ((eq? (opnd-kind ap-lv) 'global)
- (%cg-emit-la cg 't0 (opnd-ext ap-lv))
- (%cg-emit-st cg 'a0 't0 0))
- (else (die #f "cg-va-start: bad ap kind" (opnd-kind ap-lv))))))
+ (%cg-emit-addr-of cg ap-lv 't0)
+ (%cg-emit-st cg 'a0 't0 0)))
(define (cg-va-arg cg ctype)
;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1.
@@ -4025,35 +3976,14 @@
(let ((ap-lv (cg-pop cg)))
(cond ((not (opnd-lval? ap-lv))
(die #f "cg-va-arg: ap not lvalue")))
- ;; Load ap into a0.
- (pmatch ap-lv
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-emit-ld cg 'a0 't0 0))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-ld-slot cg 'a0 off))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-emit-ld cg 'a0 't0 0))
- (else (die #f "cg-va-arg: bad ap kind" (opnd-kind ap-lv))))
- ;; Load value at [a0] into a1 (full 8 bytes; cg-cast on the rval
- ;; the caller pushes will narrow if needed).
+ ;; Address of the storage that holds ap → t0; ap value → a0.
+ (%cg-emit-addr-of cg ap-lv 't0)
+ (%cg-emit-ld cg 'a0 't0 0)
+ ;; Read *ap into a1 (full 8 bytes; cg-cast on the rval the caller
+ ;; pushes will narrow if needed). Advance ap by 8 and store back.
(%cg-emit-ld cg 'a1 'a0 0)
- ;; Advance ap by 8.
(%cg-emit-many cg (list "%addi(a0, a0, 8)\n"))
- ;; Store advanced ap back.
- (pmatch ap-lv
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-emit-st cg 'a0 't0 0))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-st-slot cg 'a0 off))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-emit-st cg 'a0 't0 0))
- (else 0))
+ (%cg-emit-st cg 'a0 't0 0)
;; Spill the loaded value (a1) to a fresh frame slot under ctype.
(%cg-spill-reg cg 'a1 ctype)))
@@ -4775,18 +4705,21 @@
((%const-bool? c)
(let* ((t (parse-const-expr ps))
(_ (expect-punct ps 'colon)))
- (%const-skip-cond-rhs ps)
+ (%const-skip-dead-arm ps)
t))
(else
- (%const-skip-cond-mid ps)
+ (%const-skip-dead-arm ps)
(expect-punct ps 'colon)
(parse-const-cond ps))))
(else c))))
-;; Skip the middle of a ternary whose condition was false. Stop on the
-;; matching `:` at depth 0; nested `?:` pairs are absorbed by tracking
-;; an unmatched-? counter.
-(define (%const-skip-cond-mid ps)
+;; Generic top-level punct scanner used by skip-rhs / skip-cond helpers.
+;; Walks paren/bracket depth (a closing bracket at d=0 always stops) and
+;; optionally tracks ternary `?` depth. STOP? receives the punct value
+;; v at top-level (d=0, q=0 when q-aware?) and returns #t to stop. With
+;; Q-AWARE? = #t, a `?` at top-level opens a nested ternary and a
+;; matching `:` (q>0) closes it; the scanner stops on `:` only when q=0.
+(define (%punct-scan ps stop? q-aware?)
(let lp ((d 0) (q 0))
(let ((t (peek ps)))
(cond
@@ -4801,44 +4734,24 @@
((or (eq? v 'rparen) (eq? v 'rbrack))
(cond ((zero? d) #t)
(else (advance ps) (lp (- d 1) q))))
- ((and (zero? d) (eq? v 'qmark))
+ ((and q-aware? (zero? d) (eq? v 'qmark))
(advance ps) (lp d (+ q 1)))
- ((and (zero? d) (eq? v 'colon) (zero? q)) #t)
- ((and (zero? d) (eq? v 'colon))
+ ((and q-aware? (zero? d) (> q 0) (eq? v 'colon))
(advance ps) (lp d (- q 1)))
- ((and (zero? d) (zero? q)
- (or (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
+ ((and (zero? d) (or (not q-aware?) (zero? q)) (stop? v))
#t)
(else (advance ps) (lp d q)))))))))
-;; Skip the third arm of a ternary whose condition was true. Third arm
-;; is a conditional-expression so it may itself contain nested `?:`
-;; pairs that we absorb. Stop on comma/semi/rbrace at depth 0 with no
-;; open `?`, or on a `:` that closes an outer ternary.
-(define (%const-skip-cond-rhs ps)
- (let lp ((d 0) (q 0))
- (let ((t (peek ps)))
- (cond
- ((eq? (tok-kind t) 'EOF) #t)
- ((not (eq? (tok-kind t) 'PUNCT))
- (advance ps) (lp d q))
- (else
- (let ((v (tok-value t)))
- (cond
- ((or (eq? v 'lparen) (eq? v 'lbrack))
- (advance ps) (lp (+ d 1) q))
- ((or (eq? v 'rparen) (eq? v 'rbrack))
- (cond ((zero? d) #t)
- (else (advance ps) (lp (- d 1) q))))
- ((and (zero? d) (eq? v 'qmark))
- (advance ps) (lp d (+ q 1)))
- ((and (zero? d) (eq? v 'colon) (> q 0))
- (advance ps) (lp d (- q 1)))
- ((and (zero? d) (zero? q)
- (or (eq? v 'colon) (eq? v 'comma)
- (eq? v 'semi) (eq? v 'rbrace)))
- #t)
- (else (advance ps) (lp d q)))))))))
+;; Skip the dead arm of a ternary. Same scanner whether we're skipping
+;; the middle (cond was false; will then expect-punct `:` and parse arm
+;; 3) or the third (cond was true; arm 2 already parsed and `:` already
+;; consumed). Both stop at top-level `:` / `,` / `;` / `}` with no
+;; open inner `?`; nested `?:` pairs are absorbed.
+(define (%const-skip-dead-arm ps)
+ (%punct-scan ps
+ (lambda (v)
+ (or (eq? v 'colon) (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
+ #t))
;; Generic left-associative binary level.
;; ops: alist of punct-sym → (vp vp → vp).
@@ -4907,35 +4820,20 @@
(define (%const-skip-land-rhs ps)
;; rhs of && is a parse-const-bor — stop on `&&`, `||`, `?`, `:`,
;; `,`, `;`, `}`, and any closing/separator at depth 0.
- (%const-skip-rhs-til ps
+ (%punct-scan ps
(lambda (v)
(or (eq? v 'land) (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon)
- (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))))
+ (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
+ #f))
(define (%const-skip-lor-rhs ps)
;; rhs of || is a parse-const-land — stop on `||` (left-assoc),
;; `?`, `:`, `,`, `;`, `}`. `&&` binds TIGHTER than `||`, so it is
;; absorbed into the rhs and we do NOT stop on it.
- (%const-skip-rhs-til ps
+ (%punct-scan ps
(lambda (v)
(or (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon)
- (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))))
-(define (%const-skip-rhs-til ps stop?)
- (let lp ((d 0))
- (let ((t (peek ps)))
- (cond
- ((eq? (tok-kind t) 'EOF) #t)
- ((not (eq? (tok-kind t) 'PUNCT))
- (advance ps) (lp d))
- (else
- (let ((v (tok-value t)))
- (cond
- ((or (eq? v 'lparen) (eq? v 'lbrack))
- (advance ps) (lp (+ d 1)))
- ((or (eq? v 'rparen) (eq? v 'rbrack))
- (cond ((zero? d) #t)
- (else (advance ps) (lp (- d 1)))))
- ((and (zero? d) (stop? v)) #t)
- (else (advance ps) (lp d)))))))))
+ (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
+ #f))
(define (parse-const-bor ps)
(%const-binl ps parse-const-bxor (list (cons 'bar (lambda (a b) (%const-arith-op bit-or a b))))))
@@ -4956,24 +4854,19 @@
(cons 'gt (lambda (a b) (%const-cmp-op > a b)))
(cons 'ge (lambda (a b) (%const-cmp-op >= a b))))))
+;; Shift combiner: result type is the (promoted) lhs type — rhs is
+;; just a count, promoted independently. SIGN selects shl (+1) or shr (-1).
+(define (%const-shift-op sign a b)
+ (let* ((ap (%const-promote a))
+ (bp (%const-promote b))
+ (rt (cdr ap)))
+ (cons (%const-trunc (arithmetic-shift (car ap) (* sign (car bp))) rt)
+ rt)))
+
(define (parse-const-shift ps)
- ;; Shift result type is the (promoted) lhs type — rhs is just a count.
- (let lp ((a (parse-const-add ps)))
- (let ((t (peek ps)))
- (cond
- ((and (eq? (tok-kind t) 'PUNCT)
- (or (eq? (tok-value t) 'shl) (eq? (tok-value t) 'shr)))
- (let* ((op (tok-value (advance ps)))
- (b (parse-const-add ps))
- (ap (%const-promote a))
- (bp (%const-promote b))
- (rt (cdr ap))
- (sh (car bp))
- (av (car ap))
- (raw (cond ((eq? op 'shl) (arithmetic-shift av sh))
- (else (arithmetic-shift av (- 0 sh))))))
- (lp (cons (%const-trunc raw rt) rt))))
- (else a)))))
+ (%const-binl ps parse-const-add
+ (list (cons 'shl (lambda (a b) (%const-shift-op 1 a b)))
+ (cons 'shr (lambda (a b) (%const-shift-op -1 a b))))))
(define (parse-const-add ps)
(%const-binl ps parse-const-mul
@@ -5016,16 +4909,7 @@
(define (%const-paren-is-cast? ps)
;; A '(' starts a cast iff the following token kicks off a type-name.
- (pmatch (peek2 ps)
- (($ tok? (kind KW) (value ,v))
- (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
- (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) (eq? v '_Bool)
- (eq? v 'float) (eq? v 'double)
- (eq? v '_Complex) (eq? v '_Imaginary)
- (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
- (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict)))
- (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
- (else #f)))
+ (%tok-decl-start? ps (peek2 ps)))
(define (%ctype-int? ty)
(let ((k (ctype-kind ty)))
@@ -5084,19 +4968,26 @@
(cons (%const-sizeof-expr ps #f) %t-u64))))
(else (parse-const-primary ps)))))
-(define (%const-tok-is-decl? ps)
- (pmatch (peek ps)
+;; Does TOK begin a type-name? Type specifiers, qualifiers,
+;; struct/union/enum tags, and typedef-name idents. Storage classes
+;; (auto/register/static/extern/typedef) are NOT included — those
+;; appear only at declaration position; callers that need them
+;; (e.g. stmt-starts-decl?) check separately.
+(define (%tok-decl-start? ps t)
+ (pmatch t
(($ tok? (kind KW) (value ,v))
(or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
(eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
(eq? v '_Bool) (eq? v 'float) (eq? v 'double)
(eq? v '_Complex) (eq? v '_Imaginary)
- (eq? v 'struct) (eq? v 'union)
- (eq? v 'enum) (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict) (eq? v 'inline)))
+ (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
+ (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict)
+ (eq? v 'inline)))
(($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
(else #f)))
+(define (%const-tok-is-decl? ps) (%tok-decl-start? ps (peek ps)))
+
(define (parse-const-primary ps)
(let ((t (peek ps)))
(pmatch t
@@ -5808,6 +5699,67 @@
(cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext)))
(else (die #f "init: not a struct" ty)))))
+;; After processing a designated initializer for FNAME, return the
+;; field list with FNAME and all preceding (already-overwritten or
+;; skipped) fields removed. Empty list if FNAME isn't found (caller
+;; should already have validated the field exists).
+(define (%init-drop-thru-field fields fname)
+ (cond ((null? fields) '())
+ ((equal? (car (car fields)) fname) (cdr fields))
+ (else (%init-drop-thru-field (cdr fields) fname))))
+
+;; Element/field dispatch for global aggregate initializers. ELIDE? = #f
+;; means caller has just consumed `{` for this element and we own the
+;; matching `}`; ELIDE? = #t is C99 §6.7.8 ¶22 brace elision (the
+;; sub-aggregate draws items from the parent stream, no inner braces).
+;; Returns the piece-list contributing this element to the encoding.
+(define (%global-init-elem ps t elide?)
+ (let ((k (ctype-kind t)))
+ (cond
+ ((eq? k 'arr)
+ (let-values (((p _c) (cond
+ (elide? (%parse-init-array-list/mode ps t #f))
+ (else (%parse-init-array-list ps t)))))
+ p))
+ ((or (eq? k 'struct) (eq? k 'union))
+ (cond
+ (elide? (%parse-init-struct-list/mode ps t #f))
+ (else (%parse-init-struct-list ps t))))
+ (else
+ (let ((p (%const-init-piece ps t)))
+ (cond
+ (elide? (list p))
+ (else
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list p))))))))
+
+;; Element/field dispatch for local aggregate initializers. Mirrors
+;; %global-init-elem but emits per-element store ops via cg-assign for
+;; scalar leaves, and recurses into the local-list walkers for
+;; aggregates. Returns 0; the side effect is the emitted code.
+(define (%local-init-elem ps sm eoff t elide?)
+ (let ((k (ctype-kind t)))
+ (cond
+ ((eq? k 'arr)
+ (cond
+ (elide? (%parse-init-local-array-list/mode ps sm eoff t #f))
+ (else (%parse-init-local-array-list ps sm eoff t))))
+ ((or (eq? k 'struct) (eq? k 'union))
+ (cond
+ (elide? (%parse-init-local-struct-list/mode ps sm eoff t #f))
+ (else (%parse-init-local-struct-list ps sm eoff t))))
+ (else
+ (%push-frame-elem-lval ps eoff t)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) t)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
+ (cond
+ (elide? 0)
+ (else
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)))))))
+
(define (%pad-piece nbytes)
(make-bytevector nbytes 0))
@@ -5955,33 +5907,10 @@
(let ((p
(cond
((at-punct? ps 'lbrace)
- ;; Nested aggregate: brace-flatten via recursion.
(advance ps)
- ;; element is itself struct/array
- (cond
- ((eq? (ctype-kind elem) 'arr)
- (let-values (((p _c) (%parse-init-array-list ps elem)))
- p))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-struct-list ps elem))
- (else
- (let ((p (%const-init-piece ps elem)))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace)
- (list p)))))
- ;; Brace elision: aggregate element with no `{`.
- ;; Draw items from the SAME parent stream deep
- ;; enough to fill `elem`. C99 §6.7.8 ¶22.
- ((eq? (ctype-kind elem) 'arr)
- (let-values (((p _c)
- (%parse-init-array-list/mode ps elem #f)))
- p))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-struct-list/mode ps elem #f))
+ (%global-init-elem ps elem #f))
(else
- (list (%const-init-piece ps elem))))))
+ (%global-init-elem ps elem #t)))))
;; Inter-item comma: consume except for the comma
;; following our LAST item in no-brace mode — that
;; one belongs to the enclosing parent.
@@ -6111,37 +6040,13 @@
(cond
((at-punct? ps 'lbrace)
(advance ps)
- (cond
- ((eq? (ctype-kind fty) 'arr)
- (let-values (((p _c) (%parse-init-array-list ps fty)))
- p))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-struct-list ps fty))
- (else
- (let ((p (%const-init-piece ps fty)))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace)
- (list p)))))
- ;; Brace elision for aggregate field with no `{`.
- ((eq? (ctype-kind fty) 'arr)
- (let-values (((p _c)
- (%parse-init-array-list/mode ps fty #f)))
- p))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-struct-list/mode ps fty #f))
+ (%global-init-elem ps fty #f))
(else
- (list (%const-init-piece ps fty)))))
+ (%global-init-elem ps fty #t))))
(rest1
(cond
- (designated?
- ;; designated init: drop fields up to and including target
- (let drop ((xs fields))
- (cond
- ((null? xs) '())
- ((equal? (car (car xs)) fname) (cdr xs))
- (else (drop (cdr xs))))))
+ ;; designated init: drop fields up to and including target
+ (designated? (%init-drop-thru-field fields fname))
(else (cdr rest)))))
;; Inter-item comma: consume except for the comma after our
;; LAST field in no-brace mode (belongs to enclosing list).
@@ -6203,13 +6108,6 @@
(else (die #f "init local: brace on scalar?"))))
(else (die (tok-loc (peek ps)) "init local aggregate?"))))
-(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk)
- ;; Emit a single scalar store at slot[base + rel-off]. piece is the
- ;; raw initializer expression — but here we want to actually evaluate
- ;; it via parse-expr to allow non-const expressions for autos.
- ;; Caller handles this; this helper handles the store-into-frame ops.
- 0)
-
(define (%push-frame-elem-lval ps base-off ty)
(cg-push (ps-cg ps) (%opnd 'frame ty base-off #t)))
@@ -6263,32 +6161,9 @@
(cond
((at-punct? ps 'lbrace)
(advance ps)
- (cond
- ((eq? (ctype-kind elem) 'arr)
- (%parse-init-local-array-list ps sm eoff elem))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-local-struct-list ps sm eoff elem))
- (else
- (%push-frame-elem-lval ps eoff elem)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) elem)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace))))
- ;; Brace elision: aggregate element with no `{`. Draw items
- ;; from the SAME parent stream deep enough to fill `elem`.
- ;; C99 §6.7.8 ¶22.
- ((eq? (ctype-kind elem) 'arr)
- (%parse-init-local-array-list/mode ps sm eoff elem #f))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-local-struct-list/mode ps sm eoff elem #f))
+ (%local-init-elem ps sm eoff elem #f))
(else
- (%push-frame-elem-lval ps eoff elem)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) elem)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (%local-init-elem ps sm eoff elem #t)))
;; Inter-item comma: in no-brace mode, don't eat the comma
;; that follows our LAST item (it belongs to the parent).
(cond
@@ -6393,38 +6268,12 @@
(cond
((at-punct? ps 'lbrace)
(advance ps)
- (cond
- ((eq? (ctype-kind fty) 'arr)
- (%parse-init-local-array-list ps sm eoff fty))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-local-struct-list ps sm eoff fty))
- (else
- (%push-frame-elem-lval ps eoff fty)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) fty)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace))))
- ;; Brace elision for aggregate field with no `{`.
- ((eq? (ctype-kind fty) 'arr)
- (%parse-init-local-array-list/mode ps sm eoff fty #f))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-local-struct-list/mode ps sm eoff fty #f))
+ (%local-init-elem ps sm eoff fty #f))
(else
- (%push-frame-elem-lval ps eoff fty)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) fty)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (%local-init-elem ps sm eoff fty #t)))
(let ((rest1
(cond
- (designated?
- (let drop ((xs fields))
- (cond
- ((null? xs) '())
- ((equal? (car (car xs)) fname) (cdr xs))
- (else (drop (cdr xs))))))
+ (designated? (%init-drop-thru-field fields fname))
(else (cdr rest)))))
;; Inter-item comma: in no-brace mode, don't eat the comma
;; that follows our LAST field (belongs to enclosing list).
@@ -6489,19 +6338,16 @@
(else (parse-expr-stmt ps))))))
(define (stmt-starts-decl? ps)
- (pmatch (peek ps)
- (($ tok? (kind KW) (value ,v))
- (or (eq? v 'auto) (eq? v 'register) (eq? v 'static)
- (eq? v 'extern) (eq? v 'typedef) (eq? v 'const)
- (eq? v 'volatile) (eq? v 'restrict) (eq? v 'inline)
- (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
- (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
- (eq? v '_Bool) (eq? v 'float) (eq? v 'double)
- (eq? v '_Complex) (eq? v '_Imaginary)
- (eq? v 'struct) (eq? v 'union)
- (eq? v 'enum)))
- (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
- (else #f)))
+ (let ((t (peek ps)))
+ (or (%tok-decl-start? ps t)
+ ;; Storage classes only appear at declaration position; check here
+ ;; rather than fold them into %tok-decl-start? (which is also
+ ;; used for cast typenames where storage classes are illegal).
+ (pmatch t
+ (($ tok? (kind KW) (value ,v))
+ (or (eq? v 'auto) (eq? v 'register) (eq? v 'static)
+ (eq? v 'extern) (eq? v 'typedef)))
+ (else #f)))))
(define (parse-local-decl ps)
(let-values (((sto b) (parse-decl-spec ps)))
@@ -6971,34 +6817,16 @@
(cg-push-imm (ps-cg ps) %t-u64 sz))))))
(else (parse-postfix ps))))
-(define (token-is-decl? ps)
- (pmatch (peek ps)
- (($ tok? (kind KW) (value ,v))
- (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
- (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
- (eq? v '_Bool) (eq? v 'float) (eq? v 'double)
- (eq? v '_Complex) (eq? v '_Imaginary)
- (eq? v 'struct) (eq? v 'union)
- (eq? v 'enum) (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict) (eq? v 'inline)))
- (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
- (else #f)))
+(define (token-is-decl? ps) (%tok-decl-start? ps (peek ps)))
(define (parse-cast-or-unary ps)
- (pmatch (peek2 ps)
- (($ tok? (kind KW) (value ,v))
- (guard (or (eq? v 'void) (eq? v 'char) (eq? v 'short)
- (eq? v 'int) (eq? v 'long) (eq? v 'signed)
- (eq? v 'unsigned) (eq? v '_Bool)
- (eq? v 'float) (eq? v 'double)
- (eq? v '_Complex) (eq? v '_Imaginary)
- (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
- (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict)
- ;; A leading GNU attribute on the cast typename
- ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`)
- ;; — eaten by parse-decl-spec.
- (eq? v '__attribute__)))
+ (cond
+ ((or (%tok-decl-start? ps (peek2 ps))
+ ;; A leading GNU attribute on the cast typename
+ ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`) — eaten
+ ;; by parse-decl-spec along with the rest of the decl-spec.
+ (let ((t (peek2 ps)))
+ (and (eq? (tok-kind t) 'KW) (eq? (tok-value t) '__attribute__))))
(advance ps)
(let*-values (((_sto bty) (parse-decl-spec ps))
((_n ty) (parse-declarator ps bty)))
@@ -7007,8 +6835,7 @@
;; (T){ ... } — compound literal (C99 §6.5.2.5). Looks like a
;; cast at the typename level but disambiguates on the
;; following `{` and is a postfix lvalue, not a cast operator.
- ((at-punct? ps 'lbrace)
- (parse-compound-literal ps ty))
+ ((at-punct? ps 'lbrace) (parse-compound-literal ps ty))
(else
(parse-unary ps)
;; Cast operand undergoes lvalue conversion first (C semantics):
@@ -7016,19 +6843,6 @@
;; bit-casts the resulting rval to the target type.
(rval! ps)
(cg-cast (ps-cg ps) ty)))))
- (($ tok? (kind IDENT) (value ,n))
- (guard (typedef? ps n))
- (advance ps)
- (let*-values (((_sto bty) (parse-decl-spec ps))
- ((_n ty) (parse-declarator ps bty)))
- (expect-punct ps 'rparen)
- (cond
- ((at-punct? ps 'lbrace)
- (parse-compound-literal ps ty))
- (else
- (parse-unary ps)
- (rval! ps)
- (cg-cast (ps-cg ps) ty)))))
(else (advance ps) (parse-expr ps)
(expect-punct ps 'rparen)
(parse-postfix-rest ps))))
diff --git a/cc/cc.scm.md b/cc/cc.scm.md
@@ -2,7 +2,7 @@
## Overview
-`cc.scm` is a complete C compiler (7405 lines) written in Scheme (scheme1 dialect) that compiles C source to P1pp assembly. It implements a streaming pipeline: **lexer → preprocessor → parser → codegen**. Designed for minimal memory use with fixed pre-allocated buffers and a scratch/main heap discipline that resets per declaration. Targets the P1 64-bit RISC ISA via libp1pp macros; output is consumed directly by the M1pp expander and hex2++ assembler/linker (see [docs/M1PP.md](../docs/M1PP.md), [docs/HEX2pp.md](../docs/HEX2pp.md)).
+`cc.scm` is a complete C compiler (7219 lines) written in Scheme (scheme1 dialect) that compiles C source to P1pp assembly. It implements a streaming pipeline: **lexer → preprocessor → parser → codegen**. Designed for minimal memory use with fixed pre-allocated buffers and a scratch/main heap discipline that resets per declaration. Targets the P1 64-bit RISC ISA via libp1pp macros; output is consumed directly by the M1pp expander and hex2++ assembler/linker (see [docs/M1PP.md](../docs/M1PP.md), [docs/HEX2pp.md](../docs/HEX2pp.md)).
---
@@ -17,9 +17,9 @@
| Symbol Alphabets | ~530–~595 | Keyword and punctuator alists |
| Lexer | 596–1700 | Tokenizes C source; trigraph/splice, comments, escape sequences |
| Preprocessor | 1701–2540 | `#define`, `#if`, macro expansion with hide-sets; `pp-eval-cexpr` delegates to `parse-const-int` via `%pp-make-const-ps` |
-| Code Generator | 2541–4332 | P1pp assembly emission, vstack, frame allocation, all operators and control flow |
-| Parser | 4334–6800 | Recursive-descent + Pratt; declarations, statements, expressions; shared constant-expression evaluator |
-| Main Driver | 7372–7405 | CLI parsing, file I/O, pipeline initialization |
+| Code Generator | 2541–4280 | P1pp assembly emission, vstack, frame allocation, all operators and control flow |
+| Parser | 4282–7180 | Recursive-descent + Pratt; declarations, statements, expressions; shared constant-expression evaluator |
+| Main Driver | 7186–7219 | CLI parsing, file I/O, pipeline initialization |
---
@@ -122,39 +122,43 @@ Source file
| **2431–2540** | Paste operator: `%pp-paste-tokens`; string fusion: `%pp-maybe-fuse-str`; `#if` evaluator: `%pp-make-const-ps` (IO adapter wrapping token list as minimal pstate), `pp-eval-cexpr`, `%pp-resolve-defined`, `%pp-expand-line`, `%pp-idents-as-zero` |
| **2541–2640** | CG emission primitives: `%cg-emit-buf`, `%cg-emit`, `%cg-emit-many`, `%cg-fresh-label`, `%n` (number→bv) |
| **2641–2745** | CG metadata: `%cg-fn-set!/%cg-fn-get`; register/label helpers: `%cg-reg→bv`, `%cg-emit-li`, `%cg-emit-la`, slot-expr (`(+ %<fn>__SO N)` so the slot offset resolves through the per-fn `__SO` macro at M1pp time) |
-| **2745–2870** | Load/store emission: `%cg-emit-ld/st`, `%cg-emit-ld-slot-typed` (sign-extended sub-word loads), `%cg-emit-sext`, `%cg-spill-reg` |
-| **2871–3025** | Operand loading: `%cg-load-opnd-into` (imm/frame/global) — re-canonicalizes a frame rval against its type kind on load (sign- or zero-extend); vstack ops: `cg-push/pop/top/depth/dup`, snapshot/rewind for sizeof |
-| **3025–3170** | Materialize: `cg-push-imm`, `cg-push-string` (with intern), `cg-push-sym` (fn/enum/var/param), `cg-push-deref` (indirect-slot tracking) |
-| **3171–3360** | Aggregate access: `cg-push-field` with `%cg-find-field` (anonymous-member-aware lookup, shared with parser's offsetof), `cg-decay-array`; address/deref: `%cg-emit-addr-of`, `cg-copy-struct`, `cg-take-addr`, `cg-load` |
-| **3361–3540** | Type conversions: `cg-cast` (bool/ptr/widening/narrowing with sign-extend), `cg-promote`, `cg-arith-conv` |
-| **3541–3720** | Operators: `cg-binop` (pointer arithmetic scaling, comparison), `cg-unop` (neg/bnot/lnot), `cg-assign` (type coercion), post-inc/dec |
-| **3720–3850** | Function calls: `cg-call` (sret >16B struct return, arg staging a0–a3 + stack, variadic) |
-| **3850–3950** | Return: `cg-return` (void/scalar/struct via `%b(&.ret)` to the per-fn dotted local label); conditional: `cg-if`, `cg-ifelse`, `cg-ifelse-merge` (ternary/`&&`/`||`); `%cg-merge-arith-type` (C11 §6.5.15 result type for ternary merge) |
-| **3951–4090** | Loop control flow: `cg-loop` (opens nested `.scope` with `:.top`/`:.end`), `cg-break` / `cg-continue` (bare `%break`/`%continue` resolved by hex2++'s innermost-out scope walk); switch: `cg-switch-begin/case/default/end` (dotted case labels and dispatch table inside the switch's `.scope`) |
-| **3994–4090** | Variadic: `cg-va-start`, `cg-va-arg`, `cg-va-end`; labels/goto: `cg-emit-label`, `cg-goto` — user C labels emit as `cc__<fn>__user_<name>` global names so `goto` survives nested loop/switch scopes |
-| **4090–4332** | Globals/data: `cg-emit-global` (prefixes `.align <ctype-align>` for both `.data` and `.bss`), `cg-emit-extern`, tentatives, `cg-intern-string` (string pool with `.align 8` framing), `%cg-bv->hex-lines` (bare-hex chunked output for hex2++); frame: `cg-alloc-slot`; lifecycle: `cg-init`, `cg-fn-begin/v`, `cg-fn-end` (wraps body in `%fn(name, frame, { … })`), `cg-finish` |
-| **4334–4460** | Scope/tag ops: `scope-enter/leave`, `scope-bind/lookup`, `tag-bind/lookup`, `typedef?` |
-| **4460–4560** | Type compatibility: `ctype-compat?`, `%fn-ctype-compat?`, `%fn-params-compat?`; symbol merge: `sym-merge` (linkage inheritance) |
-| **4560–4660** | Type constructors: `%mk-ptr`, `%mk-arr`, `%mk-fn`; qualifier handling: `eat-cv-quals!`, `skip-gnu-attribute!`, `eat-gnu-attributes!` |
-| **4660–4710** | Declaration specifiers: `parse-decl-spec` (storage/type/signedness), `resolve-base` |
-| **4710–4760** | Aggregate parsing: `parse-aggregate-spec` (struct/union forward + complete), `parse-struct-fields` (union offset=0), `complete-agg!` (size/align/fields), `parse-enum-spec` |
-| **4700–4760** | Const-expr value helpers: `%const-trunc`, `%const-arith-conv`, `%const-arith-conv-type`, `%const-promote`, `%const-bool?` |
-| **4845–4900** | Const-expr binary-level infrastructure: `%const-binl` (generic left-associative loop), `%const-arith-op`, `%const-div-op`, `%const-cmp-op` |
-| **4762–5230** | Constant expression evaluator: `parse-const-expr` → `parse-const-cond` (ternary) → binary levels via `%const-binl` (lor/land/bor/bxor/band/eq/rel/add/mul) → `parse-const-shift` (inline; lhs-type-only) → `parse-const-cast` → `parse-const-unary` (sizeof, &, prefix ops) → `parse-const-primary` (INT/CHAR/paren/enum-const); `%const-sizeof-expr` (cg snapshot/rewind; guards against pp context) |
-| **5040–5230** | offsetof support: `%const-parse-addrof-postfix`, `%const-parse-addrof-primary` — recognizes `&((T*)0)->field` chains; reuses `%cg-find-field` |
-| **5230–5400** | `parse-const-int`; declarators: `parse-declarator`, `parse-decl-cont`, `parse-decl-suf-cont`, `parse-fn-params` |
-| **5400–5430** | Phase 3 promotion: `%promote-pending-completions`, `rewrite-pending-completions!`, `promote-roots!`, `promote-iter-buffers!` (main/scratch boundary) |
-| **5403–5535** | Translation unit: `parse-translation-unit` with `call-with-scratch-cycle` per decl; `parse-decl-or-fn` |
-| **5535–5860** | Declarations/definitions: `handle-decl` (typedef/fn/var/static/file-scope/block-scope with tentatives) |
-| **5864–6160** | Global initializers: `parse-init-global` (string/brace/scalar with inferred-length arrays), `%parse-init-array-list` with element promotion, `%parse-init-struct-list` with designated designators and padding |
-| **6165–6440** | Local initializers: `parse-init-local-aggregate` (string/brace), `%parse-init-local-array-list`, `%parse-init-local-struct-list` (zero-pass); compound literals as frame lvalues |
-| **6447–6470** | Function body: `parse-fn-body`, `%parse-fn-body-inner` (param binding, scope enter/leave) |
-| **6467–6760** | Statements: `parse-stmt` dispatch, `parse-cstmt`, `parse-if-stmt`, `parse-while-stmt`, `parse-do-stmt` (`.scope` with `:.body` / `:.top` for `continue`-to-cond semantics), `parse-for-stmt` (`.scope` with deferred condition/step), `parse-switch-stmt`, `parse-case-stmt`, `parse-default-stmt`, `parse-return-stmt`, `parse-goto-stmt`, `parse-labelled-stmt`, `parse-expr-stmt`, `parse-local-decl` |
-| **6767–6810** | `%binop-bp` — Pratt binding power table (comma=1, assign=4, `\|\|`=10, `&&`=20, bitwise=30–50, relational=60, shift=70, add=80, mul=90) |
-| **6810–7090** | Expression parser: `parse-expr` (`expr-bp(0)`), `parse-expr-bp` (Pratt climbing), `parse-binary-rhs` (comma/assign/compound-assign/ternary/logical/bitwise) |
-| **7089–7250** | Unary/cast/postfix: `parse-unary` (prefix ops, sizeof), `parse-cast-or-unary` (paren disambiguation), `parse-compound-literal`, `parse-postfix` (`[]`/call/`.`/`->`/post-inc/post-dec) |
-| **7152–7370** | Call parsing: `call-fn-type`, `parse-call-args` (param casting, variadic promotion); builtins: `parse-builtin-va-start/va-arg/va-end`; primary: `parse-primary` (literals/idents/strings/parens/enum-consts); rvalue: `rval!`, `rval-not-fn!` |
-| **7372–7405** | Driver: `%cc-slurp`, `%cc-write`, CLI flag parsing (`--cc-debug`, `--cc-trace-emit`, `--lib=PFX`), `%cc-initial-defines` (CCSCM sentinel), `cc-main` (pipeline init + `parse-translation-unit` + `cg-finish` + write) |
+| **2745–2810** | Load/store emission: `%cg-emit-ld/st`, sub-byte width helpers; `%cg-emit-sext`; `%cg-canonicalize` (kind-driven sext/zext that puts a register back in canonical 64-bit form for its ctype); `%cg-emit-{ld,st}-bv` (width-dispatch core for the typed/slot-typed load+store family) |
+| **2810–2860** | `%cg-emit-{ld,st}-{slot-,}typed` thin wrappers calling the `-bv` core; `%cg-spill-reg` |
+| **2860–3020** | Operand loading: `%cg-load-opnd-into` (imm/frame/global) — re-canonicalizes a frame rval against its type kind on load via `%cg-canonicalize`; vstack ops: `cg-push/pop/top/depth/dup`, snapshot/rewind for sizeof |
+| **3020–3170** | Materialize: `cg-push-imm`, `cg-push-string` (with intern), `cg-push-sym` (fn/enum/var/param), `cg-push-deref` (indirect-slot tracking) |
+| **3171–3360** | Aggregate access: `cg-push-field` with `%cg-find-field` (anonymous-member-aware lookup, shared with parser's offsetof), `cg-decay-array`; address/deref: `%cg-emit-addr-of` (handles lval-indirect-frame, direct-frame for both lval and rval, and global), `cg-copy-struct`, `cg-assign-struct`, `cg-take-addr`, `cg-load` |
+| **3361–3530** | Type conversions: `cg-cast` (bool/ptr/widening/narrowing — calls `%cg-canonicalize` on narrow targets), `cg-promote`, `cg-arith-conv` |
+| **3530–3700** | Operators: `cg-binop` (pointer arithmetic scaling, comparison; uses `%cg-canonicalize` for narrow-typed binop results), `cg-unop` (neg/bnot/lnot), `cg-assign` (type coercion), post-inc/dec |
+| **3700–3830** | Function calls: `cg-call` (sret >16B struct return, arg staging a0–a3 + stack, variadic) |
+| **3830–3910** | Return: `cg-return` (void/scalar/struct via `%b(&.ret)` to the per-fn dotted local label); conditional: `cg-if`, `cg-ifelse`, `cg-ifelse-merge` (ternary/`&&`/`||`); `%cg-merge-arith-type` (C11 §6.5.15 result type for ternary merge) |
+| **3910–4020** | Loop control flow: `cg-loop` (opens nested `.scope` with `:.top`/`:.end`), `cg-break` / `cg-continue` (bare `%break`/`%continue` resolved by hex2++'s innermost-out scope walk); switch: `cg-switch-begin/case/default/end` (dotted case labels and dispatch table inside the switch's `.scope`) |
+| **3950–4040** | Variadic: `cg-va-start`, `cg-va-arg` (ap-lvalue store/load through `%cg-emit-addr-of`), `cg-va-end`; labels/goto: `cg-emit-label`, `cg-goto` — user C labels emit as `cc__<fn>__user_<name>` global names so `goto` survives nested loop/switch scopes |
+| **4040–4280** | Globals/data: `cg-emit-global` (prefixes `.align <ctype-align>` for both `.data` and `.bss`), `cg-emit-extern`, tentatives, `cg-intern-string` (string pool with `.align 8` framing), `%cg-bv->hex-lines` (bare-hex chunked output for hex2++); frame: `cg-alloc-slot`; lifecycle: `cg-init`, `cg-fn-begin/v`, `cg-fn-end` (wraps body in `%fn(name, frame, { … })`), `cg-finish` |
+| **4282–4400** | Scope/tag ops: `scope-enter/leave`, `scope-bind/lookup`, `tag-bind/lookup`, `typedef?` |
+| **4400–4500** | Type compatibility: `ctype-compat?`, `%fn-ctype-compat?`, `%fn-params-compat?`; symbol merge: `sym-merge` (linkage inheritance) |
+| **4500–4600** | Type constructors: `%mk-ptr`, `%mk-arr`, `%mk-fn`; qualifier handling: `eat-cv-quals!`, `skip-gnu-attribute!`, `eat-gnu-attributes!` |
+| **4600–4660** | Declaration specifiers: `parse-decl-spec` (storage/type/signedness), `resolve-base` |
+| **4660–4720** | Aggregate parsing: `parse-aggregate-spec` (struct/union forward + complete), `parse-struct-fields` (union offset=0), `complete-agg!` (size/align/fields), `parse-enum-spec` |
+| **4660–4720** | Const-expr value helpers: `%const-trunc`, `%const-arith-conv`, `%const-arith-conv-type`, `%const-promote`, `%const-bool?` |
+| **4720–4760** | `%punct-scan` — generic top-level token scanner (paren/bracket depth + optional ternary-`?` depth) parameterised by stop predicate; `%const-skip-dead-arm` (unevaluated arm of `?:`); `%const-skip-{land,lor}-rhs` (short-circuit && / ||) |
+| **4760–4810** | Const-expr binary-level infrastructure: `%const-binl` (generic left-associative loop), `%const-arith-op`, `%const-div-op`, `%const-cmp-op`, `%const-shift-op` |
+| **4690–5120** | Constant expression evaluator: `parse-const-expr` → `parse-const-cond` (ternary) → binary levels via `%const-binl` (lor/land/bor/bxor/band/eq/rel/add/shift/mul) → `parse-const-cast` → `parse-const-unary` (sizeof, &, prefix ops) → `parse-const-primary` (INT/CHAR/paren/enum-const); `%const-sizeof-expr` (cg snapshot/rewind; guards against pp context) |
+| **4970–4985** | `%tok-decl-start?` — single canonical "does TOK begin a type-name?" predicate. Used by `%const-tok-is-decl?`, `%const-paren-is-cast?`, `token-is-decl?`, `stmt-starts-decl?` (which adds storage classes), and `parse-cast-or-unary` (which adds `__attribute__`) |
+| **4940–5120** | offsetof support: `%const-parse-addrof-postfix`, `%const-parse-addrof-primary` — recognizes `&((T*)0)->field` chains; reuses `%cg-find-field` |
+| **5120–5290** | `parse-const-int`; declarators: `parse-declarator`, `parse-decl-cont`, `parse-decl-suf-cont`, `parse-fn-params` |
+| **5290–5320** | Phase 3 promotion: `%promote-pending-completions`, `rewrite-pending-completions!`, `promote-roots!`, `promote-iter-buffers!` (main/scratch boundary) |
+| **5294–5420** | Translation unit: `parse-translation-unit` with `call-with-scratch-cycle` per decl; `parse-decl-or-fn` |
+| **5426–5705** | Declarations/definitions: `handle-decl` (typedef/fn/var/static/file-scope/block-scope with tentatives) |
+| **5706–5770** | Initializer support helpers: `%init-drop-thru-field` (designator drop), `%global-init-elem` / `%local-init-elem` (brace-vs-elision element/field dispatch shared by the four `parse-init-*-list/mode` walkers) |
+| **5816–6065** | Global initializers: `parse-init-global` (string/brace/scalar with inferred-length arrays), `%parse-init-array-list` with element promotion, `%parse-init-struct-list` with designated designators and padding |
+| **6070–6290** | Local initializers: `parse-init-local-aggregate` (string/brace), `%parse-init-local-array-list`, `%parse-init-local-struct-list` (zero-pass); compound literals as frame lvalues |
+| **6296–6320** | Function body: `parse-fn-body`, `%parse-fn-body-inner` (param binding, scope enter/leave) |
+| **6316–6610** | Statements: `parse-stmt` dispatch, `parse-cstmt`, `parse-if-stmt`, `parse-while-stmt`, `parse-do-stmt` (`.scope` with `:.body` / `:.top` for `continue`-to-cond semantics), `parse-for-stmt` (`.scope` with deferred condition/step), `parse-switch-stmt`, `parse-case-stmt`, `parse-default-stmt`, `parse-return-stmt`, `parse-goto-stmt`, `parse-labelled-stmt`, `parse-expr-stmt`, `parse-local-decl` |
+| **6613–6655** | `%binop-bp` — Pratt binding power table (comma=1, assign=4, `\|\|`=10, `&&`=20, bitwise=30–50, relational=60, shift=70, add=80, mul=90) |
+| **6656–6900** | Expression parser: `parse-expr` (`expr-bp(0)`), `parse-expr-bp` (Pratt climbing), `parse-binary-rhs` (comma/assign/compound-assign/ternary/logical/bitwise) |
+| **6903–7065** | Unary/cast/postfix: `parse-unary` (prefix ops, sizeof), `parse-cast-or-unary` (paren disambiguation via `%tok-decl-start?` + `__attribute__` check), `parse-compound-literal`, `parse-postfix` (`[]`/call/`.`/`->`/post-inc/post-dec) |
+| **6965–7180** | Call parsing: `call-fn-type`, `parse-call-args` (param casting, variadic promotion); builtins: `parse-builtin-va-start/va-arg/va-end`; primary: `parse-primary` (literals/idents/strings/parens/enum-consts); rvalue: `rval!`, `rval-not-fn!` |
+| **7186–7219** | Driver: `%cc-slurp`, `%cc-write`, CLI flag parsing (`--cc-debug`, `--cc-trace-emit`, `--lib=PFX`), `%cc-initial-defines` (CCSCM sentinel), `cc-main` (pipeline init + `parse-translation-unit` + `cg-finish` + write) |
---
@@ -165,13 +169,17 @@ Source file
- **Heap discipline** — scratch heap reset at declaration boundaries via `call-with-scratch-cycle`; live roots deep-copied to main heap before reset
- **Vstack-based codegen** — expression evaluation pushes/pops `opnd` records; values optionally spilled to frame slots
- **Macro hide-sets** — `tok` carries hide set to prevent recursive expansion (C11 §6.10.3.4)
-- **Shared constant-expression evaluator** — `parse-const-*` serves both the parser (typed, with sizeof/cast/offsetof) and the preprocessor `#if` evaluator (`%pp-make-const-ps` wraps a token list as a minimal pstate with empty scope and `ps-cg = #f`); `%const-binl` provides the generic left-associative binary level pattern
-- **Sign-extension discipline** — narrow types (i8/i16/i32) stored as canonical 64-bit forms via shli/sari; widening casts are relabel-only. Frame rval loads (`%cg-load-opnd-into`) re-canonicalize against the opnd's type kind so a relabel-only cast (e.g. via `cg-arith-conv`) reads correctly downstream.
+- **Shared constant-expression evaluator** — `parse-const-*` serves both the parser (typed, with sizeof/cast/offsetof) and the preprocessor `#if` evaluator (`%pp-make-const-ps` wraps a token list as a minimal pstate with empty scope and `ps-cg = #f`); `%const-binl` is the generic left-associative binary-level pattern, fed by combiners (`%const-arith-op`, `%const-div-op`, `%const-cmp-op`, `%const-shift-op`) for every level from `||` down to `*` / `/` / `%`
+- **Sign-extension discipline** — narrow types (i8/i16/i32) stored as canonical 64-bit forms via shli/sari; widening casts are relabel-only. `%cg-canonicalize` centralises kind-driven sext/zext and is called from `%cg-load-opnd-into` (frame-rval load), `cg-cast` (narrowing), and `cg-binop` (narrow-typed result), so a relabel-only cast (e.g. via `cg-arith-conv`) reads correctly downstream.
- **Sret (struct return)** — structs >16B use indirect result: caller passes pointer in `a0`
-- **Variadic ABI** — 16 contiguous 8-byte slots; args 0–3 from `a`-regs, 4+ from `LDARG`
+- **Variadic ABI** — 16 contiguous 8-byte slots; args 0–3 from `a`-regs, 4+ from `LDARG`. `cg-va-start` / `cg-va-arg` route ap-lvalue stores/loads through `%cg-emit-addr-of`.
- **Tentative definitions** — collected in `world-tentatives`; emitted as `.bss` only if no full definition appears by TU end
- **FP softening** — float/double types parsed and sized per SysV ABI but all FP ops emit integer bitpattern operations
-- **M1pp + hex2++ output** — bodies are wrapped in libp1pp's `%fn(name, frame, { … })` (which opens a hex2++ `.scope` and emits `%enter`/`%eret`); compiler-internal labels (`:.ret`, loop `:.top`/`:.end`, switch `:.lbl_N`) are dotted scope-locals resolved by hex2++'s innermost-out scope walk; `%break` / `%continue` resolve through the same walk to the nearest enclosing scoped loop. User C labels use a `cc__<fn>__user_<name>` global mangling so `goto` is unaffected by nested scopes (C labels have function scope, not block).
-- **Alignment via `.align`** — `cg-emit-global` emits `.align <ctype-align>` before every `.data` or `.bss` symbol; `cg-intern-string` brackets each pooled string with `.align 8` so a non-multiple-of-4 string doesn't misalign the next instruction on aarch64. Intra-struct field padding stays as inline zero bytes (the offsets are constant relative to the aligned struct start, so a `.align` directive there would be redundant).
-- **Bare-hex string emission** — string pool and `(label-ref . LBL)` initializer pieces emit as bare hex chunks (≤64 bytes / 128 hex chars per line) consumed directly by hex2++; cc.scm no longer produces M0-style quoted-text literals.
-- **Ternary common type** — `cg-ifelse-merge` runs `%cg-merge-arith-type` over both arms after they emit, so the result `opnd` carries the C11 §6.5.15 common type rather than the first arm's type. The slot stores the raw 8-byte payload; `%cg-load-opnd-into` re-canonicalizes against whichever common type was picked. `&&`/`||` callers pre-cast both arms to `%t-i32` so the merge is a no-op for them.
+- **M1pp + hex2++ output** — bodies are wrapped in libp1pp's `%fn(name, frame, { … })`, which opens a hex2++ `.scope` and emits `%enter`/`%eret`. Compiler-internal labels (`:.ret`, loop `:.top`/`:.end`, switch `:.lbl_N`) are dotted scope-locals resolved by hex2++'s innermost-out scope walk; `%break` / `%continue` resolve through the same walk to the nearest enclosing scoped loop. User C labels use `cc__<fn>__user_<name>` global mangling so `goto` is unaffected by nested scopes (C labels have function scope, not block).
+- **Alignment via `.align`** — `cg-emit-global` emits `.align <ctype-align>` before every `.data` or `.bss` symbol; `cg-intern-string` brackets each pooled string with `.align 8` so a non-multiple-of-4 string doesn't misalign the next instruction on aarch64. Intra-struct field padding is inline zero bytes — offsets are constant relative to the aligned struct start, so a `.align` directive there would be redundant.
+- **Bare-hex string emission** — string pool and `(label-ref . LBL)` initializer pieces emit as bare hex chunks (≤64 bytes / 128 hex chars per line) consumed directly by hex2++.
+- **Ternary common type** — `cg-ifelse-merge` runs `%cg-merge-arith-type` over both arms after they emit; the result `opnd` carries the C11 §6.5.15 common type. The slot stores the raw 8-byte payload; `%cg-load-opnd-into` re-canonicalizes against whichever common type was picked. `&&`/`||` callers pre-cast both arms to `%t-i32`, so the merge is a no-op for them.
+- **Single type-name predicate** — every "does this token start a type-name?" check runs through `%tok-decl-start?` (`%const-tok-is-decl?`, `%const-paren-is-cast?`, `token-is-decl?`, the cast-or-unary disambiguator, and `stmt-starts-decl?` which adds storage classes).
+- **Shared bracket scanner** — `%punct-scan` is the one paren/bracket-depth walker, parameterised by stop predicate and an optional ternary-`?` tracking flag. All const-expr "skip dead arm" / "skip short-circuited rhs" helpers route through it.
+- **One core ld/st helper** — `%cg-emit-{ld,st}-bv` is the shared body behind both the slot-typed (base = `sp`, off rendered through `%cg-slot-expr`) and typed (explicit base register, raw int off via `%n`) variants. Width dispatch lives in one place; the four wrappers are 1-line trampolines.
+- **Aggregate-init dispatch** — `%global-init-elem` and `%local-init-elem`, each parameterised by an `elide?` flag, drive the brace-vs-elision element/field decision for all four `parse-init-*-list/mode` walkers.