boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

cc.scm (313483B)


      1 ;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude.
      2 
      3 ;; --------------------------------------------------------------------
      4 ;; bytevector helpers (scheme1 strings ARE bytevectors)
      5 ;; --------------------------------------------------------------------
      6 (define (bv= a b) (bytevector=? a b))
      7 
      8 (define (bv-prefix? p s)
      9   ;; Is s a bv that starts with the bytes of p?
     10   (let ((plen (bytevector-length p))
     11         (slen (bytevector-length s)))
     12     (if (< slen plen)
     13         #f
     14         (let loop ((i 0))
     15           (cond ((= i plen) #t)
     16                 ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i))
     17                  (loop (+ i 1)))
     18                 (else #f))))))
     19 
     20 (define (bv-find bv byte from)
     21   ;; Linear scan for the first byte == `byte` at index >= from.
     22   ;; Returns the index, or #f if not found.
     23   (let ((n (bytevector-length bv)))
     24     (let loop ((i from))
     25       (cond ((>= i n) #f)
     26             ((= (bytevector-u8-ref bv i) byte) i)
     27             (else (loop (+ i 1)))))))
     28 
     29 (define (bv-slice bv start end)
     30   ;; Fresh copy of bytes in [start, end). bytevector-copy already does
     31   ;; this in scheme1 (3-arg form returns a fresh bv).
     32   (bytevector-copy bv start end))
     33 
     34 (define (bv-of-byte b) (make-bytevector 1 b))
     35 
     36 (define (bv-cat lst-of-bv)
     37   ;; Concat a list of bytevectors with one allocation. bytevector-append
     38   ;; is variadic, so apply does this in a single linear pass.
     39   (apply bytevector-append lst-of-bv))
     40 
     41 (define (bv->fixnum bv radix)
     42   ;; (values ok? val) — #t/n on parse, #f/#f on fail.
     43   ;; string->number is pure and returns #f on parse failure.
     44   (let ((n (string->number bv radix)))
     45     (if n (values #t n) (values #f #f))))
     46 
     47 (define (fixnum->bv n radix) (number->string n radix))
     48 
     49 ;; --------------------------------------------------------------------
     50 ;; lists / alists
     51 ;; --------------------------------------------------------------------
     52 (define (alist-ref    key al) (let ((p (assoc key al))) (and p (cdr p))))
     53 (define (alist-ref/eq key al) (let ((p (assq  key al))) (and p (cdr p))))
     54 
     55 (define (alist-set key val al) (cons (cons key val) al))
     56 
     57 (define (alist-update key f al)
     58   ;; Functional update by equal? key. If found, replace its value with
     59   ;; (f old-val). If not found, prepend (cons key (f #f)) so callers
     60   ;; can use this as upsert-with-default.
     61   (let loop ((xs al) (acc '()))
     62     (cond ((null? xs)
     63            (cons (cons key (f #f)) (reverse acc)))
     64           ((equal? (car (car xs)) key)
     65            (append (reverse acc)
     66                    (cons (cons key (f (cdr (car xs))))
     67                          (cdr xs))))
     68           (else (loop (cdr xs) (cons (car xs) acc))))))
     69 
     70 (define (any p xs)
     71   (cond ((null? xs) #f)
     72         ((p (car xs)) #t)
     73         (else (any p (cdr xs)))))
     74 
     75 (define (every p xs)
     76   (cond ((null? xs) #t)
     77         ((p (car xs)) (every p (cdr xs)))
     78         (else #f)))
     79 
     80 (define (count p xs)
     81   (let loop ((xs xs) (n 0))
     82     (cond ((null? xs) n)
     83           ((p (car xs)) (loop (cdr xs) (+ n 1)))
     84           (else (loop (cdr xs) n)))))
     85 
     86 ;; --------------------------------------------------------------------
     87 ;; ints
     88 ;; --------------------------------------------------------------------
     89 (define (min3 a b c)                 (min a (min b c)))
     90 (define (align-up n k)
     91   ;; round n up to the nearest multiple of k (k must be a power of 2)
     92   (let ((mask (- k 1)))
     93     (bit-and (+ n mask) (bit-not mask))))
     94 
     95 ;; --------------------------------------------------------------------
     96 ;; output buffer (fixed-size pre-allocated byte storage)
     97 ;;
     98 ;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`.
     99 ;; buf-push! is bytevector-copy! into storage — zero allocation per
    100 ;; push, no chunks list to chase. This is what makes per-function
    101 ;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed-
    102 ;; storage (allocated once, lives pre-mark), so byte-level mutations
    103 ;; survive a rewind that discards the parse/cg scratch.
    104 ;;
    105 ;; Sizing knobs live in one place so they're easy to tune as inputs
    106 ;; grow. cg-init picks per-buf caps; the per-fn bufs are reused
    107 ;; across functions (reset, not re-allocated).
    108 ;; --------------------------------------------------------------------
    109 
    110 ;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a
    111 ;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow
    112 ;; die() reports off/len/cap so misses are easy to diagnose.
    113 ;;
    114 ;; Each cap is a power of two. scheme1's bv_capacity_for rounds the
    115 ;; requested length up to the smallest power of two ≥ n, so asking for
    116 ;; 2^k bytes consumes exactly 2^k of heap.
    117 (define %BUF-CAP-TEXT     8388608)   ; 8 MiB:   .text + entry stub
    118 (define %BUF-CAP-DATA     2097152)   ; 2 MiB:   .data (strings, globals)
    119 (define %BUF-CAP-BSS      2097152)   ; 2 MiB:   .bss
    120 (define %BUF-CAP-FN       262144)    ; 256 KiB: per-fn body asm
    121 (define %BUF-CAP-PROLOGUE 16384)     ; 16 KiB:  per-fn prologue
    122 (define %BUF-CAP-DEFAULT  65536)     ; 64 KiB:  make-buf fallback
    123 
    124 (define-record-type buf
    125   (%buf storage offset cap)
    126   buf?
    127   (storage buf-storage)                     ; bv: pre-allocated, never resized
    128   (offset  buf-offset  buf-offset-set!)     ; fixnum: bytes written so far
    129   (cap     buf-cap))                        ; fixnum: storage capacity
    130 
    131 (define (make-buf/cap cap)
    132   (%buf (make-bytevector cap 0) 0 cap))
    133 
    134 (define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT))
    135 
    136 (define (buf-push! b bv)
    137   (let* ((n      (bytevector-length bv))
    138          (off    (buf-offset b))
    139          (newoff (+ off n)))
    140     (cond
    141       ((> newoff (buf-cap b))
    142        (die #f "buf overflow" off n (buf-cap b))))
    143     (bytevector-copy! (buf-storage b) off bv 0 n)
    144     (buf-offset-set! b newoff)))
    145 
    146 (define (buf-flush b)
    147   ;; Snapshot the used prefix as a fresh bv. One allocation; the
    148   ;; underlying storage is unchanged.
    149   (bytevector-copy (buf-storage b) 0 (buf-offset b)))
    150 
    151 (define (buf-reset! b) (buf-offset-set! b 0))
    152 
    153 (define (buf-drain! dst src)
    154   ;; Copy src's used bytes into dst at dst's current write head; reset
    155   ;; src to empty. dst and src must be distinct bufs.
    156   (let* ((slen   (buf-offset src))
    157          (doff   (buf-offset dst))
    158          (newoff (+ doff slen)))
    159     (cond
    160       ((> newoff (buf-cap dst))
    161        (die #f "buf-drain overflow" doff slen (buf-cap dst))))
    162     (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen)
    163     (buf-offset-set! dst newoff)
    164     (buf-offset-set! src 0)))
    165 
    166 ;; --------------------------------------------------------------------
    167 ;; diagnostics + I/O
    168 ;; --------------------------------------------------------------------
    169 (define (die loc msg . irritants)
    170   ;; Format:
    171   ;;   <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ...
    172   ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted.
    173   ;; irritants are written via display semantics (no quoting); format's
    174   ;; ~a handles bv/fixnum/pair/symbol the same way display does.
    175   ;;
    176   ;; All output is built into a single bv and sent to fd 2 with one
    177   ;; sys-write loop, so a partial write doesn't interleave fragments
    178   ;; from a concurrent process.
    179   (let* ((prefix (if loc
    180                      (format "~a:~d:~d: error: "
    181                              (loc-file loc) (loc-line loc) (loc-col loc))
    182                      "error: "))
    183          (head (bytevector-append prefix (format "~a" msg)))
    184          ;; Irritants get ": " before the first and " " between the rest.
    185          (tail (if (null? irritants)
    186                    (list NL-BV)
    187                    (let walk ((xs irritants) (sep ": ") (acc '()))
    188                      (if (null? xs)
    189                          (reverse (cons NL-BV acc))
    190                          (walk (cdr xs)
    191                                " "
    192                                (cons (format "~a" (car xs))
    193                                      (cons sep acc)))))))
    194          (out (bv-cat (cons head tail))))
    195     (write-bv-fd 2 out)
    196     (sys-exit 1)))
    197 
    198 (define (slurp-fd fd)
    199   ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's
    200   ;; port layer uses); bv-concat-reverse builds the result in one
    201   ;; allocation so a multi-MB tcc.c stays linear.
    202   (let ((buf (make-bytevector BUFSIZE)))
    203     (let loop ((acc '()))
    204       (let ((r (sys-read fd buf 0 BUFSIZE)))
    205         (cond ((not (car r))
    206                (die #f "slurp-fd: sys-read failed" (cdr r)))
    207               ((zero? (cdr r))
    208                (bv-concat-reverse acc))
    209               (else
    210                (loop (cons (bytevector-copy buf 0 (cdr r)) acc))))))))
    211 
    212 (define (write-bv-fd fd bv)
    213   ;; Full write or die. sys-write may write fewer bytes than requested;
    214   ;; advance the offset and retry the unwritten tail.
    215   ;;
    216   ;; On failure we sys-exit directly instead of routing through `die`
    217   ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must
    218   ;; not recurse infinitely. Status 1 matches the contract for `die`.
    219   (let ((len (bytevector-length bv)))
    220     (let loop ((off 0))
    221       (if (= off len)
    222           #t
    223           (let ((r (sys-write fd bv off (- len off))))
    224             (cond ((not (car r))      (sys-exit 1))
    225                   ((zero? (cdr r))    (sys-exit 1))
    226                   (else (loop (+ off (cdr r))))))))))
    227 
    228 ;; --------------------------------------------------------------------
    229 ;; debug logging
    230 ;;
    231 ;; Cheap sticky on/off: the cc compiler is single-threaded and short-
    232 ;; lived, so a top-level mutable flag is fine. Toggle via
    233 ;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr)
    234 ;; writes one line to fd 2 in the same display-style format as `die`,
    235 ;; but doesn't abort. The intent is to trace heap usage between cc
    236 ;; phases (lex/pp/parse/cg-finish) without compile-time conditionals.
    237 ;; --------------------------------------------------------------------
    238 (define %debug-log-enabled #f)
    239 (define (debug-log-on!)  (set! %debug-log-enabled #t))
    240 (define (debug-log-off!) (set! %debug-log-enabled #f))
    241 (define (debug-log? )    %debug-log-enabled)
    242 
    243 ;; --cc-trace-emit: if on, cg-fn-end injects a `%trace(MANGLED)` line
    244 ;; at the top of each emitted function body (right after the prologue's
    245 ;; argument-spill, so the macro is free to clobber a0..a3). Pairs with
    246 ;; libp1pp's %trace macro + libp1pp__trace runtime helper to produce a
    247 ;; stderr line per function entry, with the runtime address of the
    248 ;; first body instruction. See P1/P1pp.P1pp's "Tracepoint" section.
    249 (define %trace-emit-enabled #f)
    250 (define (trace-emit-on!)  (set! %trace-emit-enabled #t))
    251 (define (trace-emit-off!) (set! %trace-emit-enabled #f))
    252 (define (trace-emit?)     %trace-emit-enabled)
    253 
    254 (define (debug-log msg . irritants)
    255   (cond
    256     (%debug-log-enabled
    257      (let* ((head (bytevector-append "[cc] " (format "~a" msg)))
    258             (tail (if (null? irritants)
    259                       (list NL-BV)
    260                       (let walk ((xs irritants) (sep ": ") (acc '()))
    261                         (if (null? xs)
    262                             (reverse (cons NL-BV acc))
    263                             (walk (cdr xs)
    264                                   " "
    265                                   (cons (format "~a" (car xs))
    266                                         (cons sep acc)))))))
    267             (out (bv-cat (cons head tail))))
    268        (write-bv-fd 2 out)))
    269     (else #t)))
    270 
    271 ;; --------------------------------------------------------------------
    272 ;; fresh-name generator (used for cg label counters, etc.)
    273 ;; --------------------------------------------------------------------
    274 (define (make-namer prefix)
    275   ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh
    276   ;; bv. The counter lives in the closure's lexical environment; scheme1
    277   ;; closures heap-capture by reference, so set! on ctr is sticky.
    278   (let ((ctr 0))
    279     (lambda ()
    280       (let ((s (bytevector-append prefix (number->string ctr 10))))
    281         (set! ctr (+ ctr 1))
    282         s))))
    283 ;; cc/data.scm — record types and symbol alphabets shared across modules.
    284 
    285 ;; --------------------------------------------------------------------
    286 ;; loc — source location for diagnostics
    287 ;; --------------------------------------------------------------------
    288 (define-record-type loc
    289   (%loc file line col)
    290   loc?
    291   (file loc-file)            ; bv
    292   (line loc-line)            ; fixnum
    293   (col  loc-col))            ; fixnum
    294 
    295 ;; --------------------------------------------------------------------
    296 ;; tok — lexer token.
    297 ;; --------------------------------------------------------------------
    298 (define-record-type tok
    299   (%tok kind value loc hide)
    300   tok?
    301   (kind  tok-kind)           ; IDENT | INT | STR | CHAR | KW | PUNCT
    302                              ; | NL | HASH | EOF
    303   (value tok-value)          ; bv | fixnum | symbol | #f
    304   (loc   tok-loc)            ; loc
    305   (hide  tok-hide))          ; list of bv (macro names already expanded)
    306 
    307 (define (make-tok kind value loc)
    308   (%tok kind value loc '()))
    309 
    310 ;; --------------------------------------------------------------------
    311 ;; macro — preprocessor macro definition
    312 ;; --------------------------------------------------------------------
    313 (define-record-type macro
    314   (%macro kind params body)
    315   macro?
    316   (kind   macro-kind)        ; 'obj | 'fn | 'fn-vararg
    317   (params macro-params)      ; list of bv
    318   (body   macro-body))       ; list of tok
    319 
    320 ;; --------------------------------------------------------------------
    321 ;; ctype — C type.
    322 ;;
    323 ;; size/align/ext mutate only on forward struct/union completion (see
    324 ;; complete-agg!). Every other ctype is constructed in its final shape
    325 ;; and treated as immutable thereafter.
    326 ;; --------------------------------------------------------------------
    327 (define-record-type ctype
    328   (%ctype kind size align ext)
    329   ctype?
    330   (kind  ctype-kind)
    331   (size  ctype-size  ctype-size-set!)
    332   (align ctype-align ctype-align-set!)
    333   (ext   ctype-ext   ctype-ext-set!))
    334 
    335 ;; Interned primitive ctypes. Equality is eq?.
    336 (define %t-void  (%ctype 'void  -1 -1 #f))
    337 (define %t-i8    (%ctype 'i8     1  1 #f))
    338 (define %t-u8    (%ctype 'u8     1  1 #f))
    339 (define %t-i16   (%ctype 'i16    2  2 #f))
    340 (define %t-u16   (%ctype 'u16    2  2 #f))
    341 (define %t-i32   (%ctype 'i32    4  4 #f))
    342 (define %t-u32   (%ctype 'u32    4  4 #f))
    343 (define %t-i64   (%ctype 'i64    8  8 #f))
    344 (define %t-u64   (%ctype 'u64    8  8 #f))
    345 (define %t-bool  (%ctype 'bool   1  1 #f))
    346 ;; Floating-point ctypes are parsed but never codegen'd; see CC.md §Cut.
    347 ;; Sizes/aligns match the SysV ABI so struct layout containing fp fields
    348 ;; works even when the cg refuses to emit fp ops.
    349 (define %t-flt   (%ctype 'flt    4  4 #f))
    350 (define %t-dbl   (%ctype 'dbl    8  8 #f))
    351 (define %t-ldbl  (%ctype 'ldbl   8  8 #f))
    352 
    353 ;; --------------------------------------------------------------------
    354 ;; sym — declared identifier (function, variable, typedef, …)
    355 ;; defined? distinguishes a forward declaration (extern fn proto, extern
    356 ;; var) from a definition (fn body, var with initializer, tentative def
    357 ;; without `extern`). scope-bind! merges compatible decls; only two
    358 ;; defined? syms with the same name fire a redefinition error.
    359 ;;
    360 ;; sym is immutable — no `sym-*-set!` accessor exists. scope-bind!'s
    361 ;; merge logic constructs a fresh sym rather than mutating in place.
    362 ;; Promotion (Phase 3 of CC-SCRATCH) relies on this: a deep-copied
    363 ;; sym in main heap is guaranteed structurally identical to its
    364 ;; scratch original.
    365 ;; --------------------------------------------------------------------
    366 (define-record-type sym
    367   (%sym name kind storage type slot defined?)
    368   sym?
    369   (name     sym-name)         ; bv
    370   (kind     sym-kind)         ; symbol from §1.7
    371   (storage  sym-storage)      ; symbol from §1.8 or #f
    372   (type     sym-type)         ; ctype
    373   (slot     sym-slot)         ; fixnum (auto local / param / enum-const value)
    374                               ; | #f (fn / global var / typedef)
    375   (defined? sym-defined?))    ; #t = definition, #f = decl-only
    376 
    377 ;; --------------------------------------------------------------------
    378 ;; opnd — operand on cg's vstack.
    379 ;; --------------------------------------------------------------------
    380 (define-record-type opnd
    381   (%opnd kind type ext lval?)
    382   opnd?
    383   (kind  opnd-kind)
    384   (type  opnd-type)
    385   (ext   opnd-ext)
    386   (lval? opnd-lval?))
    387 
    388 ;; --------------------------------------------------------------------
    389 ;; loop-ctx — entry on parser's loop/switch context stack.
    390 ;; --------------------------------------------------------------------
    391 (define-record-type loop-ctx
    392   (%loop-ctx kind tag has-continue?)
    393   loop-ctx?
    394   (kind          loop-ctx-kind)
    395   (tag           loop-ctx-tag)
    396   (has-continue? loop-ctx-has-continue?))
    397 
    398 ;; --------------------------------------------------------------------
    399 ;; fn-ctx — current-function context inside the parser.
    400 ;; --------------------------------------------------------------------
    401 (define-record-type fn-ctx
    402   (%fn-ctx name return-type params variadic? labels)
    403   fn-ctx?
    404   (name        fn-ctx-name)
    405   (return-type fn-ctx-return-type)
    406   (params      fn-ctx-params)
    407   (variadic?   fn-ctx-variadic?)
    408   (labels      fn-ctx-labels      fn-ctx-labels-set!))
    409 
    410 ;; --------------------------------------------------------------------
    411 ;; world — cross-decl persistent parser/cg state. The same world record
    412 ;; is shared by pstate and cg so its slots — scope (var/typedef
    413 ;; bindings), tags (struct/union/enum tags), str-pool (interned string
    414 ;; literals), tentatives (file-scope tentative defs awaiting end-of-TU
    415 ;; BSS emission) — can be reasoned about as one boundary contract.
    416 ;; Phase 3's promote walkers deep-copy from this single root.
    417 ;; --------------------------------------------------------------------
    418 (define-record-type world
    419   (%world scope tags str-pool tentatives)
    420   world?
    421   (scope      world-scope      world-scope-set!)
    422   (tags       world-tags       world-tags-set!)
    423   (str-pool   world-str-pool   world-str-pool-set!)
    424   (tentatives world-tentatives world-tentatives-set!))
    425 
    426 (define (make-world)
    427   (%world (list '()) (list '()) '() '()))
    428 
    429 ;; --------------------------------------------------------------------
    430 ;; pstate — parser state. Owned by parse.scm; read-only to cg.
    431 ;; --------------------------------------------------------------------
    432 ;; iter holds a tok-iter (typically a pp-iter chained over a lex-iter).
    433 ;; peek / peek2 / advance go through iter-peek / iter-peek2 / iter-next
    434 ;; so the parser pulls one token at a time, with no full materialized
    435 ;; token list.
    436 (define-record-type pstate
    437   (%pstate iter world loops fn-ctx cg)
    438   pstate?
    439   (iter   ps-iter   ps-iter-set!)
    440   (world  ps-world)
    441   (loops  ps-loops  ps-loops-set!)
    442   (fn-ctx ps-fn-ctx ps-fn-ctx-set!)
    443   (cg     ps-cg))
    444 
    445 (define (ps-scope ps)        (world-scope (ps-world ps)))
    446 (define (ps-scope-set! ps v) (world-scope-set! (ps-world ps) v))
    447 (define (ps-tags ps)         (world-tags (ps-world ps)))
    448 (define (ps-tags-set! ps v)  (world-tags-set! (ps-world ps) v))
    449 
    450 ;; --------------------------------------------------------------------
    451 ;; cg — codegen state. Owned by cg.scm.
    452 ;; --------------------------------------------------------------------
    453 ;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across
    454 ;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains
    455 ;; them into cg-text via buf-drain!. No per-fn allocation, which lets
    456 ;; the parse-decl-or-fn boundary (Phase 3, scratch heap) discard
    457 ;; everything the body allocated wholesale — fixed-storage byte writes
    458 ;; survive scratch reset because the buf storage was allocated in main.
    459 ;;
    460 ;; in-fn? discriminates "currently inside a function body" so
    461 ;; %cg-emit-buf can route emits to fn-buf during the body and cg-text
    462 ;; outside it (entry stub, etc.).
    463 ;;
    464 ;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type,
    465 ;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on
    466 ;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!.
    467 ;; lib? / str-prefix encode the --lib=PFX flag from cc-main:
    468 ;;   #f / ""        — exec mode (default): cg-finish emits the
    469 ;;                    p1_main entry stub and trailing :ELF_end, and
    470 ;;                    cg-intern-string labels strings cc__str_N.
    471 ;;   #t / "<pfx>"   — library mode: skip the stub and :ELF_end so the
    472 ;;                    output catm's into a larger TU, and label strings
    473 ;;                    <pfx>cc__str_N so two cc.scm outputs in the same
    474 ;;                    link don't collide on cc__str_0..N.
    475 (define-record-type cg
    476   (%cg text data bss vstack frame-hi label-ctr world fn-meta fn-buf prologue-buf max-outgoing in-fn? lib? str-prefix)
    477   cg?
    478   (text         cg-text)
    479   (data         cg-data)
    480   (bss          cg-bss)
    481   (vstack       cg-vstack       cg-vstack-set!)
    482   (frame-hi     cg-frame-hi     cg-frame-hi-set!)
    483   (label-ctr    cg-label-ctr    cg-label-ctr-set!)
    484   (world        cg-world)
    485   (fn-meta      cg-fn-meta      cg-fn-meta-set!)
    486   (fn-buf       cg-fn-buf)
    487   (prologue-buf cg-prologue-buf)
    488   (max-outgoing cg-max-outgoing cg-max-outgoing-set!)
    489   (in-fn?       cg-in-fn?       cg-in-fn?-set!)
    490   (lib?         cg-lib?)
    491   (str-prefix   cg-str-prefix))
    492 
    493 (define (cg-str-pool cg)        (world-str-pool (cg-world cg)))
    494 (define (cg-str-pool-set! cg v) (world-str-pool-set! (cg-world cg) v))
    495 
    496 ;; ctype predicates used by both cg and parser.
    497 (define (%ctype-ptr? t)
    498   (let ((k (ctype-kind t)))
    499     (if (eq? k 'ptr) #t (eq? k 'arr))))
    500 
    501 (define (%ctype-pointee t)
    502   (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t))
    503         ((eq? (ctype-kind t) 'arr) (car (ctype-ext t)))
    504         (else #f)))
    505 
    506 (define (%ctype-unsigned? t)
    507   (let ((k (ctype-kind t)))
    508     (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t)
    509           ((eq? k 'u64) #t) ((eq? k 'bool) #t)
    510           ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t)
    511           (else #f))))
    512 
    513 (define (%ctype-arith? t)
    514   (let ((k (ctype-kind t)))
    515     (cond ((eq? k 'i8) #t) ((eq? k 'i16) #t) ((eq? k 'i32) #t)
    516           ((eq? k 'i64) #t) ((eq? k 'u8) #t) ((eq? k 'u16) #t)
    517           ((eq? k 'u32) #t) ((eq? k 'u64) #t) ((eq? k 'bool) #t)
    518           (else #f))))
    519 
    520 (define (%ctype-fp? t)
    521   (let ((k (ctype-kind t)))
    522     (cond ((eq? k 'flt) #t) ((eq? k 'dbl) #t) ((eq? k 'ldbl) #t)
    523           (else #f))))
    524 
    525 ;; --------------------------------------------------------------------
    526 ;; Symbol alphabets — canonical alists.
    527 ;; --------------------------------------------------------------------
    528 
    529 ;; Keyword bytevector → keyword symbol.
    530 (define %keyword-alist
    531   '(;; storage
    532     ("auto" . auto) ("register" . register) ("static" . static)
    533     ("extern" . extern) ("typedef" . typedef)
    534     ;; qualifiers (parsed and discarded by parse)
    535     ("const" . const) ("volatile" . volatile) ("restrict" . restrict)
    536     ("inline" . inline)
    537     ;; GNU attribute spec — parsed and discarded; see skip-gnu-attribute!
    538     ("__attribute__" . __attribute__)
    539     ;; type specifiers
    540     ("void" . void) ("char" . char) ("short" . short)
    541     ("int" . int) ("long" . long)
    542     ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool)
    543     ;; rejected type specifiers (KW so diagnostics are crisp)
    544     ("float" . float) ("double" . double)
    545     ;; aggregates
    546     ("struct" . struct) ("union" . union) ("enum" . enum)
    547     ;; statements
    548     ("if" . if) ("else" . else)
    549     ("while" . while) ("do" . do) ("for" . for)
    550     ("switch" . switch) ("case" . case) ("default" . default)
    551     ("break" . break) ("continue" . continue)
    552     ("return" . return) ("goto" . goto)
    553     ;; operators
    554     ("sizeof" . sizeof)
    555     ;; reserved-and-rejected (KW so diagnostics are crisp)
    556     ("_Generic" . _Generic) ("_Atomic" . _Atomic)
    557     ("_Thread_local" . _Thread_local)
    558     ("_Alignof" . _Alignof) ("_Alignas" . _Alignas)
    559     ("_Static_assert" . _Static_assert)
    560     ("_Complex" . _Complex) ("_Imaginary" . _Imaginary)))
    561 
    562 ;; Punctuator bytevector → punct symbol.
    563 ;; Listed longest-match-first; the lexer scans this list in order.
    564 ;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents.
    565 (define %punct-alist
    566   '(;; 4-byte
    567     ("%:%:" . paste)
    568     ;; 3-byte
    569     ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq)
    570     ;; 2-byte
    571     ("##" . paste) ("->" . arrow)
    572     ("++" . inc) ("--" . dec)
    573     ("<<" . shl) (">>" . shr)
    574     ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne)
    575     ("&&" . land) ("||" . lor)
    576     ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq)
    577     ("/=" . slash-eq) ("%=" . pct-eq)
    578     ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq)
    579     ;; digraphs (mapped to the standard equivalent symbol)
    580     ("<:" . lbrack) (":>" . rbrack)
    581     ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash)
    582     ;; 1-byte
    583     ("[" . lbrack) ("]" . rbrack)
    584     ("(" . lparen) (")" . rparen)
    585     ("{" . lbrace) ("}" . rbrace)
    586     ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark)
    587     ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct)
    588     ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang)
    589     ("<" . lt) (">" . gt) ("=" . assign)
    590     ("#" . hash)))
    591 ;; cc/lex.scm — bytestream → token list. Pure function; no I/O,
    592 ;; no macro awareness.
    593 ;;
    594 ;; The lexer walks `src` byte-by-byte, threading (pos, line, col)
    595 ;; explicitly through every helper (no mutable state). Each token
    596 ;; captures its starting loc; helpers return (tok npos nline ncol).
    597 ;; Trigraphs and `\<newline>` line splicing are handled via a single
    598 ;; logical-byte primitive `%lex-peek`: it advances over splices and
    599 ;; translates trigraphs in-place, so downstream code only ever sees
    600 ;; the "translation phase 2" stream. Comments are stripped at the
    601 ;; same level as whitespace. NL tokens are emitted at every physical
    602 ;; newline so pp can use them to terminate directives.
    603 ;;
    604 ;; Heap discipline (per tests/scheme1/093-heap-mark-rewind.scm):
    605 ;; token-producing helpers wrap their inner work in call-with-heap-
    606 ;; rewind. Slots that must survive the rewind (start-loc and the
    607 ;; integer holders for npos/nline/ncol) are bound by an outer let
    608 ;; *before* the call-with-heap-rewind invocation, so the let's env
    609 ;; extensions live below the mark. The byte-run scanners' tail-call
    610 ;; env frames and any %lex-peek 4-lists are above the mark and get
    611 ;; reclaimed. For helpers that produce a fresh bytevector (ident,
    612 ;; string), the bv is allocated between the two calls so it persists
    613 ;; into the parent arena. Numeric digit runs accumulate inline via
    614 ;; %accum-int-while.
    615 ;;
    616 ;; %lex-iter-pull wraps each token-emitting iteration in an outer
    617 ;; call-with-heap-rewind. The helper allocates its own tok+loc+bv
    618 ;; above this outer mark; the driver reads the scalar fields and
    619 ;; copies any bv contents into %lex-scratch (sticky, pre-mark) before
    620 ;; the wrapper rewinds. Post-rewind it rebuilds a fresh tok+loc and a
    621 ;; fresh bv (for IDENT/STR) sized to the actual content.
    622 
    623 ;; --------------------------------------------------------------------
    624 ;; Cross-rewind transport for IDENT / STR bv values.
    625 ;;
    626 ;; %lex-scratch is a single sticky bytevector allocated below any
    627 ;; lex-tokenize heap-mark. The driver copies bv data here *before* the
    628 ;; rewind, then post-rewind allocates a fresh bv (sized exactly to the
    629 ;; ident/string content) by copying back out of scratch. The whole
    630 ;; lex run shares this one buffer.
    631 ;;
    632 ;; Why scratch, not a deduplicating intern pool: under scheme1's
    633 ;; interpreter, walking a cons-list pool per lookup costs ~50–150 B
    634 ;; per step in bind_params/eval_args/named-let env-extension
    635 ;; overhead. Even 16-way bucketing has the walk cost outpace the
    636 ;; bv-allocation savings until scheme1 grows a vector primitive
    637 ;; (an O(1) bucket lookup without interpreter overhead).
    638 ;; --------------------------------------------------------------------
    639 (define %lex-scratch-cap 65536)
    640 (define %lex-scratch (make-bytevector %lex-scratch-cap 0))
    641 
    642 (define (%lex-init!) #t)
    643 
    644 (define (%lex-scratch<- bv len)
    645   (cond ((> len %lex-scratch-cap)
    646          (die #f "lex: token exceeds scratch cap" len)))
    647   (let loop ((i 0))
    648     (cond ((< i len)
    649            (bytevector-u8-set! %lex-scratch i (bytevector-u8-ref bv i))
    650            (loop (+ i 1))))))
    651 
    652 (define (%lex-scratch->bv len)
    653   ;; Allocate a fresh bv (exact size) and copy scratch[0..len) into it.
    654   (let ((bv (make-bytevector len 0)))
    655     (let copy ((i 0))
    656       (cond ((< i len)
    657              (bytevector-u8-set! bv i (bytevector-u8-ref %lex-scratch i))
    658              (copy (+ i 1)))))
    659     bv))
    660 
    661 ;; --------------------------------------------------------------------
    662 ;; Byte-class predicates (raw u8 values, not chars).
    663 ;; --------------------------------------------------------------------
    664 (define (%digit? b)        (if (< b 48) #f (if (< 57 b) #f #t)))     ; '0'..'9'
    665 (define (%hex? b)
    666   (cond ((%digit? b) #t)
    667         ((if (< b 65) #f (if (< 70 b) #f #t)) #t)                    ; 'A'..'F'
    668         ((if (< b 97) #f (if (< 102 b) #f #t)) #t)                   ; 'a'..'f'
    669         (else #f)))
    670 (define (%octal? b)        (if (< b 48) #f (if (< 55 b) #f #t)))     ; '0'..'7'
    671 (define (%alpha? b)
    672   (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t)                    ; 'A'..'Z'
    673         ((if (< b 97) #f (if (< 122 b) #f #t)) #t)                   ; 'a'..'z'
    674         (else #f)))
    675 (define (%ident-start? b)  (or (%alpha? b) (= b 95)))                ; '_'
    676 (define (%ident-cont?  b)  (or (%ident-start? b) (%digit? b)))
    677 (define (%hspace? b)       (or (= b 32) (= b 9) (= b 11) (= b 12)))  ; SP TAB VT FF
    678 (define (%newline? b)      (= b 10))                                 ; '\n'
    679 
    680 ;; --------------------------------------------------------------------
    681 ;; Logical byte access. %lex-peek returns
    682 ;;   (byte npos nline ncol)
    683 ;; where (npos, nline, ncol) points *just past* the consumed physical
    684 ;; bytes. On EOF it returns (#f pos line col).
    685 ;;
    686 ;; Two transformations folded in here:
    687 ;;
    688 ;;   - Trigraphs:  ??=  ??(  ??/  ??)  ??'  ??<  ??!  ??>  ??-
    689 ;;                  #    [    \    ]    ^    {    |    }    ~
    690 ;;     The pair `??` followed by one of the nine trigraph completers
    691 ;;     produces the translated byte and advances 3 source bytes.
    692 ;;   - Line splice: a backslash immediately followed by `\n` is removed
    693 ;;     as a unit (incrementing line, resetting col to 1) and we recurse
    694 ;;     to fetch the next logical byte.
    695 ;;
    696 ;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and
    697 ;; char literals do their own escape-handling.
    698 ;; --------------------------------------------------------------------
    699 (define (%trigraph-byte b)
    700   ;; Map the third trigraph byte to its replacement, or #f.
    701   (cond ((= b 61) 35)   ; '=' -> '#'
    702         ((= b 40) 91)   ; '(' -> '['
    703         ((= b 47) 92)   ; '/' -> '\\'
    704         ((= b 41) 93)   ; ')' -> ']'
    705         ((= b 39) 94)   ; '\'' -> '^'
    706         ((= b 60) 123)  ; '<' -> '{'
    707         ((= b 33) 124)  ; '!' -> '|'
    708         ((= b 62) 125)  ; '>' -> '}'
    709         ((= b 45) 126)  ; '-' -> '~'
    710         (else #f)))
    711 
    712 (define (%lex-peek src pos line col)
    713   (let ((n (bytevector-length src)))
    714     (cond
    715       ((>= pos n) (list #f pos line col))
    716       (else
    717        (let ((b (bytevector-u8-ref src pos)))
    718          (cond
    719            ;; Trigraph: ?? + completer
    720            ((and (= b 63)
    721                  (< (+ pos 2) n)
    722                  (= (bytevector-u8-ref src (+ pos 1)) 63))
    723             (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2)))))
    724               (if tr
    725                   (list tr (+ pos 3) line (+ col 3))
    726                   (list b (+ pos 1) line (+ col 1)))))
    727            ;; Line splice: backslash + newline (consume both, no token)
    728            ((and (= b 92)
    729                  (< (+ pos 1) n)
    730                  (= (bytevector-u8-ref src (+ pos 1)) 10))
    731             (%lex-peek src (+ pos 2) (+ line 1) 1))
    732            ;; Newline: pass through but caller decides line/col bump
    733            ((%newline? b)
    734             (list b (+ pos 1) (+ line 1) 1))
    735            (else
    736             (list b (+ pos 1) line (+ col 1)))))))))
    737 
    738 ;; Convenience accessors over the 4-list.
    739 (define (%pk-byte p)  (car p))
    740 (define (%pk-pos  p)  (car (cdr p)))
    741 (define (%pk-line p)  (car (cdr (cdr p))))
    742 (define (%pk-col  p)  (car (cdr (cdr (cdr p)))))
    743 
    744 ;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with
    745 ;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the
    746 ;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and
    747 ;; no list allocation is needed. Excludes the three bytes that %lex-peek
    748 ;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump).
    749 (define (%fast-byte? b)
    750   (cond ((= b 63) #f)
    751         ((= b 92) #f)
    752         ((= b 10) #f)
    753         (else #t)))
    754 
    755 ;; --------------------------------------------------------------------
    756 ;; Whitespace + comment skipper.  Returns (pos line col).
    757 ;; Handles spaces/tabs, // line comments, /* block */ comments. Does
    758 ;; *not* consume `\n` — newlines are tokens.
    759 ;; --------------------------------------------------------------------
    760 (define (%skip-ws-and-comments src pos line col file)
    761   (let ((n (bytevector-length src)))
    762     (cond
    763       ((>= pos n) (list pos line col))
    764       (else
    765        (let ((b (bytevector-u8-ref src pos)))
    766          (cond
    767            ((and (%fast-byte? b) (%hspace? b))
    768             (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file))
    769            ((%fast-byte? b)
    770             ;; Fast-byte that isn't hspace. Only '/' is interesting;
    771             ;; everything else terminates the skip.
    772             (cond
    773               ((= b 47) (%maybe-comment src pos line col file))
    774               (else (list pos line col))))
    775            (else
    776             ;; Slow path: trigraph / splice / newline.
    777             (let* ((p (%lex-peek src pos line col))
    778                    (b2 (%pk-byte p)))
    779               (cond
    780                 ((not b2) (list pos line col))
    781                 ((%hspace? b2)
    782                  (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p)
    783                                         file))
    784                 ((= b2 47) (%maybe-comment src pos line col file))
    785                 (else (list pos line col)))))))))))
    786 
    787 (define (%maybe-comment src pos line col file)
    788   ;; Source byte at pos resolves to '/'. Decide between // line comment,
    789   ;; /* block comment, or "leave the slash alone" (it's a punctuator).
    790   (let* ((p (%lex-peek src pos line col))
    791          (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
    792          (b2 (%pk-byte q)))
    793     (cond
    794       ((and b2 (= b2 47))
    795        (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file))
    796       ((and b2 (= b2 42))
    797        (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q)
    798                             file line col))
    799       (else (list pos line col)))))
    800 
    801 (define (%skip-line-comment src pos line col file)
    802   ;; Consume bytes until end-of-stream or until we *see* '\n' (do not
    803   ;; consume the newline itself; outer loop emits the NL).
    804   (let ((n (bytevector-length src)))
    805     (cond
    806       ((>= pos n) (%skip-ws-and-comments src pos line col file))
    807       (else
    808        (let ((b (bytevector-u8-ref src pos)))
    809          (cond
    810            ;; '\n' terminates without consuming.
    811            ((= b 10) (%skip-ws-and-comments src pos line col file))
    812            ((%fast-byte? b)
    813             (%skip-line-comment src (+ pos 1) line (+ col 1) file))
    814            (else
    815             ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice.
    816             (let* ((p (%lex-peek src pos line col))
    817                    (b2 (%pk-byte p)))
    818               (cond
    819                 ((not b2) (%skip-ws-and-comments src pos line col file))
    820                 ((%newline? b2) (%skip-ws-and-comments src pos line col file))
    821                 (else
    822                  (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
    823                                      file)))))))))))
    824 
    825 (define (%skip-block-comment src pos line col file start-line start-col)
    826   (let ((n (bytevector-length src)))
    827     (cond
    828       ((>= pos n)
    829        (die (%loc file start-line start-col)
    830             "unterminated /* block comment"))
    831       (else
    832        (let ((b (bytevector-u8-ref src pos)))
    833          (cond
    834            ;; Fast path for plain content bytes that aren't '*'.
    835            ((and (%fast-byte? b) (not (= b 42)))
    836             (%skip-block-comment src (+ pos 1) line (+ col 1)
    837                                  file start-line start-col))
    838            (else
    839             ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice).
    840             (let* ((p (%lex-peek src pos line col))
    841                    (b1 (%pk-byte p)))
    842               (cond
    843                 ((not b1)
    844                  (die (%loc file start-line start-col)
    845                       "unterminated /* block comment"))
    846                 ((= b1 42)
    847                  (let* ((q  (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
    848                         (b2 (%pk-byte q)))
    849                    (cond
    850                      ((not b2)
    851                       (die (%loc file start-line start-col)
    852                            "unterminated /* block comment"))
    853                      ((= b2 47)
    854                       (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q)
    855                                              file))
    856                      (else
    857                       ;; Re-scan starting at the byte after '*'; the '*' was
    858                       ;; not the closer, but the next byte might itself be '*'.
    859                       (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
    860                                            file start-line start-col)))))
    861                 (else
    862                  (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
    863                                       file start-line start-col)))))))))))
    864 
    865 ;; --------------------------------------------------------------------
    866 ;; Byte-run scanners.
    867 ;;
    868 ;; Tail-recursive walkers used by ident/number/string readers. None
    869 ;; allocate per scanned byte on the fast path (only %lex-peek 4-lists
    870 ;; on trigraph/splice/newline); the per-iteration env frames allocated
    871 ;; by tail recursion are reclaimed by the caller's heap-rewind!.
    872 ;;
    873 ;; - %scan-while:    count bytes that satisfy pred. (count npos nline ncol)
    874 ;; - %fill-while-bv: write matching bytes into a pre-sized bv.
    875 ;; - %accum-int-while: accumulate a base-N integer over digit bytes.
    876 ;;     (val count npos nline ncol)
    877 ;; - %accum-octal-bounded: same, but stops after k digits.
    878 ;; --------------------------------------------------------------------
    879 (define (%scan-while pred src pos line col)
    880   (let ((n (bytevector-length src)))
    881     (let loop ((pos pos) (line line) (col col) (cnt 0))
    882       (cond
    883         ((>= pos n) (list cnt pos line col))
    884         (else
    885          (let ((b (bytevector-u8-ref src pos)))
    886            (cond
    887              ((%fast-byte? b)
    888               (if (pred b)
    889                   (loop (+ pos 1) line (+ col 1) (+ cnt 1))
    890                   (list cnt pos line col)))
    891              (else
    892               (let* ((p (%lex-peek src pos line col))
    893                      (b2 (%pk-byte p)))
    894                 (if (and b2 (pred b2))
    895                     (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1))
    896                     (list cnt pos line col)))))))))))
    897 
    898 (define (%fill-while-bv pred src pos line col bv idx)
    899   (let ((n (bytevector-length src)))
    900     (let loop ((pos pos) (line line) (col col) (idx idx))
    901       (cond
    902         ((>= pos n) idx)
    903         (else
    904          (let ((b (bytevector-u8-ref src pos)))
    905            (cond
    906              ((%fast-byte? b)
    907               (cond
    908                 ((pred b)
    909                  (bytevector-u8-set! bv idx b)
    910                  (loop (+ pos 1) line (+ col 1) (+ idx 1)))
    911                 (else idx)))
    912              (else
    913               (let* ((p (%lex-peek src pos line col))
    914                      (b2 (%pk-byte p)))
    915                 (cond
    916                   ((and b2 (pred b2))
    917                    (bytevector-u8-set! bv idx b2)
    918                    (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))
    919                   (else idx)))))))))))
    920 
    921 (define (%digit-val-byte b)
    922   ;; ASCII digit byte → integer value. Caller guarantees b is a valid
    923   ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F).
    924   (cond ((%digit? b) (- b 48))
    925         ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10))
    926         ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10))
    927         (else 0)))
    928 
    929 (define (%accum-int-while pred src pos line col base)
    930   (let ((n (bytevector-length src)))
    931     (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0))
    932       (cond
    933         ((>= pos n) (list val cnt pos line col))
    934         (else
    935          (let ((b (bytevector-u8-ref src pos)))
    936            (cond
    937              ((%fast-byte? b)
    938               (if (pred b)
    939                   (loop (+ pos 1) line (+ col 1)
    940                         (+ (* val base) (%digit-val-byte b)) (+ cnt 1))
    941                   (list val cnt pos line col)))
    942              (else
    943               (let* ((p (%lex-peek src pos line col))
    944                      (b2 (%pk-byte p)))
    945                 (if (and b2 (pred b2))
    946                     (loop (%pk-pos p) (%pk-line p) (%pk-col p)
    947                           (+ (* val base) (%digit-val-byte b2)) (+ cnt 1))
    948                     (list val cnt pos line col)))))))))))
    949 
    950 (define (%accum-octal-bounded src pos line col k)
    951   ;; Up to k octal digits. Returns (val count npos nline ncol).
    952   (let ((n (bytevector-length src)))
    953     (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0))
    954       (cond
    955         ((zero? k) (list val cnt pos line col))
    956         ((>= pos n) (list val cnt pos line col))
    957         (else
    958          (let ((b (bytevector-u8-ref src pos)))
    959            (cond
    960              ((%fast-byte? b)
    961               (if (%octal? b)
    962                   (loop (+ pos 1) line (+ col 1) (- k 1)
    963                         (+ (* val 8) (- b 48)) (+ cnt 1))
    964                   (list val cnt pos line col)))
    965              (else
    966               (let* ((p (%lex-peek src pos line col))
    967                      (b2 (%pk-byte p)))
    968                 (if (and b2 (%octal? b2))
    969                     (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1)
    970                           (+ (* val 8) (- b2 48)) (+ cnt 1))
    971                     (list val cnt pos line col)))))))))))
    972 
    973 ;; --------------------------------------------------------------------
    974 ;; Identifier / keyword reader.
    975 ;;
    976 ;; Returns (tok npos nline ncol). Caller has already verified that the
    977 ;; first byte at `pos` satisfies %ident-start?.
    978 ;;
    979 ;; Two-pass with call-with-heap-rewind: pass 1 (%scan-while) sizes the
    980 ;; run, then between the two calls we allocate `name` bv so it survives
    981 ;; the second rewind, then pass 2 (%fill-while-bv) writes into it. The
    982 ;; integer slots count/npos/nline/ncol are bound by the outer let so
    983 ;; they survive both rewinds.
    984 ;; --------------------------------------------------------------------
    985 (define (lex-read-ident src pos file)
    986   ;; Public for tests. Threads line/col from a fresh start.
    987   (%lex-read-ident src pos 1 (+ pos 1) file))
    988 
    989 (define (%lex-read-ident src pos line col file)
    990   (let ((start-loc (%loc file line col))
    991         (count 0) (npos 0) (nline 0) (ncol 0))
    992     (call-with-heap-rewind
    993       (lambda ()
    994         (let ((sres (%scan-while %ident-cont? src pos line col)))
    995           (set! count (car sres))
    996           (set! npos  (car (cdr sres)))
    997           (set! nline (car (cdr (cdr sres))))
    998           (set! ncol  (car (cdr (cdr (cdr sres))))))))
    999     (let ((name (make-bytevector count 0)))
   1000       (call-with-heap-rewind
   1001         (lambda ()
   1002           (%fill-while-bv %ident-cont? src pos line col name 0)))
   1003       (let ((kw (alist-ref name %keyword-alist)))
   1004         (cons (if kw
   1005                   (make-tok 'KW kw start-loc)
   1006                   (make-tok 'IDENT name start-loc))
   1007               (list npos nline ncol))))))
   1008 
   1009 ;; --------------------------------------------------------------------
   1010 ;; Number reader.
   1011 ;;
   1012 ;; Decimal: [1-9][0-9]*  (suffix: u U l L ll LL combinations)
   1013 ;; Hex:     0x[0-9a-fA-F]+ | 0X...
   1014 ;; Octal:   0[0-7]*
   1015 ;; Float:   anything looking like 1.0, 1e3, .5 → die crisply.
   1016 ;;
   1017 ;; Returns (tok npos nline ncol) on success. Aborts via `die` on float.
   1018 ;;
   1019 ;; %accum-int-while folds digit collection and value computation into
   1020 ;; one walk — no per-byte cons cells, no separate digits-list pass.
   1021 ;; --------------------------------------------------------------------
   1022 (define (lex-read-number src pos file)
   1023   (%lex-read-number src pos 1 (+ pos 1) file))
   1024 
   1025 (define (%lex-read-number src pos line col file)
   1026   (let* ((start-loc (%loc file line col))
   1027          (p (%lex-peek src pos line col))
   1028          (b (%pk-byte p)))
   1029     (cond
   1030       ;; '0x' / '0X' hex prefix
   1031       ((and (= b 48)
   1032             (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1033                    (b2 (%pk-byte q)))
   1034               (and b2 (or (= b2 120) (= b2 88)))))   ; 'x' or 'X'
   1035        (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1036               (r (%accum-int-while %hex? src
   1037                                     (%pk-pos q) (%pk-line q) (%pk-col q) 16))
   1038               (val   (car r))
   1039               (cnt   (car (cdr r)))
   1040               (pos2  (car (cdr (cdr r))))
   1041               (line2 (car (cdr (cdr (cdr r)))))
   1042               (col2  (car (cdr (cdr (cdr (cdr r)))))))
   1043          (if (zero? cnt)
   1044              (die start-loc "expected hex digits after 0x")
   1045              (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
   1046                (cons (make-tok 'INT val start-loc) after)))))
   1047       ;; '0' alone → octal sequence (could be just zero)
   1048       ((= b 48)
   1049        (let* ((r (%accum-int-while %octal? src
   1050                                     (%pk-pos p) (%pk-line p) (%pk-col p) 8))
   1051               (val   (car r))
   1052               (pos2  (car (cdr (cdr r))))
   1053               (line2 (car (cdr (cdr (cdr r)))))
   1054               (col2  (car (cdr (cdr (cdr (cdr r)))))))
   1055          ;; Reject '.' / 'e' / 'E' immediately after the octal run — float.
   1056          (%check-no-float src pos2 line2 col2 file start-loc)
   1057          ;; Reject stray digits 8/9 in an octal context (e.g. 089).
   1058          (let* ((p3 (%lex-peek src pos2 line2 col2))
   1059                 (b3 (%pk-byte p3)))
   1060            (if (and b3 (%digit? b3))
   1061                (die start-loc "invalid octal digit" (bv-of-byte b3))
   1062                (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
   1063                  (cons (make-tok 'INT val start-loc) after))))))
   1064       ;; '1'-'9' → decimal
   1065       ((%digit? b)
   1066        (let* ((r (%accum-int-while %digit? src pos line col 10))
   1067               (val   (car r))
   1068               (pos2  (car (cdr (cdr r))))
   1069               (line2 (car (cdr (cdr (cdr r)))))
   1070               (col2  (car (cdr (cdr (cdr (cdr r)))))))
   1071          (%check-no-float src pos2 line2 col2 file start-loc)
   1072          (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
   1073            (cons (make-tok 'INT val start-loc) after))))
   1074       ;; '.' followed by a digit = float-style literal — reject.
   1075       ((= b 46)
   1076        (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1077               (b2 (%pk-byte q)))
   1078          (if (and b2 (%digit? b2))
   1079              (die start-loc "floating-point literal not supported")
   1080              ;; Otherwise '.' was a punctuator — caller wouldn't have
   1081              ;; routed here unless it was a digit-led prefix.
   1082              (die start-loc "internal: number reader on non-number"))))
   1083       (else
   1084        (die start-loc "internal: number reader on non-number")))))
   1085 
   1086 (define (%check-no-float src pos line col file start-loc)
   1087   ;; If the byte at pos starts a fractional/exponent part, abort.
   1088   (let* ((p (%lex-peek src pos line col))
   1089          (b (%pk-byte p)))
   1090     (cond
   1091       ((not b) #t)
   1092       ((= b 46)  ; '.'
   1093        (die start-loc "floating-point literal not supported"))
   1094       ((or (= b 101) (= b 69))  ; 'e' / 'E'
   1095        ;; Only a float exponent if followed by [+-]?digit.
   1096        (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1097               (b2 (%pk-byte q)))
   1098          (cond
   1099            ((and b2 (%digit? b2))
   1100             (die start-loc "floating-point literal not supported"))
   1101            ((and b2 (or (= b2 43) (= b2 45)))
   1102             (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q)))
   1103                    (b3 (%pk-byte r)))
   1104               (if (and b3 (%digit? b3))
   1105                   (die start-loc "floating-point literal not supported")
   1106                   #t)))
   1107            (else #t))))
   1108       (else #t))))
   1109 
   1110 (define (%lex-strip-int-suffix src pos line col file)
   1111   ;; Consume any combination of u U l L (the long can be doubled). We
   1112   ;; don't validate orderings strictly; tcc.c uses the canonical forms.
   1113   ;; Returns (npos nline ncol).
   1114   (let loop ((pos pos) (line line) (col col))
   1115     (let* ((p (%lex-peek src pos line col))
   1116            (b (%pk-byte p)))
   1117       (cond
   1118         ((not b) (list pos line col))
   1119         ((or (= b 117) (= b 85)    ; u U
   1120              (= b 108) (= b 76))   ; l L
   1121          (loop (%pk-pos p) (%pk-line p) (%pk-col p)))
   1122         (else (list pos line col))))))
   1123 
   1124 ;; --------------------------------------------------------------------
   1125 ;; Escape sequence reader.
   1126 ;;
   1127 ;; %scan-or-fill-escape decodes one escape sequence starting at `pos`
   1128 ;; (which points one past the leading `\\`). When `bv` is a bytevector,
   1129 ;; the resulting byte is written to (bv idx); when it is #f, no write
   1130 ;; occurs (used during the string-pass scan phase). Returns the 4-list
   1131 ;; (val npos nline ncol).
   1132 ;; --------------------------------------------------------------------
   1133 (define (%scan-or-fill-escape src pos line col file start-loc bv idx)
   1134   (let* ((p (%lex-peek src pos line col))
   1135          (b (%pk-byte p)))
   1136     (cond
   1137       ((not b) (die start-loc "unterminated escape sequence"))
   1138       ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms).
   1139       ((or (= b 120) (= b 88))   ; 'x' / 'X'
   1140        (let* ((r (%accum-int-while %hex? src
   1141                                     (%pk-pos p) (%pk-line p) (%pk-col p) 16))
   1142               (val0  (car r))
   1143               (cnt   (car (cdr r)))
   1144               (pos2  (car (cdr (cdr r))))
   1145               (line2 (car (cdr (cdr (cdr r)))))
   1146               (col2  (car (cdr (cdr (cdr (cdr r)))))))
   1147          (cond
   1148            ((zero? cnt) (die start-loc "expected hex digits after \\x"))
   1149            (else
   1150             (let ((val (bit-and val0 255)))
   1151               (cond (bv (bytevector-u8-set! bv idx val))
   1152                     (else #f))
   1153               (list val pos2 line2 col2))))))
   1154       ;; \NNN — 1..3 octal digits.
   1155       ((%octal? b)
   1156        (let* ((r (%accum-octal-bounded src pos line col 3))
   1157               (val0  (car r))
   1158               (pos2  (car (cdr (cdr r))))
   1159               (line2 (car (cdr (cdr (cdr r)))))
   1160               (col2  (car (cdr (cdr (cdr (cdr r))))))
   1161               (val   (bit-and val0 255)))
   1162          (cond (bv (bytevector-u8-set! bv idx val))
   1163                (else #f))
   1164          (list val pos2 line2 col2)))
   1165       (else
   1166        (let ((val (cond ((= b 110) 10)        ; n
   1167                         ((= b 116) 9)         ; t
   1168                         ((= b 114) 13)        ; r
   1169                         ((= b 92)  92)        ; \\
   1170                         ((= b 39)  39)        ; '
   1171                         ((= b 34)  34)        ; "
   1172                         ((= b 48)  0)         ; 0 (already handled by octal but be safe)
   1173                         ((= b 97)  7)         ; \a -> BEL
   1174                         ((= b 98)  8)         ; \b
   1175                         ((= b 102) 12)        ; \f
   1176                         ((= b 118) 11)        ; \v
   1177                         ((= b 63)  63)        ; \?
   1178                         (else
   1179                          (die start-loc "unknown escape" (bv-of-byte b))))))
   1180          (cond (bv (bytevector-u8-set! bv idx val))
   1181                (else #f))
   1182          (list val (%pk-pos p) (%pk-line p) (%pk-col p)))))))
   1183 
   1184 ;; --------------------------------------------------------------------
   1185 ;; String reader.
   1186 ;;
   1187 ;; Caller has verified src[pos] == '"' (raw byte 34). Returns
   1188 ;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended).
   1189 ;;
   1190 ;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes
   1191 ;; collapse to 1 byte each); after rewind we allocate the final bv and
   1192 ;; rerun with bv set so the bytes are written directly into it.
   1193 ;; --------------------------------------------------------------------
   1194 (define (lex-read-string src pos file)
   1195   (%lex-read-string src pos 1 (+ pos 1) file))
   1196 
   1197 (define (%lex-read-string src pos line col file)
   1198   (let ((start-loc (%loc file line col))
   1199         (cnt 0) (npos 0) (nline 0) (ncol 0))
   1200     ;; '"' (34) is a fast-byte and never a trigraph result, so the
   1201     ;; physical byte at `pos` is exactly the opening quote.
   1202     (cond
   1203       ((or (>= pos (bytevector-length src))
   1204            (not (= (bytevector-u8-ref src pos) 34)))
   1205        (die start-loc "internal: string reader on non-quote"))
   1206       (else
   1207        (call-with-heap-rewind
   1208          (lambda ()
   1209            (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
   1210                                       file start-loc #f)))
   1211              (set! cnt   (car sres))
   1212              (set! npos  (car (cdr sres)))
   1213              (set! nline (car (cdr (cdr sres))))
   1214              (set! ncol  (car (cdr (cdr (cdr sres))))))))
   1215        (let ((bv (make-bytevector cnt 0)))
   1216          (call-with-heap-rewind
   1217            (lambda ()
   1218              (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)))
   1219          (cons (make-tok 'STR bv start-loc)
   1220                (list npos nline ncol)))))))
   1221 
   1222 (define (%string-pass src pos line col file start-loc bv)
   1223   ;; Walk the string body (after opening "). When `bv` is #f, count
   1224   ;; effective bytes; when it is a bytevector, write bytes into it at
   1225   ;; index 0..count-1. Returns (count npos nline ncol).
   1226   (let ((n (bytevector-length src)))
   1227     (let loop ((pos pos) (line line) (col col) (idx 0))
   1228       (cond
   1229         ((>= pos n) (die start-loc "unterminated string literal"))
   1230         (else
   1231          (let ((b (bytevector-u8-ref src pos)))
   1232            (cond
   1233              ;; Closing quote — fast byte but special.
   1234              ((= b 34)
   1235               (list idx (+ pos 1) line (+ col 1)))
   1236              ((%fast-byte? b)
   1237               (cond (bv (bytevector-u8-set! bv idx b))
   1238                     (else #f))
   1239               (loop (+ pos 1) line (+ col 1) (+ idx 1)))
   1240              (else
   1241               ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'.
   1242               (let* ((p (%lex-peek src pos line col))
   1243                      (b2 (%pk-byte p)))
   1244                 (cond
   1245                   ((not b2)
   1246                    (die start-loc "unterminated string literal"))
   1247                   ((= b2 34)
   1248                    (list idx (%pk-pos p) (%pk-line p) (%pk-col p)))
   1249                   ((%newline? b2)
   1250                    (die start-loc "newline in string literal"))
   1251                   ((= b2 92)
   1252                    (let* ((er    (%scan-or-fill-escape
   1253                                    src (%pk-pos p) (%pk-line p) (%pk-col p)
   1254                                    file start-loc bv idx))
   1255                           (epos  (car (cdr er)))
   1256                           (eline (car (cdr (cdr er))))
   1257                           (ecol  (car (cdr (cdr (cdr er))))))
   1258                      (loop epos eline ecol (+ idx 1))))
   1259                   (else
   1260                    (cond (bv (bytevector-u8-set! bv idx b2))
   1261                          (else #f))
   1262                    (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))))))))))))
   1263 
   1264 ;; --------------------------------------------------------------------
   1265 ;; Char reader.
   1266 ;;
   1267 ;; Caller has verified src[pos] == '\''. Multi-character constants
   1268 ;; ('AB') are rejected via die.
   1269 ;; --------------------------------------------------------------------
   1270 (define (lex-read-char src pos file)
   1271   (%lex-read-char src pos 1 (+ pos 1) file))
   1272 
   1273 (define (%lex-read-char src pos line col file)
   1274   (let* ((start-loc (%loc file line col))
   1275          (p0 (%lex-peek src pos line col))
   1276          (b0 (%pk-byte p0)))
   1277     (if (not (and b0 (= b0 39)))
   1278         (die start-loc "internal: char reader on non-quote")
   1279         (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0)
   1280                        file start-loc))))
   1281 
   1282 (define (%collect-char src pos line col file start-loc)
   1283   ;; Read exactly one byte (handling escapes), then expect closing '\''.
   1284   (let* ((p (%lex-peek src pos line col))
   1285          (b (%pk-byte p)))
   1286     (cond
   1287       ((not b) (die start-loc "unterminated char literal"))
   1288       ((= b 39) (die start-loc "empty char literal"))
   1289       ((%newline? b) (die start-loc "newline in char literal"))
   1290       ((= b 92)   ; escape
   1291        (let* ((r     (%scan-or-fill-escape src
   1292                                             (%pk-pos p) (%pk-line p) (%pk-col p)
   1293                                             file start-loc #f 0))
   1294               (val   (car r))
   1295               (pos2  (car (cdr r)))
   1296               (line2 (car (cdr (cdr r))))
   1297               (col2  (car (cdr (cdr (cdr r))))))
   1298          (%expect-char-close src pos2 line2 col2 file start-loc val)))
   1299       (else
   1300        (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p)
   1301                            file start-loc b)))))
   1302 
   1303 (define (%expect-char-close src pos line col file start-loc val)
   1304   (let* ((p (%lex-peek src pos line col))
   1305          (b (%pk-byte p)))
   1306     (cond
   1307       ((not b) (die start-loc "unterminated char literal"))
   1308       ((= b 39)
   1309        (cons (make-tok 'CHAR val start-loc)
   1310              (list (%pk-pos p) (%pk-line p) (%pk-col p))))
   1311       (else
   1312        (die start-loc "multi-character char constant not supported")))))
   1313 
   1314 ;; --------------------------------------------------------------------
   1315 ;; Punctuator reader.
   1316 ;;
   1317 ;; Greedy longest-match against %punct-alist. The alist
   1318 ;; is already ordered longest-first. We additionally bucket entries by
   1319 ;; their first byte so %lex-read-punct only loops over the small set of
   1320 ;; patterns that can start at the current source byte.
   1321 ;; --------------------------------------------------------------------
   1322 
   1323 (define (%alist-ref-int k al)
   1324   ;; Lookup in an int-keyed alist (linear scan, '= compare).
   1325   (cond ((null? al) #f)
   1326         ((= (car (car al)) k) (cdr (car al)))
   1327         (else (%alist-ref-int k (cdr al)))))
   1328 
   1329 (define (%mem-int? k xs)
   1330   (cond ((null? xs) #f)
   1331         ((= (car xs) k) #t)
   1332         (else (%mem-int? k (cdr xs)))))
   1333 
   1334 (define (%filter-by-first-byte b al)
   1335   ;; Subset of `al` whose pattern starts with byte b, preserving order.
   1336   (cond
   1337     ((null? al) '())
   1338     ((= (bytevector-u8-ref (car (car al)) 0) b)
   1339      (cons (car al) (%filter-by-first-byte b (cdr al))))
   1340     (else (%filter-by-first-byte b (cdr al)))))
   1341 
   1342 (define (%group-by-first-byte al)
   1343   ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per
   1344   ;; distinct first byte; sub-alist preserves longest-match-first
   1345   ;; order from the source list.
   1346   (let loop ((xs al) (seen '()) (out '()))
   1347     (cond
   1348       ((null? xs) (reverse out))
   1349       (else
   1350        (let* ((entry (car xs))
   1351               (pat   (car entry))
   1352               (b     (bytevector-u8-ref pat 0)))
   1353          (cond
   1354            ((%mem-int? b seen) (loop (cdr xs) seen out))
   1355            (else
   1356             (loop (cdr xs)
   1357                   (cons b seen)
   1358                   (cons (cons b (%filter-by-first-byte b al)) out)))))))))
   1359 
   1360 (define %punct-buckets (%group-by-first-byte %punct-alist))
   1361 
   1362 (define (lex-read-punct src pos file)
   1363   (%lex-read-punct src pos 1 (+ pos 1) file))
   1364 
   1365 (define (%lex-read-punct src pos line col file)
   1366   (let* ((start-loc (%loc file line col))
   1367          (p (%lex-peek src pos line col))
   1368          (b (%pk-byte p)))
   1369     (cond
   1370       ((not b) (die start-loc "unrecognized byte" "EOF"))
   1371       (else
   1372        (let ((bucket (%alist-ref-int b %punct-buckets)))
   1373          (cond
   1374            ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b)))
   1375            (else (%punct-loop src pos line col file start-loc bucket))))))))
   1376 
   1377 (define (%punct-loop src pos line col file start-loc al)
   1378   (cond
   1379     ((null? al)
   1380      (let* ((p (%lex-peek src pos line col)))
   1381        (die start-loc "unrecognized byte"
   1382             (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF"))))
   1383     (else
   1384      (let* ((entry (car al))
   1385             (pat   (car entry))
   1386             (sym   (cdr entry))
   1387             (m     (%match-bytes src pos line col pat 0)))
   1388        (if m
   1389            (cons (make-tok 'PUNCT sym start-loc) m)
   1390            (%punct-loop src pos line col file start-loc (cdr al)))))))
   1391 
   1392 (define (%match-bytes src pos line col pat i)
   1393   ;; If the next bytes from (pos line col), in logical-byte stream
   1394   ;; order, equal `pat[i..]`, return (npos nline ncol) after the
   1395   ;; match. Otherwise #f.
   1396   (cond
   1397     ((= i (bytevector-length pat)) (list pos line col))
   1398     (else
   1399      (let ((n (bytevector-length src)))
   1400        (cond
   1401          ((>= pos n) #f)
   1402          (else
   1403           (let ((b  (bytevector-u8-ref src pos))
   1404                 (pb (bytevector-u8-ref pat i)))
   1405             (cond
   1406               ((%fast-byte? b)
   1407                (if (= b pb)
   1408                    (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1))
   1409                    #f))
   1410               (else
   1411                (let* ((p (%lex-peek src pos line col))
   1412                       (b2 (%pk-byte p)))
   1413                  (cond
   1414                    ((not b2) #f)
   1415                    ((= b2 pb)
   1416                     (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p)
   1417                                   pat (+ i 1)))
   1418                    (else #f))))))))))))
   1419 
   1420 ;; --------------------------------------------------------------------
   1421 ;; tok-iter — streaming token source.
   1422 ;; --------------------------------------------------------------------
   1423 ;; Each pipeline layer (lex, pp, parser) wraps the layer below as a
   1424 ;; tok-iter. iter-next pulls one token at a time. iter-peek/iter-peek2
   1425 ;; cache lookahead in `buf`. iter-unget! pushes back. Live-data bound is
   1426 ;; lookahead (≤2) + per-layer state, not source length.
   1427 ;;
   1428 ;; Pull-fns must keep yielding EOF after the first EOF (idempotent).
   1429 (define-record-type tok-iter
   1430   (%tok-iter pull-fn state buf)
   1431   tok-iter?
   1432   (pull-fn tok-iter-pull-fn)
   1433   (state   tok-iter-state)
   1434   (buf     tok-iter-buf  tok-iter-buf-set!))
   1435 
   1436 (define (iter-next it)
   1437   (let ((b (tok-iter-buf it)))
   1438     (cond
   1439       ((null? b) ((tok-iter-pull-fn it) (tok-iter-state it)))
   1440       (else
   1441        (tok-iter-buf-set! it (cdr b))
   1442        (car b)))))
   1443 
   1444 (define (iter-peek it)
   1445   (let ((b (tok-iter-buf it)))
   1446     (cond
   1447       ((null? b)
   1448        (let ((t ((tok-iter-pull-fn it) (tok-iter-state it))))
   1449          (tok-iter-buf-set! it (list t))
   1450          t))
   1451       (else (car b)))))
   1452 
   1453 (define (iter-peek2 it)
   1454   (let ((b (tok-iter-buf it)))
   1455     (cond
   1456       ((null? b)
   1457        (let* ((t1 ((tok-iter-pull-fn it) (tok-iter-state it)))
   1458               (t2 ((tok-iter-pull-fn it) (tok-iter-state it))))
   1459          (tok-iter-buf-set! it (list t1 t2))
   1460          t2))
   1461       ((null? (cdr b))
   1462        (let ((t2 ((tok-iter-pull-fn it) (tok-iter-state it))))
   1463          (tok-iter-buf-set! it (cons (car b) (list t2)))
   1464          t2))
   1465       (else (car (cdr b))))))
   1466 
   1467 (define (iter-unget! it t)
   1468   (tok-iter-buf-set! it (cons t (tok-iter-buf it))))
   1469 
   1470 ;; Drain an iter to a list ending in EOF. Used by lex-tokenize /
   1471 ;; pp-expand so the cc-lex / cc-pp test runners can inspect the
   1472 ;; materialized stream.
   1473 (define (iter->list it)
   1474   (let loop ((acc '()))
   1475     (let ((t (iter-next it)))
   1476       (cond
   1477         ((eq? (tok-kind t) 'EOF) (reverse (cons t acc)))
   1478         (else (loop (cons t acc)))))))
   1479 
   1480 ;; --------------------------------------------------------------------
   1481 ;; list-iter — wrap an existing token list as a tok-iter. Yields each
   1482 ;; tok in turn; once exhausted, keeps yielding EOF (idempotent). The
   1483 ;; wrapped list typically already ends in EOF.
   1484 ;; --------------------------------------------------------------------
   1485 (define-record-type list-iter-state
   1486   (%list-iter-state toks)
   1487   list-iter-state?
   1488   (toks lis-toks lis-toks-set!))
   1489 
   1490 (define (make-list-iter toks)
   1491   (%tok-iter %list-iter-pull (%list-iter-state toks) '()))
   1492 
   1493 (define (%list-iter-pull st)
   1494   (let ((toks (lis-toks st)))
   1495     (cond
   1496       ((null? toks) (make-tok 'EOF #f #f))
   1497       (else
   1498        (lis-toks-set! st (cdr toks))
   1499        (car toks)))))
   1500 
   1501 ;; --------------------------------------------------------------------
   1502 ;; lex-iter — streaming lexer. Steady state: pos/line/col + bol? in
   1503 ;; lex-state; per-token allocation reclaimed via heap-mark/rewind.
   1504 ;; --------------------------------------------------------------------
   1505 ;; bol? — `#t` when no token has been emitted on the current physical
   1506 ;; line yet (start of file, or only NL + whitespace seen since the last
   1507 ;; line break). pp recognizes a directive only when its leading `#` is
   1508 ;; at line-start; we forward that decision into the token stream by
   1509 ;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`.
   1510 ;;
   1511 ;; Heap discipline: each call to %lex-iter-pull is wrapped in a
   1512 ;; call-with-heap-rewind. All scratch the helper allocates (the
   1513 ;; helper's own tok/loc, the `(cons tok 4-list)` it returns, every
   1514 ;; bind_params / let* / eval_args env-cons consumed getting in and out)
   1515 ;; lives above the mark and is reclaimed before returning. Per-token
   1516 ;; scratch (kind/val/vlen/loc-line/loc-col/npos/nline/ncol/nbol?) is
   1517 ;; allocated in the outer `let` BEFORE the wrapper call — set! mutates
   1518 ;; those cells in place across the rewind. Bv contents survive via the
   1519 ;; sticky %lex-scratch buffer + %lex-scratch->bv (allocated post-rewind).
   1520 ;; The survivors per token are tok (48 B) + loc (40 B) + bv if any.
   1521 (define-record-type lex-state
   1522   (%lex-state src file pos line col bol? done?)
   1523   lex-state?
   1524   (src   lex-state-src)
   1525   (file  lex-state-file)
   1526   (pos   lex-state-pos    lex-state-pos-set!)
   1527   (line  lex-state-line   lex-state-line-set!)
   1528   (col   lex-state-col    lex-state-col-set!)
   1529   (bol?  lex-state-bol?   lex-state-bol?-set!)
   1530   (done? lex-state-done?  lex-state-done?-set!))
   1531 
   1532 (define (make-lex-iter src file)
   1533   (%lex-init!)
   1534   (%tok-iter %lex-iter-pull
   1535              (%lex-state src file 0 1 1 #t #f)
   1536              '()))
   1537 
   1538 (define (%lex-iter-pull st)
   1539   (cond
   1540     ((lex-state-done? st)
   1541      ;; Idempotent EOF: keep yielding EOF after the first one.
   1542      (make-tok 'EOF #f (%loc (lex-state-file st)
   1543                               (lex-state-line st)
   1544                               (lex-state-col st))))
   1545     (else (%lex-iter-step st))))
   1546 
   1547 (define (%lex-iter-step st)
   1548   (let ((src  (lex-state-src st))
   1549         (file (lex-state-file st))
   1550         (pos  (lex-state-pos st))
   1551         (line (lex-state-line st))
   1552         (col  (lex-state-col st))
   1553         (bol? (lex-state-bol? st))
   1554         ;; Per-iteration scratch — must be allocated BEFORE the call to
   1555         ;; call-with-heap-rewind so that set!s issued from inside the
   1556         ;; thunk still find live cells after the rewind.
   1557         (kind #f) (val #f) (vlen 0)
   1558         (loc-line 1) (loc-col 1)
   1559         (npos 0) (nline 1) (ncol 1) (nbol? #f))
   1560     (call-with-heap-rewind
   1561      (lambda ()
   1562       (let* ((sw (%skip-ws-and-comments src pos line col file))
   1563            (pos1  (car sw))
   1564            (line1 (car (cdr sw)))
   1565            (col1  (car (cdr (cdr sw))))
   1566            (p     (%lex-peek src pos1 line1 col1))
   1567            (b     (%pk-byte p)))
   1568       (set! loc-line line1)
   1569       (set! loc-col col1)
   1570       (set! val #f) (set! vlen 0) (set! nbol? #f)
   1571       (cond
   1572         ;; EOF
   1573         ((not b)
   1574          (set! kind 'EOF)
   1575          (set! npos pos1) (set! nline line1) (set! ncol col1))
   1576         ;; Newline → NL token; next call starts at bol.
   1577         ((%newline? b)
   1578          (set! kind 'NL)
   1579          (set! npos (%pk-pos p))
   1580          (set! nline (%pk-line p))
   1581          (set! ncol  (%pk-col  p))
   1582          (set! nbol? #t))
   1583         ;; Line-leading `#`: bare `#` becomes HASH; `##` falls
   1584         ;; through to punctuator (lexes as `paste`).
   1585         ((and bol? (= b 35))
   1586          (let* ((q  (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1587                 (b2 (%pk-byte q)))
   1588            (cond
   1589              ((and b2 (= b2 35))
   1590               (let* ((r (%lex-read-punct src pos1 line1 col1 file))
   1591                      (tok (car r)) (rest (cdr r)))
   1592                 (set! kind 'PUNCT) (set! val (tok-value tok))
   1593                 (set! npos  (car rest))
   1594                 (set! nline (car (cdr rest)))
   1595                 (set! ncol  (car (cdr (cdr rest))))))
   1596              (else
   1597               (set! kind 'HASH)
   1598               (set! npos  (%pk-pos p))
   1599               (set! nline (%pk-line p))
   1600               (set! ncol  (%pk-col  p))))))
   1601         ;; Identifier / keyword
   1602         ((%ident-start? b)
   1603          (let* ((r (%lex-read-ident src pos1 line1 col1 file))
   1604                 (tok (car r)) (rest (cdr r)))
   1605            (set! kind (tok-kind tok))
   1606            (cond ((eq? (tok-kind tok) 'KW)
   1607                   (set! val (tok-value tok)))
   1608                  (else
   1609                   (let ((bv (tok-value tok)))
   1610                     (set! vlen (bytevector-length bv))
   1611                     (%lex-scratch<- bv vlen))))
   1612            (set! npos  (car rest))
   1613            (set! nline (car (cdr rest)))
   1614            (set! ncol  (car (cdr (cdr rest))))))
   1615         ;; Number (digit start)
   1616         ((%digit? b)
   1617          (let* ((r (%lex-read-number src pos1 line1 col1 file))
   1618                 (tok (car r)) (rest (cdr r)))
   1619            (set! kind 'INT) (set! val (tok-value tok))
   1620            (set! npos  (car rest))
   1621            (set! nline (car (cdr rest)))
   1622            (set! ncol  (car (cdr (cdr rest))))))
   1623         ;; '.' is a punctuator unless followed by a digit (float).
   1624         ((= b 46)
   1625          (let* ((q  (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
   1626                 (b2 (%pk-byte q)))
   1627            (cond
   1628              ((and b2 (%digit? b2))
   1629               (die (%loc file line1 col1)
   1630                    "floating-point literal not supported"))
   1631              (else
   1632               (let* ((r (%lex-read-punct src pos1 line1 col1 file))
   1633                      (tok (car r)) (rest (cdr r)))
   1634                 (set! kind 'PUNCT) (set! val (tok-value tok))
   1635                 (set! npos  (car rest))
   1636                 (set! nline (car (cdr rest)))
   1637                 (set! ncol  (car (cdr (cdr rest)))))))))
   1638         ;; String
   1639         ((= b 34)
   1640          (let* ((r (%lex-read-string src pos1 line1 col1 file))
   1641                 (tok (car r)) (rest (cdr r))
   1642                 (bv  (tok-value tok)))
   1643            (set! kind 'STR)
   1644            (set! vlen (bytevector-length bv))
   1645            (%lex-scratch<- bv vlen)
   1646            (set! npos  (car rest))
   1647            (set! nline (car (cdr rest)))
   1648            (set! ncol  (car (cdr (cdr rest))))))
   1649         ;; Char
   1650         ((= b 39)
   1651          (let* ((r (%lex-read-char src pos1 line1 col1 file))
   1652                 (tok (car r)) (rest (cdr r)))
   1653            (set! kind 'CHAR) (set! val (tok-value tok))
   1654            (set! npos  (car rest))
   1655            (set! nline (car (cdr rest)))
   1656            (set! ncol  (car (cdr (cdr rest))))))
   1657         ;; Punctuator (default)
   1658         (else
   1659          (let* ((r (%lex-read-punct src pos1 line1 col1 file))
   1660                 (tok (car r)) (rest (cdr r)))
   1661            ;; Line-leading `%:` digraph also acts as HASH for directives.
   1662            (cond
   1663              ((and bol? (eq? (tok-value tok) 'hash))
   1664               (set! kind 'HASH))
   1665              (else
   1666               (set! kind 'PUNCT) (set! val (tok-value tok))))
   1667            (set! npos  (car rest))
   1668            (set! nline (car (cdr rest)))
   1669            (set! ncol  (car (cdr (cdr rest))))))))))
   1670     ;; Reconstruct the survivor below the mark and advance state.
   1671     (cond
   1672       ((eq? kind 'EOF)
   1673        (lex-state-done?-set! st #t)
   1674        (make-tok 'EOF #f (%loc file loc-line loc-col)))
   1675       (else
   1676        (let ((tok-val (cond ((eq? kind 'IDENT) (%lex-scratch->bv vlen))
   1677                             ((eq? kind 'STR)   (%lex-scratch->bv vlen))
   1678                             (else val))))
   1679          (lex-state-pos-set! st npos)
   1680          (lex-state-line-set! st nline)
   1681          (lex-state-col-set! st ncol)
   1682          (lex-state-bol?-set! st nbol?)
   1683          (make-tok kind tok-val (%loc file loc-line loc-col)))))))
   1684 
   1685 ;; Drain a lex-iter into a list ending in EOF, for the cc-lex test
   1686 ;; runner. Production callers chain make-lex-iter directly.
   1687 (define (lex-tokenize src file)
   1688   (iter->list (make-lex-iter src file)))
   1689 ;; cc/pp.scm — preprocessor. Hide-set per C11 6.10.3.4.
   1690 ;; #include rejected (CC.md §Toolchain envelope).
   1691 
   1692 ;; --- helpers ---
   1693 (define (%pp-bv-mem? x xs)
   1694   (cond ((null? xs) #f)
   1695         ((bv= x (car xs)) #t)
   1696         (else (%pp-bv-mem? x (cdr xs)))))
   1697 
   1698 (define (%pp-bv-union a b)
   1699   (cond ((null? a) b)
   1700         ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b))
   1701         (else (cons (car a) (%pp-bv-union (cdr a) b)))))
   1702 
   1703 (define (%pp-with-hide t hide)
   1704   (%tok (tok-kind t) (tok-value t) (tok-loc t) hide))
   1705 (define (%pp-with-loc t loc)
   1706   (%tok (tok-kind t) (tok-value t) loc (tok-hide t)))
   1707 
   1708 ;; --- pp-state (private record) ---
   1709 ;; cond-stack: list of (active? . has-taken?). Outer-active gating is
   1710 ;; computed by walking the stack rather than encoding it in frames.
   1711 ;;
   1712 ;; Streaming fields drive make-pp-iter; the bounded-buffer path used
   1713 ;; by pp-eval-cexpr leaves them at #f / '().
   1714 ;;   lex-iter   — upstream tok-iter, or #f
   1715 ;;   up-pending — toks unshifted upstream (macro-expansion bodies that
   1716 ;;                must be re-scanned for further expansion)
   1717 ;;   out-buf    — toks already dispatched but stashed for the next pull
   1718 ;;                (peek-and-fuse for adjacent STRs lookahead overshoots
   1719 ;;                by one tok, which lands here)
   1720 (define-record-type pp-state
   1721   (%pp-state macros cond-stack cur-file line-delta lex-iter up-pending out-buf)
   1722   pp-state?
   1723   (macros     pps-macros     pps-macros-set!)
   1724   (cond-stack pps-cond-stack pps-cond-stack-set!)
   1725   (cur-file   pps-cur-file   pps-cur-file-set!)
   1726   (line-delta pps-line-delta pps-line-delta-set!)
   1727   (lex-iter   pps-lex-iter)
   1728   (up-pending pps-up-pending pps-up-pending-set!)
   1729   (out-buf    pps-out-buf    pps-out-buf-set!))
   1730 
   1731 (define (%pp-make-state defs) (%pp-state defs '() #f 0 #f '() '()))
   1732 
   1733 (define (%pp-active? state)
   1734   (let loop ((xs (pps-cond-stack state)))
   1735     (cond ((null? xs) #t)
   1736           ((not (car (car xs))) #f)
   1737           (else (loop (cdr xs))))))
   1738 
   1739 ;; Active for the *parent* of the top frame (used by elif/else).
   1740 (define (%pp-parent-active? state)
   1741   (let ((cs (pps-cond-stack state)))
   1742     (cond ((null? cs) #t)
   1743           (else
   1744            (let loop ((xs (cdr cs)))
   1745              (cond ((null? xs) #t)
   1746                    ((not (car (car xs))) #f)
   1747                    (else (loop (cdr xs)))))))))
   1748 
   1749 ;; --- token classification ---
   1750 (define (%pp-eof? t)   (eq? (tok-kind t) 'EOF))
   1751 (define (%pp-nl? t)    (eq? (tok-kind t) 'NL))
   1752 (define (%pp-hash? t)  (eq? (tok-kind t) 'HASH))
   1753 (define (%pp-ident? t) (eq? (tok-kind t) 'IDENT))
   1754 (define (%pp-int? t)   (eq? (tok-kind t) 'INT))
   1755 (define (%pp-punct? t pname)
   1756   (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname)))
   1757 (define (%pp-ident-name? t name-bv)
   1758   (and (%pp-ident? t) (bv= (tok-value t) name-bv)))
   1759 (define (%pp-skip-ws toks) toks)
   1760 
   1761 ;; --- built-in macro names ---
   1762 (define %pp-bv-FILE   "__FILE__")
   1763 (define %pp-bv-LINE   "__LINE__")
   1764 (define %pp-bv-STDC   "__STDC__")
   1765 (define %pp-bv-LISPCC "__LISPCC__")
   1766 (define %pp-bv-DATE   "__DATE__")
   1767 (define %pp-bv-TIME   "__TIME__")
   1768 (define %pp-bv-STDC-VERSION "__STDC_VERSION__")
   1769 (define %pp-bv-STDC-HOSTED  "__STDC_HOSTED__")
   1770 (define %pp-bv-VA-ARGS "__VA_ARGS__")
   1771 (define %pp-bv-defined "defined")
   1772 
   1773 ;; Fixed values for reproducibility — we don't read the wall clock.
   1774 (define %pp-bv-DATE-VALUE "Jan  1 1970")
   1775 (define %pp-bv-TIME-VALUE "00:00:00")
   1776 
   1777 (define (%pp-builtin? name)
   1778   (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE)
   1779       (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC)
   1780       (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME)
   1781       (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED)))
   1782 
   1783 (define (%pp-expand-builtin name loc state)
   1784   ;; Emit the token at the ORIGINAL loc; %pp-relocate downstream will
   1785   ;; apply pps-cur-file / pps-line-delta. Doing the rewrite here too
   1786   ;; (then letting relocate re-apply it) double-shifts __LINE__'s loc.
   1787   ;; The VALUE of __LINE__ / __FILE__ already reflects the post-#line
   1788   ;; mapping because we compute `file`/`line` from cur-file/line-delta.
   1789   (let* ((file (or (pps-cur-file state) (loc-file loc)))
   1790          (line (+ (loc-line loc) (pps-line-delta state))))
   1791     (cond
   1792       ((bv= name %pp-bv-FILE)         (list (%tok 'STR file loc '())))
   1793       ((bv= name %pp-bv-LINE)         (list (%tok 'INT line loc '())))
   1794       ((bv= name %pp-bv-STDC)         (list (%tok 'INT 1 loc '())))
   1795       ((bv= name %pp-bv-LISPCC)       (list (%tok 'INT 1 loc '())))
   1796       ((bv= name %pp-bv-DATE)         (list (%tok 'STR %pp-bv-DATE-VALUE loc '())))
   1797       ((bv= name %pp-bv-TIME)         (list (%tok 'STR %pp-bv-TIME-VALUE loc '())))
   1798       ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 loc '())))
   1799       ((bv= name %pp-bv-STDC-HOSTED)  (list (%tok 'INT 1 loc '())))
   1800       (else (die loc "internal: not a builtin" name)))))
   1801 
   1802 ;; --- buf-list: simple reversed-list buffer of toks ---
   1803 (define-record-type buf-list
   1804   (%buf-list xs)
   1805   buf-list?
   1806   (xs buf-list-xs buf-list-xs-set!))
   1807 (define (make-buf-list) (%buf-list '()))
   1808 (define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b))))
   1809 (define (buf-list-push-many! b ts)
   1810   (let loop ((ts ts))
   1811     (cond ((null? ts) #t)
   1812           (else (buf-list-push! b (car ts)) (loop (cdr ts))))))
   1813 (define (buf-list-flush b) (reverse (buf-list-xs b)))
   1814 
   1815 ;; --- make-pp-iter: streaming preprocessor ---
   1816 ;; Wraps a lex-iter (or any tok-iter). Returns a tok-iter. Live data
   1817 ;; bounded by parser state + lookahead, not source length. Adjacent-STR
   1818 ;; fusion happens inline via peek-and-stash.
   1819 (define (make-pp-iter src-iter initial-defines)
   1820   (let ((st (%pp-state initial-defines '() #f 0 src-iter '() '())))
   1821     (%tok-iter %pp-iter-pull st '())))
   1822 
   1823 (define (%pp-iter-pull st)
   1824   (let ((ob (pps-out-buf st)))
   1825     (cond
   1826       ((not (null? ob))
   1827        (pps-out-buf-set! st (cdr ob))
   1828        (car ob))
   1829       (else (%pp-maybe-fuse-str st (%pp-dispatch-step st))))))
   1830 
   1831 ;; --- upstream helpers ---
   1832 ;; Upstream tokens come either from up-pending (macro-expansion bodies
   1833 ;; that need re-scanning) or from the wrapped lex-iter.
   1834 (define (%pp-pull-upstream st)
   1835   (let ((up (pps-up-pending st)))
   1836     (cond
   1837       ((not (null? up))
   1838        (pps-up-pending-set! st (cdr up))
   1839        (car up))
   1840       (else (iter-next (pps-lex-iter st))))))
   1841 
   1842 (define (%pp-peek-upstream st)
   1843   (let ((up (pps-up-pending st)))
   1844     (cond
   1845       ((not (null? up)) (car up))
   1846       (else (iter-peek (pps-lex-iter st))))))
   1847 
   1848 ;; Push toks to the front of upstream so (car toks) is yielded next.
   1849 (define (%pp-unshift-upstream! st toks)
   1850   (pps-up-pending-set! st (append toks (pps-up-pending st))))
   1851 
   1852 ;; Collect tokens up to (not including) NL or EOF. NL is consumed; EOF
   1853 ;; is unshifted back so the main loop sees it.
   1854 (define (%pp-collect-line-stream st)
   1855   (let loop ((acc '()))
   1856     (let ((t (%pp-pull-upstream st)))
   1857       (cond
   1858         ((%pp-eof? t)
   1859          (%pp-unshift-upstream! st (list t))
   1860          (reverse acc))
   1861         ((%pp-nl? t) (reverse acc))
   1862         (else (loop (cons t acc)))))))
   1863 
   1864 ;; Streaming arg collection for fn-like macro calls. Position is just
   1865 ;; AFTER the opening `(`. Returns the list of arg-tokenlists.
   1866 (define (%pp-collect-args-stream st call-loc)
   1867   (let loop ((depth 0) (cur '()) (args '()))
   1868     (let ((t (%pp-pull-upstream st)))
   1869       (cond
   1870         ((%pp-eof? t)
   1871          (die call-loc "macro call: unterminated argument list"))
   1872         ((and (= depth 0) (%pp-punct? t 'rparen))
   1873          (cond
   1874            ;; Empty parens count as one empty argument; bind-args
   1875            ;; degenerates this back to "no args" for 0-param macros.
   1876            ((and (null? args) (null? cur)) (list '()))
   1877            (else (reverse (cons (reverse cur) args)))))
   1878         ((and (= depth 0) (%pp-punct? t 'comma))
   1879          (loop 0 '() (cons (reverse cur) args)))
   1880         ((%pp-punct? t 'lparen)
   1881          (loop (+ depth 1) (cons t cur) args))
   1882         ((%pp-punct? t 'rparen)
   1883          (loop (- depth 1) (cons t cur) args))
   1884         (else (loop depth (cons t cur) args))))))
   1885 
   1886 ;; Single dispatch step. Returns one post-pp tok (skipping NLs,
   1887 ;; processing directives, expanding macros). Does NOT apply STR-fusion
   1888 ;; — that happens one layer up in %pp-iter-pull, otherwise the
   1889 ;; recursive lookahead during fusion would itself fuse further STRs
   1890 ;; and drag tokens past the run into out-buf.
   1891 (define (%pp-dispatch-step st)
   1892   (let ((t (%pp-pull-upstream st)))
   1893     (cond
   1894       ((%pp-eof? t)
   1895        (cond ((not (null? (pps-cond-stack st)))
   1896               (die (tok-loc t) "unterminated #if/#ifdef/#ifndef"))
   1897              (else t)))
   1898       ((%pp-nl? t) (%pp-dispatch-step st))
   1899       ((%pp-hash? t)
   1900        (let ((line (%pp-collect-line-stream st)))
   1901          (%pp-dispatch-directive t line st #f)
   1902          (%pp-dispatch-step st)))
   1903       ((not (%pp-active? st))
   1904        (%pp-dispatch-step st))
   1905       ((%pp-ident? t)
   1906        (let ((name (tok-value t)))
   1907          (cond
   1908            ((%pp-bv-mem? name (tok-hide t))
   1909             (%pp-relocate t st))
   1910            ((%pp-builtin? name)
   1911             (let ((toks (%pp-expand-builtin name (tok-loc t) st)))
   1912               (%pp-unshift-upstream! st toks)
   1913               (%pp-dispatch-step st)))
   1914            (else
   1915             (let ((m (alist-ref name (pps-macros st))))
   1916               (cond
   1917                 ((not m) (%pp-relocate t st))
   1918                 ((eq? (macro-kind m) 'obj)
   1919                  (let ((body (%pp-prepare-body (macro-body m)
   1920                                (cons name (tok-hide t))
   1921                                (tok-loc t))))
   1922                    (%pp-unshift-upstream! st body)
   1923                    (%pp-dispatch-step st)))
   1924                 (else
   1925                  ;; fn-like or fn-vararg: peek upstream for `(`. If
   1926                  ;; not present, pass IDENT through unchanged (no
   1927                  ;; consumption); the next iter call will process the
   1928                  ;; following tok normally.
   1929                  (let ((next (%pp-peek-upstream st)))
   1930                    (cond
   1931                      ((not (%pp-punct? next 'lparen))
   1932                       (%pp-relocate t st))
   1933                      (else
   1934                       (%pp-pull-upstream st)        ; consume `(`
   1935                       (let* ((args (%pp-collect-args-stream st (tok-loc t)))
   1936                              (params (macro-params m))
   1937                              (variadic? (eq? (macro-kind m) 'fn-vararg))
   1938                              (env (%pp-bind-args params args variadic? (tok-loc t)))
   1939                              (sub (%pp-substitute (macro-body m) env (tok-loc t) st))
   1940                              (body (%pp-prepare-body sub
   1941                                      (cons name (tok-hide t))
   1942                                      (tok-loc t))))
   1943                         (%pp-unshift-upstream! st body)
   1944                         (%pp-dispatch-step st))))))))))))
   1945       (else (%pp-relocate t st)))))
   1946 
   1947 ;; Translation phase 6 (peek-and-fuse). If `cur` is STR, look at the
   1948 ;; next post-pp tok; if it's STR, fuse and repeat. Anything else gets
   1949 ;; stashed in out-buf for the next iter-next call. Lookahead goes
   1950 ;; through %pp-dispatch-step (no fusion), so a non-STR neighbor
   1951 ;; correctly terminates the run.
   1952 (define (%pp-maybe-fuse-str st cur)
   1953   (cond
   1954     ((not (eq? (tok-kind cur) 'STR)) cur)
   1955     (else
   1956      (let loop ((cur cur))
   1957        (let ((next (%pp-dispatch-step st)))
   1958          (cond
   1959            ((eq? (tok-kind next) 'STR)
   1960             (loop (%tok 'STR
   1961                         (bytevector-append (tok-value cur) (tok-value next))
   1962                         (tok-loc cur)
   1963                         (tok-hide cur))))
   1964            (else
   1965             (pps-out-buf-set! st (cons next (pps-out-buf st)))
   1966             cur)))))))
   1967 
   1968 ;; Drain a pp-iter into a list ending in EOF, for the cc-pp test
   1969 ;; runner. The input token list becomes the upstream via make-list-iter.
   1970 ;; Production callers chain make-pp-iter directly over a make-lex-iter.
   1971 (define (pp-expand toks initial-defines)
   1972   (iter->list (make-pp-iter (make-list-iter toks) initial-defines)))
   1973 
   1974 ;; --- directive dispatch ---
   1975 ;; pmatch-based on the directive name bv. bv literals match by equal?.
   1976 ;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else`
   1977 ;; are C keywords promoted by lex; their KW symbol values map back to bv
   1978 ;; via symbol->string).
   1979 (define (%pp-directive-name t)
   1980   (cond ((eq? (tok-kind t) 'IDENT) (tok-value t))
   1981         ((eq? (tok-kind t) 'KW)    (symbol->string (tok-value t)))
   1982         (else #f)))
   1983 
   1984 (define (%pp-dispatch-directive hash-tok line state out)
   1985   (let ((line (%pp-skip-ws line)))
   1986     (cond
   1987       ((null? line) #t)            ; bare `#` line — null directive
   1988       ((%pp-directive-name (car line))
   1989        (let ((name (%pp-directive-name (car line)))
   1990              (rest (cdr line))
   1991              (loc  (tok-loc (car line))))
   1992          (pmatch name
   1993            ("define"  (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t)))
   1994            ("undef"   (cond ((%pp-active? state) (%pp-do-undef rest state))  (else #t)))
   1995            ("if"      (%pp-do-if rest state))
   1996            ("ifdef"   (%pp-do-ifdef rest state))
   1997            ("ifndef"  (%pp-do-ifndef rest state))
   1998            ("elif"    (%pp-do-elif rest state))
   1999            ("else"    (%pp-do-else rest state))
   2000            ("endif"   (%pp-do-endif rest state))
   2001            ("error"   (cond ((%pp-active? state)
   2002                              (%pp-do-error (cons (car line) rest) state))
   2003                             (else #t)))
   2004            ("line"    (cond ((%pp-active? state)
   2005                              ;; Macro-expand the operands BEFORE
   2006                              ;; processing (`#line MACRO`). Pre-expansion
   2007                              ;; we capture the directive's source line so
   2008                              ;; the line-delta math doesn't anchor on a
   2009                              ;; macro definition site.
   2010                              (let ((here (cond
   2011                                            ((null? rest)
   2012                                             (loc-line (tok-loc hash-tok)))
   2013                                            (else
   2014                                             (loc-line (tok-loc (car rest)))))))
   2015                                (%pp-do-line (%pp-expand-line rest state)
   2016                                             state here)))
   2017                             (else #t)))
   2018            ("pragma"  (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t)))
   2019            ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t)))
   2020            (else (die loc "unknown preprocessor directive" name)))))
   2021       (else
   2022        (die (tok-loc (car line)) "expected directive name after '#'"
   2023             (tok-kind (car line)))))))
   2024 
   2025 ;; --- #define ---
   2026 ;; function-like vs object-like is decided by an immediately-adjacent `(`.
   2027 ;; "Adjacent" = column of `(` equals column of name + length of name.
   2028 (define (%pp-do-define line state)
   2029   (cond
   2030     ((null? line) (die #f "#define requires a macro name"))
   2031     ((not (%pp-ident? (car line)))
   2032      (die (tok-loc (car line)) "#define: expected identifier"))
   2033     (else
   2034      (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line)))
   2035        (cond
   2036          ((and (not (null? rest))
   2037                (%pp-punct? (car rest) 'lparen)
   2038                (= (loc-col (tok-loc (car rest)))
   2039                   (+ (loc-col (tok-loc nt))
   2040                      (bytevector-length name))))
   2041           (%pp-define-fn name (cdr rest) (tok-loc nt) state))
   2042          (else
   2043           (let ((m (%macro 'obj '() rest)))
   2044             (pps-macros-set! state (alist-set name m (pps-macros state))))))))))
   2045 
   2046 (define (%pp-define-fn name post-lparen nloc state)
   2047   (let loop ((toks post-lparen) (params '()) (variadic? #f))
   2048     (cond
   2049       ((null? toks) (die nloc "#define: unterminated parameter list"))
   2050       ((%pp-punct? (car toks) 'rparen)
   2051        (let* ((body (cdr toks))
   2052               (kind (if variadic? 'fn-vararg 'fn))
   2053               (m    (%macro kind (reverse params) body)))
   2054          (pps-macros-set! state (alist-set name m (pps-macros state)))))
   2055       ((%pp-punct? (car toks) 'ellipsis)
   2056        (let ((rest (cdr toks)))
   2057          (cond
   2058            ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'"))
   2059            ((%pp-punct? (car rest) 'rparen) (loop rest params #t))
   2060            (else (die (tok-loc (car rest)) "#define: garbage after '...'")))))
   2061       ((null? params)
   2062        (cond
   2063          ((%pp-ident? (car toks))
   2064           (loop (cdr toks) (cons (tok-value (car toks)) params) #f))
   2065          (else (die (tok-loc (car toks)) "#define: expected parameter name"))))
   2066       (else
   2067        (cond
   2068          ((%pp-punct? (car toks) 'comma)
   2069           (let ((after (cdr toks)))
   2070             (cond
   2071               ((null? after) (die (tok-loc (car toks)) "#define: trailing ','"))
   2072               ((%pp-punct? (car after) 'ellipsis)
   2073                (let ((aa (cdr after)))
   2074                  (cond
   2075                    ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen))
   2076                     (loop aa params #t))
   2077                    (else (die (tok-loc (car after))
   2078                               "#define: '...' must precede ')'")))))
   2079               ((%pp-ident? (car after))
   2080                (loop (cdr after) (cons (tok-value (car after)) params) #f))
   2081               (else
   2082                (die (tok-loc (car after))
   2083                     "#define: expected parameter name after ','")))))
   2084          (else (die (tok-loc (car toks))
   2085                     "#define: expected ',' or ')' in parameter list")))))))
   2086 
   2087 ;; --- #undef ---
   2088 (define (%pp-do-undef line state)
   2089   (cond
   2090     ((null? line) (die #f "#undef requires a macro name"))
   2091     ((not (%pp-ident? (car line)))
   2092      (die (tok-loc (car line)) "#undef: expected identifier"))
   2093     (else
   2094      (pps-macros-set! state
   2095        (%pp-alist-drop (tok-value (car line)) (pps-macros state))))))
   2096 
   2097 (define (%pp-alist-drop key al)
   2098   (cond ((null? al) '())
   2099         ((bv= (car (car al)) key) (cdr al))
   2100         (else (cons (car al) (%pp-alist-drop key (cdr al))))))
   2101 
   2102 ;; --- #if / #ifdef / #ifndef / #elif / #else / #endif ---
   2103 ;; cond-stack frame: (active? taken? else?). active? gates the body
   2104 ;; until the next #elif/#else/#endif; taken? records whether ANY arm
   2105 ;; (the original #if branch or any #elif) has matched, so later arms
   2106 ;; stay inactive; else? records that we have already passed an #else
   2107 ;; in this frame, so a subsequent #elif/#else is rejected.
   2108 (define (%pp-frame a? t? e?) (list a? t? e?))
   2109 (define (%pp-frame-active? f) (car f))
   2110 (define (%pp-frame-taken?  f) (car (cdr f)))
   2111 (define (%pp-frame-else?   f) (car (cdr (cdr f))))
   2112 
   2113 (define (%pp-do-if line state)
   2114   (cond
   2115     ((not (%pp-active? state))
   2116      (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state))))
   2117     (else
   2118      (let* ((v (pp-eval-cexpr line state))
   2119             (a? (not (= v 0))))
   2120        (pps-cond-stack-set! state (cons (%pp-frame a? a? #f) (pps-cond-stack state)))))))
   2121 
   2122 (define (%pp-do-ifdef line state)
   2123   (cond
   2124     ((not (%pp-active? state))
   2125      (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state))))
   2126     (else
   2127      (let ((d? (%pp-defined? (%pp-name-of-single line) state)))
   2128        (pps-cond-stack-set! state
   2129          (cons (%pp-frame d? d? #f) (pps-cond-stack state)))))))
   2130 
   2131 (define (%pp-do-ifndef line state)
   2132   (cond
   2133     ((not (%pp-active? state))
   2134      (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state))))
   2135     (else
   2136      (let ((a? (not (%pp-defined? (%pp-name-of-single line) state))))
   2137        (pps-cond-stack-set! state
   2138          (cons (%pp-frame a? a? #f) (pps-cond-stack state)))))))
   2139 
   2140 (define (%pp-name-of-single line)
   2141   (cond
   2142     ((null? line) (die #f "#ifdef/#ifndef: missing identifier"))
   2143     ((not (%pp-ident? (car line)))
   2144      (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier"))
   2145     (else (tok-value (car line)))))
   2146 
   2147 (define (%pp-defined? name state)
   2148   (or (alist-ref name (pps-macros state))
   2149       (%pp-builtin? name)
   2150       #f))
   2151 
   2152 (define (%pp-do-elif line state)
   2153   (let ((cs (pps-cond-stack state)))
   2154     (cond
   2155       ((null? cs) (die #f "#elif outside #if"))
   2156       (else
   2157        (let* ((top (car cs)) (rest (cdr cs))
   2158               (taken? (%pp-frame-taken? top))
   2159               (else?  (%pp-frame-else? top))
   2160               (par? (%pp-parent-active? state)))
   2161          (cond
   2162            (else? (die #f "#elif after #else"))
   2163            ((or (not par?) taken?)
   2164             (pps-cond-stack-set! state (cons (%pp-frame #f taken? #f) rest)))
   2165            (else
   2166             (let* ((v (pp-eval-cexpr line state))
   2167                    (a? (not (= v 0))))
   2168               (pps-cond-stack-set! state
   2169                 (cons (%pp-frame a? (or a? taken?) #f) rest))))))))))
   2170 
   2171 (define (%pp-do-else line state)
   2172   (let ((cs (pps-cond-stack state)))
   2173     (cond
   2174       ((null? cs) (die #f "#else outside #if"))
   2175       (else
   2176        (let* ((top (car cs)) (rest (cdr cs))
   2177               (taken? (%pp-frame-taken? top))
   2178               (else?  (%pp-frame-else? top))
   2179               (par? (%pp-parent-active? state)))
   2180          (cond
   2181            (else? (die #f "#else after #else"))
   2182            ((not par?)
   2183             (pps-cond-stack-set! state (cons (%pp-frame #f taken? #t) rest)))
   2184            (taken?
   2185             (pps-cond-stack-set! state (cons (%pp-frame #f #t #t) rest)))
   2186            (else
   2187             (pps-cond-stack-set! state (cons (%pp-frame #t #t #t) rest)))))))))
   2188 
   2189 (define (%pp-do-endif line state)
   2190   (let ((cs (pps-cond-stack state)))
   2191     (cond ((null? cs) (die #f "#endif outside #if"))
   2192           (else (pps-cond-stack-set! state (cdr cs))))))
   2193 
   2194 ;; --- #error ---
   2195 ;; line[0] is the directive name "error"; the rest is the user message.
   2196 (define (%pp-do-error line state)
   2197   (let* ((msg-toks (if (null? line) '() (cdr line)))
   2198          (loc (if (null? line) #f (tok-loc (car line))))
   2199          (msg (%pp-toks->display msg-toks)))
   2200     (die loc "#error" msg)))
   2201 
   2202 ;; Per C11 §6.10.3.2 ¶2: whitespace between argument tokens becomes a
   2203 ;; single space; absence of whitespace must NOT introduce one. We
   2204 ;; approximate "had whitespace" by comparing locations: a space goes
   2205 ;; in iff the next token does not abut the previous one (different
   2206 ;; line, or column gap larger than the prev spelling length).
   2207 (define (%pp-toks->display toks)
   2208   (let loop ((toks toks) (prev #f) (prev-bv #f) (acc '()))
   2209     (cond
   2210       ((null? toks) (bv-cat (reverse acc)))
   2211       (else
   2212        (let* ((t (car toks)) (p (%pp-tok->bv t))
   2213               (sep? (cond
   2214                       ((not prev) #f)
   2215                       ((or (not (tok-loc prev)) (not (tok-loc t))) #t)
   2216                       ((not (= (loc-line (tok-loc prev))
   2217                                (loc-line (tok-loc t)))) #t)
   2218                       (else
   2219                        (not (= (loc-col (tok-loc t))
   2220                                (+ (loc-col (tok-loc prev))
   2221                                   (bytevector-length prev-bv))))))))
   2222          (loop (cdr toks) t p
   2223                (if sep? (cons p (cons " " acc)) (cons p acc))))))))
   2224 
   2225 ;; Reverse-map punctuator symbol -> source spelling. %punct-alist may
   2226 ;; map several spellings to the same symbol (e.g. both "[" and "<:"
   2227 ;; resolve to 'lbrack); the 1-byte canonical forms appear last in the
   2228 ;; source list, so a last-wins fold yields "[" rather than the digraph.
   2229 (define %pp-punct-spell
   2230   (let loop ((al %punct-alist) (acc '()))
   2231     (cond ((null? al) acc)
   2232           (else (loop (cdr al)
   2233                       (alist-set (cdr (car al)) (car (car al)) acc))))))
   2234 
   2235 (define (%pp-punct-spelling sym)
   2236   (or (alist-ref/eq sym %pp-punct-spell) (symbol->string sym)))
   2237 
   2238 (define (%pp-tok->bv t)
   2239   (let ((k (tok-kind t)) (v (tok-value t)))
   2240     (cond
   2241       ((eq? k 'IDENT) v)
   2242       ((eq? k 'INT)   (fixnum->bv v 10))
   2243       ((eq? k 'STR)   (%pp-quote-bytes v 34))
   2244       ((eq? k 'CHAR)  (%pp-quote-bytes (bv-of-byte v) 39))
   2245       ((eq? k 'KW)    (symbol->string v))
   2246       ((eq? k 'PUNCT) (%pp-punct-spelling v))
   2247       (else "?"))))
   2248 
   2249 ;; Reconstruct a string/char literal source spelling from cooked content.
   2250 ;; Per C11 6.10.3.2: stringize must reproduce the source spelling of
   2251 ;; STR/CHAR constants — every `"` and `\` is prefixed with `\`, and
   2252 ;; the common control-character escapes are restored from their cooked
   2253 ;; bytes. `delim` is 34 for STR, 39 for CHAR.
   2254 (define (%pp-quote-bytes bv delim)
   2255   (let* ((n (bytevector-length bv))
   2256          (delim-bv (bv-of-byte delim)))
   2257     (let loop ((i 0) (acc (list delim-bv)))
   2258       (cond
   2259         ((= i n) (bv-cat (reverse (cons delim-bv acc))))
   2260         (else
   2261          (let ((b (bytevector-u8-ref bv i)))
   2262            (cond
   2263              ((or (= b delim) (= b 92))
   2264               (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc))))
   2265              ((= b 10) (loop (+ i 1) (cons "\\n" acc)))
   2266              ((= b 9)  (loop (+ i 1) (cons "\\t" acc)))
   2267              ((= b 13) (loop (+ i 1) (cons "\\r" acc)))
   2268              (else
   2269               (loop (+ i 1) (cons (bv-of-byte b) acc))))))))))
   2270 
   2271 ;; --- #line / #pragma / #include ---
   2272 ;; Approximate #line: subsequent toks have line = (orig-line + delta),
   2273 ;; where delta = (N - here-line - 1). Good enough for most cases.
   2274 (define (%pp-do-line line state here)
   2275   (cond
   2276     ((null? line) (die #f "#line requires a line number"))
   2277     ((not (%pp-int? (car line)))
   2278      (die (tok-loc (car line)) "#line: expected integer"))
   2279     (else
   2280      (let* ((nt (car line)) (n (tok-value nt))
   2281             (rest (cdr line)))
   2282        (pps-line-delta-set! state (- n here 1))
   2283        (cond
   2284          ((null? rest) #t)
   2285          ((eq? (tok-kind (car rest)) 'STR)
   2286           (pps-cur-file-set! state (tok-value (car rest))))
   2287          (else (die (tok-loc (car rest))
   2288                     "#line: expected string after number")))))))
   2289 
   2290 (define (%pp-do-pragma line state) #t)
   2291 
   2292 (define (%pp-do-include line state)
   2293   (die (if (null? line) #f (tok-loc (car line)))
   2294        "#include: file inclusion is handled upstream by pre-flatten"))
   2295 
   2296 ;; --- macro expansion engine ---
   2297 ;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set:
   2298 ;; if the name is in t.hide, don't expand. Otherwise expand and rescan
   2299 ;; the produced body, with hide += {name}.
   2300 (define (%pp-emit-expanded toks state out)
   2301   (let loop ((toks toks))
   2302     (cond
   2303       ((null? toks) #t)
   2304       (else
   2305        (let* ((t (car toks)) (rest (cdr toks)))
   2306          (cond
   2307            ((not (%pp-ident? t))
   2308             (buf-list-push! out (%pp-relocate t state))
   2309             (loop rest))
   2310            (else
   2311             (let ((name (tok-value t)))
   2312               (cond
   2313                 ((%pp-bv-mem? name (tok-hide t))
   2314                  (buf-list-push! out (%pp-relocate t state))
   2315                  (loop rest))
   2316                 ((%pp-builtin? name)
   2317                  (buf-list-push-many! out
   2318                    (%pp-expand-builtin name (tok-loc t) state))
   2319                  (loop rest))
   2320                 (else
   2321                  (let ((m (alist-ref name (pps-macros state))))
   2322                    (cond
   2323                      ((not m)
   2324                       (buf-list-push! out (%pp-relocate t state))
   2325                       (loop rest))
   2326                      (else
   2327                       (%pp-apply-macro t m rest state out loop))))))))))))))
   2328 
   2329 (define (%pp-apply-macro t m rest state out cont)
   2330   (let ((kind (macro-kind m)) (name (tok-value t)))
   2331     (cond
   2332       ((eq? kind 'obj)
   2333        (let ((bodies (%pp-prepare-body (macro-body m)
   2334                        (cons name (tok-hide t))
   2335                        (tok-loc t))))
   2336          (%pp-emit-expanded bodies state out)
   2337          (cont rest)))
   2338       (else
   2339        (let ((after (%pp-skip-ws rest)))
   2340          (cond
   2341            ((or (null? after) (not (%pp-punct? (car after) 'lparen)))
   2342             (buf-list-push! out (%pp-relocate t state))
   2343             (cont rest))
   2344            (else
   2345             (let* ((ar (%pp-collect-args (cdr after) (tok-loc t)))
   2346                    (args (car ar)) (rest2 (cdr ar))
   2347                    (params (macro-params m))
   2348                    (variadic? (eq? kind 'fn-vararg))
   2349                    (env (%pp-bind-args params args variadic? (tok-loc t)))
   2350                    (sub (%pp-substitute (macro-body m) env (tok-loc t) state))
   2351                    (bodies (%pp-prepare-body sub
   2352                              (cons name (tok-hide t))
   2353                              (tok-loc t))))
   2354               (%pp-emit-expanded bodies state out)
   2355               (cont rest2)))))))))
   2356 
   2357 ;; Stamp built-in marker tokens (__LINE__ / __FILE__) inside the body
   2358 ;; with the macro-invocation location, so they report the call site
   2359 ;; per C11 §6.10.8. Other body tokens keep their #define-time loc so
   2360 ;; diagnostics still point at the macro body. Hide-set is updated
   2361 ;; with the macro name on every token.
   2362 (define (%pp-prepare-body body extra-hide . call-loc-opt)
   2363   (let ((call-loc (cond ((null? call-loc-opt) #f)
   2364                         (else (car call-loc-opt)))))
   2365     (map (lambda (t)
   2366            (let ((hidden (%pp-with-hide t (%pp-bv-union extra-hide
   2367                                                         (tok-hide t)))))
   2368              (cond
   2369                ((and call-loc (%pp-ident? hidden)
   2370                      (or (bv= (tok-value hidden) %pp-bv-LINE)
   2371                          (bv= (tok-value hidden) %pp-bv-FILE)))
   2372                 (%pp-with-loc hidden call-loc))
   2373                (else hidden))))
   2374          body)))
   2375 
   2376 ;; Collect comma-separated args. `toks` starts AFTER `(`. Returns
   2377 ;; (args . rest), where args is a list of token-lists.
   2378 (define (%pp-collect-args toks call-loc)
   2379   (let loop ((toks toks) (depth 0) (cur '()) (args '()))
   2380     (cond
   2381       ((null? toks) (die call-loc "macro call: unterminated argument list"))
   2382       ((%pp-eof? (car toks))
   2383        (die call-loc "macro call: unterminated argument list"))
   2384       ((and (= depth 0) (%pp-punct? (car toks) 'rparen))
   2385        (let ((args*
   2386               (cond
   2387                 ;; Empty parens count as one empty argument; bind-args
   2388                 ;; degenerates this back to "no args" for 0-param macros.
   2389                 ((and (null? args) (null? cur)) (list '()))
   2390                 (else (reverse (cons (reverse cur) args))))))
   2391          (cons args* (cdr toks))))
   2392       ((and (= depth 0) (%pp-punct? (car toks) 'comma))
   2393        (loop (cdr toks) 0 '() (cons (reverse cur) args)))
   2394       ((%pp-punct? (car toks) 'lparen)
   2395        (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args))
   2396       ((%pp-punct? (car toks) 'rparen)
   2397        (loop (cdr toks) (- depth 1) (cons (car toks) cur) args))
   2398       (else
   2399        (loop (cdr toks) depth (cons (car toks) cur) args)))))
   2400 
   2401 ;; Bind formals → token-lists (alist by bv key). Variadic gathers
   2402 ;; trailing actuals into __VA_ARGS__, joined with synthetic commas.
   2403 (define (%pp-bind-args params args variadic? call-loc)
   2404   (let* ((np (length params)) (na (length args)))
   2405     (cond
   2406       (variadic?
   2407        (cond
   2408          ((< na np) (die call-loc "macro call: too few arguments"))
   2409          (else
   2410           (let loop ((ps params) (as args) (acc '()))
   2411             (cond
   2412               ((null? ps)
   2413                (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc))
   2414               (else
   2415                (loop (cdr ps) (cdr as)
   2416                      (alist-set (car ps) (car as) acc))))))))
   2417       (else
   2418        (cond
   2419          ((and (= np 0) (= na 1) (null? (car args))) '())
   2420          ((not (= np na)) (die call-loc "macro call: argument count mismatch"))
   2421          (else
   2422           (let loop ((ps params) (as args) (acc '()))
   2423             (cond
   2424               ((null? ps) acc)
   2425               (else (loop (cdr ps) (cdr as)
   2426                           (alist-set (car ps) (car as) acc)))))))))))
   2427 
   2428 (define (%pp-join-comma argss)
   2429   (cond
   2430     ((null? argss) '())
   2431     ((null? (cdr argss)) (car argss))
   2432     (else
   2433      (append (car argss)
   2434              (cons (%pp-synth-comma) (%pp-join-comma (cdr argss)))))))
   2435 
   2436 (define (%pp-synth-comma)
   2437   (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '()))
   2438 
   2439 ;; Body substitution: walk body; replace param IDENTs with arg toks,
   2440 ;; handle `#param` (stringize) and `a##b` (paste). Per C11 §6.10.3.1,
   2441 ;; arguments are macro-expanded BEFORE substitution into the body
   2442 ;; EXCEPT when the parameter is the operand of `#` or `##` (in which
   2443 ;; case the raw token list is used). Without prescan, recursive uses
   2444 ;; like M(M(1)) for `#define M(x) ...x...` fail to expand the inner
   2445 ;; M during rescan because the outer M is in every substituted
   2446 ;; token's hide-set.
   2447 (define (%pp-substitute body env call-loc state)
   2448   (let loop ((body body) (out '()))
   2449     (cond
   2450       ((null? body) (reverse out))
   2451       (else
   2452        (let ((t (car body)) (rest (cdr body)))
   2453          (cond
   2454            ((%pp-punct? t 'hash)
   2455             (cond
   2456               ((or (null? rest) (not (%pp-ident? (car rest))))
   2457                (die (tok-loc t) "stringize: '#' must precede a parameter name"))
   2458               (else
   2459                (let* ((id (car rest)) (pn (tok-value id))
   2460                       (pt (alist-ref pn env)))
   2461                  (cond
   2462                    ((not pt)
   2463                     (die (tok-loc id) "stringize: '#' operand must be a parameter" pn))
   2464                    (else
   2465                     (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '())))
   2466                       (loop (cdr rest) (cons s out)))))))))
   2467            ((%pp-punct? t 'paste)
   2468             (cond
   2469               ((null? out) (die (tok-loc t) "paste: '##' cannot start a body"))
   2470               ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body"))
   2471               (else
   2472                (let* ((lhs (car out))
   2473                       (rt (car rest))
   2474                       (rhs-list
   2475                        (cond
   2476                          ((and (%pp-ident? rt) (alist-ref (tok-value rt) env))
   2477                           (alist-ref (tok-value rt) env))
   2478                          (else (list rt)))))
   2479                  (cond
   2480                    ((null? rhs-list) (loop (cdr rest) out))
   2481                    (else
   2482                     (let* ((p (%pp-paste-tokens lhs (car rhs-list)))
   2483                            (after (append (cdr rhs-list) (cdr rest))))
   2484                       (loop after (cons p (cdr out))))))))))
   2485            ((%pp-ident? t)
   2486             (let* ((pn (tok-value t)) (pt (alist-ref pn env)))
   2487               (cond
   2488                 ((not pt) (loop rest (cons t out)))
   2489                 ((and (not (null? rest)) (%pp-punct? (car rest) 'paste))
   2490                  ;; Operand of ##: use raw arg tokens (no prescan).
   2491                  (cond
   2492                    ((null? pt) (loop (cdr rest) out))
   2493                    (else (loop rest (append (reverse pt) out)))))
   2494                 (else
   2495                  ;; Normal use: prescan (fully macro-expand the arg)
   2496                  ;; before substitution, per C11 §6.10.3.1.
   2497                  (let ((exp (%pp-expand-line pt state)))
   2498                    (loop rest (append (reverse exp) out)))))))
   2499            (else (loop rest (cons t out)))))))))
   2500 
   2501 ;; Paste two tokens textually; reparse the result.
   2502 (define (%pp-paste-tokens lhs rhs)
   2503   (let ((lk (tok-kind lhs)) (rk (tok-kind rhs)))
   2504     (cond
   2505       ((and (eq? lk 'IDENT) (eq? rk 'IDENT))
   2506        (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs))
   2507              (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
   2508       ((and (eq? lk 'IDENT) (eq? rk 'INT))
   2509        (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10))
   2510              (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
   2511       ((and (eq? lk 'INT) (eq? rk 'INT))
   2512        (let ((s (bytevector-append (fixnum->bv (tok-value lhs) 10)
   2513                                     (fixnum->bv (tok-value rhs) 10))))
   2514          (let-values (((ok? n) (bv->fixnum s 10)))
   2515            (cond
   2516              ((not ok?) (die (tok-loc lhs) "paste: cannot reparse as integer" s))
   2517              (else (%tok 'INT n (tok-loc lhs)
   2518                          (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))))))
   2519       (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk)))))
   2520 
   2521 (define (%pp-relocate t state)
   2522   (cond
   2523     ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t)
   2524     (else
   2525      (let* ((l (tok-loc t))
   2526             (f (or (pps-cur-file state) (loc-file l)))
   2527             (ln (+ (loc-line l) (pps-line-delta state)))
   2528             (c (loc-col l)))
   2529        (%pp-with-loc t (%loc f ln c))))))
   2530 
   2531 ;; --- pp-eval-cexpr: #if expression evaluator ---
   2532 ;; Steps: resolve `defined NAME`, macro-expand the rest, treat any
   2533 ;; remaining IDENT as 0, then delegate to parse-const-int via a minimal
   2534 ;; pstate (empty scope, no cg). sizeof(type) works as an extension;
   2535 ;; sizeof(expr) dies with a clear message.
   2536 ;;
   2537 ;; Arena boundary (test 093 A→B→C pattern). Everything inside the
   2538 ;; call-with-heap-rewind thunk is scratch: `s1`/`s2`/`s3` plus the
   2539 ;; parse-const-* (value . ctype) cells at every level. parse-const-int
   2540 ;; returns the integer via `car`, which is a fixnum immediate and
   2541 ;; survives the rewind. The error path goes through `die` (sys-exits),
   2542 ;; so no rewind there.
   2543 (define (%pp-make-const-ps toks)
   2544   (%pstate (make-list-iter toks)
   2545            (%world (list '()) (list '()) '() '())
   2546            '() #f #f))
   2547 
   2548 (define (pp-eval-cexpr toks outer)
   2549   ;; `outer` is the live %pp-state. We mint a fresh state for #if
   2550   ;; evaluation but inherit cur-file and line-delta so __FILE__ /
   2551   ;; __LINE__ inside the expression reflect any preceding #line.
   2552   (call-with-heap-rewind
   2553     (lambda ()
   2554       (let* ((state (%pp-state (pps-macros outer) '()
   2555                                (pps-cur-file outer)
   2556                                (pps-line-delta outer)
   2557                                #f '() '()))
   2558              (s1 (%pp-resolve-defined toks state))
   2559              (s2 (%pp-expand-line s1 state))
   2560              (s3 (%pp-idents-as-zero s2))
   2561              (ps (%pp-make-const-ps s3))
   2562              (val (parse-const-int ps))
   2563              (t   (peek ps)))
   2564         (cond
   2565           ((eq? (tok-kind t) 'EOF) val)
   2566           (else (die (tok-loc t) "#if: garbage at end of expression"
   2567                      (tok-kind t))))))))
   2568 
   2569 (define (%pp-expand-line toks state)
   2570   (let ((out (make-buf-list)))
   2571     (%pp-emit-expanded toks state out)
   2572     (buf-list-flush out)))
   2573 
   2574 (define (%pp-resolve-defined toks state)
   2575   (let loop ((toks toks) (acc '()))
   2576     (cond
   2577       ((null? toks) (reverse acc))
   2578       ((%pp-ident-name? (car toks) %pp-bv-defined)
   2579        (let ((rest (cdr toks)))
   2580          (cond
   2581            ((null? rest) (die (tok-loc (car toks)) "defined: missing operand"))
   2582            ((%pp-ident? (car rest))
   2583             (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0)))
   2584               (loop (cdr rest)
   2585                     (cons (%tok 'INT v (tok-loc (car toks)) '()) acc))))
   2586            ((%pp-punct? (car rest) 'lparen)
   2587             (let ((after (cdr rest)))
   2588               (cond
   2589                 ((or (null? after) (not (%pp-ident? (car after))))
   2590                  (die (tok-loc (car toks)) "defined: expected identifier"))
   2591                 (else
   2592                  (let ((aa (cdr after)))
   2593                    (cond
   2594                      ((or (null? aa) (not (%pp-punct? (car aa) 'rparen)))
   2595                       (die (tok-loc (car toks)) "defined: expected ')'"))
   2596                      (else
   2597                       (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0)))
   2598                         (loop (cdr aa)
   2599                               (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) ))
   2600            (else (die (tok-loc (car rest)) "defined: expected identifier or '('")))))
   2601       (else (loop (cdr toks) (cons (car toks) acc))))))
   2602 
   2603 (define (%pp-idents-as-zero toks)
   2604   (map (lambda (t)
   2605          (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '()))
   2606                (else t)))
   2607        toks))
   2608 
   2609 ;; cc/cg.scm — codegen state and emission API.
   2610 ;; Conversion split: parse owns promotion etc; cg owns sign extension,
   2611 ;; signed/unsigned dispatch, pointer scaling.
   2612 ;;
   2613 ;; Output uses libp1pp's structured macros (%fn, %ifelse_nez,
   2614 ;; %break, %continue) per docs/LIBP1PP.md. Function-local control-flow
   2615 ;; labels are hex2++ dotted labels inside %fn's .scope.
   2616 ;;
   2617 ;; Frame layout:
   2618 ;;   [sp + 0 .. staging*8)        outgoing-arg staging
   2619 ;;   [sp + staging*8 ..)          locals + spilled vstack values
   2620 ;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the
   2621 ;; staging size, only known at fn-end, can be filled in via a 0-arg
   2622 ;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block.
   2623 
   2624 (define (%cg-emit-buf cg)
   2625   (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg))))
   2626 
   2627 (define (%cg-emit cg bv)
   2628   (buf-push! (%cg-emit-buf cg) bv))
   2629 
   2630 (define (%cg-emit-many cg bvs)
   2631   (for-each (lambda (b) (%cg-emit cg b)) bvs))
   2632 
   2633 (define (%n n) (number->string n 10))
   2634 
   2635 ;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...)
   2636 ;; lives on cg-fn-meta, reset at every cg-fn-begin/v.
   2637 ;;
   2638 ;; Update is destructive: assq for the key, set-cdr! if found, else
   2639 ;; prepend. The functional alist-update path was O(n) per write *with*
   2640 ;; an append+reverse rebuild — and cg-fn-begin/v plus every emit in a
   2641 ;; function body hammers this. Mutation here is safe: the meta alist
   2642 ;; is private to one cg, scratch-only, and discarded at fn-end.
   2643 (define (%cg-fn-set! cg key val)
   2644   (let* ((meta (cg-fn-meta cg))
   2645          (p    (assq key meta)))
   2646     (cond (p (set-cdr! p val))
   2647           (else (cg-fn-meta-set! cg (cons (cons key val) meta))))))
   2648 
   2649 (define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg)))
   2650 
   2651 (define (%cg-fresh-label cg prefix)
   2652   (let* ((n (cg-label-ctr cg))
   2653          (bv (bytevector-append prefix (%n n))))
   2654     (cg-label-ctr-set! cg (+ n 1))
   2655     bv))
   2656 
   2657 (define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L"))
   2658 (define (%cg-fresh-lbl cg)      (%cg-fresh-label cg "lbl_"))
   2659 
   2660 (define (%cg-bump-outgoing! cg n)
   2661   (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0))
   2662 
   2663 (define (%cg-slot-expr cg logical-off)
   2664   (let ((nm (%cg-fn-get cg '%fn-name)))
   2665     (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")"))))
   2666 
   2667 (define (%cg-mangle-global name-bv)
   2668   (bytevector-append "cc__" name-bv))
   2669 
   2670 ;; Label for a sym at the M1 layer.
   2671 ;;
   2672 ;; C linkage rules drive this directly:
   2673 ;;   - external linkage (the default at file scope, plus any `extern`
   2674 ;;     decl): bare ident. Same label name shared between every decl
   2675 ;;     and the eventual definition, in any order. `extern T memcpy()`
   2676 ;;     links to libp1pp's `:memcpy`; `int g_acc;` and refs to it
   2677 ;;     share `:g_acc`.
   2678 ;;   - internal linkage (`static`): cc__-prefixed. Free to mangle
   2679 ;;     since `static` is invisible across TUs, and the prefix keeps
   2680 ;;     it out of the external/runtime namespace.
   2681 ;; Block-scope statics already mangle their sym-name to
   2682 ;; `<fnname>__<n>` at parse time (see line ~5125); the cc__ prefix
   2683 ;; here just nests another layer of namespacing on top of that.
   2684 (define (%cg-sym-label sm)
   2685   (cond
   2686     ((eq? (sym-storage sm) 'static) (%cg-mangle-global (sym-name sm)))
   2687     (else                            (sym-name sm))))
   2688 
   2689 (define (%cg-reg->bv r) (symbol->string r))
   2690 
   2691 (define (%cg-emit-li cg reg n)
   2692   (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n")))
   2693 
   2694 (define (%cg-emit-la cg reg label-bv)
   2695   (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n")))
   2696 
   2697 (define (%cg-emit-ld-slot cg reg logical-off)
   2698   (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, "
   2699                           (%cg-slot-expr cg logical-off) ")\n")))
   2700 
   2701 (define (%cg-emit-st-slot cg reg logical-off)
   2702   (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, "
   2703                           (%cg-slot-expr cg logical-off) ")\n")))
   2704 
   2705 (define (%cg-emit-ld cg reg base off)
   2706   (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", "
   2707                           (%cg-reg->bv base) ", " (%n off) ")\n")))
   2708 
   2709 (define (%cg-emit-st cg reg base off)
   2710   (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", "
   2711                           (%cg-reg->bv base) ", " (%n off) ")\n")))
   2712 
   2713 ;; Width-aware load/store. Dispatches on ctype-size:
   2714 ;;   1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by
   2715 ;;      shli/sari 56 to materialize the canonical 64-bit form).
   2716 ;;   2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops,
   2717 ;;      and word ops require natural alignment which we can't promise
   2718 ;;      for struct fields or non-word-aligned local slots). Loads
   2719 ;;      gather bytes via %lb + shli/or; stores scatter via shri/%sb.
   2720 ;;      Signed loads (i16/i32) sign-extend via shli/sari to canonical
   2721 ;;      64-bit form.
   2722 ;;   8 (and any other size): %ld / %st.
   2723 ;; Scratch convention: helpers may clobber t1; callers never pass
   2724 ;; reg=t1.
   2725 
   2726 ;; Sub-word loads/stores defer byte-decomposition to libp1pp's
   2727 ;; %ld_h / %ld_w / %ld_sh / %ld_sw / %st_h / %st_w macros (see
   2728 ;; P1/P1pp.P1pp). cc.scm just emits one macro call per access; the
   2729 ;; macro arranges the byte gather/scatter and (for signed loads) folds
   2730 ;; in the sign-extend. t1 is the conventional scratch.
   2731 (define (%cg-emit-ld-sub cg reg base-bv off-bv signed? n-bytes)
   2732   (let ((mname (cond ((= n-bytes 2) (if signed? "%ld_sh(" "%ld_h("))
   2733                      ((= n-bytes 4) (if signed? "%ld_sw(" "%ld_w("))
   2734                      (else (die #f "cg-emit-ld-sub: bad width" n-bytes)))))
   2735     (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", "
   2736                             base-bv ", " off-bv ", t1)\n"))))
   2737 
   2738 (define (%cg-emit-st-sub cg reg base-bv off-bv n-bytes)
   2739   (let ((mname (cond ((= n-bytes 2) "%st_h(")
   2740                      ((= n-bytes 4) "%st_w(")
   2741                      (else (die #f "cg-emit-st-sub: bad width" n-bytes)))))
   2742     (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", "
   2743                             base-bv ", " off-bv ", t1)\n"))))
   2744 
   2745 ;; "address of frame slot" — defers to libp1pp's %lea_slot, which hides
   2746 ;; the backend frame-header offset that %mov(rd, sp) folds in.
   2747 (define (%cg-emit-lea-slot cg reg-bv slot-bv)
   2748   (%cg-emit-many cg (list "%lea_slot(" reg-bv ", " slot-bv ")\n")))
   2749 
   2750 ;; sext8/16/32 emitted via libp1pp's %sext<N>(rd, ra). shift-amount is
   2751 ;; kept as the parameter for call-site clarity (callers think in bit
   2752 ;; widths via the same 56/48/32 amounts they always have).
   2753 (define (%cg-emit-sext cg reg shift-amount)
   2754   (let ((width (cond ((= shift-amount 56) "8")
   2755                      ((= shift-amount 48) "16")
   2756                      ((= shift-amount 32) "32")
   2757                      (else (die #f "cg-emit-sext: bad shift" shift-amount))))
   2758         (rb (%cg-reg->bv reg)))
   2759     (%cg-emit-many cg (list "%sext" width "(" rb ", " rb ")\n"))))
   2760 
   2761 ;; Canonicalize REG against CTYPE's kind: signed narrow types sign-extend,
   2762 ;; unsigned narrow types zero-extend, anything else is left alone (the
   2763 ;; full 64-bit value is already canonical). Used after operations that
   2764 ;; may have left a non-canonical bit pattern in reg — frame-rval load,
   2765 ;; narrowing cast, narrow-typed binop result.
   2766 (define (%cg-canonicalize cg reg ctype)
   2767   (let* ((rb (%cg-reg->bv reg))
   2768          (k  (ctype-kind ctype)))
   2769     (cond
   2770       ((eq? k 'i8)  (%cg-emit-sext cg reg 56))
   2771       ((eq? k 'i16) (%cg-emit-sext cg reg 48))
   2772       ((eq? k 'i32) (%cg-emit-sext cg reg 32))
   2773       ((or (eq? k 'u8) (eq? k 'bool))
   2774        (%cg-emit-many cg (list "%zext8(" rb ", " rb ")\n")))
   2775       ((eq? k 'u16)
   2776        (%cg-emit-many cg (list "%zext16(" rb ", " rb ")\n")))
   2777       ((eq? k 'u32)
   2778        (%cg-emit-many cg (list "%zext32(" rb ", " rb ", t1)\n")))
   2779       (else 0))))
   2780 
   2781 ;; Width-aware load/store core. BASE-BV / OFF-BV are pre-built (so the
   2782 ;; same body serves both the slot variants — base = "sp", off rendered
   2783 ;; through %cg-slot-expr — and the typed variants, where base is a
   2784 ;; register and off is a raw integer rendered via %n). 1-byte uses
   2785 ;; %lb/%sb (with i8 sext); 2- and 4-byte use the sub-word helpers; the
   2786 ;; 8-byte fallback emits a plain %ld/%st against the same base/off.
   2787 (define (%cg-emit-ld-bv cg reg ctype base-bv off-bv)
   2788   (%cg-fp-reject! 'ld ctype)
   2789   (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
   2790          (rb (%cg-reg->bv reg)))
   2791     (cond
   2792       ((= sz 1)
   2793        (%cg-emit-many cg (list "%lb(" rb ", " base-bv ", " off-bv ")\n"))
   2794        (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
   2795       ((= sz 2) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i16) 2))
   2796       ((= sz 4) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i32) 4))
   2797       (else
   2798        (%cg-emit-many cg (list "%ld(" rb ", " base-bv ", " off-bv ")\n"))))))
   2799 
   2800 (define (%cg-emit-st-bv cg reg ctype base-bv off-bv)
   2801   (%cg-fp-reject! 'st ctype)
   2802   (let ((sz (ctype-size ctype))
   2803         (rb (%cg-reg->bv reg)))
   2804     (cond
   2805       ((= sz 1)
   2806        (%cg-emit-many cg (list "%sb(" rb ", " base-bv ", " off-bv ")\n")))
   2807       ((= sz 2) (%cg-emit-st-sub cg reg base-bv off-bv 2))
   2808       ((= sz 4) (%cg-emit-st-sub cg reg base-bv off-bv 4))
   2809       (else
   2810        (%cg-emit-many cg (list "%st(" rb ", " base-bv ", " off-bv ")\n"))))))
   2811 
   2812 (define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
   2813   (%cg-emit-ld-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off)))
   2814 (define (%cg-emit-st-slot-typed cg reg ctype logical-off)
   2815   (%cg-emit-st-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off)))
   2816 
   2817 (define (%cg-emit-ld-typed cg reg ctype base off)
   2818   (%cg-emit-ld-bv cg reg ctype (%cg-reg->bv base) (%n off)))
   2819 (define (%cg-emit-st-typed cg reg ctype base off)
   2820   (%cg-emit-st-bv cg reg ctype (%cg-reg->bv base) (%n off)))
   2821 
   2822 (define (%cg-load-opnd-into cg op reg)
   2823   ;; frame lval: load at type width. frame rval is a spilled word
   2824   ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load.
   2825   ;; global lval width > 1 byte-gathers must not alias dest with base —
   2826   ;; the first %lb would otherwise clobber the address before subsequent
   2827   ;; byte loads. Stage the address in t2.
   2828   (%cg-fp-reject! 'load (opnd-type op))
   2829   (pmatch op
   2830     (($ opnd? (kind imm)    (ext ,n))                (%cg-emit-li cg reg n))
   2831     (($ opnd? (kind frame)  (lval? #t) (type ,ty) (ext ,off))
   2832      (%cg-emit-ld-slot-typed cg reg ty off))
   2833     (($ opnd? (kind frame)  (lval? #f) (type ,ty) (ext ,off))
   2834      ;; Frame rval: spilled as 8 bytes, but the slot's bit-pattern may
   2835      ;; not be canonical for the opnd's CURRENT type (e.g.
   2836      ;; cg-arith-conv relabeled a signed slot as unsigned). Canonicalize
   2837      ;; on load so downstream 64-bit ALU/compare ops see the C-semantic
   2838      ;; value.
   2839      (%cg-emit-ld-slot cg reg off)
   2840      (%cg-canonicalize cg reg ty))
   2841     (($ opnd? (kind frame)  (ext ,off))              (%cg-emit-ld-slot cg reg off))
   2842     (($ opnd? (kind global) (lval? #f) (ext ,lbl))   (%cg-emit-la cg reg lbl))
   2843     (($ opnd? (kind global) (type ,ty)  (ext ,lbl))
   2844      (%cg-emit-la cg 't2 lbl)
   2845      (%cg-emit-ld-typed cg reg ty 't2 0))
   2846     (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op)))))
   2847 
   2848 (define (%cg-spill-reg cg reg ty)
   2849   (let* ((off (cg-alloc-slot cg 8 8))
   2850          (op  (%opnd 'frame ty off #f)))
   2851     (%cg-emit-st-slot cg reg off)
   2852     (cg-vstack-set! cg (cons op (cg-vstack cg)))
   2853     op))
   2854 
   2855 ;; Floating-point softening. Real FP arithmetic is not implemented;
   2856 ;; instead the cg silently treats fp ctypes as same-sized integer
   2857 ;; bit patterns (flt as 4-byte, dbl/ldbl as 8-byte). Loads, stores,
   2858 ;; and same-size casts round-trip the bytes; widening int→fp casts
   2859 ;; leave the int bit-pattern in the wider slot; binops use integer
   2860 ;; ALU ops. tcc.flat.c contains real fp code paths (parse_number,
   2861 ;; ieee_finite, …) that the bootstrap tcc-boot2 never executes when
   2862 ;; compiling float-free programs, so producing valid-but-semantically-
   2863 ;; wrong P1pp here is sufficient. Kept as a named no-op so the call
   2864 ;; sites stay grep-able if a future bootstrap target needs real FP.
   2865 (define (%cg-fp-reject! op-name ty) #t)
   2866 
   2867 (define (%reg-by-idx i)
   2868   (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3)
   2869         (else (die #f "cg: param idx > 3 needs ldarg path" i))))
   2870 
   2871 ;; --------------------------------------------------------------------
   2872 ;; Lifecycle
   2873 ;; --------------------------------------------------------------------
   2874 
   2875 ;; cc-cg fixtures construct a cg directly via (cg-init) — they don't
   2876 ;; emit ELF and don't link against another TU, so library knobs are
   2877 ;; irrelevant. cc-main routes through cg-init/v with the parsed flag.
   2878 (define (cg-init) (cg-init/v #f ""))
   2879 
   2880 (define (cg-init/v lib? str-prefix)
   2881   (%cg (make-buf/cap %BUF-CAP-TEXT)        ; text
   2882        (make-buf/cap %BUF-CAP-DATA)        ; data
   2883        (make-buf/cap %BUF-CAP-BSS)         ; bss
   2884        '()                                  ; vstack
   2885        0                                    ; frame-hi
   2886        0                                    ; label-ctr
   2887        (make-world)                         ; world (shared with pstate)
   2888        '()                                  ; fn-meta
   2889        (make-buf/cap %BUF-CAP-FN)          ; fn-buf (reused per fn)
   2890        (make-buf/cap %BUF-CAP-PROLOGUE)    ; prologue-buf (reused per fn)
   2891        0                                    ; max-outgoing
   2892        #f                                   ; in-fn?
   2893        lib?                                 ; lib? (skip entry stub + :ELF_end)
   2894        str-prefix))                         ; str-prefix (cc__str_N namespacing)
   2895 
   2896 (define (cg-finish cg)
   2897   ;; Tentative file-scope defs (`int x;` / `static int x;` with no
   2898   ;; initializer and not later defined with `=`) get their .bss slot
   2899   ;; here at end of TU. C 6.9.2 — see cg-flush-tentatives!.
   2900   (cg-flush-tentatives! cg)
   2901   ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry)
   2902   ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't
   2903   ;; clobber a0/a1, so falling straight through to main forwards
   2904   ;; them unchanged. The 16-byte frame is just enough for %enter's
   2905   ;; saved-fp/lr to fit; main builds its own frame on top.
   2906   ;;
   2907   ;; In lib mode the stub and :ELF_end are suppressed: the catm chain
   2908   ;; supplies them once, from P1/entry-*.P1pp and P1/elf-end.P1pp, so
   2909   ;; library TUs don't fight the executable TU for ownership of
   2910   ;; :p1_main and don't truncate ELF p_filesz at the first inner
   2911   ;; :ELF_end (hex2 sizes off the first one it sees).
   2912   (cond
   2913     ((not (cg-lib? cg))
   2914      (let ((tb (cg-text cg)))
   2915        (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to main\n")
   2916        (buf-push! tb "%fn(p1_main, 16, {\n")
   2917        (buf-push! tb "%call(&main)\n")
   2918        (buf-push! tb "})\n"))))
   2919   (bv-cat (list (buf-flush (cg-text cg))
   2920                 (buf-flush (cg-data cg))
   2921                 (buf-flush (cg-bss  cg))
   2922                 (cond ((cg-lib? cg) "")
   2923                       (else ":ELF_end\n")))))
   2924 
   2925 (define (cg-fn-begin cg name params return-type)
   2926   (cg-fn-begin/v cg name params return-type #f))
   2927 
   2928 ;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte
   2929 ;; slots covering incoming arg indices 0..15, populating each from the
   2930 ;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for
   2931 ;; idx 4..15. va_start computes the address of the slot at index =
   2932 ;; named-arg count, so va_arg walks linearly through the rest.
   2933 ;; Indices 4..15 may be garbage when the caller passed fewer args; user
   2934 ;; code stops walking based on a count or sentinel before those slots
   2935 ;; are read. Limit of 15 variadic args (after named) is enough for
   2936 ;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more.
   2937 (define (cg-fn-begin/v cg name params return-type variadic?)
   2938   (buf-reset!           (cg-fn-buf       cg))
   2939   (buf-reset!           (cg-prologue-buf cg))
   2940   (cg-in-fn?-set!       cg #t)
   2941   (cg-vstack-set!       cg '())
   2942   (cg-frame-hi-set!     cg 0)
   2943   ;; cg-label-ctr is NOT reset per-fn. Compiler-internal labels are
   2944   ;; emitted as dotted hex2++ locals inside %fn's .scope (and sometimes
   2945   ;; nested .scope blocks), so within-TU collisions are already prevented
   2946   ;; by local lookup. Keeping the counter monotonic across functions is
   2947   ;; no longer required for correctness, just for stable, readable label
   2948   ;; names in expanded.M1 traces.
   2949   (cg-max-outgoing-set! cg 0)
   2950   (cg-fn-meta-set!      cg '())
   2951   (%cg-fn-set! cg '%fn-name        name)
   2952   (%cg-fn-set! cg '%fn-ret-type    return-type)
   2953   (%cg-fn-set! cg '%indirect-slots '())
   2954   (%cg-fn-set! cg '%fn-variadic?   variadic?)
   2955   ;; Return slot per P1.md §Arguments. ≤8B → a0; 9–16B → a0+a1; >16B
   2956   ;; struct/union → indirect-result (A2): caller passes sret ptr in
   2957   ;; a0; cg-return writes through it; sret-slot saves a0 for cg-fn-end.
   2958   (let* ((rsz (cond ((eq? (ctype-kind return-type) 'void) 8)
   2959                     (else (align-up (max 8 (ctype-size return-type)) 8))))
   2960          (ret-slot (cg-alloc-slot cg rsz 8)))
   2961     (%cg-fn-set! cg '%fn-ret-slot ret-slot)
   2962     (cond
   2963       ((not (eq? (ctype-kind return-type) 'void))
   2964        (let zinit ((k 0))
   2965          (cond
   2966            ((>= k rsz) #t)
   2967            (else
   2968             (buf-push! (cg-prologue-buf cg)
   2969                        (bv-cat (list "%li(t0, 0)\n"
   2970                                      "%st(t0, sp, "
   2971                                      (%cg-slot-expr cg (+ ret-slot k))
   2972                                      ")\n")))
   2973             (zinit (+ k 8))))))))
   2974   (let* ((rk    (ctype-kind return-type))
   2975          (sret? (and (or (eq? rk 'struct) (eq? rk 'union))
   2976                      (> (ctype-size return-type) 16))))
   2977     (%cg-fn-set! cg '%fn-sret? sret?)
   2978     (cond
   2979       (sret?
   2980        (let ((ss (cg-alloc-slot cg 8 8)))
   2981          (%cg-fn-set! cg '%fn-sret-slot ss)
   2982          (buf-push! (cg-prologue-buf cg)
   2983                     (bv-cat (list "%st(a0, sp, "
   2984                                   (%cg-slot-expr cg ss) ")\n")))))
   2985       (else (%cg-fn-set! cg '%fn-sret-slot #f))))
   2986   ;; Variadic save area is capped at 16 incoming-arg slots; reject
   2987   ;; variadic definitions whose named-arg count would already fill or
   2988   ;; exceed it (no room left for variadic reads).
   2989   (cond
   2990     ((and variadic? (> (length params) 16))
   2991      (die #f "cg-fn-begin: variadic function exceeds 16-arg save-area cap"
   2992           name (length params))))
   2993   ;; With sret, explicit arg i lives at ABI position (i+1): args 0..2
   2994   ;; in a1..a3, args 3+ in slot (i-3).
   2995   (let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0))
   2996          (spill (lambda (abi off)
   2997                   (cond
   2998                     ((< abi 4)
   2999                      (buf-push! (cg-prologue-buf cg)
   3000                                 (bv-cat (list "%st(" (%cg-reg->bv (%reg-by-idx abi))
   3001                                               ", sp, "
   3002                                               (%cg-slot-expr cg off) ")\n"))))
   3003                     (else
   3004                      (buf-push! (cg-prologue-buf cg)
   3005                                 (bv-cat (list "%ldarg(t0, " (%n (- abi 4)) ")\n"
   3006                                               "%st(t0, sp, "
   3007                                               (%cg-slot-expr cg off) ")\n"))))))))
   3008     (let walk ((ps params) (idx 0) (out '()) (first-slot #f))
   3009       (cond
   3010         ((null? ps)
   3011          (cond
   3012            (variadic?
   3013             (let pad ((i idx) (vfirst #f) (fs first-slot))
   3014               (cond
   3015                 ((>= i 16)
   3016                  (%cg-fn-set! cg '%fn-vararg-first-slot (or vfirst fs))
   3017                  (reverse out))
   3018                 (else
   3019                  (let ((off (cg-alloc-slot cg 8 8)))
   3020                    (spill (+ i sret-shift) off)
   3021                    (pad (+ i 1) (or vfirst off) (or fs off)))))))
   3022            (else (reverse out))))
   3023         (else
   3024          (let* ((p    (car ps))
   3025                 (nm   (car p))
   3026                 (ty   (cdr p))
   3027                 ;; AAPCS: 9..16B aggregates ride two consecutive arg
   3028                 ;; positions (regs or stack slots), wider-than-16B
   3029                 ;; aggregates would normally pass by reference — not
   3030                 ;; supported here yet.
   3031                 (n    (%cg-param-reg-count ty))
   3032                 (sz   (cond ((%cg-param-aggregate? ty)
   3033                              (align-up (ctype-size ty) 8))
   3034                             (else 8)))
   3035                 (al   (cond ((%cg-param-aggregate? ty)
   3036                              (max 8 (ctype-align ty)))
   3037                             (else 8)))
   3038                 (off  (cg-alloc-slot cg sz al))
   3039                 (psym (%sym nm 'param #f ty off #t)))
   3040            (let chunk ((i 0))
   3041              (cond ((>= i n) 0)
   3042                    (else
   3043                     (spill (+ idx sret-shift i) (+ off (* i 8)))
   3044                     (chunk (+ i 1)))))
   3045            (walk (cdr ps) (+ idx n) (cons (cons nm psym) out)
   3046                  (or first-slot off))))))))
   3047 
   3048 ;; Number of consecutive ABI slots (regs or stack words) consumed by a
   3049 ;; parameter of TY. Aggregates ≤16B take ⌈size/8⌉; everything else 1.
   3050 (define (%cg-param-reg-count ty)
   3051   (cond
   3052     ((%cg-param-aggregate? ty)
   3053      (let ((sz (ctype-size ty)))
   3054        (cond
   3055          ((> sz 16)
   3056           (die #f "cg: aggregate arg/param >16B not supported" sz))
   3057          ((> sz 8) 2)
   3058          (else 1))))
   3059     (else 1)))
   3060 
   3061 (define (%cg-param-aggregate? ty)
   3062   (let ((k (ctype-kind ty)))
   3063     (or (eq? k 'struct) (eq? k 'union))))
   3064 
   3065 (define (cg-fn-end cg)
   3066   ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain!
   3067   ;; (memcpy, no allocation). Header/footer pieces go through buf-push!
   3068   ;; on cg-text — also memcpy. Net result: zero net heap allocation in
   3069   ;; cg-fn-end other than the small (%n N) bvs for staging-bytes /
   3070   ;; frame-size, which the enclosing parse-decl-or-fn boundary's
   3071   ;; reset-scratch-heap! reclaims.
   3072   (let* ((name          (%cg-fn-get cg '%fn-name))
   3073          (ret-slot      (%cg-fn-get cg '%fn-ret-slot))
   3074          (ret-type      (%cg-fn-get cg '%fn-ret-type))
   3075          (locals-hi     (cg-frame-hi cg))
   3076          (staging-bytes (* 8 (cg-max-outgoing cg)))
   3077          (raw-size      (+ staging-bytes locals-hi))
   3078          (frame-size    (align-up raw-size 16))
   3079          ;; Look up the bound sym for this fn so `static void foo(){...}`
   3080          ;; emits the same cc__-mangled label that callers reference.
   3081          ;; The sym was bound by parse-fn-body before the body parse,
   3082          ;; so it's in the top scope frame at this point.
   3083          (fn-sym        (alist-ref name (car (world-scope (cg-world cg)))))
   3084          (mangled       (cond (fn-sym (%cg-sym-label fn-sym))
   3085                               (else name)))
   3086          (tb            (cg-text cg)))
   3087     ;; Now that the body is fully emitted, leave fn dispatch so any
   3088     ;; trailing emits in this function (including the ret-block below)
   3089     ;; route to cg-text directly.
   3090     (cg-in-fn?-set! cg #f)
   3091     ;; staging-size macro
   3092     (buf-push! tb "%macro ")
   3093     (buf-push! tb name)
   3094     (buf-push! tb "__SO()\n")
   3095     (buf-push! tb (%n staging-bytes))
   3096     (buf-push! tb "\n%endm\n")
   3097     ;; %fn header
   3098     (buf-push! tb "%fn(")
   3099     (buf-push! tb mangled)
   3100     (buf-push! tb ", ")
   3101     (buf-push! tb (%n frame-size))
   3102     (buf-push! tb ", {\n")
   3103     ;; prologue + body, drained byte-for-byte
   3104     (buf-drain! tb (cg-prologue-buf cg))
   3105     ;; --cc-trace-emit: emit `%trace(&LBL, LEN)` between prologue (which
   3106     ;; spilled live argument regs to slots) and body, so the macro can
   3107     ;; freely clobber a0..a2. The mangled name rides through the
   3108     ;; regular string pool — cg-intern-string emits it with a trailing
   3109     ;; NUL and pads to 8-byte alignment, so the next data label stays
   3110     ;; aligned. We pass the *logical* byte length (no NUL) so the
   3111     ;; runtime print stops at the actual end of the name.
   3112     (cond
   3113       ((trace-emit?)
   3114        (let ((tag-lbl (cg-intern-string cg mangled)))
   3115          (buf-push! tb "%trace(&")
   3116          (buf-push! tb tag-lbl)
   3117          (buf-push! tb ", ")
   3118          (buf-push! tb (%n (bytevector-length mangled)))
   3119          (buf-push! tb ")\n"))))
   3120     (buf-drain! tb (cg-fn-buf cg))
   3121     ;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr.
   3122     (buf-push! tb ":.ret\n")
   3123     (let ((rk (ctype-kind ret-type))
   3124           (sret? (%cg-fn-get cg '%fn-sret?)))
   3125       (cond
   3126         ((eq? rk 'void)
   3127          (buf-push! tb "%li(a0, 0)\n"))
   3128         (sret?
   3129          (buf-push! tb "%ld(a0, sp, ")
   3130          (buf-push! tb (%cg-slot-expr cg (%cg-fn-get cg '%fn-sret-slot)))
   3131          (buf-push! tb ")\n"))
   3132         (else
   3133          (buf-push! tb "%ld(a0, sp, ")
   3134          (buf-push! tb (%cg-slot-expr cg ret-slot))
   3135          (buf-push! tb ")\n")
   3136          (cond
   3137            ((> (ctype-size ret-type) 8)
   3138             (buf-push! tb "%ld(a1, sp, ")
   3139             (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8)))
   3140             (buf-push! tb ")\n"))))))
   3141     (buf-push! tb "})\n")
   3142     (cg-vstack-set!       cg '())
   3143     (cg-frame-hi-set!     cg 0)
   3144     (cg-max-outgoing-set! cg 0)
   3145     0))
   3146 
   3147 ;; --------------------------------------------------------------------
   3148 ;; Vstack
   3149 ;; --------------------------------------------------------------------
   3150 (define (cg-push cg op)
   3151   (cg-vstack-set! cg (cons op (cg-vstack cg)))
   3152   op)
   3153 
   3154 (define (cg-pop cg)
   3155   (let ((s (cg-vstack cg)))
   3156     (cond ((null? s) (die #f "cg-pop: empty vstack"))
   3157           (else (cg-vstack-set! cg (cdr s)) (car s)))))
   3158 
   3159 (define (cg-top cg)
   3160   (let ((s (cg-vstack cg)))
   3161     (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s)))))
   3162 
   3163 (define (cg-depth cg) (length (cg-vstack cg)))
   3164 
   3165 ;; --------------------------------------------------------------------
   3166 ;; Snapshot / rewind — discard any vstack pushes and fn-buf bytes
   3167 ;; emitted between snapshot and rewind. Used by sizeof to parse its
   3168 ;; operand for type information without retaining its side effects
   3169 ;; (CC.md §Expressions: sizeof's operand is not evaluated). Internal-
   3170 ;; only; the parser is the sole expected caller.
   3171 ;;
   3172 ;; vstack captures the head of the cons-list (immutable structurally).
   3173 ;; fn-buf is restored by resetting buf-offset; the underlying storage
   3174 ;; bytes past the new offset become garbage that the next buf-push!
   3175 ;; will overwrite (buf-push! always copies into [offset, offset+len)).
   3176 ;; frame-hi and max-outgoing are also restored so cg-alloc-slot calls
   3177 ;; inside the rewound region don't leak frame bytes.
   3178 ;; --------------------------------------------------------------------
   3179 (define (cg-snapshot cg)
   3180   (cond
   3181     ((not (cg-in-fn? cg))
   3182      (die #f "cg-snapshot: not in fn")))
   3183   (list (cg-vstack cg)
   3184         (buf-offset (cg-fn-buf cg))
   3185         (cg-frame-hi cg)
   3186         (cg-max-outgoing cg)))
   3187 
   3188 (define (cg-rewind cg tag)
   3189   (cg-vstack-set!       cg (car tag))
   3190   (buf-offset-set!      (cg-fn-buf cg) (cadr tag))
   3191   (cg-frame-hi-set!     cg (caddr tag))
   3192   (cg-max-outgoing-set! cg (cadddr tag)))
   3193 
   3194 ;; Duplicate the top vstack entry. For lvals this is safe — the slot
   3195 ;; (or label, or indirect-marked frame) backing the lval keeps existing
   3196 ;; until the function ends. For rvals it duplicates the descriptor of
   3197 ;; the spilled value; both copies refer to the same already-emitted
   3198 ;; storage. Used for `lhs += rhs` and `++lhs` to preserve the lhs
   3199 ;; across a `cg-load` so the subsequent `cg-assign` still has its
   3200 ;; address.
   3201 (define (cg-dup cg)
   3202   (let ((p (cg-top cg))) (cg-push cg p) p))
   3203 
   3204 ;; --------------------------------------------------------------------
   3205 ;; Materialize
   3206 ;; --------------------------------------------------------------------
   3207 (define (cg-push-imm cg ctype value)
   3208   (cg-push cg (%opnd 'imm ctype value #f)))
   3209 
   3210 (define (cg-push-string cg bv-content)
   3211   (let* ((label (cg-intern-string cg bv-content))
   3212          (cp-ty (%ctype 'ptr 8 8 %t-i8)))
   3213     (cg-push cg (%opnd 'global cp-ty label #f))))
   3214 
   3215 (define (cg-push-sym cg sm)
   3216   (pmatch sm
   3217     (($ sym? (kind fn) (type ,ty))
   3218      (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #f)))
   3219     (($ sym? (kind enum-const) (type ,ty) (slot ,v))
   3220      (cg-push cg (%opnd 'imm ty v #f)))
   3221     (($ sym? (kind var) (storage extern) (type ,ty))
   3222      (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t)))
   3223     (($ sym? (kind var) (storage static) (type ,ty))
   3224      (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t)))
   3225     (($ sym? (kind var) (type ,ty) (slot ,off))
   3226      (cg-push cg (%opnd 'frame ty off #t)))
   3227     (($ sym? (kind param) (type ,ty) (slot ,off))
   3228      (cg-push cg (%opnd 'frame ty off #t)))
   3229     (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sm)))))
   3230 
   3231 ;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS
   3232 ;; (not the value). To distinguish from ordinary frame-lvals (whose
   3233 ;; slot holds the value directly), we tag indirect slots in
   3234 ;; %indirect-slots so cg-load and cg-assign can do the extra
   3235 ;; indirection.
   3236 (define (%cg-mark-indirect! cg off)
   3237   (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
   3238     (%cg-fn-set! cg '%indirect-slots (cons off cur))))
   3239 
   3240 (define (%cg-indirect? cg off)
   3241   (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
   3242     (let loop ((xs cur))
   3243       (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs)))))))
   3244 
   3245 (define (cg-push-deref cg)
   3246   (let* ((p  (cg-pop cg))
   3247          (pt (opnd-type p))
   3248          (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt))
   3249                    ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt)))
   3250                    (else #f))))
   3251     (cond
   3252       ((not pe) (die #f "cg-push-deref: not a pointer" pt))
   3253       (else
   3254        (%cg-load-opnd-into cg p 't0)
   3255        (let ((off (cg-alloc-slot cg 8 8)))
   3256          (%cg-emit-st-slot cg 't0 off)
   3257          (%cg-mark-indirect! cg off)
   3258          (cg-push cg (%opnd 'frame pe off #t)))))))
   3259 
   3260 ;; --------------------------------------------------------------------
   3261 ;; Aggregate field access (§D.1–D.4)
   3262 ;; --------------------------------------------------------------------
   3263 ;; cg-push-field cg fname:
   3264 ;;   pop a struct/union lval; look up `fname` in the struct's fields
   3265 ;;   list (data.scm: ext = (tag complete? fields), where each field
   3266 ;;   is (name-bv ctype offset)); push a new lval at the field's
   3267 ;;   offset with the field's ctype.
   3268 ;;
   3269 ;; Three input cases:
   3270 ;;   - direct frame lval at slot `off`        -> frame lval at off+fo
   3271 ;;   - indirect frame lval (slot holds addr)  -> new indirect slot for
   3272 ;;                                                addr+fo
   3273 ;;   - global lval at label L                 -> indirect slot for
   3274 ;;                                                la(L)+fo
   3275 ;; In all cases the resulting lval has the field's ctype.
   3276 
   3277 ;; Look up FNAME in FIELDS. C11 §6.7.2.1: a struct/union member with no
   3278 ;; declarator (e.g. `union { int a; int b; };` inside another struct) is
   3279 ;; an "anonymous member" — its members are addressed as if they belonged
   3280 ;; directly to the enclosing aggregate. We recurse into any name=#f
   3281 ;; member of struct/union kind, composing the outer member's offset with
   3282 ;; the inner field's offset, and return a synthetic (name ctype off)
   3283 ;; triple so callers can stay agnostic about anonymity.
   3284 (define (%cg-find-field fields fname)
   3285   (let loop ((xs fields))
   3286     (cond
   3287       ((null? xs) #f)
   3288       (else
   3289        (let* ((f (car xs))
   3290               (fn (car f)))
   3291          (cond
   3292            ((and fn (bv= fn fname)) f)
   3293            ((and (not fn)
   3294                  (let ((k (ctype-kind (cadr f))))
   3295                    (or (eq? k 'struct) (eq? k 'union))))
   3296             (let* ((sub-ext (ctype-ext (cadr f)))
   3297                    (sub-fields (car (cddr sub-ext)))
   3298                    (hit (%cg-find-field sub-fields fname)))
   3299               (cond
   3300                 (hit (list (car hit)
   3301                            (cadr hit)
   3302                            (+ (car (cddr f)) (car (cddr hit)))))
   3303                 (else (loop (cdr xs))))))
   3304            (else (loop (cdr xs)))))))))
   3305 
   3306 (define (cg-push-field cg fname)
   3307   (let* ((s   (cg-pop cg))
   3308          (sty (opnd-type s))
   3309          (k   (ctype-kind sty)))
   3310     (cond
   3311       ((not (or (eq? k 'struct) (eq? k 'union)))
   3312        (die #f "cg-push-field: not a struct/union" k))
   3313       ((not (opnd-lval? s))
   3314        (die #f "cg-push-field: not an lvalue" k))
   3315       (else
   3316        (let* ((fields (car (cddr (ctype-ext sty))))
   3317               (f (%cg-find-field fields fname)))
   3318          (cond
   3319            ((not f) (die #f "cg-push-field: no such field" fname))
   3320            (else
   3321             (let* ((fty (cadr f)) (fo (car (cddr f))))
   3322               (pmatch s
   3323                 ;; direct frame lval: just shift the slot offset.
   3324                 (($ opnd? (kind frame) (ext ,off))
   3325                  (guard (not (%cg-indirect? cg off)))
   3326                  (cg-push cg (%opnd 'frame fty (+ off fo) #t)))
   3327                 ;; indirect frame lval: addr lives in the slot. Compute
   3328                 ;; addr+fo into a new indirect slot.
   3329                 (($ opnd? (kind frame) (ext ,off))
   3330                  (%cg-emit-ld-slot cg 't0 off)
   3331                  (cond
   3332                    ((> fo 0)
   3333                     (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
   3334                  (let ((no (cg-alloc-slot cg 8 8)))
   3335                    (%cg-emit-st-slot cg 't0 no)
   3336                    (%cg-mark-indirect! cg no)
   3337                    (cg-push cg (%opnd 'frame fty no #t))))
   3338                 ;; global lval: load addr, add offset, indirect slot.
   3339                 (($ opnd? (kind global) (ext ,lbl))
   3340                  (%cg-emit-la cg 't0 lbl)
   3341                  (cond
   3342                    ((> fo 0)
   3343                     (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
   3344                  (let ((no (cg-alloc-slot cg 8 8)))
   3345                    (%cg-emit-st-slot cg 't0 no)
   3346                    (%cg-mark-indirect! cg no)
   3347                    (cg-push cg (%opnd 'frame fty no #t))))
   3348                 (else
   3349                  (die #f "cg-push-field: unsupported lval kind"
   3350                       (opnd-kind s))))))))))))
   3351 
   3352 ;; cg-decay-array:
   3353 ;;   if top of vstack is an arr-typed lval, replace it with a ptr-rval
   3354 ;;   to the first element. C arrays decay to T* in most contexts;
   3355 ;;   parse calls this before rval-style operations. No-op otherwise.
   3356 (define (cg-decay-array cg)
   3357   (let ((tp (cg-top cg)))
   3358     (cond
   3359       ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr))
   3360        (let* ((p   (cg-pop cg))
   3361               (et  (car (ctype-ext (opnd-type p))))
   3362               (pty (%ctype 'ptr 8 8 et)))
   3363          (pmatch p
   3364            ;; direct frame lval: address is sp+off.
   3365            (($ opnd? (kind frame) (ext ,off))
   3366             (guard (not (%cg-indirect? cg off)))
   3367             (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off))
   3368             (%cg-spill-reg cg 't0 pty))
   3369            ;; indirect frame lval (rare for arrays, but support it):
   3370            ;; the slot holds the address already.
   3371            (($ opnd? (kind frame) (ext ,off))
   3372             (%cg-emit-ld-slot cg 't0 off)
   3373             (%cg-spill-reg cg 't0 pty))
   3374            ;; global array: la(label) is the address.
   3375            (($ opnd? (kind global) (ext ,lbl))
   3376             (%cg-emit-la cg 't0 lbl)
   3377             (%cg-spill-reg cg 't0 pty))
   3378            (else (die #f "cg-decay-array: unsupported lval kind"
   3379                       (opnd-kind p))))))
   3380       (else tp))))
   3381 
   3382 ;; --------------------------------------------------------------------
   3383 ;; Address & deref
   3384 ;; --------------------------------------------------------------------
   3385 
   3386 ;; Materialize the address of an lval `op` directly into `reg`.
   3387 ;; Variant of cg-take-addr that doesn't spill — used by struct copy
   3388 ;; primitives (cg-return on struct, cg-copy-struct, cg-assign-struct,
   3389 ;; cg-call's struct receive). Caller owns the opnd (already popped).
   3390 ;;
   3391 ;; A frame opnd is treated as a slot whose address we want: if it's a
   3392 ;; flagged-indirect lval (slot holds a pointer to the real storage),
   3393 ;; load the pointer; otherwise the slot itself IS the storage and we
   3394 ;; lea its address. Frame rvals are temp spills — address = &slot. A
   3395 ;; global opnd's label is the address. Callers that require an lval
   3396 ;; check it before calling.
   3397 (define (%cg-emit-addr-of cg op reg)
   3398   (let ((reg-bv (%cg-reg->bv reg)))
   3399     (pmatch op
   3400       (($ opnd? (kind frame) (lval? #t) (ext ,off))
   3401        (guard (%cg-indirect? cg off))
   3402        (%cg-emit-ld-slot cg reg off))
   3403       (($ opnd? (kind frame) (ext ,off))
   3404        (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off)))
   3405       (($ opnd? (kind global) (ext ,lbl))
   3406        (%cg-emit-la cg reg lbl))
   3407       (else (die #f "cg-emit-addr-of: unsupported opnd"
   3408                  (opnd-kind op) (opnd-lval? op))))))
   3409 
   3410 ;; cg-copy-struct: pop src lval, pop dst lval, emit per-byte copy
   3411 ;; from src to dst (both must be lvals of the same struct/union type).
   3412 ;; Used by parser for struct-typed assignment / initializer-from-call
   3413 ;; targets. Pushes nothing.
   3414 (define (cg-copy-struct cg)
   3415   (let* ((src (cg-pop cg))
   3416          (dst (cg-pop cg))
   3417          (sty (opnd-type dst))
   3418          (sz  (ctype-size sty)))
   3419     (cond
   3420       ((not (opnd-lval? src)) (die #f "cg-copy-struct: src not lvalue"))
   3421       ((not (opnd-lval? dst)) (die #f "cg-copy-struct: dst not lvalue")))
   3422     (%cg-emit-addr-of cg src 't0)
   3423     (%cg-emit-addr-of cg dst 't2)
   3424     (%cg-emit-byte-copy cg 't2 't0 't1 sz)))
   3425 
   3426 ;; Struct/union `=` assignment: pop src lval, pop dst lval, memcpy,
   3427 ;; then push dst back so the assignment expression has a result for
   3428 ;; the surrounding parser to consume (parse-expr-stmt's trailing
   3429 ;; cg-pop, etc.). Distinct from cg-copy-struct because the
   3430 ;; initializer caller needs no result on the vstack.
   3431 ;;
   3432 ;; The src may be either a frame lvalue (named local slot, *p deref,
   3433 ;; callee return-slot) or a frame rvalue (anonymous slot from a temp
   3434 ;; spill); %cg-emit-addr-of handles both shapes by treating the slot
   3435 ;; itself as the address whenever the lval indirection flag isn't set.
   3436 (define (cg-assign-struct cg)
   3437   (let* ((src (cg-pop cg))
   3438          (dst (cg-pop cg))
   3439          (sty (opnd-type dst))
   3440          (sz  (ctype-size sty)))
   3441     (cond ((not (opnd-lval? dst)) (die #f "cg-assign-struct: dst not lvalue")))
   3442     (%cg-emit-addr-of cg src 't0)
   3443     (%cg-emit-addr-of cg dst 't2)
   3444     (%cg-emit-byte-copy cg 't2 't0 't1 sz)
   3445     (cg-push cg dst)))
   3446 
   3447 ;; Struct copy: defer to libp1pp memcpy via %memcpy_call. dst-reg and
   3448 ;; src-reg hold the addresses; size is the byte count. tmp-reg is no
   3449 ;; longer needed by this helper (kept in the signature so existing
   3450 ;; callers don't have to thread their scratch allocation differently),
   3451 ;; but the macro itself uses a0/a1/a2 around the call. dst-reg and
   3452 ;; src-reg must not be a0 (the dst move would clobber a different live
   3453 ;; input register); both current callers use t-regs.
   3454 (define (%cg-emit-byte-copy cg dst-reg src-reg tmp-reg size)
   3455   (%cg-emit-many cg (list "%memcpy_call("
   3456                           (%cg-reg->bv dst-reg) ", "
   3457                           (%cg-reg->bv src-reg) ", "
   3458                           (%n size) ")\n")))
   3459 
   3460 (define (cg-take-addr cg)
   3461   (let* ((p   (cg-pop cg))
   3462          (ty  (opnd-type p))
   3463          ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on
   3464          ;; the result scales by sizeof(T[N]) (the whole array), so
   3465          ;; &arr + 1 is one-past-end. Array-to-pointer decay happens
   3466          ;; on use via cg-decay-array, not at the & operator.
   3467          (pty (%ctype 'ptr 8 8 ty)))
   3468     (pmatch p
   3469       ;; &function: a function designator (rval of fn type pushed by
   3470       ;; cg-push-sym) already evaluates to its entry-point address. The
   3471       ;; `&` is a no-op semantically — re-tag the operand as ptr-to-fn.
   3472       (($ opnd? (kind global) (type ,t) (ext ,lbl) (lval? #f))
   3473        (guard (eq? (ctype-kind t) 'fn))
   3474        (cg-push cg (%opnd 'global pty lbl #f)))
   3475       (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue"))
   3476       ;; The address itself lives at sp+slot — &*p degenerates to p.
   3477       (($ opnd? (kind frame) (ext ,off))
   3478        (guard (%cg-indirect? cg off))
   3479        (%cg-emit-ld-slot cg 't0 off)
   3480        (%cg-spill-reg cg 't0 pty))
   3481       ;; %lea_slot wraps the "%mov(rd, sp); %addi(rd, rd, slot)" idiom;
   3482       ;; the backend hides any frame-header offset inside %mov(rd, sp).
   3483       (($ opnd? (kind frame) (ext ,off))
   3484        (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off))
   3485        (%cg-spill-reg cg 't0 pty))
   3486       (($ opnd? (kind global) (ext ,lbl))
   3487        (%cg-emit-la cg 't0 lbl)
   3488        (%cg-spill-reg cg 't0 pty))
   3489       (else (die #f "cg-take-addr: non-addressable" (opnd-kind p))))))
   3490 
   3491 (define (cg-load cg)
   3492   (let* ((p (cg-pop cg)) (ty (opnd-type p)))
   3493     (cond
   3494       ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue"))
   3495       ;; Array lvalues decay to a ptr-rval addressing the first
   3496       ;; element (C array-to-pointer decay). We push the lval back
   3497       ;; and route through cg-decay-array for a single source of truth.
   3498       ((eq? (ctype-kind ty) 'arr)
   3499        (cg-push cg p) (cg-decay-array cg))
   3500       ;; Struct/union lvalues stay as lvalues — there is no
   3501       ;; register-sized rvalue form for an aggregate, and the
   3502       ;; existing 8-byte spill path silently truncated anything
   3503       ;; wider (the bug that broke `c = cond ? a : b` for
   3504       ;; sizeof(struct) > 8). Surrounding expression machinery
   3505       ;; (cg-ifelse-merge / cg-assign-struct / cg-call) consumes
   3506       ;; aggregate operands as lvalues already.
   3507       ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
   3508        (cg-push cg p))
   3509       ((and (eq? (opnd-kind p) 'frame)
   3510             (%cg-indirect? cg (opnd-ext p)))
   3511        ;; Indirect frame-lval: slot holds the address. Stage the
   3512        ;; address in t2 so multi-byte gathers don't alias dest with
   3513        ;; base.
   3514        (%cg-emit-ld-slot cg 't2 (opnd-ext p))
   3515        (%cg-emit-ld-typed cg 't0 ty 't2 0)
   3516        (%cg-spill-reg cg 't0 ty))
   3517       (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty)))))
   3518 
   3519 ;; --------------------------------------------------------------------
   3520 ;; Type conversions
   3521 ;; --------------------------------------------------------------------
   3522 (define (cg-cast cg to-type)
   3523   (let* ((p       (cg-pop cg))
   3524          (from-ty (opnd-type p))
   3525          (from-sz (ctype-size from-ty))
   3526          (to-sz   (ctype-size to-type))
   3527          (to-kind (ctype-kind to-type)))
   3528     (%cg-fp-reject! 'cast-to to-type)
   3529     (%cg-fp-reject! 'cast-from from-ty)
   3530     (cond
   3531       ((eq? to-kind 'bool)
   3532        (%cg-load-opnd-into cg p 't0)
   3533        (%cg-emit-many cg (list "%bool(t0, t0)\n"))
   3534        (%cg-spill-reg cg 't0 to-type))
   3535       ((or (eq? to-kind 'ptr)
   3536            (and (or (eq? to-kind 'i64) (eq? to-kind 'u64))
   3537                 (or (eq? (ctype-kind from-ty) 'ptr)
   3538                     (eq? (ctype-kind from-ty) 'arr))))
   3539        (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
   3540       ;; Same-size or widening cast — retag only when the canonical
   3541       ;; 64-bit slot form for FROM-TY is also canonical for TO-TYPE.
   3542       ;; That holds unless we're crossing from a signed type into an
   3543       ;; unsigned one of the same or wider width: the source's
   3544       ;; sign-extended high bits would leak past the unsigned width
   3545       ;; and corrupt later 64-bit operands (compares, wider casts).
   3546       ;; Same applies to same-size unsigned→signed at narrow widths
   3547       ;; (the narrow branch's sign-extend turns 0xCA back into the
   3548       ;; canonical i8 slot 0xFF…FFCA).
   3549       ((and (>= to-sz from-sz)
   3550             (not (and (not (%ctype-unsigned? from-ty))
   3551                       (%ctype-unsigned? to-type)))
   3552             (not (and (= to-sz from-sz)
   3553                       (%ctype-unsigned? from-ty)
   3554                       (not (%ctype-unsigned? to-type)))))
   3555        (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
   3556       (else
   3557        ;; Narrowing cast OR same/widening with signedness flip.
   3558        ;; Signed targets (i8/i16/i32) shli/sari to truncate-and-
   3559        ;; sign-extend in one step, so the slot holds the canonical
   3560        ;; 64-bit form and a subsequent widening cast (which is
   3561        ;; relabel-only) restores the value. Unsigned targets mask
   3562        ;; off high bits to zero-extend.
   3563        (%cg-load-opnd-into cg p 't0)
   3564        (%cg-canonicalize cg 't0 to-type)
   3565        (%cg-spill-reg cg 't0 to-type)))))
   3566 
   3567 (define (cg-promote cg)
   3568   (let* ((p  (cg-pop cg))
   3569          (ty (opnd-type p))
   3570          (sz (ctype-size ty)))
   3571     (cond
   3572       ;; C 6.3.1.1: _Bool, char, short, and any narrower int type
   3573       ;; promote to (signed) int — every representable value fits
   3574       ;; in i32. Treating narrow unsigned types as u32 here would
   3575       ;; drag the subsequent arith-conv into picking the unsigned
   3576       ;; common type, flipping signedness of `>>`, comparisons,
   3577       ;; division, etc. against the C rule. Canonical form for any
   3578       ;; in-range narrow value already matches i32, so the cast is
   3579       ;; relabel-only.
   3580       ((< sz 4)
   3581        (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p))))
   3582       (else (cg-push cg p)))))
   3583 
   3584 (define (cg-arith-conv cg)
   3585   ;; Usual arithmetic conversions on arithmetic operands. When either
   3586   ;; operand is a pointer (or array,
   3587   ;; which behaves as a pointer in arithmetic), the pair is a
   3588   ;; pointer-arith case — leave the types alone so cg-binop can detect
   3589   ;; the ptr operand and apply the right scaling.
   3590   (let* ((b  (cg-pop cg))
   3591          (a  (cg-pop cg))
   3592          (ta (opnd-type a))
   3593          (tb (opnd-type b))
   3594          (sa (ctype-size ta))
   3595          (sb (ctype-size tb)))
   3596     (cond
   3597       ;; Pointer/array arithmetic: leave types alone so cg-binop's
   3598       ;; ptr-aware add/sub branch fires with the correct pointee type
   3599       ;; (and doesn't see two pointers, which would skip scaling).
   3600       ((or (%ctype-ptr? ta) (%ctype-ptr? tb))
   3601        (cg-push cg a)
   3602        (cg-push cg b))
   3603       (else
   3604        (let ((common (cond
   3605                        ((> sa sb) ta)
   3606                        ((> sb sa) tb)
   3607                        ((%ctype-unsigned? ta) ta)
   3608                        ((%ctype-unsigned? tb) tb)
   3609                        (else ta))))
   3610          ;; Route through cg-cast (rather than relabel only) so the
   3611          ;; canonical 64-bit slot form lines up with COMMON. Same-size
   3612          ;; cross-signedness conversions (i32→u32, u32→i32, …) need an
   3613          ;; actual zext/sext to canonicalize; otherwise an i32 -3
   3614          ;; relabeled to u32 keeps its sign-extended slot bits and
   3615          ;; compares unequal to a u32 imm with the same C value.
   3616          (cg-push cg a) (cg-cast cg common)
   3617          (let ((a* (cg-pop cg)))
   3618            (cg-push cg b) (cg-cast cg common)
   3619            (let ((b* (cg-pop cg)))
   3620              (cg-push cg a*)
   3621              (cg-push cg b*))))))))
   3622 
   3623 ;; --------------------------------------------------------------------
   3624 ;; Operators
   3625 ;; --------------------------------------------------------------------
   3626 (define (%cg-emit-rrr cg op rd ra rb)
   3627   (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", "
   3628                           (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n")))
   3629 
   3630 (define (%cg-emit-cmp cg cc ra rb rd)
   3631   (%cg-emit-many cg (list "%cmpset_" cc "("
   3632                           (%cg-reg->bv rd) ", "
   3633                           (%cg-reg->bv ra) ", " (%cg-reg->bv rb)
   3634                           ")\n")))
   3635 
   3636 (define (cg-binop cg op)
   3637   (let* ((b  (cg-pop cg))
   3638          (a  (cg-pop cg))
   3639          (ta (opnd-type a))
   3640          (tb (opnd-type b))
   3641          (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb)))
   3642          (a-ptr? (%ctype-ptr? ta))
   3643          (b-ptr? (%ctype-ptr? tb))
   3644          (result-ty
   3645           (cond
   3646             ((or (eq? op 'eq) (eq? op 'ne)
   3647                  (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge))
   3648              %t-i32)
   3649             ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64)
   3650             (a-ptr? ta)
   3651             (b-ptr? tb)
   3652             (else ta))))
   3653     (cond
   3654       ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?))
   3655        (%cg-load-opnd-into cg a 'a0)
   3656        (%cg-load-opnd-into cg b 'a1)
   3657        (let ((sz   (ctype-size (%ctype-pointee ta)))
   3658              (mac (if (eq? op 'add) "%ptr_add(" "%ptr_sub(")))
   3659          (%cg-emit-many cg (list mac "t0, a0, a1, " (%n sz) ", t1)\n")))
   3660        (%cg-spill-reg cg 't0 result-ty))
   3661       ((and b-ptr? (eq? op 'add) (not a-ptr?))
   3662        (%cg-load-opnd-into cg a 'a0)
   3663        (%cg-load-opnd-into cg b 'a1)
   3664        (let ((sz (ctype-size (%ctype-pointee tb))))
   3665          (%cg-emit-many cg (list "%ptr_add(t0, a1, a0, " (%n sz) ", t1)\n")))
   3666        (%cg-spill-reg cg 't0 result-ty))
   3667       ((and a-ptr? b-ptr? (eq? op 'sub))
   3668        (%cg-load-opnd-into cg a 'a0)
   3669        (%cg-load-opnd-into cg b 'a1)
   3670        (let ((sz (ctype-size (%ctype-pointee ta))))
   3671          (%cg-emit-many cg (list "%ptr_diff(t0, a0, a1, " (%n sz) ", t1)\n")))
   3672        (%cg-spill-reg cg 't0 result-ty))
   3673       (else
   3674        (%cg-load-opnd-into cg a 'a0)
   3675        (%cg-load-opnd-into cg b 'a1)
   3676        (cond
   3677          ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1))
   3678          ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1))
   3679          ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1))
   3680          ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1))
   3681          ((eq? op 'or)  (%cg-emit-rrr cg "or"  't0 'a0 'a1))
   3682          ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1))
   3683          ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1))
   3684          ((eq? op 'shr)
   3685           ;; Shift result type is the promoted LEFT operand's type
   3686           ;; (C 6.5.7); arithmetic vs logical shift must follow that
   3687           ;; signedness alone, not the rhs's. cg-arith-conv may have
   3688           ;; relabeled ta to match an unsigned rhs — guard against
   3689           ;; that by checking the original `a` opnd's signedness.
   3690           (if (%ctype-unsigned? ta)
   3691               (%cg-emit-rrr cg "shr" 't0 'a0 'a1)
   3692               (%cg-emit-rrr cg "sar" 't0 'a0 'a1)))
   3693          ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1))
   3694          ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1))
   3695          ((eq? op 'eq) (%cg-emit-cmp cg "eq"  'a0 'a1 't0))
   3696          ((eq? op 'ne) (%cg-emit-cmp cg "ne"  'a0 'a1 't0))
   3697          ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0))
   3698          ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0))
   3699          ((eq? op 'le) (%cg-emit-cmp cg (if unsigned? "leu" "le") 'a0 'a1 't0))
   3700          ((eq? op 'ge) (%cg-emit-cmp cg (if unsigned? "geu" "ge") 'a0 'a1 't0))
   3701          (else (die #f "cg-binop: unknown op" op)))
   3702        ;; Canonicalize narrow integer results to their type's bit width
   3703        ;; before spilling, so the slot's bit-pattern matches result-ty.
   3704        ;; Compare ops already yield 0/1; skip them. Pointer-arith branches
   3705        ;; above don't reach here.
   3706        (cond
   3707          ((or (eq? op 'eq) (eq? op 'ne)
   3708               (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) 0)
   3709          (else (%cg-canonicalize cg 't0 result-ty)))
   3710        (%cg-spill-reg cg 't0 result-ty)))))
   3711 
   3712 ;; Post-increment / post-decrement on the top-of-vstack lval.
   3713 ;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store.
   3714 ;; Uses cg-dup + cg-load to capture the old rval (which is then in a
   3715 ;; never-reused spill slot), then runs the regular dup+load+add+assign
   3716 ;; pattern for the store. Pointer scaling falls out of cg-binop add.
   3717 (define (%cg-post-inc-dec cg op)
   3718   (cg-dup cg)
   3719   (cg-load cg)
   3720   (let ((old (cg-pop cg)))
   3721     (cg-dup cg)
   3722     (cg-load cg)
   3723     (cg-push-imm cg %t-i32 1)
   3724     (cg-binop cg op)
   3725     (cg-assign cg)
   3726     (cg-pop cg)
   3727     (cg-push cg old)))
   3728 
   3729 (define (cg-postinc cg) (%cg-post-inc-dec cg 'add))
   3730 (define (cg-postdec cg) (%cg-post-inc-dec cg 'sub))
   3731 
   3732 (define (cg-unop cg op)
   3733   (let* ((p  (cg-pop cg)) (ty (opnd-type p)))
   3734     (%cg-load-opnd-into cg p 't0)
   3735     (cond
   3736       ((eq? op 'neg)
   3737        (%cg-emit-many cg (list "%neg(t0, t0, t1)\n"))
   3738        (%cg-spill-reg cg 't0 ty))
   3739       ((eq? op 'bnot)
   3740        (%cg-emit-many cg (list "%bnot(t0, t0, t1)\n"))
   3741        (%cg-spill-reg cg 't0 ty))
   3742       ((eq? op 'lnot)
   3743        (%cg-emit-many cg (list "%cmpset_eqz(t0, t0)\n"))
   3744        (%cg-spill-reg cg 't0 %t-i32))
   3745       (else (die #f "cg-unop: unknown op" op)))))
   3746 
   3747 (define (cg-assign cg)
   3748   ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek
   3749   ;; deeper than vstack top to do this itself), emits the store, pushes
   3750   ;; the assigned value as the result rval.
   3751   (let* ((rhs0 (cg-pop cg))
   3752          (lhs  (cg-pop cg))
   3753          (ty   (opnd-type lhs)))
   3754     (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue")))
   3755     ;; Cast rhs to lhs's type (no-op when the types already match).
   3756     (cg-push cg rhs0)
   3757     (cg-cast cg ty)
   3758     (let ((rhs (cg-pop cg)))
   3759       (%cg-load-opnd-into cg rhs 'a0)
   3760       (pmatch lhs
   3761         (($ opnd? (kind frame) (ext ,off))
   3762          (guard (%cg-indirect? cg off))
   3763          (%cg-emit-ld-slot cg 't0 off)
   3764          (%cg-emit-st-typed cg 'a0 ty 't0 0))
   3765         (($ opnd? (kind frame) (ext ,off))
   3766          (%cg-emit-st-slot-typed cg 'a0 ty off))
   3767         (($ opnd? (kind global) (ext ,lbl))
   3768          (%cg-emit-la cg 't0 lbl)
   3769          (%cg-emit-st-typed cg 'a0 ty 't0 0))
   3770         (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs))))
   3771       (%cg-spill-reg cg 'a0 ty))))
   3772 
   3773 ;; --------------------------------------------------------------------
   3774 ;; Calls
   3775 ;; --------------------------------------------------------------------
   3776 (define (cg-call cg arity has-result?)
   3777   (let* ((args (let loop ((i 0) (acc '()))
   3778                  (cond ((= i arity) acc)
   3779                        (else (loop (+ i 1) (cons (cg-pop cg) acc))))))
   3780          (fn-op (cg-pop cg))
   3781          ;; sret = struct/union > 16B return; shift args by one reg
   3782          ;; and place a0 last so it's not clobbered by arg loads.
   3783          (fty (opnd-type fn-op))
   3784          (rty (cond
   3785                 ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty)))
   3786                 ((eq? (ctype-kind fty) 'ptr)
   3787                  (let ((p (ctype-ext fty)))
   3788                    (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64)))
   3789                 (else %t-i64)))
   3790          (rk  (ctype-kind rty))
   3791          (sret? (and has-result?
   3792                      (or (eq? rk 'struct) (eq? rk 'union))
   3793                      (> (ctype-size rty) 16)))
   3794          ;; If the callee is variadic, the callee's save area caps total
   3795          ;; incoming args at 16. Reject silent miscompiles up front.
   3796          (callee-fty (cond
   3797                        ((eq? (ctype-kind fty) 'fn) fty)
   3798                        ((and (eq? (ctype-kind fty) 'ptr)
   3799                              (eq? (ctype-kind (ctype-ext fty)) 'fn))
   3800                         (ctype-ext fty))
   3801                        (else #f)))
   3802          (callee-variadic? (and callee-fty
   3803                                 (let ((ext (ctype-ext callee-fty)))
   3804                                   (and (pair? ext) (pair? (cdr ext))
   3805                                        (pair? (cddr ext))
   3806                                        (car (cddr ext))))))
   3807          (_cap-check (cond
   3808                        ((and callee-variadic? (> arity 16))
   3809                         (die #f "cg-call: variadic call exceeds 16-arg save-area cap"
   3810                              arity))
   3811                        (else 0)))
   3812          (sret-shift (if sret? 1 0))
   3813          (recv-slot (cond
   3814                       (sret?
   3815                        (cg-alloc-slot cg
   3816                                       (align-up (ctype-size rty) 8)
   3817                                       (max 8 (ctype-align rty))))
   3818                       (else #f))))
   3819     (let stage ((xs args) (idx 0))
   3820       (cond
   3821         ((null? xs) 0)
   3822         (else
   3823          (let* ((arg (car xs))
   3824                 (aty (opnd-type arg))
   3825                 (n   (%cg-param-reg-count aty)))
   3826            (cond
   3827              ;; Aggregate >8B: load both halves into successive arg
   3828              ;; regs / stack slots. Stage the struct's address in t0
   3829              ;; once and chunk-load 8 bytes at a time.
   3830              ((and (%cg-param-aggregate? aty) (> n 1))
   3831               (%cg-emit-addr-of cg arg 't0)
   3832               (let chunk ((i 0))
   3833                 (cond
   3834                   ((>= i n) 0)
   3835                   (else
   3836                    (let ((tabi (+ idx sret-shift i)))
   3837                      (cond
   3838                        ((< tabi 4)
   3839                         (%cg-emit-many cg
   3840                                        (list "%ld("
   3841                                              (%cg-reg->bv (%reg-by-idx tabi))
   3842                                              ", t0, " (%n (* i 8)) ")\n")))
   3843                        (else
   3844                         (%cg-emit-many cg
   3845                                        (list "%ld(t1, t0, "
   3846                                              (%n (* i 8)) ")\n"))
   3847                         (%cg-emit-st cg 't1 'sp (* 8 (- tabi 4))))))
   3848                    (chunk (+ i 1)))))
   3849               (stage (cdr xs) (+ idx n)))
   3850              (else
   3851               (let ((abi (+ idx sret-shift)))
   3852                 (cond
   3853                   ((< abi 4)
   3854                    (%cg-load-opnd-into cg arg (%reg-by-idx abi))
   3855                    (stage (cdr xs) (+ idx 1)))
   3856                   (else
   3857                    (%cg-load-opnd-into cg arg 't0)
   3858                    (%cg-emit-st cg 't0 'sp (* 8 (- abi 4)))
   3859                    (stage (cdr xs) (+ idx 1)))))))))))
   3860     ;; Stack-arg footprint accounts for the extra ABI slot any
   3861     ;; >8B-aggregate arg consumed beyond its single-position cousin.
   3862     (let* ((nabi (let count ((xs args) (n sret-shift))
   3863                    (cond ((null? xs) n)
   3864                          (else (count (cdr xs)
   3865                                       (+ n (%cg-param-reg-count
   3866                                               (opnd-type (car xs)))))))))
   3867            (sa  (max 0 (- nabi 4))))
   3868       (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0)))
   3869     (cond
   3870       (sret?
   3871        (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg recv-slot))))
   3872     (cond
   3873       ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op)))
   3874        (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n")))
   3875       (else
   3876        (%cg-load-opnd-into cg fn-op 't0)
   3877        (%cg-emit-many cg (list "%callr(t0)\n"))))
   3878     (cond
   3879       (has-result?
   3880        (cond
   3881          ;; >16B sret (A2): a0 holds recv-slot; push as struct lval.
   3882          (sret? (cg-push cg (%opnd 'frame rty recv-slot #t)))
   3883          ;; ≤16B struct/union (A1): fresh slot, spill from a0/a1.
   3884          ((and (or (eq? rk 'struct) (eq? rk 'union))
   3885                (<= (ctype-size rty) 16))
   3886           (let* ((sz   (ctype-size rty))
   3887                  (al   (max 8 (ctype-align rty)))
   3888                  (slot (cg-alloc-slot cg (align-up sz 8) al)))
   3889             (%cg-emit-st-slot cg 'a0 slot)
   3890             (cond ((> sz 8) (%cg-emit-st-slot cg 'a1 (+ slot 8))))
   3891             (cg-push cg (%opnd 'frame rty slot #t))))
   3892          (else
   3893           (%cg-spill-reg cg 'a0 rty))))
   3894       (else #f))))
   3895 
   3896 ;; --------------------------------------------------------------------
   3897 ;; Return
   3898 ;; --------------------------------------------------------------------
   3899 (define (cg-return cg)
   3900   (let* ((ret-slot (%cg-fn-get cg '%fn-ret-slot))
   3901          (ret-type (%cg-fn-get cg '%fn-ret-type))
   3902          (rk       (ctype-kind ret-type))
   3903          (sret?    (%cg-fn-get cg '%fn-sret?)))
   3904     (cond
   3905       ((eq? rk 'void)
   3906        (%cg-emit-many cg (list "%b(&.ret)\n")))
   3907       ((or (eq? rk 'struct) (eq? rk 'union))
   3908        ;; struct-by-value: ≤16B (A1) → ret-slot; >16B (A2 sret) → *sret-slot.
   3909        (let* ((p (cg-pop cg)) (sz (ctype-size ret-type)))
   3910          (cond ((not (opnd-lval? p))
   3911                 (die #f "cg-return: struct value must be an lvalue")))
   3912          (%cg-emit-addr-of cg p 't0)
   3913          (cond
   3914            (sret?
   3915             (%cg-emit-ld-slot cg 't2 (%cg-fn-get cg '%fn-sret-slot)))
   3916          (else
   3917             (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg ret-slot))))
   3918          (%cg-emit-byte-copy cg 't2 't0 't1 sz)
   3919          (%cg-emit-many cg (list "%b(&.ret)\n"))))
   3920       (else
   3921        (let ((p (cg-pop cg)))
   3922          (%cg-load-opnd-into cg p 'a0)
   3923          (%cg-emit-st-slot cg 'a0 ret-slot)
   3924          (%cg-emit-many cg (list "%b(&.ret)\n")))))))
   3925 
   3926 ;; --------------------------------------------------------------------
   3927 ;; Structured control flow
   3928 ;; --------------------------------------------------------------------
   3929 (define (cg-if cg then-thunk)
   3930   (let ((p (cg-pop cg)))
   3931     (%cg-load-opnd-into cg p 't0)
   3932     (%cg-emit-many cg (list "%if_nez(t0, {\n"))
   3933     (then-thunk)
   3934     (%cg-emit-many cg (list "})\n"))))
   3935 
   3936 (define (cg-ifelse cg then-thunk else-thunk)
   3937   (let ((p (cg-pop cg)))
   3938     (%cg-load-opnd-into cg p 't0)
   3939     (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
   3940     (then-thunk)
   3941     (%cg-emit-many cg (list "}, {\n"))
   3942     (else-thunk)
   3943     (%cg-emit-many cg (list "})\n"))))
   3944 
   3945 ;; Conditionals-as-values: `cg-ifelse` is correct for if-statements
   3946 ;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends
   3947 ;; with one rval on top of the vstack — and after both branches run,
   3948 ;; we'd be left with TWO opnds, which breaks the type contract for
   3949 ;; the surrounding expression. `cg-ifelse-merge` solves that: pop the
   3950 ;; cond, allocate one result slot, and after each thunk runs, pop its
   3951 ;; rval and store into the slot. Push the slot as one frame rval.
   3952 ;;
   3953 ;; Result type follows C11 §6.5.15 ¶5 for ternary: the usual arithmetic
   3954 ;; conversions over the two arms' types. The slot stores the raw 8-byte
   3955 ;; payload (per cc.scm's canonical-form discipline); %cg-load-opnd-into
   3956 ;; then re-canonicalizes on read against whatever common type we picked.
   3957 ;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the
   3958 ;; parser, so the merge is a no-op on type.
   3959 (define (cg-ifelse-merge cg then-thunk else-thunk)
   3960   (let* ((cond-op (cg-pop cg)))
   3961     (%cg-load-opnd-into cg cond-op 't0)
   3962     (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
   3963     (then-thunk)
   3964     (let* ((p     (cg-pop cg))
   3965            (rty1  (opnd-type p))
   3966            (rk1   (ctype-kind rty1))
   3967            ;; Struct/union arms can't ride the canonical 8-byte word
   3968            ;; slot — the arm's bytes have to land in a slot sized to
   3969            ;; the struct, and each arm memcpys its lvalue in. tcc's
   3970            ;; expr_cond does this exact `type = bt1 == 6 ? type1 : type2`
   3971            ;; pattern across CType structs, so without this case
   3972            ;; cc.scm-compiled tcc-boot2 self-corrupts.
   3973            (aggr? (or (eq? rk1 'struct) (eq? rk1 'union)))
   3974            (slot  (cond (aggr?
   3975                          (cg-alloc-slot cg
   3976                                         (align-up (ctype-size rty1) 8)
   3977                                         (max 8 (ctype-align rty1))))
   3978                         (else
   3979                          (cg-alloc-slot cg 8 8)))))
   3980       (%cg-merge-write-arm cg p slot aggr?)
   3981       (%cg-emit-many cg (list "}, {\n"))
   3982       (else-thunk)
   3983       (let* ((q    (cg-pop cg))
   3984              (rty2 (opnd-type q)))
   3985         (%cg-merge-write-arm cg q slot aggr?)
   3986         (%cg-emit-many cg (list "})\n"))
   3987         ;; Aggregate result is pushed as a frame lval so cg-copy-struct
   3988         ;; (which asserts src must be lval) accepts it; %cg-emit-addr-of
   3989         ;; falls through the `lval? #t` guard (slot is direct, not
   3990         ;; indirect) and returns the slot's address either way.
   3991         (cg-push cg (%opnd 'frame
   3992                            (%cg-merge-arith-type rty1 rty2)
   3993                            slot
   3994                            aggr?))))))
   3995 
   3996 (define (%cg-merge-write-arm cg op slot aggr?)
   3997   (cond
   3998     (aggr?
   3999      (%cg-emit-addr-of cg op 't0)
   4000      (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg slot))
   4001      (%cg-emit-byte-copy cg 't2 't0 't1 (ctype-size (opnd-type op))))
   4002     (else
   4003      (%cg-load-opnd-into cg op 'a0)
   4004      (%cg-emit-st-slot cg 'a0 slot))))
   4005 
   4006 ;; Usual arithmetic conversion over two ctypes (C11 §6.3.1.8):
   4007 ;; integer-promote each (sub-int → int), then pick the wider with
   4008 ;; unsigned tie-break. Falls back to t1 for non-arithmetic kinds
   4009 ;; (pointer, struct, array — ternary on those preserves the first
   4010 ;; arm's type as before).
   4011 (define (%cg-merge-arith-type t1 t2)
   4012   (cond
   4013     ((and (%ctype-arith? t1) (%ctype-arith? t2))
   4014      (let ((p1 (cond ((< (ctype-size t1) 4) %t-i32) (else t1)))
   4015            (p2 (cond ((< (ctype-size t2) 4) %t-i32) (else t2))))
   4016        (cond
   4017          ((> (ctype-size p1) (ctype-size p2)) p1)
   4018          ((> (ctype-size p2) (ctype-size p1)) p2)
   4019          ((%ctype-unsigned? p1) p1)
   4020          ((%ctype-unsigned? p2) p2)
   4021          (else p1))))
   4022     (else t1)))
   4023 
   4024 (define (cg-loop cg head-thunk body-thunk)
   4025   ;; body-thunk receives the loop tag as its argument; parser uses
   4026   ;; that tag for cg-break / cg-continue inside the body.
   4027   (let ((tag (%cg-fresh-loop-tag cg)))
   4028     (%cg-emit-many cg (list ".scope\n"
   4029                             ":.top\n"))
   4030     (head-thunk)
   4031     (cond
   4032       ((zero? (cg-depth cg)) 0)
   4033       (else
   4034        (let ((c (cg-pop cg)))
   4035          (%cg-load-opnd-into cg c 't0)
   4036          (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n")))))
   4037     (body-thunk tag)
   4038     (%cg-emit-many cg (list "%b(&.top)\n"
   4039                             ":.end\n"
   4040                             ".endscope\n"))
   4041     tag))
   4042 
   4043 (define (cg-break cg tag)
   4044   (%cg-emit-many cg (list "%break\n")))
   4045 
   4046 (define (cg-continue cg tag)
   4047   (%cg-emit-many cg (list "%continue\n")))
   4048 
   4049 ;; --------------------------------------------------------------------
   4050 ;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 16-slot
   4051 ;; (8 bytes each) save area at known frame offsets, populating each
   4052 ;; slot from the appropriate ABI source — a-register for indices 0..3,
   4053 ;; LDARG for indices 4..15. va_start sets ap to the address of the
   4054 ;; first slot past the named-arg count; va_arg reads *ap, advances ap
   4055 ;; by 8, and pushes the value as the requested type.
   4056 ;;
   4057 ;; ap is an lval (typically a `va_list` local). cg-va-start pops it,
   4058 ;; computes the address, stores into *ap (or the slot directly), and
   4059 ;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for
   4060 ;; the value, advances ap, stores back, pushes the loaded value.
   4061 ;;
   4062 ;; Cap: total incoming args (named + variadic) must fit in the 16-slot
   4063 ;; save area. Variadic call sites exceeding this die in cg-call;
   4064 ;; variadic definitions whose named-arg count exceeds it die in
   4065 ;; cg-fn-begin/v.
   4066 ;; --------------------------------------------------------------------
   4067 (define (%cg-vararg-first-slot cg)
   4068   (let ((s (%cg-fn-get cg '%fn-vararg-first-slot)))
   4069     (cond ((not s) (die #f "cg-va-start: not a variadic function"))
   4070           (else s))))
   4071 
   4072 (define (cg-va-start cg)
   4073   ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0,
   4074   ;; store through ap-lval. Pushes nothing.
   4075   (let* ((ap-lv (cg-pop cg))
   4076          (vsl   (%cg-vararg-first-slot cg)))
   4077     (cond ((not (opnd-lval? ap-lv))
   4078            (die #f "cg-va-start: ap not lvalue")))
   4079     (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg vsl))
   4080     (%cg-emit-addr-of cg ap-lv 't0)
   4081     (%cg-emit-st cg 'a0 't0 0)))
   4082 
   4083 (define (cg-va-arg cg ctype)
   4084   ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1.
   4085   ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval
   4086   ;; of type ctype (caller cg-cast's if needed).
   4087   (let ((ap-lv (cg-pop cg)))
   4088     (cond ((not (opnd-lval? ap-lv))
   4089            (die #f "cg-va-arg: ap not lvalue")))
   4090     ;; Address of the storage that holds ap → t0; ap value → a0.
   4091     (%cg-emit-addr-of cg ap-lv 't0)
   4092     (%cg-emit-ld cg 'a0 't0 0)
   4093     ;; Read *ap into a1 (full 8 bytes; cg-cast on the rval the caller
   4094     ;; pushes will narrow if needed). Advance ap by 8 and store back.
   4095     (%cg-emit-ld cg 'a1 'a0 0)
   4096     (%cg-emit-many cg (list "%addi(a0, a0, 8)\n"))
   4097     (%cg-emit-st cg 'a0 't0 0)
   4098     ;; Spill the loaded value (a1) to a fresh frame slot under ctype.
   4099     (%cg-spill-reg cg 'a1 ctype)))
   4100 
   4101 (define (cg-va-end cg)
   4102   ;; va_end is a no-op in this design. Pop and discard ap-lval.
   4103   (cg-pop cg)
   4104   0)
   4105 
   4106 ;; --------------------------------------------------------------------
   4107 ;; Labels and unconditional goto.
   4108 ;; C labels have function scope, even when the labelled statement appears
   4109 ;; inside a nested block/loop. Emit them as function-qualified global
   4110 ;; labels rather than dotted hex2++ locals, because dotted definitions
   4111 ;; inside a nested `.scope` would be invisible to gotos outside it.
   4112 ;; --------------------------------------------------------------------
   4113 (define (%cg-user-label cg name-bv)
   4114   (let ((fn (%cg-fn-get cg '%fn-name)))
   4115     (bv-cat (list "cc__" fn "__user_" name-bv))))
   4116 
   4117 (define (cg-emit-label cg name-bv)
   4118   (%cg-emit-many cg (list ":" (%cg-user-label cg name-bv) "\n")))
   4119 
   4120 (define (cg-goto cg name-bv)
   4121   (%cg-emit-many cg (list "%b(&" (%cg-user-label cg name-bv) ")\n")))
   4122 
   4123 ;; --------------------------------------------------------------------
   4124 ;; switch
   4125 ;; --------------------------------------------------------------------
   4126 (define-record-type swctx
   4127   (%swctx ctrl-slot end-tag default-lbl)
   4128   swctx?
   4129   (ctrl-slot   swctx-ctrl-slot)
   4130   (end-tag     swctx-end-tag)
   4131   (default-lbl swctx-default-lbl swctx-default-lbl-set!))
   4132 
   4133 (define (cg-switch-begin cg)
   4134   (let* ((p   (cg-pop cg))
   4135          (off (cg-alloc-slot cg 8 8))
   4136          (tag (%cg-fresh-loop-tag cg))
   4137          (disp-lbl (bytevector-append "sw_disp_" tag)))
   4138     (%cg-load-opnd-into cg p 't0)
   4139     (%cg-emit-st-slot cg 't0 off)
   4140     (%cg-emit-many cg (list ".scope\n"
   4141                             "%b(&." disp-lbl ")\n"))
   4142     (%swctx off tag #f)))
   4143 
   4144 (define (cg-switch-case cg sw const-int)
   4145   (let* ((lbl (%cg-fresh-lbl cg))
   4146          (key (string->symbol
   4147                (bytevector-append "%sw_cases__" (swctx-end-tag sw))))
   4148          (cur (or (%cg-fn-get cg key) '()))
   4149          (entry (cons const-int lbl)))
   4150     (%cg-fn-set! cg key (cons entry cur))
   4151     (%cg-emit-many cg (list ":." lbl "\n"))))
   4152 
   4153 (define (cg-switch-default cg sw)
   4154   (let ((lbl (%cg-fresh-lbl cg)))
   4155     (swctx-default-lbl-set! sw lbl)
   4156     (%cg-emit-many cg (list ":." lbl "\n"))))
   4157 
   4158 (define (cg-switch-end cg sw)
   4159   (let* ((tag (swctx-end-tag sw))
   4160          (key (string->symbol (bytevector-append "%sw_cases__" tag)))
   4161          (cases (reverse (or (%cg-fn-get cg key) '())))
   4162          (default-lbl (swctx-default-lbl sw))
   4163          (disp-lbl (bytevector-append "sw_disp_" tag)))
   4164     (%cg-emit-many cg (list "%break\n"
   4165                             ":." disp-lbl "\n"))
   4166     (%cg-emit-many cg (list "%ld(t0, sp, "
   4167                             (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n"))
   4168     (for-each
   4169      (lambda (c)
   4170        (%cg-emit-many cg (list "%switch_case(t0, t1, "
   4171                                (%n (car c)) ", &." (cdr c) ")\n")))
   4172      cases)
   4173     (cond
   4174       (default-lbl (%cg-emit-many cg (list "%b(&." default-lbl ")\n")))
   4175       (else 0))
   4176     (%cg-emit-many cg (list "%break\n"
   4177                             ":.end\n"
   4178                             ".endscope\n"))))
   4179 
   4180 ;; --------------------------------------------------------------------
   4181 ;; Globals and data
   4182 ;; --------------------------------------------------------------------
   4183 ;; cg-emit-global: emit a global symbol into either .data (initialized)
   4184 ;; or .bss (zero-init).
   4185 ;;
   4186 ;; init can be:
   4187 ;;   #f                       — zero-init in .bss (size from sym's ctype).
   4188 ;;   (piece ...)              — initialized in .data; pieces concatenated.
   4189 ;;
   4190 ;; Each piece is either:
   4191 ;;   <bytevector>             — raw bytes; emitted as bare hex chunks
   4192 ;;                              (64 bytes / 128 hex chars per line).
   4193 ;;   (label-ref . <label-bv>) — 8-byte pointer slot containing &label;
   4194 ;;                              emitted as `&<label> %(0)` (4B label ref +
   4195 ;;                              4B zero pad).
   4196 (define (%cg-init-piece->bv piece)
   4197   (cond
   4198     ((bytevector? piece)
   4199      (bv-cat (%cg-bv->hex-lines piece #f)))
   4200     ((and (pair? piece) (eq? (car piece) 'label-ref))
   4201      (bv-cat (list "&" (cdr piece) " %(0)\n")))
   4202     (else (die #f "cg-emit-global: bad init piece" piece))))
   4203 
   4204 (define (cg-emit-global cg sym init)
   4205   (let* ((lbl (%cg-sym-label sym))
   4206          (sz  (ctype-size (sym-type sym)))
   4207          (size (if (< sz 0) 8 sz))
   4208          (al  (max 1 (ctype-align (sym-type sym)))))
   4209     (cond
   4210       (init
   4211        (buf-push! (cg-data cg) (bv-cat (list "\n.align " (%n al) "\n:"
   4212                                              lbl "\n")))
   4213        (let walk ((ps init))
   4214          (cond
   4215            ((null? ps) 0)
   4216            (else
   4217             (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps)))
   4218             (walk (cdr ps))))))
   4219       (else
   4220        (buf-push! (cg-bss cg)
   4221                   (bv-cat (list "\n.align " (%n al) "\n:" lbl "\n"
   4222                                 (let zero-loop ((rem size) (acc '()))
   4223                                   (cond
   4224                                     ((<= rem 0) (bv-cat (reverse acc)))
   4225                                     ((>= rem 8)
   4226                                      (zero-loop (- rem 8) (cons "$(0)\n" acc)))
   4227                                     (else
   4228                                      (zero-loop (- rem 1) (cons "!(0)\n" acc))))))))))
   4229   0))
   4230 
   4231 (define (cg-emit-extern cg sym) 0)
   4232 
   4233 ;; Record `n` as a tentative file-scope definition: don't emit BSS yet,
   4234 ;; but if no full definition appears by end of TU, cg-finish will emit
   4235 ;; zero-init storage for it. Idempotent — extra entries with the same
   4236 ;; name are harmless (cg-finish dedupes via scope-lookup).
   4237 (define (cg-add-tentative! cg n)
   4238   (let* ((w (cg-world cg))
   4239          (cur (world-tentatives w)))
   4240     (cond
   4241       ((member n cur) #t)
   4242       (else (world-tentatives-set! w (cons n cur))))))
   4243 
   4244 ;; End-of-TU pass: for each pending tentative, look up the latest sym
   4245 ;; binding. If it's still `defined?=#f`, no real definition replaced it,
   4246 ;; so emit zero-init storage now. Otherwise the .data emission already
   4247 ;; covered it.
   4248 (define (cg-flush-tentatives! cg)
   4249   (let* ((w (cg-world cg))
   4250          (top (car (world-scope w))))
   4251     (for-each
   4252       (lambda (n)
   4253         (let ((sm (alist-ref n top)))
   4254           (cond
   4255             ((and sm
   4256                   (eq? (sym-kind sm) 'var)
   4257                   (not (sym-defined? sm)))
   4258              (cg-emit-global cg sm #f)))))
   4259       (world-tentatives w))))
   4260 
   4261 (define (cg-intern-string cg bv-content)
   4262   (let ((p (alist-ref bv-content (cg-str-pool cg))))
   4263     (cond
   4264       (p p)
   4265       (else
   4266        (let* ((n   (length (cg-str-pool cg)))
   4267               (lbl (bytevector-append
   4268                     (cg-str-prefix cg) "cc__str_" (%n n))))
   4269          (cg-str-pool-set! cg
   4270            (alist-set bv-content lbl (cg-str-pool cg)))
   4271          (buf-push! (cg-data cg)
   4272                     (bv-cat (append (list "\n.align " (%n %CG-STR-ALIGN)
   4273                                           "\n:" lbl "\n")
   4274                                     (%cg-bv->hex-lines bv-content #t)
   4275                                     (list ".align " (%n %CG-STR-ALIGN) "\n"))))
   4276          lbl)))))
   4277 
   4278 ;; Mint a fresh, never-recurring label for an unnamed file-scope
   4279 ;; compound literal. Mirrors cg-intern-string's namer pattern (prefix +
   4280 ;; "cc__cl_" + N), with N drawn from cg-label-ctr — the same monotonic
   4281 ;; counter the per-fn label minters use. Different prefix → no collision
   4282 ;; with `Lcc__N` / `lbl_N`.
   4283 (define (%cg-fresh-cl-label cg)
   4284   (let* ((n   (cg-label-ctr cg))
   4285          (lbl (bytevector-append (cg-str-prefix cg) "cc__cl_" (%n n))))
   4286     (cg-label-ctr-set! cg (+ n 1))
   4287     lbl))
   4288 
   4289 ;; Render BV's bytes as bare hex accepted directly by hex2++. Lines are
   4290 ;; chunked to ≤128 hex chars (= 64 bytes) to keep generated P1pp readable.
   4291 ;;
   4292 ;; If TRAILING-NUL? is #t, an extra 0x00 byte is appended to terminate
   4293 ;; a C string. Alignment is emitted explicitly by callers with .align
   4294 ;; so hex2++ owns padding instead of cc.scm manufacturing zero bytes.
   4295 ;; The other caller (%cg-init-piece->bv) emits arbitrary initializer
   4296 ;; bytes whose length is sized exactly to the C-visible field; padding a
   4297 ;; 4-byte int slot to 8 would shift every following struct field.
   4298 ;; Returns a list of bytevectors ready for bv-cat.
   4299 (define %CG-HEX-CHUNK-BYTES 64)
   4300 (define %CG-STR-ALIGN       8)
   4301 
   4302 (define (%cg-bv->hex-lines bv trailing-nul?)
   4303   (let* ((len     (bytevector-length bv))
   4304          (logical (cond (trailing-nul? (+ len 1)) (else len)))
   4305          (total   logical))
   4306     (cond
   4307       ((= total 0) '())
   4308       (else
   4309        (let loop ((i 0) (acc '()))
   4310          (cond
   4311            ((>= i total) (reverse acc))
   4312            (else
   4313             (let ((end (cond ((< (+ i %CG-HEX-CHUNK-BYTES) total)
   4314                               (+ i %CG-HEX-CHUNK-BYTES))
   4315                              (else total))))
   4316               (loop end (cons (%cg-hex-line bv i end len) acc))))))))))
   4317 
   4318 ;; One `XXXX...XX\n` line covering BV bytes [START, END). Indices
   4319 ;; >= LEN render as 0x00 (used for the trailing NUL terminator).
   4320 (define (%cg-hex-line bv start end len)
   4321   (let* ((nbytes (- end start))
   4322          (out    (make-bytevector (+ (* 2 nbytes) 1))))
   4323     (let loop ((j start) (k 0))
   4324       (cond
   4325         ((= j end)
   4326          (bytevector-u8-set! out k (char->integer #\newline))
   4327          out)
   4328         (else
   4329          (let ((b (cond ((< j len) (bytevector-u8-ref bv j))
   4330                         (else 0))))
   4331            (bytevector-u8-set! out k       (%cg-hex-digit
   4332                                             (arithmetic-shift b -4)))
   4333            (bytevector-u8-set! out (+ k 1) (%cg-hex-digit (bit-and b 15)))
   4334            (loop (+ j 1) (+ k 2))))))))
   4335 
   4336 (define (%cg-hex-digit n)
   4337   (cond ((< n 10) (+ n (char->integer #\0)))
   4338         (else    (+ (- n 10) (char->integer #\A)))))
   4339 
   4340 ;; --------------------------------------------------------------------
   4341 ;; Frame
   4342 ;; --------------------------------------------------------------------
   4343 (define (cg-alloc-slot cg bytes align)
   4344   (let* ((aligned (align-up (cg-frame-hi cg) align))
   4345          (new-hi  (+ aligned bytes)))
   4346     (cg-frame-hi-set! cg new-hi)
   4347     aligned))
   4348 ;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1.
   4349 
   4350 (define (make-pstate iter cg)
   4351   (%pstate iter (cg-world cg) '() #f cg))
   4352 
   4353 (define (peek ps)    (iter-peek  (ps-iter ps)))
   4354 (define (peek2 ps)   (iter-peek2 (ps-iter ps)))
   4355 (define (advance ps) (iter-next  (ps-iter ps)))
   4356 (define (at-kw? ps s)
   4357   (pmatch (peek ps)
   4358     (($ tok? (kind KW) (value ,v)) (eq? v s))
   4359     (else #f)))
   4360 (define (at-punct? ps s)
   4361   (pmatch (peek ps)
   4362     (($ tok? (kind PUNCT) (value ,v)) (eq? v s))
   4363     (else #f)))
   4364 (define (expect-kw ps s)
   4365   (let ((t (peek ps)))
   4366     (pmatch t
   4367       (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps))
   4368       (else (die (tok-loc t) "expected kw" s)))))
   4369 (define (expect-punct ps s)
   4370   (let ((t (peek ps)))
   4371     (pmatch t
   4372       (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps))
   4373       (else (die (tok-loc t) "expected punct" s)))))
   4374 
   4375 (define (scope-enter! ps)
   4376   (ps-scope-set! ps (cons '() (ps-scope ps)))
   4377   (ps-tags-set!  ps (cons '() (ps-tags ps))))
   4378 (define (scope-leave! ps)
   4379   (ps-scope-set! ps (cdr (ps-scope ps)))
   4380   (ps-tags-set!  ps (cdr (ps-tags ps))))
   4381 (define (ctype-compat? a b)
   4382   (cond
   4383     ((eq? a b) #t)
   4384     ((not (eq? (ctype-kind a) (ctype-kind b))) #f)
   4385     (else
   4386      (let ((k (ctype-kind a)))
   4387        (cond
   4388          ((eq? k 'ptr) (ctype-compat? (ctype-ext a) (ctype-ext b)))
   4389          ((eq? k 'arr)
   4390           (let ((ea (ctype-ext a)) (eb (ctype-ext b)))
   4391             (and (ctype-compat? (car ea) (car eb))
   4392                  (or (= (cdr ea) (cdr eb))
   4393                      (< (cdr ea) 0) (< (cdr eb) 0)))))
   4394          ((eq? k 'fn) (%fn-ctype-compat? (ctype-ext a) (ctype-ext b)))
   4395          ((or (eq? k 'struct) (eq? k 'union) (eq? k 'enum)) #f)
   4396          (else #t))))))
   4397 
   4398 (define (%fn-ctype-compat? a b)
   4399   (and (ctype-compat? (car a) (car b))
   4400        (eq? (car (cddr a)) (car (cddr b)))
   4401        (%fn-params-compat? (cadr a) (cadr b))))
   4402 
   4403 (define (%fn-params-compat? pa pb)
   4404   (cond
   4405     ((and (null? pa) (null? pb)) #t)
   4406     ((or (null? pa) (null? pb)) #f)
   4407     ((ctype-compat? (cdar pa) (cdar pb))
   4408      (%fn-params-compat? (cdr pa) (cdr pb)))
   4409     (else #f)))
   4410 
   4411 (define (sym-merge old new)
   4412   (cond
   4413     ((not (eq? (sym-kind old) (sym-kind new)))
   4414      (die #f "redecl: kind mismatch" (sym-name old)))
   4415     ((not (ctype-compat? (sym-type old) (sym-type new)))
   4416      (die #f "redecl: type mismatch" (sym-name old)))
   4417     ((eq? (sym-kind old) 'typedef) old)
   4418     ((eq? (sym-kind old) 'enum-const)
   4419      (cond ((equal? (sym-slot old) (sym-slot new)) old)
   4420            (else (die #f "enum-const redecl" (sym-name old)))))
   4421     ((and (sym-defined? old) (sym-defined? new))
   4422      (die #f "redefinition" (sym-name old)))
   4423     ;; Linkage inherits from the first declaration (C 6.2.2 ¶4): if a
   4424     ;; later decl/def of the same identifier doesn't carry a storage
   4425     ;; class, it picks up the prior one. tcc.c relies on this with
   4426     ;; `static T f(); ... T f() {…}` — the prior `static` makes both
   4427     ;; the decl and the def internal-linkage. Without this carry-
   4428     ;; through cc.scm split them across two label namespaces.
   4429     ((sym-defined? new)
   4430      (cond
   4431        ((eq? (sym-storage old) 'static)
   4432         (%sym (sym-name new) (sym-kind new) 'static
   4433               (sym-type new) (sym-slot new) #t))
   4434        (else new)))
   4435     (else old)))
   4436 
   4437 (define (scope-bind! ps n s)
   4438   (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f))
   4439          (old (alist-ref n top)))
   4440     (cond
   4441       ((not old)
   4442        (ps-scope-set! ps (cons (alist-set n s top) r)))
   4443       (else
   4444        (let ((merged (sym-merge old s)))
   4445          (cond
   4446            ((eq? merged old) #t)
   4447            (else
   4448             (ps-scope-set! ps (cons (alist-set n merged top) r)))))))))
   4449 (define (scope-lookup ps n)
   4450   (let loop ((f (ps-scope ps)))
   4451     (cond ((null? f) #f)
   4452           (else
   4453            (let ((v (alist-ref n (car f))))
   4454              (if v v (loop (cdr f))))))))
   4455 (define (tag-bind! ps n c)
   4456   (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f)))
   4457     (ps-tags-set! ps (cons (alist-set n c top) r))))
   4458 (define (tag-lookup ps n)
   4459   (let loop ((f (ps-tags ps)))
   4460     (cond ((null? f) #f)
   4461           (else (let ((v (alist-ref n (car f))))
   4462                   (if v v (loop (cdr f))))))))
   4463 (define (typedef? ps n)
   4464   (let ((sm (scope-lookup ps n)))
   4465     (and sm (eq? (sym-kind sm) 'typedef))))
   4466 
   4467 (define (%mk-ptr p) (%ctype 'ptr 8 8 p))
   4468 (define (%mk-arr e n)
   4469   (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e)))
   4470           (ctype-align e) (cons e n)))
   4471 (define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v)))
   4472 (define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr))
   4473 (define (ctype-is-fn?  t) (eq? (ctype-kind t) 'fn))
   4474 (define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr))
   4475 
   4476 (define (eat-cv-quals! ps)
   4477   (cond ((at-kw? ps '__attribute__)
   4478          (skip-gnu-attribute! ps) (eat-cv-quals! ps))
   4479         ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
   4480              (at-kw? ps 'restrict))
   4481          (advance ps) (eat-cv-quals! ps))
   4482         (else #t)))
   4483 
   4484 ;; Consume a GNU `__attribute__ (( ... ))` spec and discard. The keyword
   4485 ;; has been peeked but not yet consumed. tcc.c's prototypes use these
   4486 ;; for noreturn / format / aligned annotations that the bootstrap doesn't
   4487 ;; need to honour semantically — same softening pattern as floats and
   4488 ;; rejected-but-accepted type specifiers.
   4489 (define (skip-gnu-attribute! ps)
   4490   (advance ps)
   4491   (expect-punct ps 'lparen)
   4492   (let loop ((depth 1))
   4493     (let ((t (peek ps)))
   4494       (cond
   4495         ((eq? (tok-kind t) 'EOF)
   4496          (die (tok-loc t) "EOF in __attribute__"))
   4497         ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen))
   4498          (advance ps) (loop (+ depth 1)))
   4499         ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'rparen))
   4500          (advance ps)
   4501          (cond ((= depth 1) #t)
   4502                (else (loop (- depth 1)))))
   4503         (else (advance ps) (loop depth))))))
   4504 
   4505 (define (eat-gnu-attributes! ps)
   4506   (cond ((at-kw? ps '__attribute__)
   4507          (skip-gnu-attribute! ps) (eat-gnu-attributes! ps))
   4508         (else #t)))
   4509 
   4510 (define (parse-decl-spec ps)
   4511   (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f))
   4512     (let ((t (peek ps)))
   4513       (cond
   4514         ((at-kw? ps '__attribute__)
   4515          (skip-gnu-attribute! ps) (loop sto sn lg b saw))
   4516         ((or (at-kw? ps 'auto) (at-kw? ps 'register))
   4517          (advance ps) (loop sto sn lg b #t))
   4518         ((at-kw? ps 'static)  (advance ps) (loop 'static sn lg b #t))
   4519         ((at-kw? ps 'extern)  (advance ps) (loop 'extern sn lg b #t))
   4520         ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t))
   4521         ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
   4522              (at-kw? ps 'restrict) (at-kw? ps 'inline))
   4523          (advance ps) (loop sto sn lg b #t))
   4524         ((at-kw? ps 'signed)   (advance ps) (loop sto 'signed lg b #t))
   4525         ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t))
   4526         ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t))
   4527         ((at-kw? ps 'long)  (advance ps) (loop sto sn (+ lg 1) b #t))
   4528         ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t))
   4529         ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t))
   4530         ((at-kw? ps 'int)  (advance ps) (loop sto sn lg 'int #t))
   4531         ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t))
   4532         ;; Floats: parsed as type specifiers so prototypes and struct
   4533         ;; layouts in the flattened TU don't trip the parser. The cg
   4534         ;; rejects fp loads/arith/casts at use, see %cg-fp-reject!.
   4535         ;; _Complex / _Imaginary are eaten silently — tcc.c only mentions
   4536         ;; them inside HAVE_FLOAT-gated paths.
   4537         ((at-kw? ps 'float)  (advance ps) (loop sto sn lg 'float #t))
   4538         ((at-kw? ps 'double) (advance ps) (loop sto sn lg 'double #t))
   4539         ((or (at-kw? ps '_Complex) (at-kw? ps '_Imaginary))
   4540          (advance ps) (loop sto sn lg b #t))
   4541         ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local)
   4542              (at-kw? ps '_Alignas) (at-kw? ps '_Generic)
   4543              (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert))
   4544          (die (tok-loc t) "rejected" (tok-value t)))
   4545         ((at-kw? ps 'struct)
   4546          (loop sto sn lg (parse-aggregate-spec ps 'struct) #t))
   4547         ((at-kw? ps 'union)
   4548          (loop sto sn lg (parse-aggregate-spec ps 'union) #t))
   4549         ((at-kw? ps 'enum)
   4550          (loop sto sn lg (parse-enum-spec ps) #t))
   4551         ;; __builtin_va_list — gcc/clang builtin type. We don't model
   4552         ;; it as a struct; for our P1 ABI a va_list is just a char*
   4553         ;; into the stack save area (cg-va-start/arg/end work over an
   4554         ;; 8-byte slot). Letting __builtin_va_list mean `char *` here
   4555         ;; lets a single header source — `typedef __builtin_va_list
   4556         ;; va_list;` — compile cleanly under both cc.scm and stock
   4557         ;; gcc/clang (where it's their native struct).
   4558         ((and (not b) (eq? (tok-kind t) 'IDENT)
   4559               (bv= (tok-value t) "__builtin_va_list"))
   4560          (advance ps)
   4561          (loop sto sn lg (%ctype 'ptr 8 8 %t-i8) #t))
   4562         ((and (not b) (eq? (tok-kind t) 'IDENT)
   4563               (let ((sm (scope-lookup ps (tok-value t))))
   4564                 (and sm (eq? (sym-kind sm) 'typedef))))
   4565          (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk))))
   4566            (loop sto sn lg (sym-type sm) #t)))
   4567         (else
   4568          (cond ((not saw) (die (tok-loc t) "expected decl-spec"
   4569                                (tok-value t)))
   4570                (else (values sto (resolve-base t sn lg b)))))))))
   4571 
   4572 (define (resolve-base loc sn lg b)
   4573   (cond
   4574     ((eq? b 'void)
   4575      (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void))
   4576     ((eq? b 'bool)
   4577      (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool))
   4578     ((eq? b 'char)
   4579      (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8)))
   4580     ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg)))))
   4581      (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16))
   4582            ((= lg 0)  (if (eq? sn 'unsigned) %t-u32 %t-i32))
   4583            (else      (if (eq? sn 'unsigned) %t-u64 %t-i64))))
   4584     ((eq? b 'float)
   4585      (if (or sn (not (zero? lg))) (die loc "float+qual") %t-flt))
   4586     ((eq? b 'double)
   4587      (cond (sn        (die loc "double+sign"))
   4588            ((= lg 0)  %t-dbl)
   4589            ((= lg 1)  %t-ldbl)
   4590            (else      (die loc "double+long*" lg))))
   4591     ((ctype? b)
   4592      (if (or sn (not (zero? lg))) (die loc "type+qual") b))
   4593     (else (die loc "unknown decl-spec"))))
   4594 
   4595 (define (parse-aggregate-spec ps kind)
   4596   (advance ps)
   4597   ;; GCC `__attribute__((...))` may sit between `struct/union` and
   4598   ;; the tag/`{`. Eat and discard.
   4599   (eat-gnu-attributes! ps)
   4600   (let ((tag (pmatch (peek ps)
   4601                (($ tok? (kind IDENT)) (tok-value (advance ps)))
   4602                (else #f))))
   4603     (eat-gnu-attributes! ps)
   4604     (cond
   4605       ((at-punct? ps 'lbrace)
   4606        (advance ps)
   4607        ;; A `struct/union TAG { ... }` declaration introduces (or
   4608        ;; completes) the tag in the *current* scope. Looking up in
   4609        ;; outer scopes via tag-lookup would let an inner-scope
   4610        ;; definition mutate an outer-scope same-tag ctype via
   4611        ;; complete-agg!. Restrict the reuse to the top frame, and
   4612        ;; only when the existing tag is still incomplete (size < 0);
   4613        ;; otherwise this is an attempted redefinition.
   4614        (let* ((ex (and tag (alist-ref tag (car (ps-tags ps)))))
   4615               (ct (cond ((and ex (eq? (ctype-kind ex) kind)
   4616                                (< (ctype-size ex) 0)) ex)
   4617                         ((and ex (eq? (ctype-kind ex) kind))
   4618                          (die (tok-loc (peek ps)) "agg redefinition" tag))
   4619                         (else (let ((c (%ctype kind -1 -1
   4620                                               (list (or tag #f) #f '()))))
   4621                                 (if tag (tag-bind! ps tag c)) c))))
   4622               (fields (parse-struct-fields ps kind)))
   4623          (expect-punct ps 'rbrace)
   4624          (complete-agg! ct kind tag fields) ct))
   4625       (tag (let ((ex (tag-lookup ps tag)))
   4626              (cond (ex ex)
   4627                    (else (let ((c (%ctype kind -1 -1
   4628                                          (list tag #f '()))))
   4629                            (tag-bind! ps tag c) c)))))
   4630       (else (die (tok-loc (peek ps)) "anon agg")))))
   4631 
   4632 (define (parse-struct-fields ps kind)
   4633   ;; For unions, every field stays at offset 0; complete-agg! takes the
   4634   ;; max of field sizes for the union's overall size.
   4635   (let ((struct? (eq? kind 'struct)))
   4636     (let loop ((acc '()) (off 0))
   4637       (cond
   4638         ((at-punct? ps 'rbrace) (reverse acc))
   4639         (else
   4640          (let-values (((_sto bty) (parse-decl-spec ps)))
   4641            (let dl ((acc2 acc) (o2 off))
   4642              (let*-values (((nm ty) (parse-declarator ps bty)))
   4643                (let* ((al (max (ctype-align ty) 1))
   4644                       (sz (ctype-size ty))
   4645                       (oa (if struct? (align-up o2 al) 0))
   4646                       (next (if struct? (+ oa (max sz 0)) 0)))
   4647                  (cond
   4648                    ((at-punct? ps 'comma)
   4649                     (advance ps)
   4650                     (dl (cons (list nm ty oa) acc2) next))
   4651                    ((at-punct? ps 'semi)
   4652                     (advance ps)
   4653                     (loop (cons (list nm ty oa) acc2) next))
   4654                    (else (die (tok-loc (peek ps)) "field"))))))))))))
   4655 
   4656 (define (complete-agg! ct k tag fs)
   4657   (let* ((ma (let m ((xs fs) (a 1))
   4658                (if (null? xs) a
   4659                    (m (cdr xs) (max a (ctype-align (cadr (car xs))))))))
   4660          (last (let l ((xs fs) (e 0))
   4661                  (if (null? xs) e
   4662                      (let* ((f (car xs)) (off (car (cddr f)))
   4663                             (sz (ctype-size (cadr f))))
   4664                        (l (cdr xs) (max e (+ off (max sz 0))))))))
   4665          (sz (cond ((eq? k 'union)
   4666                     (let u ((xs fs) (s 0))
   4667                       (if (null? xs) s
   4668                           (u (cdr xs)
   4669                              (max s (ctype-size (cadr (car xs))))))))
   4670                    (else (align-up last ma)))))
   4671     (ctype-size-set! ct sz)
   4672     (ctype-align-set! ct ma)
   4673     (ctype-ext-set! ct (list tag #t fs))
   4674     ;; Phase 3: if `ct` is a forward-declared struct/union that lived in
   4675     ;; main from a prior decl, its newly-set ext lives in scratch and
   4676     ;; would dangle on reset-scratch-heap!. Track it so promote-roots!
   4677     ;; can rewrite ext in main before the boundary fires. Scratch-resident
   4678     ;; ct (defined and completed in this decl) is promoted normally via
   4679     ;; the tag walker.
   4680     (set! %promote-pending-completions
   4681           (cons ct %promote-pending-completions))))
   4682 
   4683 (define (parse-enum-spec ps)
   4684   (advance ps)
   4685   (let ((tag (pmatch (peek ps)
   4686                (($ tok? (kind IDENT)) (tok-value (advance ps)))
   4687                (else #f))))
   4688     (cond
   4689       ((at-punct? ps 'lbrace)
   4690        (advance ps)
   4691        ;; Parse all members first, then construct the enum ctype with
   4692        ;; the final members list and tag-bind it. Members reference
   4693        ;; earlier enum-consts via scope-lookup (not via the enum tag),
   4694        ;; so deferring tag-bind! is safe.
   4695        (let loop ((vs '()) (nv 0))
   4696          (cond
   4697            ((at-punct? ps 'rbrace)
   4698             (advance ps)
   4699             (let ((ct (%ctype 'enum 4 4 (list tag (reverse vs)))))
   4700               (if tag (tag-bind! ps tag ct))
   4701               ct))
   4702            (else
   4703             (let* ((nt (advance ps)) (nm (tok-value nt))
   4704                    (val (cond ((at-punct? ps 'assign)
   4705                                (advance ps) (parse-const-int ps))
   4706                               (else nv))))
   4707               (scope-bind! ps nm
   4708                            (%sym nm 'enum-const #f %t-i32 val #t))
   4709               (cond ((at-punct? ps 'comma) (advance ps))
   4710                     ((at-punct? ps 'rbrace) #t)
   4711                     (else (die (tok-loc (peek ps)) "enum")))
   4712               (loop (cons (cons nm val) vs) (+ val 1)))))))
   4713       (tag (let ((e (tag-lookup ps tag)))
   4714              (cond (e e)
   4715                    (else (let ((c (%ctype 'enum 4 4 (list tag '()))))
   4716                            (tag-bind! ps tag c) c)))))
   4717       (else (die (tok-loc (peek ps)) "enum")))))
   4718 
   4719 ;; ====================================================================
   4720 ;; Integer constant expressions (C99 §6.6).
   4721 ;;
   4722 ;; parse-const-expr ps -> (value . ctype)
   4723 ;;   A self-contained walker that never touches cg. The four call sites
   4724 ;;   that demand an integer constant expression — array bounds, enum
   4725 ;;   initializers, case labels, file-scope/static initializers — all go
   4726 ;;   through here. Returns a (value . ctype) pair so a final cast can
   4727 ;;   truncate at the target type's width (e.g. `(int)(unsigned char)257`
   4728 ;;   needs the inner cast to mask off to u8 before the outer relabel).
   4729 ;;
   4730 ;; Operand surface: integer / character literals, enum constants,
   4731 ;; sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^,
   4732 ;; compare < <= > >= == !=, logical && || (short-circuit at the value
   4733 ;; layer; both sides are still parsed so the token stream advances),
   4734 ;; ternary ?:, cast to integer type, parenthesization. Anything else
   4735 ;; dies. Floats / function calls / address-of / non-const idents / VLAs
   4736 ;; are out of scope.
   4737 ;; ====================================================================
   4738 
   4739 ;; Truncate VALUE to the width and signedness of CT. Integer types only
   4740 ;; — pointer/array/etc. operands abort upstream.
   4741 (define (%const-trunc value ct)
   4742   (let* ((sz (ctype-size ct))
   4743          (k  (ctype-kind ct))
   4744          (mask (cond ((<= sz 0) 0)
   4745                      ((= sz 1) #xff)
   4746                      ((= sz 2) #xffff)
   4747                      ((= sz 4) #xffffffff)
   4748                      (else -1))))
   4749     (cond
   4750       ;; bool: 0 or 1.
   4751       ((eq? k 'bool) (if (= value 0) 0 1))
   4752       ;; 8-byte integers — value already fits in scheme's bignum.
   4753       ((or (eq? k 'i64) (eq? k 'u64))
   4754        (cond ((eq? k 'u64)
   4755               ;; Mask to 64 bits without losing sign on negative values.
   4756               (bit-and value #xffffffffffffffff))
   4757              (else value)))
   4758       ((%ctype-unsigned? ct) (bit-and value mask))
   4759       (else
   4760        ;; Signed: mask to width, then sign-extend if top bit is set.
   4761        (let* ((m (bit-and value mask))
   4762               (sign-bit (arithmetic-shift 1 (- (* sz 8) 1))))
   4763          (cond ((= 0 (bit-and m sign-bit)) m)
   4764                (else (- m (arithmetic-shift 1 (* sz 8))))))))))
   4765 
   4766 ;; Usual arithmetic conversions on (value . ctype) pairs. Both operands
   4767 ;; have already been integer-promoted (≤ int → int) by the caller.
   4768 ;; Returns three values: truncated a, truncated b, and the shared result
   4769 ;; ctype.
   4770 (define (%const-arith-conv ap bp)
   4771   (let* ((av (car ap)) (at (cdr ap))
   4772          (bv (car bp)) (bt (cdr bp))
   4773          (rt (%const-arith-conv-type at bt)))
   4774     (values (%const-trunc av rt) (%const-trunc bv rt) rt)))
   4775 
   4776 (define (%const-arith-conv-type at bt)
   4777   ;; Pick the wider type; tie-break on unsigned. Caller has already
   4778   ;; promoted both to >= int width.
   4779   (let ((sa (ctype-size at)) (sb (ctype-size bt)))
   4780     (cond
   4781       ((> sa sb) at)
   4782       ((> sb sa) bt)
   4783       ((%ctype-unsigned? at) at)
   4784       ((%ctype-unsigned? bt) bt)
   4785       (else at))))
   4786 
   4787 (define (%const-promote vp)
   4788   ;; Integer promotion (C11 §6.3.1.1): types narrower than int
   4789   ;; (i8/u8/i16/u16/bool) widen to (signed) int — every value of an
   4790   ;; unsigned sub-int type fits in int on this target, so the promotion
   4791   ;; rank picks signed int, not unsigned int. This matters for the
   4792   ;; usual arithmetic conversions in cross-signedness comparisons,
   4793   ;; e.g. ((unsigned char)-1 < (int)-1) must promote LHS to int 255
   4794   ;; (not u32 0xff) so the result is 0, not 1.
   4795   (let* ((v (car vp)) (ct (cdr vp))
   4796          (sz (ctype-size ct)))
   4797     (cond
   4798       ((< sz 4) (cons (%const-trunc v %t-i32) %t-i32))
   4799       (else vp))))
   4800 
   4801 (define (%const-bool? vp) (not (= 0 (car vp))))
   4802 
   4803 (define (parse-const-expr ps) (parse-const-cond ps))
   4804 
   4805 ;; Ternary (right-associative). Per C11 §6.6 ¶3 + §6.5.15/4 only the
   4806 ;; chosen branch is evaluated; the other need not be a valid constant
   4807 ;; expression (e.g. `1 ? 2 : 1/0` must yield 2, not abort). The dead
   4808 ;; arm is skipped via %const-skip-cond-{mid,rhs}, like the &&/||
   4809 ;; short-circuit paths above.
   4810 (define (parse-const-cond ps)
   4811   (let ((c (parse-const-lor ps)))
   4812     (cond
   4813       ((at-punct? ps 'qmark)
   4814        (advance ps)
   4815        (cond
   4816          ((%const-bool? c)
   4817           (let* ((t (parse-const-expr ps))
   4818                  (_ (expect-punct ps 'colon)))
   4819             (%const-skip-dead-arm ps)
   4820             t))
   4821          (else
   4822           (%const-skip-dead-arm ps)
   4823           (expect-punct ps 'colon)
   4824           (parse-const-cond ps))))
   4825       (else c))))
   4826 
   4827 ;; Generic top-level punct scanner used by skip-rhs / skip-cond helpers.
   4828 ;; Walks paren/bracket depth (a closing bracket at d=0 always stops) and
   4829 ;; optionally tracks ternary `?` depth. STOP? receives the punct value
   4830 ;; v at top-level (d=0, q=0 when q-aware?) and returns #t to stop. With
   4831 ;; Q-AWARE? = #t, a `?` at top-level opens a nested ternary and a
   4832 ;; matching `:` (q>0) closes it; the scanner stops on `:` only when q=0.
   4833 (define (%punct-scan ps stop? q-aware?)
   4834   (let lp ((d 0) (q 0))
   4835     (let ((t (peek ps)))
   4836       (cond
   4837         ((eq? (tok-kind t) 'EOF) #t)
   4838         ((not (eq? (tok-kind t) 'PUNCT))
   4839          (advance ps) (lp d q))
   4840         (else
   4841          (let ((v (tok-value t)))
   4842            (cond
   4843              ((or (eq? v 'lparen) (eq? v 'lbrack))
   4844               (advance ps) (lp (+ d 1) q))
   4845              ((or (eq? v 'rparen) (eq? v 'rbrack))
   4846               (cond ((zero? d) #t)
   4847                     (else (advance ps) (lp (- d 1) q))))
   4848              ((and q-aware? (zero? d) (eq? v 'qmark))
   4849               (advance ps) (lp d (+ q 1)))
   4850              ((and q-aware? (zero? d) (> q 0) (eq? v 'colon))
   4851               (advance ps) (lp d (- q 1)))
   4852              ((and (zero? d) (or (not q-aware?) (zero? q)) (stop? v))
   4853               #t)
   4854              (else (advance ps) (lp d q)))))))))
   4855 
   4856 ;; Skip the dead arm of a ternary. Same scanner whether we're skipping
   4857 ;; the middle (cond was false; will then expect-punct `:` and parse arm
   4858 ;; 3) or the third (cond was true; arm 2 already parsed and `:` already
   4859 ;; consumed). Both stop at top-level `:` / `,` / `;` / `}` with no
   4860 ;; open inner `?`; nested `?:` pairs are absorbed.
   4861 (define (%const-skip-dead-arm ps)
   4862   (%punct-scan ps
   4863     (lambda (v)
   4864       (or (eq? v 'colon) (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
   4865     #t))
   4866 
   4867 ;; Generic left-associative binary level.
   4868 ;; ops: alist of punct-sym → (vp vp → vp).
   4869 (define (%const-binl ps next ops)
   4870   (let lp ((a (next ps)))
   4871     (let* ((t (peek ps))
   4872            (hit (and (eq? (tok-kind t) 'PUNCT)
   4873                      (alist-ref/eq (tok-value t) ops))))
   4874       (cond ((not hit) a)
   4875             (else (advance ps) (lp (hit a (next ps))))))))
   4876 
   4877 ;; Arithmetic combiner: promote both, arith-conv, apply fn, truncate.
   4878 (define (%const-arith-op fn a b)
   4879   (let-values (((av bv rt) (%const-arith-conv (%const-promote a) (%const-promote b))))
   4880     (cons (%const-trunc (fn av bv) rt) rt)))
   4881 
   4882 ;; Like %const-arith-op but rejects a zero divisor.
   4883 (define (%const-div-op fn a b)
   4884   (let-values (((av bv rt) (%const-arith-conv (%const-promote a) (%const-promote b))))
   4885     (cond ((= bv 0) (die #f "const-expr: divide by zero")))
   4886     (cons (%const-trunc (fn av bv) rt) rt)))
   4887 
   4888 ;; Comparison combiner: result is always (0-or-1 . %t-i32).
   4889 (define (%const-cmp-op fn a b)
   4890   (let-values (((av bv _rt) (%const-arith-conv (%const-promote a) (%const-promote b))))
   4891     (cons (if (fn av bv) 1 0) %t-i32)))
   4892 
   4893 ;; Short-circuit per C11 §6.5.13/14 ¶4: rhs is not evaluated when the
   4894 ;; lhs determines the result. Required so `1 || (1/0)` and
   4895 ;; `0 && (1/0)` yield 1/0 rather than aborting on divide-by-zero.
   4896 (define (parse-const-lor ps)
   4897   (let lp ((a (parse-const-land ps)))
   4898     (cond
   4899       ((at-punct? ps 'lor)
   4900        (advance ps)
   4901        (cond
   4902          ((%const-bool? a)
   4903           (%const-skip-lor-rhs ps)
   4904           (lp (cons 1 %t-i32)))
   4905          (else
   4906           (let ((b (parse-const-land ps)))
   4907             (lp (cons (if (%const-bool? b) 1 0) %t-i32))))))
   4908       (else a))))
   4909 
   4910 (define (parse-const-land ps)
   4911   (let lp ((a (parse-const-bor ps)))
   4912     (cond
   4913       ((at-punct? ps 'land)
   4914        (advance ps)
   4915        (cond
   4916          ((not (%const-bool? a))
   4917           (%const-skip-land-rhs ps)
   4918           (lp (cons 0 %t-i32)))
   4919          (else
   4920           (let ((b (parse-const-bor ps)))
   4921             (lp (cons (if (%const-bool? b) 1 0) %t-i32))))))
   4922       (else a))))
   4923 
   4924 ;; Skip the rhs of a short-circuited && / ||. The rhs grammar is
   4925 ;; the operand level of the operator: parse-const-bor for &&,
   4926 ;; parse-const-land for ||. We can't just call those parsers because
   4927 ;; the rhs may itself be invalid as a constant expression (e.g.
   4928 ;; `1/0`); instead, scan tokens at paren/brack depth 0 until we hit
   4929 ;; another operator at the same-or-lower binding level, comma,
   4930 ;; semicolon, colon, qmark, rbrace, rbrack, rparen, or EOF.
   4931 (define (%const-skip-land-rhs ps)
   4932   ;; rhs of && is a parse-const-bor — stop on `&&`, `||`, `?`, `:`,
   4933   ;; `,`, `;`, `}`, and any closing/separator at depth 0.
   4934   (%punct-scan ps
   4935     (lambda (v)
   4936       (or (eq? v 'land) (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon)
   4937           (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
   4938     #f))
   4939 (define (%const-skip-lor-rhs ps)
   4940   ;; rhs of || is a parse-const-land — stop on `||` (left-assoc),
   4941   ;; `?`, `:`, `,`, `;`, `}`. `&&` binds TIGHTER than `||`, so it is
   4942   ;; absorbed into the rhs and we do NOT stop on it.
   4943   (%punct-scan ps
   4944     (lambda (v)
   4945       (or (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon)
   4946           (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace)))
   4947     #f))
   4948 
   4949 (define (parse-const-bor ps)
   4950   (%const-binl ps parse-const-bxor (list (cons 'bar   (lambda (a b) (%const-arith-op bit-or  a b))))))
   4951 (define (parse-const-bxor ps)
   4952   (%const-binl ps parse-const-band (list (cons 'caret (lambda (a b) (%const-arith-op bit-xor a b))))))
   4953 (define (parse-const-band ps)
   4954   (%const-binl ps parse-const-eq   (list (cons 'amp   (lambda (a b) (%const-arith-op bit-and a b))))))
   4955 
   4956 (define (parse-const-eq ps)
   4957   (%const-binl ps parse-const-rel
   4958     (list (cons 'eq2 (lambda (a b) (%const-cmp-op =                         a b)))
   4959           (cons 'ne  (lambda (a b) (%const-cmp-op (lambda (x y) (not (= x y))) a b))))))
   4960 
   4961 (define (parse-const-rel ps)
   4962   (%const-binl ps parse-const-shift
   4963     (list (cons 'lt (lambda (a b) (%const-cmp-op <  a b)))
   4964           (cons 'le (lambda (a b) (%const-cmp-op <= a b)))
   4965           (cons 'gt (lambda (a b) (%const-cmp-op >  a b)))
   4966           (cons 'ge (lambda (a b) (%const-cmp-op >= a b))))))
   4967 
   4968 ;; Shift combiner: result type is the (promoted) lhs type — rhs is
   4969 ;; just a count, promoted independently. SIGN selects shl (+1) or shr (-1).
   4970 (define (%const-shift-op sign a b)
   4971   (let* ((ap (%const-promote a))
   4972          (bp (%const-promote b))
   4973          (rt (cdr ap)))
   4974     (cons (%const-trunc (arithmetic-shift (car ap) (* sign (car bp))) rt)
   4975           rt)))
   4976 
   4977 (define (parse-const-shift ps)
   4978   (%const-binl ps parse-const-add
   4979     (list (cons 'shl (lambda (a b) (%const-shift-op  1 a b)))
   4980           (cons 'shr (lambda (a b) (%const-shift-op -1 a b))))))
   4981 
   4982 (define (parse-const-add ps)
   4983   (%const-binl ps parse-const-mul
   4984     (list (cons 'plus  (lambda (a b) (%const-arith-op + a b)))
   4985           (cons 'minus (lambda (a b) (%const-arith-op - a b))))))
   4986 
   4987 (define (parse-const-mul ps)
   4988   (%const-binl ps parse-const-cast
   4989     (list (cons 'star  (lambda (a b) (%const-arith-op *         a b)))
   4990           (cons 'slash (lambda (a b) (%const-div-op   quotient  a b)))
   4991           (cons 'pct   (lambda (a b) (%const-div-op   remainder a b))))))
   4992 
   4993 (define (parse-const-cast ps)
   4994   ;; (typename) operand — distinguished from ( expr ) by paren-is-group?.
   4995   ;; Pointer casts are accepted only as a type re-tag — the integer
   4996   ;; offset rides through unchanged. This is what the offsetof idiom
   4997   ;; `(T *)0` and the outer `(size_t) <ptr-const>` need; we do not
   4998   ;; admit general pointer arithmetic in const-expr.
   4999   (cond
   5000     ((at-punct? ps 'lparen)
   5001      (cond
   5002        ((%const-paren-is-cast? ps)
   5003         (advance ps)
   5004         (let*-values (((_sto bty) (parse-decl-spec ps))
   5005                       ((_n   ty)  (parse-declarator ps bty)))
   5006           (expect-punct ps 'rparen)
   5007           (cond
   5008             ((%ctype-int? ty)
   5009              (let ((v (parse-const-cast ps)))
   5010                (cons (%const-trunc (car v) ty) ty)))
   5011             ((eq? (ctype-kind ty) 'ptr)
   5012              (let ((v (parse-const-cast ps)))
   5013                (cons (car v) ty)))
   5014             (else
   5015              (die (tok-loc (peek ps))
   5016                   "const-expr: cast must be integer or pointer"
   5017                   (ctype-kind ty))))))
   5018        (else (parse-const-unary ps))))
   5019     (else (parse-const-unary ps))))
   5020 
   5021 (define (%const-paren-is-cast? ps)
   5022   ;; A '(' starts a cast iff the following token kicks off a type-name.
   5023   (%tok-decl-start? ps (peek2 ps)))
   5024 
   5025 (define (%ctype-int? ty)
   5026   (let ((k (ctype-kind ty)))
   5027     (or (eq? k 'i8) (eq? k 'u8) (eq? k 'i16) (eq? k 'u16)
   5028         (eq? k 'i32) (eq? k 'u32) (eq? k 'i64) (eq? k 'u64)
   5029         (eq? k 'bool) (eq? k 'enum))))
   5030 
   5031 (define (parse-const-unary ps)
   5032   (let ((t (peek ps)))
   5033     (pmatch t
   5034       (($ tok? (kind PUNCT) (value plus))
   5035        (advance ps) (%const-promote (parse-const-cast ps)))
   5036       (($ tok? (kind PUNCT) (value minus))
   5037        (advance ps)
   5038        (let* ((vp (%const-promote (parse-const-cast ps)))
   5039               (rt (cdr vp)))
   5040          (cons (%const-trunc (- 0 (car vp)) rt) rt)))
   5041       (($ tok? (kind PUNCT) (value tilde))
   5042        (advance ps)
   5043        (let* ((vp (%const-promote (parse-const-cast ps)))
   5044               (rt (cdr vp)))
   5045          (cons (%const-trunc (bit-not (car vp)) rt) rt)))
   5046       (($ tok? (kind PUNCT) (value bang))
   5047        (advance ps)
   5048        (let ((vp (parse-const-cast ps)))
   5049          (cons (if (%const-bool? vp) 0 1) %t-i32)))
   5050       (($ tok? (kind PUNCT) (value amp))
   5051        ;; Address-of in const-expr context. Restricted to the offsetof
   5052        ;; idiom: a null-pointer-typed base reached via (T *)0 (with
   5053        ;; optional grouping/deref) followed by ->/. field selectors.
   5054        ;; The integer value is the running byte offset; '&' wraps the
   5055        ;; designator's type in a pointer for any outer integer cast to
   5056        ;; consume.
   5057        (advance ps)
   5058        (let* ((dp (%const-parse-addrof-postfix ps)))
   5059          (cons (car dp) (%mk-ptr (cdr dp)))))
   5060       (($ tok? (kind KW) (value sizeof))
   5061        (advance ps)
   5062        (cond
   5063          ((at-punct? ps 'lparen)
   5064           (advance ps)
   5065           (cond
   5066             ((%const-tok-is-decl? ps)
   5067              (let*-values (((_sto bty) (parse-decl-spec ps))
   5068                            ((_n   ty)  (parse-declarator ps bty)))
   5069                (expect-punct ps 'rparen)
   5070                (cons (max (ctype-size ty) 0) %t-u64)))
   5071             (else
   5072              ;; sizeof(EXPR) in const-expr context. Operand is not
   5073              ;; evaluated (C11 §6.5.3.4) — snapshot the cg, parse the
   5074              ;; expr through the regular parser to recover its ctype,
   5075              ;; then rewind to discard any emission/vstack pushes.
   5076              (cons (%const-sizeof-expr ps #t) %t-u64))))
   5077          (else
   5078           ;; `sizeof EXPR` (no parens). Same no-eval rule.
   5079           (cons (%const-sizeof-expr ps #f) %t-u64))))
   5080       (else (parse-const-primary ps)))))
   5081 
   5082 ;; Does TOK begin a type-name? Type specifiers, qualifiers,
   5083 ;; struct/union/enum tags, and typedef-name idents. Storage classes
   5084 ;; (auto/register/static/extern/typedef) are NOT included — those
   5085 ;; appear only at declaration position; callers that need them
   5086 ;; (e.g. stmt-starts-decl?) check separately.
   5087 (define (%tok-decl-start? ps t)
   5088   (pmatch t
   5089     (($ tok? (kind KW) (value ,v))
   5090      (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
   5091          (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
   5092          (eq? v '_Bool) (eq? v 'float) (eq? v 'double)
   5093          (eq? v '_Complex) (eq? v '_Imaginary)
   5094          (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
   5095          (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict)
   5096          (eq? v 'inline)))
   5097     (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
   5098     (else #f)))
   5099 
   5100 (define (%const-tok-is-decl? ps) (%tok-decl-start? ps (peek ps)))
   5101 
   5102 (define (parse-const-primary ps)
   5103   (let ((t (peek ps)))
   5104     (pmatch t
   5105       (($ tok? (kind INT) (value ,v))
   5106        (advance ps)
   5107        ;; Untyped INT literals ride as i32. Suffixes (L, LL, U) aren't
   5108        ;; preserved through to the parser, but const-expr operands at
   5109        ;; the granularity 118 cares about all fit in i32.
   5110        (cons v %t-i32))
   5111       (($ tok? (kind CHAR) (value ,v))
   5112        (advance ps)
   5113        ;; Character constants have type int in C.
   5114        (cons v %t-i32))
   5115       (($ tok? (kind PUNCT) (value lparen))
   5116        (advance ps)
   5117        (let ((v (parse-const-expr ps)))
   5118          (expect-punct ps 'rparen) v))
   5119       (($ tok? (kind IDENT) (value ,n))
   5120        (let ((sm (scope-lookup ps n)))
   5121          (cond ((and sm (eq? (sym-kind sm) 'enum-const))
   5122                 (advance ps) (cons (sym-slot sm) %t-i32))
   5123                (else (die (tok-loc t) "const-expr: not a constant" n)))))
   5124       (else (die (tok-loc t) "const-expr: bad operand"
   5125                  (tok-value t))))))
   5126 
   5127 ;; ====================================================================
   5128 ;; offsetof support inside const-expr.
   5129 ;;
   5130 ;; Recognises `&((T *)0)->FIELD`, `&(*(T *)0).FIELD`, and chains thereof
   5131 ;; — the only address-of idioms that show up in static initializers
   5132 ;; (tcc.c options_W[] / options_f[] / options_m[] tables, and any
   5133 ;; offsetof macro expansion of the same shape). Each helper threads a
   5134 ;; (offset . ctype) pair: integer byte offset of the running designator
   5135 ;; from the null base, plus the lvalue's ctype. Field lookup reuses
   5136 ;; %cg-find-field, so anonymous union/struct members work the same way
   5137 ;; as in regular field access.
   5138 ;; ====================================================================
   5139 
   5140 (define (%const-parse-addrof-postfix ps)
   5141   ;; postfix: primary ( -> FIELD | . FIELD )*
   5142   (let lp ((p (%const-parse-addrof-primary ps)))
   5143     (pmatch (peek ps)
   5144       (($ tok? (kind PUNCT) (value arrow))
   5145        (advance ps) (lp (%const-addrof-arrow ps p)))
   5146       (($ tok? (kind PUNCT) (value dot))
   5147        (advance ps) (lp (%const-addrof-dot ps p)))
   5148       (else p))))
   5149 
   5150 (define (%const-parse-addrof-primary ps)
   5151   ;; primary: ( T )expr   ; pointer cast — the offsetof base
   5152   ;;        | ( postfix ) ; grouping
   5153   ;;        | * primary   ; deref
   5154   (cond
   5155     ((at-punct? ps 'lparen)
   5156      (cond
   5157        ((%const-paren-is-cast? ps)
   5158         (let ((cv (parse-const-cast ps)))
   5159           (cond
   5160             ((not (eq? (ctype-kind (cdr cv)) 'ptr))
   5161              (die #f "const-expr: addr-of: head must be a pointer cast"
   5162                   (ctype-kind (cdr cv)))))
   5163           cv))
   5164        (else
   5165         (advance ps)
   5166         (let ((r (%const-parse-addrof-postfix ps)))
   5167           (expect-punct ps 'rparen) r))))
   5168     ((at-punct? ps 'star)
   5169      (advance ps)
   5170      (let ((h (%const-parse-addrof-primary ps)))
   5171        (cond
   5172          ((not (eq? (ctype-kind (cdr h)) 'ptr))
   5173           (die #f "const-expr: addr-of: '*' on non-pointer"
   5174                (ctype-kind (cdr h)))))
   5175        (cons (car h) (ctype-ext (cdr h)))))
   5176     (else
   5177      (die (tok-loc (peek ps)) "const-expr: addr-of: unexpected token"
   5178           (tok-value (peek ps))))))
   5179 
   5180 (define (%const-addrof-arrow ps p)
   5181   (let* ((off (car p)) (ty (cdr p)))
   5182     (cond ((not (eq? (ctype-kind ty) 'ptr))
   5183            (die (tok-loc (peek ps)) "const-expr: -> on non-pointer"
   5184                 (ctype-kind ty))))
   5185     (let* ((sty (ctype-ext ty)) (sk (ctype-kind sty)))
   5186       (cond ((not (or (eq? sk 'struct) (eq? sk 'union)))
   5187              (die (tok-loc (peek ps))
   5188                   "const-expr: -> target not aggregate" sk)))
   5189       (%const-addrof-field ps sty off))))
   5190 
   5191 (define (%const-addrof-dot ps p)
   5192   (let* ((off (car p)) (ty (cdr p)) (k (ctype-kind ty)))
   5193     (cond ((not (or (eq? k 'struct) (eq? k 'union)))
   5194            (die (tok-loc (peek ps))
   5195                 "const-expr: . on non-aggregate" k)))
   5196     (%const-addrof-field ps ty off)))
   5197 
   5198 (define (%const-addrof-field ps sty base-off)
   5199   (let ((nt (peek ps)))
   5200     (cond ((not (eq? (tok-kind nt) 'IDENT))
   5201            (die (tok-loc nt)
   5202                 "const-expr: field selector needs an identifier"
   5203                 (tok-value nt))))
   5204     (advance ps)
   5205     (let* ((fields (car (cddr (ctype-ext sty))))
   5206            (f (%cg-find-field fields (tok-value nt))))
   5207       (cond ((not f) (die (tok-loc nt)
   5208                           "const-expr: no such field"
   5209                           (tok-value nt))))
   5210       (cons (+ base-off (car (cddr f))) (cadr f)))))
   5211 
   5212 ;; sizeof EXPR / sizeof(EXPR) in const-expr context. Delegates to the
   5213 ;; regular expression parser under a cg snapshot/rewind — same contract
   5214 ;; as parse-unary's sizeof: the operand is parsed to learn its type but
   5215 ;; not evaluated, so any emission or vstack push from the parse is
   5216 ;; discarded. Returns the operand's byte size as a non-negative int.
   5217 ;; If `paren?`, consumes the closing `)` after parsing.
   5218 (define (%const-sizeof-expr ps paren?)
   5219   (cond
   5220     ((not (ps-cg ps))
   5221      (die #f "#if: sizeof of expression not valid in preprocessor context"))
   5222     (else
   5223      (let ((tag (cg-snapshot (ps-cg ps))))
   5224        (cond (paren? (parse-expr ps) (expect-punct ps 'rparen))
   5225              (else  (parse-unary ps)))
   5226        (let* ((tp (cg-top (ps-cg ps)))
   5227               (sz (max (ctype-size (opnd-type tp)) 0)))
   5228          (cg-rewind (ps-cg ps) tag)
   5229          sz)))))
   5230 
   5231 ;; Convenience: returns the integer value alone (callers that don't
   5232 ;; need the type half of parse-const-expr's (value . ctype) result).
   5233 (define (parse-const-int ps) (car (parse-const-expr ps)))
   5234 
   5235 (define (parse-declarator ps base)
   5236   ;; Returns (values name type).
   5237   ((cdr (parse-decl-cont ps)) base
   5238    (lambda (n t) (values n t))))
   5239 
   5240 (define (parse-decl-cont ps)
   5241   (pmatch (peek ps)
   5242     (($ tok? (kind KW) (value __attribute__))
   5243      (skip-gnu-attribute! ps) (parse-decl-cont ps))
   5244     (($ tok? (kind PUNCT) (value star))
   5245      (advance ps) (eat-cv-quals! ps)
   5246      (let* ((r (parse-decl-cont ps)) (rf (cdr r)))
   5247        (cons (car r) (lambda (b k) (rf (%mk-ptr b) k)))))
   5248     (($ tok? (kind PUNCT) (value lparen))
   5249      (guard (paren-is-group? ps))
   5250      (advance ps)
   5251      (let* ((i (parse-decl-cont ps)) (if- (cdr i)))
   5252        (expect-punct ps 'rparen)
   5253        (let ((s (parse-decl-suf-cont ps)))
   5254          (cons (car i) (lambda (b k) (if- (s b) k))))))
   5255     (($ tok? (kind IDENT) (value ,n))
   5256      (advance ps)
   5257      (let ((s (parse-decl-suf-cont ps)))
   5258        (cons n (lambda (b k) (k n (s b))))))
   5259     (else
   5260      (let ((s (parse-decl-suf-cont ps)))
   5261        (cons #f (lambda (b k) (k #f (s b))))))))
   5262 
   5263 (define (parse-decl-suf-cont ps)
   5264   ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first):
   5265   ;;   int a[2][3]  ⇒  arr (arr int 3) 2     (outer dim 2)
   5266   ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as
   5267   ;; outermost). The recursive structure builds the inner suffix's
   5268   ;; result first, then this level wraps.
   5269   (pmatch (peek ps)
   5270     (($ tok? (kind PUNCT) (value lbrack))
   5271      (advance ps)
   5272      ;; C99 §6.7.5.2 allows `static`, type qualifiers (const /
   5273      ;; volatile / restrict), and `*` (variable length array
   5274      ;; placeholder) inside array-of-T brackets in function
   5275      ;; parameter declarators. We don't honour the qualifier
   5276      ;; semantics — just consume them so the dimension expression
   5277      ;; that follows parses.
   5278      (let lp ()
   5279        (cond
   5280          ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
   5281               (at-kw? ps 'restrict) (at-kw? ps 'static))
   5282           (advance ps) (lp))
   5283          (else #t)))
   5284      (let* ((ln (cond ((at-punct? ps 'rbrack) -1)
   5285                       ((at-punct? ps 'star) (advance ps) -1)
   5286                       (else (parse-const-int ps))))
   5287             (_ (expect-punct ps 'rbrack))
   5288             (r (parse-decl-suf-cont ps)))
   5289        (lambda (b) (%mk-arr (r b) ln))))
   5290     (($ tok? (kind PUNCT) (value lparen))
   5291      (advance ps)
   5292      (let-values (((p v) (parse-fn-params ps)))
   5293        (expect-punct ps 'rparen)
   5294        (let ((r (parse-decl-suf-cont ps)))
   5295          (lambda (b) (%mk-fn (r b) p v)))))
   5296     (($ tok? (kind KW) (value __attribute__))
   5297      (skip-gnu-attribute! ps) (parse-decl-suf-cont ps))
   5298     (else (lambda (b) b))))
   5299 
   5300 (define (paren-is-group? ps)
   5301   (pmatch (peek2 ps)
   5302     (($ tok? (kind KW) (value ,v))
   5303      (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short)
   5304                 (eq? v 'int) (eq? v 'long) (eq? v 'signed)
   5305                 (eq? v 'unsigned) (eq? v '_Bool)
   5306                 (eq? v 'float) (eq? v 'double)
   5307                 (eq? v '_Complex) (eq? v '_Imaginary)
   5308                 (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
   5309                 (eq? v 'const) (eq? v 'volatile)
   5310                 (eq? v 'restrict) (eq? v 'static)
   5311                 (eq? v 'extern) (eq? v 'register)) #f)
   5312            (else #t)))
   5313     (($ tok? (kind IDENT) (value ,n))
   5314      (cond ((typedef? ps n) #f) (else #t)))
   5315     (($ tok? (kind PUNCT) (value rparen)) #f)
   5316     (($ tok? (kind PUNCT) (value star))   #t)
   5317     (($ tok? (kind PUNCT) (value lparen)) #t)
   5318     (($ tok? (kind PUNCT) (value lbrack)) #t)
   5319     (else #f)))
   5320 
   5321 (define (parse-fn-params ps)
   5322   ;; Returns (values params variadic?).
   5323   (cond
   5324     ((at-punct? ps 'rparen) (values '() #f))
   5325     ((and (at-kw? ps 'void)
   5326           (eq? (tok-kind (peek2 ps)) 'PUNCT)
   5327           (eq? (tok-value (peek2 ps)) 'rparen))
   5328      (advance ps) (values '() #f))
   5329     (else
   5330      (let loop ((acc '()))
   5331        (cond
   5332          ((at-punct? ps 'ellipsis)
   5333           (advance ps) (values (reverse acc) #t))
   5334          (else
   5335           (let*-values (((_sto bty) (parse-decl-spec ps))
   5336                         ((nm   ty)  (parse-declarator ps bty)))
   5337             (let ((ty2 (cond ((ctype-is-arr? ty)
   5338                               (%mk-ptr (car (ctype-ext ty))))
   5339                              ((ctype-is-fn? ty) (%mk-ptr ty))
   5340                              (else ty))))
   5341               (cond
   5342                 ((at-punct? ps 'comma)
   5343                  (advance ps) (loop (cons (cons nm ty2) acc)))
   5344                 ((at-punct? ps 'rparen)
   5345                  (values (reverse (cons (cons nm ty2) acc)) #f))
   5346                 (else (die (tok-loc (peek ps)) "param")))))))))))
   5347 
   5348 ;; ====================================================================
   5349 ;; Phase 3: parse-decl-or-fn boundary — scratch by default, promote
   5350 ;; surviving roots into main and reset scratch at each top-level decl.
   5351 ;;
   5352 ;; Promotion uses the prelude's generic deep-copy via
   5353 ;; call-with-scratch-cycle; the per-decl identity-preserving map is
   5354 ;; folded into a single deep-copy context shared across all roots.
   5355 ;;
   5356 ;; Per-decl mutable state retained on the cc side:
   5357 ;;   %promote-pending-completions: list of struct/union ctypes that
   5358 ;;     complete-agg! mutated during the current decl. These are
   5359 ;;     pre-existing main-heap records whose ext now points at scratch;
   5360 ;;     ext is rewritten via deep-copy before the scratch reset.
   5361 ;; ====================================================================
   5362 
   5363 (define %promote-pending-completions '())
   5364 
   5365 (define (rewrite-pending-completions! ctx)
   5366   (for-each
   5367     (lambda (c)
   5368       (cond ((heap-in-main? c)
   5369              (ctype-ext-set! c (deep-copy ctx (ctype-ext c))))))
   5370     %promote-pending-completions)
   5371   (set! %promote-pending-completions '()))
   5372 
   5373 ;; Deep-copy each top-level world alist into main. The world-tags /
   5374 ;; world-scope stacks contain only the file-scope frame at decl
   5375 ;; boundaries (nested frames popped by scope-leave!); rebuild that head
   5376 ;; frame and preserve the (always-empty) tail. world-str-pool is a flat
   5377 ;; alist. deep-copy short-circuits prior-decl entries via heap-in-current?,
   5378 ;; so this is linear in the new entries only.
   5379 (define (promote-roots! w ctx)
   5380   (rewrite-pending-completions! ctx)
   5381   (let ((tn (world-tags w)))
   5382     (world-tags-set! w (cons (deep-copy ctx (car tn)) (cdr tn))))
   5383   (let ((sn (world-scope w)))
   5384     (world-scope-set! w (cons (deep-copy ctx (car sn)) (cdr sn))))
   5385   (world-str-pool-set!   w (deep-copy ctx (world-str-pool   w)))
   5386   (world-tentatives-set! w (deep-copy ctx (world-tentatives w))))
   5387 
   5388 ;; Iter-buffer carryover. The pp-iter / lex-iter records themselves
   5389 ;; live in main from cc-init; only their tok-iter-buf slots and the
   5390 ;; pp-state's pending / out / cur-file / macros slots can hold
   5391 ;; scratch-allocated content. Rewrite each in place via deep-copy.
   5392 (define (promote-iter-buffers! pp-it ctx)
   5393   (let* ((st     (tok-iter-state pp-it))
   5394          (lex-it (pps-lex-iter st)))
   5395     (tok-iter-buf-set! pp-it (deep-copy ctx (tok-iter-buf pp-it)))
   5396     (cond (lex-it
   5397            (tok-iter-buf-set! lex-it
   5398              (deep-copy ctx (tok-iter-buf lex-it)))))
   5399     (pps-up-pending-set! st (deep-copy ctx (pps-up-pending st)))
   5400     (pps-out-buf-set!    st (deep-copy ctx (pps-out-buf    st)))
   5401     (pps-cur-file-set!   st (deep-copy ctx (pps-cur-file   st)))
   5402     (pps-cond-stack-set! st (deep-copy ctx (pps-cond-stack st)))
   5403     (pps-macros-set!     st (deep-copy ctx (pps-macros     st)))))
   5404 
   5405 (define (parse-translation-unit ps)
   5406   (let loop ()
   5407     (let ((at-eof? #f))
   5408       (call-with-scratch-cycle
   5409         (lambda ()
   5410           (cond
   5411             ((eq? (tok-kind (peek ps)) 'EOF) (set! at-eof? #t))
   5412             (else
   5413              (cond
   5414                ((debug-log?)
   5415                 (let ((loc (tok-loc (peek ps))))
   5416                   (debug-log "decl" "line" (loc-line loc)
   5417                              "heap" (heap-usage)))))
   5418              (parse-decl-or-fn ps))))
   5419         (lambda ()
   5420           (cond
   5421             ((not at-eof?)
   5422              (let ((ctx (make-deep-copy-context)))
   5423                (promote-roots! (ps-world ps) ctx)
   5424                (promote-iter-buffers! (ps-iter ps) ctx))
   5425              ;; cg-fn-meta may hold scratch alist conses left over from
   5426              ;; the just-finished function; cg-fn-begin would reset it,
   5427              ;; but a trailing fn means it'd dangle past the reset.
   5428              (cg-fn-meta-set! (ps-cg ps) '())))))
   5429       (cond
   5430         (at-eof? #t)
   5431         (else (loop))))))
   5432 
   5433 (define (parse-decl-or-fn ps)
   5434   (let-values (((sto b) (parse-decl-spec ps)))
   5435     (cond
   5436       ((at-punct? ps 'semi) (advance ps) 'decl)
   5437       (else
   5438        (let-values (((n t) (parse-declarator ps b)))
   5439          (cond
   5440            ((and (ctype-is-fn? t) (at-punct? ps 'lbrace))
   5441             (parse-fn-body ps sto n t) 'fn)
   5442            (else
   5443             (handle-decl ps sto n t)
   5444             (let lp ()
   5445               (cond
   5446                 ((at-punct? ps 'comma)
   5447                  (advance ps)
   5448                  (let-values (((n2 t2) (parse-declarator ps b)))
   5449                    (handle-decl ps sto n2 t2) (lp)))
   5450                 (else (expect-punct ps 'semi) 'decl))))))))))
   5451 
   5452 ;; ---- Block-scope inferred-length array length resolution -------------
   5453 ;; The token iterator buffers lookahead in a list (see tok-iter); we
   5454 ;; can pull arbitrarily many tokens, then push them all back via
   5455 ;; iter-unget!. We use that to peek the initializer that follows `=`
   5456 ;; (without consuming it) and count its elements so cg-alloc-slot can
   5457 ;; reserve the right number of bytes BEFORE the initializer-emission
   5458 ;; loop runs (and starts spilling intermediate values into newly-
   5459 ;; allocated frame slots).
   5460 ;;
   5461 ;; Only the OUTERMOST length is inferred per C99 6.7.8/22, so for
   5462 ;; `int x[][3] = {{1,2,3},{4,5,6}};` we just count top-level
   5463 ;; brace-or-comma groups; the inner brace groups don't matter.
   5464 
   5465 (define (%peek-inferred-arr-init? ps)
   5466   ;; Check whether the next-after-`=` token starts a brace-init or a
   5467   ;; string-literal — the only initializer shapes that can resolve a
   5468   ;; block-scope inferred-length array. We do NOT consume `=`; we
   5469   ;; peek2 instead.
   5470   (let ((t2 (peek2 ps)))
   5471     (or (and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace))
   5472         (eq? (tok-kind t2) 'STR))))
   5473 
   5474 (define (%resolve-inferred-arr-len ps ty)
   5475   ;; Returns a fresh array ctype with the resolved length. Does NOT
   5476   ;; consume the `=` or any of the initializer tokens — every token
   5477   ;; pulled is unget back in original order.
   5478   (let* ((eq-tok (iter-next (ps-iter ps)))    ; consume `=` (will unget)
   5479          (first  (iter-next (ps-iter ps)))     ; consume `{` or STR
   5480          (collected (list first eq-tok))       ; head order: revs at end
   5481          (count
   5482           (cond
   5483             ((eq? (tok-kind first) 'STR)
   5484              ;; String length + NUL.
   5485              (+ (bytevector-length (tok-value first)) 1))
   5486             (else
   5487              ;; first is `{`. Count top-level commas + 1, ignoring a
   5488              ;; trailing comma before `}`. Track brace depth so nested
   5489              ;; `{` for sub-aggregates are skipped.
   5490              (let lp ((depth 1) (n 0) (saw-elem? #f) (last-was-comma? #f)
   5491                       (acc collected))
   5492                (let ((t (iter-next (ps-iter ps))))
   5493                  (let ((acc2 (cons t acc)))
   5494                    (cond
   5495                      ((eq? (tok-kind t) 'EOF)
   5496                       ;; Bail; let the real parser report the error
   5497                       ;; after we restore tokens.
   5498                       (%inferred-arr-restore! ps acc2)
   5499                       (die #f "init: unterminated brace"))
   5500                      ((and (eq? (tok-kind t) 'PUNCT)
   5501                            (eq? (tok-value t) 'lbrace))
   5502                       (lp (+ depth 1) n #t #f acc2))
   5503                      ((and (eq? (tok-kind t) 'PUNCT)
   5504                            (eq? (tok-value t) 'rbrace))
   5505                       (cond
   5506                         ((= depth 1)
   5507                          ;; Done. Restore tokens (acc2 includes the
   5508                          ;; closing `}`).
   5509                          (%inferred-arr-restore! ps acc2)
   5510                          (cond ((not saw-elem?) 0)
   5511                                (last-was-comma? n)
   5512                                (else (+ n 1))))
   5513                         (else (lp (- depth 1) n saw-elem? #f acc2))))
   5514                      ((and (eq? (tok-kind t) 'PUNCT)
   5515                            (eq? (tok-value t) 'comma)
   5516                            (= depth 1))
   5517                       (lp depth (+ n 1) saw-elem? #t acc2))
   5518                      (else
   5519                       (lp depth n #t #f acc2)))))))))
   5520          )
   5521     (cond
   5522       ((eq? (tok-kind first) 'STR)
   5523        (%inferred-arr-restore! ps collected)))
   5524     (%init-fixed-arr-type ty count)))
   5525 
   5526 (define (%inferred-arr-restore! ps acc)
   5527   ;; acc is a stack of tokens in REVERSE consume order (most-recent
   5528   ;; first). iter-unget! prepends one at a time, so iterating acc in
   5529   ;; its current order pushes them back in the right sequence —
   5530   ;; i.e. the oldest-consumed token ends up at the front of the
   5531   ;; lookahead buffer.
   5532   (let lp ((xs acc))
   5533     (cond
   5534       ((null? xs) #t)
   5535       (else (iter-unget! (ps-iter ps) (car xs)) (lp (cdr xs))))))
   5536 
   5537 (define (handle-decl ps sto n ty)
   5538   (cond
   5539     ((not n) (die #f "no name"))
   5540     ((eq? sto 'typedef)
   5541      (scope-bind! ps n (%sym n 'typedef #f ty #f #t)))
   5542     ((ctype-is-fn? ty)
   5543      (scope-bind! ps n
   5544                   (%sym n 'fn (or sto 'extern) ty #f #f)))
   5545     ;; §I: block-scope `static` routes to a global with a name mangled
   5546     ;; on the enclosing function so two functions can each have their
   5547     ;; own `static int n;` without colliding. The sym's NAME holds the
   5548     ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__"
   5549     ;; onto sym-name to derive the emitted label); scope-bind!s key
   5550     ;; remains the original identifier for source-level lookup.
   5551     ((and (eq? sto 'static) (ps-fn-ctx ps))
   5552      (let* ((fname (fn-ctx-name (ps-fn-ctx ps)))
   5553             (mangled (bytevector-append fname "__" n)))
   5554        (cond
   5555          ((at-punct? ps 'assign)
   5556           (advance ps)
   5557           ;; Parse init first so an inferred-length array picks up its
   5558           ;; resolved type before sm is constructed (sym is immutable).
   5559           (let-values (((pieces ty2) (parse-init-global ps ty)))
   5560             (let ((sm (%sym mangled 'var 'static ty2 #f #t)))
   5561               (scope-bind! ps n sm)
   5562               (cg-emit-global (ps-cg ps) sm pieces))))
   5563          (else
   5564           (let ((sm (%sym mangled 'var 'static ty #f #t)))
   5565             (scope-bind! ps n sm)
   5566             (cg-emit-global (ps-cg ps) sm #f))))))
   5567     (else
   5568      (cond
   5569        ((not (ps-fn-ctx ps))
   5570         ;; File-scope decls. Three cases:
   5571         ;;   (a) initializer present  -> full external definition.
   5572         ;;   (b) `extern` no init     -> declaration only.
   5573         ;;   (c) no init, no `extern` -> tentative definition.
   5574         ;; (a) emits to .data immediately. (b) is recorded but emits
   5575         ;; nothing. (c) is recorded as `defined?=#f` and added to
   5576         ;; world-tentatives; cg-finish emits .bss at end of TU only if
   5577         ;; no full definition appeared. This lets two `static int x;`
   5578         ;; or a `static int x;` followed by `static int x = 1;`
   5579         ;; coexist (C 6.9.2 tentative-def merge).
   5580         (cond
   5581           ((at-punct? ps 'assign)
   5582            (advance ps)
   5583            (let-values (((pieces ty2) (parse-init-global ps ty)))
   5584              (let ((sm (%sym n 'var (or sto 'extern) ty2 #f #t)))
   5585                (scope-bind! ps n sm)
   5586                (cg-emit-global (ps-cg ps) sm pieces))))
   5587           ((eq? sto 'extern)
   5588            (let ((sm (%sym n 'var 'extern ty #f #f)))
   5589              (scope-bind! ps n sm)
   5590              (cg-emit-extern (ps-cg ps) sm)))
   5591           (else
   5592            (let ((sm (%sym n 'var (or sto 'extern) ty #f #f)))
   5593              (scope-bind! ps n sm)
   5594              (cg-add-tentative! (ps-cg ps) n)))))
   5595        (else
   5596         ;; Block-scope inferred-length array (`int a[] = {…};` or
   5597         ;; `char s[] = "…";`): peek the initializer past `=` to count
   5598         ;; elements / measure the string and rebuild `ty` with the
   5599         ;; resolved length BEFORE cg-alloc-slot. Otherwise the slot
   5600         ;; is sized off a -1 / 0 ctype-size (capped to 1 byte) and
   5601         ;; the per-element stores in parse-init-local-aggregate write
   5602         ;; past frame-hi — the next %cg-spill-reg then allocates
   5603         ;; right inside the array, clobbering elements.
   5604         (let* ((ty (cond
   5605                      ((and (eq? (ctype-kind ty) 'arr)
   5606                            (< (cdr (ctype-ext ty)) 0)
   5607                            (at-punct? ps 'assign)
   5608                            (%peek-inferred-arr-init? ps))
   5609                       (%resolve-inferred-arr-len ps ty))
   5610                      (else ty)))
   5611                (sz (max (ctype-size ty) 1))
   5612                (al (max (ctype-align ty) 1))
   5613                (sl (cg-alloc-slot (ps-cg ps) sz al))
   5614                (sm (%sym n 'var (or sto 'auto) ty sl #t)))
   5615           (scope-bind! ps n sm)
   5616           (cond
   5617             ((at-punct? ps 'assign)
   5618              (advance ps)
   5619              (cond
   5620                ;; Aggregate locals get the per-element store treatment.
   5621                ((or (at-punct? ps 'lbrace)
   5622                     (and (eq? (ctype-kind ty) 'arr)
   5623                          (eq? (tok-kind (peek ps)) 'STR)))
   5624                 (parse-init-local-aggregate ps sm ty))
   5625                ;; Struct/union initializer from a non-brace expression
   5626                ;; (typically a function call returning by-value). The
   5627                ;; expr produces a struct lval; we copy bytes into the
   5628                ;; destination slot.
   5629                ((or (eq? (ctype-kind ty) 'struct)
   5630                     (eq? (ctype-kind ty) 'union))
   5631                 (cg-push-sym (ps-cg ps) sm)
   5632                 (parse-expr-bp ps 4)
   5633                 (cg-copy-struct (ps-cg ps)))
   5634                (else
   5635                 (cg-push-sym (ps-cg ps) sm)
   5636                 (parse-expr-bp ps 4) (rval! ps)
   5637                 (cg-cast (ps-cg ps) ty)
   5638                 (cg-assign (ps-cg ps))
   5639                 (cg-pop (ps-cg ps)))))
   5640             (else #t))))))))
   5641 
   5642 ;; ====================================================================
   5643 ;; Initializers (see CC.md §Variable initializers).
   5644 ;;
   5645 ;; parse-init-global ps ty
   5646 ;;   Reads the initializer following `=` for a file-scope or block-scope
   5647 ;;   static var of static-storage type `ty` and returns a list of
   5648 ;;   pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for
   5649 ;;   the piece grammar.
   5650 ;;
   5651 ;; parse-init-local ps sm ty
   5652 ;;   Reads the initializer for an auto-storage variable bound to slot
   5653 ;;   sym `sm` and emits per-element store cg ops. Returns unspecified.
   5654 ;; ====================================================================
   5655 
   5656 (define (%int->le-bv n nbytes)
   5657   ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes
   5658   ;; >= sign-bit are filled by repeated >>8 (works for both signed and
   5659   ;; unsigned because we only keep the low N bytes).
   5660   (let ((out (make-bytevector nbytes 0)))
   5661     (let loop ((i 0) (v n))
   5662       (cond
   5663         ((= i nbytes) out)
   5664         (else
   5665          (bytevector-u8-set! out i (bit-and v 255))
   5666          (loop (+ i 1) (arithmetic-shift v -8)))))))
   5667 
   5668 ;; File-scope compound literal (C99 §6.5.2.5). The bracketed initializer
   5669 ;; following a typename in a static-storage initializer (or behind `&`
   5670 ;; in same) is an unnamed object with static storage duration. Drive
   5671 ;; the existing parse-init-global → cg-emit-global pipeline against a
   5672 ;; synthetic sym whose label is freshly minted via %cg-fresh-cl-label.
   5673 ;; Returns the emitted label; the caller wraps it in a (label-ref . LBL)
   5674 ;; piece. The leading `(T)` and the storage-class disambiguation belong
   5675 ;; to the caller — this entry point assumes peek = `{`.
   5676 (define (%emit-fs-compound-literal ps ty)
   5677   (let-values (((pieces ty2) (parse-init-global ps ty)))
   5678     (let* ((lbl (%cg-fresh-cl-label (ps-cg ps)))
   5679            ;; storage 'extern → %cg-sym-label returns the bare name
   5680            ;; unchanged (no extra "cc__" prefix), so the emitted label
   5681            ;; matches what we hand back to the caller.
   5682            (sm  (%sym lbl 'var 'extern ty2 #f #t)))
   5683       (cg-emit-global (ps-cg ps) sm pieces)
   5684       lbl)))
   5685 
   5686 (define (%const-init-piece ps ty)
   5687   ;; Parse a non-brace initializer expression for scalar type `ty` and
   5688   ;; return a single piece. Recognised forms:
   5689   ;;   - INT (with optional unary +/-)               -> N-byte LE bv
   5690   ;;   - enum-const IDENT                            -> N-byte LE bv
   5691   ;;   - &IDENT (address of a global var/fn)         -> (label-ref . cc__name)
   5692   ;;   - &(T){...} (address of file-scope literal)   -> (label-ref . cc__cl_N)
   5693   ;;   - IDENT  (function name; decays to fn ptr)    -> (label-ref . cc__name)
   5694   ;;   - STR    (only for char* targets)             -> (label-ref . string-pool-label)
   5695   ;;   - (T){...} (file-scope compound literal)      -> (label-ref . cc__cl_N)
   5696   (let ((t (peek ps)))
   5697     (cond
   5698       ;; Address initializer: &ident -> label-ref
   5699       ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp))
   5700        (advance ps)
   5701        (let ((it (peek ps)))
   5702          (cond
   5703            ((eq? (tok-kind it) 'IDENT)
   5704             (advance ps)
   5705             (let ((sm (scope-lookup ps (tok-value it))))
   5706               (cond
   5707                 ((not sm) (die (tok-loc it) "init: undecl" (tok-value it)))
   5708                 ((or (eq? (sym-kind sm) 'fn)
   5709                      (and (eq? (sym-kind sm) 'var)
   5710                           (or (eq? (sym-storage sm) 'static)
   5711                               (eq? (sym-storage sm) 'extern))))
   5712                  (cons 'label-ref (%cg-sym-label sm)))
   5713                 (else
   5714                  (die (tok-loc it) "init: &x must reference a global"
   5715                       (tok-value it))))))
   5716            ;; &(T){...} — address of an unnamed file-scope compound
   5717            ;; literal. Parse the typename, expect `{`, drive the
   5718            ;; literal into .data, and yield its label.
   5719            ((and (eq? (tok-kind it) 'PUNCT) (eq? (tok-value it) 'lparen)
   5720                  (%const-paren-is-cast? ps))
   5721             (advance ps)
   5722             (let*-values (((_sto bty) (parse-decl-spec ps))
   5723                           ((_n   ty2) (parse-declarator ps bty)))
   5724               (expect-punct ps 'rparen)
   5725               (cond
   5726                 ((not (at-punct? ps 'lbrace))
   5727                  (die (tok-loc (peek ps))
   5728                       "init: &(T) must be followed by { ... }"
   5729                       (tok-value (peek ps)))))
   5730               (cons 'label-ref (%emit-fs-compound-literal ps ty2))))
   5731            (else (die (tok-loc it) "init: &?" (tok-value it))))))
   5732       ;; (T){...} — file-scope compound literal. The literal is an
   5733       ;; lvalue of array/struct/union type; assignment to a pointer
   5734       ;; target decays it via its label address (label = first byte).
   5735       ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen)
   5736             (%const-paren-is-cast? ps)
   5737             ;; Speculatively look past `(T)` for `{`. Since we have no
   5738             ;; 3-token peek, we have to commit to the (T) parse; if the
   5739             ;; following token isn't `{` it's a plain cast, so we fall
   5740             ;; back to the const-int path with the type already consumed.
   5741             #t)
   5742        ;; Take the (T) ourselves so we can dispatch on the next token.
   5743        (advance ps)
   5744        (let*-values (((_sto bty) (parse-decl-spec ps))
   5745                      ((_n   ty2) (parse-declarator ps bty)))
   5746          (expect-punct ps 'rparen)
   5747          (cond
   5748            ((at-punct? ps 'lbrace)
   5749             (cons 'label-ref (%emit-fs-compound-literal ps ty2)))
   5750            (else
   5751             ;; Not a compound literal — it's a constant cast, e.g.
   5752             ;; `(int)(unsigned char)257`. Mirror parse-const-cast's
   5753             ;; cast arm with the already-parsed type.
   5754             (cond
   5755               ((%ctype-int? ty2)
   5756                (let ((v (parse-const-cast ps)))
   5757                  (%int->le-bv (%const-trunc (car v) ty2)
   5758                               (max (ctype-size ty) 1))))
   5759               ((eq? (ctype-kind ty2) 'ptr)
   5760                ;; Pointer cast in const-expr: type-retag only. We expect
   5761                ;; the operand to be an integer-shaped const (e.g. 0) and
   5762                ;; emit it as the target's byte width.
   5763                (let ((v (parse-const-cast ps)))
   5764                  (%int->le-bv (car v) (max (ctype-size ty) 1))))
   5765               (else
   5766                (die (tok-loc (peek ps))
   5767                     "init: cast to non-scalar non-compound-literal"
   5768                     (ctype-kind ty2))))))))
   5769       ;; Function name or array name as a label-ref initializer.
   5770       ;; (Both decay to a pointer when used as a value.)
   5771       ((and (eq? (tok-kind t) 'IDENT)
   5772             (let ((sm (scope-lookup ps (tok-value t))))
   5773               (and sm
   5774                    (or (eq? (sym-kind sm) 'fn)
   5775                        (and (eq? (sym-kind sm) 'var)
   5776                             (eq? (ctype-kind (sym-type sm)) 'arr)
   5777                             (or (eq? (sym-storage sm) 'static)
   5778                                 (eq? (sym-storage sm) 'extern)))))))
   5779        (advance ps)
   5780        (let ((sm (scope-lookup ps (tok-value t))))
   5781          (cons 'label-ref (%cg-sym-label sm))))
   5782       ;; Plain string literal as char* initializer.
   5783       ((eq? (tok-kind t) 'STR)
   5784        (advance ps)
   5785        (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t))))
   5786          (cons 'label-ref lbl)))
   5787       ;; Otherwise it's a const integer.
   5788       (else
   5789        (let ((v (parse-const-int ps)))
   5790          (%int->le-bv v (max (ctype-size ty) 1)))))))
   5791 
   5792 (define (%init-array-elem-type ty)
   5793   (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty)))
   5794         (else (die #f "init: not an array" ty))))
   5795 
   5796 (define (%init-array-decl-len ty)
   5797   ;; Declared array length (-1 = inferred).
   5798   (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1)))
   5799 
   5800 (define (%init-fixed-arr-type ty count)
   5801   ;; Construct a fresh array ctype with the inferred length resolved
   5802   ;; to `count`. Pure — does not mutate `ty`. For non-inferred or
   5803   ;; non-array `ty`, callers should detect this themselves and just
   5804   ;; pass `ty` through.
   5805   (%mk-arr (car (ctype-ext ty)) count))
   5806 
   5807 (define (%init-struct-fields ty)
   5808   ;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
   5809   (let ((ext (ctype-ext ty)))
   5810     (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext)))
   5811           (else (die #f "init: not a struct" ty)))))
   5812 
   5813 ;; After processing a designated initializer for FNAME, return the
   5814 ;; field list with FNAME and all preceding (already-overwritten or
   5815 ;; skipped) fields removed. Empty list if FNAME isn't found (caller
   5816 ;; should already have validated the field exists).
   5817 (define (%init-drop-thru-field fields fname)
   5818   (cond ((null? fields) '())
   5819         ((equal? (car (car fields)) fname) (cdr fields))
   5820         (else (%init-drop-thru-field (cdr fields) fname))))
   5821 
   5822 ;; Element/field dispatch for global aggregate initializers. ELIDE? = #f
   5823 ;; means caller has just consumed `{` for this element and we own the
   5824 ;; matching `}`; ELIDE? = #t is C99 §6.7.8 ¶22 brace elision (the
   5825 ;; sub-aggregate draws items from the parent stream, no inner braces).
   5826 ;; Returns the piece-list contributing this element to the encoding.
   5827 (define (%global-init-elem ps t elide?)
   5828   (let ((k (ctype-kind t)))
   5829     (cond
   5830       ((eq? k 'arr)
   5831        (let-values (((p _c) (cond
   5832                               (elide? (%parse-init-array-list/mode ps t #f))
   5833                               (else   (%parse-init-array-list ps t)))))
   5834          p))
   5835       ((or (eq? k 'struct) (eq? k 'union))
   5836        (cond
   5837          (elide? (%parse-init-struct-list/mode ps t #f))
   5838          (else   (%parse-init-struct-list ps t))))
   5839       (else
   5840        (let ((p (%const-init-piece ps t)))
   5841          (cond
   5842            (elide? (list p))
   5843            (else
   5844             (cond ((at-punct? ps 'comma) (advance ps)))
   5845             (expect-punct ps 'rbrace)
   5846             (list p))))))))
   5847 
   5848 ;; Element/field dispatch for local aggregate initializers. Mirrors
   5849 ;; %global-init-elem but emits per-element store ops via cg-assign for
   5850 ;; scalar leaves, and recurses into the local-list walkers for
   5851 ;; aggregates. Returns 0; the side effect is the emitted code.
   5852 (define (%local-init-elem ps sm eoff t elide?)
   5853   (let ((k (ctype-kind t)))
   5854     (cond
   5855       ((eq? k 'arr)
   5856        (cond
   5857          (elide? (%parse-init-local-array-list/mode ps sm eoff t #f))
   5858          (else   (%parse-init-local-array-list ps sm eoff t))))
   5859       ((or (eq? k 'struct) (eq? k 'union))
   5860        (cond
   5861          (elide? (%parse-init-local-struct-list/mode ps sm eoff t #f))
   5862          (else   (%parse-init-local-struct-list ps sm eoff t))))
   5863       (else
   5864        (%push-frame-elem-lval ps eoff t)
   5865        (parse-expr-bp ps 4) (rval! ps)
   5866        (cg-cast (ps-cg ps) t)
   5867        (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
   5868        (cond
   5869          (elide? 0)
   5870          (else
   5871           (cond ((at-punct? ps 'comma) (advance ps)))
   5872           (expect-punct ps 'rbrace)))))))
   5873 
   5874 (define (%pad-piece nbytes)
   5875   (make-bytevector nbytes 0))
   5876 
   5877 ;; Static aggregate init streaming support. parse-translation-unit runs
   5878 ;; declarations in scratch, but a large file-scope array initializer can
   5879 ;; contain hundreds of independent elements. Parse one array element
   5880 ;; past a heap mark, promote the element pieces and parser lookahead into
   5881 ;; main, rewind that element's transient lexer/pp/const-expr scratch, and
   5882 ;; keep the outer pieces list in main as well.
   5883 (define (%init-promote-unit ps thunk)
   5884   (let ((mark (heap-mark)))
   5885     (let ((scratch-result (thunk)))
   5886       (use-main-heap!)
   5887       (let ((ctx (make-deep-copy-context)))
   5888         (promote-roots! (ps-world ps) ctx)
   5889         (promote-iter-buffers! (ps-iter ps) ctx)
   5890         (let ((main-result (deep-copy ctx scratch-result)))
   5891           (use-scratch-heap!)
   5892           (heap-rewind! mark)
   5893           main-result)))))
   5894 
   5895 (define (%init-main-cons x xs)
   5896   (use-main-heap!)
   5897   (let ((r (cons x xs)))
   5898     (use-scratch-heap!)
   5899     r))
   5900 
   5901 (define (%init-main-reverse xs)
   5902   (use-main-heap!)
   5903   (let ((r (reverse xs)))
   5904     (use-scratch-heap!)
   5905     r))
   5906 
   5907 (define (%init-main-prepend-reversed xs acc)
   5908   (use-main-heap!)
   5909   (let loop ((ys xs) (out acc))
   5910     (cond
   5911       ((null? ys)
   5912        (use-scratch-heap!)
   5913        out)
   5914       (else
   5915        (loop (cdr ys) (cons (car ys) out))))))
   5916 
   5917 (define (%init-main-pad-piece nbytes)
   5918   (use-main-heap!)
   5919   (let ((p (%pad-piece nbytes)))
   5920     (use-scratch-heap!)
   5921     p))
   5922 
   5923 ;; ----- Global initializers ---------------------------------------------
   5924 ;; Returns (values pieces final-ty). For inferred-length array `ty`,
   5925 ;; final-ty is a freshly-built array ctype with the resolved length;
   5926 ;; otherwise final-ty is `ty` unchanged.
   5927 (define (parse-init-global ps ty)
   5928   (pmatch (peek ps)
   5929     ;; String literal initializer for char[]
   5930     (($ tok? (kind STR) (value ,s))
   5931      (guard (and (eq? (ctype-kind ty) 'arr)
   5932                  (let ((et (car (ctype-ext ty))))
   5933                    (or (eq? et %t-i8) (eq? et %t-u8)))))
   5934      (advance ps)
   5935      (let* ((slen (bytevector-length s))
   5936             (decl (cdr (ctype-ext ty)))
   5937             (final (cond ((< decl 0) (+ slen 1)) (else decl)))
   5938             (final-ty (cond ((< decl 0) (%init-fixed-arr-type ty final))
   5939                             (else ty))))
   5940        (let ((bv (make-bytevector final 0)))
   5941          (let loop ((i 0))
   5942            (cond
   5943              ((or (= i slen) (>= i final))
   5944               (values (list bv) final-ty))
   5945              (else
   5946               (bytevector-u8-set! bv i (bytevector-u8-ref s i))
   5947               (loop (+ i 1))))))))
   5948     ;; Brace-form
   5949     (($ tok? (kind PUNCT) (value lbrace))
   5950      (advance ps)
   5951      (cond
   5952        ((eq? (ctype-kind ty) 'arr)
   5953         (let-values (((pieces count) (%parse-init-array-list ps ty)))
   5954           (let* ((decl (%init-array-decl-len ty))
   5955                  (final-ty (cond ((< decl 0)
   5956                                   (%init-fixed-arr-type ty count))
   5957                                  (else ty))))
   5958             (values pieces final-ty))))
   5959        ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
   5960         (values (%parse-init-struct-list ps ty) ty))
   5961        (else
   5962         ;; Brace-wrapped scalar: { expr }
   5963         (let ((piece (%const-init-piece ps ty)))
   5964           (cond ((at-punct? ps 'comma) (advance ps)))
   5965           (expect-punct ps 'rbrace)
   5966           (values (list piece) ty)))))
   5967     ;; Bare scalar initializer
   5968     (else (values (list (%const-init-piece ps ty)) ty))))
   5969 
   5970 ;; Returns (values pieces count). `count` is the number of element
   5971 ;; initializers actually consumed (used by parse-init-global to resolve
   5972 ;; an inferred top-level length). C99 forbids inferred length in
   5973 ;; nested array elements, so recursive callers ignore `count`.
   5974 ;;
   5975 ;; `brace?` controls termination: when #t (the normal case), the loop
   5976 ;; consumes elements until `}` is seen. When #f (brace-elision recursion
   5977 ;; from C99 §6.7.8 ¶22), the loop consumes exactly `decl` elements from
   5978 ;; the parent stream and returns without expecting `}`. In no-brace
   5979 ;; mode, a leading `.` or `[` designator targets the enclosing aggregate
   5980 ;; — the recursion terminates immediately, padding the unfilled tail.
   5981 (define (%parse-init-array-list ps ty)
   5982   (%parse-init-array-list/mode ps ty #t))
   5983 
   5984 (define (%parse-init-array-list/mode ps ty brace?)
   5985   ;; Element-list array initializer; assumes `{` already consumed when
   5986   ;; brace? is #t.
   5987   (let* ((elem  (%init-array-elem-type ty))
   5988          (esize (ctype-size elem))
   5989          (decl  (%init-array-decl-len ty)))
   5990     (let lp ((acc '()) (count 0))
   5991       (cond
   5992         ((cond (brace? (at-punct? ps 'rbrace))
   5993                (else (or (>= count (cond ((< decl 0) 0) (else decl)))
   5994                          (at-punct? ps 'rbrace)
   5995                          (at-punct? ps 'dot)
   5996                          (at-punct? ps 'lbrack))))
   5997          (cond (brace? (advance ps)))
   5998          ;; Pad to declared length if longer than count.
   5999          (let* ((final (cond ((< decl 0) count) (else decl)))
   6000                 (pad (- final count)))
   6001            (values
   6002              (cond
   6003               ((> pad 0)
   6004                (%init-main-reverse
   6005                 (%init-main-cons (%init-main-pad-piece (* pad esize)) acc)))
   6006               (else (%init-main-reverse acc)))
   6007             count)))
   6008         (else
   6009          (let ((piece
   6010                 (%init-promote-unit
   6011                  ps
   6012                  (lambda ()
   6013                    ;; The trailing inter-element comma must be consumed
   6014                    ;; *inside* the mark/rewind window: advance loads pp/lex
   6015                    ;; lookahead into iter buffers, which promote-iter-buffers!
   6016                    ;; then deep-copies into main. Consuming it after the
   6017                    ;; rewind would leave that lookahead leaking on scratch.
   6018                    (let ((p
   6019                           (cond
   6020                             ((at-punct? ps 'lbrace)
   6021                              (advance ps)
   6022                              (%global-init-elem ps elem #f))
   6023                             (else
   6024                              (%global-init-elem ps elem #t)))))
   6025                      ;; Inter-item comma: consume except for the comma
   6026                      ;; following our LAST item in no-brace mode — that
   6027                      ;; one belongs to the enclosing parent.
   6028                      (cond
   6029                        (brace?
   6030                         (cond ((at-punct? ps 'comma) (advance ps))))
   6031                        (else
   6032                         ;; no-brace: consume comma only if more items
   6033                         ;; remain in our quota.
   6034                         (cond
   6035                           ((and (< (+ count 1)
   6036                                    (cond ((< decl 0) 0) (else decl)))
   6037                                 (at-punct? ps 'comma))
   6038                            (advance ps)))))
   6039                      p)))))
   6040            (lp (%init-main-prepend-reversed piece acc) (+ count 1))))))))
   6041 
   6042 (define (%piece-bytesize p)
   6043   ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits
   6044   ;; one byte per element; a (label-ref . _) emits an 8-byte slot.
   6045   (cond
   6046     ((bytevector? p) (bytevector-length p))
   6047     ((and (pair? p) (eq? (car p) 'label-ref)) 8)
   6048     (else (die #f "init: unknown piece" p))))
   6049 
   6050 (define (%pieces-bytesize ps-list)
   6051   (let loop ((xs ps-list) (n 0))
   6052     (cond ((null? xs) n)
   6053           (else (loop (cdr xs) (+ n (%piece-bytesize (car xs))))))))
   6054 
   6055 (define (%merge-init-entries entries total-size)
   6056   ;; entries: list of (abs-offset . piece-list), in source order.
   6057   ;; Sort stably by offset (later writes to the same offset win, per C
   6058   ;; designated-init semantics) and emit pad pieces in any gaps and at
   6059   ;; the tail. Preserves label-ref pieces — we never merge them into a
   6060   ;; flat bv.
   6061   (let* ((sorted (%init-stable-sort-by-offset entries))
   6062          (out
   6063           (let walk ((xs sorted) (cursor 0) (acc '()))
   6064             (cond
   6065               ((null? xs)
   6066                (cond
   6067                  ((< cursor total-size)
   6068                   (reverse (cons (%pad-piece (- total-size cursor)) acc)))
   6069                  (else (reverse acc))))
   6070               (else
   6071                (let* ((e        (car xs))
   6072                       (eoff     (car e))
   6073                       (epieces  (cdr e))
   6074                       (esize    (%pieces-bytesize epieces))
   6075                       (acc1     (cond
   6076                                   ((> eoff cursor)
   6077                                    (cons (%pad-piece (- eoff cursor)) acc))
   6078                                   (else acc)))
   6079                       (acc2     (append (reverse epieces) acc1)))
   6080                  (walk (cdr xs) (+ eoff esize) acc2)))))))
   6081     out))
   6082 
   6083 (define (%init-stable-sort-by-offset entries)
   6084   ;; Insertion sort, stable by source order for ties. n is small (one
   6085   ;; entry per initialized field) so O(n^2) is fine.
   6086   (let lp ((xs entries) (acc '()))
   6087     (cond
   6088       ((null? xs) acc)
   6089       (else
   6090        (let ((e (car xs)))
   6091          (lp (cdr xs)
   6092              (let ins ((ys acc) (head '()))
   6093                (cond
   6094                  ((null? ys)
   6095                   (append (reverse head) (list e)))
   6096                  ((<= (car e) (car (car ys)))
   6097                   (append (reverse head) (cons e ys)))
   6098                  (else
   6099                   (ins (cdr ys) (cons (car ys) head)))))))))))
   6100 
   6101 (define (%parse-init-struct-list ps ty)
   6102   (%parse-init-struct-list/mode ps ty #t))
   6103 
   6104 (define (%parse-init-struct-list/mode ps ty brace?)
   6105   ;; Struct/union initializer; assumes `{` already consumed when brace?.
   6106   ;; In no-brace mode (brace elision, C99 §6.7.8 ¶22), terminate when
   6107   ;; positional fields are exhausted, when a `}` is seen (belongs to
   6108   ;; the enclosing aggregate), or when a designator (`.`) appears (it
   6109   ;; targets the enclosing aggregate). Doesn't consume the trailing
   6110   ;; comma after the last field — that belongs to the parent list.
   6111   (let* ((fields (%init-struct-fields ty))
   6112          (size   (ctype-size ty))
   6113          (union? (eq? (ctype-kind ty) 'union)))
   6114     (let lp ((entries '()) (rest fields))
   6115       (cond
   6116         ((cond (brace? (at-punct? ps 'rbrace))
   6117                (else (or (null? rest)
   6118                          (at-punct? ps 'rbrace)
   6119                          (at-punct? ps 'dot)
   6120                          ;; Union in brace-elision mode: take one
   6121                          ;; member then return — the next sibling
   6122                          ;; initializer belongs to the parent
   6123                          ;; (C99 §6.7.8 ¶22 + union has one active
   6124                          ;; member at a time).
   6125                          (and union? (pair? entries)))))
   6126          (cond (brace? (advance ps)))
   6127          (%merge-init-entries (reverse entries) size))
   6128         (else
   6129          (let* ((designated? (at-punct? ps 'dot))
   6130                 (target
   6131                  (cond
   6132                    (designated?
   6133                     (advance ps)
   6134                     (let ((nt (advance ps)))
   6135                       (cond
   6136                         ((not (eq? (tok-kind nt) 'IDENT))
   6137                          (die (tok-loc nt) "init: .field expects ident")))
   6138                       (let ((f (%cg-find-field fields (tok-value nt))))
   6139                         (cond
   6140                           ((not f) (die (tok-loc nt) "init: no such field"
   6141                                         (tok-value nt))))
   6142                         (expect-punct ps 'assign)
   6143                         f)))
   6144                    ((null? rest)
   6145                     (die (tok-loc (peek ps)) "init: too many fields"))
   6146                    (else (car rest))))
   6147                 (fname  (car target))
   6148                 (fty    (car (cdr target)))
   6149                 (foff   (car (cddr target)))
   6150                 (piece-list
   6151                  (cond
   6152                    ((at-punct? ps 'lbrace)
   6153                     (advance ps)
   6154                     (%global-init-elem ps fty #f))
   6155                    (else
   6156                     (%global-init-elem ps fty #t))))
   6157                 (rest1
   6158                  (cond
   6159                    ;; designated init: drop fields up to and including target
   6160                    (designated? (%init-drop-thru-field fields fname))
   6161                    (else (cdr rest)))))
   6162            ;; Inter-item comma: consume except for the comma after our
   6163            ;; LAST field in no-brace mode (belongs to enclosing list).
   6164            (cond
   6165              (brace?
   6166               (cond ((at-punct? ps 'comma) (advance ps))))
   6167              (else
   6168               (cond ((and (not (null? rest1))
   6169                           ;; Union in brace-elision mode terminates
   6170                           ;; after one element regardless of rest1;
   6171                           ;; that means the comma belongs to the parent.
   6172                           (not union?)
   6173                           (at-punct? ps 'comma))
   6174                      (advance ps)))))
   6175            (lp (cons (cons foff piece-list) entries) rest1)))))))
   6176 
   6177 ;; ----- Local aggregate initializers ------------------------------------
   6178 ;; Emits per-element store sequences via cg ops into the slot of `sm`
   6179 ;; (a 'var sym whose slot is the frame offset). Assumes the assignment
   6180 ;; `=` has already been consumed.
   6181 (define (parse-init-local-aggregate ps sm ty)
   6182   (pmatch (peek ps)
   6183     ;; Local char[] = "string" — fill from string bytes.
   6184     (($ tok? (kind STR) (value ,s))
   6185      (guard (and (eq? (ctype-kind ty) 'arr)
   6186                  (let ((et (car (ctype-ext ty))))
   6187                    (or (eq? et %t-i8) (eq? et %t-u8)))))
   6188      (advance ps)
   6189      ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the
   6190      ;; sm-type still records the original (size=-1) ctype — `sizeof(x)`
   6191      ;; in the body would not see the resolved length. The slot is also
   6192      ;; sized off the original (= 1 byte), so the path is pre-existing
   6193      ;; broken; we don't paper over it here. Real C bootstrap code uses
   6194      ;; statics/globals for inferred-length arrays.
   6195      (let* ((slen (bytevector-length s))
   6196             (decl (cdr (ctype-ext ty)))
   6197             (final (cond ((< decl 0) (+ slen 1)) (else decl))))
   6198        ;; Emit byte stores for each char in s, plus NUL for the
   6199        ;; trailing slot if final > slen.
   6200        (let loop ((i 0))
   6201          (cond
   6202            ((>= i final) #t)
   6203            (else
   6204             (let ((b (cond ((< i slen) (bytevector-u8-ref s i))
   6205                            (else 0)))
   6206                   (off (+ (sym-slot sm) i)))
   6207               (%push-frame-elem-lval ps off %t-u8)
   6208               (cg-push-imm (ps-cg ps) %t-u8 b)
   6209               (cg-assign (ps-cg ps))
   6210               (cg-pop (ps-cg ps))
   6211               (loop (+ i 1))))))))
   6212     (($ tok? (kind PUNCT) (value lbrace))
   6213      (advance ps)
   6214      (cond
   6215        ((eq? (ctype-kind ty) 'arr)
   6216         (%parse-init-local-array-list ps sm (sym-slot sm) ty))
   6217        ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
   6218         (%parse-init-local-struct-list ps sm (sym-slot sm) ty))
   6219        (else (die #f "init local: brace on scalar?"))))
   6220     (else (die (tok-loc (peek ps)) "init local aggregate?"))))
   6221 
   6222 (define (%push-frame-elem-lval ps base-off ty)
   6223   (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t)))
   6224 
   6225 (define (%parse-init-local-array-list ps sm base-off ty)
   6226   (%parse-init-local-array-list/mode ps sm base-off ty #t))
   6227 
   6228 (define (%parse-init-local-array-list/mode ps sm base-off ty brace?)
   6229   (let* ((elem (%init-array-elem-type ty))
   6230          (esize (ctype-size elem))
   6231          (decl  (%init-array-decl-len ty)))
   6232     (let lp ((i 0))
   6233       (cond
   6234         ((cond (brace? (at-punct? ps 'rbrace))
   6235                (else (or (>= i (cond ((< decl 0) 0) (else decl)))
   6236                          (at-punct? ps 'rbrace)
   6237                          (at-punct? ps 'dot)
   6238                          (at-punct? ps 'lbrack))))
   6239          (cond (brace? (advance ps)))
   6240          ;; Inferred-length auto path is pre-existing broken (slot
   6241          ;; allocated off size=-1, sm-type unfixed). See note in
   6242          ;; parse-init-local-aggregate STR branch.
   6243          ;; Zero out remaining slots if any (declared length > i).
   6244          (let ((final (cond ((< decl 0) i) (else decl))))
   6245            (let zlp ((k i))
   6246              (cond
   6247                ((>= k final) #t)
   6248                (else
   6249                 (let ((off (+ base-off (* k esize))))
   6250                   (cond
   6251                     ((or (eq? (ctype-kind elem) 'arr)
   6252                          (eq? (ctype-kind elem) 'struct)
   6253                          (eq? (ctype-kind elem) 'union))
   6254                      ;; Zero each byte in this aggregate slot.
   6255                      (let zb ((j 0))
   6256                        (cond
   6257                          ((>= j esize) #t)
   6258                          (else
   6259                           (%push-frame-elem-lval ps (+ off j) %t-u8)
   6260                           (cg-push-imm (ps-cg ps) %t-u8 0)
   6261                           (cg-assign (ps-cg ps))
   6262                           (cg-pop (ps-cg ps))
   6263                           (zb (+ j 1))))))
   6264                     (else
   6265                      (%push-frame-elem-lval ps off elem)
   6266                      (cg-push-imm (ps-cg ps) elem 0)
   6267                      (cg-assign (ps-cg ps))
   6268                      (cg-pop (ps-cg ps)))))
   6269                 (zlp (+ k 1)))))))
   6270         (else
   6271          (let ((eoff (+ base-off (* i esize))))
   6272            (cond
   6273              ((at-punct? ps 'lbrace)
   6274               (advance ps)
   6275               (%local-init-elem ps sm eoff elem #f))
   6276              (else
   6277               (%local-init-elem ps sm eoff elem #t)))
   6278            ;; Inter-item comma: in no-brace mode, don't eat the comma
   6279            ;; that follows our LAST item (it belongs to the parent).
   6280            (cond
   6281              (brace?
   6282               (cond ((at-punct? ps 'comma) (advance ps))))
   6283              (else
   6284               (cond ((and (< (+ i 1)
   6285                              (cond ((< decl 0) 0) (else decl)))
   6286                           (at-punct? ps 'comma))
   6287                      (advance ps)))))
   6288            (lp (+ i 1))))))))
   6289 
   6290 (define (%bv-in-list? bv xs)
   6291   (cond ((null? xs) #f)
   6292         ((equal? bv (car xs)) #t)
   6293         (else (%bv-in-list? bv (cdr xs)))))
   6294 
   6295 ;; Does any leaf-name of `f` (a struct/union field tuple, possibly with
   6296 ;; a nameless anon-aggregate type) appear in `seen`? Used by the
   6297 ;; local-struct zero-pass to skip an anonymous member whose sub-field
   6298 ;; was already written through a designator like `.a` (C11 §6.7.2.1).
   6299 (define (%anon-touched? f seen)
   6300   (let ((fn (car f)))
   6301     (cond
   6302       (fn (%bv-in-list? fn seen))
   6303       (else
   6304        (let ((k (ctype-kind (cadr f))))
   6305          (cond
   6306            ((or (eq? k 'struct) (eq? k 'union))
   6307             (let lp ((xs (car (cddr (ctype-ext (cadr f))))))
   6308               (cond
   6309                 ((null? xs) #f)
   6310                 ((%anon-touched? (car xs) seen) #t)
   6311                 (else (lp (cdr xs))))))
   6312            (else #f)))))))
   6313 
   6314 (define (%emit-zero-field ps base-off f)
   6315   ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b)
   6316   ;; and silently drops the rest. Compute absolute byte offsets via
   6317   ;; nested binary +.
   6318   (let* ((fty       (car (cdr f)))
   6319          (foff      (car (cddr f)))
   6320          (fsize     (ctype-size fty))
   6321          (start-off (+ base-off foff)))
   6322     (let zb ((j 0))
   6323       (cond
   6324         ((>= j fsize) #t)
   6325         (else
   6326          (%push-frame-elem-lval ps (+ start-off j) %t-u8)
   6327          (cg-push-imm (ps-cg ps) %t-u8 0)
   6328          (cg-assign (ps-cg ps))
   6329          (cg-pop (ps-cg ps))
   6330          (zb (+ j 1)))))))
   6331 
   6332 (define (%parse-init-local-struct-list ps sm base-off ty)
   6333   (%parse-init-local-struct-list/mode ps sm base-off ty #t))
   6334 
   6335 (define (%parse-init-local-struct-list/mode ps sm base-off ty brace?)
   6336   ;; Track each initialized field by name in `seen`; at the closing brace
   6337   ;; zero every field NOT in `seen`. Tracking by name (rather than
   6338   ;; positional "remaining" fields) handles a designator jumping
   6339   ;; backwards correctly — e.g. `{.y = 5}` must still zero `x`.
   6340   ;; C requires every unmentioned member of an aggregate with at least
   6341   ;; one designator/initializer to be zeroed (C11 §6.7.9 ¶21).
   6342   ;;
   6343   ;; In no-brace mode (brace elision, C99 §6.7.8 ¶22): terminate when
   6344   ;; positional fields exhausted, on `}` (parent's), or on `.` designator
   6345   ;; (targets parent). Don't consume trailing comma after our last field.
   6346   (let ((fields (%init-struct-fields ty)))
   6347     (let lp ((rest fields) (seen '()))
   6348       (cond
   6349         ((cond (brace? (at-punct? ps 'rbrace))
   6350                (else (or (null? rest)
   6351                          (at-punct? ps 'rbrace)
   6352                          (at-punct? ps 'dot))))
   6353          (cond (brace? (advance ps)))
   6354          (for-each
   6355            (lambda (f)
   6356              (cond ((not (%anon-touched? f seen))
   6357                     (%emit-zero-field ps base-off f))))
   6358            fields))
   6359         (else
   6360          (let* ((designated? (at-punct? ps 'dot))
   6361                 (target
   6362                  (cond
   6363                    (designated?
   6364                     (advance ps)
   6365                     (let ((nt (advance ps)))
   6366                       (let ((f (%cg-find-field fields (tok-value nt))))
   6367                         (cond
   6368                           ((not f) (die (tok-loc nt) "init: no such field"
   6369                                         (tok-value nt))))
   6370                         (expect-punct ps 'assign)
   6371                         f)))
   6372                    ((null? rest)
   6373                     (die (tok-loc (peek ps)) "init: too many fields"))
   6374                    (else (car rest))))
   6375                 (fname (car target))
   6376                 (fty   (car (cdr target)))
   6377                 (foff  (car (cddr target)))
   6378                 (eoff  (+ base-off foff)))
   6379            (cond
   6380              ((at-punct? ps 'lbrace)
   6381               (advance ps)
   6382               (%local-init-elem ps sm eoff fty #f))
   6383              (else
   6384               (%local-init-elem ps sm eoff fty #t)))
   6385            (let ((rest1
   6386                   (cond
   6387                     (designated? (%init-drop-thru-field fields fname))
   6388                     (else (cdr rest)))))
   6389              ;; Inter-item comma: in no-brace mode, don't eat the comma
   6390              ;; that follows our LAST field (belongs to enclosing list).
   6391              (cond
   6392                (brace?
   6393                 (cond ((at-punct? ps 'comma) (advance ps))))
   6394                (else
   6395                 (cond ((and (not (null? rest1))
   6396                             (at-punct? ps 'comma))
   6397                        (advance ps)))))
   6398              (lp rest1 (cons fname seen)))))))))
   6399 
   6400 
   6401 ;; parse-fn-body: bind the fn-sym for recursive lookup, then parse the
   6402 ;; body. Heap discipline is handled at the parse-decl-or-fn boundary —
   6403 ;; the body runs in scratch like the rest of the decl, and surviving
   6404 ;; roots (block-statics, string literals, block-scope tags that escape
   6405 ;; via the global tables) are promoted en masse there. See the Phase 3
   6406 ;; section above parse-translation-unit.
   6407 (define (parse-fn-body ps sto name dt)
   6408   (scope-bind! ps name (%sym name 'fn (or sto 'extern) dt #f #t))
   6409   (%parse-fn-body-inner ps name dt))
   6410 
   6411 (define (%parse-fn-body-inner ps name dt)
   6412   (let* ((e (ctype-ext dt)) (ret (car e))
   6413          (par (cadr e)) (var (car (cddr e))))
   6414     (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var)))
   6415       (ps-fn-ctx-set! ps
   6416         (%fn-ctx name ret (map cdr psyms) var '()))
   6417       (scope-enter! ps)
   6418       (for-each (lambda (p) (scope-bind! ps (car p) (cdr p)))
   6419                 psyms)
   6420       (expect-punct ps 'lbrace)
   6421       (parse-cstmt-body ps)
   6422       (expect-punct ps 'rbrace)
   6423       (scope-leave! ps)
   6424       (ps-fn-ctx-set! ps #f)
   6425       (cg-fn-end (ps-cg ps)))))
   6426 
   6427 (define (parse-stmt ps)
   6428   (pmatch (peek ps)
   6429     (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps))
   6430     (($ tok? (kind KW) (value if))        (parse-if-stmt ps))
   6431     (($ tok? (kind KW) (value while))     (parse-while-stmt ps))
   6432     (($ tok? (kind KW) (value do))        (parse-do-stmt ps))
   6433     (($ tok? (kind KW) (value for))       (parse-for-stmt ps))
   6434     (($ tok? (kind KW) (value switch))    (parse-switch-stmt ps))
   6435     (($ tok? (kind KW) (value return))    (parse-return-stmt ps))
   6436     (($ tok? (kind KW) (value goto))      (parse-goto-stmt ps))
   6437     (($ tok? (kind KW) (value break))
   6438      (advance ps) (expect-punct ps 'semi) (do-break ps))
   6439     (($ tok? (kind KW) (value continue))
   6440      (advance ps) (expect-punct ps 'semi) (do-continue ps))
   6441     (($ tok? (kind KW) (value case))      (parse-case-stmt ps))
   6442     (($ tok? (kind KW) (value default))   (parse-default-stmt ps))
   6443     (($ tok? (kind IDENT))
   6444      (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT)
   6445                  (eq? (tok-value (peek2 ps)) 'colon)))
   6446      (parse-labelled-stmt ps))
   6447     (else
   6448      (cond ((stmt-starts-decl? ps) (parse-local-decl ps))
   6449            (else (parse-expr-stmt ps))))))
   6450 
   6451 (define (stmt-starts-decl? ps)
   6452   (let ((t (peek ps)))
   6453     (or (%tok-decl-start? ps t)
   6454         ;; Storage classes only appear at declaration position; check here
   6455         ;; rather than fold them into %tok-decl-start? (which is also
   6456         ;; used for cast typenames where storage classes are illegal).
   6457         (pmatch t
   6458           (($ tok? (kind KW) (value ,v))
   6459            (or (eq? v 'auto) (eq? v 'register) (eq? v 'static)
   6460                (eq? v 'extern) (eq? v 'typedef)))
   6461           (else #f)))))
   6462 
   6463 (define (parse-local-decl ps)
   6464   (let-values (((sto b) (parse-decl-spec ps)))
   6465     (cond
   6466       ((at-punct? ps 'semi) (advance ps) #t)
   6467       (else
   6468        (let lp ()
   6469          (let-values (((n t) (parse-declarator ps b)))
   6470            (handle-decl ps sto n t)
   6471            (cond ((at-punct? ps 'comma) (advance ps) (lp))
   6472                  (else (expect-punct ps 'semi) #t))))))))
   6473 
   6474 (define (parse-cstmt ps)
   6475   (expect-punct ps 'lbrace)
   6476   (scope-enter! ps)
   6477   (parse-cstmt-body ps)
   6478   (scope-leave! ps)
   6479   (expect-punct ps 'rbrace) #t)
   6480 
   6481 (define (parse-cstmt-body ps)
   6482   (cond
   6483     ((at-punct? ps 'rbrace) #t)
   6484     ((eq? (tok-kind (peek ps)) 'EOF)
   6485      (die (tok-loc (peek ps)) "EOF in cstmt"))
   6486     (else (parse-stmt ps) (parse-cstmt-body ps))))
   6487 
   6488 (define (parse-compound-stmt ps) (parse-cstmt ps))
   6489 
   6490 (define (parse-if-stmt ps)
   6491   (expect-kw ps 'if)
   6492   (expect-punct ps 'lparen)
   6493   (parse-expr ps) (rval! ps)
   6494   (expect-punct ps 'rparen)
   6495   (cg-ifelse (ps-cg ps)
   6496              (lambda () (parse-stmt ps))
   6497              (lambda ()
   6498                (cond ((at-kw? ps 'else)
   6499                       (advance ps) (parse-stmt ps))
   6500                      (else #t)))))
   6501 
   6502 ;; cg-loop's body-thunk receives the tag from cg; the parser threads
   6503 ;; it into break/continue via loop-ctx.
   6504 
   6505 (define (parse-while-stmt ps)
   6506   (expect-kw ps 'while)
   6507   (expect-punct ps 'lparen)
   6508   (cg-loop (ps-cg ps)
   6509            (lambda () (parse-expr ps) (rval! ps))
   6510            (lambda (tag)
   6511              (expect-punct ps 'rparen)
   6512              (push-loop-ctx! ps 'while tag #t)
   6513              (parse-stmt ps)
   6514              (pop-loop-ctx! ps))) #t)
   6515 
   6516 (define (parse-do-stmt ps)
   6517   (expect-kw ps 'do)
   6518   ;; `continue` in a do-while must jump to the *cond test* (C11
   6519   ;; §6.8.6.2 ¶2), not to the top of the body. The scoped loop labels
   6520   ;; `.top` at the condition test and `.end` after the loop, so bare
   6521   ;; %continue / %break bind through hex2++ local lookup.
   6522   ;;
   6523   ;; Layout:
   6524   ;;   .scope
   6525   ;;   :.body
   6526   ;;     <body>
   6527   ;;   :.top                 ; %continue jumps here
   6528   ;;     <cond>
   6529   ;;     %if_eqz(c, %break)
   6530   ;;   %b(&.body)
   6531   ;;   :.end
   6532   ;;   .endscope
   6533   (let* ((cg (ps-cg ps))
   6534          (tag (%cg-fresh-loop-tag cg)))
   6535     (%cg-emit-many cg (list ".scope\n"
   6536                             ":.body\n"))
   6537     (push-loop-ctx! ps 'do tag #t)
   6538     (parse-stmt ps)
   6539     (pop-loop-ctx! ps)
   6540     (expect-kw ps 'while) (expect-punct ps 'lparen)
   6541     (%cg-emit-many cg (list ":.top\n"))
   6542     (parse-expr ps) (rval! ps)
   6543     (expect-punct ps 'rparen) (expect-punct ps 'semi)
   6544     (let ((c (cg-pop cg)))
   6545       (%cg-load-opnd-into cg c 't0)
   6546       (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n")))
   6547     (%cg-emit-many cg (list "%b(&.body)\n"
   6548                             ":.end\n"
   6549                             ".endscope\n")))
   6550   #t)
   6551 
   6552 (define (parse-for-stmt ps)
   6553   (expect-kw ps 'for) (expect-punct ps 'lparen)
   6554   (scope-enter! ps)
   6555   (cond
   6556     ((at-punct? ps 'semi) (advance ps))
   6557     ((stmt-starts-decl? ps) (parse-local-decl ps))
   6558     (else (parse-expr ps) (cg-pop (ps-cg ps))
   6559           (expect-punct ps 'semi)))
   6560   (let* ((cg (ps-cg ps))
   6561          (cond-toks (cond
   6562                       ((at-punct? ps 'semi) '())
   6563                       (else (collect-til-top-punct ps 'semi "EOF in for-cond"))))
   6564          (_ (expect-punct ps 'semi))
   6565          (step-toks (collect-til-rparen ps))
   6566          (_ (expect-punct ps 'rparen))
   6567          (tag (%cg-fresh-loop-tag cg)))
   6568     ;; A C `continue` in a for-loop must run the step expression before
   6569     ;; retesting the condition. Arrange the loop as:
   6570     ;;   jump test; top: step; test: condition; body; jump top
   6571     (%cg-emit-many cg (list ".scope\n"
   6572                             "%b(&.test)\n"
   6573                             ":.top\n"))
   6574     (parse-saved-expr-stmt ps step-toks)
   6575     (%cg-emit-many cg (list ":.test\n"))
   6576     (cond
   6577       ((null? cond-toks) (cg-push-imm cg %t-i32 1))
   6578       (else (parse-saved-expr ps cond-toks) (rval! ps)))
   6579     (let ((c (cg-pop cg)))
   6580       (%cg-load-opnd-into cg c 't0)
   6581       (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n")))
   6582     (push-loop-ctx! ps 'for tag #t)
   6583     (parse-stmt ps)
   6584     (pop-loop-ctx! ps)
   6585     (%cg-emit-many cg (list "%b(&.top)\n"
   6586                             ":.end\n"
   6587                             ".endscope\n")))
   6588   (scope-leave! ps) #t)
   6589 
   6590 (define (parse-saved-expr ps toks)
   6591   (let ((sv (ps-iter ps)))
   6592     (ps-iter-set! ps (make-list-iter (append toks (list (make-tok 'EOF #f #f)))))
   6593     (parse-expr ps)
   6594     (ps-iter-set! ps sv)))
   6595 
   6596 (define (parse-saved-expr-stmt ps toks)
   6597   (cond
   6598     ((null? toks) #t)
   6599     (else (parse-saved-expr ps toks) (cg-pop (ps-cg ps)))))
   6600 
   6601 (define (collect-til-top-punct ps punct err)
   6602   (let loop ((acc '()) (d 0))
   6603     (let ((t (peek ps)))
   6604       (cond
   6605         ((eq? (tok-kind t) 'EOF)
   6606          (die (tok-loc t) err))
   6607         ((and (zero? d) (eq? (tok-kind t) 'PUNCT)
   6608               (eq? (tok-value t) punct)) (reverse acc))
   6609         (else
   6610          (let ((nt (advance ps)))
   6611            (loop (cons nt acc)
   6612                  (cond ((not (eq? (tok-kind nt) 'PUNCT)) d)
   6613                        ((or (eq? (tok-value nt) 'lparen)
   6614                             (eq? (tok-value nt) 'lbrack)) (+ d 1))
   6615                        ((or (eq? (tok-value nt) 'rparen)
   6616                             (eq? (tok-value nt) 'rbrack)) (- d 1))
   6617                        (else d)))))))))
   6618 
   6619 (define (collect-til-rparen ps)
   6620   (collect-til-top-punct ps 'rparen "EOF in for-step"))
   6621 
   6622 (define (parse-switch-stmt ps)
   6623   (expect-kw ps 'switch) (expect-punct ps 'lparen)
   6624   (parse-expr ps) (rval! ps)
   6625   (expect-punct ps 'rparen)
   6626   ;; Switch's break-target tag is the swctx's end-tag — cg owns it,
   6627   ;; and we read it back so cg-break inside the switch body emits a
   6628   ;; tag cg actually labels.
   6629   (let* ((sw (cg-switch-begin (ps-cg ps)))
   6630          (tg (swctx-end-tag sw)))
   6631     (push-loop-ctx-sw! ps 'switch tg sw)
   6632     (parse-stmt ps)
   6633     (pop-loop-ctx! ps)
   6634     (cg-switch-end (ps-cg ps) sw)))
   6635 
   6636 (define (parse-case-stmt ps)
   6637   (expect-kw ps 'case)
   6638   (let ((v (parse-const-int ps)))
   6639     (expect-punct ps 'colon)
   6640     (cg-switch-case (ps-cg ps) (innermost-sw ps) v)
   6641     (parse-stmt ps)))
   6642 
   6643 (define (parse-default-stmt ps)
   6644   (expect-kw ps 'default) (expect-punct ps 'colon)
   6645   (cg-switch-default (ps-cg ps) (innermost-sw ps))
   6646   (parse-stmt ps))
   6647 
   6648 (define (parse-return-stmt ps)
   6649   (expect-kw ps 'return)
   6650   (cond
   6651     ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps)))
   6652     (else
   6653      (let* ((fc  (ps-fn-ctx ps))
   6654             (rty (and fc (fn-ctx-return-type fc)))
   6655             (rk  (and rty (ctype-kind rty))))
   6656        (cond
   6657          ;; Struct/union return — leave the source as a struct lval;
   6658          ;; cg-return copies bytes into the function's return slot.
   6659          ;; (P1.md §Arguments and return values.)
   6660          ((or (eq? rk 'struct) (eq? rk 'union))
   6661           (parse-expr ps)
   6662           (cg-return (ps-cg ps)))
   6663          (else
   6664           (parse-expr ps) (rval! ps)
   6665           (cond
   6666             ((and fc (not (eq? rk 'void)))
   6667              (cg-cast (ps-cg ps) rty))
   6668             (else #t))
   6669           (cg-return (ps-cg ps)))))
   6670      (expect-punct ps 'semi))))
   6671 
   6672 (define (parse-goto-stmt ps)
   6673   (expect-kw ps 'goto)
   6674   (let ((t (advance ps)))
   6675     (cond ((eq? (tok-kind t) 'IDENT)
   6676            (cg-goto (ps-cg ps) (tok-value t)))
   6677           (else (die (tok-loc t) "label?"))))
   6678   (expect-punct ps 'semi))
   6679 
   6680 (define (parse-labelled-stmt ps)
   6681   (let ((t (advance ps)))
   6682     (expect-punct ps 'colon)
   6683     (cg-emit-label (ps-cg ps) (tok-value t))
   6684     (parse-stmt ps)))
   6685 
   6686 (define (parse-expr-stmt ps)
   6687   (cond
   6688     ((at-punct? ps 'semi) (advance ps) #t)
   6689     (else (parse-expr ps) (cg-pop (ps-cg ps))
   6690           (expect-punct ps 'semi))))
   6691 
   6692 (define (push-loop-ctx! ps k tg hc)
   6693   (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps))))
   6694 (define (push-loop-ctx-sw! ps k tg sw)
   6695   (ps-loops-set! ps
   6696     (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps))))
   6697 (define (pop-loop-ctx! ps)
   6698   (ps-loops-set! ps (cdr (ps-loops ps))))
   6699 (define (do-break ps)
   6700   (let ((c (innermost-loop ps)))
   6701     (cond
   6702       ((not c) (die #f "break outside"))
   6703       ((eq? (loop-ctx-kind c) 'switch)
   6704        (cg-break (ps-cg ps) (car (loop-ctx-tag c))))
   6705       (else (cg-break (ps-cg ps) (loop-ctx-tag c))))))
   6706 (define (do-continue ps)
   6707   (let ((c (innermost-cont ps)))
   6708     (cond ((not c) (die #f "cont outside"))
   6709           (else (cg-continue (ps-cg ps) (loop-ctx-tag c))))))
   6710 (define (innermost-loop ps)
   6711   (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps)))))
   6712 (define (innermost-cont ps)
   6713   (let lp ((xs (ps-loops ps)))
   6714     (cond ((null? xs) #f)
   6715           ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs)))
   6716           (else (car xs)))))
   6717 (define (innermost-sw ps)
   6718   (let lp ((xs (ps-loops ps)))
   6719     (cond ((null? xs) (die #f "case outside switch"))
   6720           ((eq? (loop-ctx-kind (car xs)) 'switch)
   6721            (cdr (loop-ctx-tag (car xs))))
   6722           (else (lp (cdr xs))))))
   6723 
   6724 (define %binop-bp
   6725   (list
   6726     (cons 'comma      (cons 1 2))
   6727     (cons 'assign     (cons 4 3)) (cons 'plus-eq (cons 4 3))
   6728     (cons 'minus-eq   (cons 4 3)) (cons 'star-eq (cons 4 3))
   6729     (cons 'slash-eq   (cons 4 3)) (cons 'pct-eq  (cons 4 3))
   6730     (cons 'shl-eq     (cons 4 3)) (cons 'shr-eq  (cons 4 3))
   6731     (cons 'amp-eq     (cons 4 3)) (cons 'caret-eq (cons 4 3))
   6732     (cons 'bar-eq     (cons 4 3)) (cons 'qmark   (cons 6 5))
   6733     (cons 'lor (cons 10 11)) (cons 'land (cons 20 21))
   6734     (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41))
   6735     (cons 'amp (cons 50 51))
   6736     (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61))
   6737     (cons 'lt (cons 70 71)) (cons 'le (cons 70 71))
   6738     (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71))
   6739     (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81))
   6740     (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91))
   6741     (cons 'star (cons 100 101)) (cons 'slash (cons 100 101))
   6742     (cons 'pct (cons 100 101))))
   6743 
   6744 (define (binop-bp-of s) (alist-ref/eq s %binop-bp))
   6745 
   6746 (define (punct-to-cgop s)
   6747   (cond ((eq? s 'plus)  'add) ((eq? s 'minus) 'sub)
   6748         ((eq? s 'star)  'mul) ((eq? s 'slash) 'div)
   6749         ((eq? s 'pct)   'rem) ((eq? s 'amp)   'and)
   6750         ((eq? s 'bar)   'or)  ((eq? s 'caret) 'xor)
   6751         ((eq? s 'shl)   'shl) ((eq? s 'shr)   'shr)
   6752         ((eq? s 'eq2)   'eq)  ((eq? s 'ne)    'ne)
   6753         ((eq? s 'lt)    'lt)  ((eq? s 'le)    'le)
   6754         ((eq? s 'gt)    'gt)  ((eq? s 'ge)    'ge)
   6755         (else (die #f "binop" s))))
   6756 
   6757 (define (compound-op s)
   6758   (cond ((eq? s 'plus-eq)  'add) ((eq? s 'minus-eq) 'sub)
   6759         ((eq? s 'star-eq)  'mul) ((eq? s 'slash-eq) 'div)
   6760         ((eq? s 'pct-eq)   'rem) ((eq? s 'shl-eq)   'shl)
   6761         ((eq? s 'shr-eq)   'shr) ((eq? s 'amp-eq)   'and)
   6762         ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq)   'or)
   6763         (else #f)))
   6764 
   6765 (define (parse-expr ps) (parse-expr-bp ps 0))
   6766 
   6767 (define (parse-expr-bp ps mn)
   6768   (parse-unary ps) (parse-binary-rhs ps mn))
   6769 
   6770 (define (parse-binary-rhs ps mn)
   6771   (let ((t (peek ps)))
   6772     (cond
   6773       ((not (eq? (tok-kind t) 'PUNCT)) #t)
   6774       (else
   6775        (let ((bp (binop-bp-of (tok-value t))))
   6776          (cond
   6777            ((not bp) #t)
   6778            ((< (car bp) mn) #t)
   6779            (else
   6780             (let ((op (tok-value t)) (rb (cdr bp)))
   6781               (advance ps)
   6782               (cond
   6783                 ((eq? op 'comma)
   6784                  ;; lhs has been parsed; discard it and evaluate rhs.
   6785                  ;; Result of the comma expr is the rhs's rval.
   6786                  (cg-pop (ps-cg ps))
   6787                  (parse-expr-bp ps rb) (rval! ps))
   6788                 ((eq? op 'assign)
   6789                  ;; Struct/union assignment must memcpy the whole
   6790                  ;; aggregate. The scalar cg-assign path loads/stores
   6791                  ;; via a single 8-byte register, dropping any field at
   6792                  ;; offset >= 8. Detect via the lhs (already on the
   6793                  ;; vstack) and route to cg-assign-struct, which keeps
   6794                  ;; rhs as an lvalue and emits a memcpy.
   6795                  (let* ((lhs-top (cg-top (ps-cg ps)))
   6796                         (lk (cond ((and (opnd? lhs-top) (opnd-lval? lhs-top))
   6797                                    (ctype-kind (opnd-type lhs-top)))
   6798                                   (else #f))))
   6799                    (cond
   6800                      ((or (eq? lk 'struct) (eq? lk 'union))
   6801                       (parse-expr-bp ps rb)
   6802                       (cg-assign-struct (ps-cg ps)))
   6803                      (else
   6804                       (parse-expr-bp ps rb) (rval! ps)
   6805                       (cg-assign (ps-cg ps))))))
   6806                 ((compound-op op)
   6807                  (let ((b (compound-op op)))
   6808                    (cg-dup (ps-cg ps))
   6809                    (cg-load (ps-cg ps))
   6810                    (parse-expr-bp ps rb) (rval! ps)
   6811                    ;; Skip the usual arithmetic conversion for shift
   6812                    ;; compounds (`<<=` / `>>=`) so the lhs's signedness
   6813                    ;; survives; cg-binop's shr branch then picks the
   6814                    ;; right arithmetic-vs-logical opcode.
   6815                    (cond ((or (eq? b 'shl) (eq? b 'shr)) #t)
   6816                          (else (cg-arith-conv (ps-cg ps))))
   6817                    (cg-binop (ps-cg ps) b)
   6818                    (cg-assign (ps-cg ps))))
   6819                 ((eq? op 'qmark)
   6820                  (rval! ps)
   6821                  (cg-ifelse-merge (ps-cg ps)
   6822                             (lambda ()
   6823                               (parse-expr-bp ps 0) (rval! ps))
   6824                             (lambda ()
   6825                               (expect-punct ps 'colon)
   6826                               (parse-expr-bp ps rb) (rval! ps))))
   6827                 ((eq? op 'land)
   6828                  (rval! ps)
   6829                  ;; Both branches must push i32 0/1. Right side is
   6830                  ;; coerced via `cg-cast bool` so the merge slot
   6831                  ;; carries i32 (per §H.2).
   6832                  (cg-ifelse-merge (ps-cg ps)
   6833                             (lambda ()
   6834                               (parse-expr-bp ps rb) (rval! ps)
   6835                               (cg-cast (ps-cg ps) %t-bool)
   6836                               (cg-cast (ps-cg ps) %t-i32))
   6837                             (lambda ()
   6838                               (cg-push-imm (ps-cg ps) %t-i32 0))))
   6839                 ((eq? op 'lor)
   6840                  (rval! ps)
   6841                  (cg-ifelse-merge (ps-cg ps)
   6842                             (lambda ()
   6843                               (cg-push-imm (ps-cg ps) %t-i32 1))
   6844                             (lambda ()
   6845                               (parse-expr-bp ps rb) (rval! ps)
   6846                               (cg-cast (ps-cg ps) %t-bool)
   6847                               (cg-cast (ps-cg ps) %t-i32))))
   6848                 (else
   6849                  (rval! ps) (cg-promote (ps-cg ps))
   6850                  (parse-expr-bp ps rb) (rval! ps)
   6851                  (cg-promote (ps-cg ps))
   6852                  ;; Shifts (C 6.5.7) only require integer promotion of
   6853                  ;; each operand individually; the usual arithmetic
   6854                  ;; conversion would force the lhs into an unsigned
   6855                  ;; common type when the rhs is unsigned, breaking
   6856                  ;; arithmetic-shift semantics for `signed >> unsigned`.
   6857                  (cond ((or (eq? op 'shl) (eq? op 'shr)) #t)
   6858                        (else (cg-arith-conv (ps-cg ps))))
   6859                  (cg-binop (ps-cg ps) (punct-to-cgop op))))
   6860               (parse-binary-rhs ps mn)))))))))
   6861 
   6862 (define (parse-unary ps)
   6863   (pmatch (peek ps)
   6864     (($ tok? (kind PUNCT) (value amp))
   6865      (advance ps) (parse-unary ps)
   6866      (cg-take-addr (ps-cg ps)))
   6867     (($ tok? (kind PUNCT) (value star))
   6868      (advance ps) (parse-unary ps) (rval! ps)
   6869      (cg-push-deref (ps-cg ps)))
   6870     (($ tok? (kind PUNCT) (value plus))
   6871      (advance ps) (parse-unary ps)
   6872      (rval! ps) (cg-promote (ps-cg ps)))
   6873     (($ tok? (kind PUNCT) (value minus))
   6874      (advance ps) (parse-unary ps)
   6875      (rval! ps) (cg-promote (ps-cg ps))
   6876      (cg-unop (ps-cg ps) 'neg))
   6877     (($ tok? (kind PUNCT) (value tilde))
   6878      (advance ps) (parse-unary ps)
   6879      (rval! ps) (cg-promote (ps-cg ps))
   6880      (cg-unop (ps-cg ps) 'bnot))
   6881     (($ tok? (kind PUNCT) (value bang))
   6882      (advance ps) (parse-unary ps) (rval! ps)
   6883      (cg-unop (ps-cg ps) 'lnot))
   6884     (($ tok? (kind PUNCT) (value inc))
   6885      (advance ps) (parse-unary ps)
   6886      (cg-dup (ps-cg ps))
   6887      (cg-load (ps-cg ps))
   6888      (cg-push-imm (ps-cg ps) %t-i32 1)
   6889      (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps)))
   6890     (($ tok? (kind PUNCT) (value dec))
   6891      (advance ps) (parse-unary ps)
   6892      (cg-dup (ps-cg ps))
   6893      (cg-load (ps-cg ps))
   6894      (cg-push-imm (ps-cg ps) %t-i32 1)
   6895      (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps)))
   6896     (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps))
   6897     (($ tok? (kind KW) (value sizeof))
   6898      (advance ps)
   6899      (cond
   6900        ((at-punct? ps 'lparen)
   6901         (advance ps)
   6902         (cond
   6903           ((token-is-decl? ps)
   6904            (let*-values (((_sto bty) (parse-decl-spec ps))
   6905                          ((_n   ty)  (parse-declarator ps bty)))
   6906              (expect-punct ps 'rparen)
   6907              (cg-push-imm (ps-cg ps) %t-u64
   6908                           (max (ctype-size ty) 0))))
   6909           (else
   6910            ;; sizeof(EXPR): C semantics — operand is NOT evaluated.
   6911            ;; Snapshot cg state, parse the expr to learn its type,
   6912            ;; then rewind to discard any code emission and vstack
   6913            ;; pushes the parse incurred (e.g. `sizeof(x++)` must not
   6914            ;; increment x). cf. CC.md §Expressions.
   6915            (let ((tag (cg-snapshot (ps-cg ps))))
   6916              (parse-expr ps) (expect-punct ps 'rparen)
   6917              (let* ((tp (cg-top (ps-cg ps)))
   6918                     (sz (max (ctype-size (opnd-type tp)) 0)))
   6919                (cg-rewind (ps-cg ps) tag)
   6920                (cg-push-imm (ps-cg ps) %t-u64 sz))))))
   6921        (else
   6922         ;; sizeof EXPR (no parens) — same no-eval rule.
   6923         (let ((tag (cg-snapshot (ps-cg ps))))
   6924           (parse-unary ps)
   6925           (let* ((tp (cg-top (ps-cg ps)))
   6926                  (sz (max (ctype-size (opnd-type tp)) 0)))
   6927             (cg-rewind (ps-cg ps) tag)
   6928             (cg-push-imm (ps-cg ps) %t-u64 sz))))))
   6929     (else (parse-postfix ps))))
   6930 
   6931 (define (token-is-decl? ps) (%tok-decl-start? ps (peek ps)))
   6932 
   6933 (define (parse-cast-or-unary ps)
   6934   (cond
   6935     ((or (%tok-decl-start? ps (peek2 ps))
   6936          ;; A leading GNU attribute on the cast typename
   6937          ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`) — eaten
   6938          ;; by parse-decl-spec along with the rest of the decl-spec.
   6939          (let ((t (peek2 ps)))
   6940            (and (eq? (tok-kind t) 'KW) (eq? (tok-value t) '__attribute__))))
   6941      (advance ps)
   6942      (let*-values (((_sto bty) (parse-decl-spec ps))
   6943                    ((_n   ty)  (parse-declarator ps bty)))
   6944        (expect-punct ps 'rparen)
   6945        (cond
   6946          ;; (T){ ... } — compound literal (C99 §6.5.2.5). Looks like a
   6947          ;; cast at the typename level but disambiguates on the
   6948          ;; following `{` and is a postfix lvalue, not a cast operator.
   6949          ((at-punct? ps 'lbrace) (parse-compound-literal ps ty))
   6950          (else
   6951           (parse-unary ps)
   6952           ;; Cast operand undergoes lvalue conversion first (C semantics):
   6953           ;; arrays decay to pointers, lvals become rvals. cg-cast then
   6954           ;; bit-casts the resulting rval to the target type.
   6955           (rval! ps)
   6956           (cg-cast (ps-cg ps) ty)))))
   6957     (else (advance ps) (parse-expr ps)
   6958           (expect-punct ps 'rparen)
   6959           (parse-postfix-rest ps))))
   6960 
   6961 ;; --------------------------------------------------------------------
   6962 ;; Compound literals (C99 §6.5.2.5):  (T){ init-list }
   6963 ;;
   6964 ;; Block scope — allocate a fresh frame slot sized for T, drive the
   6965 ;; existing local-aggregate initializer path against it, then push a
   6966 ;; frame lval typed as T. The literal is an lvalue with automatic
   6967 ;; storage tied to the enclosing block, so &literal, literal.field,
   6968 ;; literal[i], byval pass, and array decay all chain through the
   6969 ;; existing primitives (cg-take-addr / cg-push-field / cg-decay-array
   6970 ;; via rval!).
   6971 ;;
   6972 ;; File scope — handled out-of-band in %const-init-piece (incl. its `&`
   6973 ;; arm) via %emit-fs-compound-literal: pieces go to .data under a fresh
   6974 ;; cc__cl_N label and the enclosing initializer takes a (label-ref . LBL)
   6975 ;; piece. Reaching parse-compound-literal at file scope would mean an
   6976 ;; expression context outside an initializer (which file scope doesn't
   6977 ;; have), so this entry point still rejects it.
   6978 ;; --------------------------------------------------------------------
   6979 (define (parse-compound-literal ps ty)
   6980   (cond
   6981     ((not (ps-fn-ctx ps))
   6982      (die (tok-loc (peek ps)) "compound literal at file scope: unsupported")))
   6983   (let* ((sz (max (ctype-size ty) 1))
   6984          (al (max (ctype-align ty) 1))
   6985          (sl (cg-alloc-slot (ps-cg ps) sz al))
   6986          ;; Synthetic sym: parse-init-local-aggregate only reads
   6987          ;; sym-slot at its top-level entry to seed base-off; the
   6988          ;; recursive helpers thread `sm` along but never read other
   6989          ;; fields. The name is unbound and never enters scope.
   6990          (sm (%sym "__cl" 'var 'auto ty sl #t)))
   6991     (cond
   6992       ((or (eq? (ctype-kind ty) 'arr)
   6993            (eq? (ctype-kind ty) 'struct)
   6994            (eq? (ctype-kind ty) 'union))
   6995        (parse-init-local-aggregate ps sm ty))
   6996       (else
   6997        ;; Scalar (T){expr [,]} — parse-init-local-aggregate's brace arm
   6998        ;; only handles aggregates, so emit the single-element store
   6999        ;; here directly.
   7000        (expect-punct ps 'lbrace)
   7001        (cg-push (ps-cg ps) (%opnd 'frame ty sl #t))
   7002        (parse-expr-bp ps 4) (rval! ps)
   7003        (cg-cast (ps-cg ps) ty)
   7004        (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
   7005        (cond ((at-punct? ps 'comma) (advance ps)))
   7006        (expect-punct ps 'rbrace)))
   7007     ;; The literal is an lvalue with automatic storage. ctype-size may
   7008     ;; have been resolved by parse-init-local-aggregate (e.g. (int[])
   7009     ;; gets its bound fixed in-place); we re-fetch via the slot's type
   7010     ;; pointer (ty) which the init code mutated.
   7011     (cg-push (ps-cg ps) (%opnd 'frame ty sl #t))
   7012     (parse-postfix-rest ps)))
   7013 
   7014 (define (parse-postfix ps)
   7015   (parse-primary ps) (parse-postfix-rest ps))
   7016 
   7017 (define (parse-postfix-rest ps)
   7018   (let lp ()
   7019     (pmatch (peek ps)
   7020       (($ tok? (kind PUNCT) (value lbrack))
   7021        (advance ps) (rval! ps)
   7022        (parse-expr ps) (rval! ps)
   7023        (expect-punct ps 'rbrack)
   7024        (cg-binop (ps-cg ps) 'add)
   7025        (cg-push-deref (ps-cg ps)) (lp))
   7026       (($ tok? (kind PUNCT) (value lparen))
   7027        (advance ps) (rval-not-fn! ps)
   7028        (let* ((fn-ty   (call-fn-type (ps-cg ps)))
   7029               (n (parse-call-args ps fn-ty))
   7030               ;; has-result? = #f for known void returns. Skips the
   7031               ;; wasted ST a0 → frame-slot spill that cg-call would
   7032               ;; otherwise emit for void calls.
   7033               (has-result?
   7034                (cond
   7035                  ((not fn-ty) #t)
   7036                  ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f)
   7037                  (else #t))))
   7038          (expect-punct ps 'rparen)
   7039          (cg-call (ps-cg ps) n has-result?)
   7040          ;; Maintain parse's "one rval per expression" invariant so
   7041          ;; comma / parse-expr-stmt / for-init/step pop sites stay
   7042          ;; simple. The placeholder is vstack-only and never
   7043          ;; materialized (cg-pop is a vstack op, no emit).
   7044          (cond ((not has-result?)
   7045                 (cg-push-imm (ps-cg ps) %t-i32 0)))
   7046          (lp)))
   7047       (($ tok? (kind PUNCT) (value dot))
   7048        (advance ps)
   7049        (pmatch (advance ps)
   7050          (($ tok? (kind IDENT) (value ,n))
   7051           (cg-push-field (ps-cg ps) n) (lp))
   7052          (($ tok? (loc ,l)) (die l "expected field name"))))
   7053       (($ tok? (kind PUNCT) (value arrow))
   7054        (advance ps)
   7055        (pmatch (advance ps)
   7056          (($ tok? (kind IDENT) (value ,n))
   7057           ;; ptr -> field: load the pointer to rval, deref to reach
   7058           ;; the struct lval, then push the field.
   7059           (rval! ps)
   7060           (cg-push-deref (ps-cg ps))
   7061           (cg-push-field (ps-cg ps) n) (lp))
   7062          (($ tok? (loc ,l)) (die l "expected field name"))))
   7063       (($ tok? (kind PUNCT) (value inc))
   7064        (advance ps)
   7065        (cg-postinc (ps-cg ps)) (lp))
   7066       (($ tok? (kind PUNCT) (value dec))
   7067        (advance ps)
   7068        (cg-postdec (ps-cg ps)) (lp))
   7069       (else #t))))
   7070 
   7071 ;; call-fn-type cg -> ctype-or-#f
   7072 ;;   The function operand sits at the top of the vstack when
   7073 ;;   parse-call-args runs (just after rval-not-fn!). Its type may be
   7074 ;;   `fn` directly (named callee) or `ptr -> fn` (function pointer).
   7075 ;;   Returns the underlying `fn` ctype, or #f if the operand isn't
   7076 ;;   recognizably callable (callsite still works — no per-arg cast).
   7077 (define (call-fn-type cg)
   7078   (let* ((tp (cg-top cg)))
   7079     (cond
   7080       ((not tp) #f)
   7081       (else
   7082        (let* ((ty (opnd-type tp))
   7083               (k  (ctype-kind ty)))
   7084          (cond
   7085            ((eq? k 'fn) ty)
   7086            ((eq? k 'ptr)
   7087             (let ((pe (ctype-ext ty)))
   7088               (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe)
   7089                     (else #f))))
   7090            (else #f)))))))
   7091 
   7092 ;; param-types-of fn-ty -> (params variadic?)  with a #f fallback.
   7093 (define (call-fn-param-info fn-ty)
   7094   (cond
   7095     ((not fn-ty) (cons '() #f))
   7096     (else
   7097      (let ((ext (ctype-ext fn-ty)))
   7098        (cons (cadr ext) (car (cddr ext)))))))
   7099 
   7100 ;; parse-call-args ps fn-ty -> arg-count
   7101 ;;   Casts each fixed arg to the declared param type (CC.md §K.5).
   7102 ;;   For variadic args (index >= named-arg count, when variadic? = #t)
   7103 ;;   applies cg-promote (CC.md §G.1).
   7104 (define (parse-call-args ps fn-ty)
   7105   (cond
   7106     ((at-punct? ps 'rparen) 0)
   7107     (else
   7108      (let* ((info  (call-fn-param-info fn-ty))
   7109             (params (car info))
   7110             (var?  (cdr info))
   7111             (nfix  (length params)))
   7112        (let lp ((n 0) (rem params))
   7113          (parse-expr-bp ps 4) (rval! ps)
   7114          (cond
   7115            ;; Fixed-arg: cast to declared param type. param entry shape
   7116            ;; is (name . ctype) per cg-fn-begin's contract.
   7117            ((not (null? rem))
   7118             (cg-cast (ps-cg ps) (cdr (car rem))))
   7119            ;; Variadic position (n >= nfix and var? is true): promote.
   7120            (var?
   7121             (cg-promote (ps-cg ps))))
   7122          (let ((m (+ n 1))
   7123                (rest (if (null? rem) '() (cdr rem))))
   7124            (cond ((at-punct? ps 'comma) (advance ps) (lp m rest))
   7125                  (else m))))))))
   7126 
   7127 ;; --------------------------------------------------------------------
   7128 ;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in
   7129 ;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'.
   7130 ;; va_start(ap, last)  — last is parsed and discarded; cg only needs
   7131 ;;   the variadic-first-slot offset, which it already tracks.
   7132 ;; va_arg(ap, T)       — T is a type-name; result rval has that type.
   7133 ;; va_end(ap)          — no-op codegen; just consumes ap.
   7134 ;;
   7135 ;; Pushes a single imm 0 for va_start / va_end so they fit as
   7136 ;; expression statements; va_arg pushes the rval.
   7137 ;; --------------------------------------------------------------------
   7138 (define (parse-builtin-va-start ps)
   7139   (advance ps)                                 ; IDENT
   7140   (expect-punct ps 'lparen)
   7141   (parse-expr-bp ps 4)                         ; ap (must be lval)
   7142   (expect-punct ps 'comma)
   7143   ;; "last" is parsed for syntactic completeness then dropped — cg
   7144   ;; doesn't need it; the variadic-first-slot was determined at
   7145   ;; cg-fn-begin/v time.
   7146   (parse-expr-bp ps 4) (cg-pop (ps-cg ps))
   7147   (expect-punct ps 'rparen)
   7148   (cg-va-start (ps-cg ps))
   7149   ;; Push a placeholder rval so the call expression has a value
   7150   ;; (matches va_start's "void" but our parser expects all
   7151   ;; expressions to leave one rval).
   7152   (cg-push-imm (ps-cg ps) %t-i32 0))
   7153 
   7154 (define (parse-builtin-va-arg ps)
   7155   (advance ps)                                 ; IDENT
   7156   (expect-punct ps 'lparen)
   7157   (parse-expr-bp ps 4)                         ; ap (lval)
   7158   (expect-punct ps 'comma)
   7159   (let*-values (((_sto bty) (parse-decl-spec ps))
   7160                 ((_n   ty)  (parse-declarator ps bty)))
   7161     (expect-punct ps 'rparen)
   7162     (cg-va-arg (ps-cg ps) ty)))
   7163 
   7164 (define (parse-builtin-expect ps)
   7165   ;; GCC `__builtin_expect(EXPR, EXPECTED)` — branch-prediction hint.
   7166   ;; We ignore the hint and emit just the value of EXPR.
   7167   (advance ps)                                  ; IDENT
   7168   (expect-punct ps 'lparen)
   7169   (parse-expr-bp ps 4) (rval! ps)               ; result
   7170   (expect-punct ps 'comma)
   7171   (parse-expr-bp ps 4) (cg-pop (ps-cg ps))      ; expected (drop)
   7172   (expect-punct ps 'rparen))
   7173 
   7174 (define (parse-builtin-va-end ps)
   7175   (advance ps)                                 ; IDENT
   7176   (expect-punct ps 'lparen)
   7177   (parse-expr-bp ps 4)                         ; ap
   7178   (expect-punct ps 'rparen)
   7179   (cg-va-end (ps-cg ps))
   7180   (cg-push-imm (ps-cg ps) %t-i32 0))
   7181 
   7182 (define (parse-primary ps)
   7183   (let ((t (peek ps)))
   7184     (pmatch t
   7185       (($ tok? (kind INT) (value ,n))
   7186        (advance ps)
   7187        ;; C99 §6.4.4.1: pick the smallest type that holds the value.
   7188        ;; The lexer drops u/U/l/L suffixes before we get here, so we can't
   7189        ;; tell `0x1L` from `0x1`. But a value that doesn't fit in int has
   7190        ;; to widen anyway — otherwise `4294967296L + 7L` truncates to 7,
   7191        ;; because cg-arith-conv leaves both operands at i32 width.
   7192        (cg-push-imm (ps-cg ps)
   7193                     (cond ((<= n 2147483647) %t-i32)
   7194                           (else              %t-i64))
   7195                     n))
   7196       (($ tok? (kind CHAR) (value ,c))
   7197        (advance ps)
   7198        ;; C99 §6.4.4.4: an integer character constant has type int.
   7199        (cg-push-imm (ps-cg ps) %t-i32 c))
   7200       (($ tok? (kind STR) (value ,s))
   7201        (advance ps)
   7202        (cg-push-string (ps-cg ps) s))
   7203       (($ tok? (kind IDENT) (value ,n))
   7204        (cond
   7205          ((bv= n "__builtin_va_start") (parse-builtin-va-start ps))
   7206          ((bv= n "__builtin_va_arg")   (parse-builtin-va-arg ps))
   7207          ((bv= n "__builtin_va_end")   (parse-builtin-va-end ps))
   7208          ((bv= n "__builtin_expect")   (parse-builtin-expect ps))
   7209          (else
   7210           (let ((sm (scope-lookup ps n)))
   7211             (advance ps)
   7212             (cond
   7213               ((not sm) (die (tok-loc t) "undecl" n))
   7214               ((eq? (sym-kind sm) 'enum-const)
   7215                (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm)))
   7216               (else (cg-push-sym (ps-cg ps) sm)))))))
   7217       (($ tok? (kind PUNCT) (value lparen))
   7218        (advance ps) (parse-expr ps) (expect-punct ps 'rparen))
   7219       (else (die (tok-loc t) "unexp" (tok-value t))))))
   7220 
   7221 (define (rval! ps)
   7222   (let ((tp (cg-top (ps-cg ps))))
   7223     (cond ((and tp (opnd? tp) (opnd-lval? tp))
   7224            (cg-load (ps-cg ps)))
   7225           (else #t))))
   7226 
   7227 (define (rval-not-fn! ps)
   7228   (let ((tp (cg-top (ps-cg ps))))
   7229     (cond ((and tp (opnd? tp) (opnd-lval? tp)
   7230                 (not (ctype-is-fn? (opnd-type tp))))
   7231            (cg-load (ps-cg ps)))
   7232           (else #t))))
   7233 ;; cc/main.scm — driver. Argv, file I/O, ties phases together.
   7234 
   7235 ;; --------------------------------------------------------------------
   7236 ;; CLI:   cc [--cc-debug] [--cc-trace-emit] [--lib=PFX]
   7237 ;;           <input.c> <output.P1pp>
   7238 ;;
   7239 ;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
   7240 ;; is the catm'd compiler source path, argv[2..] are the user-facing
   7241 ;; positional args. cc-main strips the first two.
   7242 ;; --------------------------------------------------------------------
   7243 
   7244 (define (%cc-slurp path)
   7245   (let ((r (open-input path)))
   7246     (cond ((not (car r))
   7247            (die #f "cannot open input" path)))
   7248     (let* ((p (cdr r))
   7249            (rd (read-all p)))
   7250       (close p)
   7251       (cond ((not (car rd)) (die #f "read failed" path)))
   7252       (cdr rd))))
   7253 
   7254 (define (%cc-write path bv)
   7255   (let ((r (open-output path)))
   7256     (cond ((not (car r))
   7257            (die #f "cannot open output" path)))
   7258     (let ((p (cdr r)))
   7259       (write-bv-fd (port-fd p) bv)
   7260       (close p)
   7261       0)))
   7262 
   7263 ;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan
   7264 ;; argv for a sentinel "--cc-debug" flag. When present, debug-log
   7265 ;; prints heap usage between phases to fd 2.
   7266 (define (%cc-flag? args flag)
   7267   (cond ((null? args) #f)
   7268         ((bv= (car args) flag) #t)
   7269         (else (%cc-flag? (cdr args) flag))))
   7270 
   7271 (define (%cc-strip-flag args flag)
   7272   (cond ((null? args) '())
   7273         ((bv= (car args) flag) (cdr args))
   7274         (else (cons (car args) (%cc-strip-flag (cdr args) flag)))))
   7275 
   7276 ;; --lib=PFX selects library-mode codegen: cc.scm skips the p1_main
   7277 ;; entry stub and trailing :ELF_end (the catm chain supplies them
   7278 ;; from P1/entry-*.P1pp + P1/elf-end.P1pp once), and namespaces
   7279 ;; anonymous string labels as PFX+"cc__str_N" so two cc.scm outputs
   7280 ;; in the same link don't collide on cc__str_0..N. Returns
   7281 ;; (values prefix-bv rest-args). PREFIX = "" means exec mode (flag
   7282 ;; absent). PREFIX = "" with the flag present is rejected — silently
   7283 ;; falling back to exec mode would mask a typo'd Makefile rule.
   7284 (define (%cc-take-lib args)
   7285   (let loop ((acc '()) (rest args) (pfx #f))
   7286     (cond
   7287       ((null? rest)
   7288        (values (cond (pfx pfx) (else "")) (reverse acc)))
   7289       ((bv-prefix? "--lib=" (car rest))
   7290        (cond (pfx (die #f "cc: --lib= specified twice")))
   7291        (let* ((arg (car rest))
   7292               (p   (bv-slice arg 6 (bytevector-length arg))))
   7293          (cond ((= 0 (bytevector-length p))
   7294                 (die #f "cc: --lib= requires a non-empty PREFIX")))
   7295          (loop acc (cdr rest) p)))
   7296       (else
   7297        (loop (cons (car rest) acc) (cdr rest) pfx)))))
   7298 
   7299 ;; Predefined macros visible to every translation unit. CCSCM lets
   7300 ;; tests/headers branch on "compiled by cc.scm" — e.g. skip <stdarg.h>
   7301 ;; and use the __builtin_va_* primitives directly.
   7302 (define %cc-initial-defines
   7303   (list (cons "CCSCM" (%macro 'obj '() '()))))
   7304 
   7305 (define (cc-main av)
   7306   (let* ((raw  (cdr (cdr av)))
   7307          (dbg  (%cc-flag? raw "--cc-debug"))
   7308          (a1   (%cc-strip-flag raw "--cc-debug"))
   7309          (tr   (%cc-flag? a1 "--cc-trace-emit"))
   7310          (a2   (%cc-strip-flag a1 "--cc-trace-emit")))
   7311     (cond (dbg (debug-log-on!)))
   7312     (cond (tr  (trace-emit-on!)))
   7313     (let-values (((lib-prefix args) (%cc-take-lib a2)))
   7314       (cond
   7315         ((or (null? args) (null? (cdr args)))
   7316          (die #f "usage: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] <input.c> <output.P1pp>")))
   7317       (let* ((in-path  (car args))
   7318              (out-path (car (cdr args)))
   7319              (lib?     (cond ((= 0 (bytevector-length lib-prefix)) #f)
   7320                              (else #t))))
   7321         (debug-log "phase=start" "heap" (heap-usage))
   7322         ;; Streaming pipeline: lex → pp → parser → cg, all concurrent.
   7323         ;; Each stage pulls one tok at a time from upstream. Steady-state
   7324         ;; live data is bounded by parser/pp state, not source length.
   7325         (let* ((src      (%cc-slurp in-path))
   7326                (_1       (debug-log "phase=slurp" "heap" (heap-usage)
   7327                                     "src-bytes" (bytevector-length src)))
   7328                (lex-iter (make-lex-iter src in-path))
   7329                (pp-iter  (make-pp-iter lex-iter %cc-initial-defines))
   7330                (cg       (cg-init/v lib? lib-prefix))
   7331                (ps       (make-pstate pp-iter cg)))
   7332           (parse-translation-unit ps)
   7333           (debug-log "phase=parse" "heap" (heap-usage))
   7334           (let ((out (cg-finish cg)))
   7335             (debug-log "phase=cg-finish" "heap" (heap-usage)
   7336                        "out-bytes" (bytevector-length out))
   7337             (%cc-write out-path out))
   7338           0)))))