cc.scm (313483B)
1 ;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude. 2 3 ;; -------------------------------------------------------------------- 4 ;; bytevector helpers (scheme1 strings ARE bytevectors) 5 ;; -------------------------------------------------------------------- 6 (define (bv= a b) (bytevector=? a b)) 7 8 (define (bv-prefix? p s) 9 ;; Is s a bv that starts with the bytes of p? 10 (let ((plen (bytevector-length p)) 11 (slen (bytevector-length s))) 12 (if (< slen plen) 13 #f 14 (let loop ((i 0)) 15 (cond ((= i plen) #t) 16 ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i)) 17 (loop (+ i 1))) 18 (else #f)))))) 19 20 (define (bv-find bv byte from) 21 ;; Linear scan for the first byte == `byte` at index >= from. 22 ;; Returns the index, or #f if not found. 23 (let ((n (bytevector-length bv))) 24 (let loop ((i from)) 25 (cond ((>= i n) #f) 26 ((= (bytevector-u8-ref bv i) byte) i) 27 (else (loop (+ i 1))))))) 28 29 (define (bv-slice bv start end) 30 ;; Fresh copy of bytes in [start, end). bytevector-copy already does 31 ;; this in scheme1 (3-arg form returns a fresh bv). 32 (bytevector-copy bv start end)) 33 34 (define (bv-of-byte b) (make-bytevector 1 b)) 35 36 (define (bv-cat lst-of-bv) 37 ;; Concat a list of bytevectors with one allocation. bytevector-append 38 ;; is variadic, so apply does this in a single linear pass. 39 (apply bytevector-append lst-of-bv)) 40 41 (define (bv->fixnum bv radix) 42 ;; (values ok? val) — #t/n on parse, #f/#f on fail. 43 ;; string->number is pure and returns #f on parse failure. 44 (let ((n (string->number bv radix))) 45 (if n (values #t n) (values #f #f)))) 46 47 (define (fixnum->bv n radix) (number->string n radix)) 48 49 ;; -------------------------------------------------------------------- 50 ;; lists / alists 51 ;; -------------------------------------------------------------------- 52 (define (alist-ref key al) (let ((p (assoc key al))) (and p (cdr p)))) 53 (define (alist-ref/eq key al) (let ((p (assq key al))) (and p (cdr p)))) 54 55 (define (alist-set key val al) (cons (cons key val) al)) 56 57 (define (alist-update key f al) 58 ;; Functional update by equal? key. If found, replace its value with 59 ;; (f old-val). If not found, prepend (cons key (f #f)) so callers 60 ;; can use this as upsert-with-default. 61 (let loop ((xs al) (acc '())) 62 (cond ((null? xs) 63 (cons (cons key (f #f)) (reverse acc))) 64 ((equal? (car (car xs)) key) 65 (append (reverse acc) 66 (cons (cons key (f (cdr (car xs)))) 67 (cdr xs)))) 68 (else (loop (cdr xs) (cons (car xs) acc)))))) 69 70 (define (any p xs) 71 (cond ((null? xs) #f) 72 ((p (car xs)) #t) 73 (else (any p (cdr xs))))) 74 75 (define (every p xs) 76 (cond ((null? xs) #t) 77 ((p (car xs)) (every p (cdr xs))) 78 (else #f))) 79 80 (define (count p xs) 81 (let loop ((xs xs) (n 0)) 82 (cond ((null? xs) n) 83 ((p (car xs)) (loop (cdr xs) (+ n 1))) 84 (else (loop (cdr xs) n))))) 85 86 ;; -------------------------------------------------------------------- 87 ;; ints 88 ;; -------------------------------------------------------------------- 89 (define (min3 a b c) (min a (min b c))) 90 (define (align-up n k) 91 ;; round n up to the nearest multiple of k (k must be a power of 2) 92 (let ((mask (- k 1))) 93 (bit-and (+ n mask) (bit-not mask)))) 94 95 ;; -------------------------------------------------------------------- 96 ;; output buffer (fixed-size pre-allocated byte storage) 97 ;; 98 ;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`. 99 ;; buf-push! is bytevector-copy! into storage — zero allocation per 100 ;; push, no chunks list to chase. This is what makes per-function 101 ;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed- 102 ;; storage (allocated once, lives pre-mark), so byte-level mutations 103 ;; survive a rewind that discards the parse/cg scratch. 104 ;; 105 ;; Sizing knobs live in one place so they're easy to tune as inputs 106 ;; grow. cg-init picks per-buf caps; the per-fn bufs are reused 107 ;; across functions (reset, not re-allocated). 108 ;; -------------------------------------------------------------------- 109 110 ;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a 111 ;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow 112 ;; die() reports off/len/cap so misses are easy to diagnose. 113 ;; 114 ;; Each cap is a power of two. scheme1's bv_capacity_for rounds the 115 ;; requested length up to the smallest power of two ≥ n, so asking for 116 ;; 2^k bytes consumes exactly 2^k of heap. 117 (define %BUF-CAP-TEXT 8388608) ; 8 MiB: .text + entry stub 118 (define %BUF-CAP-DATA 2097152) ; 2 MiB: .data (strings, globals) 119 (define %BUF-CAP-BSS 2097152) ; 2 MiB: .bss 120 (define %BUF-CAP-FN 262144) ; 256 KiB: per-fn body asm 121 (define %BUF-CAP-PROLOGUE 16384) ; 16 KiB: per-fn prologue 122 (define %BUF-CAP-DEFAULT 65536) ; 64 KiB: make-buf fallback 123 124 (define-record-type buf 125 (%buf storage offset cap) 126 buf? 127 (storage buf-storage) ; bv: pre-allocated, never resized 128 (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far 129 (cap buf-cap)) ; fixnum: storage capacity 130 131 (define (make-buf/cap cap) 132 (%buf (make-bytevector cap 0) 0 cap)) 133 134 (define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT)) 135 136 (define (buf-push! b bv) 137 (let* ((n (bytevector-length bv)) 138 (off (buf-offset b)) 139 (newoff (+ off n))) 140 (cond 141 ((> newoff (buf-cap b)) 142 (die #f "buf overflow" off n (buf-cap b)))) 143 (bytevector-copy! (buf-storage b) off bv 0 n) 144 (buf-offset-set! b newoff))) 145 146 (define (buf-flush b) 147 ;; Snapshot the used prefix as a fresh bv. One allocation; the 148 ;; underlying storage is unchanged. 149 (bytevector-copy (buf-storage b) 0 (buf-offset b))) 150 151 (define (buf-reset! b) (buf-offset-set! b 0)) 152 153 (define (buf-drain! dst src) 154 ;; Copy src's used bytes into dst at dst's current write head; reset 155 ;; src to empty. dst and src must be distinct bufs. 156 (let* ((slen (buf-offset src)) 157 (doff (buf-offset dst)) 158 (newoff (+ doff slen))) 159 (cond 160 ((> newoff (buf-cap dst)) 161 (die #f "buf-drain overflow" doff slen (buf-cap dst)))) 162 (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen) 163 (buf-offset-set! dst newoff) 164 (buf-offset-set! src 0))) 165 166 ;; -------------------------------------------------------------------- 167 ;; diagnostics + I/O 168 ;; -------------------------------------------------------------------- 169 (define (die loc msg . irritants) 170 ;; Format: 171 ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ... 172 ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted. 173 ;; irritants are written via display semantics (no quoting); format's 174 ;; ~a handles bv/fixnum/pair/symbol the same way display does. 175 ;; 176 ;; All output is built into a single bv and sent to fd 2 with one 177 ;; sys-write loop, so a partial write doesn't interleave fragments 178 ;; from a concurrent process. 179 (let* ((prefix (if loc 180 (format "~a:~d:~d: error: " 181 (loc-file loc) (loc-line loc) (loc-col loc)) 182 "error: ")) 183 (head (bytevector-append prefix (format "~a" msg))) 184 ;; Irritants get ": " before the first and " " between the rest. 185 (tail (if (null? irritants) 186 (list NL-BV) 187 (let walk ((xs irritants) (sep ": ") (acc '())) 188 (if (null? xs) 189 (reverse (cons NL-BV acc)) 190 (walk (cdr xs) 191 " " 192 (cons (format "~a" (car xs)) 193 (cons sep acc))))))) 194 (out (bv-cat (cons head tail)))) 195 (write-bv-fd 2 out) 196 (sys-exit 1))) 197 198 (define (slurp-fd fd) 199 ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's 200 ;; port layer uses); bv-concat-reverse builds the result in one 201 ;; allocation so a multi-MB tcc.c stays linear. 202 (let ((buf (make-bytevector BUFSIZE))) 203 (let loop ((acc '())) 204 (let ((r (sys-read fd buf 0 BUFSIZE))) 205 (cond ((not (car r)) 206 (die #f "slurp-fd: sys-read failed" (cdr r))) 207 ((zero? (cdr r)) 208 (bv-concat-reverse acc)) 209 (else 210 (loop (cons (bytevector-copy buf 0 (cdr r)) acc)))))))) 211 212 (define (write-bv-fd fd bv) 213 ;; Full write or die. sys-write may write fewer bytes than requested; 214 ;; advance the offset and retry the unwritten tail. 215 ;; 216 ;; On failure we sys-exit directly instead of routing through `die` 217 ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must 218 ;; not recurse infinitely. Status 1 matches the contract for `die`. 219 (let ((len (bytevector-length bv))) 220 (let loop ((off 0)) 221 (if (= off len) 222 #t 223 (let ((r (sys-write fd bv off (- len off)))) 224 (cond ((not (car r)) (sys-exit 1)) 225 ((zero? (cdr r)) (sys-exit 1)) 226 (else (loop (+ off (cdr r)))))))))) 227 228 ;; -------------------------------------------------------------------- 229 ;; debug logging 230 ;; 231 ;; Cheap sticky on/off: the cc compiler is single-threaded and short- 232 ;; lived, so a top-level mutable flag is fine. Toggle via 233 ;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr) 234 ;; writes one line to fd 2 in the same display-style format as `die`, 235 ;; but doesn't abort. The intent is to trace heap usage between cc 236 ;; phases (lex/pp/parse/cg-finish) without compile-time conditionals. 237 ;; -------------------------------------------------------------------- 238 (define %debug-log-enabled #f) 239 (define (debug-log-on!) (set! %debug-log-enabled #t)) 240 (define (debug-log-off!) (set! %debug-log-enabled #f)) 241 (define (debug-log? ) %debug-log-enabled) 242 243 ;; --cc-trace-emit: if on, cg-fn-end injects a `%trace(MANGLED)` line 244 ;; at the top of each emitted function body (right after the prologue's 245 ;; argument-spill, so the macro is free to clobber a0..a3). Pairs with 246 ;; libp1pp's %trace macro + libp1pp__trace runtime helper to produce a 247 ;; stderr line per function entry, with the runtime address of the 248 ;; first body instruction. See P1/P1pp.P1pp's "Tracepoint" section. 249 (define %trace-emit-enabled #f) 250 (define (trace-emit-on!) (set! %trace-emit-enabled #t)) 251 (define (trace-emit-off!) (set! %trace-emit-enabled #f)) 252 (define (trace-emit?) %trace-emit-enabled) 253 254 (define (debug-log msg . irritants) 255 (cond 256 (%debug-log-enabled 257 (let* ((head (bytevector-append "[cc] " (format "~a" msg))) 258 (tail (if (null? irritants) 259 (list NL-BV) 260 (let walk ((xs irritants) (sep ": ") (acc '())) 261 (if (null? xs) 262 (reverse (cons NL-BV acc)) 263 (walk (cdr xs) 264 " " 265 (cons (format "~a" (car xs)) 266 (cons sep acc))))))) 267 (out (bv-cat (cons head tail)))) 268 (write-bv-fd 2 out))) 269 (else #t))) 270 271 ;; -------------------------------------------------------------------- 272 ;; fresh-name generator (used for cg label counters, etc.) 273 ;; -------------------------------------------------------------------- 274 (define (make-namer prefix) 275 ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh 276 ;; bv. The counter lives in the closure's lexical environment; scheme1 277 ;; closures heap-capture by reference, so set! on ctr is sticky. 278 (let ((ctr 0)) 279 (lambda () 280 (let ((s (bytevector-append prefix (number->string ctr 10)))) 281 (set! ctr (+ ctr 1)) 282 s)))) 283 ;; cc/data.scm — record types and symbol alphabets shared across modules. 284 285 ;; -------------------------------------------------------------------- 286 ;; loc — source location for diagnostics 287 ;; -------------------------------------------------------------------- 288 (define-record-type loc 289 (%loc file line col) 290 loc? 291 (file loc-file) ; bv 292 (line loc-line) ; fixnum 293 (col loc-col)) ; fixnum 294 295 ;; -------------------------------------------------------------------- 296 ;; tok — lexer token. 297 ;; -------------------------------------------------------------------- 298 (define-record-type tok 299 (%tok kind value loc hide) 300 tok? 301 (kind tok-kind) ; IDENT | INT | STR | CHAR | KW | PUNCT 302 ; | NL | HASH | EOF 303 (value tok-value) ; bv | fixnum | symbol | #f 304 (loc tok-loc) ; loc 305 (hide tok-hide)) ; list of bv (macro names already expanded) 306 307 (define (make-tok kind value loc) 308 (%tok kind value loc '())) 309 310 ;; -------------------------------------------------------------------- 311 ;; macro — preprocessor macro definition 312 ;; -------------------------------------------------------------------- 313 (define-record-type macro 314 (%macro kind params body) 315 macro? 316 (kind macro-kind) ; 'obj | 'fn | 'fn-vararg 317 (params macro-params) ; list of bv 318 (body macro-body)) ; list of tok 319 320 ;; -------------------------------------------------------------------- 321 ;; ctype — C type. 322 ;; 323 ;; size/align/ext mutate only on forward struct/union completion (see 324 ;; complete-agg!). Every other ctype is constructed in its final shape 325 ;; and treated as immutable thereafter. 326 ;; -------------------------------------------------------------------- 327 (define-record-type ctype 328 (%ctype kind size align ext) 329 ctype? 330 (kind ctype-kind) 331 (size ctype-size ctype-size-set!) 332 (align ctype-align ctype-align-set!) 333 (ext ctype-ext ctype-ext-set!)) 334 335 ;; Interned primitive ctypes. Equality is eq?. 336 (define %t-void (%ctype 'void -1 -1 #f)) 337 (define %t-i8 (%ctype 'i8 1 1 #f)) 338 (define %t-u8 (%ctype 'u8 1 1 #f)) 339 (define %t-i16 (%ctype 'i16 2 2 #f)) 340 (define %t-u16 (%ctype 'u16 2 2 #f)) 341 (define %t-i32 (%ctype 'i32 4 4 #f)) 342 (define %t-u32 (%ctype 'u32 4 4 #f)) 343 (define %t-i64 (%ctype 'i64 8 8 #f)) 344 (define %t-u64 (%ctype 'u64 8 8 #f)) 345 (define %t-bool (%ctype 'bool 1 1 #f)) 346 ;; Floating-point ctypes are parsed but never codegen'd; see CC.md §Cut. 347 ;; Sizes/aligns match the SysV ABI so struct layout containing fp fields 348 ;; works even when the cg refuses to emit fp ops. 349 (define %t-flt (%ctype 'flt 4 4 #f)) 350 (define %t-dbl (%ctype 'dbl 8 8 #f)) 351 (define %t-ldbl (%ctype 'ldbl 8 8 #f)) 352 353 ;; -------------------------------------------------------------------- 354 ;; sym — declared identifier (function, variable, typedef, …) 355 ;; defined? distinguishes a forward declaration (extern fn proto, extern 356 ;; var) from a definition (fn body, var with initializer, tentative def 357 ;; without `extern`). scope-bind! merges compatible decls; only two 358 ;; defined? syms with the same name fire a redefinition error. 359 ;; 360 ;; sym is immutable — no `sym-*-set!` accessor exists. scope-bind!'s 361 ;; merge logic constructs a fresh sym rather than mutating in place. 362 ;; Promotion (Phase 3 of CC-SCRATCH) relies on this: a deep-copied 363 ;; sym in main heap is guaranteed structurally identical to its 364 ;; scratch original. 365 ;; -------------------------------------------------------------------- 366 (define-record-type sym 367 (%sym name kind storage type slot defined?) 368 sym? 369 (name sym-name) ; bv 370 (kind sym-kind) ; symbol from §1.7 371 (storage sym-storage) ; symbol from §1.8 or #f 372 (type sym-type) ; ctype 373 (slot sym-slot) ; fixnum (auto local / param / enum-const value) 374 ; | #f (fn / global var / typedef) 375 (defined? sym-defined?)) ; #t = definition, #f = decl-only 376 377 ;; -------------------------------------------------------------------- 378 ;; opnd — operand on cg's vstack. 379 ;; -------------------------------------------------------------------- 380 (define-record-type opnd 381 (%opnd kind type ext lval?) 382 opnd? 383 (kind opnd-kind) 384 (type opnd-type) 385 (ext opnd-ext) 386 (lval? opnd-lval?)) 387 388 ;; -------------------------------------------------------------------- 389 ;; loop-ctx — entry on parser's loop/switch context stack. 390 ;; -------------------------------------------------------------------- 391 (define-record-type loop-ctx 392 (%loop-ctx kind tag has-continue?) 393 loop-ctx? 394 (kind loop-ctx-kind) 395 (tag loop-ctx-tag) 396 (has-continue? loop-ctx-has-continue?)) 397 398 ;; -------------------------------------------------------------------- 399 ;; fn-ctx — current-function context inside the parser. 400 ;; -------------------------------------------------------------------- 401 (define-record-type fn-ctx 402 (%fn-ctx name return-type params variadic? labels) 403 fn-ctx? 404 (name fn-ctx-name) 405 (return-type fn-ctx-return-type) 406 (params fn-ctx-params) 407 (variadic? fn-ctx-variadic?) 408 (labels fn-ctx-labels fn-ctx-labels-set!)) 409 410 ;; -------------------------------------------------------------------- 411 ;; world — cross-decl persistent parser/cg state. The same world record 412 ;; is shared by pstate and cg so its slots — scope (var/typedef 413 ;; bindings), tags (struct/union/enum tags), str-pool (interned string 414 ;; literals), tentatives (file-scope tentative defs awaiting end-of-TU 415 ;; BSS emission) — can be reasoned about as one boundary contract. 416 ;; Phase 3's promote walkers deep-copy from this single root. 417 ;; -------------------------------------------------------------------- 418 (define-record-type world 419 (%world scope tags str-pool tentatives) 420 world? 421 (scope world-scope world-scope-set!) 422 (tags world-tags world-tags-set!) 423 (str-pool world-str-pool world-str-pool-set!) 424 (tentatives world-tentatives world-tentatives-set!)) 425 426 (define (make-world) 427 (%world (list '()) (list '()) '() '())) 428 429 ;; -------------------------------------------------------------------- 430 ;; pstate — parser state. Owned by parse.scm; read-only to cg. 431 ;; -------------------------------------------------------------------- 432 ;; iter holds a tok-iter (typically a pp-iter chained over a lex-iter). 433 ;; peek / peek2 / advance go through iter-peek / iter-peek2 / iter-next 434 ;; so the parser pulls one token at a time, with no full materialized 435 ;; token list. 436 (define-record-type pstate 437 (%pstate iter world loops fn-ctx cg) 438 pstate? 439 (iter ps-iter ps-iter-set!) 440 (world ps-world) 441 (loops ps-loops ps-loops-set!) 442 (fn-ctx ps-fn-ctx ps-fn-ctx-set!) 443 (cg ps-cg)) 444 445 (define (ps-scope ps) (world-scope (ps-world ps))) 446 (define (ps-scope-set! ps v) (world-scope-set! (ps-world ps) v)) 447 (define (ps-tags ps) (world-tags (ps-world ps))) 448 (define (ps-tags-set! ps v) (world-tags-set! (ps-world ps) v)) 449 450 ;; -------------------------------------------------------------------- 451 ;; cg — codegen state. Owned by cg.scm. 452 ;; -------------------------------------------------------------------- 453 ;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across 454 ;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains 455 ;; them into cg-text via buf-drain!. No per-fn allocation, which lets 456 ;; the parse-decl-or-fn boundary (Phase 3, scratch heap) discard 457 ;; everything the body allocated wholesale — fixed-storage byte writes 458 ;; survive scratch reset because the buf storage was allocated in main. 459 ;; 460 ;; in-fn? discriminates "currently inside a function body" so 461 ;; %cg-emit-buf can route emits to fn-buf during the body and cg-text 462 ;; outside it (entry stub, etc.). 463 ;; 464 ;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type, 465 ;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on 466 ;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. 467 ;; lib? / str-prefix encode the --lib=PFX flag from cc-main: 468 ;; #f / "" — exec mode (default): cg-finish emits the 469 ;; p1_main entry stub and trailing :ELF_end, and 470 ;; cg-intern-string labels strings cc__str_N. 471 ;; #t / "<pfx>" — library mode: skip the stub and :ELF_end so the 472 ;; output catm's into a larger TU, and label strings 473 ;; <pfx>cc__str_N so two cc.scm outputs in the same 474 ;; link don't collide on cc__str_0..N. 475 (define-record-type cg 476 (%cg text data bss vstack frame-hi label-ctr world fn-meta fn-buf prologue-buf max-outgoing in-fn? lib? str-prefix) 477 cg? 478 (text cg-text) 479 (data cg-data) 480 (bss cg-bss) 481 (vstack cg-vstack cg-vstack-set!) 482 (frame-hi cg-frame-hi cg-frame-hi-set!) 483 (label-ctr cg-label-ctr cg-label-ctr-set!) 484 (world cg-world) 485 (fn-meta cg-fn-meta cg-fn-meta-set!) 486 (fn-buf cg-fn-buf) 487 (prologue-buf cg-prologue-buf) 488 (max-outgoing cg-max-outgoing cg-max-outgoing-set!) 489 (in-fn? cg-in-fn? cg-in-fn?-set!) 490 (lib? cg-lib?) 491 (str-prefix cg-str-prefix)) 492 493 (define (cg-str-pool cg) (world-str-pool (cg-world cg))) 494 (define (cg-str-pool-set! cg v) (world-str-pool-set! (cg-world cg) v)) 495 496 ;; ctype predicates used by both cg and parser. 497 (define (%ctype-ptr? t) 498 (let ((k (ctype-kind t))) 499 (if (eq? k 'ptr) #t (eq? k 'arr)))) 500 501 (define (%ctype-pointee t) 502 (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t)) 503 ((eq? (ctype-kind t) 'arr) (car (ctype-ext t))) 504 (else #f))) 505 506 (define (%ctype-unsigned? t) 507 (let ((k (ctype-kind t))) 508 (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t) 509 ((eq? k 'u64) #t) ((eq? k 'bool) #t) 510 ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t) 511 (else #f)))) 512 513 (define (%ctype-arith? t) 514 (let ((k (ctype-kind t))) 515 (cond ((eq? k 'i8) #t) ((eq? k 'i16) #t) ((eq? k 'i32) #t) 516 ((eq? k 'i64) #t) ((eq? k 'u8) #t) ((eq? k 'u16) #t) 517 ((eq? k 'u32) #t) ((eq? k 'u64) #t) ((eq? k 'bool) #t) 518 (else #f)))) 519 520 (define (%ctype-fp? t) 521 (let ((k (ctype-kind t))) 522 (cond ((eq? k 'flt) #t) ((eq? k 'dbl) #t) ((eq? k 'ldbl) #t) 523 (else #f)))) 524 525 ;; -------------------------------------------------------------------- 526 ;; Symbol alphabets — canonical alists. 527 ;; -------------------------------------------------------------------- 528 529 ;; Keyword bytevector → keyword symbol. 530 (define %keyword-alist 531 '(;; storage 532 ("auto" . auto) ("register" . register) ("static" . static) 533 ("extern" . extern) ("typedef" . typedef) 534 ;; qualifiers (parsed and discarded by parse) 535 ("const" . const) ("volatile" . volatile) ("restrict" . restrict) 536 ("inline" . inline) 537 ;; GNU attribute spec — parsed and discarded; see skip-gnu-attribute! 538 ("__attribute__" . __attribute__) 539 ;; type specifiers 540 ("void" . void) ("char" . char) ("short" . short) 541 ("int" . int) ("long" . long) 542 ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool) 543 ;; rejected type specifiers (KW so diagnostics are crisp) 544 ("float" . float) ("double" . double) 545 ;; aggregates 546 ("struct" . struct) ("union" . union) ("enum" . enum) 547 ;; statements 548 ("if" . if) ("else" . else) 549 ("while" . while) ("do" . do) ("for" . for) 550 ("switch" . switch) ("case" . case) ("default" . default) 551 ("break" . break) ("continue" . continue) 552 ("return" . return) ("goto" . goto) 553 ;; operators 554 ("sizeof" . sizeof) 555 ;; reserved-and-rejected (KW so diagnostics are crisp) 556 ("_Generic" . _Generic) ("_Atomic" . _Atomic) 557 ("_Thread_local" . _Thread_local) 558 ("_Alignof" . _Alignof) ("_Alignas" . _Alignas) 559 ("_Static_assert" . _Static_assert) 560 ("_Complex" . _Complex) ("_Imaginary" . _Imaginary))) 561 562 ;; Punctuator bytevector → punct symbol. 563 ;; Listed longest-match-first; the lexer scans this list in order. 564 ;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents. 565 (define %punct-alist 566 '(;; 4-byte 567 ("%:%:" . paste) 568 ;; 3-byte 569 ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq) 570 ;; 2-byte 571 ("##" . paste) ("->" . arrow) 572 ("++" . inc) ("--" . dec) 573 ("<<" . shl) (">>" . shr) 574 ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne) 575 ("&&" . land) ("||" . lor) 576 ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq) 577 ("/=" . slash-eq) ("%=" . pct-eq) 578 ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq) 579 ;; digraphs (mapped to the standard equivalent symbol) 580 ("<:" . lbrack) (":>" . rbrack) 581 ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash) 582 ;; 1-byte 583 ("[" . lbrack) ("]" . rbrack) 584 ("(" . lparen) (")" . rparen) 585 ("{" . lbrace) ("}" . rbrace) 586 ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark) 587 ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct) 588 ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang) 589 ("<" . lt) (">" . gt) ("=" . assign) 590 ("#" . hash))) 591 ;; cc/lex.scm — bytestream → token list. Pure function; no I/O, 592 ;; no macro awareness. 593 ;; 594 ;; The lexer walks `src` byte-by-byte, threading (pos, line, col) 595 ;; explicitly through every helper (no mutable state). Each token 596 ;; captures its starting loc; helpers return (tok npos nline ncol). 597 ;; Trigraphs and `\<newline>` line splicing are handled via a single 598 ;; logical-byte primitive `%lex-peek`: it advances over splices and 599 ;; translates trigraphs in-place, so downstream code only ever sees 600 ;; the "translation phase 2" stream. Comments are stripped at the 601 ;; same level as whitespace. NL tokens are emitted at every physical 602 ;; newline so pp can use them to terminate directives. 603 ;; 604 ;; Heap discipline (per tests/scheme1/093-heap-mark-rewind.scm): 605 ;; token-producing helpers wrap their inner work in call-with-heap- 606 ;; rewind. Slots that must survive the rewind (start-loc and the 607 ;; integer holders for npos/nline/ncol) are bound by an outer let 608 ;; *before* the call-with-heap-rewind invocation, so the let's env 609 ;; extensions live below the mark. The byte-run scanners' tail-call 610 ;; env frames and any %lex-peek 4-lists are above the mark and get 611 ;; reclaimed. For helpers that produce a fresh bytevector (ident, 612 ;; string), the bv is allocated between the two calls so it persists 613 ;; into the parent arena. Numeric digit runs accumulate inline via 614 ;; %accum-int-while. 615 ;; 616 ;; %lex-iter-pull wraps each token-emitting iteration in an outer 617 ;; call-with-heap-rewind. The helper allocates its own tok+loc+bv 618 ;; above this outer mark; the driver reads the scalar fields and 619 ;; copies any bv contents into %lex-scratch (sticky, pre-mark) before 620 ;; the wrapper rewinds. Post-rewind it rebuilds a fresh tok+loc and a 621 ;; fresh bv (for IDENT/STR) sized to the actual content. 622 623 ;; -------------------------------------------------------------------- 624 ;; Cross-rewind transport for IDENT / STR bv values. 625 ;; 626 ;; %lex-scratch is a single sticky bytevector allocated below any 627 ;; lex-tokenize heap-mark. The driver copies bv data here *before* the 628 ;; rewind, then post-rewind allocates a fresh bv (sized exactly to the 629 ;; ident/string content) by copying back out of scratch. The whole 630 ;; lex run shares this one buffer. 631 ;; 632 ;; Why scratch, not a deduplicating intern pool: under scheme1's 633 ;; interpreter, walking a cons-list pool per lookup costs ~50–150 B 634 ;; per step in bind_params/eval_args/named-let env-extension 635 ;; overhead. Even 16-way bucketing has the walk cost outpace the 636 ;; bv-allocation savings until scheme1 grows a vector primitive 637 ;; (an O(1) bucket lookup without interpreter overhead). 638 ;; -------------------------------------------------------------------- 639 (define %lex-scratch-cap 65536) 640 (define %lex-scratch (make-bytevector %lex-scratch-cap 0)) 641 642 (define (%lex-init!) #t) 643 644 (define (%lex-scratch<- bv len) 645 (cond ((> len %lex-scratch-cap) 646 (die #f "lex: token exceeds scratch cap" len))) 647 (let loop ((i 0)) 648 (cond ((< i len) 649 (bytevector-u8-set! %lex-scratch i (bytevector-u8-ref bv i)) 650 (loop (+ i 1)))))) 651 652 (define (%lex-scratch->bv len) 653 ;; Allocate a fresh bv (exact size) and copy scratch[0..len) into it. 654 (let ((bv (make-bytevector len 0))) 655 (let copy ((i 0)) 656 (cond ((< i len) 657 (bytevector-u8-set! bv i (bytevector-u8-ref %lex-scratch i)) 658 (copy (+ i 1))))) 659 bv)) 660 661 ;; -------------------------------------------------------------------- 662 ;; Byte-class predicates (raw u8 values, not chars). 663 ;; -------------------------------------------------------------------- 664 (define (%digit? b) (if (< b 48) #f (if (< 57 b) #f #t))) ; '0'..'9' 665 (define (%hex? b) 666 (cond ((%digit? b) #t) 667 ((if (< b 65) #f (if (< 70 b) #f #t)) #t) ; 'A'..'F' 668 ((if (< b 97) #f (if (< 102 b) #f #t)) #t) ; 'a'..'f' 669 (else #f))) 670 (define (%octal? b) (if (< b 48) #f (if (< 55 b) #f #t))) ; '0'..'7' 671 (define (%alpha? b) 672 (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t) ; 'A'..'Z' 673 ((if (< b 97) #f (if (< 122 b) #f #t)) #t) ; 'a'..'z' 674 (else #f))) 675 (define (%ident-start? b) (or (%alpha? b) (= b 95))) ; '_' 676 (define (%ident-cont? b) (or (%ident-start? b) (%digit? b))) 677 (define (%hspace? b) (or (= b 32) (= b 9) (= b 11) (= b 12))) ; SP TAB VT FF 678 (define (%newline? b) (= b 10)) ; '\n' 679 680 ;; -------------------------------------------------------------------- 681 ;; Logical byte access. %lex-peek returns 682 ;; (byte npos nline ncol) 683 ;; where (npos, nline, ncol) points *just past* the consumed physical 684 ;; bytes. On EOF it returns (#f pos line col). 685 ;; 686 ;; Two transformations folded in here: 687 ;; 688 ;; - Trigraphs: ??= ??( ??/ ??) ??' ??< ??! ??> ??- 689 ;; # [ \ ] ^ { | } ~ 690 ;; The pair `??` followed by one of the nine trigraph completers 691 ;; produces the translated byte and advances 3 source bytes. 692 ;; - Line splice: a backslash immediately followed by `\n` is removed 693 ;; as a unit (incrementing line, resetting col to 1) and we recurse 694 ;; to fetch the next logical byte. 695 ;; 696 ;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and 697 ;; char literals do their own escape-handling. 698 ;; -------------------------------------------------------------------- 699 (define (%trigraph-byte b) 700 ;; Map the third trigraph byte to its replacement, or #f. 701 (cond ((= b 61) 35) ; '=' -> '#' 702 ((= b 40) 91) ; '(' -> '[' 703 ((= b 47) 92) ; '/' -> '\\' 704 ((= b 41) 93) ; ')' -> ']' 705 ((= b 39) 94) ; '\'' -> '^' 706 ((= b 60) 123) ; '<' -> '{' 707 ((= b 33) 124) ; '!' -> '|' 708 ((= b 62) 125) ; '>' -> '}' 709 ((= b 45) 126) ; '-' -> '~' 710 (else #f))) 711 712 (define (%lex-peek src pos line col) 713 (let ((n (bytevector-length src))) 714 (cond 715 ((>= pos n) (list #f pos line col)) 716 (else 717 (let ((b (bytevector-u8-ref src pos))) 718 (cond 719 ;; Trigraph: ?? + completer 720 ((and (= b 63) 721 (< (+ pos 2) n) 722 (= (bytevector-u8-ref src (+ pos 1)) 63)) 723 (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2))))) 724 (if tr 725 (list tr (+ pos 3) line (+ col 3)) 726 (list b (+ pos 1) line (+ col 1))))) 727 ;; Line splice: backslash + newline (consume both, no token) 728 ((and (= b 92) 729 (< (+ pos 1) n) 730 (= (bytevector-u8-ref src (+ pos 1)) 10)) 731 (%lex-peek src (+ pos 2) (+ line 1) 1)) 732 ;; Newline: pass through but caller decides line/col bump 733 ((%newline? b) 734 (list b (+ pos 1) (+ line 1) 1)) 735 (else 736 (list b (+ pos 1) line (+ col 1))))))))) 737 738 ;; Convenience accessors over the 4-list. 739 (define (%pk-byte p) (car p)) 740 (define (%pk-pos p) (car (cdr p))) 741 (define (%pk-line p) (car (cdr (cdr p)))) 742 (define (%pk-col p) (car (cdr (cdr (cdr p))))) 743 744 ;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with 745 ;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the 746 ;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and 747 ;; no list allocation is needed. Excludes the three bytes that %lex-peek 748 ;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump). 749 (define (%fast-byte? b) 750 (cond ((= b 63) #f) 751 ((= b 92) #f) 752 ((= b 10) #f) 753 (else #t))) 754 755 ;; -------------------------------------------------------------------- 756 ;; Whitespace + comment skipper. Returns (pos line col). 757 ;; Handles spaces/tabs, // line comments, /* block */ comments. Does 758 ;; *not* consume `\n` — newlines are tokens. 759 ;; -------------------------------------------------------------------- 760 (define (%skip-ws-and-comments src pos line col file) 761 (let ((n (bytevector-length src))) 762 (cond 763 ((>= pos n) (list pos line col)) 764 (else 765 (let ((b (bytevector-u8-ref src pos))) 766 (cond 767 ((and (%fast-byte? b) (%hspace? b)) 768 (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file)) 769 ((%fast-byte? b) 770 ;; Fast-byte that isn't hspace. Only '/' is interesting; 771 ;; everything else terminates the skip. 772 (cond 773 ((= b 47) (%maybe-comment src pos line col file)) 774 (else (list pos line col)))) 775 (else 776 ;; Slow path: trigraph / splice / newline. 777 (let* ((p (%lex-peek src pos line col)) 778 (b2 (%pk-byte p))) 779 (cond 780 ((not b2) (list pos line col)) 781 ((%hspace? b2) 782 (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p) 783 file)) 784 ((= b2 47) (%maybe-comment src pos line col file)) 785 (else (list pos line col))))))))))) 786 787 (define (%maybe-comment src pos line col file) 788 ;; Source byte at pos resolves to '/'. Decide between // line comment, 789 ;; /* block comment, or "leave the slash alone" (it's a punctuator). 790 (let* ((p (%lex-peek src pos line col)) 791 (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 792 (b2 (%pk-byte q))) 793 (cond 794 ((and b2 (= b2 47)) 795 (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file)) 796 ((and b2 (= b2 42)) 797 (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q) 798 file line col)) 799 (else (list pos line col))))) 800 801 (define (%skip-line-comment src pos line col file) 802 ;; Consume bytes until end-of-stream or until we *see* '\n' (do not 803 ;; consume the newline itself; outer loop emits the NL). 804 (let ((n (bytevector-length src))) 805 (cond 806 ((>= pos n) (%skip-ws-and-comments src pos line col file)) 807 (else 808 (let ((b (bytevector-u8-ref src pos))) 809 (cond 810 ;; '\n' terminates without consuming. 811 ((= b 10) (%skip-ws-and-comments src pos line col file)) 812 ((%fast-byte? b) 813 (%skip-line-comment src (+ pos 1) line (+ col 1) file)) 814 (else 815 ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice. 816 (let* ((p (%lex-peek src pos line col)) 817 (b2 (%pk-byte p))) 818 (cond 819 ((not b2) (%skip-ws-and-comments src pos line col file)) 820 ((%newline? b2) (%skip-ws-and-comments src pos line col file)) 821 (else 822 (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p) 823 file))))))))))) 824 825 (define (%skip-block-comment src pos line col file start-line start-col) 826 (let ((n (bytevector-length src))) 827 (cond 828 ((>= pos n) 829 (die (%loc file start-line start-col) 830 "unterminated /* block comment")) 831 (else 832 (let ((b (bytevector-u8-ref src pos))) 833 (cond 834 ;; Fast path for plain content bytes that aren't '*'. 835 ((and (%fast-byte? b) (not (= b 42))) 836 (%skip-block-comment src (+ pos 1) line (+ col 1) 837 file start-line start-col)) 838 (else 839 ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice). 840 (let* ((p (%lex-peek src pos line col)) 841 (b1 (%pk-byte p))) 842 (cond 843 ((not b1) 844 (die (%loc file start-line start-col) 845 "unterminated /* block comment")) 846 ((= b1 42) 847 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 848 (b2 (%pk-byte q))) 849 (cond 850 ((not b2) 851 (die (%loc file start-line start-col) 852 "unterminated /* block comment")) 853 ((= b2 47) 854 (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q) 855 file)) 856 (else 857 ;; Re-scan starting at the byte after '*'; the '*' was 858 ;; not the closer, but the next byte might itself be '*'. 859 (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) 860 file start-line start-col))))) 861 (else 862 (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) 863 file start-line start-col))))))))))) 864 865 ;; -------------------------------------------------------------------- 866 ;; Byte-run scanners. 867 ;; 868 ;; Tail-recursive walkers used by ident/number/string readers. None 869 ;; allocate per scanned byte on the fast path (only %lex-peek 4-lists 870 ;; on trigraph/splice/newline); the per-iteration env frames allocated 871 ;; by tail recursion are reclaimed by the caller's heap-rewind!. 872 ;; 873 ;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol) 874 ;; - %fill-while-bv: write matching bytes into a pre-sized bv. 875 ;; - %accum-int-while: accumulate a base-N integer over digit bytes. 876 ;; (val count npos nline ncol) 877 ;; - %accum-octal-bounded: same, but stops after k digits. 878 ;; -------------------------------------------------------------------- 879 (define (%scan-while pred src pos line col) 880 (let ((n (bytevector-length src))) 881 (let loop ((pos pos) (line line) (col col) (cnt 0)) 882 (cond 883 ((>= pos n) (list cnt pos line col)) 884 (else 885 (let ((b (bytevector-u8-ref src pos))) 886 (cond 887 ((%fast-byte? b) 888 (if (pred b) 889 (loop (+ pos 1) line (+ col 1) (+ cnt 1)) 890 (list cnt pos line col))) 891 (else 892 (let* ((p (%lex-peek src pos line col)) 893 (b2 (%pk-byte p))) 894 (if (and b2 (pred b2)) 895 (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1)) 896 (list cnt pos line col))))))))))) 897 898 (define (%fill-while-bv pred src pos line col bv idx) 899 (let ((n (bytevector-length src))) 900 (let loop ((pos pos) (line line) (col col) (idx idx)) 901 (cond 902 ((>= pos n) idx) 903 (else 904 (let ((b (bytevector-u8-ref src pos))) 905 (cond 906 ((%fast-byte? b) 907 (cond 908 ((pred b) 909 (bytevector-u8-set! bv idx b) 910 (loop (+ pos 1) line (+ col 1) (+ idx 1))) 911 (else idx))) 912 (else 913 (let* ((p (%lex-peek src pos line col)) 914 (b2 (%pk-byte p))) 915 (cond 916 ((and b2 (pred b2)) 917 (bytevector-u8-set! bv idx b2) 918 (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))) 919 (else idx))))))))))) 920 921 (define (%digit-val-byte b) 922 ;; ASCII digit byte → integer value. Caller guarantees b is a valid 923 ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F). 924 (cond ((%digit? b) (- b 48)) 925 ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10)) 926 ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10)) 927 (else 0))) 928 929 (define (%accum-int-while pred src pos line col base) 930 (let ((n (bytevector-length src))) 931 (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0)) 932 (cond 933 ((>= pos n) (list val cnt pos line col)) 934 (else 935 (let ((b (bytevector-u8-ref src pos))) 936 (cond 937 ((%fast-byte? b) 938 (if (pred b) 939 (loop (+ pos 1) line (+ col 1) 940 (+ (* val base) (%digit-val-byte b)) (+ cnt 1)) 941 (list val cnt pos line col))) 942 (else 943 (let* ((p (%lex-peek src pos line col)) 944 (b2 (%pk-byte p))) 945 (if (and b2 (pred b2)) 946 (loop (%pk-pos p) (%pk-line p) (%pk-col p) 947 (+ (* val base) (%digit-val-byte b2)) (+ cnt 1)) 948 (list val cnt pos line col))))))))))) 949 950 (define (%accum-octal-bounded src pos line col k) 951 ;; Up to k octal digits. Returns (val count npos nline ncol). 952 (let ((n (bytevector-length src))) 953 (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0)) 954 (cond 955 ((zero? k) (list val cnt pos line col)) 956 ((>= pos n) (list val cnt pos line col)) 957 (else 958 (let ((b (bytevector-u8-ref src pos))) 959 (cond 960 ((%fast-byte? b) 961 (if (%octal? b) 962 (loop (+ pos 1) line (+ col 1) (- k 1) 963 (+ (* val 8) (- b 48)) (+ cnt 1)) 964 (list val cnt pos line col))) 965 (else 966 (let* ((p (%lex-peek src pos line col)) 967 (b2 (%pk-byte p))) 968 (if (and b2 (%octal? b2)) 969 (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1) 970 (+ (* val 8) (- b2 48)) (+ cnt 1)) 971 (list val cnt pos line col))))))))))) 972 973 ;; -------------------------------------------------------------------- 974 ;; Identifier / keyword reader. 975 ;; 976 ;; Returns (tok npos nline ncol). Caller has already verified that the 977 ;; first byte at `pos` satisfies %ident-start?. 978 ;; 979 ;; Two-pass with call-with-heap-rewind: pass 1 (%scan-while) sizes the 980 ;; run, then between the two calls we allocate `name` bv so it survives 981 ;; the second rewind, then pass 2 (%fill-while-bv) writes into it. The 982 ;; integer slots count/npos/nline/ncol are bound by the outer let so 983 ;; they survive both rewinds. 984 ;; -------------------------------------------------------------------- 985 (define (lex-read-ident src pos file) 986 ;; Public for tests. Threads line/col from a fresh start. 987 (%lex-read-ident src pos 1 (+ pos 1) file)) 988 989 (define (%lex-read-ident src pos line col file) 990 (let ((start-loc (%loc file line col)) 991 (count 0) (npos 0) (nline 0) (ncol 0)) 992 (call-with-heap-rewind 993 (lambda () 994 (let ((sres (%scan-while %ident-cont? src pos line col))) 995 (set! count (car sres)) 996 (set! npos (car (cdr sres))) 997 (set! nline (car (cdr (cdr sres)))) 998 (set! ncol (car (cdr (cdr (cdr sres)))))))) 999 (let ((name (make-bytevector count 0))) 1000 (call-with-heap-rewind 1001 (lambda () 1002 (%fill-while-bv %ident-cont? src pos line col name 0))) 1003 (let ((kw (alist-ref name %keyword-alist))) 1004 (cons (if kw 1005 (make-tok 'KW kw start-loc) 1006 (make-tok 'IDENT name start-loc)) 1007 (list npos nline ncol)))))) 1008 1009 ;; -------------------------------------------------------------------- 1010 ;; Number reader. 1011 ;; 1012 ;; Decimal: [1-9][0-9]* (suffix: u U l L ll LL combinations) 1013 ;; Hex: 0x[0-9a-fA-F]+ | 0X... 1014 ;; Octal: 0[0-7]* 1015 ;; Float: anything looking like 1.0, 1e3, .5 → die crisply. 1016 ;; 1017 ;; Returns (tok npos nline ncol) on success. Aborts via `die` on float. 1018 ;; 1019 ;; %accum-int-while folds digit collection and value computation into 1020 ;; one walk — no per-byte cons cells, no separate digits-list pass. 1021 ;; -------------------------------------------------------------------- 1022 (define (lex-read-number src pos file) 1023 (%lex-read-number src pos 1 (+ pos 1) file)) 1024 1025 (define (%lex-read-number src pos line col file) 1026 (let* ((start-loc (%loc file line col)) 1027 (p (%lex-peek src pos line col)) 1028 (b (%pk-byte p))) 1029 (cond 1030 ;; '0x' / '0X' hex prefix 1031 ((and (= b 48) 1032 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1033 (b2 (%pk-byte q))) 1034 (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X' 1035 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1036 (r (%accum-int-while %hex? src 1037 (%pk-pos q) (%pk-line q) (%pk-col q) 16)) 1038 (val (car r)) 1039 (cnt (car (cdr r))) 1040 (pos2 (car (cdr (cdr r)))) 1041 (line2 (car (cdr (cdr (cdr r))))) 1042 (col2 (car (cdr (cdr (cdr (cdr r))))))) 1043 (if (zero? cnt) 1044 (die start-loc "expected hex digits after 0x") 1045 (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) 1046 (cons (make-tok 'INT val start-loc) after))))) 1047 ;; '0' alone → octal sequence (could be just zero) 1048 ((= b 48) 1049 (let* ((r (%accum-int-while %octal? src 1050 (%pk-pos p) (%pk-line p) (%pk-col p) 8)) 1051 (val (car r)) 1052 (pos2 (car (cdr (cdr r)))) 1053 (line2 (car (cdr (cdr (cdr r))))) 1054 (col2 (car (cdr (cdr (cdr (cdr r))))))) 1055 ;; Reject '.' / 'e' / 'E' immediately after the octal run — float. 1056 (%check-no-float src pos2 line2 col2 file start-loc) 1057 ;; Reject stray digits 8/9 in an octal context (e.g. 089). 1058 (let* ((p3 (%lex-peek src pos2 line2 col2)) 1059 (b3 (%pk-byte p3))) 1060 (if (and b3 (%digit? b3)) 1061 (die start-loc "invalid octal digit" (bv-of-byte b3)) 1062 (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) 1063 (cons (make-tok 'INT val start-loc) after)))))) 1064 ;; '1'-'9' → decimal 1065 ((%digit? b) 1066 (let* ((r (%accum-int-while %digit? src pos line col 10)) 1067 (val (car r)) 1068 (pos2 (car (cdr (cdr r)))) 1069 (line2 (car (cdr (cdr (cdr r))))) 1070 (col2 (car (cdr (cdr (cdr (cdr r))))))) 1071 (%check-no-float src pos2 line2 col2 file start-loc) 1072 (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) 1073 (cons (make-tok 'INT val start-loc) after)))) 1074 ;; '.' followed by a digit = float-style literal — reject. 1075 ((= b 46) 1076 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1077 (b2 (%pk-byte q))) 1078 (if (and b2 (%digit? b2)) 1079 (die start-loc "floating-point literal not supported") 1080 ;; Otherwise '.' was a punctuator — caller wouldn't have 1081 ;; routed here unless it was a digit-led prefix. 1082 (die start-loc "internal: number reader on non-number")))) 1083 (else 1084 (die start-loc "internal: number reader on non-number"))))) 1085 1086 (define (%check-no-float src pos line col file start-loc) 1087 ;; If the byte at pos starts a fractional/exponent part, abort. 1088 (let* ((p (%lex-peek src pos line col)) 1089 (b (%pk-byte p))) 1090 (cond 1091 ((not b) #t) 1092 ((= b 46) ; '.' 1093 (die start-loc "floating-point literal not supported")) 1094 ((or (= b 101) (= b 69)) ; 'e' / 'E' 1095 ;; Only a float exponent if followed by [+-]?digit. 1096 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1097 (b2 (%pk-byte q))) 1098 (cond 1099 ((and b2 (%digit? b2)) 1100 (die start-loc "floating-point literal not supported")) 1101 ((and b2 (or (= b2 43) (= b2 45))) 1102 (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q))) 1103 (b3 (%pk-byte r))) 1104 (if (and b3 (%digit? b3)) 1105 (die start-loc "floating-point literal not supported") 1106 #t))) 1107 (else #t)))) 1108 (else #t)))) 1109 1110 (define (%lex-strip-int-suffix src pos line col file) 1111 ;; Consume any combination of u U l L (the long can be doubled). We 1112 ;; don't validate orderings strictly; tcc.c uses the canonical forms. 1113 ;; Returns (npos nline ncol). 1114 (let loop ((pos pos) (line line) (col col)) 1115 (let* ((p (%lex-peek src pos line col)) 1116 (b (%pk-byte p))) 1117 (cond 1118 ((not b) (list pos line col)) 1119 ((or (= b 117) (= b 85) ; u U 1120 (= b 108) (= b 76)) ; l L 1121 (loop (%pk-pos p) (%pk-line p) (%pk-col p))) 1122 (else (list pos line col)))))) 1123 1124 ;; -------------------------------------------------------------------- 1125 ;; Escape sequence reader. 1126 ;; 1127 ;; %scan-or-fill-escape decodes one escape sequence starting at `pos` 1128 ;; (which points one past the leading `\\`). When `bv` is a bytevector, 1129 ;; the resulting byte is written to (bv idx); when it is #f, no write 1130 ;; occurs (used during the string-pass scan phase). Returns the 4-list 1131 ;; (val npos nline ncol). 1132 ;; -------------------------------------------------------------------- 1133 (define (%scan-or-fill-escape src pos line col file start-loc bv idx) 1134 (let* ((p (%lex-peek src pos line col)) 1135 (b (%pk-byte p))) 1136 (cond 1137 ((not b) (die start-loc "unterminated escape sequence")) 1138 ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms). 1139 ((or (= b 120) (= b 88)) ; 'x' / 'X' 1140 (let* ((r (%accum-int-while %hex? src 1141 (%pk-pos p) (%pk-line p) (%pk-col p) 16)) 1142 (val0 (car r)) 1143 (cnt (car (cdr r))) 1144 (pos2 (car (cdr (cdr r)))) 1145 (line2 (car (cdr (cdr (cdr r))))) 1146 (col2 (car (cdr (cdr (cdr (cdr r))))))) 1147 (cond 1148 ((zero? cnt) (die start-loc "expected hex digits after \\x")) 1149 (else 1150 (let ((val (bit-and val0 255))) 1151 (cond (bv (bytevector-u8-set! bv idx val)) 1152 (else #f)) 1153 (list val pos2 line2 col2)))))) 1154 ;; \NNN — 1..3 octal digits. 1155 ((%octal? b) 1156 (let* ((r (%accum-octal-bounded src pos line col 3)) 1157 (val0 (car r)) 1158 (pos2 (car (cdr (cdr r)))) 1159 (line2 (car (cdr (cdr (cdr r))))) 1160 (col2 (car (cdr (cdr (cdr (cdr r)))))) 1161 (val (bit-and val0 255))) 1162 (cond (bv (bytevector-u8-set! bv idx val)) 1163 (else #f)) 1164 (list val pos2 line2 col2))) 1165 (else 1166 (let ((val (cond ((= b 110) 10) ; n 1167 ((= b 116) 9) ; t 1168 ((= b 114) 13) ; r 1169 ((= b 92) 92) ; \\ 1170 ((= b 39) 39) ; ' 1171 ((= b 34) 34) ; " 1172 ((= b 48) 0) ; 0 (already handled by octal but be safe) 1173 ((= b 97) 7) ; \a -> BEL 1174 ((= b 98) 8) ; \b 1175 ((= b 102) 12) ; \f 1176 ((= b 118) 11) ; \v 1177 ((= b 63) 63) ; \? 1178 (else 1179 (die start-loc "unknown escape" (bv-of-byte b)))))) 1180 (cond (bv (bytevector-u8-set! bv idx val)) 1181 (else #f)) 1182 (list val (%pk-pos p) (%pk-line p) (%pk-col p))))))) 1183 1184 ;; -------------------------------------------------------------------- 1185 ;; String reader. 1186 ;; 1187 ;; Caller has verified src[pos] == '"' (raw byte 34). Returns 1188 ;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended). 1189 ;; 1190 ;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes 1191 ;; collapse to 1 byte each); after rewind we allocate the final bv and 1192 ;; rerun with bv set so the bytes are written directly into it. 1193 ;; -------------------------------------------------------------------- 1194 (define (lex-read-string src pos file) 1195 (%lex-read-string src pos 1 (+ pos 1) file)) 1196 1197 (define (%lex-read-string src pos line col file) 1198 (let ((start-loc (%loc file line col)) 1199 (cnt 0) (npos 0) (nline 0) (ncol 0)) 1200 ;; '"' (34) is a fast-byte and never a trigraph result, so the 1201 ;; physical byte at `pos` is exactly the opening quote. 1202 (cond 1203 ((or (>= pos (bytevector-length src)) 1204 (not (= (bytevector-u8-ref src pos) 34))) 1205 (die start-loc "internal: string reader on non-quote")) 1206 (else 1207 (call-with-heap-rewind 1208 (lambda () 1209 (let ((sres (%string-pass src (+ pos 1) line (+ col 1) 1210 file start-loc #f))) 1211 (set! cnt (car sres)) 1212 (set! npos (car (cdr sres))) 1213 (set! nline (car (cdr (cdr sres)))) 1214 (set! ncol (car (cdr (cdr (cdr sres)))))))) 1215 (let ((bv (make-bytevector cnt 0))) 1216 (call-with-heap-rewind 1217 (lambda () 1218 (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv))) 1219 (cons (make-tok 'STR bv start-loc) 1220 (list npos nline ncol))))))) 1221 1222 (define (%string-pass src pos line col file start-loc bv) 1223 ;; Walk the string body (after opening "). When `bv` is #f, count 1224 ;; effective bytes; when it is a bytevector, write bytes into it at 1225 ;; index 0..count-1. Returns (count npos nline ncol). 1226 (let ((n (bytevector-length src))) 1227 (let loop ((pos pos) (line line) (col col) (idx 0)) 1228 (cond 1229 ((>= pos n) (die start-loc "unterminated string literal")) 1230 (else 1231 (let ((b (bytevector-u8-ref src pos))) 1232 (cond 1233 ;; Closing quote — fast byte but special. 1234 ((= b 34) 1235 (list idx (+ pos 1) line (+ col 1))) 1236 ((%fast-byte? b) 1237 (cond (bv (bytevector-u8-set! bv idx b)) 1238 (else #f)) 1239 (loop (+ pos 1) line (+ col 1) (+ idx 1))) 1240 (else 1241 ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'. 1242 (let* ((p (%lex-peek src pos line col)) 1243 (b2 (%pk-byte p))) 1244 (cond 1245 ((not b2) 1246 (die start-loc "unterminated string literal")) 1247 ((= b2 34) 1248 (list idx (%pk-pos p) (%pk-line p) (%pk-col p))) 1249 ((%newline? b2) 1250 (die start-loc "newline in string literal")) 1251 ((= b2 92) 1252 (let* ((er (%scan-or-fill-escape 1253 src (%pk-pos p) (%pk-line p) (%pk-col p) 1254 file start-loc bv idx)) 1255 (epos (car (cdr er))) 1256 (eline (car (cdr (cdr er)))) 1257 (ecol (car (cdr (cdr (cdr er)))))) 1258 (loop epos eline ecol (+ idx 1)))) 1259 (else 1260 (cond (bv (bytevector-u8-set! bv idx b2)) 1261 (else #f)) 1262 (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))))))))))))) 1263 1264 ;; -------------------------------------------------------------------- 1265 ;; Char reader. 1266 ;; 1267 ;; Caller has verified src[pos] == '\''. Multi-character constants 1268 ;; ('AB') are rejected via die. 1269 ;; -------------------------------------------------------------------- 1270 (define (lex-read-char src pos file) 1271 (%lex-read-char src pos 1 (+ pos 1) file)) 1272 1273 (define (%lex-read-char src pos line col file) 1274 (let* ((start-loc (%loc file line col)) 1275 (p0 (%lex-peek src pos line col)) 1276 (b0 (%pk-byte p0))) 1277 (if (not (and b0 (= b0 39))) 1278 (die start-loc "internal: char reader on non-quote") 1279 (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0) 1280 file start-loc)))) 1281 1282 (define (%collect-char src pos line col file start-loc) 1283 ;; Read exactly one byte (handling escapes), then expect closing '\''. 1284 (let* ((p (%lex-peek src pos line col)) 1285 (b (%pk-byte p))) 1286 (cond 1287 ((not b) (die start-loc "unterminated char literal")) 1288 ((= b 39) (die start-loc "empty char literal")) 1289 ((%newline? b) (die start-loc "newline in char literal")) 1290 ((= b 92) ; escape 1291 (let* ((r (%scan-or-fill-escape src 1292 (%pk-pos p) (%pk-line p) (%pk-col p) 1293 file start-loc #f 0)) 1294 (val (car r)) 1295 (pos2 (car (cdr r))) 1296 (line2 (car (cdr (cdr r)))) 1297 (col2 (car (cdr (cdr (cdr r)))))) 1298 (%expect-char-close src pos2 line2 col2 file start-loc val))) 1299 (else 1300 (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p) 1301 file start-loc b))))) 1302 1303 (define (%expect-char-close src pos line col file start-loc val) 1304 (let* ((p (%lex-peek src pos line col)) 1305 (b (%pk-byte p))) 1306 (cond 1307 ((not b) (die start-loc "unterminated char literal")) 1308 ((= b 39) 1309 (cons (make-tok 'CHAR val start-loc) 1310 (list (%pk-pos p) (%pk-line p) (%pk-col p)))) 1311 (else 1312 (die start-loc "multi-character char constant not supported"))))) 1313 1314 ;; -------------------------------------------------------------------- 1315 ;; Punctuator reader. 1316 ;; 1317 ;; Greedy longest-match against %punct-alist. The alist 1318 ;; is already ordered longest-first. We additionally bucket entries by 1319 ;; their first byte so %lex-read-punct only loops over the small set of 1320 ;; patterns that can start at the current source byte. 1321 ;; -------------------------------------------------------------------- 1322 1323 (define (%alist-ref-int k al) 1324 ;; Lookup in an int-keyed alist (linear scan, '= compare). 1325 (cond ((null? al) #f) 1326 ((= (car (car al)) k) (cdr (car al))) 1327 (else (%alist-ref-int k (cdr al))))) 1328 1329 (define (%mem-int? k xs) 1330 (cond ((null? xs) #f) 1331 ((= (car xs) k) #t) 1332 (else (%mem-int? k (cdr xs))))) 1333 1334 (define (%filter-by-first-byte b al) 1335 ;; Subset of `al` whose pattern starts with byte b, preserving order. 1336 (cond 1337 ((null? al) '()) 1338 ((= (bytevector-u8-ref (car (car al)) 0) b) 1339 (cons (car al) (%filter-by-first-byte b (cdr al)))) 1340 (else (%filter-by-first-byte b (cdr al))))) 1341 1342 (define (%group-by-first-byte al) 1343 ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per 1344 ;; distinct first byte; sub-alist preserves longest-match-first 1345 ;; order from the source list. 1346 (let loop ((xs al) (seen '()) (out '())) 1347 (cond 1348 ((null? xs) (reverse out)) 1349 (else 1350 (let* ((entry (car xs)) 1351 (pat (car entry)) 1352 (b (bytevector-u8-ref pat 0))) 1353 (cond 1354 ((%mem-int? b seen) (loop (cdr xs) seen out)) 1355 (else 1356 (loop (cdr xs) 1357 (cons b seen) 1358 (cons (cons b (%filter-by-first-byte b al)) out))))))))) 1359 1360 (define %punct-buckets (%group-by-first-byte %punct-alist)) 1361 1362 (define (lex-read-punct src pos file) 1363 (%lex-read-punct src pos 1 (+ pos 1) file)) 1364 1365 (define (%lex-read-punct src pos line col file) 1366 (let* ((start-loc (%loc file line col)) 1367 (p (%lex-peek src pos line col)) 1368 (b (%pk-byte p))) 1369 (cond 1370 ((not b) (die start-loc "unrecognized byte" "EOF")) 1371 (else 1372 (let ((bucket (%alist-ref-int b %punct-buckets))) 1373 (cond 1374 ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b))) 1375 (else (%punct-loop src pos line col file start-loc bucket)))))))) 1376 1377 (define (%punct-loop src pos line col file start-loc al) 1378 (cond 1379 ((null? al) 1380 (let* ((p (%lex-peek src pos line col))) 1381 (die start-loc "unrecognized byte" 1382 (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF")))) 1383 (else 1384 (let* ((entry (car al)) 1385 (pat (car entry)) 1386 (sym (cdr entry)) 1387 (m (%match-bytes src pos line col pat 0))) 1388 (if m 1389 (cons (make-tok 'PUNCT sym start-loc) m) 1390 (%punct-loop src pos line col file start-loc (cdr al))))))) 1391 1392 (define (%match-bytes src pos line col pat i) 1393 ;; If the next bytes from (pos line col), in logical-byte stream 1394 ;; order, equal `pat[i..]`, return (npos nline ncol) after the 1395 ;; match. Otherwise #f. 1396 (cond 1397 ((= i (bytevector-length pat)) (list pos line col)) 1398 (else 1399 (let ((n (bytevector-length src))) 1400 (cond 1401 ((>= pos n) #f) 1402 (else 1403 (let ((b (bytevector-u8-ref src pos)) 1404 (pb (bytevector-u8-ref pat i))) 1405 (cond 1406 ((%fast-byte? b) 1407 (if (= b pb) 1408 (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1)) 1409 #f)) 1410 (else 1411 (let* ((p (%lex-peek src pos line col)) 1412 (b2 (%pk-byte p))) 1413 (cond 1414 ((not b2) #f) 1415 ((= b2 pb) 1416 (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p) 1417 pat (+ i 1))) 1418 (else #f)))))))))))) 1419 1420 ;; -------------------------------------------------------------------- 1421 ;; tok-iter — streaming token source. 1422 ;; -------------------------------------------------------------------- 1423 ;; Each pipeline layer (lex, pp, parser) wraps the layer below as a 1424 ;; tok-iter. iter-next pulls one token at a time. iter-peek/iter-peek2 1425 ;; cache lookahead in `buf`. iter-unget! pushes back. Live-data bound is 1426 ;; lookahead (≤2) + per-layer state, not source length. 1427 ;; 1428 ;; Pull-fns must keep yielding EOF after the first EOF (idempotent). 1429 (define-record-type tok-iter 1430 (%tok-iter pull-fn state buf) 1431 tok-iter? 1432 (pull-fn tok-iter-pull-fn) 1433 (state tok-iter-state) 1434 (buf tok-iter-buf tok-iter-buf-set!)) 1435 1436 (define (iter-next it) 1437 (let ((b (tok-iter-buf it))) 1438 (cond 1439 ((null? b) ((tok-iter-pull-fn it) (tok-iter-state it))) 1440 (else 1441 (tok-iter-buf-set! it (cdr b)) 1442 (car b))))) 1443 1444 (define (iter-peek it) 1445 (let ((b (tok-iter-buf it))) 1446 (cond 1447 ((null? b) 1448 (let ((t ((tok-iter-pull-fn it) (tok-iter-state it)))) 1449 (tok-iter-buf-set! it (list t)) 1450 t)) 1451 (else (car b))))) 1452 1453 (define (iter-peek2 it) 1454 (let ((b (tok-iter-buf it))) 1455 (cond 1456 ((null? b) 1457 (let* ((t1 ((tok-iter-pull-fn it) (tok-iter-state it))) 1458 (t2 ((tok-iter-pull-fn it) (tok-iter-state it)))) 1459 (tok-iter-buf-set! it (list t1 t2)) 1460 t2)) 1461 ((null? (cdr b)) 1462 (let ((t2 ((tok-iter-pull-fn it) (tok-iter-state it)))) 1463 (tok-iter-buf-set! it (cons (car b) (list t2))) 1464 t2)) 1465 (else (car (cdr b)))))) 1466 1467 (define (iter-unget! it t) 1468 (tok-iter-buf-set! it (cons t (tok-iter-buf it)))) 1469 1470 ;; Drain an iter to a list ending in EOF. Used by lex-tokenize / 1471 ;; pp-expand so the cc-lex / cc-pp test runners can inspect the 1472 ;; materialized stream. 1473 (define (iter->list it) 1474 (let loop ((acc '())) 1475 (let ((t (iter-next it))) 1476 (cond 1477 ((eq? (tok-kind t) 'EOF) (reverse (cons t acc))) 1478 (else (loop (cons t acc))))))) 1479 1480 ;; -------------------------------------------------------------------- 1481 ;; list-iter — wrap an existing token list as a tok-iter. Yields each 1482 ;; tok in turn; once exhausted, keeps yielding EOF (idempotent). The 1483 ;; wrapped list typically already ends in EOF. 1484 ;; -------------------------------------------------------------------- 1485 (define-record-type list-iter-state 1486 (%list-iter-state toks) 1487 list-iter-state? 1488 (toks lis-toks lis-toks-set!)) 1489 1490 (define (make-list-iter toks) 1491 (%tok-iter %list-iter-pull (%list-iter-state toks) '())) 1492 1493 (define (%list-iter-pull st) 1494 (let ((toks (lis-toks st))) 1495 (cond 1496 ((null? toks) (make-tok 'EOF #f #f)) 1497 (else 1498 (lis-toks-set! st (cdr toks)) 1499 (car toks))))) 1500 1501 ;; -------------------------------------------------------------------- 1502 ;; lex-iter — streaming lexer. Steady state: pos/line/col + bol? in 1503 ;; lex-state; per-token allocation reclaimed via heap-mark/rewind. 1504 ;; -------------------------------------------------------------------- 1505 ;; bol? — `#t` when no token has been emitted on the current physical 1506 ;; line yet (start of file, or only NL + whitespace seen since the last 1507 ;; line break). pp recognizes a directive only when its leading `#` is 1508 ;; at line-start; we forward that decision into the token stream by 1509 ;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`. 1510 ;; 1511 ;; Heap discipline: each call to %lex-iter-pull is wrapped in a 1512 ;; call-with-heap-rewind. All scratch the helper allocates (the 1513 ;; helper's own tok/loc, the `(cons tok 4-list)` it returns, every 1514 ;; bind_params / let* / eval_args env-cons consumed getting in and out) 1515 ;; lives above the mark and is reclaimed before returning. Per-token 1516 ;; scratch (kind/val/vlen/loc-line/loc-col/npos/nline/ncol/nbol?) is 1517 ;; allocated in the outer `let` BEFORE the wrapper call — set! mutates 1518 ;; those cells in place across the rewind. Bv contents survive via the 1519 ;; sticky %lex-scratch buffer + %lex-scratch->bv (allocated post-rewind). 1520 ;; The survivors per token are tok (48 B) + loc (40 B) + bv if any. 1521 (define-record-type lex-state 1522 (%lex-state src file pos line col bol? done?) 1523 lex-state? 1524 (src lex-state-src) 1525 (file lex-state-file) 1526 (pos lex-state-pos lex-state-pos-set!) 1527 (line lex-state-line lex-state-line-set!) 1528 (col lex-state-col lex-state-col-set!) 1529 (bol? lex-state-bol? lex-state-bol?-set!) 1530 (done? lex-state-done? lex-state-done?-set!)) 1531 1532 (define (make-lex-iter src file) 1533 (%lex-init!) 1534 (%tok-iter %lex-iter-pull 1535 (%lex-state src file 0 1 1 #t #f) 1536 '())) 1537 1538 (define (%lex-iter-pull st) 1539 (cond 1540 ((lex-state-done? st) 1541 ;; Idempotent EOF: keep yielding EOF after the first one. 1542 (make-tok 'EOF #f (%loc (lex-state-file st) 1543 (lex-state-line st) 1544 (lex-state-col st)))) 1545 (else (%lex-iter-step st)))) 1546 1547 (define (%lex-iter-step st) 1548 (let ((src (lex-state-src st)) 1549 (file (lex-state-file st)) 1550 (pos (lex-state-pos st)) 1551 (line (lex-state-line st)) 1552 (col (lex-state-col st)) 1553 (bol? (lex-state-bol? st)) 1554 ;; Per-iteration scratch — must be allocated BEFORE the call to 1555 ;; call-with-heap-rewind so that set!s issued from inside the 1556 ;; thunk still find live cells after the rewind. 1557 (kind #f) (val #f) (vlen 0) 1558 (loc-line 1) (loc-col 1) 1559 (npos 0) (nline 1) (ncol 1) (nbol? #f)) 1560 (call-with-heap-rewind 1561 (lambda () 1562 (let* ((sw (%skip-ws-and-comments src pos line col file)) 1563 (pos1 (car sw)) 1564 (line1 (car (cdr sw))) 1565 (col1 (car (cdr (cdr sw)))) 1566 (p (%lex-peek src pos1 line1 col1)) 1567 (b (%pk-byte p))) 1568 (set! loc-line line1) 1569 (set! loc-col col1) 1570 (set! val #f) (set! vlen 0) (set! nbol? #f) 1571 (cond 1572 ;; EOF 1573 ((not b) 1574 (set! kind 'EOF) 1575 (set! npos pos1) (set! nline line1) (set! ncol col1)) 1576 ;; Newline → NL token; next call starts at bol. 1577 ((%newline? b) 1578 (set! kind 'NL) 1579 (set! npos (%pk-pos p)) 1580 (set! nline (%pk-line p)) 1581 (set! ncol (%pk-col p)) 1582 (set! nbol? #t)) 1583 ;; Line-leading `#`: bare `#` becomes HASH; `##` falls 1584 ;; through to punctuator (lexes as `paste`). 1585 ((and bol? (= b 35)) 1586 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1587 (b2 (%pk-byte q))) 1588 (cond 1589 ((and b2 (= b2 35)) 1590 (let* ((r (%lex-read-punct src pos1 line1 col1 file)) 1591 (tok (car r)) (rest (cdr r))) 1592 (set! kind 'PUNCT) (set! val (tok-value tok)) 1593 (set! npos (car rest)) 1594 (set! nline (car (cdr rest))) 1595 (set! ncol (car (cdr (cdr rest)))))) 1596 (else 1597 (set! kind 'HASH) 1598 (set! npos (%pk-pos p)) 1599 (set! nline (%pk-line p)) 1600 (set! ncol (%pk-col p)))))) 1601 ;; Identifier / keyword 1602 ((%ident-start? b) 1603 (let* ((r (%lex-read-ident src pos1 line1 col1 file)) 1604 (tok (car r)) (rest (cdr r))) 1605 (set! kind (tok-kind tok)) 1606 (cond ((eq? (tok-kind tok) 'KW) 1607 (set! val (tok-value tok))) 1608 (else 1609 (let ((bv (tok-value tok))) 1610 (set! vlen (bytevector-length bv)) 1611 (%lex-scratch<- bv vlen)))) 1612 (set! npos (car rest)) 1613 (set! nline (car (cdr rest))) 1614 (set! ncol (car (cdr (cdr rest)))))) 1615 ;; Number (digit start) 1616 ((%digit? b) 1617 (let* ((r (%lex-read-number src pos1 line1 col1 file)) 1618 (tok (car r)) (rest (cdr r))) 1619 (set! kind 'INT) (set! val (tok-value tok)) 1620 (set! npos (car rest)) 1621 (set! nline (car (cdr rest))) 1622 (set! ncol (car (cdr (cdr rest)))))) 1623 ;; '.' is a punctuator unless followed by a digit (float). 1624 ((= b 46) 1625 (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) 1626 (b2 (%pk-byte q))) 1627 (cond 1628 ((and b2 (%digit? b2)) 1629 (die (%loc file line1 col1) 1630 "floating-point literal not supported")) 1631 (else 1632 (let* ((r (%lex-read-punct src pos1 line1 col1 file)) 1633 (tok (car r)) (rest (cdr r))) 1634 (set! kind 'PUNCT) (set! val (tok-value tok)) 1635 (set! npos (car rest)) 1636 (set! nline (car (cdr rest))) 1637 (set! ncol (car (cdr (cdr rest))))))))) 1638 ;; String 1639 ((= b 34) 1640 (let* ((r (%lex-read-string src pos1 line1 col1 file)) 1641 (tok (car r)) (rest (cdr r)) 1642 (bv (tok-value tok))) 1643 (set! kind 'STR) 1644 (set! vlen (bytevector-length bv)) 1645 (%lex-scratch<- bv vlen) 1646 (set! npos (car rest)) 1647 (set! nline (car (cdr rest))) 1648 (set! ncol (car (cdr (cdr rest)))))) 1649 ;; Char 1650 ((= b 39) 1651 (let* ((r (%lex-read-char src pos1 line1 col1 file)) 1652 (tok (car r)) (rest (cdr r))) 1653 (set! kind 'CHAR) (set! val (tok-value tok)) 1654 (set! npos (car rest)) 1655 (set! nline (car (cdr rest))) 1656 (set! ncol (car (cdr (cdr rest)))))) 1657 ;; Punctuator (default) 1658 (else 1659 (let* ((r (%lex-read-punct src pos1 line1 col1 file)) 1660 (tok (car r)) (rest (cdr r))) 1661 ;; Line-leading `%:` digraph also acts as HASH for directives. 1662 (cond 1663 ((and bol? (eq? (tok-value tok) 'hash)) 1664 (set! kind 'HASH)) 1665 (else 1666 (set! kind 'PUNCT) (set! val (tok-value tok)))) 1667 (set! npos (car rest)) 1668 (set! nline (car (cdr rest))) 1669 (set! ncol (car (cdr (cdr rest)))))))))) 1670 ;; Reconstruct the survivor below the mark and advance state. 1671 (cond 1672 ((eq? kind 'EOF) 1673 (lex-state-done?-set! st #t) 1674 (make-tok 'EOF #f (%loc file loc-line loc-col))) 1675 (else 1676 (let ((tok-val (cond ((eq? kind 'IDENT) (%lex-scratch->bv vlen)) 1677 ((eq? kind 'STR) (%lex-scratch->bv vlen)) 1678 (else val)))) 1679 (lex-state-pos-set! st npos) 1680 (lex-state-line-set! st nline) 1681 (lex-state-col-set! st ncol) 1682 (lex-state-bol?-set! st nbol?) 1683 (make-tok kind tok-val (%loc file loc-line loc-col))))))) 1684 1685 ;; Drain a lex-iter into a list ending in EOF, for the cc-lex test 1686 ;; runner. Production callers chain make-lex-iter directly. 1687 (define (lex-tokenize src file) 1688 (iter->list (make-lex-iter src file))) 1689 ;; cc/pp.scm — preprocessor. Hide-set per C11 6.10.3.4. 1690 ;; #include rejected (CC.md §Toolchain envelope). 1691 1692 ;; --- helpers --- 1693 (define (%pp-bv-mem? x xs) 1694 (cond ((null? xs) #f) 1695 ((bv= x (car xs)) #t) 1696 (else (%pp-bv-mem? x (cdr xs))))) 1697 1698 (define (%pp-bv-union a b) 1699 (cond ((null? a) b) 1700 ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b)) 1701 (else (cons (car a) (%pp-bv-union (cdr a) b))))) 1702 1703 (define (%pp-with-hide t hide) 1704 (%tok (tok-kind t) (tok-value t) (tok-loc t) hide)) 1705 (define (%pp-with-loc t loc) 1706 (%tok (tok-kind t) (tok-value t) loc (tok-hide t))) 1707 1708 ;; --- pp-state (private record) --- 1709 ;; cond-stack: list of (active? . has-taken?). Outer-active gating is 1710 ;; computed by walking the stack rather than encoding it in frames. 1711 ;; 1712 ;; Streaming fields drive make-pp-iter; the bounded-buffer path used 1713 ;; by pp-eval-cexpr leaves them at #f / '(). 1714 ;; lex-iter — upstream tok-iter, or #f 1715 ;; up-pending — toks unshifted upstream (macro-expansion bodies that 1716 ;; must be re-scanned for further expansion) 1717 ;; out-buf — toks already dispatched but stashed for the next pull 1718 ;; (peek-and-fuse for adjacent STRs lookahead overshoots 1719 ;; by one tok, which lands here) 1720 (define-record-type pp-state 1721 (%pp-state macros cond-stack cur-file line-delta lex-iter up-pending out-buf) 1722 pp-state? 1723 (macros pps-macros pps-macros-set!) 1724 (cond-stack pps-cond-stack pps-cond-stack-set!) 1725 (cur-file pps-cur-file pps-cur-file-set!) 1726 (line-delta pps-line-delta pps-line-delta-set!) 1727 (lex-iter pps-lex-iter) 1728 (up-pending pps-up-pending pps-up-pending-set!) 1729 (out-buf pps-out-buf pps-out-buf-set!)) 1730 1731 (define (%pp-make-state defs) (%pp-state defs '() #f 0 #f '() '())) 1732 1733 (define (%pp-active? state) 1734 (let loop ((xs (pps-cond-stack state))) 1735 (cond ((null? xs) #t) 1736 ((not (car (car xs))) #f) 1737 (else (loop (cdr xs)))))) 1738 1739 ;; Active for the *parent* of the top frame (used by elif/else). 1740 (define (%pp-parent-active? state) 1741 (let ((cs (pps-cond-stack state))) 1742 (cond ((null? cs) #t) 1743 (else 1744 (let loop ((xs (cdr cs))) 1745 (cond ((null? xs) #t) 1746 ((not (car (car xs))) #f) 1747 (else (loop (cdr xs))))))))) 1748 1749 ;; --- token classification --- 1750 (define (%pp-eof? t) (eq? (tok-kind t) 'EOF)) 1751 (define (%pp-nl? t) (eq? (tok-kind t) 'NL)) 1752 (define (%pp-hash? t) (eq? (tok-kind t) 'HASH)) 1753 (define (%pp-ident? t) (eq? (tok-kind t) 'IDENT)) 1754 (define (%pp-int? t) (eq? (tok-kind t) 'INT)) 1755 (define (%pp-punct? t pname) 1756 (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname))) 1757 (define (%pp-ident-name? t name-bv) 1758 (and (%pp-ident? t) (bv= (tok-value t) name-bv))) 1759 (define (%pp-skip-ws toks) toks) 1760 1761 ;; --- built-in macro names --- 1762 (define %pp-bv-FILE "__FILE__") 1763 (define %pp-bv-LINE "__LINE__") 1764 (define %pp-bv-STDC "__STDC__") 1765 (define %pp-bv-LISPCC "__LISPCC__") 1766 (define %pp-bv-DATE "__DATE__") 1767 (define %pp-bv-TIME "__TIME__") 1768 (define %pp-bv-STDC-VERSION "__STDC_VERSION__") 1769 (define %pp-bv-STDC-HOSTED "__STDC_HOSTED__") 1770 (define %pp-bv-VA-ARGS "__VA_ARGS__") 1771 (define %pp-bv-defined "defined") 1772 1773 ;; Fixed values for reproducibility — we don't read the wall clock. 1774 (define %pp-bv-DATE-VALUE "Jan 1 1970") 1775 (define %pp-bv-TIME-VALUE "00:00:00") 1776 1777 (define (%pp-builtin? name) 1778 (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE) 1779 (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC) 1780 (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME) 1781 (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED))) 1782 1783 (define (%pp-expand-builtin name loc state) 1784 ;; Emit the token at the ORIGINAL loc; %pp-relocate downstream will 1785 ;; apply pps-cur-file / pps-line-delta. Doing the rewrite here too 1786 ;; (then letting relocate re-apply it) double-shifts __LINE__'s loc. 1787 ;; The VALUE of __LINE__ / __FILE__ already reflects the post-#line 1788 ;; mapping because we compute `file`/`line` from cur-file/line-delta. 1789 (let* ((file (or (pps-cur-file state) (loc-file loc))) 1790 (line (+ (loc-line loc) (pps-line-delta state)))) 1791 (cond 1792 ((bv= name %pp-bv-FILE) (list (%tok 'STR file loc '()))) 1793 ((bv= name %pp-bv-LINE) (list (%tok 'INT line loc '()))) 1794 ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 loc '()))) 1795 ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 loc '()))) 1796 ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE loc '()))) 1797 ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE loc '()))) 1798 ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 loc '()))) 1799 ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 loc '()))) 1800 (else (die loc "internal: not a builtin" name))))) 1801 1802 ;; --- buf-list: simple reversed-list buffer of toks --- 1803 (define-record-type buf-list 1804 (%buf-list xs) 1805 buf-list? 1806 (xs buf-list-xs buf-list-xs-set!)) 1807 (define (make-buf-list) (%buf-list '())) 1808 (define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b)))) 1809 (define (buf-list-push-many! b ts) 1810 (let loop ((ts ts)) 1811 (cond ((null? ts) #t) 1812 (else (buf-list-push! b (car ts)) (loop (cdr ts)))))) 1813 (define (buf-list-flush b) (reverse (buf-list-xs b))) 1814 1815 ;; --- make-pp-iter: streaming preprocessor --- 1816 ;; Wraps a lex-iter (or any tok-iter). Returns a tok-iter. Live data 1817 ;; bounded by parser state + lookahead, not source length. Adjacent-STR 1818 ;; fusion happens inline via peek-and-stash. 1819 (define (make-pp-iter src-iter initial-defines) 1820 (let ((st (%pp-state initial-defines '() #f 0 src-iter '() '()))) 1821 (%tok-iter %pp-iter-pull st '()))) 1822 1823 (define (%pp-iter-pull st) 1824 (let ((ob (pps-out-buf st))) 1825 (cond 1826 ((not (null? ob)) 1827 (pps-out-buf-set! st (cdr ob)) 1828 (car ob)) 1829 (else (%pp-maybe-fuse-str st (%pp-dispatch-step st)))))) 1830 1831 ;; --- upstream helpers --- 1832 ;; Upstream tokens come either from up-pending (macro-expansion bodies 1833 ;; that need re-scanning) or from the wrapped lex-iter. 1834 (define (%pp-pull-upstream st) 1835 (let ((up (pps-up-pending st))) 1836 (cond 1837 ((not (null? up)) 1838 (pps-up-pending-set! st (cdr up)) 1839 (car up)) 1840 (else (iter-next (pps-lex-iter st)))))) 1841 1842 (define (%pp-peek-upstream st) 1843 (let ((up (pps-up-pending st))) 1844 (cond 1845 ((not (null? up)) (car up)) 1846 (else (iter-peek (pps-lex-iter st)))))) 1847 1848 ;; Push toks to the front of upstream so (car toks) is yielded next. 1849 (define (%pp-unshift-upstream! st toks) 1850 (pps-up-pending-set! st (append toks (pps-up-pending st)))) 1851 1852 ;; Collect tokens up to (not including) NL or EOF. NL is consumed; EOF 1853 ;; is unshifted back so the main loop sees it. 1854 (define (%pp-collect-line-stream st) 1855 (let loop ((acc '())) 1856 (let ((t (%pp-pull-upstream st))) 1857 (cond 1858 ((%pp-eof? t) 1859 (%pp-unshift-upstream! st (list t)) 1860 (reverse acc)) 1861 ((%pp-nl? t) (reverse acc)) 1862 (else (loop (cons t acc))))))) 1863 1864 ;; Streaming arg collection for fn-like macro calls. Position is just 1865 ;; AFTER the opening `(`. Returns the list of arg-tokenlists. 1866 (define (%pp-collect-args-stream st call-loc) 1867 (let loop ((depth 0) (cur '()) (args '())) 1868 (let ((t (%pp-pull-upstream st))) 1869 (cond 1870 ((%pp-eof? t) 1871 (die call-loc "macro call: unterminated argument list")) 1872 ((and (= depth 0) (%pp-punct? t 'rparen)) 1873 (cond 1874 ;; Empty parens count as one empty argument; bind-args 1875 ;; degenerates this back to "no args" for 0-param macros. 1876 ((and (null? args) (null? cur)) (list '())) 1877 (else (reverse (cons (reverse cur) args))))) 1878 ((and (= depth 0) (%pp-punct? t 'comma)) 1879 (loop 0 '() (cons (reverse cur) args))) 1880 ((%pp-punct? t 'lparen) 1881 (loop (+ depth 1) (cons t cur) args)) 1882 ((%pp-punct? t 'rparen) 1883 (loop (- depth 1) (cons t cur) args)) 1884 (else (loop depth (cons t cur) args)))))) 1885 1886 ;; Single dispatch step. Returns one post-pp tok (skipping NLs, 1887 ;; processing directives, expanding macros). Does NOT apply STR-fusion 1888 ;; — that happens one layer up in %pp-iter-pull, otherwise the 1889 ;; recursive lookahead during fusion would itself fuse further STRs 1890 ;; and drag tokens past the run into out-buf. 1891 (define (%pp-dispatch-step st) 1892 (let ((t (%pp-pull-upstream st))) 1893 (cond 1894 ((%pp-eof? t) 1895 (cond ((not (null? (pps-cond-stack st))) 1896 (die (tok-loc t) "unterminated #if/#ifdef/#ifndef")) 1897 (else t))) 1898 ((%pp-nl? t) (%pp-dispatch-step st)) 1899 ((%pp-hash? t) 1900 (let ((line (%pp-collect-line-stream st))) 1901 (%pp-dispatch-directive t line st #f) 1902 (%pp-dispatch-step st))) 1903 ((not (%pp-active? st)) 1904 (%pp-dispatch-step st)) 1905 ((%pp-ident? t) 1906 (let ((name (tok-value t))) 1907 (cond 1908 ((%pp-bv-mem? name (tok-hide t)) 1909 (%pp-relocate t st)) 1910 ((%pp-builtin? name) 1911 (let ((toks (%pp-expand-builtin name (tok-loc t) st))) 1912 (%pp-unshift-upstream! st toks) 1913 (%pp-dispatch-step st))) 1914 (else 1915 (let ((m (alist-ref name (pps-macros st)))) 1916 (cond 1917 ((not m) (%pp-relocate t st)) 1918 ((eq? (macro-kind m) 'obj) 1919 (let ((body (%pp-prepare-body (macro-body m) 1920 (cons name (tok-hide t)) 1921 (tok-loc t)))) 1922 (%pp-unshift-upstream! st body) 1923 (%pp-dispatch-step st))) 1924 (else 1925 ;; fn-like or fn-vararg: peek upstream for `(`. If 1926 ;; not present, pass IDENT through unchanged (no 1927 ;; consumption); the next iter call will process the 1928 ;; following tok normally. 1929 (let ((next (%pp-peek-upstream st))) 1930 (cond 1931 ((not (%pp-punct? next 'lparen)) 1932 (%pp-relocate t st)) 1933 (else 1934 (%pp-pull-upstream st) ; consume `(` 1935 (let* ((args (%pp-collect-args-stream st (tok-loc t))) 1936 (params (macro-params m)) 1937 (variadic? (eq? (macro-kind m) 'fn-vararg)) 1938 (env (%pp-bind-args params args variadic? (tok-loc t))) 1939 (sub (%pp-substitute (macro-body m) env (tok-loc t) st)) 1940 (body (%pp-prepare-body sub 1941 (cons name (tok-hide t)) 1942 (tok-loc t)))) 1943 (%pp-unshift-upstream! st body) 1944 (%pp-dispatch-step st)))))))))))) 1945 (else (%pp-relocate t st))))) 1946 1947 ;; Translation phase 6 (peek-and-fuse). If `cur` is STR, look at the 1948 ;; next post-pp tok; if it's STR, fuse and repeat. Anything else gets 1949 ;; stashed in out-buf for the next iter-next call. Lookahead goes 1950 ;; through %pp-dispatch-step (no fusion), so a non-STR neighbor 1951 ;; correctly terminates the run. 1952 (define (%pp-maybe-fuse-str st cur) 1953 (cond 1954 ((not (eq? (tok-kind cur) 'STR)) cur) 1955 (else 1956 (let loop ((cur cur)) 1957 (let ((next (%pp-dispatch-step st))) 1958 (cond 1959 ((eq? (tok-kind next) 'STR) 1960 (loop (%tok 'STR 1961 (bytevector-append (tok-value cur) (tok-value next)) 1962 (tok-loc cur) 1963 (tok-hide cur)))) 1964 (else 1965 (pps-out-buf-set! st (cons next (pps-out-buf st))) 1966 cur))))))) 1967 1968 ;; Drain a pp-iter into a list ending in EOF, for the cc-pp test 1969 ;; runner. The input token list becomes the upstream via make-list-iter. 1970 ;; Production callers chain make-pp-iter directly over a make-lex-iter. 1971 (define (pp-expand toks initial-defines) 1972 (iter->list (make-pp-iter (make-list-iter toks) initial-defines))) 1973 1974 ;; --- directive dispatch --- 1975 ;; pmatch-based on the directive name bv. bv literals match by equal?. 1976 ;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else` 1977 ;; are C keywords promoted by lex; their KW symbol values map back to bv 1978 ;; via symbol->string). 1979 (define (%pp-directive-name t) 1980 (cond ((eq? (tok-kind t) 'IDENT) (tok-value t)) 1981 ((eq? (tok-kind t) 'KW) (symbol->string (tok-value t))) 1982 (else #f))) 1983 1984 (define (%pp-dispatch-directive hash-tok line state out) 1985 (let ((line (%pp-skip-ws line))) 1986 (cond 1987 ((null? line) #t) ; bare `#` line — null directive 1988 ((%pp-directive-name (car line)) 1989 (let ((name (%pp-directive-name (car line))) 1990 (rest (cdr line)) 1991 (loc (tok-loc (car line)))) 1992 (pmatch name 1993 ("define" (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t))) 1994 ("undef" (cond ((%pp-active? state) (%pp-do-undef rest state)) (else #t))) 1995 ("if" (%pp-do-if rest state)) 1996 ("ifdef" (%pp-do-ifdef rest state)) 1997 ("ifndef" (%pp-do-ifndef rest state)) 1998 ("elif" (%pp-do-elif rest state)) 1999 ("else" (%pp-do-else rest state)) 2000 ("endif" (%pp-do-endif rest state)) 2001 ("error" (cond ((%pp-active? state) 2002 (%pp-do-error (cons (car line) rest) state)) 2003 (else #t))) 2004 ("line" (cond ((%pp-active? state) 2005 ;; Macro-expand the operands BEFORE 2006 ;; processing (`#line MACRO`). Pre-expansion 2007 ;; we capture the directive's source line so 2008 ;; the line-delta math doesn't anchor on a 2009 ;; macro definition site. 2010 (let ((here (cond 2011 ((null? rest) 2012 (loc-line (tok-loc hash-tok))) 2013 (else 2014 (loc-line (tok-loc (car rest))))))) 2015 (%pp-do-line (%pp-expand-line rest state) 2016 state here))) 2017 (else #t))) 2018 ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t))) 2019 ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t))) 2020 (else (die loc "unknown preprocessor directive" name))))) 2021 (else 2022 (die (tok-loc (car line)) "expected directive name after '#'" 2023 (tok-kind (car line))))))) 2024 2025 ;; --- #define --- 2026 ;; function-like vs object-like is decided by an immediately-adjacent `(`. 2027 ;; "Adjacent" = column of `(` equals column of name + length of name. 2028 (define (%pp-do-define line state) 2029 (cond 2030 ((null? line) (die #f "#define requires a macro name")) 2031 ((not (%pp-ident? (car line))) 2032 (die (tok-loc (car line)) "#define: expected identifier")) 2033 (else 2034 (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line))) 2035 (cond 2036 ((and (not (null? rest)) 2037 (%pp-punct? (car rest) 'lparen) 2038 (= (loc-col (tok-loc (car rest))) 2039 (+ (loc-col (tok-loc nt)) 2040 (bytevector-length name)))) 2041 (%pp-define-fn name (cdr rest) (tok-loc nt) state)) 2042 (else 2043 (let ((m (%macro 'obj '() rest))) 2044 (pps-macros-set! state (alist-set name m (pps-macros state)))))))))) 2045 2046 (define (%pp-define-fn name post-lparen nloc state) 2047 (let loop ((toks post-lparen) (params '()) (variadic? #f)) 2048 (cond 2049 ((null? toks) (die nloc "#define: unterminated parameter list")) 2050 ((%pp-punct? (car toks) 'rparen) 2051 (let* ((body (cdr toks)) 2052 (kind (if variadic? 'fn-vararg 'fn)) 2053 (m (%macro kind (reverse params) body))) 2054 (pps-macros-set! state (alist-set name m (pps-macros state))))) 2055 ((%pp-punct? (car toks) 'ellipsis) 2056 (let ((rest (cdr toks))) 2057 (cond 2058 ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'")) 2059 ((%pp-punct? (car rest) 'rparen) (loop rest params #t)) 2060 (else (die (tok-loc (car rest)) "#define: garbage after '...'"))))) 2061 ((null? params) 2062 (cond 2063 ((%pp-ident? (car toks)) 2064 (loop (cdr toks) (cons (tok-value (car toks)) params) #f)) 2065 (else (die (tok-loc (car toks)) "#define: expected parameter name")))) 2066 (else 2067 (cond 2068 ((%pp-punct? (car toks) 'comma) 2069 (let ((after (cdr toks))) 2070 (cond 2071 ((null? after) (die (tok-loc (car toks)) "#define: trailing ','")) 2072 ((%pp-punct? (car after) 'ellipsis) 2073 (let ((aa (cdr after))) 2074 (cond 2075 ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen)) 2076 (loop aa params #t)) 2077 (else (die (tok-loc (car after)) 2078 "#define: '...' must precede ')'"))))) 2079 ((%pp-ident? (car after)) 2080 (loop (cdr after) (cons (tok-value (car after)) params) #f)) 2081 (else 2082 (die (tok-loc (car after)) 2083 "#define: expected parameter name after ','"))))) 2084 (else (die (tok-loc (car toks)) 2085 "#define: expected ',' or ')' in parameter list"))))))) 2086 2087 ;; --- #undef --- 2088 (define (%pp-do-undef line state) 2089 (cond 2090 ((null? line) (die #f "#undef requires a macro name")) 2091 ((not (%pp-ident? (car line))) 2092 (die (tok-loc (car line)) "#undef: expected identifier")) 2093 (else 2094 (pps-macros-set! state 2095 (%pp-alist-drop (tok-value (car line)) (pps-macros state)))))) 2096 2097 (define (%pp-alist-drop key al) 2098 (cond ((null? al) '()) 2099 ((bv= (car (car al)) key) (cdr al)) 2100 (else (cons (car al) (%pp-alist-drop key (cdr al)))))) 2101 2102 ;; --- #if / #ifdef / #ifndef / #elif / #else / #endif --- 2103 ;; cond-stack frame: (active? taken? else?). active? gates the body 2104 ;; until the next #elif/#else/#endif; taken? records whether ANY arm 2105 ;; (the original #if branch or any #elif) has matched, so later arms 2106 ;; stay inactive; else? records that we have already passed an #else 2107 ;; in this frame, so a subsequent #elif/#else is rejected. 2108 (define (%pp-frame a? t? e?) (list a? t? e?)) 2109 (define (%pp-frame-active? f) (car f)) 2110 (define (%pp-frame-taken? f) (car (cdr f))) 2111 (define (%pp-frame-else? f) (car (cdr (cdr f)))) 2112 2113 (define (%pp-do-if line state) 2114 (cond 2115 ((not (%pp-active? state)) 2116 (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state)))) 2117 (else 2118 (let* ((v (pp-eval-cexpr line state)) 2119 (a? (not (= v 0)))) 2120 (pps-cond-stack-set! state (cons (%pp-frame a? a? #f) (pps-cond-stack state))))))) 2121 2122 (define (%pp-do-ifdef line state) 2123 (cond 2124 ((not (%pp-active? state)) 2125 (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state)))) 2126 (else 2127 (let ((d? (%pp-defined? (%pp-name-of-single line) state))) 2128 (pps-cond-stack-set! state 2129 (cons (%pp-frame d? d? #f) (pps-cond-stack state))))))) 2130 2131 (define (%pp-do-ifndef line state) 2132 (cond 2133 ((not (%pp-active? state)) 2134 (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state)))) 2135 (else 2136 (let ((a? (not (%pp-defined? (%pp-name-of-single line) state)))) 2137 (pps-cond-stack-set! state 2138 (cons (%pp-frame a? a? #f) (pps-cond-stack state))))))) 2139 2140 (define (%pp-name-of-single line) 2141 (cond 2142 ((null? line) (die #f "#ifdef/#ifndef: missing identifier")) 2143 ((not (%pp-ident? (car line))) 2144 (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier")) 2145 (else (tok-value (car line))))) 2146 2147 (define (%pp-defined? name state) 2148 (or (alist-ref name (pps-macros state)) 2149 (%pp-builtin? name) 2150 #f)) 2151 2152 (define (%pp-do-elif line state) 2153 (let ((cs (pps-cond-stack state))) 2154 (cond 2155 ((null? cs) (die #f "#elif outside #if")) 2156 (else 2157 (let* ((top (car cs)) (rest (cdr cs)) 2158 (taken? (%pp-frame-taken? top)) 2159 (else? (%pp-frame-else? top)) 2160 (par? (%pp-parent-active? state))) 2161 (cond 2162 (else? (die #f "#elif after #else")) 2163 ((or (not par?) taken?) 2164 (pps-cond-stack-set! state (cons (%pp-frame #f taken? #f) rest))) 2165 (else 2166 (let* ((v (pp-eval-cexpr line state)) 2167 (a? (not (= v 0)))) 2168 (pps-cond-stack-set! state 2169 (cons (%pp-frame a? (or a? taken?) #f) rest)))))))))) 2170 2171 (define (%pp-do-else line state) 2172 (let ((cs (pps-cond-stack state))) 2173 (cond 2174 ((null? cs) (die #f "#else outside #if")) 2175 (else 2176 (let* ((top (car cs)) (rest (cdr cs)) 2177 (taken? (%pp-frame-taken? top)) 2178 (else? (%pp-frame-else? top)) 2179 (par? (%pp-parent-active? state))) 2180 (cond 2181 (else? (die #f "#else after #else")) 2182 ((not par?) 2183 (pps-cond-stack-set! state (cons (%pp-frame #f taken? #t) rest))) 2184 (taken? 2185 (pps-cond-stack-set! state (cons (%pp-frame #f #t #t) rest))) 2186 (else 2187 (pps-cond-stack-set! state (cons (%pp-frame #t #t #t) rest))))))))) 2188 2189 (define (%pp-do-endif line state) 2190 (let ((cs (pps-cond-stack state))) 2191 (cond ((null? cs) (die #f "#endif outside #if")) 2192 (else (pps-cond-stack-set! state (cdr cs)))))) 2193 2194 ;; --- #error --- 2195 ;; line[0] is the directive name "error"; the rest is the user message. 2196 (define (%pp-do-error line state) 2197 (let* ((msg-toks (if (null? line) '() (cdr line))) 2198 (loc (if (null? line) #f (tok-loc (car line)))) 2199 (msg (%pp-toks->display msg-toks))) 2200 (die loc "#error" msg))) 2201 2202 ;; Per C11 §6.10.3.2 ¶2: whitespace between argument tokens becomes a 2203 ;; single space; absence of whitespace must NOT introduce one. We 2204 ;; approximate "had whitespace" by comparing locations: a space goes 2205 ;; in iff the next token does not abut the previous one (different 2206 ;; line, or column gap larger than the prev spelling length). 2207 (define (%pp-toks->display toks) 2208 (let loop ((toks toks) (prev #f) (prev-bv #f) (acc '())) 2209 (cond 2210 ((null? toks) (bv-cat (reverse acc))) 2211 (else 2212 (let* ((t (car toks)) (p (%pp-tok->bv t)) 2213 (sep? (cond 2214 ((not prev) #f) 2215 ((or (not (tok-loc prev)) (not (tok-loc t))) #t) 2216 ((not (= (loc-line (tok-loc prev)) 2217 (loc-line (tok-loc t)))) #t) 2218 (else 2219 (not (= (loc-col (tok-loc t)) 2220 (+ (loc-col (tok-loc prev)) 2221 (bytevector-length prev-bv)))))))) 2222 (loop (cdr toks) t p 2223 (if sep? (cons p (cons " " acc)) (cons p acc)))))))) 2224 2225 ;; Reverse-map punctuator symbol -> source spelling. %punct-alist may 2226 ;; map several spellings to the same symbol (e.g. both "[" and "<:" 2227 ;; resolve to 'lbrack); the 1-byte canonical forms appear last in the 2228 ;; source list, so a last-wins fold yields "[" rather than the digraph. 2229 (define %pp-punct-spell 2230 (let loop ((al %punct-alist) (acc '())) 2231 (cond ((null? al) acc) 2232 (else (loop (cdr al) 2233 (alist-set (cdr (car al)) (car (car al)) acc)))))) 2234 2235 (define (%pp-punct-spelling sym) 2236 (or (alist-ref/eq sym %pp-punct-spell) (symbol->string sym))) 2237 2238 (define (%pp-tok->bv t) 2239 (let ((k (tok-kind t)) (v (tok-value t))) 2240 (cond 2241 ((eq? k 'IDENT) v) 2242 ((eq? k 'INT) (fixnum->bv v 10)) 2243 ((eq? k 'STR) (%pp-quote-bytes v 34)) 2244 ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39)) 2245 ((eq? k 'KW) (symbol->string v)) 2246 ((eq? k 'PUNCT) (%pp-punct-spelling v)) 2247 (else "?")))) 2248 2249 ;; Reconstruct a string/char literal source spelling from cooked content. 2250 ;; Per C11 6.10.3.2: stringize must reproduce the source spelling of 2251 ;; STR/CHAR constants — every `"` and `\` is prefixed with `\`, and 2252 ;; the common control-character escapes are restored from their cooked 2253 ;; bytes. `delim` is 34 for STR, 39 for CHAR. 2254 (define (%pp-quote-bytes bv delim) 2255 (let* ((n (bytevector-length bv)) 2256 (delim-bv (bv-of-byte delim))) 2257 (let loop ((i 0) (acc (list delim-bv))) 2258 (cond 2259 ((= i n) (bv-cat (reverse (cons delim-bv acc)))) 2260 (else 2261 (let ((b (bytevector-u8-ref bv i))) 2262 (cond 2263 ((or (= b delim) (= b 92)) 2264 (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc)))) 2265 ((= b 10) (loop (+ i 1) (cons "\\n" acc))) 2266 ((= b 9) (loop (+ i 1) (cons "\\t" acc))) 2267 ((= b 13) (loop (+ i 1) (cons "\\r" acc))) 2268 (else 2269 (loop (+ i 1) (cons (bv-of-byte b) acc)))))))))) 2270 2271 ;; --- #line / #pragma / #include --- 2272 ;; Approximate #line: subsequent toks have line = (orig-line + delta), 2273 ;; where delta = (N - here-line - 1). Good enough for most cases. 2274 (define (%pp-do-line line state here) 2275 (cond 2276 ((null? line) (die #f "#line requires a line number")) 2277 ((not (%pp-int? (car line))) 2278 (die (tok-loc (car line)) "#line: expected integer")) 2279 (else 2280 (let* ((nt (car line)) (n (tok-value nt)) 2281 (rest (cdr line))) 2282 (pps-line-delta-set! state (- n here 1)) 2283 (cond 2284 ((null? rest) #t) 2285 ((eq? (tok-kind (car rest)) 'STR) 2286 (pps-cur-file-set! state (tok-value (car rest)))) 2287 (else (die (tok-loc (car rest)) 2288 "#line: expected string after number"))))))) 2289 2290 (define (%pp-do-pragma line state) #t) 2291 2292 (define (%pp-do-include line state) 2293 (die (if (null? line) #f (tok-loc (car line))) 2294 "#include: file inclusion is handled upstream by pre-flatten")) 2295 2296 ;; --- macro expansion engine --- 2297 ;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set: 2298 ;; if the name is in t.hide, don't expand. Otherwise expand and rescan 2299 ;; the produced body, with hide += {name}. 2300 (define (%pp-emit-expanded toks state out) 2301 (let loop ((toks toks)) 2302 (cond 2303 ((null? toks) #t) 2304 (else 2305 (let* ((t (car toks)) (rest (cdr toks))) 2306 (cond 2307 ((not (%pp-ident? t)) 2308 (buf-list-push! out (%pp-relocate t state)) 2309 (loop rest)) 2310 (else 2311 (let ((name (tok-value t))) 2312 (cond 2313 ((%pp-bv-mem? name (tok-hide t)) 2314 (buf-list-push! out (%pp-relocate t state)) 2315 (loop rest)) 2316 ((%pp-builtin? name) 2317 (buf-list-push-many! out 2318 (%pp-expand-builtin name (tok-loc t) state)) 2319 (loop rest)) 2320 (else 2321 (let ((m (alist-ref name (pps-macros state)))) 2322 (cond 2323 ((not m) 2324 (buf-list-push! out (%pp-relocate t state)) 2325 (loop rest)) 2326 (else 2327 (%pp-apply-macro t m rest state out loop)))))))))))))) 2328 2329 (define (%pp-apply-macro t m rest state out cont) 2330 (let ((kind (macro-kind m)) (name (tok-value t))) 2331 (cond 2332 ((eq? kind 'obj) 2333 (let ((bodies (%pp-prepare-body (macro-body m) 2334 (cons name (tok-hide t)) 2335 (tok-loc t)))) 2336 (%pp-emit-expanded bodies state out) 2337 (cont rest))) 2338 (else 2339 (let ((after (%pp-skip-ws rest))) 2340 (cond 2341 ((or (null? after) (not (%pp-punct? (car after) 'lparen))) 2342 (buf-list-push! out (%pp-relocate t state)) 2343 (cont rest)) 2344 (else 2345 (let* ((ar (%pp-collect-args (cdr after) (tok-loc t))) 2346 (args (car ar)) (rest2 (cdr ar)) 2347 (params (macro-params m)) 2348 (variadic? (eq? kind 'fn-vararg)) 2349 (env (%pp-bind-args params args variadic? (tok-loc t))) 2350 (sub (%pp-substitute (macro-body m) env (tok-loc t) state)) 2351 (bodies (%pp-prepare-body sub 2352 (cons name (tok-hide t)) 2353 (tok-loc t)))) 2354 (%pp-emit-expanded bodies state out) 2355 (cont rest2))))))))) 2356 2357 ;; Stamp built-in marker tokens (__LINE__ / __FILE__) inside the body 2358 ;; with the macro-invocation location, so they report the call site 2359 ;; per C11 §6.10.8. Other body tokens keep their #define-time loc so 2360 ;; diagnostics still point at the macro body. Hide-set is updated 2361 ;; with the macro name on every token. 2362 (define (%pp-prepare-body body extra-hide . call-loc-opt) 2363 (let ((call-loc (cond ((null? call-loc-opt) #f) 2364 (else (car call-loc-opt))))) 2365 (map (lambda (t) 2366 (let ((hidden (%pp-with-hide t (%pp-bv-union extra-hide 2367 (tok-hide t))))) 2368 (cond 2369 ((and call-loc (%pp-ident? hidden) 2370 (or (bv= (tok-value hidden) %pp-bv-LINE) 2371 (bv= (tok-value hidden) %pp-bv-FILE))) 2372 (%pp-with-loc hidden call-loc)) 2373 (else hidden)))) 2374 body))) 2375 2376 ;; Collect comma-separated args. `toks` starts AFTER `(`. Returns 2377 ;; (args . rest), where args is a list of token-lists. 2378 (define (%pp-collect-args toks call-loc) 2379 (let loop ((toks toks) (depth 0) (cur '()) (args '())) 2380 (cond 2381 ((null? toks) (die call-loc "macro call: unterminated argument list")) 2382 ((%pp-eof? (car toks)) 2383 (die call-loc "macro call: unterminated argument list")) 2384 ((and (= depth 0) (%pp-punct? (car toks) 'rparen)) 2385 (let ((args* 2386 (cond 2387 ;; Empty parens count as one empty argument; bind-args 2388 ;; degenerates this back to "no args" for 0-param macros. 2389 ((and (null? args) (null? cur)) (list '())) 2390 (else (reverse (cons (reverse cur) args)))))) 2391 (cons args* (cdr toks)))) 2392 ((and (= depth 0) (%pp-punct? (car toks) 'comma)) 2393 (loop (cdr toks) 0 '() (cons (reverse cur) args))) 2394 ((%pp-punct? (car toks) 'lparen) 2395 (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args)) 2396 ((%pp-punct? (car toks) 'rparen) 2397 (loop (cdr toks) (- depth 1) (cons (car toks) cur) args)) 2398 (else 2399 (loop (cdr toks) depth (cons (car toks) cur) args))))) 2400 2401 ;; Bind formals → token-lists (alist by bv key). Variadic gathers 2402 ;; trailing actuals into __VA_ARGS__, joined with synthetic commas. 2403 (define (%pp-bind-args params args variadic? call-loc) 2404 (let* ((np (length params)) (na (length args))) 2405 (cond 2406 (variadic? 2407 (cond 2408 ((< na np) (die call-loc "macro call: too few arguments")) 2409 (else 2410 (let loop ((ps params) (as args) (acc '())) 2411 (cond 2412 ((null? ps) 2413 (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc)) 2414 (else 2415 (loop (cdr ps) (cdr as) 2416 (alist-set (car ps) (car as) acc)))))))) 2417 (else 2418 (cond 2419 ((and (= np 0) (= na 1) (null? (car args))) '()) 2420 ((not (= np na)) (die call-loc "macro call: argument count mismatch")) 2421 (else 2422 (let loop ((ps params) (as args) (acc '())) 2423 (cond 2424 ((null? ps) acc) 2425 (else (loop (cdr ps) (cdr as) 2426 (alist-set (car ps) (car as) acc))))))))))) 2427 2428 (define (%pp-join-comma argss) 2429 (cond 2430 ((null? argss) '()) 2431 ((null? (cdr argss)) (car argss)) 2432 (else 2433 (append (car argss) 2434 (cons (%pp-synth-comma) (%pp-join-comma (cdr argss))))))) 2435 2436 (define (%pp-synth-comma) 2437 (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '())) 2438 2439 ;; Body substitution: walk body; replace param IDENTs with arg toks, 2440 ;; handle `#param` (stringize) and `a##b` (paste). Per C11 §6.10.3.1, 2441 ;; arguments are macro-expanded BEFORE substitution into the body 2442 ;; EXCEPT when the parameter is the operand of `#` or `##` (in which 2443 ;; case the raw token list is used). Without prescan, recursive uses 2444 ;; like M(M(1)) for `#define M(x) ...x...` fail to expand the inner 2445 ;; M during rescan because the outer M is in every substituted 2446 ;; token's hide-set. 2447 (define (%pp-substitute body env call-loc state) 2448 (let loop ((body body) (out '())) 2449 (cond 2450 ((null? body) (reverse out)) 2451 (else 2452 (let ((t (car body)) (rest (cdr body))) 2453 (cond 2454 ((%pp-punct? t 'hash) 2455 (cond 2456 ((or (null? rest) (not (%pp-ident? (car rest)))) 2457 (die (tok-loc t) "stringize: '#' must precede a parameter name")) 2458 (else 2459 (let* ((id (car rest)) (pn (tok-value id)) 2460 (pt (alist-ref pn env))) 2461 (cond 2462 ((not pt) 2463 (die (tok-loc id) "stringize: '#' operand must be a parameter" pn)) 2464 (else 2465 (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '()))) 2466 (loop (cdr rest) (cons s out))))))))) 2467 ((%pp-punct? t 'paste) 2468 (cond 2469 ((null? out) (die (tok-loc t) "paste: '##' cannot start a body")) 2470 ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body")) 2471 (else 2472 (let* ((lhs (car out)) 2473 (rt (car rest)) 2474 (rhs-list 2475 (cond 2476 ((and (%pp-ident? rt) (alist-ref (tok-value rt) env)) 2477 (alist-ref (tok-value rt) env)) 2478 (else (list rt))))) 2479 (cond 2480 ((null? rhs-list) (loop (cdr rest) out)) 2481 (else 2482 (let* ((p (%pp-paste-tokens lhs (car rhs-list))) 2483 (after (append (cdr rhs-list) (cdr rest)))) 2484 (loop after (cons p (cdr out)))))))))) 2485 ((%pp-ident? t) 2486 (let* ((pn (tok-value t)) (pt (alist-ref pn env))) 2487 (cond 2488 ((not pt) (loop rest (cons t out))) 2489 ((and (not (null? rest)) (%pp-punct? (car rest) 'paste)) 2490 ;; Operand of ##: use raw arg tokens (no prescan). 2491 (cond 2492 ((null? pt) (loop (cdr rest) out)) 2493 (else (loop rest (append (reverse pt) out))))) 2494 (else 2495 ;; Normal use: prescan (fully macro-expand the arg) 2496 ;; before substitution, per C11 §6.10.3.1. 2497 (let ((exp (%pp-expand-line pt state))) 2498 (loop rest (append (reverse exp) out))))))) 2499 (else (loop rest (cons t out))))))))) 2500 2501 ;; Paste two tokens textually; reparse the result. 2502 (define (%pp-paste-tokens lhs rhs) 2503 (let ((lk (tok-kind lhs)) (rk (tok-kind rhs))) 2504 (cond 2505 ((and (eq? lk 'IDENT) (eq? rk 'IDENT)) 2506 (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs)) 2507 (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) 2508 ((and (eq? lk 'IDENT) (eq? rk 'INT)) 2509 (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10)) 2510 (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) 2511 ((and (eq? lk 'INT) (eq? rk 'INT)) 2512 (let ((s (bytevector-append (fixnum->bv (tok-value lhs) 10) 2513 (fixnum->bv (tok-value rhs) 10)))) 2514 (let-values (((ok? n) (bv->fixnum s 10))) 2515 (cond 2516 ((not ok?) (die (tok-loc lhs) "paste: cannot reparse as integer" s)) 2517 (else (%tok 'INT n (tok-loc lhs) 2518 (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))))))) 2519 (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk))))) 2520 2521 (define (%pp-relocate t state) 2522 (cond 2523 ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t) 2524 (else 2525 (let* ((l (tok-loc t)) 2526 (f (or (pps-cur-file state) (loc-file l))) 2527 (ln (+ (loc-line l) (pps-line-delta state))) 2528 (c (loc-col l))) 2529 (%pp-with-loc t (%loc f ln c)))))) 2530 2531 ;; --- pp-eval-cexpr: #if expression evaluator --- 2532 ;; Steps: resolve `defined NAME`, macro-expand the rest, treat any 2533 ;; remaining IDENT as 0, then delegate to parse-const-int via a minimal 2534 ;; pstate (empty scope, no cg). sizeof(type) works as an extension; 2535 ;; sizeof(expr) dies with a clear message. 2536 ;; 2537 ;; Arena boundary (test 093 A→B→C pattern). Everything inside the 2538 ;; call-with-heap-rewind thunk is scratch: `s1`/`s2`/`s3` plus the 2539 ;; parse-const-* (value . ctype) cells at every level. parse-const-int 2540 ;; returns the integer via `car`, which is a fixnum immediate and 2541 ;; survives the rewind. The error path goes through `die` (sys-exits), 2542 ;; so no rewind there. 2543 (define (%pp-make-const-ps toks) 2544 (%pstate (make-list-iter toks) 2545 (%world (list '()) (list '()) '() '()) 2546 '() #f #f)) 2547 2548 (define (pp-eval-cexpr toks outer) 2549 ;; `outer` is the live %pp-state. We mint a fresh state for #if 2550 ;; evaluation but inherit cur-file and line-delta so __FILE__ / 2551 ;; __LINE__ inside the expression reflect any preceding #line. 2552 (call-with-heap-rewind 2553 (lambda () 2554 (let* ((state (%pp-state (pps-macros outer) '() 2555 (pps-cur-file outer) 2556 (pps-line-delta outer) 2557 #f '() '())) 2558 (s1 (%pp-resolve-defined toks state)) 2559 (s2 (%pp-expand-line s1 state)) 2560 (s3 (%pp-idents-as-zero s2)) 2561 (ps (%pp-make-const-ps s3)) 2562 (val (parse-const-int ps)) 2563 (t (peek ps))) 2564 (cond 2565 ((eq? (tok-kind t) 'EOF) val) 2566 (else (die (tok-loc t) "#if: garbage at end of expression" 2567 (tok-kind t)))))))) 2568 2569 (define (%pp-expand-line toks state) 2570 (let ((out (make-buf-list))) 2571 (%pp-emit-expanded toks state out) 2572 (buf-list-flush out))) 2573 2574 (define (%pp-resolve-defined toks state) 2575 (let loop ((toks toks) (acc '())) 2576 (cond 2577 ((null? toks) (reverse acc)) 2578 ((%pp-ident-name? (car toks) %pp-bv-defined) 2579 (let ((rest (cdr toks))) 2580 (cond 2581 ((null? rest) (die (tok-loc (car toks)) "defined: missing operand")) 2582 ((%pp-ident? (car rest)) 2583 (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0))) 2584 (loop (cdr rest) 2585 (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))) 2586 ((%pp-punct? (car rest) 'lparen) 2587 (let ((after (cdr rest))) 2588 (cond 2589 ((or (null? after) (not (%pp-ident? (car after)))) 2590 (die (tok-loc (car toks)) "defined: expected identifier")) 2591 (else 2592 (let ((aa (cdr after))) 2593 (cond 2594 ((or (null? aa) (not (%pp-punct? (car aa) 'rparen))) 2595 (die (tok-loc (car toks)) "defined: expected ')'")) 2596 (else 2597 (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0))) 2598 (loop (cdr aa) 2599 (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) )) 2600 (else (die (tok-loc (car rest)) "defined: expected identifier or '('"))))) 2601 (else (loop (cdr toks) (cons (car toks) acc)))))) 2602 2603 (define (%pp-idents-as-zero toks) 2604 (map (lambda (t) 2605 (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '())) 2606 (else t))) 2607 toks)) 2608 2609 ;; cc/cg.scm — codegen state and emission API. 2610 ;; Conversion split: parse owns promotion etc; cg owns sign extension, 2611 ;; signed/unsigned dispatch, pointer scaling. 2612 ;; 2613 ;; Output uses libp1pp's structured macros (%fn, %ifelse_nez, 2614 ;; %break, %continue) per docs/LIBP1PP.md. Function-local control-flow 2615 ;; labels are hex2++ dotted labels inside %fn's .scope. 2616 ;; 2617 ;; Frame layout: 2618 ;; [sp + 0 .. staging*8) outgoing-arg staging 2619 ;; [sp + staging*8 ..) locals + spilled vstack values 2620 ;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the 2621 ;; staging size, only known at fn-end, can be filled in via a 0-arg 2622 ;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block. 2623 2624 (define (%cg-emit-buf cg) 2625 (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg)))) 2626 2627 (define (%cg-emit cg bv) 2628 (buf-push! (%cg-emit-buf cg) bv)) 2629 2630 (define (%cg-emit-many cg bvs) 2631 (for-each (lambda (b) (%cg-emit cg b)) bvs)) 2632 2633 (define (%n n) (number->string n 10)) 2634 2635 ;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...) 2636 ;; lives on cg-fn-meta, reset at every cg-fn-begin/v. 2637 ;; 2638 ;; Update is destructive: assq for the key, set-cdr! if found, else 2639 ;; prepend. The functional alist-update path was O(n) per write *with* 2640 ;; an append+reverse rebuild — and cg-fn-begin/v plus every emit in a 2641 ;; function body hammers this. Mutation here is safe: the meta alist 2642 ;; is private to one cg, scratch-only, and discarded at fn-end. 2643 (define (%cg-fn-set! cg key val) 2644 (let* ((meta (cg-fn-meta cg)) 2645 (p (assq key meta))) 2646 (cond (p (set-cdr! p val)) 2647 (else (cg-fn-meta-set! cg (cons (cons key val) meta)))))) 2648 2649 (define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg))) 2650 2651 (define (%cg-fresh-label cg prefix) 2652 (let* ((n (cg-label-ctr cg)) 2653 (bv (bytevector-append prefix (%n n)))) 2654 (cg-label-ctr-set! cg (+ n 1)) 2655 bv)) 2656 2657 (define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L")) 2658 (define (%cg-fresh-lbl cg) (%cg-fresh-label cg "lbl_")) 2659 2660 (define (%cg-bump-outgoing! cg n) 2661 (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0)) 2662 2663 (define (%cg-slot-expr cg logical-off) 2664 (let ((nm (%cg-fn-get cg '%fn-name))) 2665 (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")")))) 2666 2667 (define (%cg-mangle-global name-bv) 2668 (bytevector-append "cc__" name-bv)) 2669 2670 ;; Label for a sym at the M1 layer. 2671 ;; 2672 ;; C linkage rules drive this directly: 2673 ;; - external linkage (the default at file scope, plus any `extern` 2674 ;; decl): bare ident. Same label name shared between every decl 2675 ;; and the eventual definition, in any order. `extern T memcpy()` 2676 ;; links to libp1pp's `:memcpy`; `int g_acc;` and refs to it 2677 ;; share `:g_acc`. 2678 ;; - internal linkage (`static`): cc__-prefixed. Free to mangle 2679 ;; since `static` is invisible across TUs, and the prefix keeps 2680 ;; it out of the external/runtime namespace. 2681 ;; Block-scope statics already mangle their sym-name to 2682 ;; `<fnname>__<n>` at parse time (see line ~5125); the cc__ prefix 2683 ;; here just nests another layer of namespacing on top of that. 2684 (define (%cg-sym-label sm) 2685 (cond 2686 ((eq? (sym-storage sm) 'static) (%cg-mangle-global (sym-name sm))) 2687 (else (sym-name sm)))) 2688 2689 (define (%cg-reg->bv r) (symbol->string r)) 2690 2691 (define (%cg-emit-li cg reg n) 2692 (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n"))) 2693 2694 (define (%cg-emit-la cg reg label-bv) 2695 (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n"))) 2696 2697 (define (%cg-emit-ld-slot cg reg logical-off) 2698 (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, " 2699 (%cg-slot-expr cg logical-off) ")\n"))) 2700 2701 (define (%cg-emit-st-slot cg reg logical-off) 2702 (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, " 2703 (%cg-slot-expr cg logical-off) ")\n"))) 2704 2705 (define (%cg-emit-ld cg reg base off) 2706 (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", " 2707 (%cg-reg->bv base) ", " (%n off) ")\n"))) 2708 2709 (define (%cg-emit-st cg reg base off) 2710 (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", " 2711 (%cg-reg->bv base) ", " (%n off) ")\n"))) 2712 2713 ;; Width-aware load/store. Dispatches on ctype-size: 2714 ;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by 2715 ;; shli/sari 56 to materialize the canonical 64-bit form). 2716 ;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops, 2717 ;; and word ops require natural alignment which we can't promise 2718 ;; for struct fields or non-word-aligned local slots). Loads 2719 ;; gather bytes via %lb + shli/or; stores scatter via shri/%sb. 2720 ;; Signed loads (i16/i32) sign-extend via shli/sari to canonical 2721 ;; 64-bit form. 2722 ;; 8 (and any other size): %ld / %st. 2723 ;; Scratch convention: helpers may clobber t1; callers never pass 2724 ;; reg=t1. 2725 2726 ;; Sub-word loads/stores defer byte-decomposition to libp1pp's 2727 ;; %ld_h / %ld_w / %ld_sh / %ld_sw / %st_h / %st_w macros (see 2728 ;; P1/P1pp.P1pp). cc.scm just emits one macro call per access; the 2729 ;; macro arranges the byte gather/scatter and (for signed loads) folds 2730 ;; in the sign-extend. t1 is the conventional scratch. 2731 (define (%cg-emit-ld-sub cg reg base-bv off-bv signed? n-bytes) 2732 (let ((mname (cond ((= n-bytes 2) (if signed? "%ld_sh(" "%ld_h(")) 2733 ((= n-bytes 4) (if signed? "%ld_sw(" "%ld_w(")) 2734 (else (die #f "cg-emit-ld-sub: bad width" n-bytes))))) 2735 (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", " 2736 base-bv ", " off-bv ", t1)\n")))) 2737 2738 (define (%cg-emit-st-sub cg reg base-bv off-bv n-bytes) 2739 (let ((mname (cond ((= n-bytes 2) "%st_h(") 2740 ((= n-bytes 4) "%st_w(") 2741 (else (die #f "cg-emit-st-sub: bad width" n-bytes))))) 2742 (%cg-emit-many cg (list mname (%cg-reg->bv reg) ", " 2743 base-bv ", " off-bv ", t1)\n")))) 2744 2745 ;; "address of frame slot" — defers to libp1pp's %lea_slot, which hides 2746 ;; the backend frame-header offset that %mov(rd, sp) folds in. 2747 (define (%cg-emit-lea-slot cg reg-bv slot-bv) 2748 (%cg-emit-many cg (list "%lea_slot(" reg-bv ", " slot-bv ")\n"))) 2749 2750 ;; sext8/16/32 emitted via libp1pp's %sext<N>(rd, ra). shift-amount is 2751 ;; kept as the parameter for call-site clarity (callers think in bit 2752 ;; widths via the same 56/48/32 amounts they always have). 2753 (define (%cg-emit-sext cg reg shift-amount) 2754 (let ((width (cond ((= shift-amount 56) "8") 2755 ((= shift-amount 48) "16") 2756 ((= shift-amount 32) "32") 2757 (else (die #f "cg-emit-sext: bad shift" shift-amount)))) 2758 (rb (%cg-reg->bv reg))) 2759 (%cg-emit-many cg (list "%sext" width "(" rb ", " rb ")\n")))) 2760 2761 ;; Canonicalize REG against CTYPE's kind: signed narrow types sign-extend, 2762 ;; unsigned narrow types zero-extend, anything else is left alone (the 2763 ;; full 64-bit value is already canonical). Used after operations that 2764 ;; may have left a non-canonical bit pattern in reg — frame-rval load, 2765 ;; narrowing cast, narrow-typed binop result. 2766 (define (%cg-canonicalize cg reg ctype) 2767 (let* ((rb (%cg-reg->bv reg)) 2768 (k (ctype-kind ctype))) 2769 (cond 2770 ((eq? k 'i8) (%cg-emit-sext cg reg 56)) 2771 ((eq? k 'i16) (%cg-emit-sext cg reg 48)) 2772 ((eq? k 'i32) (%cg-emit-sext cg reg 32)) 2773 ((or (eq? k 'u8) (eq? k 'bool)) 2774 (%cg-emit-many cg (list "%zext8(" rb ", " rb ")\n"))) 2775 ((eq? k 'u16) 2776 (%cg-emit-many cg (list "%zext16(" rb ", " rb ")\n"))) 2777 ((eq? k 'u32) 2778 (%cg-emit-many cg (list "%zext32(" rb ", " rb ", t1)\n"))) 2779 (else 0)))) 2780 2781 ;; Width-aware load/store core. BASE-BV / OFF-BV are pre-built (so the 2782 ;; same body serves both the slot variants — base = "sp", off rendered 2783 ;; through %cg-slot-expr — and the typed variants, where base is a 2784 ;; register and off is a raw integer rendered via %n). 1-byte uses 2785 ;; %lb/%sb (with i8 sext); 2- and 4-byte use the sub-word helpers; the 2786 ;; 8-byte fallback emits a plain %ld/%st against the same base/off. 2787 (define (%cg-emit-ld-bv cg reg ctype base-bv off-bv) 2788 (%cg-fp-reject! 'ld ctype) 2789 (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) 2790 (rb (%cg-reg->bv reg))) 2791 (cond 2792 ((= sz 1) 2793 (%cg-emit-many cg (list "%lb(" rb ", " base-bv ", " off-bv ")\n")) 2794 (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) 2795 ((= sz 2) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i16) 2)) 2796 ((= sz 4) (%cg-emit-ld-sub cg reg base-bv off-bv (eq? kind 'i32) 4)) 2797 (else 2798 (%cg-emit-many cg (list "%ld(" rb ", " base-bv ", " off-bv ")\n")))))) 2799 2800 (define (%cg-emit-st-bv cg reg ctype base-bv off-bv) 2801 (%cg-fp-reject! 'st ctype) 2802 (let ((sz (ctype-size ctype)) 2803 (rb (%cg-reg->bv reg))) 2804 (cond 2805 ((= sz 1) 2806 (%cg-emit-many cg (list "%sb(" rb ", " base-bv ", " off-bv ")\n"))) 2807 ((= sz 2) (%cg-emit-st-sub cg reg base-bv off-bv 2)) 2808 ((= sz 4) (%cg-emit-st-sub cg reg base-bv off-bv 4)) 2809 (else 2810 (%cg-emit-many cg (list "%st(" rb ", " base-bv ", " off-bv ")\n")))))) 2811 2812 (define (%cg-emit-ld-slot-typed cg reg ctype logical-off) 2813 (%cg-emit-ld-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off))) 2814 (define (%cg-emit-st-slot-typed cg reg ctype logical-off) 2815 (%cg-emit-st-bv cg reg ctype "sp" (%cg-slot-expr cg logical-off))) 2816 2817 (define (%cg-emit-ld-typed cg reg ctype base off) 2818 (%cg-emit-ld-bv cg reg ctype (%cg-reg->bv base) (%n off))) 2819 (define (%cg-emit-st-typed cg reg ctype base off) 2820 (%cg-emit-st-bv cg reg ctype (%cg-reg->bv base) (%n off))) 2821 2822 (define (%cg-load-opnd-into cg op reg) 2823 ;; frame lval: load at type width. frame rval is a spilled word 2824 ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load. 2825 ;; global lval width > 1 byte-gathers must not alias dest with base — 2826 ;; the first %lb would otherwise clobber the address before subsequent 2827 ;; byte loads. Stage the address in t2. 2828 (%cg-fp-reject! 'load (opnd-type op)) 2829 (pmatch op 2830 (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n)) 2831 (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off)) 2832 (%cg-emit-ld-slot-typed cg reg ty off)) 2833 (($ opnd? (kind frame) (lval? #f) (type ,ty) (ext ,off)) 2834 ;; Frame rval: spilled as 8 bytes, but the slot's bit-pattern may 2835 ;; not be canonical for the opnd's CURRENT type (e.g. 2836 ;; cg-arith-conv relabeled a signed slot as unsigned). Canonicalize 2837 ;; on load so downstream 64-bit ALU/compare ops see the C-semantic 2838 ;; value. 2839 (%cg-emit-ld-slot cg reg off) 2840 (%cg-canonicalize cg reg ty)) 2841 (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off)) 2842 (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl)) 2843 (($ opnd? (kind global) (type ,ty) (ext ,lbl)) 2844 (%cg-emit-la cg 't2 lbl) 2845 (%cg-emit-ld-typed cg reg ty 't2 0)) 2846 (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op))))) 2847 2848 (define (%cg-spill-reg cg reg ty) 2849 (let* ((off (cg-alloc-slot cg 8 8)) 2850 (op (%opnd 'frame ty off #f))) 2851 (%cg-emit-st-slot cg reg off) 2852 (cg-vstack-set! cg (cons op (cg-vstack cg))) 2853 op)) 2854 2855 ;; Floating-point softening. Real FP arithmetic is not implemented; 2856 ;; instead the cg silently treats fp ctypes as same-sized integer 2857 ;; bit patterns (flt as 4-byte, dbl/ldbl as 8-byte). Loads, stores, 2858 ;; and same-size casts round-trip the bytes; widening int→fp casts 2859 ;; leave the int bit-pattern in the wider slot; binops use integer 2860 ;; ALU ops. tcc.flat.c contains real fp code paths (parse_number, 2861 ;; ieee_finite, …) that the bootstrap tcc-boot2 never executes when 2862 ;; compiling float-free programs, so producing valid-but-semantically- 2863 ;; wrong P1pp here is sufficient. Kept as a named no-op so the call 2864 ;; sites stay grep-able if a future bootstrap target needs real FP. 2865 (define (%cg-fp-reject! op-name ty) #t) 2866 2867 (define (%reg-by-idx i) 2868 (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3) 2869 (else (die #f "cg: param idx > 3 needs ldarg path" i)))) 2870 2871 ;; -------------------------------------------------------------------- 2872 ;; Lifecycle 2873 ;; -------------------------------------------------------------------- 2874 2875 ;; cc-cg fixtures construct a cg directly via (cg-init) — they don't 2876 ;; emit ELF and don't link against another TU, so library knobs are 2877 ;; irrelevant. cc-main routes through cg-init/v with the parsed flag. 2878 (define (cg-init) (cg-init/v #f "")) 2879 2880 (define (cg-init/v lib? str-prefix) 2881 (%cg (make-buf/cap %BUF-CAP-TEXT) ; text 2882 (make-buf/cap %BUF-CAP-DATA) ; data 2883 (make-buf/cap %BUF-CAP-BSS) ; bss 2884 '() ; vstack 2885 0 ; frame-hi 2886 0 ; label-ctr 2887 (make-world) ; world (shared with pstate) 2888 '() ; fn-meta 2889 (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn) 2890 (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn) 2891 0 ; max-outgoing 2892 #f ; in-fn? 2893 lib? ; lib? (skip entry stub + :ELF_end) 2894 str-prefix)) ; str-prefix (cc__str_N namespacing) 2895 2896 (define (cg-finish cg) 2897 ;; Tentative file-scope defs (`int x;` / `static int x;` with no 2898 ;; initializer and not later defined with `=`) get their .bss slot 2899 ;; here at end of TU. C 6.9.2 — see cg-flush-tentatives!. 2900 (cg-flush-tentatives! cg) 2901 ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry) 2902 ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't 2903 ;; clobber a0/a1, so falling straight through to main forwards 2904 ;; them unchanged. The 16-byte frame is just enough for %enter's 2905 ;; saved-fp/lr to fit; main builds its own frame on top. 2906 ;; 2907 ;; In lib mode the stub and :ELF_end are suppressed: the catm chain 2908 ;; supplies them once, from P1/entry-*.P1pp and P1/elf-end.P1pp, so 2909 ;; library TUs don't fight the executable TU for ownership of 2910 ;; :p1_main and don't truncate ELF p_filesz at the first inner 2911 ;; :ELF_end (hex2 sizes off the first one it sees). 2912 (cond 2913 ((not (cg-lib? cg)) 2914 (let ((tb (cg-text cg))) 2915 (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to main\n") 2916 (buf-push! tb "%fn(p1_main, 16, {\n") 2917 (buf-push! tb "%call(&main)\n") 2918 (buf-push! tb "})\n")))) 2919 (bv-cat (list (buf-flush (cg-text cg)) 2920 (buf-flush (cg-data cg)) 2921 (buf-flush (cg-bss cg)) 2922 (cond ((cg-lib? cg) "") 2923 (else ":ELF_end\n"))))) 2924 2925 (define (cg-fn-begin cg name params return-type) 2926 (cg-fn-begin/v cg name params return-type #f)) 2927 2928 ;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte 2929 ;; slots covering incoming arg indices 0..15, populating each from the 2930 ;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for 2931 ;; idx 4..15. va_start computes the address of the slot at index = 2932 ;; named-arg count, so va_arg walks linearly through the rest. 2933 ;; Indices 4..15 may be garbage when the caller passed fewer args; user 2934 ;; code stops walking based on a count or sentinel before those slots 2935 ;; are read. Limit of 15 variadic args (after named) is enough for 2936 ;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more. 2937 (define (cg-fn-begin/v cg name params return-type variadic?) 2938 (buf-reset! (cg-fn-buf cg)) 2939 (buf-reset! (cg-prologue-buf cg)) 2940 (cg-in-fn?-set! cg #t) 2941 (cg-vstack-set! cg '()) 2942 (cg-frame-hi-set! cg 0) 2943 ;; cg-label-ctr is NOT reset per-fn. Compiler-internal labels are 2944 ;; emitted as dotted hex2++ locals inside %fn's .scope (and sometimes 2945 ;; nested .scope blocks), so within-TU collisions are already prevented 2946 ;; by local lookup. Keeping the counter monotonic across functions is 2947 ;; no longer required for correctness, just for stable, readable label 2948 ;; names in expanded.M1 traces. 2949 (cg-max-outgoing-set! cg 0) 2950 (cg-fn-meta-set! cg '()) 2951 (%cg-fn-set! cg '%fn-name name) 2952 (%cg-fn-set! cg '%fn-ret-type return-type) 2953 (%cg-fn-set! cg '%indirect-slots '()) 2954 (%cg-fn-set! cg '%fn-variadic? variadic?) 2955 ;; Return slot per P1.md §Arguments. ≤8B → a0; 9–16B → a0+a1; >16B 2956 ;; struct/union → indirect-result (A2): caller passes sret ptr in 2957 ;; a0; cg-return writes through it; sret-slot saves a0 for cg-fn-end. 2958 (let* ((rsz (cond ((eq? (ctype-kind return-type) 'void) 8) 2959 (else (align-up (max 8 (ctype-size return-type)) 8)))) 2960 (ret-slot (cg-alloc-slot cg rsz 8))) 2961 (%cg-fn-set! cg '%fn-ret-slot ret-slot) 2962 (cond 2963 ((not (eq? (ctype-kind return-type) 'void)) 2964 (let zinit ((k 0)) 2965 (cond 2966 ((>= k rsz) #t) 2967 (else 2968 (buf-push! (cg-prologue-buf cg) 2969 (bv-cat (list "%li(t0, 0)\n" 2970 "%st(t0, sp, " 2971 (%cg-slot-expr cg (+ ret-slot k)) 2972 ")\n"))) 2973 (zinit (+ k 8)))))))) 2974 (let* ((rk (ctype-kind return-type)) 2975 (sret? (and (or (eq? rk 'struct) (eq? rk 'union)) 2976 (> (ctype-size return-type) 16)))) 2977 (%cg-fn-set! cg '%fn-sret? sret?) 2978 (cond 2979 (sret? 2980 (let ((ss (cg-alloc-slot cg 8 8))) 2981 (%cg-fn-set! cg '%fn-sret-slot ss) 2982 (buf-push! (cg-prologue-buf cg) 2983 (bv-cat (list "%st(a0, sp, " 2984 (%cg-slot-expr cg ss) ")\n"))))) 2985 (else (%cg-fn-set! cg '%fn-sret-slot #f)))) 2986 ;; Variadic save area is capped at 16 incoming-arg slots; reject 2987 ;; variadic definitions whose named-arg count would already fill or 2988 ;; exceed it (no room left for variadic reads). 2989 (cond 2990 ((and variadic? (> (length params) 16)) 2991 (die #f "cg-fn-begin: variadic function exceeds 16-arg save-area cap" 2992 name (length params)))) 2993 ;; With sret, explicit arg i lives at ABI position (i+1): args 0..2 2994 ;; in a1..a3, args 3+ in slot (i-3). 2995 (let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0)) 2996 (spill (lambda (abi off) 2997 (cond 2998 ((< abi 4) 2999 (buf-push! (cg-prologue-buf cg) 3000 (bv-cat (list "%st(" (%cg-reg->bv (%reg-by-idx abi)) 3001 ", sp, " 3002 (%cg-slot-expr cg off) ")\n")))) 3003 (else 3004 (buf-push! (cg-prologue-buf cg) 3005 (bv-cat (list "%ldarg(t0, " (%n (- abi 4)) ")\n" 3006 "%st(t0, sp, " 3007 (%cg-slot-expr cg off) ")\n")))))))) 3008 (let walk ((ps params) (idx 0) (out '()) (first-slot #f)) 3009 (cond 3010 ((null? ps) 3011 (cond 3012 (variadic? 3013 (let pad ((i idx) (vfirst #f) (fs first-slot)) 3014 (cond 3015 ((>= i 16) 3016 (%cg-fn-set! cg '%fn-vararg-first-slot (or vfirst fs)) 3017 (reverse out)) 3018 (else 3019 (let ((off (cg-alloc-slot cg 8 8))) 3020 (spill (+ i sret-shift) off) 3021 (pad (+ i 1) (or vfirst off) (or fs off))))))) 3022 (else (reverse out)))) 3023 (else 3024 (let* ((p (car ps)) 3025 (nm (car p)) 3026 (ty (cdr p)) 3027 ;; AAPCS: 9..16B aggregates ride two consecutive arg 3028 ;; positions (regs or stack slots), wider-than-16B 3029 ;; aggregates would normally pass by reference — not 3030 ;; supported here yet. 3031 (n (%cg-param-reg-count ty)) 3032 (sz (cond ((%cg-param-aggregate? ty) 3033 (align-up (ctype-size ty) 8)) 3034 (else 8))) 3035 (al (cond ((%cg-param-aggregate? ty) 3036 (max 8 (ctype-align ty))) 3037 (else 8))) 3038 (off (cg-alloc-slot cg sz al)) 3039 (psym (%sym nm 'param #f ty off #t))) 3040 (let chunk ((i 0)) 3041 (cond ((>= i n) 0) 3042 (else 3043 (spill (+ idx sret-shift i) (+ off (* i 8))) 3044 (chunk (+ i 1))))) 3045 (walk (cdr ps) (+ idx n) (cons (cons nm psym) out) 3046 (or first-slot off)))))))) 3047 3048 ;; Number of consecutive ABI slots (regs or stack words) consumed by a 3049 ;; parameter of TY. Aggregates ≤16B take ⌈size/8⌉; everything else 1. 3050 (define (%cg-param-reg-count ty) 3051 (cond 3052 ((%cg-param-aggregate? ty) 3053 (let ((sz (ctype-size ty))) 3054 (cond 3055 ((> sz 16) 3056 (die #f "cg: aggregate arg/param >16B not supported" sz)) 3057 ((> sz 8) 2) 3058 (else 1)))) 3059 (else 1))) 3060 3061 (define (%cg-param-aggregate? ty) 3062 (let ((k (ctype-kind ty))) 3063 (or (eq? k 'struct) (eq? k 'union)))) 3064 3065 (define (cg-fn-end cg) 3066 ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! 3067 ;; (memcpy, no allocation). Header/footer pieces go through buf-push! 3068 ;; on cg-text — also memcpy. Net result: zero net heap allocation in 3069 ;; cg-fn-end other than the small (%n N) bvs for staging-bytes / 3070 ;; frame-size, which the enclosing parse-decl-or-fn boundary's 3071 ;; reset-scratch-heap! reclaims. 3072 (let* ((name (%cg-fn-get cg '%fn-name)) 3073 (ret-slot (%cg-fn-get cg '%fn-ret-slot)) 3074 (ret-type (%cg-fn-get cg '%fn-ret-type)) 3075 (locals-hi (cg-frame-hi cg)) 3076 (staging-bytes (* 8 (cg-max-outgoing cg))) 3077 (raw-size (+ staging-bytes locals-hi)) 3078 (frame-size (align-up raw-size 16)) 3079 ;; Look up the bound sym for this fn so `static void foo(){...}` 3080 ;; emits the same cc__-mangled label that callers reference. 3081 ;; The sym was bound by parse-fn-body before the body parse, 3082 ;; so it's in the top scope frame at this point. 3083 (fn-sym (alist-ref name (car (world-scope (cg-world cg))))) 3084 (mangled (cond (fn-sym (%cg-sym-label fn-sym)) 3085 (else name))) 3086 (tb (cg-text cg))) 3087 ;; Now that the body is fully emitted, leave fn dispatch so any 3088 ;; trailing emits in this function (including the ret-block below) 3089 ;; route to cg-text directly. 3090 (cg-in-fn?-set! cg #f) 3091 ;; staging-size macro 3092 (buf-push! tb "%macro ") 3093 (buf-push! tb name) 3094 (buf-push! tb "__SO()\n") 3095 (buf-push! tb (%n staging-bytes)) 3096 (buf-push! tb "\n%endm\n") 3097 ;; %fn header 3098 (buf-push! tb "%fn(") 3099 (buf-push! tb mangled) 3100 (buf-push! tb ", ") 3101 (buf-push! tb (%n frame-size)) 3102 (buf-push! tb ", {\n") 3103 ;; prologue + body, drained byte-for-byte 3104 (buf-drain! tb (cg-prologue-buf cg)) 3105 ;; --cc-trace-emit: emit `%trace(&LBL, LEN)` between prologue (which 3106 ;; spilled live argument regs to slots) and body, so the macro can 3107 ;; freely clobber a0..a2. The mangled name rides through the 3108 ;; regular string pool — cg-intern-string emits it with a trailing 3109 ;; NUL and pads to 8-byte alignment, so the next data label stays 3110 ;; aligned. We pass the *logical* byte length (no NUL) so the 3111 ;; runtime print stops at the actual end of the name. 3112 (cond 3113 ((trace-emit?) 3114 (let ((tag-lbl (cg-intern-string cg mangled))) 3115 (buf-push! tb "%trace(&") 3116 (buf-push! tb tag-lbl) 3117 (buf-push! tb ", ") 3118 (buf-push! tb (%n (bytevector-length mangled))) 3119 (buf-push! tb ")\n")))) 3120 (buf-drain! tb (cg-fn-buf cg)) 3121 ;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr. 3122 (buf-push! tb ":.ret\n") 3123 (let ((rk (ctype-kind ret-type)) 3124 (sret? (%cg-fn-get cg '%fn-sret?))) 3125 (cond 3126 ((eq? rk 'void) 3127 (buf-push! tb "%li(a0, 0)\n")) 3128 (sret? 3129 (buf-push! tb "%ld(a0, sp, ") 3130 (buf-push! tb (%cg-slot-expr cg (%cg-fn-get cg '%fn-sret-slot))) 3131 (buf-push! tb ")\n")) 3132 (else 3133 (buf-push! tb "%ld(a0, sp, ") 3134 (buf-push! tb (%cg-slot-expr cg ret-slot)) 3135 (buf-push! tb ")\n") 3136 (cond 3137 ((> (ctype-size ret-type) 8) 3138 (buf-push! tb "%ld(a1, sp, ") 3139 (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8))) 3140 (buf-push! tb ")\n")))))) 3141 (buf-push! tb "})\n") 3142 (cg-vstack-set! cg '()) 3143 (cg-frame-hi-set! cg 0) 3144 (cg-max-outgoing-set! cg 0) 3145 0)) 3146 3147 ;; -------------------------------------------------------------------- 3148 ;; Vstack 3149 ;; -------------------------------------------------------------------- 3150 (define (cg-push cg op) 3151 (cg-vstack-set! cg (cons op (cg-vstack cg))) 3152 op) 3153 3154 (define (cg-pop cg) 3155 (let ((s (cg-vstack cg))) 3156 (cond ((null? s) (die #f "cg-pop: empty vstack")) 3157 (else (cg-vstack-set! cg (cdr s)) (car s))))) 3158 3159 (define (cg-top cg) 3160 (let ((s (cg-vstack cg))) 3161 (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s))))) 3162 3163 (define (cg-depth cg) (length (cg-vstack cg))) 3164 3165 ;; -------------------------------------------------------------------- 3166 ;; Snapshot / rewind — discard any vstack pushes and fn-buf bytes 3167 ;; emitted between snapshot and rewind. Used by sizeof to parse its 3168 ;; operand for type information without retaining its side effects 3169 ;; (CC.md §Expressions: sizeof's operand is not evaluated). Internal- 3170 ;; only; the parser is the sole expected caller. 3171 ;; 3172 ;; vstack captures the head of the cons-list (immutable structurally). 3173 ;; fn-buf is restored by resetting buf-offset; the underlying storage 3174 ;; bytes past the new offset become garbage that the next buf-push! 3175 ;; will overwrite (buf-push! always copies into [offset, offset+len)). 3176 ;; frame-hi and max-outgoing are also restored so cg-alloc-slot calls 3177 ;; inside the rewound region don't leak frame bytes. 3178 ;; -------------------------------------------------------------------- 3179 (define (cg-snapshot cg) 3180 (cond 3181 ((not (cg-in-fn? cg)) 3182 (die #f "cg-snapshot: not in fn"))) 3183 (list (cg-vstack cg) 3184 (buf-offset (cg-fn-buf cg)) 3185 (cg-frame-hi cg) 3186 (cg-max-outgoing cg))) 3187 3188 (define (cg-rewind cg tag) 3189 (cg-vstack-set! cg (car tag)) 3190 (buf-offset-set! (cg-fn-buf cg) (cadr tag)) 3191 (cg-frame-hi-set! cg (caddr tag)) 3192 (cg-max-outgoing-set! cg (cadddr tag))) 3193 3194 ;; Duplicate the top vstack entry. For lvals this is safe — the slot 3195 ;; (or label, or indirect-marked frame) backing the lval keeps existing 3196 ;; until the function ends. For rvals it duplicates the descriptor of 3197 ;; the spilled value; both copies refer to the same already-emitted 3198 ;; storage. Used for `lhs += rhs` and `++lhs` to preserve the lhs 3199 ;; across a `cg-load` so the subsequent `cg-assign` still has its 3200 ;; address. 3201 (define (cg-dup cg) 3202 (let ((p (cg-top cg))) (cg-push cg p) p)) 3203 3204 ;; -------------------------------------------------------------------- 3205 ;; Materialize 3206 ;; -------------------------------------------------------------------- 3207 (define (cg-push-imm cg ctype value) 3208 (cg-push cg (%opnd 'imm ctype value #f))) 3209 3210 (define (cg-push-string cg bv-content) 3211 (let* ((label (cg-intern-string cg bv-content)) 3212 (cp-ty (%ctype 'ptr 8 8 %t-i8))) 3213 (cg-push cg (%opnd 'global cp-ty label #f)))) 3214 3215 (define (cg-push-sym cg sm) 3216 (pmatch sm 3217 (($ sym? (kind fn) (type ,ty)) 3218 (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #f))) 3219 (($ sym? (kind enum-const) (type ,ty) (slot ,v)) 3220 (cg-push cg (%opnd 'imm ty v #f))) 3221 (($ sym? (kind var) (storage extern) (type ,ty)) 3222 (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t))) 3223 (($ sym? (kind var) (storage static) (type ,ty)) 3224 (cg-push cg (%opnd 'global ty (%cg-sym-label sm) #t))) 3225 (($ sym? (kind var) (type ,ty) (slot ,off)) 3226 (cg-push cg (%opnd 'frame ty off #t))) 3227 (($ sym? (kind param) (type ,ty) (slot ,off)) 3228 (cg-push cg (%opnd 'frame ty off #t))) 3229 (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sm))))) 3230 3231 ;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS 3232 ;; (not the value). To distinguish from ordinary frame-lvals (whose 3233 ;; slot holds the value directly), we tag indirect slots in 3234 ;; %indirect-slots so cg-load and cg-assign can do the extra 3235 ;; indirection. 3236 (define (%cg-mark-indirect! cg off) 3237 (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) 3238 (%cg-fn-set! cg '%indirect-slots (cons off cur)))) 3239 3240 (define (%cg-indirect? cg off) 3241 (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) 3242 (let loop ((xs cur)) 3243 (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs))))))) 3244 3245 (define (cg-push-deref cg) 3246 (let* ((p (cg-pop cg)) 3247 (pt (opnd-type p)) 3248 (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt)) 3249 ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt))) 3250 (else #f)))) 3251 (cond 3252 ((not pe) (die #f "cg-push-deref: not a pointer" pt)) 3253 (else 3254 (%cg-load-opnd-into cg p 't0) 3255 (let ((off (cg-alloc-slot cg 8 8))) 3256 (%cg-emit-st-slot cg 't0 off) 3257 (%cg-mark-indirect! cg off) 3258 (cg-push cg (%opnd 'frame pe off #t))))))) 3259 3260 ;; -------------------------------------------------------------------- 3261 ;; Aggregate field access (§D.1–D.4) 3262 ;; -------------------------------------------------------------------- 3263 ;; cg-push-field cg fname: 3264 ;; pop a struct/union lval; look up `fname` in the struct's fields 3265 ;; list (data.scm: ext = (tag complete? fields), where each field 3266 ;; is (name-bv ctype offset)); push a new lval at the field's 3267 ;; offset with the field's ctype. 3268 ;; 3269 ;; Three input cases: 3270 ;; - direct frame lval at slot `off` -> frame lval at off+fo 3271 ;; - indirect frame lval (slot holds addr) -> new indirect slot for 3272 ;; addr+fo 3273 ;; - global lval at label L -> indirect slot for 3274 ;; la(L)+fo 3275 ;; In all cases the resulting lval has the field's ctype. 3276 3277 ;; Look up FNAME in FIELDS. C11 §6.7.2.1: a struct/union member with no 3278 ;; declarator (e.g. `union { int a; int b; };` inside another struct) is 3279 ;; an "anonymous member" — its members are addressed as if they belonged 3280 ;; directly to the enclosing aggregate. We recurse into any name=#f 3281 ;; member of struct/union kind, composing the outer member's offset with 3282 ;; the inner field's offset, and return a synthetic (name ctype off) 3283 ;; triple so callers can stay agnostic about anonymity. 3284 (define (%cg-find-field fields fname) 3285 (let loop ((xs fields)) 3286 (cond 3287 ((null? xs) #f) 3288 (else 3289 (let* ((f (car xs)) 3290 (fn (car f))) 3291 (cond 3292 ((and fn (bv= fn fname)) f) 3293 ((and (not fn) 3294 (let ((k (ctype-kind (cadr f)))) 3295 (or (eq? k 'struct) (eq? k 'union)))) 3296 (let* ((sub-ext (ctype-ext (cadr f))) 3297 (sub-fields (car (cddr sub-ext))) 3298 (hit (%cg-find-field sub-fields fname))) 3299 (cond 3300 (hit (list (car hit) 3301 (cadr hit) 3302 (+ (car (cddr f)) (car (cddr hit))))) 3303 (else (loop (cdr xs)))))) 3304 (else (loop (cdr xs))))))))) 3305 3306 (define (cg-push-field cg fname) 3307 (let* ((s (cg-pop cg)) 3308 (sty (opnd-type s)) 3309 (k (ctype-kind sty))) 3310 (cond 3311 ((not (or (eq? k 'struct) (eq? k 'union))) 3312 (die #f "cg-push-field: not a struct/union" k)) 3313 ((not (opnd-lval? s)) 3314 (die #f "cg-push-field: not an lvalue" k)) 3315 (else 3316 (let* ((fields (car (cddr (ctype-ext sty)))) 3317 (f (%cg-find-field fields fname))) 3318 (cond 3319 ((not f) (die #f "cg-push-field: no such field" fname)) 3320 (else 3321 (let* ((fty (cadr f)) (fo (car (cddr f)))) 3322 (pmatch s 3323 ;; direct frame lval: just shift the slot offset. 3324 (($ opnd? (kind frame) (ext ,off)) 3325 (guard (not (%cg-indirect? cg off))) 3326 (cg-push cg (%opnd 'frame fty (+ off fo) #t))) 3327 ;; indirect frame lval: addr lives in the slot. Compute 3328 ;; addr+fo into a new indirect slot. 3329 (($ opnd? (kind frame) (ext ,off)) 3330 (%cg-emit-ld-slot cg 't0 off) 3331 (cond 3332 ((> fo 0) 3333 (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) 3334 (let ((no (cg-alloc-slot cg 8 8))) 3335 (%cg-emit-st-slot cg 't0 no) 3336 (%cg-mark-indirect! cg no) 3337 (cg-push cg (%opnd 'frame fty no #t)))) 3338 ;; global lval: load addr, add offset, indirect slot. 3339 (($ opnd? (kind global) (ext ,lbl)) 3340 (%cg-emit-la cg 't0 lbl) 3341 (cond 3342 ((> fo 0) 3343 (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) 3344 (let ((no (cg-alloc-slot cg 8 8))) 3345 (%cg-emit-st-slot cg 't0 no) 3346 (%cg-mark-indirect! cg no) 3347 (cg-push cg (%opnd 'frame fty no #t)))) 3348 (else 3349 (die #f "cg-push-field: unsupported lval kind" 3350 (opnd-kind s)))))))))))) 3351 3352 ;; cg-decay-array: 3353 ;; if top of vstack is an arr-typed lval, replace it with a ptr-rval 3354 ;; to the first element. C arrays decay to T* in most contexts; 3355 ;; parse calls this before rval-style operations. No-op otherwise. 3356 (define (cg-decay-array cg) 3357 (let ((tp (cg-top cg))) 3358 (cond 3359 ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr)) 3360 (let* ((p (cg-pop cg)) 3361 (et (car (ctype-ext (opnd-type p)))) 3362 (pty (%ctype 'ptr 8 8 et))) 3363 (pmatch p 3364 ;; direct frame lval: address is sp+off. 3365 (($ opnd? (kind frame) (ext ,off)) 3366 (guard (not (%cg-indirect? cg off))) 3367 (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off)) 3368 (%cg-spill-reg cg 't0 pty)) 3369 ;; indirect frame lval (rare for arrays, but support it): 3370 ;; the slot holds the address already. 3371 (($ opnd? (kind frame) (ext ,off)) 3372 (%cg-emit-ld-slot cg 't0 off) 3373 (%cg-spill-reg cg 't0 pty)) 3374 ;; global array: la(label) is the address. 3375 (($ opnd? (kind global) (ext ,lbl)) 3376 (%cg-emit-la cg 't0 lbl) 3377 (%cg-spill-reg cg 't0 pty)) 3378 (else (die #f "cg-decay-array: unsupported lval kind" 3379 (opnd-kind p)))))) 3380 (else tp)))) 3381 3382 ;; -------------------------------------------------------------------- 3383 ;; Address & deref 3384 ;; -------------------------------------------------------------------- 3385 3386 ;; Materialize the address of an lval `op` directly into `reg`. 3387 ;; Variant of cg-take-addr that doesn't spill — used by struct copy 3388 ;; primitives (cg-return on struct, cg-copy-struct, cg-assign-struct, 3389 ;; cg-call's struct receive). Caller owns the opnd (already popped). 3390 ;; 3391 ;; A frame opnd is treated as a slot whose address we want: if it's a 3392 ;; flagged-indirect lval (slot holds a pointer to the real storage), 3393 ;; load the pointer; otherwise the slot itself IS the storage and we 3394 ;; lea its address. Frame rvals are temp spills — address = &slot. A 3395 ;; global opnd's label is the address. Callers that require an lval 3396 ;; check it before calling. 3397 (define (%cg-emit-addr-of cg op reg) 3398 (let ((reg-bv (%cg-reg->bv reg))) 3399 (pmatch op 3400 (($ opnd? (kind frame) (lval? #t) (ext ,off)) 3401 (guard (%cg-indirect? cg off)) 3402 (%cg-emit-ld-slot cg reg off)) 3403 (($ opnd? (kind frame) (ext ,off)) 3404 (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off))) 3405 (($ opnd? (kind global) (ext ,lbl)) 3406 (%cg-emit-la cg reg lbl)) 3407 (else (die #f "cg-emit-addr-of: unsupported opnd" 3408 (opnd-kind op) (opnd-lval? op)))))) 3409 3410 ;; cg-copy-struct: pop src lval, pop dst lval, emit per-byte copy 3411 ;; from src to dst (both must be lvals of the same struct/union type). 3412 ;; Used by parser for struct-typed assignment / initializer-from-call 3413 ;; targets. Pushes nothing. 3414 (define (cg-copy-struct cg) 3415 (let* ((src (cg-pop cg)) 3416 (dst (cg-pop cg)) 3417 (sty (opnd-type dst)) 3418 (sz (ctype-size sty))) 3419 (cond 3420 ((not (opnd-lval? src)) (die #f "cg-copy-struct: src not lvalue")) 3421 ((not (opnd-lval? dst)) (die #f "cg-copy-struct: dst not lvalue"))) 3422 (%cg-emit-addr-of cg src 't0) 3423 (%cg-emit-addr-of cg dst 't2) 3424 (%cg-emit-byte-copy cg 't2 't0 't1 sz))) 3425 3426 ;; Struct/union `=` assignment: pop src lval, pop dst lval, memcpy, 3427 ;; then push dst back so the assignment expression has a result for 3428 ;; the surrounding parser to consume (parse-expr-stmt's trailing 3429 ;; cg-pop, etc.). Distinct from cg-copy-struct because the 3430 ;; initializer caller needs no result on the vstack. 3431 ;; 3432 ;; The src may be either a frame lvalue (named local slot, *p deref, 3433 ;; callee return-slot) or a frame rvalue (anonymous slot from a temp 3434 ;; spill); %cg-emit-addr-of handles both shapes by treating the slot 3435 ;; itself as the address whenever the lval indirection flag isn't set. 3436 (define (cg-assign-struct cg) 3437 (let* ((src (cg-pop cg)) 3438 (dst (cg-pop cg)) 3439 (sty (opnd-type dst)) 3440 (sz (ctype-size sty))) 3441 (cond ((not (opnd-lval? dst)) (die #f "cg-assign-struct: dst not lvalue"))) 3442 (%cg-emit-addr-of cg src 't0) 3443 (%cg-emit-addr-of cg dst 't2) 3444 (%cg-emit-byte-copy cg 't2 't0 't1 sz) 3445 (cg-push cg dst))) 3446 3447 ;; Struct copy: defer to libp1pp memcpy via %memcpy_call. dst-reg and 3448 ;; src-reg hold the addresses; size is the byte count. tmp-reg is no 3449 ;; longer needed by this helper (kept in the signature so existing 3450 ;; callers don't have to thread their scratch allocation differently), 3451 ;; but the macro itself uses a0/a1/a2 around the call. dst-reg and 3452 ;; src-reg must not be a0 (the dst move would clobber a different live 3453 ;; input register); both current callers use t-regs. 3454 (define (%cg-emit-byte-copy cg dst-reg src-reg tmp-reg size) 3455 (%cg-emit-many cg (list "%memcpy_call(" 3456 (%cg-reg->bv dst-reg) ", " 3457 (%cg-reg->bv src-reg) ", " 3458 (%n size) ")\n"))) 3459 3460 (define (cg-take-addr cg) 3461 (let* ((p (cg-pop cg)) 3462 (ty (opnd-type p)) 3463 ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on 3464 ;; the result scales by sizeof(T[N]) (the whole array), so 3465 ;; &arr + 1 is one-past-end. Array-to-pointer decay happens 3466 ;; on use via cg-decay-array, not at the & operator. 3467 (pty (%ctype 'ptr 8 8 ty))) 3468 (pmatch p 3469 ;; &function: a function designator (rval of fn type pushed by 3470 ;; cg-push-sym) already evaluates to its entry-point address. The 3471 ;; `&` is a no-op semantically — re-tag the operand as ptr-to-fn. 3472 (($ opnd? (kind global) (type ,t) (ext ,lbl) (lval? #f)) 3473 (guard (eq? (ctype-kind t) 'fn)) 3474 (cg-push cg (%opnd 'global pty lbl #f))) 3475 (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue")) 3476 ;; The address itself lives at sp+slot — &*p degenerates to p. 3477 (($ opnd? (kind frame) (ext ,off)) 3478 (guard (%cg-indirect? cg off)) 3479 (%cg-emit-ld-slot cg 't0 off) 3480 (%cg-spill-reg cg 't0 pty)) 3481 ;; %lea_slot wraps the "%mov(rd, sp); %addi(rd, rd, slot)" idiom; 3482 ;; the backend hides any frame-header offset inside %mov(rd, sp). 3483 (($ opnd? (kind frame) (ext ,off)) 3484 (%cg-emit-lea-slot cg "t0" (%cg-slot-expr cg off)) 3485 (%cg-spill-reg cg 't0 pty)) 3486 (($ opnd? (kind global) (ext ,lbl)) 3487 (%cg-emit-la cg 't0 lbl) 3488 (%cg-spill-reg cg 't0 pty)) 3489 (else (die #f "cg-take-addr: non-addressable" (opnd-kind p)))))) 3490 3491 (define (cg-load cg) 3492 (let* ((p (cg-pop cg)) (ty (opnd-type p))) 3493 (cond 3494 ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue")) 3495 ;; Array lvalues decay to a ptr-rval addressing the first 3496 ;; element (C array-to-pointer decay). We push the lval back 3497 ;; and route through cg-decay-array for a single source of truth. 3498 ((eq? (ctype-kind ty) 'arr) 3499 (cg-push cg p) (cg-decay-array cg)) 3500 ;; Struct/union lvalues stay as lvalues — there is no 3501 ;; register-sized rvalue form for an aggregate, and the 3502 ;; existing 8-byte spill path silently truncated anything 3503 ;; wider (the bug that broke `c = cond ? a : b` for 3504 ;; sizeof(struct) > 8). Surrounding expression machinery 3505 ;; (cg-ifelse-merge / cg-assign-struct / cg-call) consumes 3506 ;; aggregate operands as lvalues already. 3507 ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) 3508 (cg-push cg p)) 3509 ((and (eq? (opnd-kind p) 'frame) 3510 (%cg-indirect? cg (opnd-ext p))) 3511 ;; Indirect frame-lval: slot holds the address. Stage the 3512 ;; address in t2 so multi-byte gathers don't alias dest with 3513 ;; base. 3514 (%cg-emit-ld-slot cg 't2 (opnd-ext p)) 3515 (%cg-emit-ld-typed cg 't0 ty 't2 0) 3516 (%cg-spill-reg cg 't0 ty)) 3517 (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty))))) 3518 3519 ;; -------------------------------------------------------------------- 3520 ;; Type conversions 3521 ;; -------------------------------------------------------------------- 3522 (define (cg-cast cg to-type) 3523 (let* ((p (cg-pop cg)) 3524 (from-ty (opnd-type p)) 3525 (from-sz (ctype-size from-ty)) 3526 (to-sz (ctype-size to-type)) 3527 (to-kind (ctype-kind to-type))) 3528 (%cg-fp-reject! 'cast-to to-type) 3529 (%cg-fp-reject! 'cast-from from-ty) 3530 (cond 3531 ((eq? to-kind 'bool) 3532 (%cg-load-opnd-into cg p 't0) 3533 (%cg-emit-many cg (list "%bool(t0, t0)\n")) 3534 (%cg-spill-reg cg 't0 to-type)) 3535 ((or (eq? to-kind 'ptr) 3536 (and (or (eq? to-kind 'i64) (eq? to-kind 'u64)) 3537 (or (eq? (ctype-kind from-ty) 'ptr) 3538 (eq? (ctype-kind from-ty) 'arr)))) 3539 (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) 3540 ;; Same-size or widening cast — retag only when the canonical 3541 ;; 64-bit slot form for FROM-TY is also canonical for TO-TYPE. 3542 ;; That holds unless we're crossing from a signed type into an 3543 ;; unsigned one of the same or wider width: the source's 3544 ;; sign-extended high bits would leak past the unsigned width 3545 ;; and corrupt later 64-bit operands (compares, wider casts). 3546 ;; Same applies to same-size unsigned→signed at narrow widths 3547 ;; (the narrow branch's sign-extend turns 0xCA back into the 3548 ;; canonical i8 slot 0xFF…FFCA). 3549 ((and (>= to-sz from-sz) 3550 (not (and (not (%ctype-unsigned? from-ty)) 3551 (%ctype-unsigned? to-type))) 3552 (not (and (= to-sz from-sz) 3553 (%ctype-unsigned? from-ty) 3554 (not (%ctype-unsigned? to-type))))) 3555 (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) 3556 (else 3557 ;; Narrowing cast OR same/widening with signedness flip. 3558 ;; Signed targets (i8/i16/i32) shli/sari to truncate-and- 3559 ;; sign-extend in one step, so the slot holds the canonical 3560 ;; 64-bit form and a subsequent widening cast (which is 3561 ;; relabel-only) restores the value. Unsigned targets mask 3562 ;; off high bits to zero-extend. 3563 (%cg-load-opnd-into cg p 't0) 3564 (%cg-canonicalize cg 't0 to-type) 3565 (%cg-spill-reg cg 't0 to-type))))) 3566 3567 (define (cg-promote cg) 3568 (let* ((p (cg-pop cg)) 3569 (ty (opnd-type p)) 3570 (sz (ctype-size ty))) 3571 (cond 3572 ;; C 6.3.1.1: _Bool, char, short, and any narrower int type 3573 ;; promote to (signed) int — every representable value fits 3574 ;; in i32. Treating narrow unsigned types as u32 here would 3575 ;; drag the subsequent arith-conv into picking the unsigned 3576 ;; common type, flipping signedness of `>>`, comparisons, 3577 ;; division, etc. against the C rule. Canonical form for any 3578 ;; in-range narrow value already matches i32, so the cast is 3579 ;; relabel-only. 3580 ((< sz 4) 3581 (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p)))) 3582 (else (cg-push cg p))))) 3583 3584 (define (cg-arith-conv cg) 3585 ;; Usual arithmetic conversions on arithmetic operands. When either 3586 ;; operand is a pointer (or array, 3587 ;; which behaves as a pointer in arithmetic), the pair is a 3588 ;; pointer-arith case — leave the types alone so cg-binop can detect 3589 ;; the ptr operand and apply the right scaling. 3590 (let* ((b (cg-pop cg)) 3591 (a (cg-pop cg)) 3592 (ta (opnd-type a)) 3593 (tb (opnd-type b)) 3594 (sa (ctype-size ta)) 3595 (sb (ctype-size tb))) 3596 (cond 3597 ;; Pointer/array arithmetic: leave types alone so cg-binop's 3598 ;; ptr-aware add/sub branch fires with the correct pointee type 3599 ;; (and doesn't see two pointers, which would skip scaling). 3600 ((or (%ctype-ptr? ta) (%ctype-ptr? tb)) 3601 (cg-push cg a) 3602 (cg-push cg b)) 3603 (else 3604 (let ((common (cond 3605 ((> sa sb) ta) 3606 ((> sb sa) tb) 3607 ((%ctype-unsigned? ta) ta) 3608 ((%ctype-unsigned? tb) tb) 3609 (else ta)))) 3610 ;; Route through cg-cast (rather than relabel only) so the 3611 ;; canonical 64-bit slot form lines up with COMMON. Same-size 3612 ;; cross-signedness conversions (i32→u32, u32→i32, …) need an 3613 ;; actual zext/sext to canonicalize; otherwise an i32 -3 3614 ;; relabeled to u32 keeps its sign-extended slot bits and 3615 ;; compares unequal to a u32 imm with the same C value. 3616 (cg-push cg a) (cg-cast cg common) 3617 (let ((a* (cg-pop cg))) 3618 (cg-push cg b) (cg-cast cg common) 3619 (let ((b* (cg-pop cg))) 3620 (cg-push cg a*) 3621 (cg-push cg b*)))))))) 3622 3623 ;; -------------------------------------------------------------------- 3624 ;; Operators 3625 ;; -------------------------------------------------------------------- 3626 (define (%cg-emit-rrr cg op rd ra rb) 3627 (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", " 3628 (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n"))) 3629 3630 (define (%cg-emit-cmp cg cc ra rb rd) 3631 (%cg-emit-many cg (list "%cmpset_" cc "(" 3632 (%cg-reg->bv rd) ", " 3633 (%cg-reg->bv ra) ", " (%cg-reg->bv rb) 3634 ")\n"))) 3635 3636 (define (cg-binop cg op) 3637 (let* ((b (cg-pop cg)) 3638 (a (cg-pop cg)) 3639 (ta (opnd-type a)) 3640 (tb (opnd-type b)) 3641 (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb))) 3642 (a-ptr? (%ctype-ptr? ta)) 3643 (b-ptr? (%ctype-ptr? tb)) 3644 (result-ty 3645 (cond 3646 ((or (eq? op 'eq) (eq? op 'ne) 3647 (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) 3648 %t-i32) 3649 ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64) 3650 (a-ptr? ta) 3651 (b-ptr? tb) 3652 (else ta)))) 3653 (cond 3654 ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?)) 3655 (%cg-load-opnd-into cg a 'a0) 3656 (%cg-load-opnd-into cg b 'a1) 3657 (let ((sz (ctype-size (%ctype-pointee ta))) 3658 (mac (if (eq? op 'add) "%ptr_add(" "%ptr_sub("))) 3659 (%cg-emit-many cg (list mac "t0, a0, a1, " (%n sz) ", t1)\n"))) 3660 (%cg-spill-reg cg 't0 result-ty)) 3661 ((and b-ptr? (eq? op 'add) (not a-ptr?)) 3662 (%cg-load-opnd-into cg a 'a0) 3663 (%cg-load-opnd-into cg b 'a1) 3664 (let ((sz (ctype-size (%ctype-pointee tb)))) 3665 (%cg-emit-many cg (list "%ptr_add(t0, a1, a0, " (%n sz) ", t1)\n"))) 3666 (%cg-spill-reg cg 't0 result-ty)) 3667 ((and a-ptr? b-ptr? (eq? op 'sub)) 3668 (%cg-load-opnd-into cg a 'a0) 3669 (%cg-load-opnd-into cg b 'a1) 3670 (let ((sz (ctype-size (%ctype-pointee ta)))) 3671 (%cg-emit-many cg (list "%ptr_diff(t0, a0, a1, " (%n sz) ", t1)\n"))) 3672 (%cg-spill-reg cg 't0 result-ty)) 3673 (else 3674 (%cg-load-opnd-into cg a 'a0) 3675 (%cg-load-opnd-into cg b 'a1) 3676 (cond 3677 ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1)) 3678 ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1)) 3679 ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1)) 3680 ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1)) 3681 ((eq? op 'or) (%cg-emit-rrr cg "or" 't0 'a0 'a1)) 3682 ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1)) 3683 ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1)) 3684 ((eq? op 'shr) 3685 ;; Shift result type is the promoted LEFT operand's type 3686 ;; (C 6.5.7); arithmetic vs logical shift must follow that 3687 ;; signedness alone, not the rhs's. cg-arith-conv may have 3688 ;; relabeled ta to match an unsigned rhs — guard against 3689 ;; that by checking the original `a` opnd's signedness. 3690 (if (%ctype-unsigned? ta) 3691 (%cg-emit-rrr cg "shr" 't0 'a0 'a1) 3692 (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) 3693 ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1)) 3694 ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1)) 3695 ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0)) 3696 ((eq? op 'ne) (%cg-emit-cmp cg "ne" 'a0 'a1 't0)) 3697 ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0)) 3698 ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0)) 3699 ((eq? op 'le) (%cg-emit-cmp cg (if unsigned? "leu" "le") 'a0 'a1 't0)) 3700 ((eq? op 'ge) (%cg-emit-cmp cg (if unsigned? "geu" "ge") 'a0 'a1 't0)) 3701 (else (die #f "cg-binop: unknown op" op))) 3702 ;; Canonicalize narrow integer results to their type's bit width 3703 ;; before spilling, so the slot's bit-pattern matches result-ty. 3704 ;; Compare ops already yield 0/1; skip them. Pointer-arith branches 3705 ;; above don't reach here. 3706 (cond 3707 ((or (eq? op 'eq) (eq? op 'ne) 3708 (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) 0) 3709 (else (%cg-canonicalize cg 't0 result-ty))) 3710 (%cg-spill-reg cg 't0 result-ty))))) 3711 3712 ;; Post-increment / post-decrement on the top-of-vstack lval. 3713 ;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store. 3714 ;; Uses cg-dup + cg-load to capture the old rval (which is then in a 3715 ;; never-reused spill slot), then runs the regular dup+load+add+assign 3716 ;; pattern for the store. Pointer scaling falls out of cg-binop add. 3717 (define (%cg-post-inc-dec cg op) 3718 (cg-dup cg) 3719 (cg-load cg) 3720 (let ((old (cg-pop cg))) 3721 (cg-dup cg) 3722 (cg-load cg) 3723 (cg-push-imm cg %t-i32 1) 3724 (cg-binop cg op) 3725 (cg-assign cg) 3726 (cg-pop cg) 3727 (cg-push cg old))) 3728 3729 (define (cg-postinc cg) (%cg-post-inc-dec cg 'add)) 3730 (define (cg-postdec cg) (%cg-post-inc-dec cg 'sub)) 3731 3732 (define (cg-unop cg op) 3733 (let* ((p (cg-pop cg)) (ty (opnd-type p))) 3734 (%cg-load-opnd-into cg p 't0) 3735 (cond 3736 ((eq? op 'neg) 3737 (%cg-emit-many cg (list "%neg(t0, t0, t1)\n")) 3738 (%cg-spill-reg cg 't0 ty)) 3739 ((eq? op 'bnot) 3740 (%cg-emit-many cg (list "%bnot(t0, t0, t1)\n")) 3741 (%cg-spill-reg cg 't0 ty)) 3742 ((eq? op 'lnot) 3743 (%cg-emit-many cg (list "%cmpset_eqz(t0, t0)\n")) 3744 (%cg-spill-reg cg 't0 %t-i32)) 3745 (else (die #f "cg-unop: unknown op" op))))) 3746 3747 (define (cg-assign cg) 3748 ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek 3749 ;; deeper than vstack top to do this itself), emits the store, pushes 3750 ;; the assigned value as the result rval. 3751 (let* ((rhs0 (cg-pop cg)) 3752 (lhs (cg-pop cg)) 3753 (ty (opnd-type lhs))) 3754 (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue"))) 3755 ;; Cast rhs to lhs's type (no-op when the types already match). 3756 (cg-push cg rhs0) 3757 (cg-cast cg ty) 3758 (let ((rhs (cg-pop cg))) 3759 (%cg-load-opnd-into cg rhs 'a0) 3760 (pmatch lhs 3761 (($ opnd? (kind frame) (ext ,off)) 3762 (guard (%cg-indirect? cg off)) 3763 (%cg-emit-ld-slot cg 't0 off) 3764 (%cg-emit-st-typed cg 'a0 ty 't0 0)) 3765 (($ opnd? (kind frame) (ext ,off)) 3766 (%cg-emit-st-slot-typed cg 'a0 ty off)) 3767 (($ opnd? (kind global) (ext ,lbl)) 3768 (%cg-emit-la cg 't0 lbl) 3769 (%cg-emit-st-typed cg 'a0 ty 't0 0)) 3770 (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs)))) 3771 (%cg-spill-reg cg 'a0 ty)))) 3772 3773 ;; -------------------------------------------------------------------- 3774 ;; Calls 3775 ;; -------------------------------------------------------------------- 3776 (define (cg-call cg arity has-result?) 3777 (let* ((args (let loop ((i 0) (acc '())) 3778 (cond ((= i arity) acc) 3779 (else (loop (+ i 1) (cons (cg-pop cg) acc)))))) 3780 (fn-op (cg-pop cg)) 3781 ;; sret = struct/union > 16B return; shift args by one reg 3782 ;; and place a0 last so it's not clobbered by arg loads. 3783 (fty (opnd-type fn-op)) 3784 (rty (cond 3785 ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty))) 3786 ((eq? (ctype-kind fty) 'ptr) 3787 (let ((p (ctype-ext fty))) 3788 (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64))) 3789 (else %t-i64))) 3790 (rk (ctype-kind rty)) 3791 (sret? (and has-result? 3792 (or (eq? rk 'struct) (eq? rk 'union)) 3793 (> (ctype-size rty) 16))) 3794 ;; If the callee is variadic, the callee's save area caps total 3795 ;; incoming args at 16. Reject silent miscompiles up front. 3796 (callee-fty (cond 3797 ((eq? (ctype-kind fty) 'fn) fty) 3798 ((and (eq? (ctype-kind fty) 'ptr) 3799 (eq? (ctype-kind (ctype-ext fty)) 'fn)) 3800 (ctype-ext fty)) 3801 (else #f))) 3802 (callee-variadic? (and callee-fty 3803 (let ((ext (ctype-ext callee-fty))) 3804 (and (pair? ext) (pair? (cdr ext)) 3805 (pair? (cddr ext)) 3806 (car (cddr ext)))))) 3807 (_cap-check (cond 3808 ((and callee-variadic? (> arity 16)) 3809 (die #f "cg-call: variadic call exceeds 16-arg save-area cap" 3810 arity)) 3811 (else 0))) 3812 (sret-shift (if sret? 1 0)) 3813 (recv-slot (cond 3814 (sret? 3815 (cg-alloc-slot cg 3816 (align-up (ctype-size rty) 8) 3817 (max 8 (ctype-align rty)))) 3818 (else #f)))) 3819 (let stage ((xs args) (idx 0)) 3820 (cond 3821 ((null? xs) 0) 3822 (else 3823 (let* ((arg (car xs)) 3824 (aty (opnd-type arg)) 3825 (n (%cg-param-reg-count aty))) 3826 (cond 3827 ;; Aggregate >8B: load both halves into successive arg 3828 ;; regs / stack slots. Stage the struct's address in t0 3829 ;; once and chunk-load 8 bytes at a time. 3830 ((and (%cg-param-aggregate? aty) (> n 1)) 3831 (%cg-emit-addr-of cg arg 't0) 3832 (let chunk ((i 0)) 3833 (cond 3834 ((>= i n) 0) 3835 (else 3836 (let ((tabi (+ idx sret-shift i))) 3837 (cond 3838 ((< tabi 4) 3839 (%cg-emit-many cg 3840 (list "%ld(" 3841 (%cg-reg->bv (%reg-by-idx tabi)) 3842 ", t0, " (%n (* i 8)) ")\n"))) 3843 (else 3844 (%cg-emit-many cg 3845 (list "%ld(t1, t0, " 3846 (%n (* i 8)) ")\n")) 3847 (%cg-emit-st cg 't1 'sp (* 8 (- tabi 4)))))) 3848 (chunk (+ i 1))))) 3849 (stage (cdr xs) (+ idx n))) 3850 (else 3851 (let ((abi (+ idx sret-shift))) 3852 (cond 3853 ((< abi 4) 3854 (%cg-load-opnd-into cg arg (%reg-by-idx abi)) 3855 (stage (cdr xs) (+ idx 1))) 3856 (else 3857 (%cg-load-opnd-into cg arg 't0) 3858 (%cg-emit-st cg 't0 'sp (* 8 (- abi 4))) 3859 (stage (cdr xs) (+ idx 1))))))))))) 3860 ;; Stack-arg footprint accounts for the extra ABI slot any 3861 ;; >8B-aggregate arg consumed beyond its single-position cousin. 3862 (let* ((nabi (let count ((xs args) (n sret-shift)) 3863 (cond ((null? xs) n) 3864 (else (count (cdr xs) 3865 (+ n (%cg-param-reg-count 3866 (opnd-type (car xs))))))))) 3867 (sa (max 0 (- nabi 4)))) 3868 (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0))) 3869 (cond 3870 (sret? 3871 (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg recv-slot)))) 3872 (cond 3873 ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op))) 3874 (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n"))) 3875 (else 3876 (%cg-load-opnd-into cg fn-op 't0) 3877 (%cg-emit-many cg (list "%callr(t0)\n")))) 3878 (cond 3879 (has-result? 3880 (cond 3881 ;; >16B sret (A2): a0 holds recv-slot; push as struct lval. 3882 (sret? (cg-push cg (%opnd 'frame rty recv-slot #t))) 3883 ;; ≤16B struct/union (A1): fresh slot, spill from a0/a1. 3884 ((and (or (eq? rk 'struct) (eq? rk 'union)) 3885 (<= (ctype-size rty) 16)) 3886 (let* ((sz (ctype-size rty)) 3887 (al (max 8 (ctype-align rty))) 3888 (slot (cg-alloc-slot cg (align-up sz 8) al))) 3889 (%cg-emit-st-slot cg 'a0 slot) 3890 (cond ((> sz 8) (%cg-emit-st-slot cg 'a1 (+ slot 8)))) 3891 (cg-push cg (%opnd 'frame rty slot #t)))) 3892 (else 3893 (%cg-spill-reg cg 'a0 rty)))) 3894 (else #f)))) 3895 3896 ;; -------------------------------------------------------------------- 3897 ;; Return 3898 ;; -------------------------------------------------------------------- 3899 (define (cg-return cg) 3900 (let* ((ret-slot (%cg-fn-get cg '%fn-ret-slot)) 3901 (ret-type (%cg-fn-get cg '%fn-ret-type)) 3902 (rk (ctype-kind ret-type)) 3903 (sret? (%cg-fn-get cg '%fn-sret?))) 3904 (cond 3905 ((eq? rk 'void) 3906 (%cg-emit-many cg (list "%b(&.ret)\n"))) 3907 ((or (eq? rk 'struct) (eq? rk 'union)) 3908 ;; struct-by-value: ≤16B (A1) → ret-slot; >16B (A2 sret) → *sret-slot. 3909 (let* ((p (cg-pop cg)) (sz (ctype-size ret-type))) 3910 (cond ((not (opnd-lval? p)) 3911 (die #f "cg-return: struct value must be an lvalue"))) 3912 (%cg-emit-addr-of cg p 't0) 3913 (cond 3914 (sret? 3915 (%cg-emit-ld-slot cg 't2 (%cg-fn-get cg '%fn-sret-slot))) 3916 (else 3917 (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg ret-slot)))) 3918 (%cg-emit-byte-copy cg 't2 't0 't1 sz) 3919 (%cg-emit-many cg (list "%b(&.ret)\n")))) 3920 (else 3921 (let ((p (cg-pop cg))) 3922 (%cg-load-opnd-into cg p 'a0) 3923 (%cg-emit-st-slot cg 'a0 ret-slot) 3924 (%cg-emit-many cg (list "%b(&.ret)\n"))))))) 3925 3926 ;; -------------------------------------------------------------------- 3927 ;; Structured control flow 3928 ;; -------------------------------------------------------------------- 3929 (define (cg-if cg then-thunk) 3930 (let ((p (cg-pop cg))) 3931 (%cg-load-opnd-into cg p 't0) 3932 (%cg-emit-many cg (list "%if_nez(t0, {\n")) 3933 (then-thunk) 3934 (%cg-emit-many cg (list "})\n")))) 3935 3936 (define (cg-ifelse cg then-thunk else-thunk) 3937 (let ((p (cg-pop cg))) 3938 (%cg-load-opnd-into cg p 't0) 3939 (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) 3940 (then-thunk) 3941 (%cg-emit-many cg (list "}, {\n")) 3942 (else-thunk) 3943 (%cg-emit-many cg (list "})\n")))) 3944 3945 ;; Conditionals-as-values: `cg-ifelse` is correct for if-statements 3946 ;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends 3947 ;; with one rval on top of the vstack — and after both branches run, 3948 ;; we'd be left with TWO opnds, which breaks the type contract for 3949 ;; the surrounding expression. `cg-ifelse-merge` solves that: pop the 3950 ;; cond, allocate one result slot, and after each thunk runs, pop its 3951 ;; rval and store into the slot. Push the slot as one frame rval. 3952 ;; 3953 ;; Result type follows C11 §6.5.15 ¶5 for ternary: the usual arithmetic 3954 ;; conversions over the two arms' types. The slot stores the raw 8-byte 3955 ;; payload (per cc.scm's canonical-form discipline); %cg-load-opnd-into 3956 ;; then re-canonicalizes on read against whatever common type we picked. 3957 ;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the 3958 ;; parser, so the merge is a no-op on type. 3959 (define (cg-ifelse-merge cg then-thunk else-thunk) 3960 (let* ((cond-op (cg-pop cg))) 3961 (%cg-load-opnd-into cg cond-op 't0) 3962 (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) 3963 (then-thunk) 3964 (let* ((p (cg-pop cg)) 3965 (rty1 (opnd-type p)) 3966 (rk1 (ctype-kind rty1)) 3967 ;; Struct/union arms can't ride the canonical 8-byte word 3968 ;; slot — the arm's bytes have to land in a slot sized to 3969 ;; the struct, and each arm memcpys its lvalue in. tcc's 3970 ;; expr_cond does this exact `type = bt1 == 6 ? type1 : type2` 3971 ;; pattern across CType structs, so without this case 3972 ;; cc.scm-compiled tcc-boot2 self-corrupts. 3973 (aggr? (or (eq? rk1 'struct) (eq? rk1 'union))) 3974 (slot (cond (aggr? 3975 (cg-alloc-slot cg 3976 (align-up (ctype-size rty1) 8) 3977 (max 8 (ctype-align rty1)))) 3978 (else 3979 (cg-alloc-slot cg 8 8))))) 3980 (%cg-merge-write-arm cg p slot aggr?) 3981 (%cg-emit-many cg (list "}, {\n")) 3982 (else-thunk) 3983 (let* ((q (cg-pop cg)) 3984 (rty2 (opnd-type q))) 3985 (%cg-merge-write-arm cg q slot aggr?) 3986 (%cg-emit-many cg (list "})\n")) 3987 ;; Aggregate result is pushed as a frame lval so cg-copy-struct 3988 ;; (which asserts src must be lval) accepts it; %cg-emit-addr-of 3989 ;; falls through the `lval? #t` guard (slot is direct, not 3990 ;; indirect) and returns the slot's address either way. 3991 (cg-push cg (%opnd 'frame 3992 (%cg-merge-arith-type rty1 rty2) 3993 slot 3994 aggr?)))))) 3995 3996 (define (%cg-merge-write-arm cg op slot aggr?) 3997 (cond 3998 (aggr? 3999 (%cg-emit-addr-of cg op 't0) 4000 (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg slot)) 4001 (%cg-emit-byte-copy cg 't2 't0 't1 (ctype-size (opnd-type op)))) 4002 (else 4003 (%cg-load-opnd-into cg op 'a0) 4004 (%cg-emit-st-slot cg 'a0 slot)))) 4005 4006 ;; Usual arithmetic conversion over two ctypes (C11 §6.3.1.8): 4007 ;; integer-promote each (sub-int → int), then pick the wider with 4008 ;; unsigned tie-break. Falls back to t1 for non-arithmetic kinds 4009 ;; (pointer, struct, array — ternary on those preserves the first 4010 ;; arm's type as before). 4011 (define (%cg-merge-arith-type t1 t2) 4012 (cond 4013 ((and (%ctype-arith? t1) (%ctype-arith? t2)) 4014 (let ((p1 (cond ((< (ctype-size t1) 4) %t-i32) (else t1))) 4015 (p2 (cond ((< (ctype-size t2) 4) %t-i32) (else t2)))) 4016 (cond 4017 ((> (ctype-size p1) (ctype-size p2)) p1) 4018 ((> (ctype-size p2) (ctype-size p1)) p2) 4019 ((%ctype-unsigned? p1) p1) 4020 ((%ctype-unsigned? p2) p2) 4021 (else p1)))) 4022 (else t1))) 4023 4024 (define (cg-loop cg head-thunk body-thunk) 4025 ;; body-thunk receives the loop tag as its argument; parser uses 4026 ;; that tag for cg-break / cg-continue inside the body. 4027 (let ((tag (%cg-fresh-loop-tag cg))) 4028 (%cg-emit-many cg (list ".scope\n" 4029 ":.top\n")) 4030 (head-thunk) 4031 (cond 4032 ((zero? (cg-depth cg)) 0) 4033 (else 4034 (let ((c (cg-pop cg))) 4035 (%cg-load-opnd-into cg c 't0) 4036 (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n"))))) 4037 (body-thunk tag) 4038 (%cg-emit-many cg (list "%b(&.top)\n" 4039 ":.end\n" 4040 ".endscope\n")) 4041 tag)) 4042 4043 (define (cg-break cg tag) 4044 (%cg-emit-many cg (list "%break\n"))) 4045 4046 (define (cg-continue cg tag) 4047 (%cg-emit-many cg (list "%continue\n"))) 4048 4049 ;; -------------------------------------------------------------------- 4050 ;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 16-slot 4051 ;; (8 bytes each) save area at known frame offsets, populating each 4052 ;; slot from the appropriate ABI source — a-register for indices 0..3, 4053 ;; LDARG for indices 4..15. va_start sets ap to the address of the 4054 ;; first slot past the named-arg count; va_arg reads *ap, advances ap 4055 ;; by 8, and pushes the value as the requested type. 4056 ;; 4057 ;; ap is an lval (typically a `va_list` local). cg-va-start pops it, 4058 ;; computes the address, stores into *ap (or the slot directly), and 4059 ;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for 4060 ;; the value, advances ap, stores back, pushes the loaded value. 4061 ;; 4062 ;; Cap: total incoming args (named + variadic) must fit in the 16-slot 4063 ;; save area. Variadic call sites exceeding this die in cg-call; 4064 ;; variadic definitions whose named-arg count exceeds it die in 4065 ;; cg-fn-begin/v. 4066 ;; -------------------------------------------------------------------- 4067 (define (%cg-vararg-first-slot cg) 4068 (let ((s (%cg-fn-get cg '%fn-vararg-first-slot))) 4069 (cond ((not s) (die #f "cg-va-start: not a variadic function")) 4070 (else s)))) 4071 4072 (define (cg-va-start cg) 4073 ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0, 4074 ;; store through ap-lval. Pushes nothing. 4075 (let* ((ap-lv (cg-pop cg)) 4076 (vsl (%cg-vararg-first-slot cg))) 4077 (cond ((not (opnd-lval? ap-lv)) 4078 (die #f "cg-va-start: ap not lvalue"))) 4079 (%cg-emit-lea-slot cg "a0" (%cg-slot-expr cg vsl)) 4080 (%cg-emit-addr-of cg ap-lv 't0) 4081 (%cg-emit-st cg 'a0 't0 0))) 4082 4083 (define (cg-va-arg cg ctype) 4084 ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1. 4085 ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval 4086 ;; of type ctype (caller cg-cast's if needed). 4087 (let ((ap-lv (cg-pop cg))) 4088 (cond ((not (opnd-lval? ap-lv)) 4089 (die #f "cg-va-arg: ap not lvalue"))) 4090 ;; Address of the storage that holds ap → t0; ap value → a0. 4091 (%cg-emit-addr-of cg ap-lv 't0) 4092 (%cg-emit-ld cg 'a0 't0 0) 4093 ;; Read *ap into a1 (full 8 bytes; cg-cast on the rval the caller 4094 ;; pushes will narrow if needed). Advance ap by 8 and store back. 4095 (%cg-emit-ld cg 'a1 'a0 0) 4096 (%cg-emit-many cg (list "%addi(a0, a0, 8)\n")) 4097 (%cg-emit-st cg 'a0 't0 0) 4098 ;; Spill the loaded value (a1) to a fresh frame slot under ctype. 4099 (%cg-spill-reg cg 'a1 ctype))) 4100 4101 (define (cg-va-end cg) 4102 ;; va_end is a no-op in this design. Pop and discard ap-lval. 4103 (cg-pop cg) 4104 0) 4105 4106 ;; -------------------------------------------------------------------- 4107 ;; Labels and unconditional goto. 4108 ;; C labels have function scope, even when the labelled statement appears 4109 ;; inside a nested block/loop. Emit them as function-qualified global 4110 ;; labels rather than dotted hex2++ locals, because dotted definitions 4111 ;; inside a nested `.scope` would be invisible to gotos outside it. 4112 ;; -------------------------------------------------------------------- 4113 (define (%cg-user-label cg name-bv) 4114 (let ((fn (%cg-fn-get cg '%fn-name))) 4115 (bv-cat (list "cc__" fn "__user_" name-bv)))) 4116 4117 (define (cg-emit-label cg name-bv) 4118 (%cg-emit-many cg (list ":" (%cg-user-label cg name-bv) "\n"))) 4119 4120 (define (cg-goto cg name-bv) 4121 (%cg-emit-many cg (list "%b(&" (%cg-user-label cg name-bv) ")\n"))) 4122 4123 ;; -------------------------------------------------------------------- 4124 ;; switch 4125 ;; -------------------------------------------------------------------- 4126 (define-record-type swctx 4127 (%swctx ctrl-slot end-tag default-lbl) 4128 swctx? 4129 (ctrl-slot swctx-ctrl-slot) 4130 (end-tag swctx-end-tag) 4131 (default-lbl swctx-default-lbl swctx-default-lbl-set!)) 4132 4133 (define (cg-switch-begin cg) 4134 (let* ((p (cg-pop cg)) 4135 (off (cg-alloc-slot cg 8 8)) 4136 (tag (%cg-fresh-loop-tag cg)) 4137 (disp-lbl (bytevector-append "sw_disp_" tag))) 4138 (%cg-load-opnd-into cg p 't0) 4139 (%cg-emit-st-slot cg 't0 off) 4140 (%cg-emit-many cg (list ".scope\n" 4141 "%b(&." disp-lbl ")\n")) 4142 (%swctx off tag #f))) 4143 4144 (define (cg-switch-case cg sw const-int) 4145 (let* ((lbl (%cg-fresh-lbl cg)) 4146 (key (string->symbol 4147 (bytevector-append "%sw_cases__" (swctx-end-tag sw)))) 4148 (cur (or (%cg-fn-get cg key) '())) 4149 (entry (cons const-int lbl))) 4150 (%cg-fn-set! cg key (cons entry cur)) 4151 (%cg-emit-many cg (list ":." lbl "\n")))) 4152 4153 (define (cg-switch-default cg sw) 4154 (let ((lbl (%cg-fresh-lbl cg))) 4155 (swctx-default-lbl-set! sw lbl) 4156 (%cg-emit-many cg (list ":." lbl "\n")))) 4157 4158 (define (cg-switch-end cg sw) 4159 (let* ((tag (swctx-end-tag sw)) 4160 (key (string->symbol (bytevector-append "%sw_cases__" tag))) 4161 (cases (reverse (or (%cg-fn-get cg key) '()))) 4162 (default-lbl (swctx-default-lbl sw)) 4163 (disp-lbl (bytevector-append "sw_disp_" tag))) 4164 (%cg-emit-many cg (list "%break\n" 4165 ":." disp-lbl "\n")) 4166 (%cg-emit-many cg (list "%ld(t0, sp, " 4167 (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n")) 4168 (for-each 4169 (lambda (c) 4170 (%cg-emit-many cg (list "%switch_case(t0, t1, " 4171 (%n (car c)) ", &." (cdr c) ")\n"))) 4172 cases) 4173 (cond 4174 (default-lbl (%cg-emit-many cg (list "%b(&." default-lbl ")\n"))) 4175 (else 0)) 4176 (%cg-emit-many cg (list "%break\n" 4177 ":.end\n" 4178 ".endscope\n")))) 4179 4180 ;; -------------------------------------------------------------------- 4181 ;; Globals and data 4182 ;; -------------------------------------------------------------------- 4183 ;; cg-emit-global: emit a global symbol into either .data (initialized) 4184 ;; or .bss (zero-init). 4185 ;; 4186 ;; init can be: 4187 ;; #f — zero-init in .bss (size from sym's ctype). 4188 ;; (piece ...) — initialized in .data; pieces concatenated. 4189 ;; 4190 ;; Each piece is either: 4191 ;; <bytevector> — raw bytes; emitted as bare hex chunks 4192 ;; (64 bytes / 128 hex chars per line). 4193 ;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label; 4194 ;; emitted as `&<label> %(0)` (4B label ref + 4195 ;; 4B zero pad). 4196 (define (%cg-init-piece->bv piece) 4197 (cond 4198 ((bytevector? piece) 4199 (bv-cat (%cg-bv->hex-lines piece #f))) 4200 ((and (pair? piece) (eq? (car piece) 'label-ref)) 4201 (bv-cat (list "&" (cdr piece) " %(0)\n"))) 4202 (else (die #f "cg-emit-global: bad init piece" piece)))) 4203 4204 (define (cg-emit-global cg sym init) 4205 (let* ((lbl (%cg-sym-label sym)) 4206 (sz (ctype-size (sym-type sym))) 4207 (size (if (< sz 0) 8 sz)) 4208 (al (max 1 (ctype-align (sym-type sym))))) 4209 (cond 4210 (init 4211 (buf-push! (cg-data cg) (bv-cat (list "\n.align " (%n al) "\n:" 4212 lbl "\n"))) 4213 (let walk ((ps init)) 4214 (cond 4215 ((null? ps) 0) 4216 (else 4217 (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps))) 4218 (walk (cdr ps)))))) 4219 (else 4220 (buf-push! (cg-bss cg) 4221 (bv-cat (list "\n.align " (%n al) "\n:" lbl "\n" 4222 (let zero-loop ((rem size) (acc '())) 4223 (cond 4224 ((<= rem 0) (bv-cat (reverse acc))) 4225 ((>= rem 8) 4226 (zero-loop (- rem 8) (cons "$(0)\n" acc))) 4227 (else 4228 (zero-loop (- rem 1) (cons "!(0)\n" acc)))))))))) 4229 0)) 4230 4231 (define (cg-emit-extern cg sym) 0) 4232 4233 ;; Record `n` as a tentative file-scope definition: don't emit BSS yet, 4234 ;; but if no full definition appears by end of TU, cg-finish will emit 4235 ;; zero-init storage for it. Idempotent — extra entries with the same 4236 ;; name are harmless (cg-finish dedupes via scope-lookup). 4237 (define (cg-add-tentative! cg n) 4238 (let* ((w (cg-world cg)) 4239 (cur (world-tentatives w))) 4240 (cond 4241 ((member n cur) #t) 4242 (else (world-tentatives-set! w (cons n cur)))))) 4243 4244 ;; End-of-TU pass: for each pending tentative, look up the latest sym 4245 ;; binding. If it's still `defined?=#f`, no real definition replaced it, 4246 ;; so emit zero-init storage now. Otherwise the .data emission already 4247 ;; covered it. 4248 (define (cg-flush-tentatives! cg) 4249 (let* ((w (cg-world cg)) 4250 (top (car (world-scope w)))) 4251 (for-each 4252 (lambda (n) 4253 (let ((sm (alist-ref n top))) 4254 (cond 4255 ((and sm 4256 (eq? (sym-kind sm) 'var) 4257 (not (sym-defined? sm))) 4258 (cg-emit-global cg sm #f))))) 4259 (world-tentatives w)))) 4260 4261 (define (cg-intern-string cg bv-content) 4262 (let ((p (alist-ref bv-content (cg-str-pool cg)))) 4263 (cond 4264 (p p) 4265 (else 4266 (let* ((n (length (cg-str-pool cg))) 4267 (lbl (bytevector-append 4268 (cg-str-prefix cg) "cc__str_" (%n n)))) 4269 (cg-str-pool-set! cg 4270 (alist-set bv-content lbl (cg-str-pool cg))) 4271 (buf-push! (cg-data cg) 4272 (bv-cat (append (list "\n.align " (%n %CG-STR-ALIGN) 4273 "\n:" lbl "\n") 4274 (%cg-bv->hex-lines bv-content #t) 4275 (list ".align " (%n %CG-STR-ALIGN) "\n")))) 4276 lbl))))) 4277 4278 ;; Mint a fresh, never-recurring label for an unnamed file-scope 4279 ;; compound literal. Mirrors cg-intern-string's namer pattern (prefix + 4280 ;; "cc__cl_" + N), with N drawn from cg-label-ctr — the same monotonic 4281 ;; counter the per-fn label minters use. Different prefix → no collision 4282 ;; with `Lcc__N` / `lbl_N`. 4283 (define (%cg-fresh-cl-label cg) 4284 (let* ((n (cg-label-ctr cg)) 4285 (lbl (bytevector-append (cg-str-prefix cg) "cc__cl_" (%n n)))) 4286 (cg-label-ctr-set! cg (+ n 1)) 4287 lbl)) 4288 4289 ;; Render BV's bytes as bare hex accepted directly by hex2++. Lines are 4290 ;; chunked to ≤128 hex chars (= 64 bytes) to keep generated P1pp readable. 4291 ;; 4292 ;; If TRAILING-NUL? is #t, an extra 0x00 byte is appended to terminate 4293 ;; a C string. Alignment is emitted explicitly by callers with .align 4294 ;; so hex2++ owns padding instead of cc.scm manufacturing zero bytes. 4295 ;; The other caller (%cg-init-piece->bv) emits arbitrary initializer 4296 ;; bytes whose length is sized exactly to the C-visible field; padding a 4297 ;; 4-byte int slot to 8 would shift every following struct field. 4298 ;; Returns a list of bytevectors ready for bv-cat. 4299 (define %CG-HEX-CHUNK-BYTES 64) 4300 (define %CG-STR-ALIGN 8) 4301 4302 (define (%cg-bv->hex-lines bv trailing-nul?) 4303 (let* ((len (bytevector-length bv)) 4304 (logical (cond (trailing-nul? (+ len 1)) (else len))) 4305 (total logical)) 4306 (cond 4307 ((= total 0) '()) 4308 (else 4309 (let loop ((i 0) (acc '())) 4310 (cond 4311 ((>= i total) (reverse acc)) 4312 (else 4313 (let ((end (cond ((< (+ i %CG-HEX-CHUNK-BYTES) total) 4314 (+ i %CG-HEX-CHUNK-BYTES)) 4315 (else total)))) 4316 (loop end (cons (%cg-hex-line bv i end len) acc)))))))))) 4317 4318 ;; One `XXXX...XX\n` line covering BV bytes [START, END). Indices 4319 ;; >= LEN render as 0x00 (used for the trailing NUL terminator). 4320 (define (%cg-hex-line bv start end len) 4321 (let* ((nbytes (- end start)) 4322 (out (make-bytevector (+ (* 2 nbytes) 1)))) 4323 (let loop ((j start) (k 0)) 4324 (cond 4325 ((= j end) 4326 (bytevector-u8-set! out k (char->integer #\newline)) 4327 out) 4328 (else 4329 (let ((b (cond ((< j len) (bytevector-u8-ref bv j)) 4330 (else 0)))) 4331 (bytevector-u8-set! out k (%cg-hex-digit 4332 (arithmetic-shift b -4))) 4333 (bytevector-u8-set! out (+ k 1) (%cg-hex-digit (bit-and b 15))) 4334 (loop (+ j 1) (+ k 2)))))))) 4335 4336 (define (%cg-hex-digit n) 4337 (cond ((< n 10) (+ n (char->integer #\0))) 4338 (else (+ (- n 10) (char->integer #\A))))) 4339 4340 ;; -------------------------------------------------------------------- 4341 ;; Frame 4342 ;; -------------------------------------------------------------------- 4343 (define (cg-alloc-slot cg bytes align) 4344 (let* ((aligned (align-up (cg-frame-hi cg) align)) 4345 (new-hi (+ aligned bytes))) 4346 (cg-frame-hi-set! cg new-hi) 4347 aligned)) 4348 ;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1. 4349 4350 (define (make-pstate iter cg) 4351 (%pstate iter (cg-world cg) '() #f cg)) 4352 4353 (define (peek ps) (iter-peek (ps-iter ps))) 4354 (define (peek2 ps) (iter-peek2 (ps-iter ps))) 4355 (define (advance ps) (iter-next (ps-iter ps))) 4356 (define (at-kw? ps s) 4357 (pmatch (peek ps) 4358 (($ tok? (kind KW) (value ,v)) (eq? v s)) 4359 (else #f))) 4360 (define (at-punct? ps s) 4361 (pmatch (peek ps) 4362 (($ tok? (kind PUNCT) (value ,v)) (eq? v s)) 4363 (else #f))) 4364 (define (expect-kw ps s) 4365 (let ((t (peek ps))) 4366 (pmatch t 4367 (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps)) 4368 (else (die (tok-loc t) "expected kw" s))))) 4369 (define (expect-punct ps s) 4370 (let ((t (peek ps))) 4371 (pmatch t 4372 (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps)) 4373 (else (die (tok-loc t) "expected punct" s))))) 4374 4375 (define (scope-enter! ps) 4376 (ps-scope-set! ps (cons '() (ps-scope ps))) 4377 (ps-tags-set! ps (cons '() (ps-tags ps)))) 4378 (define (scope-leave! ps) 4379 (ps-scope-set! ps (cdr (ps-scope ps))) 4380 (ps-tags-set! ps (cdr (ps-tags ps)))) 4381 (define (ctype-compat? a b) 4382 (cond 4383 ((eq? a b) #t) 4384 ((not (eq? (ctype-kind a) (ctype-kind b))) #f) 4385 (else 4386 (let ((k (ctype-kind a))) 4387 (cond 4388 ((eq? k 'ptr) (ctype-compat? (ctype-ext a) (ctype-ext b))) 4389 ((eq? k 'arr) 4390 (let ((ea (ctype-ext a)) (eb (ctype-ext b))) 4391 (and (ctype-compat? (car ea) (car eb)) 4392 (or (= (cdr ea) (cdr eb)) 4393 (< (cdr ea) 0) (< (cdr eb) 0))))) 4394 ((eq? k 'fn) (%fn-ctype-compat? (ctype-ext a) (ctype-ext b))) 4395 ((or (eq? k 'struct) (eq? k 'union) (eq? k 'enum)) #f) 4396 (else #t)))))) 4397 4398 (define (%fn-ctype-compat? a b) 4399 (and (ctype-compat? (car a) (car b)) 4400 (eq? (car (cddr a)) (car (cddr b))) 4401 (%fn-params-compat? (cadr a) (cadr b)))) 4402 4403 (define (%fn-params-compat? pa pb) 4404 (cond 4405 ((and (null? pa) (null? pb)) #t) 4406 ((or (null? pa) (null? pb)) #f) 4407 ((ctype-compat? (cdar pa) (cdar pb)) 4408 (%fn-params-compat? (cdr pa) (cdr pb))) 4409 (else #f))) 4410 4411 (define (sym-merge old new) 4412 (cond 4413 ((not (eq? (sym-kind old) (sym-kind new))) 4414 (die #f "redecl: kind mismatch" (sym-name old))) 4415 ((not (ctype-compat? (sym-type old) (sym-type new))) 4416 (die #f "redecl: type mismatch" (sym-name old))) 4417 ((eq? (sym-kind old) 'typedef) old) 4418 ((eq? (sym-kind old) 'enum-const) 4419 (cond ((equal? (sym-slot old) (sym-slot new)) old) 4420 (else (die #f "enum-const redecl" (sym-name old))))) 4421 ((and (sym-defined? old) (sym-defined? new)) 4422 (die #f "redefinition" (sym-name old))) 4423 ;; Linkage inherits from the first declaration (C 6.2.2 ¶4): if a 4424 ;; later decl/def of the same identifier doesn't carry a storage 4425 ;; class, it picks up the prior one. tcc.c relies on this with 4426 ;; `static T f(); ... T f() {…}` — the prior `static` makes both 4427 ;; the decl and the def internal-linkage. Without this carry- 4428 ;; through cc.scm split them across two label namespaces. 4429 ((sym-defined? new) 4430 (cond 4431 ((eq? (sym-storage old) 'static) 4432 (%sym (sym-name new) (sym-kind new) 'static 4433 (sym-type new) (sym-slot new) #t)) 4434 (else new))) 4435 (else old))) 4436 4437 (define (scope-bind! ps n s) 4438 (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f)) 4439 (old (alist-ref n top))) 4440 (cond 4441 ((not old) 4442 (ps-scope-set! ps (cons (alist-set n s top) r))) 4443 (else 4444 (let ((merged (sym-merge old s))) 4445 (cond 4446 ((eq? merged old) #t) 4447 (else 4448 (ps-scope-set! ps (cons (alist-set n merged top) r))))))))) 4449 (define (scope-lookup ps n) 4450 (let loop ((f (ps-scope ps))) 4451 (cond ((null? f) #f) 4452 (else 4453 (let ((v (alist-ref n (car f)))) 4454 (if v v (loop (cdr f)))))))) 4455 (define (tag-bind! ps n c) 4456 (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f))) 4457 (ps-tags-set! ps (cons (alist-set n c top) r)))) 4458 (define (tag-lookup ps n) 4459 (let loop ((f (ps-tags ps))) 4460 (cond ((null? f) #f) 4461 (else (let ((v (alist-ref n (car f)))) 4462 (if v v (loop (cdr f)))))))) 4463 (define (typedef? ps n) 4464 (let ((sm (scope-lookup ps n))) 4465 (and sm (eq? (sym-kind sm) 'typedef)))) 4466 4467 (define (%mk-ptr p) (%ctype 'ptr 8 8 p)) 4468 (define (%mk-arr e n) 4469 (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e))) 4470 (ctype-align e) (cons e n))) 4471 (define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v))) 4472 (define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr)) 4473 (define (ctype-is-fn? t) (eq? (ctype-kind t) 'fn)) 4474 (define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr)) 4475 4476 (define (eat-cv-quals! ps) 4477 (cond ((at-kw? ps '__attribute__) 4478 (skip-gnu-attribute! ps) (eat-cv-quals! ps)) 4479 ((or (at-kw? ps 'const) (at-kw? ps 'volatile) 4480 (at-kw? ps 'restrict)) 4481 (advance ps) (eat-cv-quals! ps)) 4482 (else #t))) 4483 4484 ;; Consume a GNU `__attribute__ (( ... ))` spec and discard. The keyword 4485 ;; has been peeked but not yet consumed. tcc.c's prototypes use these 4486 ;; for noreturn / format / aligned annotations that the bootstrap doesn't 4487 ;; need to honour semantically — same softening pattern as floats and 4488 ;; rejected-but-accepted type specifiers. 4489 (define (skip-gnu-attribute! ps) 4490 (advance ps) 4491 (expect-punct ps 'lparen) 4492 (let loop ((depth 1)) 4493 (let ((t (peek ps))) 4494 (cond 4495 ((eq? (tok-kind t) 'EOF) 4496 (die (tok-loc t) "EOF in __attribute__")) 4497 ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen)) 4498 (advance ps) (loop (+ depth 1))) 4499 ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'rparen)) 4500 (advance ps) 4501 (cond ((= depth 1) #t) 4502 (else (loop (- depth 1))))) 4503 (else (advance ps) (loop depth)))))) 4504 4505 (define (eat-gnu-attributes! ps) 4506 (cond ((at-kw? ps '__attribute__) 4507 (skip-gnu-attribute! ps) (eat-gnu-attributes! ps)) 4508 (else #t))) 4509 4510 (define (parse-decl-spec ps) 4511 (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f)) 4512 (let ((t (peek ps))) 4513 (cond 4514 ((at-kw? ps '__attribute__) 4515 (skip-gnu-attribute! ps) (loop sto sn lg b saw)) 4516 ((or (at-kw? ps 'auto) (at-kw? ps 'register)) 4517 (advance ps) (loop sto sn lg b #t)) 4518 ((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t)) 4519 ((at-kw? ps 'extern) (advance ps) (loop 'extern sn lg b #t)) 4520 ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t)) 4521 ((or (at-kw? ps 'const) (at-kw? ps 'volatile) 4522 (at-kw? ps 'restrict) (at-kw? ps 'inline)) 4523 (advance ps) (loop sto sn lg b #t)) 4524 ((at-kw? ps 'signed) (advance ps) (loop sto 'signed lg b #t)) 4525 ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t)) 4526 ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t)) 4527 ((at-kw? ps 'long) (advance ps) (loop sto sn (+ lg 1) b #t)) 4528 ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t)) 4529 ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t)) 4530 ((at-kw? ps 'int) (advance ps) (loop sto sn lg 'int #t)) 4531 ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t)) 4532 ;; Floats: parsed as type specifiers so prototypes and struct 4533 ;; layouts in the flattened TU don't trip the parser. The cg 4534 ;; rejects fp loads/arith/casts at use, see %cg-fp-reject!. 4535 ;; _Complex / _Imaginary are eaten silently — tcc.c only mentions 4536 ;; them inside HAVE_FLOAT-gated paths. 4537 ((at-kw? ps 'float) (advance ps) (loop sto sn lg 'float #t)) 4538 ((at-kw? ps 'double) (advance ps) (loop sto sn lg 'double #t)) 4539 ((or (at-kw? ps '_Complex) (at-kw? ps '_Imaginary)) 4540 (advance ps) (loop sto sn lg b #t)) 4541 ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local) 4542 (at-kw? ps '_Alignas) (at-kw? ps '_Generic) 4543 (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert)) 4544 (die (tok-loc t) "rejected" (tok-value t))) 4545 ((at-kw? ps 'struct) 4546 (loop sto sn lg (parse-aggregate-spec ps 'struct) #t)) 4547 ((at-kw? ps 'union) 4548 (loop sto sn lg (parse-aggregate-spec ps 'union) #t)) 4549 ((at-kw? ps 'enum) 4550 (loop sto sn lg (parse-enum-spec ps) #t)) 4551 ;; __builtin_va_list — gcc/clang builtin type. We don't model 4552 ;; it as a struct; for our P1 ABI a va_list is just a char* 4553 ;; into the stack save area (cg-va-start/arg/end work over an 4554 ;; 8-byte slot). Letting __builtin_va_list mean `char *` here 4555 ;; lets a single header source — `typedef __builtin_va_list 4556 ;; va_list;` — compile cleanly under both cc.scm and stock 4557 ;; gcc/clang (where it's their native struct). 4558 ((and (not b) (eq? (tok-kind t) 'IDENT) 4559 (bv= (tok-value t) "__builtin_va_list")) 4560 (advance ps) 4561 (loop sto sn lg (%ctype 'ptr 8 8 %t-i8) #t)) 4562 ((and (not b) (eq? (tok-kind t) 'IDENT) 4563 (let ((sm (scope-lookup ps (tok-value t)))) 4564 (and sm (eq? (sym-kind sm) 'typedef)))) 4565 (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk)))) 4566 (loop sto sn lg (sym-type sm) #t))) 4567 (else 4568 (cond ((not saw) (die (tok-loc t) "expected decl-spec" 4569 (tok-value t))) 4570 (else (values sto (resolve-base t sn lg b))))))))) 4571 4572 (define (resolve-base loc sn lg b) 4573 (cond 4574 ((eq? b 'void) 4575 (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void)) 4576 ((eq? b 'bool) 4577 (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool)) 4578 ((eq? b 'char) 4579 (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8))) 4580 ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg))))) 4581 (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16)) 4582 ((= lg 0) (if (eq? sn 'unsigned) %t-u32 %t-i32)) 4583 (else (if (eq? sn 'unsigned) %t-u64 %t-i64)))) 4584 ((eq? b 'float) 4585 (if (or sn (not (zero? lg))) (die loc "float+qual") %t-flt)) 4586 ((eq? b 'double) 4587 (cond (sn (die loc "double+sign")) 4588 ((= lg 0) %t-dbl) 4589 ((= lg 1) %t-ldbl) 4590 (else (die loc "double+long*" lg)))) 4591 ((ctype? b) 4592 (if (or sn (not (zero? lg))) (die loc "type+qual") b)) 4593 (else (die loc "unknown decl-spec")))) 4594 4595 (define (parse-aggregate-spec ps kind) 4596 (advance ps) 4597 ;; GCC `__attribute__((...))` may sit between `struct/union` and 4598 ;; the tag/`{`. Eat and discard. 4599 (eat-gnu-attributes! ps) 4600 (let ((tag (pmatch (peek ps) 4601 (($ tok? (kind IDENT)) (tok-value (advance ps))) 4602 (else #f)))) 4603 (eat-gnu-attributes! ps) 4604 (cond 4605 ((at-punct? ps 'lbrace) 4606 (advance ps) 4607 ;; A `struct/union TAG { ... }` declaration introduces (or 4608 ;; completes) the tag in the *current* scope. Looking up in 4609 ;; outer scopes via tag-lookup would let an inner-scope 4610 ;; definition mutate an outer-scope same-tag ctype via 4611 ;; complete-agg!. Restrict the reuse to the top frame, and 4612 ;; only when the existing tag is still incomplete (size < 0); 4613 ;; otherwise this is an attempted redefinition. 4614 (let* ((ex (and tag (alist-ref tag (car (ps-tags ps))))) 4615 (ct (cond ((and ex (eq? (ctype-kind ex) kind) 4616 (< (ctype-size ex) 0)) ex) 4617 ((and ex (eq? (ctype-kind ex) kind)) 4618 (die (tok-loc (peek ps)) "agg redefinition" tag)) 4619 (else (let ((c (%ctype kind -1 -1 4620 (list (or tag #f) #f '())))) 4621 (if tag (tag-bind! ps tag c)) c)))) 4622 (fields (parse-struct-fields ps kind))) 4623 (expect-punct ps 'rbrace) 4624 (complete-agg! ct kind tag fields) ct)) 4625 (tag (let ((ex (tag-lookup ps tag))) 4626 (cond (ex ex) 4627 (else (let ((c (%ctype kind -1 -1 4628 (list tag #f '())))) 4629 (tag-bind! ps tag c) c))))) 4630 (else (die (tok-loc (peek ps)) "anon agg"))))) 4631 4632 (define (parse-struct-fields ps kind) 4633 ;; For unions, every field stays at offset 0; complete-agg! takes the 4634 ;; max of field sizes for the union's overall size. 4635 (let ((struct? (eq? kind 'struct))) 4636 (let loop ((acc '()) (off 0)) 4637 (cond 4638 ((at-punct? ps 'rbrace) (reverse acc)) 4639 (else 4640 (let-values (((_sto bty) (parse-decl-spec ps))) 4641 (let dl ((acc2 acc) (o2 off)) 4642 (let*-values (((nm ty) (parse-declarator ps bty))) 4643 (let* ((al (max (ctype-align ty) 1)) 4644 (sz (ctype-size ty)) 4645 (oa (if struct? (align-up o2 al) 0)) 4646 (next (if struct? (+ oa (max sz 0)) 0))) 4647 (cond 4648 ((at-punct? ps 'comma) 4649 (advance ps) 4650 (dl (cons (list nm ty oa) acc2) next)) 4651 ((at-punct? ps 'semi) 4652 (advance ps) 4653 (loop (cons (list nm ty oa) acc2) next)) 4654 (else (die (tok-loc (peek ps)) "field")))))))))))) 4655 4656 (define (complete-agg! ct k tag fs) 4657 (let* ((ma (let m ((xs fs) (a 1)) 4658 (if (null? xs) a 4659 (m (cdr xs) (max a (ctype-align (cadr (car xs)))))))) 4660 (last (let l ((xs fs) (e 0)) 4661 (if (null? xs) e 4662 (let* ((f (car xs)) (off (car (cddr f))) 4663 (sz (ctype-size (cadr f)))) 4664 (l (cdr xs) (max e (+ off (max sz 0)))))))) 4665 (sz (cond ((eq? k 'union) 4666 (let u ((xs fs) (s 0)) 4667 (if (null? xs) s 4668 (u (cdr xs) 4669 (max s (ctype-size (cadr (car xs)))))))) 4670 (else (align-up last ma))))) 4671 (ctype-size-set! ct sz) 4672 (ctype-align-set! ct ma) 4673 (ctype-ext-set! ct (list tag #t fs)) 4674 ;; Phase 3: if `ct` is a forward-declared struct/union that lived in 4675 ;; main from a prior decl, its newly-set ext lives in scratch and 4676 ;; would dangle on reset-scratch-heap!. Track it so promote-roots! 4677 ;; can rewrite ext in main before the boundary fires. Scratch-resident 4678 ;; ct (defined and completed in this decl) is promoted normally via 4679 ;; the tag walker. 4680 (set! %promote-pending-completions 4681 (cons ct %promote-pending-completions)))) 4682 4683 (define (parse-enum-spec ps) 4684 (advance ps) 4685 (let ((tag (pmatch (peek ps) 4686 (($ tok? (kind IDENT)) (tok-value (advance ps))) 4687 (else #f)))) 4688 (cond 4689 ((at-punct? ps 'lbrace) 4690 (advance ps) 4691 ;; Parse all members first, then construct the enum ctype with 4692 ;; the final members list and tag-bind it. Members reference 4693 ;; earlier enum-consts via scope-lookup (not via the enum tag), 4694 ;; so deferring tag-bind! is safe. 4695 (let loop ((vs '()) (nv 0)) 4696 (cond 4697 ((at-punct? ps 'rbrace) 4698 (advance ps) 4699 (let ((ct (%ctype 'enum 4 4 (list tag (reverse vs))))) 4700 (if tag (tag-bind! ps tag ct)) 4701 ct)) 4702 (else 4703 (let* ((nt (advance ps)) (nm (tok-value nt)) 4704 (val (cond ((at-punct? ps 'assign) 4705 (advance ps) (parse-const-int ps)) 4706 (else nv)))) 4707 (scope-bind! ps nm 4708 (%sym nm 'enum-const #f %t-i32 val #t)) 4709 (cond ((at-punct? ps 'comma) (advance ps)) 4710 ((at-punct? ps 'rbrace) #t) 4711 (else (die (tok-loc (peek ps)) "enum"))) 4712 (loop (cons (cons nm val) vs) (+ val 1))))))) 4713 (tag (let ((e (tag-lookup ps tag))) 4714 (cond (e e) 4715 (else (let ((c (%ctype 'enum 4 4 (list tag '())))) 4716 (tag-bind! ps tag c) c))))) 4717 (else (die (tok-loc (peek ps)) "enum"))))) 4718 4719 ;; ==================================================================== 4720 ;; Integer constant expressions (C99 §6.6). 4721 ;; 4722 ;; parse-const-expr ps -> (value . ctype) 4723 ;; A self-contained walker that never touches cg. The four call sites 4724 ;; that demand an integer constant expression — array bounds, enum 4725 ;; initializers, case labels, file-scope/static initializers — all go 4726 ;; through here. Returns a (value . ctype) pair so a final cast can 4727 ;; truncate at the target type's width (e.g. `(int)(unsigned char)257` 4728 ;; needs the inner cast to mask off to u8 before the outer relabel). 4729 ;; 4730 ;; Operand surface: integer / character literals, enum constants, 4731 ;; sizeof(TYPENAME), unary + - ~ !, binary + - * / % << >> & | ^, 4732 ;; compare < <= > >= == !=, logical && || (short-circuit at the value 4733 ;; layer; both sides are still parsed so the token stream advances), 4734 ;; ternary ?:, cast to integer type, parenthesization. Anything else 4735 ;; dies. Floats / function calls / address-of / non-const idents / VLAs 4736 ;; are out of scope. 4737 ;; ==================================================================== 4738 4739 ;; Truncate VALUE to the width and signedness of CT. Integer types only 4740 ;; — pointer/array/etc. operands abort upstream. 4741 (define (%const-trunc value ct) 4742 (let* ((sz (ctype-size ct)) 4743 (k (ctype-kind ct)) 4744 (mask (cond ((<= sz 0) 0) 4745 ((= sz 1) #xff) 4746 ((= sz 2) #xffff) 4747 ((= sz 4) #xffffffff) 4748 (else -1)))) 4749 (cond 4750 ;; bool: 0 or 1. 4751 ((eq? k 'bool) (if (= value 0) 0 1)) 4752 ;; 8-byte integers — value already fits in scheme's bignum. 4753 ((or (eq? k 'i64) (eq? k 'u64)) 4754 (cond ((eq? k 'u64) 4755 ;; Mask to 64 bits without losing sign on negative values. 4756 (bit-and value #xffffffffffffffff)) 4757 (else value))) 4758 ((%ctype-unsigned? ct) (bit-and value mask)) 4759 (else 4760 ;; Signed: mask to width, then sign-extend if top bit is set. 4761 (let* ((m (bit-and value mask)) 4762 (sign-bit (arithmetic-shift 1 (- (* sz 8) 1)))) 4763 (cond ((= 0 (bit-and m sign-bit)) m) 4764 (else (- m (arithmetic-shift 1 (* sz 8)))))))))) 4765 4766 ;; Usual arithmetic conversions on (value . ctype) pairs. Both operands 4767 ;; have already been integer-promoted (≤ int → int) by the caller. 4768 ;; Returns three values: truncated a, truncated b, and the shared result 4769 ;; ctype. 4770 (define (%const-arith-conv ap bp) 4771 (let* ((av (car ap)) (at (cdr ap)) 4772 (bv (car bp)) (bt (cdr bp)) 4773 (rt (%const-arith-conv-type at bt))) 4774 (values (%const-trunc av rt) (%const-trunc bv rt) rt))) 4775 4776 (define (%const-arith-conv-type at bt) 4777 ;; Pick the wider type; tie-break on unsigned. Caller has already 4778 ;; promoted both to >= int width. 4779 (let ((sa (ctype-size at)) (sb (ctype-size bt))) 4780 (cond 4781 ((> sa sb) at) 4782 ((> sb sa) bt) 4783 ((%ctype-unsigned? at) at) 4784 ((%ctype-unsigned? bt) bt) 4785 (else at)))) 4786 4787 (define (%const-promote vp) 4788 ;; Integer promotion (C11 §6.3.1.1): types narrower than int 4789 ;; (i8/u8/i16/u16/bool) widen to (signed) int — every value of an 4790 ;; unsigned sub-int type fits in int on this target, so the promotion 4791 ;; rank picks signed int, not unsigned int. This matters for the 4792 ;; usual arithmetic conversions in cross-signedness comparisons, 4793 ;; e.g. ((unsigned char)-1 < (int)-1) must promote LHS to int 255 4794 ;; (not u32 0xff) so the result is 0, not 1. 4795 (let* ((v (car vp)) (ct (cdr vp)) 4796 (sz (ctype-size ct))) 4797 (cond 4798 ((< sz 4) (cons (%const-trunc v %t-i32) %t-i32)) 4799 (else vp)))) 4800 4801 (define (%const-bool? vp) (not (= 0 (car vp)))) 4802 4803 (define (parse-const-expr ps) (parse-const-cond ps)) 4804 4805 ;; Ternary (right-associative). Per C11 §6.6 ¶3 + §6.5.15/4 only the 4806 ;; chosen branch is evaluated; the other need not be a valid constant 4807 ;; expression (e.g. `1 ? 2 : 1/0` must yield 2, not abort). The dead 4808 ;; arm is skipped via %const-skip-cond-{mid,rhs}, like the &&/|| 4809 ;; short-circuit paths above. 4810 (define (parse-const-cond ps) 4811 (let ((c (parse-const-lor ps))) 4812 (cond 4813 ((at-punct? ps 'qmark) 4814 (advance ps) 4815 (cond 4816 ((%const-bool? c) 4817 (let* ((t (parse-const-expr ps)) 4818 (_ (expect-punct ps 'colon))) 4819 (%const-skip-dead-arm ps) 4820 t)) 4821 (else 4822 (%const-skip-dead-arm ps) 4823 (expect-punct ps 'colon) 4824 (parse-const-cond ps)))) 4825 (else c)))) 4826 4827 ;; Generic top-level punct scanner used by skip-rhs / skip-cond helpers. 4828 ;; Walks paren/bracket depth (a closing bracket at d=0 always stops) and 4829 ;; optionally tracks ternary `?` depth. STOP? receives the punct value 4830 ;; v at top-level (d=0, q=0 when q-aware?) and returns #t to stop. With 4831 ;; Q-AWARE? = #t, a `?` at top-level opens a nested ternary and a 4832 ;; matching `:` (q>0) closes it; the scanner stops on `:` only when q=0. 4833 (define (%punct-scan ps stop? q-aware?) 4834 (let lp ((d 0) (q 0)) 4835 (let ((t (peek ps))) 4836 (cond 4837 ((eq? (tok-kind t) 'EOF) #t) 4838 ((not (eq? (tok-kind t) 'PUNCT)) 4839 (advance ps) (lp d q)) 4840 (else 4841 (let ((v (tok-value t))) 4842 (cond 4843 ((or (eq? v 'lparen) (eq? v 'lbrack)) 4844 (advance ps) (lp (+ d 1) q)) 4845 ((or (eq? v 'rparen) (eq? v 'rbrack)) 4846 (cond ((zero? d) #t) 4847 (else (advance ps) (lp (- d 1) q)))) 4848 ((and q-aware? (zero? d) (eq? v 'qmark)) 4849 (advance ps) (lp d (+ q 1))) 4850 ((and q-aware? (zero? d) (> q 0) (eq? v 'colon)) 4851 (advance ps) (lp d (- q 1))) 4852 ((and (zero? d) (or (not q-aware?) (zero? q)) (stop? v)) 4853 #t) 4854 (else (advance ps) (lp d q))))))))) 4855 4856 ;; Skip the dead arm of a ternary. Same scanner whether we're skipping 4857 ;; the middle (cond was false; will then expect-punct `:` and parse arm 4858 ;; 3) or the third (cond was true; arm 2 already parsed and `:` already 4859 ;; consumed). Both stop at top-level `:` / `,` / `;` / `}` with no 4860 ;; open inner `?`; nested `?:` pairs are absorbed. 4861 (define (%const-skip-dead-arm ps) 4862 (%punct-scan ps 4863 (lambda (v) 4864 (or (eq? v 'colon) (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace))) 4865 #t)) 4866 4867 ;; Generic left-associative binary level. 4868 ;; ops: alist of punct-sym → (vp vp → vp). 4869 (define (%const-binl ps next ops) 4870 (let lp ((a (next ps))) 4871 (let* ((t (peek ps)) 4872 (hit (and (eq? (tok-kind t) 'PUNCT) 4873 (alist-ref/eq (tok-value t) ops)))) 4874 (cond ((not hit) a) 4875 (else (advance ps) (lp (hit a (next ps)))))))) 4876 4877 ;; Arithmetic combiner: promote both, arith-conv, apply fn, truncate. 4878 (define (%const-arith-op fn a b) 4879 (let-values (((av bv rt) (%const-arith-conv (%const-promote a) (%const-promote b)))) 4880 (cons (%const-trunc (fn av bv) rt) rt))) 4881 4882 ;; Like %const-arith-op but rejects a zero divisor. 4883 (define (%const-div-op fn a b) 4884 (let-values (((av bv rt) (%const-arith-conv (%const-promote a) (%const-promote b)))) 4885 (cond ((= bv 0) (die #f "const-expr: divide by zero"))) 4886 (cons (%const-trunc (fn av bv) rt) rt))) 4887 4888 ;; Comparison combiner: result is always (0-or-1 . %t-i32). 4889 (define (%const-cmp-op fn a b) 4890 (let-values (((av bv _rt) (%const-arith-conv (%const-promote a) (%const-promote b)))) 4891 (cons (if (fn av bv) 1 0) %t-i32))) 4892 4893 ;; Short-circuit per C11 §6.5.13/14 ¶4: rhs is not evaluated when the 4894 ;; lhs determines the result. Required so `1 || (1/0)` and 4895 ;; `0 && (1/0)` yield 1/0 rather than aborting on divide-by-zero. 4896 (define (parse-const-lor ps) 4897 (let lp ((a (parse-const-land ps))) 4898 (cond 4899 ((at-punct? ps 'lor) 4900 (advance ps) 4901 (cond 4902 ((%const-bool? a) 4903 (%const-skip-lor-rhs ps) 4904 (lp (cons 1 %t-i32))) 4905 (else 4906 (let ((b (parse-const-land ps))) 4907 (lp (cons (if (%const-bool? b) 1 0) %t-i32)))))) 4908 (else a)))) 4909 4910 (define (parse-const-land ps) 4911 (let lp ((a (parse-const-bor ps))) 4912 (cond 4913 ((at-punct? ps 'land) 4914 (advance ps) 4915 (cond 4916 ((not (%const-bool? a)) 4917 (%const-skip-land-rhs ps) 4918 (lp (cons 0 %t-i32))) 4919 (else 4920 (let ((b (parse-const-bor ps))) 4921 (lp (cons (if (%const-bool? b) 1 0) %t-i32)))))) 4922 (else a)))) 4923 4924 ;; Skip the rhs of a short-circuited && / ||. The rhs grammar is 4925 ;; the operand level of the operator: parse-const-bor for &&, 4926 ;; parse-const-land for ||. We can't just call those parsers because 4927 ;; the rhs may itself be invalid as a constant expression (e.g. 4928 ;; `1/0`); instead, scan tokens at paren/brack depth 0 until we hit 4929 ;; another operator at the same-or-lower binding level, comma, 4930 ;; semicolon, colon, qmark, rbrace, rbrack, rparen, or EOF. 4931 (define (%const-skip-land-rhs ps) 4932 ;; rhs of && is a parse-const-bor — stop on `&&`, `||`, `?`, `:`, 4933 ;; `,`, `;`, `}`, and any closing/separator at depth 0. 4934 (%punct-scan ps 4935 (lambda (v) 4936 (or (eq? v 'land) (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon) 4937 (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace))) 4938 #f)) 4939 (define (%const-skip-lor-rhs ps) 4940 ;; rhs of || is a parse-const-land — stop on `||` (left-assoc), 4941 ;; `?`, `:`, `,`, `;`, `}`. `&&` binds TIGHTER than `||`, so it is 4942 ;; absorbed into the rhs and we do NOT stop on it. 4943 (%punct-scan ps 4944 (lambda (v) 4945 (or (eq? v 'lor) (eq? v 'qmark) (eq? v 'colon) 4946 (eq? v 'comma) (eq? v 'semi) (eq? v 'rbrace))) 4947 #f)) 4948 4949 (define (parse-const-bor ps) 4950 (%const-binl ps parse-const-bxor (list (cons 'bar (lambda (a b) (%const-arith-op bit-or a b)))))) 4951 (define (parse-const-bxor ps) 4952 (%const-binl ps parse-const-band (list (cons 'caret (lambda (a b) (%const-arith-op bit-xor a b)))))) 4953 (define (parse-const-band ps) 4954 (%const-binl ps parse-const-eq (list (cons 'amp (lambda (a b) (%const-arith-op bit-and a b)))))) 4955 4956 (define (parse-const-eq ps) 4957 (%const-binl ps parse-const-rel 4958 (list (cons 'eq2 (lambda (a b) (%const-cmp-op = a b))) 4959 (cons 'ne (lambda (a b) (%const-cmp-op (lambda (x y) (not (= x y))) a b)))))) 4960 4961 (define (parse-const-rel ps) 4962 (%const-binl ps parse-const-shift 4963 (list (cons 'lt (lambda (a b) (%const-cmp-op < a b))) 4964 (cons 'le (lambda (a b) (%const-cmp-op <= a b))) 4965 (cons 'gt (lambda (a b) (%const-cmp-op > a b))) 4966 (cons 'ge (lambda (a b) (%const-cmp-op >= a b)))))) 4967 4968 ;; Shift combiner: result type is the (promoted) lhs type — rhs is 4969 ;; just a count, promoted independently. SIGN selects shl (+1) or shr (-1). 4970 (define (%const-shift-op sign a b) 4971 (let* ((ap (%const-promote a)) 4972 (bp (%const-promote b)) 4973 (rt (cdr ap))) 4974 (cons (%const-trunc (arithmetic-shift (car ap) (* sign (car bp))) rt) 4975 rt))) 4976 4977 (define (parse-const-shift ps) 4978 (%const-binl ps parse-const-add 4979 (list (cons 'shl (lambda (a b) (%const-shift-op 1 a b))) 4980 (cons 'shr (lambda (a b) (%const-shift-op -1 a b)))))) 4981 4982 (define (parse-const-add ps) 4983 (%const-binl ps parse-const-mul 4984 (list (cons 'plus (lambda (a b) (%const-arith-op + a b))) 4985 (cons 'minus (lambda (a b) (%const-arith-op - a b)))))) 4986 4987 (define (parse-const-mul ps) 4988 (%const-binl ps parse-const-cast 4989 (list (cons 'star (lambda (a b) (%const-arith-op * a b))) 4990 (cons 'slash (lambda (a b) (%const-div-op quotient a b))) 4991 (cons 'pct (lambda (a b) (%const-div-op remainder a b)))))) 4992 4993 (define (parse-const-cast ps) 4994 ;; (typename) operand — distinguished from ( expr ) by paren-is-group?. 4995 ;; Pointer casts are accepted only as a type re-tag — the integer 4996 ;; offset rides through unchanged. This is what the offsetof idiom 4997 ;; `(T *)0` and the outer `(size_t) <ptr-const>` need; we do not 4998 ;; admit general pointer arithmetic in const-expr. 4999 (cond 5000 ((at-punct? ps 'lparen) 5001 (cond 5002 ((%const-paren-is-cast? ps) 5003 (advance ps) 5004 (let*-values (((_sto bty) (parse-decl-spec ps)) 5005 ((_n ty) (parse-declarator ps bty))) 5006 (expect-punct ps 'rparen) 5007 (cond 5008 ((%ctype-int? ty) 5009 (let ((v (parse-const-cast ps))) 5010 (cons (%const-trunc (car v) ty) ty))) 5011 ((eq? (ctype-kind ty) 'ptr) 5012 (let ((v (parse-const-cast ps))) 5013 (cons (car v) ty))) 5014 (else 5015 (die (tok-loc (peek ps)) 5016 "const-expr: cast must be integer or pointer" 5017 (ctype-kind ty)))))) 5018 (else (parse-const-unary ps)))) 5019 (else (parse-const-unary ps)))) 5020 5021 (define (%const-paren-is-cast? ps) 5022 ;; A '(' starts a cast iff the following token kicks off a type-name. 5023 (%tok-decl-start? ps (peek2 ps))) 5024 5025 (define (%ctype-int? ty) 5026 (let ((k (ctype-kind ty))) 5027 (or (eq? k 'i8) (eq? k 'u8) (eq? k 'i16) (eq? k 'u16) 5028 (eq? k 'i32) (eq? k 'u32) (eq? k 'i64) (eq? k 'u64) 5029 (eq? k 'bool) (eq? k 'enum)))) 5030 5031 (define (parse-const-unary ps) 5032 (let ((t (peek ps))) 5033 (pmatch t 5034 (($ tok? (kind PUNCT) (value plus)) 5035 (advance ps) (%const-promote (parse-const-cast ps))) 5036 (($ tok? (kind PUNCT) (value minus)) 5037 (advance ps) 5038 (let* ((vp (%const-promote (parse-const-cast ps))) 5039 (rt (cdr vp))) 5040 (cons (%const-trunc (- 0 (car vp)) rt) rt))) 5041 (($ tok? (kind PUNCT) (value tilde)) 5042 (advance ps) 5043 (let* ((vp (%const-promote (parse-const-cast ps))) 5044 (rt (cdr vp))) 5045 (cons (%const-trunc (bit-not (car vp)) rt) rt))) 5046 (($ tok? (kind PUNCT) (value bang)) 5047 (advance ps) 5048 (let ((vp (parse-const-cast ps))) 5049 (cons (if (%const-bool? vp) 0 1) %t-i32))) 5050 (($ tok? (kind PUNCT) (value amp)) 5051 ;; Address-of in const-expr context. Restricted to the offsetof 5052 ;; idiom: a null-pointer-typed base reached via (T *)0 (with 5053 ;; optional grouping/deref) followed by ->/. field selectors. 5054 ;; The integer value is the running byte offset; '&' wraps the 5055 ;; designator's type in a pointer for any outer integer cast to 5056 ;; consume. 5057 (advance ps) 5058 (let* ((dp (%const-parse-addrof-postfix ps))) 5059 (cons (car dp) (%mk-ptr (cdr dp))))) 5060 (($ tok? (kind KW) (value sizeof)) 5061 (advance ps) 5062 (cond 5063 ((at-punct? ps 'lparen) 5064 (advance ps) 5065 (cond 5066 ((%const-tok-is-decl? ps) 5067 (let*-values (((_sto bty) (parse-decl-spec ps)) 5068 ((_n ty) (parse-declarator ps bty))) 5069 (expect-punct ps 'rparen) 5070 (cons (max (ctype-size ty) 0) %t-u64))) 5071 (else 5072 ;; sizeof(EXPR) in const-expr context. Operand is not 5073 ;; evaluated (C11 §6.5.3.4) — snapshot the cg, parse the 5074 ;; expr through the regular parser to recover its ctype, 5075 ;; then rewind to discard any emission/vstack pushes. 5076 (cons (%const-sizeof-expr ps #t) %t-u64)))) 5077 (else 5078 ;; `sizeof EXPR` (no parens). Same no-eval rule. 5079 (cons (%const-sizeof-expr ps #f) %t-u64)))) 5080 (else (parse-const-primary ps))))) 5081 5082 ;; Does TOK begin a type-name? Type specifiers, qualifiers, 5083 ;; struct/union/enum tags, and typedef-name idents. Storage classes 5084 ;; (auto/register/static/extern/typedef) are NOT included — those 5085 ;; appear only at declaration position; callers that need them 5086 ;; (e.g. stmt-starts-decl?) check separately. 5087 (define (%tok-decl-start? ps t) 5088 (pmatch t 5089 (($ tok? (kind KW) (value ,v)) 5090 (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) 5091 (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) 5092 (eq? v '_Bool) (eq? v 'float) (eq? v 'double) 5093 (eq? v '_Complex) (eq? v '_Imaginary) 5094 (eq? v 'struct) (eq? v 'union) (eq? v 'enum) 5095 (eq? v 'const) (eq? v 'volatile) (eq? v 'restrict) 5096 (eq? v 'inline))) 5097 (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) 5098 (else #f))) 5099 5100 (define (%const-tok-is-decl? ps) (%tok-decl-start? ps (peek ps))) 5101 5102 (define (parse-const-primary ps) 5103 (let ((t (peek ps))) 5104 (pmatch t 5105 (($ tok? (kind INT) (value ,v)) 5106 (advance ps) 5107 ;; Untyped INT literals ride as i32. Suffixes (L, LL, U) aren't 5108 ;; preserved through to the parser, but const-expr operands at 5109 ;; the granularity 118 cares about all fit in i32. 5110 (cons v %t-i32)) 5111 (($ tok? (kind CHAR) (value ,v)) 5112 (advance ps) 5113 ;; Character constants have type int in C. 5114 (cons v %t-i32)) 5115 (($ tok? (kind PUNCT) (value lparen)) 5116 (advance ps) 5117 (let ((v (parse-const-expr ps))) 5118 (expect-punct ps 'rparen) v)) 5119 (($ tok? (kind IDENT) (value ,n)) 5120 (let ((sm (scope-lookup ps n))) 5121 (cond ((and sm (eq? (sym-kind sm) 'enum-const)) 5122 (advance ps) (cons (sym-slot sm) %t-i32)) 5123 (else (die (tok-loc t) "const-expr: not a constant" n))))) 5124 (else (die (tok-loc t) "const-expr: bad operand" 5125 (tok-value t)))))) 5126 5127 ;; ==================================================================== 5128 ;; offsetof support inside const-expr. 5129 ;; 5130 ;; Recognises `&((T *)0)->FIELD`, `&(*(T *)0).FIELD`, and chains thereof 5131 ;; — the only address-of idioms that show up in static initializers 5132 ;; (tcc.c options_W[] / options_f[] / options_m[] tables, and any 5133 ;; offsetof macro expansion of the same shape). Each helper threads a 5134 ;; (offset . ctype) pair: integer byte offset of the running designator 5135 ;; from the null base, plus the lvalue's ctype. Field lookup reuses 5136 ;; %cg-find-field, so anonymous union/struct members work the same way 5137 ;; as in regular field access. 5138 ;; ==================================================================== 5139 5140 (define (%const-parse-addrof-postfix ps) 5141 ;; postfix: primary ( -> FIELD | . FIELD )* 5142 (let lp ((p (%const-parse-addrof-primary ps))) 5143 (pmatch (peek ps) 5144 (($ tok? (kind PUNCT) (value arrow)) 5145 (advance ps) (lp (%const-addrof-arrow ps p))) 5146 (($ tok? (kind PUNCT) (value dot)) 5147 (advance ps) (lp (%const-addrof-dot ps p))) 5148 (else p)))) 5149 5150 (define (%const-parse-addrof-primary ps) 5151 ;; primary: ( T )expr ; pointer cast — the offsetof base 5152 ;; | ( postfix ) ; grouping 5153 ;; | * primary ; deref 5154 (cond 5155 ((at-punct? ps 'lparen) 5156 (cond 5157 ((%const-paren-is-cast? ps) 5158 (let ((cv (parse-const-cast ps))) 5159 (cond 5160 ((not (eq? (ctype-kind (cdr cv)) 'ptr)) 5161 (die #f "const-expr: addr-of: head must be a pointer cast" 5162 (ctype-kind (cdr cv))))) 5163 cv)) 5164 (else 5165 (advance ps) 5166 (let ((r (%const-parse-addrof-postfix ps))) 5167 (expect-punct ps 'rparen) r)))) 5168 ((at-punct? ps 'star) 5169 (advance ps) 5170 (let ((h (%const-parse-addrof-primary ps))) 5171 (cond 5172 ((not (eq? (ctype-kind (cdr h)) 'ptr)) 5173 (die #f "const-expr: addr-of: '*' on non-pointer" 5174 (ctype-kind (cdr h))))) 5175 (cons (car h) (ctype-ext (cdr h))))) 5176 (else 5177 (die (tok-loc (peek ps)) "const-expr: addr-of: unexpected token" 5178 (tok-value (peek ps)))))) 5179 5180 (define (%const-addrof-arrow ps p) 5181 (let* ((off (car p)) (ty (cdr p))) 5182 (cond ((not (eq? (ctype-kind ty) 'ptr)) 5183 (die (tok-loc (peek ps)) "const-expr: -> on non-pointer" 5184 (ctype-kind ty)))) 5185 (let* ((sty (ctype-ext ty)) (sk (ctype-kind sty))) 5186 (cond ((not (or (eq? sk 'struct) (eq? sk 'union))) 5187 (die (tok-loc (peek ps)) 5188 "const-expr: -> target not aggregate" sk))) 5189 (%const-addrof-field ps sty off)))) 5190 5191 (define (%const-addrof-dot ps p) 5192 (let* ((off (car p)) (ty (cdr p)) (k (ctype-kind ty))) 5193 (cond ((not (or (eq? k 'struct) (eq? k 'union))) 5194 (die (tok-loc (peek ps)) 5195 "const-expr: . on non-aggregate" k))) 5196 (%const-addrof-field ps ty off))) 5197 5198 (define (%const-addrof-field ps sty base-off) 5199 (let ((nt (peek ps))) 5200 (cond ((not (eq? (tok-kind nt) 'IDENT)) 5201 (die (tok-loc nt) 5202 "const-expr: field selector needs an identifier" 5203 (tok-value nt)))) 5204 (advance ps) 5205 (let* ((fields (car (cddr (ctype-ext sty)))) 5206 (f (%cg-find-field fields (tok-value nt)))) 5207 (cond ((not f) (die (tok-loc nt) 5208 "const-expr: no such field" 5209 (tok-value nt)))) 5210 (cons (+ base-off (car (cddr f))) (cadr f))))) 5211 5212 ;; sizeof EXPR / sizeof(EXPR) in const-expr context. Delegates to the 5213 ;; regular expression parser under a cg snapshot/rewind — same contract 5214 ;; as parse-unary's sizeof: the operand is parsed to learn its type but 5215 ;; not evaluated, so any emission or vstack push from the parse is 5216 ;; discarded. Returns the operand's byte size as a non-negative int. 5217 ;; If `paren?`, consumes the closing `)` after parsing. 5218 (define (%const-sizeof-expr ps paren?) 5219 (cond 5220 ((not (ps-cg ps)) 5221 (die #f "#if: sizeof of expression not valid in preprocessor context")) 5222 (else 5223 (let ((tag (cg-snapshot (ps-cg ps)))) 5224 (cond (paren? (parse-expr ps) (expect-punct ps 'rparen)) 5225 (else (parse-unary ps))) 5226 (let* ((tp (cg-top (ps-cg ps))) 5227 (sz (max (ctype-size (opnd-type tp)) 0))) 5228 (cg-rewind (ps-cg ps) tag) 5229 sz))))) 5230 5231 ;; Convenience: returns the integer value alone (callers that don't 5232 ;; need the type half of parse-const-expr's (value . ctype) result). 5233 (define (parse-const-int ps) (car (parse-const-expr ps))) 5234 5235 (define (parse-declarator ps base) 5236 ;; Returns (values name type). 5237 ((cdr (parse-decl-cont ps)) base 5238 (lambda (n t) (values n t)))) 5239 5240 (define (parse-decl-cont ps) 5241 (pmatch (peek ps) 5242 (($ tok? (kind KW) (value __attribute__)) 5243 (skip-gnu-attribute! ps) (parse-decl-cont ps)) 5244 (($ tok? (kind PUNCT) (value star)) 5245 (advance ps) (eat-cv-quals! ps) 5246 (let* ((r (parse-decl-cont ps)) (rf (cdr r))) 5247 (cons (car r) (lambda (b k) (rf (%mk-ptr b) k))))) 5248 (($ tok? (kind PUNCT) (value lparen)) 5249 (guard (paren-is-group? ps)) 5250 (advance ps) 5251 (let* ((i (parse-decl-cont ps)) (if- (cdr i))) 5252 (expect-punct ps 'rparen) 5253 (let ((s (parse-decl-suf-cont ps))) 5254 (cons (car i) (lambda (b k) (if- (s b) k)))))) 5255 (($ tok? (kind IDENT) (value ,n)) 5256 (advance ps) 5257 (let ((s (parse-decl-suf-cont ps))) 5258 (cons n (lambda (b k) (k n (s b)))))) 5259 (else 5260 (let ((s (parse-decl-suf-cont ps))) 5261 (cons #f (lambda (b k) (k #f (s b)))))))) 5262 5263 (define (parse-decl-suf-cont ps) 5264 ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first): 5265 ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2) 5266 ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as 5267 ;; outermost). The recursive structure builds the inner suffix's 5268 ;; result first, then this level wraps. 5269 (pmatch (peek ps) 5270 (($ tok? (kind PUNCT) (value lbrack)) 5271 (advance ps) 5272 ;; C99 §6.7.5.2 allows `static`, type qualifiers (const / 5273 ;; volatile / restrict), and `*` (variable length array 5274 ;; placeholder) inside array-of-T brackets in function 5275 ;; parameter declarators. We don't honour the qualifier 5276 ;; semantics — just consume them so the dimension expression 5277 ;; that follows parses. 5278 (let lp () 5279 (cond 5280 ((or (at-kw? ps 'const) (at-kw? ps 'volatile) 5281 (at-kw? ps 'restrict) (at-kw? ps 'static)) 5282 (advance ps) (lp)) 5283 (else #t))) 5284 (let* ((ln (cond ((at-punct? ps 'rbrack) -1) 5285 ((at-punct? ps 'star) (advance ps) -1) 5286 (else (parse-const-int ps)))) 5287 (_ (expect-punct ps 'rbrack)) 5288 (r (parse-decl-suf-cont ps))) 5289 (lambda (b) (%mk-arr (r b) ln)))) 5290 (($ tok? (kind PUNCT) (value lparen)) 5291 (advance ps) 5292 (let-values (((p v) (parse-fn-params ps))) 5293 (expect-punct ps 'rparen) 5294 (let ((r (parse-decl-suf-cont ps))) 5295 (lambda (b) (%mk-fn (r b) p v))))) 5296 (($ tok? (kind KW) (value __attribute__)) 5297 (skip-gnu-attribute! ps) (parse-decl-suf-cont ps)) 5298 (else (lambda (b) b)))) 5299 5300 (define (paren-is-group? ps) 5301 (pmatch (peek2 ps) 5302 (($ tok? (kind KW) (value ,v)) 5303 (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short) 5304 (eq? v 'int) (eq? v 'long) (eq? v 'signed) 5305 (eq? v 'unsigned) (eq? v '_Bool) 5306 (eq? v 'float) (eq? v 'double) 5307 (eq? v '_Complex) (eq? v '_Imaginary) 5308 (eq? v 'struct) (eq? v 'union) (eq? v 'enum) 5309 (eq? v 'const) (eq? v 'volatile) 5310 (eq? v 'restrict) (eq? v 'static) 5311 (eq? v 'extern) (eq? v 'register)) #f) 5312 (else #t))) 5313 (($ tok? (kind IDENT) (value ,n)) 5314 (cond ((typedef? ps n) #f) (else #t))) 5315 (($ tok? (kind PUNCT) (value rparen)) #f) 5316 (($ tok? (kind PUNCT) (value star)) #t) 5317 (($ tok? (kind PUNCT) (value lparen)) #t) 5318 (($ tok? (kind PUNCT) (value lbrack)) #t) 5319 (else #f))) 5320 5321 (define (parse-fn-params ps) 5322 ;; Returns (values params variadic?). 5323 (cond 5324 ((at-punct? ps 'rparen) (values '() #f)) 5325 ((and (at-kw? ps 'void) 5326 (eq? (tok-kind (peek2 ps)) 'PUNCT) 5327 (eq? (tok-value (peek2 ps)) 'rparen)) 5328 (advance ps) (values '() #f)) 5329 (else 5330 (let loop ((acc '())) 5331 (cond 5332 ((at-punct? ps 'ellipsis) 5333 (advance ps) (values (reverse acc) #t)) 5334 (else 5335 (let*-values (((_sto bty) (parse-decl-spec ps)) 5336 ((nm ty) (parse-declarator ps bty))) 5337 (let ((ty2 (cond ((ctype-is-arr? ty) 5338 (%mk-ptr (car (ctype-ext ty)))) 5339 ((ctype-is-fn? ty) (%mk-ptr ty)) 5340 (else ty)))) 5341 (cond 5342 ((at-punct? ps 'comma) 5343 (advance ps) (loop (cons (cons nm ty2) acc))) 5344 ((at-punct? ps 'rparen) 5345 (values (reverse (cons (cons nm ty2) acc)) #f)) 5346 (else (die (tok-loc (peek ps)) "param"))))))))))) 5347 5348 ;; ==================================================================== 5349 ;; Phase 3: parse-decl-or-fn boundary — scratch by default, promote 5350 ;; surviving roots into main and reset scratch at each top-level decl. 5351 ;; 5352 ;; Promotion uses the prelude's generic deep-copy via 5353 ;; call-with-scratch-cycle; the per-decl identity-preserving map is 5354 ;; folded into a single deep-copy context shared across all roots. 5355 ;; 5356 ;; Per-decl mutable state retained on the cc side: 5357 ;; %promote-pending-completions: list of struct/union ctypes that 5358 ;; complete-agg! mutated during the current decl. These are 5359 ;; pre-existing main-heap records whose ext now points at scratch; 5360 ;; ext is rewritten via deep-copy before the scratch reset. 5361 ;; ==================================================================== 5362 5363 (define %promote-pending-completions '()) 5364 5365 (define (rewrite-pending-completions! ctx) 5366 (for-each 5367 (lambda (c) 5368 (cond ((heap-in-main? c) 5369 (ctype-ext-set! c (deep-copy ctx (ctype-ext c)))))) 5370 %promote-pending-completions) 5371 (set! %promote-pending-completions '())) 5372 5373 ;; Deep-copy each top-level world alist into main. The world-tags / 5374 ;; world-scope stacks contain only the file-scope frame at decl 5375 ;; boundaries (nested frames popped by scope-leave!); rebuild that head 5376 ;; frame and preserve the (always-empty) tail. world-str-pool is a flat 5377 ;; alist. deep-copy short-circuits prior-decl entries via heap-in-current?, 5378 ;; so this is linear in the new entries only. 5379 (define (promote-roots! w ctx) 5380 (rewrite-pending-completions! ctx) 5381 (let ((tn (world-tags w))) 5382 (world-tags-set! w (cons (deep-copy ctx (car tn)) (cdr tn)))) 5383 (let ((sn (world-scope w))) 5384 (world-scope-set! w (cons (deep-copy ctx (car sn)) (cdr sn)))) 5385 (world-str-pool-set! w (deep-copy ctx (world-str-pool w))) 5386 (world-tentatives-set! w (deep-copy ctx (world-tentatives w)))) 5387 5388 ;; Iter-buffer carryover. The pp-iter / lex-iter records themselves 5389 ;; live in main from cc-init; only their tok-iter-buf slots and the 5390 ;; pp-state's pending / out / cur-file / macros slots can hold 5391 ;; scratch-allocated content. Rewrite each in place via deep-copy. 5392 (define (promote-iter-buffers! pp-it ctx) 5393 (let* ((st (tok-iter-state pp-it)) 5394 (lex-it (pps-lex-iter st))) 5395 (tok-iter-buf-set! pp-it (deep-copy ctx (tok-iter-buf pp-it))) 5396 (cond (lex-it 5397 (tok-iter-buf-set! lex-it 5398 (deep-copy ctx (tok-iter-buf lex-it))))) 5399 (pps-up-pending-set! st (deep-copy ctx (pps-up-pending st))) 5400 (pps-out-buf-set! st (deep-copy ctx (pps-out-buf st))) 5401 (pps-cur-file-set! st (deep-copy ctx (pps-cur-file st))) 5402 (pps-cond-stack-set! st (deep-copy ctx (pps-cond-stack st))) 5403 (pps-macros-set! st (deep-copy ctx (pps-macros st))))) 5404 5405 (define (parse-translation-unit ps) 5406 (let loop () 5407 (let ((at-eof? #f)) 5408 (call-with-scratch-cycle 5409 (lambda () 5410 (cond 5411 ((eq? (tok-kind (peek ps)) 'EOF) (set! at-eof? #t)) 5412 (else 5413 (cond 5414 ((debug-log?) 5415 (let ((loc (tok-loc (peek ps)))) 5416 (debug-log "decl" "line" (loc-line loc) 5417 "heap" (heap-usage))))) 5418 (parse-decl-or-fn ps)))) 5419 (lambda () 5420 (cond 5421 ((not at-eof?) 5422 (let ((ctx (make-deep-copy-context))) 5423 (promote-roots! (ps-world ps) ctx) 5424 (promote-iter-buffers! (ps-iter ps) ctx)) 5425 ;; cg-fn-meta may hold scratch alist conses left over from 5426 ;; the just-finished function; cg-fn-begin would reset it, 5427 ;; but a trailing fn means it'd dangle past the reset. 5428 (cg-fn-meta-set! (ps-cg ps) '()))))) 5429 (cond 5430 (at-eof? #t) 5431 (else (loop)))))) 5432 5433 (define (parse-decl-or-fn ps) 5434 (let-values (((sto b) (parse-decl-spec ps))) 5435 (cond 5436 ((at-punct? ps 'semi) (advance ps) 'decl) 5437 (else 5438 (let-values (((n t) (parse-declarator ps b))) 5439 (cond 5440 ((and (ctype-is-fn? t) (at-punct? ps 'lbrace)) 5441 (parse-fn-body ps sto n t) 'fn) 5442 (else 5443 (handle-decl ps sto n t) 5444 (let lp () 5445 (cond 5446 ((at-punct? ps 'comma) 5447 (advance ps) 5448 (let-values (((n2 t2) (parse-declarator ps b))) 5449 (handle-decl ps sto n2 t2) (lp))) 5450 (else (expect-punct ps 'semi) 'decl)))))))))) 5451 5452 ;; ---- Block-scope inferred-length array length resolution ------------- 5453 ;; The token iterator buffers lookahead in a list (see tok-iter); we 5454 ;; can pull arbitrarily many tokens, then push them all back via 5455 ;; iter-unget!. We use that to peek the initializer that follows `=` 5456 ;; (without consuming it) and count its elements so cg-alloc-slot can 5457 ;; reserve the right number of bytes BEFORE the initializer-emission 5458 ;; loop runs (and starts spilling intermediate values into newly- 5459 ;; allocated frame slots). 5460 ;; 5461 ;; Only the OUTERMOST length is inferred per C99 6.7.8/22, so for 5462 ;; `int x[][3] = {{1,2,3},{4,5,6}};` we just count top-level 5463 ;; brace-or-comma groups; the inner brace groups don't matter. 5464 5465 (define (%peek-inferred-arr-init? ps) 5466 ;; Check whether the next-after-`=` token starts a brace-init or a 5467 ;; string-literal — the only initializer shapes that can resolve a 5468 ;; block-scope inferred-length array. We do NOT consume `=`; we 5469 ;; peek2 instead. 5470 (let ((t2 (peek2 ps))) 5471 (or (and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace)) 5472 (eq? (tok-kind t2) 'STR)))) 5473 5474 (define (%resolve-inferred-arr-len ps ty) 5475 ;; Returns a fresh array ctype with the resolved length. Does NOT 5476 ;; consume the `=` or any of the initializer tokens — every token 5477 ;; pulled is unget back in original order. 5478 (let* ((eq-tok (iter-next (ps-iter ps))) ; consume `=` (will unget) 5479 (first (iter-next (ps-iter ps))) ; consume `{` or STR 5480 (collected (list first eq-tok)) ; head order: revs at end 5481 (count 5482 (cond 5483 ((eq? (tok-kind first) 'STR) 5484 ;; String length + NUL. 5485 (+ (bytevector-length (tok-value first)) 1)) 5486 (else 5487 ;; first is `{`. Count top-level commas + 1, ignoring a 5488 ;; trailing comma before `}`. Track brace depth so nested 5489 ;; `{` for sub-aggregates are skipped. 5490 (let lp ((depth 1) (n 0) (saw-elem? #f) (last-was-comma? #f) 5491 (acc collected)) 5492 (let ((t (iter-next (ps-iter ps)))) 5493 (let ((acc2 (cons t acc))) 5494 (cond 5495 ((eq? (tok-kind t) 'EOF) 5496 ;; Bail; let the real parser report the error 5497 ;; after we restore tokens. 5498 (%inferred-arr-restore! ps acc2) 5499 (die #f "init: unterminated brace")) 5500 ((and (eq? (tok-kind t) 'PUNCT) 5501 (eq? (tok-value t) 'lbrace)) 5502 (lp (+ depth 1) n #t #f acc2)) 5503 ((and (eq? (tok-kind t) 'PUNCT) 5504 (eq? (tok-value t) 'rbrace)) 5505 (cond 5506 ((= depth 1) 5507 ;; Done. Restore tokens (acc2 includes the 5508 ;; closing `}`). 5509 (%inferred-arr-restore! ps acc2) 5510 (cond ((not saw-elem?) 0) 5511 (last-was-comma? n) 5512 (else (+ n 1)))) 5513 (else (lp (- depth 1) n saw-elem? #f acc2)))) 5514 ((and (eq? (tok-kind t) 'PUNCT) 5515 (eq? (tok-value t) 'comma) 5516 (= depth 1)) 5517 (lp depth (+ n 1) saw-elem? #t acc2)) 5518 (else 5519 (lp depth n #t #f acc2))))))))) 5520 ) 5521 (cond 5522 ((eq? (tok-kind first) 'STR) 5523 (%inferred-arr-restore! ps collected))) 5524 (%init-fixed-arr-type ty count))) 5525 5526 (define (%inferred-arr-restore! ps acc) 5527 ;; acc is a stack of tokens in REVERSE consume order (most-recent 5528 ;; first). iter-unget! prepends one at a time, so iterating acc in 5529 ;; its current order pushes them back in the right sequence — 5530 ;; i.e. the oldest-consumed token ends up at the front of the 5531 ;; lookahead buffer. 5532 (let lp ((xs acc)) 5533 (cond 5534 ((null? xs) #t) 5535 (else (iter-unget! (ps-iter ps) (car xs)) (lp (cdr xs)))))) 5536 5537 (define (handle-decl ps sto n ty) 5538 (cond 5539 ((not n) (die #f "no name")) 5540 ((eq? sto 'typedef) 5541 (scope-bind! ps n (%sym n 'typedef #f ty #f #t))) 5542 ((ctype-is-fn? ty) 5543 (scope-bind! ps n 5544 (%sym n 'fn (or sto 'extern) ty #f #f))) 5545 ;; §I: block-scope `static` routes to a global with a name mangled 5546 ;; on the enclosing function so two functions can each have their 5547 ;; own `static int n;` without colliding. The sym's NAME holds the 5548 ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__" 5549 ;; onto sym-name to derive the emitted label); scope-bind!s key 5550 ;; remains the original identifier for source-level lookup. 5551 ((and (eq? sto 'static) (ps-fn-ctx ps)) 5552 (let* ((fname (fn-ctx-name (ps-fn-ctx ps))) 5553 (mangled (bytevector-append fname "__" n))) 5554 (cond 5555 ((at-punct? ps 'assign) 5556 (advance ps) 5557 ;; Parse init first so an inferred-length array picks up its 5558 ;; resolved type before sm is constructed (sym is immutable). 5559 (let-values (((pieces ty2) (parse-init-global ps ty))) 5560 (let ((sm (%sym mangled 'var 'static ty2 #f #t))) 5561 (scope-bind! ps n sm) 5562 (cg-emit-global (ps-cg ps) sm pieces)))) 5563 (else 5564 (let ((sm (%sym mangled 'var 'static ty #f #t))) 5565 (scope-bind! ps n sm) 5566 (cg-emit-global (ps-cg ps) sm #f)))))) 5567 (else 5568 (cond 5569 ((not (ps-fn-ctx ps)) 5570 ;; File-scope decls. Three cases: 5571 ;; (a) initializer present -> full external definition. 5572 ;; (b) `extern` no init -> declaration only. 5573 ;; (c) no init, no `extern` -> tentative definition. 5574 ;; (a) emits to .data immediately. (b) is recorded but emits 5575 ;; nothing. (c) is recorded as `defined?=#f` and added to 5576 ;; world-tentatives; cg-finish emits .bss at end of TU only if 5577 ;; no full definition appeared. This lets two `static int x;` 5578 ;; or a `static int x;` followed by `static int x = 1;` 5579 ;; coexist (C 6.9.2 tentative-def merge). 5580 (cond 5581 ((at-punct? ps 'assign) 5582 (advance ps) 5583 (let-values (((pieces ty2) (parse-init-global ps ty))) 5584 (let ((sm (%sym n 'var (or sto 'extern) ty2 #f #t))) 5585 (scope-bind! ps n sm) 5586 (cg-emit-global (ps-cg ps) sm pieces)))) 5587 ((eq? sto 'extern) 5588 (let ((sm (%sym n 'var 'extern ty #f #f))) 5589 (scope-bind! ps n sm) 5590 (cg-emit-extern (ps-cg ps) sm))) 5591 (else 5592 (let ((sm (%sym n 'var (or sto 'extern) ty #f #f))) 5593 (scope-bind! ps n sm) 5594 (cg-add-tentative! (ps-cg ps) n))))) 5595 (else 5596 ;; Block-scope inferred-length array (`int a[] = {…};` or 5597 ;; `char s[] = "…";`): peek the initializer past `=` to count 5598 ;; elements / measure the string and rebuild `ty` with the 5599 ;; resolved length BEFORE cg-alloc-slot. Otherwise the slot 5600 ;; is sized off a -1 / 0 ctype-size (capped to 1 byte) and 5601 ;; the per-element stores in parse-init-local-aggregate write 5602 ;; past frame-hi — the next %cg-spill-reg then allocates 5603 ;; right inside the array, clobbering elements. 5604 (let* ((ty (cond 5605 ((and (eq? (ctype-kind ty) 'arr) 5606 (< (cdr (ctype-ext ty)) 0) 5607 (at-punct? ps 'assign) 5608 (%peek-inferred-arr-init? ps)) 5609 (%resolve-inferred-arr-len ps ty)) 5610 (else ty))) 5611 (sz (max (ctype-size ty) 1)) 5612 (al (max (ctype-align ty) 1)) 5613 (sl (cg-alloc-slot (ps-cg ps) sz al)) 5614 (sm (%sym n 'var (or sto 'auto) ty sl #t))) 5615 (scope-bind! ps n sm) 5616 (cond 5617 ((at-punct? ps 'assign) 5618 (advance ps) 5619 (cond 5620 ;; Aggregate locals get the per-element store treatment. 5621 ((or (at-punct? ps 'lbrace) 5622 (and (eq? (ctype-kind ty) 'arr) 5623 (eq? (tok-kind (peek ps)) 'STR))) 5624 (parse-init-local-aggregate ps sm ty)) 5625 ;; Struct/union initializer from a non-brace expression 5626 ;; (typically a function call returning by-value). The 5627 ;; expr produces a struct lval; we copy bytes into the 5628 ;; destination slot. 5629 ((or (eq? (ctype-kind ty) 'struct) 5630 (eq? (ctype-kind ty) 'union)) 5631 (cg-push-sym (ps-cg ps) sm) 5632 (parse-expr-bp ps 4) 5633 (cg-copy-struct (ps-cg ps))) 5634 (else 5635 (cg-push-sym (ps-cg ps) sm) 5636 (parse-expr-bp ps 4) (rval! ps) 5637 (cg-cast (ps-cg ps) ty) 5638 (cg-assign (ps-cg ps)) 5639 (cg-pop (ps-cg ps))))) 5640 (else #t)))))))) 5641 5642 ;; ==================================================================== 5643 ;; Initializers (see CC.md §Variable initializers). 5644 ;; 5645 ;; parse-init-global ps ty 5646 ;; Reads the initializer following `=` for a file-scope or block-scope 5647 ;; static var of static-storage type `ty` and returns a list of 5648 ;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for 5649 ;; the piece grammar. 5650 ;; 5651 ;; parse-init-local ps sm ty 5652 ;; Reads the initializer for an auto-storage variable bound to slot 5653 ;; sym `sm` and emits per-element store cg ops. Returns unspecified. 5654 ;; ==================================================================== 5655 5656 (define (%int->le-bv n nbytes) 5657 ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes 5658 ;; >= sign-bit are filled by repeated >>8 (works for both signed and 5659 ;; unsigned because we only keep the low N bytes). 5660 (let ((out (make-bytevector nbytes 0))) 5661 (let loop ((i 0) (v n)) 5662 (cond 5663 ((= i nbytes) out) 5664 (else 5665 (bytevector-u8-set! out i (bit-and v 255)) 5666 (loop (+ i 1) (arithmetic-shift v -8))))))) 5667 5668 ;; File-scope compound literal (C99 §6.5.2.5). The bracketed initializer 5669 ;; following a typename in a static-storage initializer (or behind `&` 5670 ;; in same) is an unnamed object with static storage duration. Drive 5671 ;; the existing parse-init-global → cg-emit-global pipeline against a 5672 ;; synthetic sym whose label is freshly minted via %cg-fresh-cl-label. 5673 ;; Returns the emitted label; the caller wraps it in a (label-ref . LBL) 5674 ;; piece. The leading `(T)` and the storage-class disambiguation belong 5675 ;; to the caller — this entry point assumes peek = `{`. 5676 (define (%emit-fs-compound-literal ps ty) 5677 (let-values (((pieces ty2) (parse-init-global ps ty))) 5678 (let* ((lbl (%cg-fresh-cl-label (ps-cg ps))) 5679 ;; storage 'extern → %cg-sym-label returns the bare name 5680 ;; unchanged (no extra "cc__" prefix), so the emitted label 5681 ;; matches what we hand back to the caller. 5682 (sm (%sym lbl 'var 'extern ty2 #f #t))) 5683 (cg-emit-global (ps-cg ps) sm pieces) 5684 lbl))) 5685 5686 (define (%const-init-piece ps ty) 5687 ;; Parse a non-brace initializer expression for scalar type `ty` and 5688 ;; return a single piece. Recognised forms: 5689 ;; - INT (with optional unary +/-) -> N-byte LE bv 5690 ;; - enum-const IDENT -> N-byte LE bv 5691 ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name) 5692 ;; - &(T){...} (address of file-scope literal) -> (label-ref . cc__cl_N) 5693 ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name) 5694 ;; - STR (only for char* targets) -> (label-ref . string-pool-label) 5695 ;; - (T){...} (file-scope compound literal) -> (label-ref . cc__cl_N) 5696 (let ((t (peek ps))) 5697 (cond 5698 ;; Address initializer: &ident -> label-ref 5699 ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp)) 5700 (advance ps) 5701 (let ((it (peek ps))) 5702 (cond 5703 ((eq? (tok-kind it) 'IDENT) 5704 (advance ps) 5705 (let ((sm (scope-lookup ps (tok-value it)))) 5706 (cond 5707 ((not sm) (die (tok-loc it) "init: undecl" (tok-value it))) 5708 ((or (eq? (sym-kind sm) 'fn) 5709 (and (eq? (sym-kind sm) 'var) 5710 (or (eq? (sym-storage sm) 'static) 5711 (eq? (sym-storage sm) 'extern)))) 5712 (cons 'label-ref (%cg-sym-label sm))) 5713 (else 5714 (die (tok-loc it) "init: &x must reference a global" 5715 (tok-value it)))))) 5716 ;; &(T){...} — address of an unnamed file-scope compound 5717 ;; literal. Parse the typename, expect `{`, drive the 5718 ;; literal into .data, and yield its label. 5719 ((and (eq? (tok-kind it) 'PUNCT) (eq? (tok-value it) 'lparen) 5720 (%const-paren-is-cast? ps)) 5721 (advance ps) 5722 (let*-values (((_sto bty) (parse-decl-spec ps)) 5723 ((_n ty2) (parse-declarator ps bty))) 5724 (expect-punct ps 'rparen) 5725 (cond 5726 ((not (at-punct? ps 'lbrace)) 5727 (die (tok-loc (peek ps)) 5728 "init: &(T) must be followed by { ... }" 5729 (tok-value (peek ps))))) 5730 (cons 'label-ref (%emit-fs-compound-literal ps ty2)))) 5731 (else (die (tok-loc it) "init: &?" (tok-value it)))))) 5732 ;; (T){...} — file-scope compound literal. The literal is an 5733 ;; lvalue of array/struct/union type; assignment to a pointer 5734 ;; target decays it via its label address (label = first byte). 5735 ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen) 5736 (%const-paren-is-cast? ps) 5737 ;; Speculatively look past `(T)` for `{`. Since we have no 5738 ;; 3-token peek, we have to commit to the (T) parse; if the 5739 ;; following token isn't `{` it's a plain cast, so we fall 5740 ;; back to the const-int path with the type already consumed. 5741 #t) 5742 ;; Take the (T) ourselves so we can dispatch on the next token. 5743 (advance ps) 5744 (let*-values (((_sto bty) (parse-decl-spec ps)) 5745 ((_n ty2) (parse-declarator ps bty))) 5746 (expect-punct ps 'rparen) 5747 (cond 5748 ((at-punct? ps 'lbrace) 5749 (cons 'label-ref (%emit-fs-compound-literal ps ty2))) 5750 (else 5751 ;; Not a compound literal — it's a constant cast, e.g. 5752 ;; `(int)(unsigned char)257`. Mirror parse-const-cast's 5753 ;; cast arm with the already-parsed type. 5754 (cond 5755 ((%ctype-int? ty2) 5756 (let ((v (parse-const-cast ps))) 5757 (%int->le-bv (%const-trunc (car v) ty2) 5758 (max (ctype-size ty) 1)))) 5759 ((eq? (ctype-kind ty2) 'ptr) 5760 ;; Pointer cast in const-expr: type-retag only. We expect 5761 ;; the operand to be an integer-shaped const (e.g. 0) and 5762 ;; emit it as the target's byte width. 5763 (let ((v (parse-const-cast ps))) 5764 (%int->le-bv (car v) (max (ctype-size ty) 1)))) 5765 (else 5766 (die (tok-loc (peek ps)) 5767 "init: cast to non-scalar non-compound-literal" 5768 (ctype-kind ty2)))))))) 5769 ;; Function name or array name as a label-ref initializer. 5770 ;; (Both decay to a pointer when used as a value.) 5771 ((and (eq? (tok-kind t) 'IDENT) 5772 (let ((sm (scope-lookup ps (tok-value t)))) 5773 (and sm 5774 (or (eq? (sym-kind sm) 'fn) 5775 (and (eq? (sym-kind sm) 'var) 5776 (eq? (ctype-kind (sym-type sm)) 'arr) 5777 (or (eq? (sym-storage sm) 'static) 5778 (eq? (sym-storage sm) 'extern))))))) 5779 (advance ps) 5780 (let ((sm (scope-lookup ps (tok-value t)))) 5781 (cons 'label-ref (%cg-sym-label sm)))) 5782 ;; Plain string literal as char* initializer. 5783 ((eq? (tok-kind t) 'STR) 5784 (advance ps) 5785 (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t)))) 5786 (cons 'label-ref lbl))) 5787 ;; Otherwise it's a const integer. 5788 (else 5789 (let ((v (parse-const-int ps))) 5790 (%int->le-bv v (max (ctype-size ty) 1))))))) 5791 5792 (define (%init-array-elem-type ty) 5793 (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty))) 5794 (else (die #f "init: not an array" ty)))) 5795 5796 (define (%init-array-decl-len ty) 5797 ;; Declared array length (-1 = inferred). 5798 (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1))) 5799 5800 (define (%init-fixed-arr-type ty count) 5801 ;; Construct a fresh array ctype with the inferred length resolved 5802 ;; to `count`. Pure — does not mutate `ty`. For non-inferred or 5803 ;; non-array `ty`, callers should detect this themselves and just 5804 ;; pass `ty` through. 5805 (%mk-arr (car (ctype-ext ty)) count)) 5806 5807 (define (%init-struct-fields ty) 5808 ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. 5809 (let ((ext (ctype-ext ty))) 5810 (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext))) 5811 (else (die #f "init: not a struct" ty))))) 5812 5813 ;; After processing a designated initializer for FNAME, return the 5814 ;; field list with FNAME and all preceding (already-overwritten or 5815 ;; skipped) fields removed. Empty list if FNAME isn't found (caller 5816 ;; should already have validated the field exists). 5817 (define (%init-drop-thru-field fields fname) 5818 (cond ((null? fields) '()) 5819 ((equal? (car (car fields)) fname) (cdr fields)) 5820 (else (%init-drop-thru-field (cdr fields) fname)))) 5821 5822 ;; Element/field dispatch for global aggregate initializers. ELIDE? = #f 5823 ;; means caller has just consumed `{` for this element and we own the 5824 ;; matching `}`; ELIDE? = #t is C99 §6.7.8 ¶22 brace elision (the 5825 ;; sub-aggregate draws items from the parent stream, no inner braces). 5826 ;; Returns the piece-list contributing this element to the encoding. 5827 (define (%global-init-elem ps t elide?) 5828 (let ((k (ctype-kind t))) 5829 (cond 5830 ((eq? k 'arr) 5831 (let-values (((p _c) (cond 5832 (elide? (%parse-init-array-list/mode ps t #f)) 5833 (else (%parse-init-array-list ps t))))) 5834 p)) 5835 ((or (eq? k 'struct) (eq? k 'union)) 5836 (cond 5837 (elide? (%parse-init-struct-list/mode ps t #f)) 5838 (else (%parse-init-struct-list ps t)))) 5839 (else 5840 (let ((p (%const-init-piece ps t))) 5841 (cond 5842 (elide? (list p)) 5843 (else 5844 (cond ((at-punct? ps 'comma) (advance ps))) 5845 (expect-punct ps 'rbrace) 5846 (list p)))))))) 5847 5848 ;; Element/field dispatch for local aggregate initializers. Mirrors 5849 ;; %global-init-elem but emits per-element store ops via cg-assign for 5850 ;; scalar leaves, and recurses into the local-list walkers for 5851 ;; aggregates. Returns 0; the side effect is the emitted code. 5852 (define (%local-init-elem ps sm eoff t elide?) 5853 (let ((k (ctype-kind t))) 5854 (cond 5855 ((eq? k 'arr) 5856 (cond 5857 (elide? (%parse-init-local-array-list/mode ps sm eoff t #f)) 5858 (else (%parse-init-local-array-list ps sm eoff t)))) 5859 ((or (eq? k 'struct) (eq? k 'union)) 5860 (cond 5861 (elide? (%parse-init-local-struct-list/mode ps sm eoff t #f)) 5862 (else (%parse-init-local-struct-list ps sm eoff t)))) 5863 (else 5864 (%push-frame-elem-lval ps eoff t) 5865 (parse-expr-bp ps 4) (rval! ps) 5866 (cg-cast (ps-cg ps) t) 5867 (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) 5868 (cond 5869 (elide? 0) 5870 (else 5871 (cond ((at-punct? ps 'comma) (advance ps))) 5872 (expect-punct ps 'rbrace))))))) 5873 5874 (define (%pad-piece nbytes) 5875 (make-bytevector nbytes 0)) 5876 5877 ;; Static aggregate init streaming support. parse-translation-unit runs 5878 ;; declarations in scratch, but a large file-scope array initializer can 5879 ;; contain hundreds of independent elements. Parse one array element 5880 ;; past a heap mark, promote the element pieces and parser lookahead into 5881 ;; main, rewind that element's transient lexer/pp/const-expr scratch, and 5882 ;; keep the outer pieces list in main as well. 5883 (define (%init-promote-unit ps thunk) 5884 (let ((mark (heap-mark))) 5885 (let ((scratch-result (thunk))) 5886 (use-main-heap!) 5887 (let ((ctx (make-deep-copy-context))) 5888 (promote-roots! (ps-world ps) ctx) 5889 (promote-iter-buffers! (ps-iter ps) ctx) 5890 (let ((main-result (deep-copy ctx scratch-result))) 5891 (use-scratch-heap!) 5892 (heap-rewind! mark) 5893 main-result))))) 5894 5895 (define (%init-main-cons x xs) 5896 (use-main-heap!) 5897 (let ((r (cons x xs))) 5898 (use-scratch-heap!) 5899 r)) 5900 5901 (define (%init-main-reverse xs) 5902 (use-main-heap!) 5903 (let ((r (reverse xs))) 5904 (use-scratch-heap!) 5905 r)) 5906 5907 (define (%init-main-prepend-reversed xs acc) 5908 (use-main-heap!) 5909 (let loop ((ys xs) (out acc)) 5910 (cond 5911 ((null? ys) 5912 (use-scratch-heap!) 5913 out) 5914 (else 5915 (loop (cdr ys) (cons (car ys) out)))))) 5916 5917 (define (%init-main-pad-piece nbytes) 5918 (use-main-heap!) 5919 (let ((p (%pad-piece nbytes))) 5920 (use-scratch-heap!) 5921 p)) 5922 5923 ;; ----- Global initializers --------------------------------------------- 5924 ;; Returns (values pieces final-ty). For inferred-length array `ty`, 5925 ;; final-ty is a freshly-built array ctype with the resolved length; 5926 ;; otherwise final-ty is `ty` unchanged. 5927 (define (parse-init-global ps ty) 5928 (pmatch (peek ps) 5929 ;; String literal initializer for char[] 5930 (($ tok? (kind STR) (value ,s)) 5931 (guard (and (eq? (ctype-kind ty) 'arr) 5932 (let ((et (car (ctype-ext ty)))) 5933 (or (eq? et %t-i8) (eq? et %t-u8))))) 5934 (advance ps) 5935 (let* ((slen (bytevector-length s)) 5936 (decl (cdr (ctype-ext ty))) 5937 (final (cond ((< decl 0) (+ slen 1)) (else decl))) 5938 (final-ty (cond ((< decl 0) (%init-fixed-arr-type ty final)) 5939 (else ty)))) 5940 (let ((bv (make-bytevector final 0))) 5941 (let loop ((i 0)) 5942 (cond 5943 ((or (= i slen) (>= i final)) 5944 (values (list bv) final-ty)) 5945 (else 5946 (bytevector-u8-set! bv i (bytevector-u8-ref s i)) 5947 (loop (+ i 1)))))))) 5948 ;; Brace-form 5949 (($ tok? (kind PUNCT) (value lbrace)) 5950 (advance ps) 5951 (cond 5952 ((eq? (ctype-kind ty) 'arr) 5953 (let-values (((pieces count) (%parse-init-array-list ps ty))) 5954 (let* ((decl (%init-array-decl-len ty)) 5955 (final-ty (cond ((< decl 0) 5956 (%init-fixed-arr-type ty count)) 5957 (else ty)))) 5958 (values pieces final-ty)))) 5959 ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) 5960 (values (%parse-init-struct-list ps ty) ty)) 5961 (else 5962 ;; Brace-wrapped scalar: { expr } 5963 (let ((piece (%const-init-piece ps ty))) 5964 (cond ((at-punct? ps 'comma) (advance ps))) 5965 (expect-punct ps 'rbrace) 5966 (values (list piece) ty))))) 5967 ;; Bare scalar initializer 5968 (else (values (list (%const-init-piece ps ty)) ty)))) 5969 5970 ;; Returns (values pieces count). `count` is the number of element 5971 ;; initializers actually consumed (used by parse-init-global to resolve 5972 ;; an inferred top-level length). C99 forbids inferred length in 5973 ;; nested array elements, so recursive callers ignore `count`. 5974 ;; 5975 ;; `brace?` controls termination: when #t (the normal case), the loop 5976 ;; consumes elements until `}` is seen. When #f (brace-elision recursion 5977 ;; from C99 §6.7.8 ¶22), the loop consumes exactly `decl` elements from 5978 ;; the parent stream and returns without expecting `}`. In no-brace 5979 ;; mode, a leading `.` or `[` designator targets the enclosing aggregate 5980 ;; — the recursion terminates immediately, padding the unfilled tail. 5981 (define (%parse-init-array-list ps ty) 5982 (%parse-init-array-list/mode ps ty #t)) 5983 5984 (define (%parse-init-array-list/mode ps ty brace?) 5985 ;; Element-list array initializer; assumes `{` already consumed when 5986 ;; brace? is #t. 5987 (let* ((elem (%init-array-elem-type ty)) 5988 (esize (ctype-size elem)) 5989 (decl (%init-array-decl-len ty))) 5990 (let lp ((acc '()) (count 0)) 5991 (cond 5992 ((cond (brace? (at-punct? ps 'rbrace)) 5993 (else (or (>= count (cond ((< decl 0) 0) (else decl))) 5994 (at-punct? ps 'rbrace) 5995 (at-punct? ps 'dot) 5996 (at-punct? ps 'lbrack)))) 5997 (cond (brace? (advance ps))) 5998 ;; Pad to declared length if longer than count. 5999 (let* ((final (cond ((< decl 0) count) (else decl))) 6000 (pad (- final count))) 6001 (values 6002 (cond 6003 ((> pad 0) 6004 (%init-main-reverse 6005 (%init-main-cons (%init-main-pad-piece (* pad esize)) acc))) 6006 (else (%init-main-reverse acc))) 6007 count))) 6008 (else 6009 (let ((piece 6010 (%init-promote-unit 6011 ps 6012 (lambda () 6013 ;; The trailing inter-element comma must be consumed 6014 ;; *inside* the mark/rewind window: advance loads pp/lex 6015 ;; lookahead into iter buffers, which promote-iter-buffers! 6016 ;; then deep-copies into main. Consuming it after the 6017 ;; rewind would leave that lookahead leaking on scratch. 6018 (let ((p 6019 (cond 6020 ((at-punct? ps 'lbrace) 6021 (advance ps) 6022 (%global-init-elem ps elem #f)) 6023 (else 6024 (%global-init-elem ps elem #t))))) 6025 ;; Inter-item comma: consume except for the comma 6026 ;; following our LAST item in no-brace mode — that 6027 ;; one belongs to the enclosing parent. 6028 (cond 6029 (brace? 6030 (cond ((at-punct? ps 'comma) (advance ps)))) 6031 (else 6032 ;; no-brace: consume comma only if more items 6033 ;; remain in our quota. 6034 (cond 6035 ((and (< (+ count 1) 6036 (cond ((< decl 0) 0) (else decl))) 6037 (at-punct? ps 'comma)) 6038 (advance ps))))) 6039 p))))) 6040 (lp (%init-main-prepend-reversed piece acc) (+ count 1)))))))) 6041 6042 (define (%piece-bytesize p) 6043 ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits 6044 ;; one byte per element; a (label-ref . _) emits an 8-byte slot. 6045 (cond 6046 ((bytevector? p) (bytevector-length p)) 6047 ((and (pair? p) (eq? (car p) 'label-ref)) 8) 6048 (else (die #f "init: unknown piece" p)))) 6049 6050 (define (%pieces-bytesize ps-list) 6051 (let loop ((xs ps-list) (n 0)) 6052 (cond ((null? xs) n) 6053 (else (loop (cdr xs) (+ n (%piece-bytesize (car xs)))))))) 6054 6055 (define (%merge-init-entries entries total-size) 6056 ;; entries: list of (abs-offset . piece-list), in source order. 6057 ;; Sort stably by offset (later writes to the same offset win, per C 6058 ;; designated-init semantics) and emit pad pieces in any gaps and at 6059 ;; the tail. Preserves label-ref pieces — we never merge them into a 6060 ;; flat bv. 6061 (let* ((sorted (%init-stable-sort-by-offset entries)) 6062 (out 6063 (let walk ((xs sorted) (cursor 0) (acc '())) 6064 (cond 6065 ((null? xs) 6066 (cond 6067 ((< cursor total-size) 6068 (reverse (cons (%pad-piece (- total-size cursor)) acc))) 6069 (else (reverse acc)))) 6070 (else 6071 (let* ((e (car xs)) 6072 (eoff (car e)) 6073 (epieces (cdr e)) 6074 (esize (%pieces-bytesize epieces)) 6075 (acc1 (cond 6076 ((> eoff cursor) 6077 (cons (%pad-piece (- eoff cursor)) acc)) 6078 (else acc))) 6079 (acc2 (append (reverse epieces) acc1))) 6080 (walk (cdr xs) (+ eoff esize) acc2))))))) 6081 out)) 6082 6083 (define (%init-stable-sort-by-offset entries) 6084 ;; Insertion sort, stable by source order for ties. n is small (one 6085 ;; entry per initialized field) so O(n^2) is fine. 6086 (let lp ((xs entries) (acc '())) 6087 (cond 6088 ((null? xs) acc) 6089 (else 6090 (let ((e (car xs))) 6091 (lp (cdr xs) 6092 (let ins ((ys acc) (head '())) 6093 (cond 6094 ((null? ys) 6095 (append (reverse head) (list e))) 6096 ((<= (car e) (car (car ys))) 6097 (append (reverse head) (cons e ys))) 6098 (else 6099 (ins (cdr ys) (cons (car ys) head))))))))))) 6100 6101 (define (%parse-init-struct-list ps ty) 6102 (%parse-init-struct-list/mode ps ty #t)) 6103 6104 (define (%parse-init-struct-list/mode ps ty brace?) 6105 ;; Struct/union initializer; assumes `{` already consumed when brace?. 6106 ;; In no-brace mode (brace elision, C99 §6.7.8 ¶22), terminate when 6107 ;; positional fields are exhausted, when a `}` is seen (belongs to 6108 ;; the enclosing aggregate), or when a designator (`.`) appears (it 6109 ;; targets the enclosing aggregate). Doesn't consume the trailing 6110 ;; comma after the last field — that belongs to the parent list. 6111 (let* ((fields (%init-struct-fields ty)) 6112 (size (ctype-size ty)) 6113 (union? (eq? (ctype-kind ty) 'union))) 6114 (let lp ((entries '()) (rest fields)) 6115 (cond 6116 ((cond (brace? (at-punct? ps 'rbrace)) 6117 (else (or (null? rest) 6118 (at-punct? ps 'rbrace) 6119 (at-punct? ps 'dot) 6120 ;; Union in brace-elision mode: take one 6121 ;; member then return — the next sibling 6122 ;; initializer belongs to the parent 6123 ;; (C99 §6.7.8 ¶22 + union has one active 6124 ;; member at a time). 6125 (and union? (pair? entries))))) 6126 (cond (brace? (advance ps))) 6127 (%merge-init-entries (reverse entries) size)) 6128 (else 6129 (let* ((designated? (at-punct? ps 'dot)) 6130 (target 6131 (cond 6132 (designated? 6133 (advance ps) 6134 (let ((nt (advance ps))) 6135 (cond 6136 ((not (eq? (tok-kind nt) 'IDENT)) 6137 (die (tok-loc nt) "init: .field expects ident"))) 6138 (let ((f (%cg-find-field fields (tok-value nt)))) 6139 (cond 6140 ((not f) (die (tok-loc nt) "init: no such field" 6141 (tok-value nt)))) 6142 (expect-punct ps 'assign) 6143 f))) 6144 ((null? rest) 6145 (die (tok-loc (peek ps)) "init: too many fields")) 6146 (else (car rest)))) 6147 (fname (car target)) 6148 (fty (car (cdr target))) 6149 (foff (car (cddr target))) 6150 (piece-list 6151 (cond 6152 ((at-punct? ps 'lbrace) 6153 (advance ps) 6154 (%global-init-elem ps fty #f)) 6155 (else 6156 (%global-init-elem ps fty #t)))) 6157 (rest1 6158 (cond 6159 ;; designated init: drop fields up to and including target 6160 (designated? (%init-drop-thru-field fields fname)) 6161 (else (cdr rest))))) 6162 ;; Inter-item comma: consume except for the comma after our 6163 ;; LAST field in no-brace mode (belongs to enclosing list). 6164 (cond 6165 (brace? 6166 (cond ((at-punct? ps 'comma) (advance ps)))) 6167 (else 6168 (cond ((and (not (null? rest1)) 6169 ;; Union in brace-elision mode terminates 6170 ;; after one element regardless of rest1; 6171 ;; that means the comma belongs to the parent. 6172 (not union?) 6173 (at-punct? ps 'comma)) 6174 (advance ps))))) 6175 (lp (cons (cons foff piece-list) entries) rest1))))))) 6176 6177 ;; ----- Local aggregate initializers ------------------------------------ 6178 ;; Emits per-element store sequences via cg ops into the slot of `sm` 6179 ;; (a 'var sym whose slot is the frame offset). Assumes the assignment 6180 ;; `=` has already been consumed. 6181 (define (parse-init-local-aggregate ps sm ty) 6182 (pmatch (peek ps) 6183 ;; Local char[] = "string" — fill from string bytes. 6184 (($ tok? (kind STR) (value ,s)) 6185 (guard (and (eq? (ctype-kind ty) 'arr) 6186 (let ((et (car (ctype-ext ty)))) 6187 (or (eq? et %t-i8) (eq? et %t-u8))))) 6188 (advance ps) 6189 ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the 6190 ;; sm-type still records the original (size=-1) ctype — `sizeof(x)` 6191 ;; in the body would not see the resolved length. The slot is also 6192 ;; sized off the original (= 1 byte), so the path is pre-existing 6193 ;; broken; we don't paper over it here. Real C bootstrap code uses 6194 ;; statics/globals for inferred-length arrays. 6195 (let* ((slen (bytevector-length s)) 6196 (decl (cdr (ctype-ext ty))) 6197 (final (cond ((< decl 0) (+ slen 1)) (else decl)))) 6198 ;; Emit byte stores for each char in s, plus NUL for the 6199 ;; trailing slot if final > slen. 6200 (let loop ((i 0)) 6201 (cond 6202 ((>= i final) #t) 6203 (else 6204 (let ((b (cond ((< i slen) (bytevector-u8-ref s i)) 6205 (else 0))) 6206 (off (+ (sym-slot sm) i))) 6207 (%push-frame-elem-lval ps off %t-u8) 6208 (cg-push-imm (ps-cg ps) %t-u8 b) 6209 (cg-assign (ps-cg ps)) 6210 (cg-pop (ps-cg ps)) 6211 (loop (+ i 1)))))))) 6212 (($ tok? (kind PUNCT) (value lbrace)) 6213 (advance ps) 6214 (cond 6215 ((eq? (ctype-kind ty) 'arr) 6216 (%parse-init-local-array-list ps sm (sym-slot sm) ty)) 6217 ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) 6218 (%parse-init-local-struct-list ps sm (sym-slot sm) ty)) 6219 (else (die #f "init local: brace on scalar?")))) 6220 (else (die (tok-loc (peek ps)) "init local aggregate?")))) 6221 6222 (define (%push-frame-elem-lval ps base-off ty) 6223 (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t))) 6224 6225 (define (%parse-init-local-array-list ps sm base-off ty) 6226 (%parse-init-local-array-list/mode ps sm base-off ty #t)) 6227 6228 (define (%parse-init-local-array-list/mode ps sm base-off ty brace?) 6229 (let* ((elem (%init-array-elem-type ty)) 6230 (esize (ctype-size elem)) 6231 (decl (%init-array-decl-len ty))) 6232 (let lp ((i 0)) 6233 (cond 6234 ((cond (brace? (at-punct? ps 'rbrace)) 6235 (else (or (>= i (cond ((< decl 0) 0) (else decl))) 6236 (at-punct? ps 'rbrace) 6237 (at-punct? ps 'dot) 6238 (at-punct? ps 'lbrack)))) 6239 (cond (brace? (advance ps))) 6240 ;; Inferred-length auto path is pre-existing broken (slot 6241 ;; allocated off size=-1, sm-type unfixed). See note in 6242 ;; parse-init-local-aggregate STR branch. 6243 ;; Zero out remaining slots if any (declared length > i). 6244 (let ((final (cond ((< decl 0) i) (else decl)))) 6245 (let zlp ((k i)) 6246 (cond 6247 ((>= k final) #t) 6248 (else 6249 (let ((off (+ base-off (* k esize)))) 6250 (cond 6251 ((or (eq? (ctype-kind elem) 'arr) 6252 (eq? (ctype-kind elem) 'struct) 6253 (eq? (ctype-kind elem) 'union)) 6254 ;; Zero each byte in this aggregate slot. 6255 (let zb ((j 0)) 6256 (cond 6257 ((>= j esize) #t) 6258 (else 6259 (%push-frame-elem-lval ps (+ off j) %t-u8) 6260 (cg-push-imm (ps-cg ps) %t-u8 0) 6261 (cg-assign (ps-cg ps)) 6262 (cg-pop (ps-cg ps)) 6263 (zb (+ j 1)))))) 6264 (else 6265 (%push-frame-elem-lval ps off elem) 6266 (cg-push-imm (ps-cg ps) elem 0) 6267 (cg-assign (ps-cg ps)) 6268 (cg-pop (ps-cg ps))))) 6269 (zlp (+ k 1))))))) 6270 (else 6271 (let ((eoff (+ base-off (* i esize)))) 6272 (cond 6273 ((at-punct? ps 'lbrace) 6274 (advance ps) 6275 (%local-init-elem ps sm eoff elem #f)) 6276 (else 6277 (%local-init-elem ps sm eoff elem #t))) 6278 ;; Inter-item comma: in no-brace mode, don't eat the comma 6279 ;; that follows our LAST item (it belongs to the parent). 6280 (cond 6281 (brace? 6282 (cond ((at-punct? ps 'comma) (advance ps)))) 6283 (else 6284 (cond ((and (< (+ i 1) 6285 (cond ((< decl 0) 0) (else decl))) 6286 (at-punct? ps 'comma)) 6287 (advance ps))))) 6288 (lp (+ i 1)))))))) 6289 6290 (define (%bv-in-list? bv xs) 6291 (cond ((null? xs) #f) 6292 ((equal? bv (car xs)) #t) 6293 (else (%bv-in-list? bv (cdr xs))))) 6294 6295 ;; Does any leaf-name of `f` (a struct/union field tuple, possibly with 6296 ;; a nameless anon-aggregate type) appear in `seen`? Used by the 6297 ;; local-struct zero-pass to skip an anonymous member whose sub-field 6298 ;; was already written through a designator like `.a` (C11 §6.7.2.1). 6299 (define (%anon-touched? f seen) 6300 (let ((fn (car f))) 6301 (cond 6302 (fn (%bv-in-list? fn seen)) 6303 (else 6304 (let ((k (ctype-kind (cadr f)))) 6305 (cond 6306 ((or (eq? k 'struct) (eq? k 'union)) 6307 (let lp ((xs (car (cddr (ctype-ext (cadr f)))))) 6308 (cond 6309 ((null? xs) #f) 6310 ((%anon-touched? (car xs) seen) #t) 6311 (else (lp (cdr xs)))))) 6312 (else #f))))))) 6313 6314 (define (%emit-zero-field ps base-off f) 6315 ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b) 6316 ;; and silently drops the rest. Compute absolute byte offsets via 6317 ;; nested binary +. 6318 (let* ((fty (car (cdr f))) 6319 (foff (car (cddr f))) 6320 (fsize (ctype-size fty)) 6321 (start-off (+ base-off foff))) 6322 (let zb ((j 0)) 6323 (cond 6324 ((>= j fsize) #t) 6325 (else 6326 (%push-frame-elem-lval ps (+ start-off j) %t-u8) 6327 (cg-push-imm (ps-cg ps) %t-u8 0) 6328 (cg-assign (ps-cg ps)) 6329 (cg-pop (ps-cg ps)) 6330 (zb (+ j 1))))))) 6331 6332 (define (%parse-init-local-struct-list ps sm base-off ty) 6333 (%parse-init-local-struct-list/mode ps sm base-off ty #t)) 6334 6335 (define (%parse-init-local-struct-list/mode ps sm base-off ty brace?) 6336 ;; Track each initialized field by name in `seen`; at the closing brace 6337 ;; zero every field NOT in `seen`. Tracking by name (rather than 6338 ;; positional "remaining" fields) handles a designator jumping 6339 ;; backwards correctly — e.g. `{.y = 5}` must still zero `x`. 6340 ;; C requires every unmentioned member of an aggregate with at least 6341 ;; one designator/initializer to be zeroed (C11 §6.7.9 ¶21). 6342 ;; 6343 ;; In no-brace mode (brace elision, C99 §6.7.8 ¶22): terminate when 6344 ;; positional fields exhausted, on `}` (parent's), or on `.` designator 6345 ;; (targets parent). Don't consume trailing comma after our last field. 6346 (let ((fields (%init-struct-fields ty))) 6347 (let lp ((rest fields) (seen '())) 6348 (cond 6349 ((cond (brace? (at-punct? ps 'rbrace)) 6350 (else (or (null? rest) 6351 (at-punct? ps 'rbrace) 6352 (at-punct? ps 'dot)))) 6353 (cond (brace? (advance ps))) 6354 (for-each 6355 (lambda (f) 6356 (cond ((not (%anon-touched? f seen)) 6357 (%emit-zero-field ps base-off f)))) 6358 fields)) 6359 (else 6360 (let* ((designated? (at-punct? ps 'dot)) 6361 (target 6362 (cond 6363 (designated? 6364 (advance ps) 6365 (let ((nt (advance ps))) 6366 (let ((f (%cg-find-field fields (tok-value nt)))) 6367 (cond 6368 ((not f) (die (tok-loc nt) "init: no such field" 6369 (tok-value nt)))) 6370 (expect-punct ps 'assign) 6371 f))) 6372 ((null? rest) 6373 (die (tok-loc (peek ps)) "init: too many fields")) 6374 (else (car rest)))) 6375 (fname (car target)) 6376 (fty (car (cdr target))) 6377 (foff (car (cddr target))) 6378 (eoff (+ base-off foff))) 6379 (cond 6380 ((at-punct? ps 'lbrace) 6381 (advance ps) 6382 (%local-init-elem ps sm eoff fty #f)) 6383 (else 6384 (%local-init-elem ps sm eoff fty #t))) 6385 (let ((rest1 6386 (cond 6387 (designated? (%init-drop-thru-field fields fname)) 6388 (else (cdr rest))))) 6389 ;; Inter-item comma: in no-brace mode, don't eat the comma 6390 ;; that follows our LAST field (belongs to enclosing list). 6391 (cond 6392 (brace? 6393 (cond ((at-punct? ps 'comma) (advance ps)))) 6394 (else 6395 (cond ((and (not (null? rest1)) 6396 (at-punct? ps 'comma)) 6397 (advance ps))))) 6398 (lp rest1 (cons fname seen))))))))) 6399 6400 6401 ;; parse-fn-body: bind the fn-sym for recursive lookup, then parse the 6402 ;; body. Heap discipline is handled at the parse-decl-or-fn boundary — 6403 ;; the body runs in scratch like the rest of the decl, and surviving 6404 ;; roots (block-statics, string literals, block-scope tags that escape 6405 ;; via the global tables) are promoted en masse there. See the Phase 3 6406 ;; section above parse-translation-unit. 6407 (define (parse-fn-body ps sto name dt) 6408 (scope-bind! ps name (%sym name 'fn (or sto 'extern) dt #f #t)) 6409 (%parse-fn-body-inner ps name dt)) 6410 6411 (define (%parse-fn-body-inner ps name dt) 6412 (let* ((e (ctype-ext dt)) (ret (car e)) 6413 (par (cadr e)) (var (car (cddr e)))) 6414 (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var))) 6415 (ps-fn-ctx-set! ps 6416 (%fn-ctx name ret (map cdr psyms) var '())) 6417 (scope-enter! ps) 6418 (for-each (lambda (p) (scope-bind! ps (car p) (cdr p))) 6419 psyms) 6420 (expect-punct ps 'lbrace) 6421 (parse-cstmt-body ps) 6422 (expect-punct ps 'rbrace) 6423 (scope-leave! ps) 6424 (ps-fn-ctx-set! ps #f) 6425 (cg-fn-end (ps-cg ps))))) 6426 6427 (define (parse-stmt ps) 6428 (pmatch (peek ps) 6429 (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps)) 6430 (($ tok? (kind KW) (value if)) (parse-if-stmt ps)) 6431 (($ tok? (kind KW) (value while)) (parse-while-stmt ps)) 6432 (($ tok? (kind KW) (value do)) (parse-do-stmt ps)) 6433 (($ tok? (kind KW) (value for)) (parse-for-stmt ps)) 6434 (($ tok? (kind KW) (value switch)) (parse-switch-stmt ps)) 6435 (($ tok? (kind KW) (value return)) (parse-return-stmt ps)) 6436 (($ tok? (kind KW) (value goto)) (parse-goto-stmt ps)) 6437 (($ tok? (kind KW) (value break)) 6438 (advance ps) (expect-punct ps 'semi) (do-break ps)) 6439 (($ tok? (kind KW) (value continue)) 6440 (advance ps) (expect-punct ps 'semi) (do-continue ps)) 6441 (($ tok? (kind KW) (value case)) (parse-case-stmt ps)) 6442 (($ tok? (kind KW) (value default)) (parse-default-stmt ps)) 6443 (($ tok? (kind IDENT)) 6444 (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT) 6445 (eq? (tok-value (peek2 ps)) 'colon))) 6446 (parse-labelled-stmt ps)) 6447 (else 6448 (cond ((stmt-starts-decl? ps) (parse-local-decl ps)) 6449 (else (parse-expr-stmt ps)))))) 6450 6451 (define (stmt-starts-decl? ps) 6452 (let ((t (peek ps))) 6453 (or (%tok-decl-start? ps t) 6454 ;; Storage classes only appear at declaration position; check here 6455 ;; rather than fold them into %tok-decl-start? (which is also 6456 ;; used for cast typenames where storage classes are illegal). 6457 (pmatch t 6458 (($ tok? (kind KW) (value ,v)) 6459 (or (eq? v 'auto) (eq? v 'register) (eq? v 'static) 6460 (eq? v 'extern) (eq? v 'typedef))) 6461 (else #f))))) 6462 6463 (define (parse-local-decl ps) 6464 (let-values (((sto b) (parse-decl-spec ps))) 6465 (cond 6466 ((at-punct? ps 'semi) (advance ps) #t) 6467 (else 6468 (let lp () 6469 (let-values (((n t) (parse-declarator ps b))) 6470 (handle-decl ps sto n t) 6471 (cond ((at-punct? ps 'comma) (advance ps) (lp)) 6472 (else (expect-punct ps 'semi) #t)))))))) 6473 6474 (define (parse-cstmt ps) 6475 (expect-punct ps 'lbrace) 6476 (scope-enter! ps) 6477 (parse-cstmt-body ps) 6478 (scope-leave! ps) 6479 (expect-punct ps 'rbrace) #t) 6480 6481 (define (parse-cstmt-body ps) 6482 (cond 6483 ((at-punct? ps 'rbrace) #t) 6484 ((eq? (tok-kind (peek ps)) 'EOF) 6485 (die (tok-loc (peek ps)) "EOF in cstmt")) 6486 (else (parse-stmt ps) (parse-cstmt-body ps)))) 6487 6488 (define (parse-compound-stmt ps) (parse-cstmt ps)) 6489 6490 (define (parse-if-stmt ps) 6491 (expect-kw ps 'if) 6492 (expect-punct ps 'lparen) 6493 (parse-expr ps) (rval! ps) 6494 (expect-punct ps 'rparen) 6495 (cg-ifelse (ps-cg ps) 6496 (lambda () (parse-stmt ps)) 6497 (lambda () 6498 (cond ((at-kw? ps 'else) 6499 (advance ps) (parse-stmt ps)) 6500 (else #t))))) 6501 6502 ;; cg-loop's body-thunk receives the tag from cg; the parser threads 6503 ;; it into break/continue via loop-ctx. 6504 6505 (define (parse-while-stmt ps) 6506 (expect-kw ps 'while) 6507 (expect-punct ps 'lparen) 6508 (cg-loop (ps-cg ps) 6509 (lambda () (parse-expr ps) (rval! ps)) 6510 (lambda (tag) 6511 (expect-punct ps 'rparen) 6512 (push-loop-ctx! ps 'while tag #t) 6513 (parse-stmt ps) 6514 (pop-loop-ctx! ps))) #t) 6515 6516 (define (parse-do-stmt ps) 6517 (expect-kw ps 'do) 6518 ;; `continue` in a do-while must jump to the *cond test* (C11 6519 ;; §6.8.6.2 ¶2), not to the top of the body. The scoped loop labels 6520 ;; `.top` at the condition test and `.end` after the loop, so bare 6521 ;; %continue / %break bind through hex2++ local lookup. 6522 ;; 6523 ;; Layout: 6524 ;; .scope 6525 ;; :.body 6526 ;; <body> 6527 ;; :.top ; %continue jumps here 6528 ;; <cond> 6529 ;; %if_eqz(c, %break) 6530 ;; %b(&.body) 6531 ;; :.end 6532 ;; .endscope 6533 (let* ((cg (ps-cg ps)) 6534 (tag (%cg-fresh-loop-tag cg))) 6535 (%cg-emit-many cg (list ".scope\n" 6536 ":.body\n")) 6537 (push-loop-ctx! ps 'do tag #t) 6538 (parse-stmt ps) 6539 (pop-loop-ctx! ps) 6540 (expect-kw ps 'while) (expect-punct ps 'lparen) 6541 (%cg-emit-many cg (list ":.top\n")) 6542 (parse-expr ps) (rval! ps) 6543 (expect-punct ps 'rparen) (expect-punct ps 'semi) 6544 (let ((c (cg-pop cg))) 6545 (%cg-load-opnd-into cg c 't0) 6546 (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n"))) 6547 (%cg-emit-many cg (list "%b(&.body)\n" 6548 ":.end\n" 6549 ".endscope\n"))) 6550 #t) 6551 6552 (define (parse-for-stmt ps) 6553 (expect-kw ps 'for) (expect-punct ps 'lparen) 6554 (scope-enter! ps) 6555 (cond 6556 ((at-punct? ps 'semi) (advance ps)) 6557 ((stmt-starts-decl? ps) (parse-local-decl ps)) 6558 (else (parse-expr ps) (cg-pop (ps-cg ps)) 6559 (expect-punct ps 'semi))) 6560 (let* ((cg (ps-cg ps)) 6561 (cond-toks (cond 6562 ((at-punct? ps 'semi) '()) 6563 (else (collect-til-top-punct ps 'semi "EOF in for-cond")))) 6564 (_ (expect-punct ps 'semi)) 6565 (step-toks (collect-til-rparen ps)) 6566 (_ (expect-punct ps 'rparen)) 6567 (tag (%cg-fresh-loop-tag cg))) 6568 ;; A C `continue` in a for-loop must run the step expression before 6569 ;; retesting the condition. Arrange the loop as: 6570 ;; jump test; top: step; test: condition; body; jump top 6571 (%cg-emit-many cg (list ".scope\n" 6572 "%b(&.test)\n" 6573 ":.top\n")) 6574 (parse-saved-expr-stmt ps step-toks) 6575 (%cg-emit-many cg (list ":.test\n")) 6576 (cond 6577 ((null? cond-toks) (cg-push-imm cg %t-i32 1)) 6578 (else (parse-saved-expr ps cond-toks) (rval! ps))) 6579 (let ((c (cg-pop cg))) 6580 (%cg-load-opnd-into cg c 't0) 6581 (%cg-emit-many cg (list "%if_eqz(t0, { %break })\n"))) 6582 (push-loop-ctx! ps 'for tag #t) 6583 (parse-stmt ps) 6584 (pop-loop-ctx! ps) 6585 (%cg-emit-many cg (list "%b(&.top)\n" 6586 ":.end\n" 6587 ".endscope\n"))) 6588 (scope-leave! ps) #t) 6589 6590 (define (parse-saved-expr ps toks) 6591 (let ((sv (ps-iter ps))) 6592 (ps-iter-set! ps (make-list-iter (append toks (list (make-tok 'EOF #f #f))))) 6593 (parse-expr ps) 6594 (ps-iter-set! ps sv))) 6595 6596 (define (parse-saved-expr-stmt ps toks) 6597 (cond 6598 ((null? toks) #t) 6599 (else (parse-saved-expr ps toks) (cg-pop (ps-cg ps))))) 6600 6601 (define (collect-til-top-punct ps punct err) 6602 (let loop ((acc '()) (d 0)) 6603 (let ((t (peek ps))) 6604 (cond 6605 ((eq? (tok-kind t) 'EOF) 6606 (die (tok-loc t) err)) 6607 ((and (zero? d) (eq? (tok-kind t) 'PUNCT) 6608 (eq? (tok-value t) punct)) (reverse acc)) 6609 (else 6610 (let ((nt (advance ps))) 6611 (loop (cons nt acc) 6612 (cond ((not (eq? (tok-kind nt) 'PUNCT)) d) 6613 ((or (eq? (tok-value nt) 'lparen) 6614 (eq? (tok-value nt) 'lbrack)) (+ d 1)) 6615 ((or (eq? (tok-value nt) 'rparen) 6616 (eq? (tok-value nt) 'rbrack)) (- d 1)) 6617 (else d))))))))) 6618 6619 (define (collect-til-rparen ps) 6620 (collect-til-top-punct ps 'rparen "EOF in for-step")) 6621 6622 (define (parse-switch-stmt ps) 6623 (expect-kw ps 'switch) (expect-punct ps 'lparen) 6624 (parse-expr ps) (rval! ps) 6625 (expect-punct ps 'rparen) 6626 ;; Switch's break-target tag is the swctx's end-tag — cg owns it, 6627 ;; and we read it back so cg-break inside the switch body emits a 6628 ;; tag cg actually labels. 6629 (let* ((sw (cg-switch-begin (ps-cg ps))) 6630 (tg (swctx-end-tag sw))) 6631 (push-loop-ctx-sw! ps 'switch tg sw) 6632 (parse-stmt ps) 6633 (pop-loop-ctx! ps) 6634 (cg-switch-end (ps-cg ps) sw))) 6635 6636 (define (parse-case-stmt ps) 6637 (expect-kw ps 'case) 6638 (let ((v (parse-const-int ps))) 6639 (expect-punct ps 'colon) 6640 (cg-switch-case (ps-cg ps) (innermost-sw ps) v) 6641 (parse-stmt ps))) 6642 6643 (define (parse-default-stmt ps) 6644 (expect-kw ps 'default) (expect-punct ps 'colon) 6645 (cg-switch-default (ps-cg ps) (innermost-sw ps)) 6646 (parse-stmt ps)) 6647 6648 (define (parse-return-stmt ps) 6649 (expect-kw ps 'return) 6650 (cond 6651 ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps))) 6652 (else 6653 (let* ((fc (ps-fn-ctx ps)) 6654 (rty (and fc (fn-ctx-return-type fc))) 6655 (rk (and rty (ctype-kind rty)))) 6656 (cond 6657 ;; Struct/union return — leave the source as a struct lval; 6658 ;; cg-return copies bytes into the function's return slot. 6659 ;; (P1.md §Arguments and return values.) 6660 ((or (eq? rk 'struct) (eq? rk 'union)) 6661 (parse-expr ps) 6662 (cg-return (ps-cg ps))) 6663 (else 6664 (parse-expr ps) (rval! ps) 6665 (cond 6666 ((and fc (not (eq? rk 'void))) 6667 (cg-cast (ps-cg ps) rty)) 6668 (else #t)) 6669 (cg-return (ps-cg ps))))) 6670 (expect-punct ps 'semi)))) 6671 6672 (define (parse-goto-stmt ps) 6673 (expect-kw ps 'goto) 6674 (let ((t (advance ps))) 6675 (cond ((eq? (tok-kind t) 'IDENT) 6676 (cg-goto (ps-cg ps) (tok-value t))) 6677 (else (die (tok-loc t) "label?")))) 6678 (expect-punct ps 'semi)) 6679 6680 (define (parse-labelled-stmt ps) 6681 (let ((t (advance ps))) 6682 (expect-punct ps 'colon) 6683 (cg-emit-label (ps-cg ps) (tok-value t)) 6684 (parse-stmt ps))) 6685 6686 (define (parse-expr-stmt ps) 6687 (cond 6688 ((at-punct? ps 'semi) (advance ps) #t) 6689 (else (parse-expr ps) (cg-pop (ps-cg ps)) 6690 (expect-punct ps 'semi)))) 6691 6692 (define (push-loop-ctx! ps k tg hc) 6693 (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps)))) 6694 (define (push-loop-ctx-sw! ps k tg sw) 6695 (ps-loops-set! ps 6696 (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps)))) 6697 (define (pop-loop-ctx! ps) 6698 (ps-loops-set! ps (cdr (ps-loops ps)))) 6699 (define (do-break ps) 6700 (let ((c (innermost-loop ps))) 6701 (cond 6702 ((not c) (die #f "break outside")) 6703 ((eq? (loop-ctx-kind c) 'switch) 6704 (cg-break (ps-cg ps) (car (loop-ctx-tag c)))) 6705 (else (cg-break (ps-cg ps) (loop-ctx-tag c)))))) 6706 (define (do-continue ps) 6707 (let ((c (innermost-cont ps))) 6708 (cond ((not c) (die #f "cont outside")) 6709 (else (cg-continue (ps-cg ps) (loop-ctx-tag c)))))) 6710 (define (innermost-loop ps) 6711 (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps))))) 6712 (define (innermost-cont ps) 6713 (let lp ((xs (ps-loops ps))) 6714 (cond ((null? xs) #f) 6715 ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs))) 6716 (else (car xs))))) 6717 (define (innermost-sw ps) 6718 (let lp ((xs (ps-loops ps))) 6719 (cond ((null? xs) (die #f "case outside switch")) 6720 ((eq? (loop-ctx-kind (car xs)) 'switch) 6721 (cdr (loop-ctx-tag (car xs)))) 6722 (else (lp (cdr xs)))))) 6723 6724 (define %binop-bp 6725 (list 6726 (cons 'comma (cons 1 2)) 6727 (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3)) 6728 (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3)) 6729 (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3)) 6730 (cons 'shl-eq (cons 4 3)) (cons 'shr-eq (cons 4 3)) 6731 (cons 'amp-eq (cons 4 3)) (cons 'caret-eq (cons 4 3)) 6732 (cons 'bar-eq (cons 4 3)) (cons 'qmark (cons 6 5)) 6733 (cons 'lor (cons 10 11)) (cons 'land (cons 20 21)) 6734 (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41)) 6735 (cons 'amp (cons 50 51)) 6736 (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61)) 6737 (cons 'lt (cons 70 71)) (cons 'le (cons 70 71)) 6738 (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71)) 6739 (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81)) 6740 (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91)) 6741 (cons 'star (cons 100 101)) (cons 'slash (cons 100 101)) 6742 (cons 'pct (cons 100 101)))) 6743 6744 (define (binop-bp-of s) (alist-ref/eq s %binop-bp)) 6745 6746 (define (punct-to-cgop s) 6747 (cond ((eq? s 'plus) 'add) ((eq? s 'minus) 'sub) 6748 ((eq? s 'star) 'mul) ((eq? s 'slash) 'div) 6749 ((eq? s 'pct) 'rem) ((eq? s 'amp) 'and) 6750 ((eq? s 'bar) 'or) ((eq? s 'caret) 'xor) 6751 ((eq? s 'shl) 'shl) ((eq? s 'shr) 'shr) 6752 ((eq? s 'eq2) 'eq) ((eq? s 'ne) 'ne) 6753 ((eq? s 'lt) 'lt) ((eq? s 'le) 'le) 6754 ((eq? s 'gt) 'gt) ((eq? s 'ge) 'ge) 6755 (else (die #f "binop" s)))) 6756 6757 (define (compound-op s) 6758 (cond ((eq? s 'plus-eq) 'add) ((eq? s 'minus-eq) 'sub) 6759 ((eq? s 'star-eq) 'mul) ((eq? s 'slash-eq) 'div) 6760 ((eq? s 'pct-eq) 'rem) ((eq? s 'shl-eq) 'shl) 6761 ((eq? s 'shr-eq) 'shr) ((eq? s 'amp-eq) 'and) 6762 ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq) 'or) 6763 (else #f))) 6764 6765 (define (parse-expr ps) (parse-expr-bp ps 0)) 6766 6767 (define (parse-expr-bp ps mn) 6768 (parse-unary ps) (parse-binary-rhs ps mn)) 6769 6770 (define (parse-binary-rhs ps mn) 6771 (let ((t (peek ps))) 6772 (cond 6773 ((not (eq? (tok-kind t) 'PUNCT)) #t) 6774 (else 6775 (let ((bp (binop-bp-of (tok-value t)))) 6776 (cond 6777 ((not bp) #t) 6778 ((< (car bp) mn) #t) 6779 (else 6780 (let ((op (tok-value t)) (rb (cdr bp))) 6781 (advance ps) 6782 (cond 6783 ((eq? op 'comma) 6784 ;; lhs has been parsed; discard it and evaluate rhs. 6785 ;; Result of the comma expr is the rhs's rval. 6786 (cg-pop (ps-cg ps)) 6787 (parse-expr-bp ps rb) (rval! ps)) 6788 ((eq? op 'assign) 6789 ;; Struct/union assignment must memcpy the whole 6790 ;; aggregate. The scalar cg-assign path loads/stores 6791 ;; via a single 8-byte register, dropping any field at 6792 ;; offset >= 8. Detect via the lhs (already on the 6793 ;; vstack) and route to cg-assign-struct, which keeps 6794 ;; rhs as an lvalue and emits a memcpy. 6795 (let* ((lhs-top (cg-top (ps-cg ps))) 6796 (lk (cond ((and (opnd? lhs-top) (opnd-lval? lhs-top)) 6797 (ctype-kind (opnd-type lhs-top))) 6798 (else #f)))) 6799 (cond 6800 ((or (eq? lk 'struct) (eq? lk 'union)) 6801 (parse-expr-bp ps rb) 6802 (cg-assign-struct (ps-cg ps))) 6803 (else 6804 (parse-expr-bp ps rb) (rval! ps) 6805 (cg-assign (ps-cg ps)))))) 6806 ((compound-op op) 6807 (let ((b (compound-op op))) 6808 (cg-dup (ps-cg ps)) 6809 (cg-load (ps-cg ps)) 6810 (parse-expr-bp ps rb) (rval! ps) 6811 ;; Skip the usual arithmetic conversion for shift 6812 ;; compounds (`<<=` / `>>=`) so the lhs's signedness 6813 ;; survives; cg-binop's shr branch then picks the 6814 ;; right arithmetic-vs-logical opcode. 6815 (cond ((or (eq? b 'shl) (eq? b 'shr)) #t) 6816 (else (cg-arith-conv (ps-cg ps)))) 6817 (cg-binop (ps-cg ps) b) 6818 (cg-assign (ps-cg ps)))) 6819 ((eq? op 'qmark) 6820 (rval! ps) 6821 (cg-ifelse-merge (ps-cg ps) 6822 (lambda () 6823 (parse-expr-bp ps 0) (rval! ps)) 6824 (lambda () 6825 (expect-punct ps 'colon) 6826 (parse-expr-bp ps rb) (rval! ps)))) 6827 ((eq? op 'land) 6828 (rval! ps) 6829 ;; Both branches must push i32 0/1. Right side is 6830 ;; coerced via `cg-cast bool` so the merge slot 6831 ;; carries i32 (per §H.2). 6832 (cg-ifelse-merge (ps-cg ps) 6833 (lambda () 6834 (parse-expr-bp ps rb) (rval! ps) 6835 (cg-cast (ps-cg ps) %t-bool) 6836 (cg-cast (ps-cg ps) %t-i32)) 6837 (lambda () 6838 (cg-push-imm (ps-cg ps) %t-i32 0)))) 6839 ((eq? op 'lor) 6840 (rval! ps) 6841 (cg-ifelse-merge (ps-cg ps) 6842 (lambda () 6843 (cg-push-imm (ps-cg ps) %t-i32 1)) 6844 (lambda () 6845 (parse-expr-bp ps rb) (rval! ps) 6846 (cg-cast (ps-cg ps) %t-bool) 6847 (cg-cast (ps-cg ps) %t-i32)))) 6848 (else 6849 (rval! ps) (cg-promote (ps-cg ps)) 6850 (parse-expr-bp ps rb) (rval! ps) 6851 (cg-promote (ps-cg ps)) 6852 ;; Shifts (C 6.5.7) only require integer promotion of 6853 ;; each operand individually; the usual arithmetic 6854 ;; conversion would force the lhs into an unsigned 6855 ;; common type when the rhs is unsigned, breaking 6856 ;; arithmetic-shift semantics for `signed >> unsigned`. 6857 (cond ((or (eq? op 'shl) (eq? op 'shr)) #t) 6858 (else (cg-arith-conv (ps-cg ps)))) 6859 (cg-binop (ps-cg ps) (punct-to-cgop op)))) 6860 (parse-binary-rhs ps mn))))))))) 6861 6862 (define (parse-unary ps) 6863 (pmatch (peek ps) 6864 (($ tok? (kind PUNCT) (value amp)) 6865 (advance ps) (parse-unary ps) 6866 (cg-take-addr (ps-cg ps))) 6867 (($ tok? (kind PUNCT) (value star)) 6868 (advance ps) (parse-unary ps) (rval! ps) 6869 (cg-push-deref (ps-cg ps))) 6870 (($ tok? (kind PUNCT) (value plus)) 6871 (advance ps) (parse-unary ps) 6872 (rval! ps) (cg-promote (ps-cg ps))) 6873 (($ tok? (kind PUNCT) (value minus)) 6874 (advance ps) (parse-unary ps) 6875 (rval! ps) (cg-promote (ps-cg ps)) 6876 (cg-unop (ps-cg ps) 'neg)) 6877 (($ tok? (kind PUNCT) (value tilde)) 6878 (advance ps) (parse-unary ps) 6879 (rval! ps) (cg-promote (ps-cg ps)) 6880 (cg-unop (ps-cg ps) 'bnot)) 6881 (($ tok? (kind PUNCT) (value bang)) 6882 (advance ps) (parse-unary ps) (rval! ps) 6883 (cg-unop (ps-cg ps) 'lnot)) 6884 (($ tok? (kind PUNCT) (value inc)) 6885 (advance ps) (parse-unary ps) 6886 (cg-dup (ps-cg ps)) 6887 (cg-load (ps-cg ps)) 6888 (cg-push-imm (ps-cg ps) %t-i32 1) 6889 (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps))) 6890 (($ tok? (kind PUNCT) (value dec)) 6891 (advance ps) (parse-unary ps) 6892 (cg-dup (ps-cg ps)) 6893 (cg-load (ps-cg ps)) 6894 (cg-push-imm (ps-cg ps) %t-i32 1) 6895 (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps))) 6896 (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps)) 6897 (($ tok? (kind KW) (value sizeof)) 6898 (advance ps) 6899 (cond 6900 ((at-punct? ps 'lparen) 6901 (advance ps) 6902 (cond 6903 ((token-is-decl? ps) 6904 (let*-values (((_sto bty) (parse-decl-spec ps)) 6905 ((_n ty) (parse-declarator ps bty))) 6906 (expect-punct ps 'rparen) 6907 (cg-push-imm (ps-cg ps) %t-u64 6908 (max (ctype-size ty) 0)))) 6909 (else 6910 ;; sizeof(EXPR): C semantics — operand is NOT evaluated. 6911 ;; Snapshot cg state, parse the expr to learn its type, 6912 ;; then rewind to discard any code emission and vstack 6913 ;; pushes the parse incurred (e.g. `sizeof(x++)` must not 6914 ;; increment x). cf. CC.md §Expressions. 6915 (let ((tag (cg-snapshot (ps-cg ps)))) 6916 (parse-expr ps) (expect-punct ps 'rparen) 6917 (let* ((tp (cg-top (ps-cg ps))) 6918 (sz (max (ctype-size (opnd-type tp)) 0))) 6919 (cg-rewind (ps-cg ps) tag) 6920 (cg-push-imm (ps-cg ps) %t-u64 sz)))))) 6921 (else 6922 ;; sizeof EXPR (no parens) — same no-eval rule. 6923 (let ((tag (cg-snapshot (ps-cg ps)))) 6924 (parse-unary ps) 6925 (let* ((tp (cg-top (ps-cg ps))) 6926 (sz (max (ctype-size (opnd-type tp)) 0))) 6927 (cg-rewind (ps-cg ps) tag) 6928 (cg-push-imm (ps-cg ps) %t-u64 sz)))))) 6929 (else (parse-postfix ps)))) 6930 6931 (define (token-is-decl? ps) (%tok-decl-start? ps (peek ps))) 6932 6933 (define (parse-cast-or-unary ps) 6934 (cond 6935 ((or (%tok-decl-start? ps (peek2 ps)) 6936 ;; A leading GNU attribute on the cast typename 6937 ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`) — eaten 6938 ;; by parse-decl-spec along with the rest of the decl-spec. 6939 (let ((t (peek2 ps))) 6940 (and (eq? (tok-kind t) 'KW) (eq? (tok-value t) '__attribute__)))) 6941 (advance ps) 6942 (let*-values (((_sto bty) (parse-decl-spec ps)) 6943 ((_n ty) (parse-declarator ps bty))) 6944 (expect-punct ps 'rparen) 6945 (cond 6946 ;; (T){ ... } — compound literal (C99 §6.5.2.5). Looks like a 6947 ;; cast at the typename level but disambiguates on the 6948 ;; following `{` and is a postfix lvalue, not a cast operator. 6949 ((at-punct? ps 'lbrace) (parse-compound-literal ps ty)) 6950 (else 6951 (parse-unary ps) 6952 ;; Cast operand undergoes lvalue conversion first (C semantics): 6953 ;; arrays decay to pointers, lvals become rvals. cg-cast then 6954 ;; bit-casts the resulting rval to the target type. 6955 (rval! ps) 6956 (cg-cast (ps-cg ps) ty))))) 6957 (else (advance ps) (parse-expr ps) 6958 (expect-punct ps 'rparen) 6959 (parse-postfix-rest ps)))) 6960 6961 ;; -------------------------------------------------------------------- 6962 ;; Compound literals (C99 §6.5.2.5): (T){ init-list } 6963 ;; 6964 ;; Block scope — allocate a fresh frame slot sized for T, drive the 6965 ;; existing local-aggregate initializer path against it, then push a 6966 ;; frame lval typed as T. The literal is an lvalue with automatic 6967 ;; storage tied to the enclosing block, so &literal, literal.field, 6968 ;; literal[i], byval pass, and array decay all chain through the 6969 ;; existing primitives (cg-take-addr / cg-push-field / cg-decay-array 6970 ;; via rval!). 6971 ;; 6972 ;; File scope — handled out-of-band in %const-init-piece (incl. its `&` 6973 ;; arm) via %emit-fs-compound-literal: pieces go to .data under a fresh 6974 ;; cc__cl_N label and the enclosing initializer takes a (label-ref . LBL) 6975 ;; piece. Reaching parse-compound-literal at file scope would mean an 6976 ;; expression context outside an initializer (which file scope doesn't 6977 ;; have), so this entry point still rejects it. 6978 ;; -------------------------------------------------------------------- 6979 (define (parse-compound-literal ps ty) 6980 (cond 6981 ((not (ps-fn-ctx ps)) 6982 (die (tok-loc (peek ps)) "compound literal at file scope: unsupported"))) 6983 (let* ((sz (max (ctype-size ty) 1)) 6984 (al (max (ctype-align ty) 1)) 6985 (sl (cg-alloc-slot (ps-cg ps) sz al)) 6986 ;; Synthetic sym: parse-init-local-aggregate only reads 6987 ;; sym-slot at its top-level entry to seed base-off; the 6988 ;; recursive helpers thread `sm` along but never read other 6989 ;; fields. The name is unbound and never enters scope. 6990 (sm (%sym "__cl" 'var 'auto ty sl #t))) 6991 (cond 6992 ((or (eq? (ctype-kind ty) 'arr) 6993 (eq? (ctype-kind ty) 'struct) 6994 (eq? (ctype-kind ty) 'union)) 6995 (parse-init-local-aggregate ps sm ty)) 6996 (else 6997 ;; Scalar (T){expr [,]} — parse-init-local-aggregate's brace arm 6998 ;; only handles aggregates, so emit the single-element store 6999 ;; here directly. 7000 (expect-punct ps 'lbrace) 7001 (cg-push (ps-cg ps) (%opnd 'frame ty sl #t)) 7002 (parse-expr-bp ps 4) (rval! ps) 7003 (cg-cast (ps-cg ps) ty) 7004 (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) 7005 (cond ((at-punct? ps 'comma) (advance ps))) 7006 (expect-punct ps 'rbrace))) 7007 ;; The literal is an lvalue with automatic storage. ctype-size may 7008 ;; have been resolved by parse-init-local-aggregate (e.g. (int[]) 7009 ;; gets its bound fixed in-place); we re-fetch via the slot's type 7010 ;; pointer (ty) which the init code mutated. 7011 (cg-push (ps-cg ps) (%opnd 'frame ty sl #t)) 7012 (parse-postfix-rest ps))) 7013 7014 (define (parse-postfix ps) 7015 (parse-primary ps) (parse-postfix-rest ps)) 7016 7017 (define (parse-postfix-rest ps) 7018 (let lp () 7019 (pmatch (peek ps) 7020 (($ tok? (kind PUNCT) (value lbrack)) 7021 (advance ps) (rval! ps) 7022 (parse-expr ps) (rval! ps) 7023 (expect-punct ps 'rbrack) 7024 (cg-binop (ps-cg ps) 'add) 7025 (cg-push-deref (ps-cg ps)) (lp)) 7026 (($ tok? (kind PUNCT) (value lparen)) 7027 (advance ps) (rval-not-fn! ps) 7028 (let* ((fn-ty (call-fn-type (ps-cg ps))) 7029 (n (parse-call-args ps fn-ty)) 7030 ;; has-result? = #f for known void returns. Skips the 7031 ;; wasted ST a0 → frame-slot spill that cg-call would 7032 ;; otherwise emit for void calls. 7033 (has-result? 7034 (cond 7035 ((not fn-ty) #t) 7036 ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f) 7037 (else #t)))) 7038 (expect-punct ps 'rparen) 7039 (cg-call (ps-cg ps) n has-result?) 7040 ;; Maintain parse's "one rval per expression" invariant so 7041 ;; comma / parse-expr-stmt / for-init/step pop sites stay 7042 ;; simple. The placeholder is vstack-only and never 7043 ;; materialized (cg-pop is a vstack op, no emit). 7044 (cond ((not has-result?) 7045 (cg-push-imm (ps-cg ps) %t-i32 0))) 7046 (lp))) 7047 (($ tok? (kind PUNCT) (value dot)) 7048 (advance ps) 7049 (pmatch (advance ps) 7050 (($ tok? (kind IDENT) (value ,n)) 7051 (cg-push-field (ps-cg ps) n) (lp)) 7052 (($ tok? (loc ,l)) (die l "expected field name")))) 7053 (($ tok? (kind PUNCT) (value arrow)) 7054 (advance ps) 7055 (pmatch (advance ps) 7056 (($ tok? (kind IDENT) (value ,n)) 7057 ;; ptr -> field: load the pointer to rval, deref to reach 7058 ;; the struct lval, then push the field. 7059 (rval! ps) 7060 (cg-push-deref (ps-cg ps)) 7061 (cg-push-field (ps-cg ps) n) (lp)) 7062 (($ tok? (loc ,l)) (die l "expected field name")))) 7063 (($ tok? (kind PUNCT) (value inc)) 7064 (advance ps) 7065 (cg-postinc (ps-cg ps)) (lp)) 7066 (($ tok? (kind PUNCT) (value dec)) 7067 (advance ps) 7068 (cg-postdec (ps-cg ps)) (lp)) 7069 (else #t)))) 7070 7071 ;; call-fn-type cg -> ctype-or-#f 7072 ;; The function operand sits at the top of the vstack when 7073 ;; parse-call-args runs (just after rval-not-fn!). Its type may be 7074 ;; `fn` directly (named callee) or `ptr -> fn` (function pointer). 7075 ;; Returns the underlying `fn` ctype, or #f if the operand isn't 7076 ;; recognizably callable (callsite still works — no per-arg cast). 7077 (define (call-fn-type cg) 7078 (let* ((tp (cg-top cg))) 7079 (cond 7080 ((not tp) #f) 7081 (else 7082 (let* ((ty (opnd-type tp)) 7083 (k (ctype-kind ty))) 7084 (cond 7085 ((eq? k 'fn) ty) 7086 ((eq? k 'ptr) 7087 (let ((pe (ctype-ext ty))) 7088 (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe) 7089 (else #f)))) 7090 (else #f))))))) 7091 7092 ;; param-types-of fn-ty -> (params variadic?) with a #f fallback. 7093 (define (call-fn-param-info fn-ty) 7094 (cond 7095 ((not fn-ty) (cons '() #f)) 7096 (else 7097 (let ((ext (ctype-ext fn-ty))) 7098 (cons (cadr ext) (car (cddr ext))))))) 7099 7100 ;; parse-call-args ps fn-ty -> arg-count 7101 ;; Casts each fixed arg to the declared param type (CC.md §K.5). 7102 ;; For variadic args (index >= named-arg count, when variadic? = #t) 7103 ;; applies cg-promote (CC.md §G.1). 7104 (define (parse-call-args ps fn-ty) 7105 (cond 7106 ((at-punct? ps 'rparen) 0) 7107 (else 7108 (let* ((info (call-fn-param-info fn-ty)) 7109 (params (car info)) 7110 (var? (cdr info)) 7111 (nfix (length params))) 7112 (let lp ((n 0) (rem params)) 7113 (parse-expr-bp ps 4) (rval! ps) 7114 (cond 7115 ;; Fixed-arg: cast to declared param type. param entry shape 7116 ;; is (name . ctype) per cg-fn-begin's contract. 7117 ((not (null? rem)) 7118 (cg-cast (ps-cg ps) (cdr (car rem)))) 7119 ;; Variadic position (n >= nfix and var? is true): promote. 7120 (var? 7121 (cg-promote (ps-cg ps)))) 7122 (let ((m (+ n 1)) 7123 (rest (if (null? rem) '() (cdr rem)))) 7124 (cond ((at-punct? ps 'comma) (advance ps) (lp m rest)) 7125 (else m)))))))) 7126 7127 ;; -------------------------------------------------------------------- 7128 ;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in 7129 ;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'. 7130 ;; va_start(ap, last) — last is parsed and discarded; cg only needs 7131 ;; the variadic-first-slot offset, which it already tracks. 7132 ;; va_arg(ap, T) — T is a type-name; result rval has that type. 7133 ;; va_end(ap) — no-op codegen; just consumes ap. 7134 ;; 7135 ;; Pushes a single imm 0 for va_start / va_end so they fit as 7136 ;; expression statements; va_arg pushes the rval. 7137 ;; -------------------------------------------------------------------- 7138 (define (parse-builtin-va-start ps) 7139 (advance ps) ; IDENT 7140 (expect-punct ps 'lparen) 7141 (parse-expr-bp ps 4) ; ap (must be lval) 7142 (expect-punct ps 'comma) 7143 ;; "last" is parsed for syntactic completeness then dropped — cg 7144 ;; doesn't need it; the variadic-first-slot was determined at 7145 ;; cg-fn-begin/v time. 7146 (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) 7147 (expect-punct ps 'rparen) 7148 (cg-va-start (ps-cg ps)) 7149 ;; Push a placeholder rval so the call expression has a value 7150 ;; (matches va_start's "void" but our parser expects all 7151 ;; expressions to leave one rval). 7152 (cg-push-imm (ps-cg ps) %t-i32 0)) 7153 7154 (define (parse-builtin-va-arg ps) 7155 (advance ps) ; IDENT 7156 (expect-punct ps 'lparen) 7157 (parse-expr-bp ps 4) ; ap (lval) 7158 (expect-punct ps 'comma) 7159 (let*-values (((_sto bty) (parse-decl-spec ps)) 7160 ((_n ty) (parse-declarator ps bty))) 7161 (expect-punct ps 'rparen) 7162 (cg-va-arg (ps-cg ps) ty))) 7163 7164 (define (parse-builtin-expect ps) 7165 ;; GCC `__builtin_expect(EXPR, EXPECTED)` — branch-prediction hint. 7166 ;; We ignore the hint and emit just the value of EXPR. 7167 (advance ps) ; IDENT 7168 (expect-punct ps 'lparen) 7169 (parse-expr-bp ps 4) (rval! ps) ; result 7170 (expect-punct ps 'comma) 7171 (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) ; expected (drop) 7172 (expect-punct ps 'rparen)) 7173 7174 (define (parse-builtin-va-end ps) 7175 (advance ps) ; IDENT 7176 (expect-punct ps 'lparen) 7177 (parse-expr-bp ps 4) ; ap 7178 (expect-punct ps 'rparen) 7179 (cg-va-end (ps-cg ps)) 7180 (cg-push-imm (ps-cg ps) %t-i32 0)) 7181 7182 (define (parse-primary ps) 7183 (let ((t (peek ps))) 7184 (pmatch t 7185 (($ tok? (kind INT) (value ,n)) 7186 (advance ps) 7187 ;; C99 §6.4.4.1: pick the smallest type that holds the value. 7188 ;; The lexer drops u/U/l/L suffixes before we get here, so we can't 7189 ;; tell `0x1L` from `0x1`. But a value that doesn't fit in int has 7190 ;; to widen anyway — otherwise `4294967296L + 7L` truncates to 7, 7191 ;; because cg-arith-conv leaves both operands at i32 width. 7192 (cg-push-imm (ps-cg ps) 7193 (cond ((<= n 2147483647) %t-i32) 7194 (else %t-i64)) 7195 n)) 7196 (($ tok? (kind CHAR) (value ,c)) 7197 (advance ps) 7198 ;; C99 §6.4.4.4: an integer character constant has type int. 7199 (cg-push-imm (ps-cg ps) %t-i32 c)) 7200 (($ tok? (kind STR) (value ,s)) 7201 (advance ps) 7202 (cg-push-string (ps-cg ps) s)) 7203 (($ tok? (kind IDENT) (value ,n)) 7204 (cond 7205 ((bv= n "__builtin_va_start") (parse-builtin-va-start ps)) 7206 ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps)) 7207 ((bv= n "__builtin_va_end") (parse-builtin-va-end ps)) 7208 ((bv= n "__builtin_expect") (parse-builtin-expect ps)) 7209 (else 7210 (let ((sm (scope-lookup ps n))) 7211 (advance ps) 7212 (cond 7213 ((not sm) (die (tok-loc t) "undecl" n)) 7214 ((eq? (sym-kind sm) 'enum-const) 7215 (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm))) 7216 (else (cg-push-sym (ps-cg ps) sm))))))) 7217 (($ tok? (kind PUNCT) (value lparen)) 7218 (advance ps) (parse-expr ps) (expect-punct ps 'rparen)) 7219 (else (die (tok-loc t) "unexp" (tok-value t)))))) 7220 7221 (define (rval! ps) 7222 (let ((tp (cg-top (ps-cg ps)))) 7223 (cond ((and tp (opnd? tp) (opnd-lval? tp)) 7224 (cg-load (ps-cg ps))) 7225 (else #t)))) 7226 7227 (define (rval-not-fn! ps) 7228 (let ((tp (cg-top (ps-cg ps)))) 7229 (cond ((and tp (opnd? tp) (opnd-lval? tp) 7230 (not (ctype-is-fn? (opnd-type tp)))) 7231 (cg-load (ps-cg ps))) 7232 (else #t)))) 7233 ;; cc/main.scm — driver. Argv, file I/O, ties phases together. 7234 7235 ;; -------------------------------------------------------------------- 7236 ;; CLI: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] 7237 ;; <input.c> <output.P1pp> 7238 ;; 7239 ;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] 7240 ;; is the catm'd compiler source path, argv[2..] are the user-facing 7241 ;; positional args. cc-main strips the first two. 7242 ;; -------------------------------------------------------------------- 7243 7244 (define (%cc-slurp path) 7245 (let ((r (open-input path))) 7246 (cond ((not (car r)) 7247 (die #f "cannot open input" path))) 7248 (let* ((p (cdr r)) 7249 (rd (read-all p))) 7250 (close p) 7251 (cond ((not (car rd)) (die #f "read failed" path))) 7252 (cdr rd)))) 7253 7254 (define (%cc-write path bv) 7255 (let ((r (open-output path))) 7256 (cond ((not (car r)) 7257 (die #f "cannot open output" path))) 7258 (let ((p (cdr r))) 7259 (write-bv-fd (port-fd p) bv) 7260 (close p) 7261 0))) 7262 7263 ;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan 7264 ;; argv for a sentinel "--cc-debug" flag. When present, debug-log 7265 ;; prints heap usage between phases to fd 2. 7266 (define (%cc-flag? args flag) 7267 (cond ((null? args) #f) 7268 ((bv= (car args) flag) #t) 7269 (else (%cc-flag? (cdr args) flag)))) 7270 7271 (define (%cc-strip-flag args flag) 7272 (cond ((null? args) '()) 7273 ((bv= (car args) flag) (cdr args)) 7274 (else (cons (car args) (%cc-strip-flag (cdr args) flag))))) 7275 7276 ;; --lib=PFX selects library-mode codegen: cc.scm skips the p1_main 7277 ;; entry stub and trailing :ELF_end (the catm chain supplies them 7278 ;; from P1/entry-*.P1pp + P1/elf-end.P1pp once), and namespaces 7279 ;; anonymous string labels as PFX+"cc__str_N" so two cc.scm outputs 7280 ;; in the same link don't collide on cc__str_0..N. Returns 7281 ;; (values prefix-bv rest-args). PREFIX = "" means exec mode (flag 7282 ;; absent). PREFIX = "" with the flag present is rejected — silently 7283 ;; falling back to exec mode would mask a typo'd Makefile rule. 7284 (define (%cc-take-lib args) 7285 (let loop ((acc '()) (rest args) (pfx #f)) 7286 (cond 7287 ((null? rest) 7288 (values (cond (pfx pfx) (else "")) (reverse acc))) 7289 ((bv-prefix? "--lib=" (car rest)) 7290 (cond (pfx (die #f "cc: --lib= specified twice"))) 7291 (let* ((arg (car rest)) 7292 (p (bv-slice arg 6 (bytevector-length arg)))) 7293 (cond ((= 0 (bytevector-length p)) 7294 (die #f "cc: --lib= requires a non-empty PREFIX"))) 7295 (loop acc (cdr rest) p))) 7296 (else 7297 (loop (cons (car rest) acc) (cdr rest) pfx))))) 7298 7299 ;; Predefined macros visible to every translation unit. CCSCM lets 7300 ;; tests/headers branch on "compiled by cc.scm" — e.g. skip <stdarg.h> 7301 ;; and use the __builtin_va_* primitives directly. 7302 (define %cc-initial-defines 7303 (list (cons "CCSCM" (%macro 'obj '() '())))) 7304 7305 (define (cc-main av) 7306 (let* ((raw (cdr (cdr av))) 7307 (dbg (%cc-flag? raw "--cc-debug")) 7308 (a1 (%cc-strip-flag raw "--cc-debug")) 7309 (tr (%cc-flag? a1 "--cc-trace-emit")) 7310 (a2 (%cc-strip-flag a1 "--cc-trace-emit"))) 7311 (cond (dbg (debug-log-on!))) 7312 (cond (tr (trace-emit-on!))) 7313 (let-values (((lib-prefix args) (%cc-take-lib a2))) 7314 (cond 7315 ((or (null? args) (null? (cdr args))) 7316 (die #f "usage: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] <input.c> <output.P1pp>"))) 7317 (let* ((in-path (car args)) 7318 (out-path (car (cdr args))) 7319 (lib? (cond ((= 0 (bytevector-length lib-prefix)) #f) 7320 (else #t)))) 7321 (debug-log "phase=start" "heap" (heap-usage)) 7322 ;; Streaming pipeline: lex → pp → parser → cg, all concurrent. 7323 ;; Each stage pulls one tok at a time from upstream. Steady-state 7324 ;; live data is bounded by parser/pp state, not source length. 7325 (let* ((src (%cc-slurp in-path)) 7326 (_1 (debug-log "phase=slurp" "heap" (heap-usage) 7327 "src-bytes" (bytevector-length src))) 7328 (lex-iter (make-lex-iter src in-path)) 7329 (pp-iter (make-pp-iter lex-iter %cc-initial-defines)) 7330 (cg (cg-init/v lib? lib-prefix)) 7331 (ps (make-pstate pp-iter cg))) 7332 (parse-translation-unit ps) 7333 (debug-log "phase=parse" "heap" (heap-usage)) 7334 (let ((out (cg-finish cg))) 7335 (debug-log "phase=cg-finish" "heap" (heap-usage) 7336 "out-bytes" (bytevector-length out)) 7337 (%cc-write out-path out)) 7338 0)))))