commit 8a3e64ac5a9f5c9857b856242a5def558f1411ec
parent 1d24d51e2956de403af4ceca808e744b0a807a06
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 12:37:14 -0700
cc: combine files into 1
Diffstat:
| M | Makefile | | | 7 | +++---- |
| M | cc/README.md | | | 17 | ++++------------- |
| A | cc/cc.scm | | | 5222 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | cc/cg.scm | | | 1246 | ------------------------------------------------------------------------------- |
| D | cc/data.scm | | | 239 | ------------------------------------------------------------------------------- |
| D | cc/lex.scm | | | 900 | ------------------------------------------------------------------------------- |
| M | cc/main.scm | | | 75 | +++------------------------------------------------------------------------ |
| D | cc/parse.scm | | | 1673 | ------------------------------------------------------------------------------- |
| D | cc/pp.scm | | | 805 | ------------------------------------------------------------------------------- |
| D | cc/util.scm | | | 286 | ------------------------------------------------------------------------------- |
| M | docs/CC-INTERNALS.md | | | 36 | ++++++++++++++++++++---------------- |
| M | scripts/boot-run-tests.sh | | | 10 | +++++----- |
12 files changed, 5257 insertions(+), 5259 deletions(-)
diff --git a/Makefile b/Makefile
@@ -86,6 +86,7 @@ CLOC_FILES := M1pp/M1pp.P1 \
P1/P1pp.P1pp \
$(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1) \
scheme1/scheme1.P1pp \
+ cc/cc.scm \
$(foreach a,$(CLOC_ARCHES), \
$(foreach f,$(CLOC_SEED_BASES),vendor/seed/$(a)/$(f)))
@@ -161,10 +162,8 @@ SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1)
# Catm'd cc compiler source. Per-arch only because catm runs in the
# per-arch container; the resulting .scm is identical across arches but
# we keep it under build/$arch/ for naming consistency.
-CC_SRCS := scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm \
- cc/pp.scm cc/cg.scm cc/parse.scm cc/main.scm
+CC_SRCS := scheme1/prelude.scm cc/cc.scm cc/main.scm
CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm)
-CLOC_FILES += $(CC_SRCS)
m1pp: $(OUT_DIR)/m1pp
pokem: $(OUT_DIR)/pokem
@@ -198,7 +197,7 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS)
$(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS)
$(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@
-# cc.scm: catm prelude + the six cc/*.scm files into one source the
+# cc.scm: catm prelude + cc.scm + main.scm entry into one source the
# scheme1 interpreter can run. Catm runs inside the per-arch container.
$(CC_BINS): build/%/cc/cc.scm: $(CC_SRCS) build/%/.image build/%/tools/M0
mkdir -p $(@D)
diff --git a/cc/README.md b/cc/README.md
@@ -18,25 +18,16 @@ Read in this order:
| File | Purpose |
|------|---------|
-| `util.scm` | Leaf helpers (bv, alist, buf, die). |
-| `data.scm` | Records + symbol alphabets shared across modules. |
-| `lex.scm` | Bytestream → token list. |
-| `pp.scm` | Token list → expanded token list. |
-| `cg.scm` | Codegen state + emission API. |
-| `parse.scm` | Recursive-descent + Pratt; calls cg directly. |
-| `main.scm` | argv handling + driver. |
+| `cc.scm` | All compiler modules in one file: util, data, lex, pp, cg, parse, and the `cc-main` driver. Sections are still labeled with their original `;; cc/<name>.scm —` headers. |
+| `main.scm` | One-liner production entry point: `(sys-exit (cc-main (argv)))`. Tests omit it so they can run their own drivers. |
Build (single source for scheme1):
```
-catm cc/cc.scm \
- cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm \
- cc/cg.scm cc/parse.scm cc/main.scm
+catm build/$ARCH/cc/cc.scm \
+ scheme1/prelude.scm cc/cc.scm cc/main.scm
```
-Run via the existing `boot-run-scheme1.sh` wrapper, which prepends
-`scheme1/prelude.scm` ahead of the catm'd compiler source.
-
## Status
Modules filled in past the scaffold; parser drives all six cg sections
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -0,0 +1,5222 @@
+;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude.
+;;
+;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add
+;; helpers here freely; the listed signatures are the load-bearing
+;; surface other modules call.
+
+;; --------------------------------------------------------------------
+;; bytevector helpers (scheme1 strings ARE bytevectors)
+;; --------------------------------------------------------------------
+(define (bv= a b) (bytevector=? a b))
+
+(define (bv-prefix? p s)
+ ;; Is s a bv that starts with the bytes of p?
+ (let ((plen (bytevector-length p))
+ (slen (bytevector-length s)))
+ (if (< slen plen)
+ #f
+ (let loop ((i 0))
+ (cond ((= i plen) #t)
+ ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i))
+ (loop (+ i 1)))
+ (else #f))))))
+
+(define (bv-find bv byte from)
+ ;; Linear scan for the first byte == `byte` at index >= from.
+ ;; Returns the index, or #f if not found.
+ (let ((n (bytevector-length bv)))
+ (let loop ((i from))
+ (cond ((>= i n) #f)
+ ((= (bytevector-u8-ref bv i) byte) i)
+ (else (loop (+ i 1)))))))
+
+(define (bv-slice bv start end)
+ ;; Fresh copy of bytes in [start, end). bytevector-copy already does
+ ;; this in scheme1 (3-arg form returns a fresh bv).
+ (bytevector-copy bv start end))
+
+(define (bv-of-byte b) (make-bytevector 1 b))
+
+(define (bv-cat lst-of-bv)
+ ;; Concat a list of bytevectors with one allocation. bytevector-append
+ ;; is variadic, so apply does this in a single linear pass.
+ (apply bytevector-append lst-of-bv))
+
+(define (bv->fixnum bv radix)
+ ;; (ok . val) per CC-INTERNALS: (#t . val) on parse, (#f . #f) on fail.
+ ;; string->number is pure and returns #f on parse failure (not the
+ ;; (ok . val) convention, since it's not a syscall).
+ (let ((n (string->number bv radix)))
+ (if n (cons #t n) (cons #f #f))))
+
+(define (fixnum->bv n radix) (number->string n radix))
+
+;; --------------------------------------------------------------------
+;; lists / alists
+;; --------------------------------------------------------------------
+(define (alist-ref key al)
+ ;; equal? compare (intended for bv keys). The prelude's `assoc` uses
+ ;; eq?, so we roll our own for the equal? case.
+ (cond ((null? al) #f)
+ ((equal? (car (car al)) key) (cdr (car al)))
+ (else (alist-ref key (cdr al)))))
+
+(define (alist-ref/eq key al)
+ ;; eq? compare (for symbol keys). Reuses the prelude's assoc, which
+ ;; is eq?-based.
+ (let ((p (assoc key al)))
+ (if p (cdr p) #f)))
+
+(define (alist-set key val al) (cons (cons key val) al))
+
+(define (alist-update key f al)
+ ;; Functional update by equal? key. If found, replace its value with
+ ;; (f old-val). If not found, prepend (cons key (f #f)) so callers
+ ;; can use this as upsert-with-default.
+ (let loop ((xs al) (acc '()))
+ (cond ((null? xs)
+ (cons (cons key (f #f)) (reverse acc)))
+ ((equal? (car (car xs)) key)
+ (append (reverse acc)
+ (cons (cons key (f (cdr (car xs))))
+ (cdr xs))))
+ (else (loop (cdr xs) (cons (car xs) acc))))))
+
+(define (any p xs)
+ (cond ((null? xs) #f)
+ ((p (car xs)) #t)
+ (else (any p (cdr xs)))))
+
+(define (every p xs)
+ (cond ((null? xs) #t)
+ ((p (car xs)) (every p (cdr xs)))
+ (else #f)))
+
+(define (count p xs)
+ (let loop ((xs xs) (n 0))
+ (cond ((null? xs) n)
+ ((p (car xs)) (loop (cdr xs) (+ n 1)))
+ (else (loop (cdr xs) n)))))
+
+;; --------------------------------------------------------------------
+;; ints
+;; --------------------------------------------------------------------
+(define (min3 a b c) (min a (min b c)))
+(define (align-up n k)
+ ;; round n up to the nearest multiple of k (k must be a power of 2)
+ (let ((mask (- k 1)))
+ (bit-and (+ n mask) (bit-not mask))))
+
+;; --------------------------------------------------------------------
+;; output buffer (fixed-size pre-allocated byte storage)
+;;
+;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`.
+;; buf-push! is bytevector-copy! into storage — zero allocation per
+;; push, no chunks list to chase. This is what makes per-function
+;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed-
+;; storage (allocated once, lives pre-mark), so byte-level mutations
+;; survive a rewind that discards the parse/cg scratch.
+;;
+;; Sizing knobs live in one place so they're easy to tune as inputs
+;; grow. cg-init picks per-buf caps; the per-fn bufs are reused
+;; across functions (reset, not re-allocated).
+;; --------------------------------------------------------------------
+
+;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a
+;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow
+;; die() reports off/len/cap so misses are easy to diagnose.
+;;
+;; Each cap is a power of two. scheme1's bv_capacity_for rounds the
+;; requested length up to the smallest power of two ≥ n, so asking for
+;; 2^k bytes consumes exactly 2^k of heap.
+(define %BUF-CAP-TEXT 8388608) ; 8 MiB: .text + entry stub
+(define %BUF-CAP-DATA 2097152) ; 2 MiB: .data (strings, globals)
+(define %BUF-CAP-BSS 2097152) ; 2 MiB: .bss
+(define %BUF-CAP-FN 262144) ; 256 KiB: per-fn body asm
+(define %BUF-CAP-PROLOGUE 16384) ; 16 KiB: per-fn prologue
+(define %BUF-CAP-DEFAULT 65536) ; 64 KiB: make-buf fallback
+
+(define-record-type buf
+ (%buf storage offset cap)
+ buf?
+ (storage buf-storage) ; bv: pre-allocated, never resized
+ (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far
+ (cap buf-cap)) ; fixnum: storage capacity
+
+(define (make-buf/cap cap)
+ (%buf (make-bytevector cap 0) 0 cap))
+
+(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT))
+
+(define (buf-push! b bv)
+ (let* ((n (bytevector-length bv))
+ (off (buf-offset b))
+ (newoff (+ off n)))
+ (cond
+ ((> newoff (buf-cap b))
+ (die #f "buf overflow" off n (buf-cap b))))
+ (bytevector-copy! (buf-storage b) off bv 0 n)
+ (buf-offset-set! b newoff)))
+
+(define (buf-flush b)
+ ;; Snapshot the used prefix as a fresh bv. One allocation; the
+ ;; underlying storage is unchanged.
+ (bytevector-copy (buf-storage b) 0 (buf-offset b)))
+
+(define (buf-reset! b) (buf-offset-set! b 0))
+
+(define (buf-drain! dst src)
+ ;; Copy src's used bytes into dst at dst's current write head; reset
+ ;; src to empty. dst and src must be distinct bufs.
+ (let* ((slen (buf-offset src))
+ (doff (buf-offset dst))
+ (newoff (+ doff slen)))
+ (cond
+ ((> newoff (buf-cap dst))
+ (die #f "buf-drain overflow" doff slen (buf-cap dst))))
+ (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen)
+ (buf-offset-set! dst newoff)
+ (buf-offset-set! src 0)))
+
+;; --------------------------------------------------------------------
+;; diagnostics + I/O
+;; --------------------------------------------------------------------
+(define (die loc msg . irritants)
+ ;; Format per CC-CONTRACTS §2.3:
+ ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ...
+ ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted.
+ ;; irritants are written via display semantics (no quoting); format's
+ ;; ~a handles bv/fixnum/pair/symbol the same way display does.
+ ;;
+ ;; All output is built into a single bv and sent to fd 2 with one
+ ;; sys-write loop, so a partial write doesn't interleave fragments
+ ;; from a concurrent process.
+ (let* ((prefix (if loc
+ (format "~a:~d:~d: error: "
+ (loc-file loc) (loc-line loc) (loc-col loc))
+ "error: "))
+ (head (bytevector-append prefix (format "~a" msg)))
+ ;; Irritants get ": " before the first and " " between the rest.
+ (tail (if (null? irritants)
+ (list NL-BV)
+ (let walk ((xs irritants) (sep ": ") (acc '()))
+ (if (null? xs)
+ (reverse (cons NL-BV acc))
+ (walk (cdr xs)
+ " "
+ (cons (format "~a" (car xs))
+ (cons sep acc)))))))
+ (out (bv-cat (cons head tail))))
+ (write-bv-fd 2 out)
+ (sys-exit 1)))
+
+(define (slurp-fd fd)
+ ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's
+ ;; port layer uses); bv-concat-reverse builds the result in one
+ ;; allocation so a multi-MB tcc.c stays linear.
+ (let ((buf (make-bytevector BUFSIZE)))
+ (let loop ((acc '()))
+ (let ((r (sys-read fd buf 0 BUFSIZE)))
+ (cond ((not (car r))
+ (die #f "slurp-fd: sys-read failed" (cdr r)))
+ ((zero? (cdr r))
+ (bv-concat-reverse acc))
+ (else
+ (loop (cons (bytevector-copy buf 0 (cdr r)) acc))))))))
+
+(define (write-bv-fd fd bv)
+ ;; Full write or die. sys-write may write fewer bytes than requested;
+ ;; advance the offset and retry the unwritten tail.
+ ;;
+ ;; On failure we sys-exit directly instead of routing through `die`
+ ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must
+ ;; not recurse infinitely. Status 1 matches the contract for `die`.
+ (let ((len (bytevector-length bv)))
+ (let loop ((off 0))
+ (if (= off len)
+ #t
+ (let ((r (sys-write fd bv off (- len off))))
+ (cond ((not (car r)) (sys-exit 1))
+ ((zero? (cdr r)) (sys-exit 1))
+ (else (loop (+ off (cdr r))))))))))
+
+;; --------------------------------------------------------------------
+;; debug logging
+;;
+;; Cheap sticky on/off: the cc compiler is single-threaded and short-
+;; lived, so a top-level mutable flag is fine. Toggle via
+;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr)
+;; writes one line to fd 2 in the same display-style format as `die`,
+;; but doesn't abort. The intent is to trace heap usage between cc
+;; phases (lex/pp/parse/cg-finish) without compile-time conditionals.
+;; --------------------------------------------------------------------
+(define %debug-log-enabled #f)
+(define (debug-log-on!) (set! %debug-log-enabled #t))
+(define (debug-log-off!) (set! %debug-log-enabled #f))
+(define (debug-log? ) %debug-log-enabled)
+
+(define (debug-log msg . irritants)
+ (cond
+ (%debug-log-enabled
+ (let* ((head (bytevector-append "[cc] " (format "~a" msg)))
+ (tail (if (null? irritants)
+ (list NL-BV)
+ (let walk ((xs irritants) (sep ": ") (acc '()))
+ (if (null? xs)
+ (reverse (cons NL-BV acc))
+ (walk (cdr xs)
+ " "
+ (cons (format "~a" (car xs))
+ (cons sep acc)))))))
+ (out (bv-cat (cons head tail))))
+ (write-bv-fd 2 out)))
+ (else #t)))
+
+;; --------------------------------------------------------------------
+;; fresh-name generator (used for cg label counters, etc.)
+;; --------------------------------------------------------------------
+(define (make-namer prefix)
+ ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh
+ ;; bv. The counter lives in the closure's lexical environment; scheme1
+ ;; closures heap-capture by reference, so set! on ctr is sticky.
+ (let ((ctr 0))
+ (lambda ()
+ (let ((s (bytevector-append prefix (number->string ctr 10))))
+ (set! ctr (+ ctr 1))
+ s))))
+;; cc/data.scm — record types and symbol alphabets shared across modules.
+;;
+;; Concrete realization of:
+;; docs/CC-INTERNALS.md §data.scm
+;; docs/CC-CONTRACTS.md §1
+;;
+;; Adding a record or alphabet symbol requires updating the contract
+;; doc first.
+
+;; --------------------------------------------------------------------
+;; loc — source location for diagnostics
+;; --------------------------------------------------------------------
+(define-record-type loc
+ (%loc file line col)
+ loc?
+ (file loc-file) ; bv
+ (line loc-line) ; fixnum
+ (col loc-col)) ; fixnum
+
+;; --------------------------------------------------------------------
+;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for
+;; PUNCT value symbols, §1.3 for KW value symbols.
+;; --------------------------------------------------------------------
+(define-record-type tok
+ (%tok kind value loc hide)
+ tok?
+ (kind tok-kind) ; symbol from §1.1
+ (value tok-value) ; bv | fixnum | symbol | #f
+ (loc tok-loc) ; loc
+ (hide tok-hide)) ; list of bv (macro names already expanded)
+
+(define (make-tok kind value loc)
+ (%tok kind value loc '()))
+
+;; --------------------------------------------------------------------
+;; macro — preprocessor macro definition
+;; --------------------------------------------------------------------
+(define-record-type macro
+ (%macro kind params body)
+ macro?
+ (kind macro-kind) ; 'obj | 'fn | 'fn-vararg
+ (params macro-params) ; list of bv
+ (body macro-body)) ; list of tok
+
+;; --------------------------------------------------------------------
+;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and
+;; CC-INTERNALS §data.scm for the ext payload table.
+;;
+;; Fields that mutate over a ctype's lifetime:
+;; size and align — set to -1/-1 on forward struct/union decl,
+;; fixed when the type is completed.
+;; ext — same; struct/union ext changes shape when
+;; the body is parsed.
+;; --------------------------------------------------------------------
+(define-record-type ctype
+ (%ctype kind size align ext)
+ ctype?
+ (kind ctype-kind)
+ (size ctype-size ctype-size-set!)
+ (align ctype-align ctype-align-set!)
+ (ext ctype-ext ctype-ext-set!))
+
+;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?.
+(define %t-void (%ctype 'void -1 -1 #f))
+(define %t-i8 (%ctype 'i8 1 1 #f))
+(define %t-u8 (%ctype 'u8 1 1 #f))
+(define %t-i16 (%ctype 'i16 2 2 #f))
+(define %t-u16 (%ctype 'u16 2 2 #f))
+(define %t-i32 (%ctype 'i32 4 4 #f))
+(define %t-u32 (%ctype 'u32 4 4 #f))
+(define %t-i64 (%ctype 'i64 8 8 #f))
+(define %t-u64 (%ctype 'u64 8 8 #f))
+(define %t-bool (%ctype 'bool 1 1 #f))
+
+;; --------------------------------------------------------------------
+;; sym — declared identifier (function, variable, typedef, …)
+;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage).
+;; --------------------------------------------------------------------
+(define-record-type sym
+ (%sym name kind storage type slot)
+ sym?
+ (name sym-name) ; bv
+ (kind sym-kind) ; symbol from §1.7
+ (storage sym-storage) ; symbol from §1.8 or #f
+ (type sym-type) ; ctype
+ (slot sym-slot)) ; fixnum | bv | #f, per kind
+
+;; --------------------------------------------------------------------
+;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind),
+;; §1.10 (reg names).
+;; --------------------------------------------------------------------
+(define-record-type opnd
+ (%opnd kind type ext lval?)
+ opnd?
+ (kind opnd-kind)
+ (type opnd-type)
+ (ext opnd-ext)
+ (lval? opnd-lval?))
+
+;; --------------------------------------------------------------------
+;; loop-ctx — entry on parser's loop/switch context stack.
+;; See CC-CONTRACTS §1.9.
+;; --------------------------------------------------------------------
+(define-record-type loop-ctx
+ (%loop-ctx kind tag has-continue?)
+ loop-ctx?
+ (kind loop-ctx-kind)
+ (tag loop-ctx-tag)
+ (has-continue? loop-ctx-has-continue?))
+
+;; --------------------------------------------------------------------
+;; fn-ctx — current-function context inside the parser.
+;; --------------------------------------------------------------------
+(define-record-type fn-ctx
+ (%fn-ctx name return-type params variadic? labels)
+ fn-ctx?
+ (name fn-ctx-name)
+ (return-type fn-ctx-return-type)
+ (params fn-ctx-params)
+ (variadic? fn-ctx-variadic?)
+ (labels fn-ctx-labels fn-ctx-labels-set!))
+
+;; --------------------------------------------------------------------
+;; pstate — parser state. Owned by parse.scm; read-only to cg.
+;; --------------------------------------------------------------------
+(define-record-type pstate
+ (%pstate toks scope tags loops fn-ctx typedefs cg)
+ pstate?
+ (toks ps-toks ps-toks-set!)
+ (scope ps-scope ps-scope-set!)
+ (tags ps-tags ps-tags-set!)
+ (loops ps-loops ps-loops-set!)
+ (fn-ctx ps-fn-ctx ps-fn-ctx-set!)
+ (typedefs ps-typedefs ps-typedefs-set!)
+ (cg ps-cg))
+
+;; --------------------------------------------------------------------
+;; cg — codegen state. Owned by cg.scm.
+;; --------------------------------------------------------------------
+;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across
+;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains
+;; them into cg-text via buf-drain!. No per-fn allocation, which is what
+;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely:
+;; the destination buf storage lives pre-mark, byte writes are stable
+;; across rewind, and the parse/cg scratch dies cleanly.
+;;
+;; in-fn? discriminates "currently inside a function body" so
+;; %cg-emit-buf can route emits to fn-buf during the body and cg-text
+;; outside it (entry stub, etc.).
+;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern).
+;; Stable except when user code adds a global — which is exactly what the
+;; parse-fn-body rewind-safety check probes.
+;;
+;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type,
+;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on
+;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept
+;; out of cg-globals so rewind-safety checks on cg-globals aren't
+;; tripped by every fn-begin.
+(define-record-type cg
+ (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?)
+ cg?
+ (text cg-text)
+ (data cg-data)
+ (bss cg-bss)
+ (vstack cg-vstack cg-vstack-set!)
+ (frame-hi cg-frame-hi cg-frame-hi-set!)
+ (label-ctr cg-label-ctr cg-label-ctr-set!)
+ (str-pool cg-str-pool cg-str-pool-set!)
+ (globals cg-globals cg-globals-set!)
+ (fn-meta cg-fn-meta cg-fn-meta-set!)
+ (fn-buf cg-fn-buf)
+ (prologue-buf cg-prologue-buf)
+ (max-outgoing cg-max-outgoing cg-max-outgoing-set!)
+ (in-fn? cg-in-fn? cg-in-fn?-set!))
+
+;; --------------------------------------------------------------------
+;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1.
+;; --------------------------------------------------------------------
+
+;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol.
+(define %keyword-alist
+ '(;; storage
+ ("auto" . auto) ("register" . register) ("static" . static)
+ ("extern" . extern) ("typedef" . typedef)
+ ;; qualifiers (parsed and discarded by parse)
+ ("const" . const) ("volatile" . volatile) ("restrict" . restrict)
+ ("inline" . inline)
+ ;; type specifiers
+ ("void" . void) ("char" . char) ("short" . short)
+ ("int" . int) ("long" . long)
+ ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool)
+ ;; rejected type specifiers (KW so diagnostics are crisp)
+ ("float" . float) ("double" . double)
+ ;; aggregates
+ ("struct" . struct) ("union" . union) ("enum" . enum)
+ ;; statements
+ ("if" . if) ("else" . else)
+ ("while" . while) ("do" . do) ("for" . for)
+ ("switch" . switch) ("case" . case) ("default" . default)
+ ("break" . break) ("continue" . continue)
+ ("return" . return) ("goto" . goto)
+ ;; operators
+ ("sizeof" . sizeof)
+ ;; reserved-and-rejected (KW so diagnostics are crisp)
+ ("_Generic" . _Generic) ("_Atomic" . _Atomic)
+ ("_Thread_local" . _Thread_local)
+ ("_Alignof" . _Alignof) ("_Alignas" . _Alignas)
+ ("_Static_assert" . _Static_assert)
+ ("_Complex" . _Complex) ("_Imaginary" . _Imaginary)))
+
+;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol.
+;; Listed longest-match-first; the lexer scans this list in order.
+;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents.
+(define %punct-alist
+ '(;; 4-byte
+ ("%:%:" . paste)
+ ;; 3-byte
+ ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq)
+ ;; 2-byte
+ ("##" . paste) ("->" . arrow)
+ ("++" . inc) ("--" . dec)
+ ("<<" . shl) (">>" . shr)
+ ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne)
+ ("&&" . land) ("||" . lor)
+ ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq)
+ ("/=" . slash-eq) ("%=" . pct-eq)
+ ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq)
+ ;; digraphs (mapped to the standard equivalent symbol)
+ ("<:" . lbrack) (":>" . rbrack)
+ ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash)
+ ;; 1-byte
+ ("[" . lbrack) ("]" . rbrack)
+ ("(" . lparen) (")" . rparen)
+ ("{" . lbrace) ("}" . rbrace)
+ ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark)
+ ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct)
+ ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang)
+ ("<" . lt) (">" . gt) ("=" . assign)
+ ("#" . hash)))
+;; cc/lex.scm — bytestream → token list. Pure function; no I/O,
+;; no macro awareness.
+;;
+;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets
+;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate.
+;;
+;; Owner: <unassigned>
+;;
+;; Implementation notes:
+;;
+;; - The lexer walks `src` byte-by-byte, threading (pos, line, col)
+;; explicitly through every helper (no mutable state). Each token
+;; captures its starting loc; helpers return (tok npos nline ncol).
+;; - Trigraphs and `\<newline>` line splicing are handled via a single
+;; logical-byte primitive `%lex-peek`: it advances over splices and
+;; translates trigraphs in-place, so downstream code only ever sees
+;; the "translation phase 2" stream.
+;; - Comments are stripped at the same level as whitespace.
+;; - NL tokens are emitted at every physical newline so pp can use
+;; them to terminate directives.
+;;
+;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm):
+;;
+;; - Token-producing helpers wrap their inner work in a heap-mark /
+;; heap-rewind! arena. The slots that must survive the rewind
+;; (start-loc and the integer holders for npos/nline/ncol) are bound
+;; *before* the (set! mark (heap-mark)) so the let's env extensions
+;; live below the mark. The byte-run scanners' tail-call env frames
+;; and any %lex-peek 4-lists are above the mark and get reclaimed.
+;; For helpers that produce a fresh bytevector (ident, string), the
+;; bv is allocated post-rewind so it persists into the parent arena.
+;; - Numeric digit runs accumulate their value inline via
+;; %accum-int-while; they no longer materialize a per-byte cons list
+;; and then a separate %digits-value walk.
+
+;; --------------------------------------------------------------------
+;; Byte-class predicates (raw u8 values, not chars).
+;; --------------------------------------------------------------------
+(define (%digit? b) (if (< b 48) #f (if (< 57 b) #f #t))) ; '0'..'9'
+(define (%hex? b)
+ (cond ((%digit? b) #t)
+ ((if (< b 65) #f (if (< 70 b) #f #t)) #t) ; 'A'..'F'
+ ((if (< b 97) #f (if (< 102 b) #f #t)) #t) ; 'a'..'f'
+ (else #f)))
+(define (%octal? b) (if (< b 48) #f (if (< 55 b) #f #t))) ; '0'..'7'
+(define (%alpha? b)
+ (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t) ; 'A'..'Z'
+ ((if (< b 97) #f (if (< 122 b) #f #t)) #t) ; 'a'..'z'
+ (else #f)))
+(define (%ident-start? b) (or (%alpha? b) (= b 95))) ; '_'
+(define (%ident-cont? b) (or (%ident-start? b) (%digit? b)))
+(define (%hspace? b) (or (= b 32) (= b 9) (= b 11) (= b 12))) ; SP TAB VT FF
+(define (%newline? b) (= b 10)) ; '\n'
+
+;; --------------------------------------------------------------------
+;; Logical byte access. %lex-peek returns
+;; (byte npos nline ncol)
+;; where (npos, nline, ncol) points *just past* the consumed physical
+;; bytes. On EOF it returns (#f pos line col).
+;;
+;; Two transformations folded in here:
+;;
+;; - Trigraphs: ??= ??( ??/ ??) ??' ??< ??! ??> ??-
+;; # [ \ ] ^ { | } ~
+;; The pair `??` followed by one of the nine trigraph completers
+;; produces the translated byte and advances 3 source bytes.
+;; - Line splice: a backslash immediately followed by `\n` is removed
+;; as a unit (incrementing line, resetting col to 1) and we recurse
+;; to fetch the next logical byte.
+;;
+;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and
+;; char literals do their own escape-handling.
+;; --------------------------------------------------------------------
+(define (%trigraph-byte b)
+ ;; Map the third trigraph byte to its replacement, or #f.
+ (cond ((= b 61) 35) ; '=' -> '#'
+ ((= b 40) 91) ; '(' -> '['
+ ((= b 47) 92) ; '/' -> '\\'
+ ((= b 41) 93) ; ')' -> ']'
+ ((= b 39) 94) ; '\'' -> '^'
+ ((= b 60) 123) ; '<' -> '{'
+ ((= b 33) 124) ; '!' -> '|'
+ ((= b 62) 125) ; '>' -> '}'
+ ((= b 45) 126) ; '-' -> '~'
+ (else #f)))
+
+(define (%lex-peek src pos line col)
+ (let ((n (bytevector-length src)))
+ (cond
+ ((>= pos n) (list #f pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ;; Trigraph: ?? + completer
+ ((and (= b 63)
+ (< (+ pos 2) n)
+ (= (bytevector-u8-ref src (+ pos 1)) 63))
+ (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2)))))
+ (if tr
+ (list tr (+ pos 3) line (+ col 3))
+ (list b (+ pos 1) line (+ col 1)))))
+ ;; Line splice: backslash + newline (consume both, no token)
+ ((and (= b 92)
+ (< (+ pos 1) n)
+ (= (bytevector-u8-ref src (+ pos 1)) 10))
+ (%lex-peek src (+ pos 2) (+ line 1) 1))
+ ;; Newline: pass through but caller decides line/col bump
+ ((%newline? b)
+ (list b (+ pos 1) (+ line 1) 1))
+ (else
+ (list b (+ pos 1) line (+ col 1)))))))))
+
+;; Convenience accessors over the 4-list.
+(define (%pk-byte p) (car p))
+(define (%pk-pos p) (car (cdr p)))
+(define (%pk-line p) (car (cdr (cdr p))))
+(define (%pk-col p) (car (cdr (cdr (cdr p)))))
+
+;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with
+;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the
+;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and
+;; no list allocation is needed. Excludes the three bytes that %lex-peek
+;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump).
+(define (%fast-byte? b)
+ (cond ((= b 63) #f)
+ ((= b 92) #f)
+ ((= b 10) #f)
+ (else #t)))
+
+;; --------------------------------------------------------------------
+;; Whitespace + comment skipper. Returns (pos line col).
+;; Handles spaces/tabs, // line comments, /* block */ comments. Does
+;; *not* consume `\n` — newlines are tokens.
+;; --------------------------------------------------------------------
+(define (%skip-ws-and-comments src pos line col file)
+ (let ((n (bytevector-length src)))
+ (cond
+ ((>= pos n) (list pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((and (%fast-byte? b) (%hspace? b))
+ (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file))
+ ((%fast-byte? b)
+ ;; Fast-byte that isn't hspace. Only '/' is interesting;
+ ;; everything else terminates the skip.
+ (cond
+ ((= b 47) (%maybe-comment src pos line col file))
+ (else (list pos line col))))
+ (else
+ ;; Slow path: trigraph / splice / newline.
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((not b2) (list pos line col))
+ ((%hspace? b2)
+ (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file))
+ ((= b2 47) (%maybe-comment src pos line col file))
+ (else (list pos line col)))))))))))
+
+(define (%maybe-comment src pos line col file)
+ ;; Source byte at pos resolves to '/'. Decide between // line comment,
+ ;; /* block comment, or "leave the slash alone" (it's a punctuator).
+ (let* ((p (%lex-peek src pos line col))
+ (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((and b2 (= b2 47))
+ (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file))
+ ((and b2 (= b2 42))
+ (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q)
+ file line col))
+ (else (list pos line col)))))
+
+(define (%skip-line-comment src pos line col file)
+ ;; Consume bytes until end-of-stream or until we *see* '\n' (do not
+ ;; consume the newline itself; outer loop emits the NL).
+ (let ((n (bytevector-length src)))
+ (cond
+ ((>= pos n) (%skip-ws-and-comments src pos line col file))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ;; '\n' terminates without consuming.
+ ((= b 10) (%skip-ws-and-comments src pos line col file))
+ ((%fast-byte? b)
+ (%skip-line-comment src (+ pos 1) line (+ col 1) file))
+ (else
+ ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice.
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((not b2) (%skip-ws-and-comments src pos line col file))
+ ((%newline? b2) (%skip-ws-and-comments src pos line col file))
+ (else
+ (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file)))))))))))
+
+(define (%skip-block-comment src pos line col file start-line start-col)
+ (let ((n (bytevector-length src)))
+ (cond
+ ((>= pos n)
+ (die (%loc file start-line start-col)
+ "unterminated /* block comment"))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ;; Fast path for plain content bytes that aren't '*'.
+ ((and (%fast-byte? b) (not (= b 42)))
+ (%skip-block-comment src (+ pos 1) line (+ col 1)
+ file start-line start-col))
+ (else
+ ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice).
+ (let* ((p (%lex-peek src pos line col))
+ (b1 (%pk-byte p)))
+ (cond
+ ((not b1)
+ (die (%loc file start-line start-col)
+ "unterminated /* block comment"))
+ ((= b1 42)
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((not b2)
+ (die (%loc file start-line start-col)
+ "unterminated /* block comment"))
+ ((= b2 47)
+ (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q)
+ file))
+ (else
+ ;; Re-scan starting at the byte after '*'; the '*' was
+ ;; not the closer, but the next byte might itself be '*'.
+ (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-line start-col)))))
+ (else
+ (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-line start-col)))))))))))
+
+;; --------------------------------------------------------------------
+;; Byte-run scanners.
+;;
+;; Tail-recursive walkers used by ident/number/string readers. None
+;; allocate per scanned byte on the fast path (only %lex-peek 4-lists
+;; on trigraph/splice/newline); the per-iteration env frames allocated
+;; by tail recursion are reclaimed by the caller's heap-rewind!.
+;;
+;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol)
+;; - %fill-while-bv: write matching bytes into a pre-sized bv.
+;; - %accum-int-while: accumulate a base-N integer over digit bytes.
+;; (val count npos nline ncol)
+;; - %accum-octal-bounded: same, but stops after k digits.
+;; --------------------------------------------------------------------
+(define (%scan-while pred src pos line col)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (cnt 0))
+ (cond
+ ((>= pos n) (list cnt pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (if (pred b)
+ (loop (+ pos 1) line (+ col 1) (+ cnt 1))
+ (list cnt pos line col)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (if (and b2 (pred b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1))
+ (list cnt pos line col)))))))))))
+
+(define (%fill-while-bv pred src pos line col bv idx)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (idx idx))
+ (cond
+ ((>= pos n) idx)
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (cond
+ ((pred b)
+ (bytevector-u8-set! bv idx b)
+ (loop (+ pos 1) line (+ col 1) (+ idx 1)))
+ (else idx)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((and b2 (pred b2))
+ (bytevector-u8-set! bv idx b2)
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))
+ (else idx)))))))))))
+
+(define (%digit-val-byte b)
+ ;; ASCII digit byte → integer value. Caller guarantees b is a valid
+ ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F).
+ (cond ((%digit? b) (- b 48))
+ ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10))
+ ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10))
+ (else 0)))
+
+(define (%accum-int-while pred src pos line col base)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0))
+ (cond
+ ((>= pos n) (list val cnt pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (if (pred b)
+ (loop (+ pos 1) line (+ col 1)
+ (+ (* val base) (%digit-val-byte b)) (+ cnt 1))
+ (list val cnt pos line col)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (if (and b2 (pred b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p)
+ (+ (* val base) (%digit-val-byte b2)) (+ cnt 1))
+ (list val cnt pos line col)))))))))))
+
+(define (%accum-octal-bounded src pos line col k)
+ ;; Up to k octal digits. Returns (val count npos nline ncol).
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0))
+ (cond
+ ((zero? k) (list val cnt pos line col))
+ ((>= pos n) (list val cnt pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (if (%octal? b)
+ (loop (+ pos 1) line (+ col 1) (- k 1)
+ (+ (* val 8) (- b 48)) (+ cnt 1))
+ (list val cnt pos line col)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (if (and b2 (%octal? b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1)
+ (+ (* val 8) (- b2 48)) (+ cnt 1))
+ (list val cnt pos line col)))))))))))
+
+;; --------------------------------------------------------------------
+;; Identifier / keyword reader.
+;;
+;; Returns (tok npos nline ncol). Caller has already verified that the
+;; first byte at `pos` satisfies %ident-start?.
+;;
+;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run,
+;; then we rewind, allocate `name` bv post-rewind so it survives, then
+;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The
+;; integer slots count/npos/nline/ncol are bound *before* the mark so
+;; they survive both rewinds.
+;; --------------------------------------------------------------------
+(define (lex-read-ident src pos file)
+ ;; Public for tests. Threads line/col from a fresh start.
+ (%lex-read-ident src pos 1 (+ pos 1) file))
+
+(define (%lex-read-ident src pos line col file)
+ (let ((start-loc (%loc file line col))
+ (count 0) (npos 0) (nline 0) (ncol 0)
+ (mark 0))
+ (set! mark (heap-mark))
+ (let ((sres (%scan-while %ident-cont? src pos line col)))
+ (set! count (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))
+ (heap-rewind! mark)
+ (let ((name (make-bytevector count 0))
+ (mark2 0))
+ (set! mark2 (heap-mark))
+ (%fill-while-bv %ident-cont? src pos line col name 0)
+ (heap-rewind! mark2)
+ (let ((kw (alist-ref name %keyword-alist)))
+ (cons (if kw
+ (make-tok 'KW kw start-loc)
+ (make-tok 'IDENT name start-loc))
+ (list npos nline ncol))))))
+
+;; --------------------------------------------------------------------
+;; Number reader.
+;;
+;; Decimal: [1-9][0-9]* (suffix: u U l L ll LL combinations)
+;; Hex: 0x[0-9a-fA-F]+ | 0X...
+;; Octal: 0[0-7]*
+;; Float: anything looking like 1.0, 1e3, .5 → die crisply.
+;;
+;; Returns (tok npos nline ncol) on success. Aborts via `die` on float.
+;;
+;; %accum-int-while folds digit collection and value computation into
+;; one walk — no per-byte cons cells, no separate digits-list pass.
+;; --------------------------------------------------------------------
+(define (lex-read-number src pos file)
+ (%lex-read-number src pos 1 (+ pos 1) file))
+
+(define (%lex-read-number src pos line col file)
+ (let* ((start-loc (%loc file line col))
+ (p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ;; '0x' / '0X' hex prefix
+ ((and (= b 48)
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X'
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (r (%accum-int-while %hex? src
+ (%pk-pos q) (%pk-line q) (%pk-col q) 16))
+ (val (car r))
+ (cnt (car (cdr r)))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ (if (zero? cnt)
+ (die start-loc "expected hex digits after 0x")
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
+ (cons (make-tok 'INT val start-loc) after)))))
+ ;; '0' alone → octal sequence (could be just zero)
+ ((= b 48)
+ (let* ((r (%accum-int-while %octal? src
+ (%pk-pos p) (%pk-line p) (%pk-col p) 8))
+ (val (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ ;; Reject '.' / 'e' / 'E' immediately after the octal run — float.
+ (%check-no-float src pos2 line2 col2 file start-loc)
+ ;; Reject stray digits 8/9 in an octal context (e.g. 089).
+ (let* ((p3 (%lex-peek src pos2 line2 col2))
+ (b3 (%pk-byte p3)))
+ (if (and b3 (%digit? b3))
+ (die start-loc "invalid octal digit" (bv-of-byte b3))
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
+ (cons (make-tok 'INT val start-loc) after))))))
+ ;; '1'-'9' → decimal
+ ((%digit? b)
+ (let* ((r (%accum-int-while %digit? src pos line col 10))
+ (val (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ (%check-no-float src pos2 line2 col2 file start-loc)
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
+ (cons (make-tok 'INT val start-loc) after))))
+ ;; '.' followed by a digit = float-style literal — reject.
+ ((= b 46)
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (if (and b2 (%digit? b2))
+ (die start-loc "floating-point literal not supported")
+ ;; Otherwise '.' was a punctuator — caller wouldn't have
+ ;; routed here unless it was a digit-led prefix.
+ (die start-loc "internal: number reader on non-number"))))
+ (else
+ (die start-loc "internal: number reader on non-number")))))
+
+(define (%check-no-float src pos line col file start-loc)
+ ;; If the byte at pos starts a fractional/exponent part, abort.
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) #t)
+ ((= b 46) ; '.'
+ (die start-loc "floating-point literal not supported"))
+ ((or (= b 101) (= b 69)) ; 'e' / 'E'
+ ;; Only a float exponent if followed by [+-]?digit.
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((and b2 (%digit? b2))
+ (die start-loc "floating-point literal not supported"))
+ ((and b2 (or (= b2 43) (= b2 45)))
+ (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q)))
+ (b3 (%pk-byte r)))
+ (if (and b3 (%digit? b3))
+ (die start-loc "floating-point literal not supported")
+ #t)))
+ (else #t))))
+ (else #t))))
+
+(define (%lex-strip-int-suffix src pos line col file)
+ ;; Consume any combination of u U l L (the long can be doubled). We
+ ;; don't validate orderings strictly; tcc.c uses the canonical forms.
+ ;; Returns (npos nline ncol).
+ (let loop ((pos pos) (line line) (col col))
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (list pos line col))
+ ((or (= b 117) (= b 85) ; u U
+ (= b 108) (= b 76)) ; l L
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (else (list pos line col))))))
+
+;; --------------------------------------------------------------------
+;; Escape sequence reader.
+;;
+;; %scan-or-fill-escape decodes one escape sequence starting at `pos`
+;; (which points one past the leading `\\`). When `bv` is a bytevector,
+;; the resulting byte is written to (bv idx); when it is #f, no write
+;; occurs (used during the string-pass scan phase). Returns the 4-list
+;; (val npos nline ncol).
+;; --------------------------------------------------------------------
+(define (%scan-or-fill-escape src pos line col file start-loc bv idx)
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (die start-loc "unterminated escape sequence"))
+ ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms).
+ ((or (= b 120) (= b 88)) ; 'x' / 'X'
+ (let* ((r (%accum-int-while %hex? src
+ (%pk-pos p) (%pk-line p) (%pk-col p) 16))
+ (val0 (car r))
+ (cnt (car (cdr r)))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ (cond
+ ((zero? cnt) (die start-loc "expected hex digits after \\x"))
+ (else
+ (let ((val (bit-and val0 255)))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val pos2 line2 col2))))))
+ ;; \NNN — 1..3 octal digits.
+ ((%octal? b)
+ (let* ((r (%accum-octal-bounded src pos line col 3))
+ (val0 (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r))))))
+ (val (bit-and val0 255)))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val pos2 line2 col2)))
+ (else
+ (let ((val (cond ((= b 110) 10) ; n
+ ((= b 116) 9) ; t
+ ((= b 114) 13) ; r
+ ((= b 92) 92) ; \\
+ ((= b 39) 39) ; '
+ ((= b 34) 34) ; "
+ ((= b 48) 0) ; 0 (already handled by octal but be safe)
+ ((= b 97) 7) ; \a -> BEL
+ ((= b 98) 8) ; \b
+ ((= b 102) 12) ; \f
+ ((= b 118) 11) ; \v
+ ((= b 63) 63) ; \?
+ (else
+ (die start-loc "unknown escape" (bv-of-byte b))))))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val (%pk-pos p) (%pk-line p) (%pk-col p)))))))
+
+;; --------------------------------------------------------------------
+;; String reader.
+;;
+;; Caller has verified src[pos] == '"' (raw byte 34). Returns
+;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended).
+;;
+;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes
+;; collapse to 1 byte each); after rewind we allocate the final bv and
+;; rerun with bv set so the bytes are written directly into it.
+;; --------------------------------------------------------------------
+(define (lex-read-string src pos file)
+ (%lex-read-string src pos 1 (+ pos 1) file))
+
+(define (%lex-read-string src pos line col file)
+ (let ((start-loc (%loc file line col))
+ (cnt 0) (npos 0) (nline 0) (ncol 0)
+ (mark 0))
+ ;; '"' (34) is a fast-byte and never a trigraph result, so the
+ ;; physical byte at `pos` is exactly the opening quote.
+ (cond
+ ((or (>= pos (bytevector-length src))
+ (not (= (bytevector-u8-ref src pos) 34)))
+ (die start-loc "internal: string reader on non-quote"))
+ (else
+ (set! mark (heap-mark))
+ (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
+ file start-loc #f)))
+ (set! cnt (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))
+ (heap-rewind! mark)
+ (let ((bv (make-bytevector cnt 0))
+ (mark2 0))
+ (set! mark2 (heap-mark))
+ (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)
+ (heap-rewind! mark2)
+ (cons (make-tok 'STR bv start-loc)
+ (list npos nline ncol)))))))
+
+(define (%string-pass src pos line col file start-loc bv)
+ ;; Walk the string body (after opening "). When `bv` is #f, count
+ ;; effective bytes; when it is a bytevector, write bytes into it at
+ ;; index 0..count-1. Returns (count npos nline ncol).
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (idx 0))
+ (cond
+ ((>= pos n) (die start-loc "unterminated string literal"))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ;; Closing quote — fast byte but special.
+ ((= b 34)
+ (list idx (+ pos 1) line (+ col 1)))
+ ((%fast-byte? b)
+ (cond (bv (bytevector-u8-set! bv idx b))
+ (else #f))
+ (loop (+ pos 1) line (+ col 1) (+ idx 1)))
+ (else
+ ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'.
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((not b2)
+ (die start-loc "unterminated string literal"))
+ ((= b2 34)
+ (list idx (%pk-pos p) (%pk-line p) (%pk-col p)))
+ ((%newline? b2)
+ (die start-loc "newline in string literal"))
+ ((= b2 92)
+ (let* ((er (%scan-or-fill-escape
+ src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-loc bv idx))
+ (epos (car (cdr er)))
+ (eline (car (cdr (cdr er))))
+ (ecol (car (cdr (cdr (cdr er))))))
+ (loop epos eline ecol (+ idx 1))))
+ (else
+ (cond (bv (bytevector-u8-set! bv idx b2))
+ (else #f))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))))))))))))
+
+;; --------------------------------------------------------------------
+;; Char reader.
+;;
+;; Caller has verified src[pos] == '\''. Multi-character constants
+;; ('AB') are rejected via die.
+;; --------------------------------------------------------------------
+(define (lex-read-char src pos file)
+ (%lex-read-char src pos 1 (+ pos 1) file))
+
+(define (%lex-read-char src pos line col file)
+ (let* ((start-loc (%loc file line col))
+ (p0 (%lex-peek src pos line col))
+ (b0 (%pk-byte p0)))
+ (if (not (and b0 (= b0 39)))
+ (die start-loc "internal: char reader on non-quote")
+ (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0)
+ file start-loc))))
+
+(define (%collect-char src pos line col file start-loc)
+ ;; Read exactly one byte (handling escapes), then expect closing '\''.
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (die start-loc "unterminated char literal"))
+ ((= b 39) (die start-loc "empty char literal"))
+ ((%newline? b) (die start-loc "newline in char literal"))
+ ((= b 92) ; escape
+ (let* ((r (%scan-or-fill-escape src
+ (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-loc #f 0))
+ (val (car r))
+ (pos2 (car (cdr r)))
+ (line2 (car (cdr (cdr r))))
+ (col2 (car (cdr (cdr (cdr r))))))
+ (%expect-char-close src pos2 line2 col2 file start-loc val)))
+ (else
+ (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-loc b)))))
+
+(define (%expect-char-close src pos line col file start-loc val)
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (die start-loc "unterminated char literal"))
+ ((= b 39)
+ (cons (make-tok 'CHAR val start-loc)
+ (list (%pk-pos p) (%pk-line p) (%pk-col p))))
+ (else
+ (die start-loc "multi-character char constant not supported")))))
+
+;; --------------------------------------------------------------------
+;; Punctuator reader.
+;;
+;; Greedy longest-match against %punct-alist (cc/data.scm). The alist
+;; is already ordered longest-first. We additionally bucket entries by
+;; their first byte so %lex-read-punct only loops over the small set of
+;; patterns that can start at the current source byte.
+;; --------------------------------------------------------------------
+
+(define (%alist-ref-int k al)
+ ;; Lookup in an int-keyed alist (linear scan, '= compare).
+ (cond ((null? al) #f)
+ ((= (car (car al)) k) (cdr (car al)))
+ (else (%alist-ref-int k (cdr al)))))
+
+(define (%mem-int? k xs)
+ (cond ((null? xs) #f)
+ ((= (car xs) k) #t)
+ (else (%mem-int? k (cdr xs)))))
+
+(define (%filter-by-first-byte b al)
+ ;; Subset of `al` whose pattern starts with byte b, preserving order.
+ (cond
+ ((null? al) '())
+ ((= (bytevector-u8-ref (car (car al)) 0) b)
+ (cons (car al) (%filter-by-first-byte b (cdr al))))
+ (else (%filter-by-first-byte b (cdr al)))))
+
+(define (%group-by-first-byte al)
+ ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per
+ ;; distinct first byte; sub-alist preserves longest-match-first
+ ;; order from the source list.
+ (let loop ((xs al) (seen '()) (out '()))
+ (cond
+ ((null? xs) (reverse out))
+ (else
+ (let* ((entry (car xs))
+ (pat (car entry))
+ (b (bytevector-u8-ref pat 0)))
+ (cond
+ ((%mem-int? b seen) (loop (cdr xs) seen out))
+ (else
+ (loop (cdr xs)
+ (cons b seen)
+ (cons (cons b (%filter-by-first-byte b al)) out)))))))))
+
+(define %punct-buckets (%group-by-first-byte %punct-alist))
+
+(define (lex-read-punct src pos file)
+ (%lex-read-punct src pos 1 (+ pos 1) file))
+
+(define (%lex-read-punct src pos line col file)
+ (let* ((start-loc (%loc file line col))
+ (p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (die start-loc "unrecognized byte" "EOF"))
+ (else
+ (let ((bucket (%alist-ref-int b %punct-buckets)))
+ (cond
+ ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b)))
+ (else (%punct-loop src pos line col file start-loc bucket))))))))
+
+(define (%punct-loop src pos line col file start-loc al)
+ (cond
+ ((null? al)
+ (let* ((p (%lex-peek src pos line col)))
+ (die start-loc "unrecognized byte"
+ (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF"))))
+ (else
+ (let* ((entry (car al))
+ (pat (car entry))
+ (sym (cdr entry))
+ (m (%match-bytes src pos line col pat 0)))
+ (if m
+ (cons (make-tok 'PUNCT sym start-loc) m)
+ (%punct-loop src pos line col file start-loc (cdr al)))))))
+
+(define (%match-bytes src pos line col pat i)
+ ;; If the next bytes from (pos line col), in logical-byte stream
+ ;; order, equal `pat[i..]`, return (npos nline ncol) after the
+ ;; match. Otherwise #f.
+ (cond
+ ((= i (bytevector-length pat)) (list pos line col))
+ (else
+ (let ((n (bytevector-length src)))
+ (cond
+ ((>= pos n) #f)
+ (else
+ (let ((b (bytevector-u8-ref src pos))
+ (pb (bytevector-u8-ref pat i)))
+ (cond
+ ((%fast-byte? b)
+ (if (= b pb)
+ (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1))
+ #f))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((not b2) #f)
+ ((= b2 pb)
+ (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p)
+ pat (+ i 1)))
+ (else #f))))))))))))
+
+;; --------------------------------------------------------------------
+;; lex-tokenize src file -> list of tok ending in EOF.
+;; --------------------------------------------------------------------
+(define (lex-tokenize src file)
+ (%lex-loop src 0 1 1 file '() #t))
+
+;; bol? — `#t` when no token has been emitted on the current physical
+;; line yet (start of file, or only NL + whitespace seen since the last
+;; line break). pp recognizes a directive only when its leading `#` is
+;; at line-start; we forward that decision into the token stream by
+;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`.
+(define (%lex-loop src pos line col file acc bol?)
+ (let* ((sw (%skip-ws-and-comments src pos line col file))
+ (pos1 (car sw))
+ (line1 (car (cdr sw)))
+ (col1 (car (cdr (cdr sw))))
+ (p (%lex-peek src pos1 line1 col1))
+ (b (%pk-byte p)))
+ (cond
+ ;; EOF
+ ((not b)
+ (let* ((eof-tok (make-tok 'EOF #f (%loc file line1 col1))))
+ (reverse (cons eof-tok acc))))
+ ;; Newline → emit NL, reset bol?.
+ ((%newline? b)
+ (let ((nl (make-tok 'NL #f (%loc file line1 col1))))
+ (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file (cons nl acc) #t)))
+ ;; Line-leading `#` → emit HASH, but only the bare `#`. `##` is
+ ;; never line-leading in valid C; if it appears, fall through to
+ ;; normal punctuator handling so it lexes as `paste`.
+ ((and bol? (= b 35))
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((and b2 (= b2 35))
+ (let* ((r (%lex-read-punct src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f)))
+ (else
+ (let ((tok (make-tok 'HASH #f (%loc file line1 col1))))
+ (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file (cons tok acc) #f))))))
+ ;; Identifier / keyword
+ ((%ident-start? b)
+ (let* ((r (%lex-read-ident src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f)))
+ ;; Number (digit start)
+ ((%digit? b)
+ (let* ((r (%lex-read-number src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f)))
+ ;; '.' might start a number (1.0 actually starts with digit; .5
+ ;; would route here). We keep this as a punctuator unless followed
+ ;; by a digit, in which case the lexer rejects per spec.
+ ((= b 46)
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((and b2 (%digit? b2))
+ (die (%loc file line1 col1) "floating-point literal not supported"))
+ (else
+ (let* ((r (%lex-read-punct src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f))))))
+ ;; String
+ ((= b 34)
+ (let* ((r (%lex-read-string src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f)))
+ ;; Char
+ ((= b 39)
+ (let* ((r (%lex-read-char src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f)))
+ ;; Punctuator (default)
+ (else
+ (let* ((r (%lex-read-punct src pos1 line1 col1 file))
+ (tok (car r))
+ (npos (car (cdr r)))
+ (nline (car (cdr (cdr r))))
+ (ncol (car (cdr (cdr (cdr r))))))
+ (%lex-loop src npos nline ncol file (cons tok acc) #f))))))
+;; cc/pp.scm — token list -> expanded token list.
+;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4.
+;; #include rejected (CC.md §Toolchain envelope).
+
+;; --- helpers (TODO: promote to util.scm if shared more broadly) ---
+(define (%pp-bv-mem? x xs)
+ (cond ((null? xs) #f)
+ ((bv= x (car xs)) #t)
+ (else (%pp-bv-mem? x (cdr xs)))))
+
+(define (%pp-bv-union a b)
+ (cond ((null? a) b)
+ ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b))
+ (else (cons (car a) (%pp-bv-union (cdr a) b)))))
+
+(define (%pp-with-hide t hide)
+ (%tok (tok-kind t) (tok-value t) (tok-loc t) hide))
+(define (%pp-with-loc t loc)
+ (%tok (tok-kind t) (tok-value t) loc (tok-hide t)))
+
+;; --- pp-state (private record) ---
+;; cond-stack: list of (active? . has-taken?). Outer-active gating is
+;; computed by walking the stack rather than encoding it in frames.
+(define-record-type pp-state
+ (%pp-state macros cond-stack cur-file line-delta)
+ pp-state?
+ (macros pps-macros pps-macros-set!)
+ (cond-stack pps-cond-stack pps-cond-stack-set!)
+ (cur-file pps-cur-file pps-cur-file-set!)
+ (line-delta pps-line-delta pps-line-delta-set!))
+
+(define (%pp-make-state defs) (%pp-state defs '() #f 0))
+
+(define (%pp-active? state)
+ (let loop ((xs (pps-cond-stack state)))
+ (cond ((null? xs) #t)
+ ((not (car (car xs))) #f)
+ (else (loop (cdr xs))))))
+
+;; Active for the *parent* of the top frame (used by elif/else).
+(define (%pp-parent-active? state)
+ (let ((cs (pps-cond-stack state)))
+ (cond ((null? cs) #t)
+ (else
+ (let loop ((xs (cdr cs)))
+ (cond ((null? xs) #t)
+ ((not (car (car xs))) #f)
+ (else (loop (cdr xs)))))))))
+
+;; --- token classification ---
+(define (%pp-eof? t) (eq? (tok-kind t) 'EOF))
+(define (%pp-nl? t) (eq? (tok-kind t) 'NL))
+(define (%pp-hash? t) (eq? (tok-kind t) 'HASH))
+(define (%pp-ident? t) (eq? (tok-kind t) 'IDENT))
+(define (%pp-int? t) (eq? (tok-kind t) 'INT))
+(define (%pp-punct? t pname)
+ (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname)))
+(define (%pp-ident-name? t name-bv)
+ (and (%pp-ident? t) (bv= (tok-value t) name-bv)))
+(define (%pp-skip-ws toks) toks)
+
+;; --- built-in macro names ---
+(define %pp-bv-FILE "__FILE__")
+(define %pp-bv-LINE "__LINE__")
+(define %pp-bv-STDC "__STDC__")
+(define %pp-bv-LISPCC "__LISPCC__")
+(define %pp-bv-DATE "__DATE__")
+(define %pp-bv-TIME "__TIME__")
+(define %pp-bv-STDC-VERSION "__STDC_VERSION__")
+(define %pp-bv-STDC-HOSTED "__STDC_HOSTED__")
+(define %pp-bv-VA-ARGS "__VA_ARGS__")
+(define %pp-bv-defined "defined")
+
+;; Fixed values for reproducibility — we don't read the wall clock.
+(define %pp-bv-DATE-VALUE "Jan 1 1970")
+(define %pp-bv-TIME-VALUE "00:00:00")
+
+(define (%pp-builtin? name)
+ (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE)
+ (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC)
+ (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME)
+ (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED)))
+
+(define (%pp-expand-builtin name loc state)
+ (let* ((file (or (pps-cur-file state) (loc-file loc)))
+ (line (+ (loc-line loc) (pps-line-delta state)))
+ (col (loc-col loc))
+ (here (%loc file line col)))
+ (cond
+ ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '())))
+ ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '())))
+ ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '())))
+ ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '())))
+ ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE here '())))
+ ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE here '())))
+ ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 here '())))
+ ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 here '())))
+ (else (die loc "internal: not a builtin" name)))))
+
+;; --- buf-list: simple reversed-list buffer of toks ---
+(define-record-type buf-list
+ (%buf-list xs)
+ buf-list?
+ (xs buf-list-xs buf-list-xs-set!))
+(define (make-buf-list) (%buf-list '()))
+(define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b))))
+(define (buf-list-push-many! b ts)
+ (let loop ((ts ts))
+ (cond ((null? ts) #t)
+ (else (buf-list-push! b (car ts)) (loop (cdr ts))))))
+(define (buf-list-flush b) (reverse (buf-list-xs b)))
+
+;; Translation phase 6: concatenate adjacent string literals. The merged
+;; token keeps the loc and hide-set of the first; values are byte-appended.
+(define (%pp-merge-adjacent-strs toks)
+ (let loop ((toks toks) (acc '()))
+ (cond
+ ((null? toks) (reverse acc))
+ ((and (not (null? acc))
+ (eq? (tok-kind (car toks)) 'STR)
+ (eq? (tok-kind (car acc)) 'STR))
+ (let* ((prev (car acc))
+ (cur (car toks))
+ (merged (%tok 'STR
+ (bytevector-append (tok-value prev) (tok-value cur))
+ (tok-loc prev)
+ (tok-hide prev))))
+ (loop (cdr toks) (cons merged (cdr acc)))))
+ (else (loop (cdr toks) (cons (car toks) acc))))))
+
+;; --- pp-expand: top-level driver ---
+(define (pp-expand toks initial-defines)
+ (let ((state (%pp-make-state initial-defines))
+ (out (make-buf-list)))
+ (let loop ((toks toks))
+ (cond
+ ((null? toks) (die #f "pp-expand: missing EOF token"))
+ ((%pp-eof? (car toks))
+ (cond ((not (null? (pps-cond-stack state)))
+ (die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef"))
+ (else
+ (buf-list-push! out (car toks))
+ (%pp-merge-adjacent-strs (buf-list-flush out)))))
+ ((%pp-nl? (car toks)) (loop (cdr toks)))
+ ((%pp-hash? (car toks))
+ (let* ((lr (%pp-take-line (cdr toks)))
+ (line (car lr)) (rest (cdr lr)))
+ (%pp-dispatch-directive (car toks) line state out)
+ (loop rest)))
+ (else
+ (let* ((lr (%pp-take-line toks))
+ (line (car lr)) (rest (cdr lr)))
+ (cond ((%pp-active? state)
+ (%pp-emit-expanded line state out))
+ (else #t))
+ (loop rest)))))))
+
+;; Take tokens up to (not including) the next NL or EOF. NL is consumed;
+;; EOF is left in the stream so the driver sees it next.
+(define (%pp-take-line toks)
+ (let loop ((toks toks) (acc '()))
+ (cond
+ ((null? toks) (cons (reverse acc) toks))
+ ((%pp-eof? (car toks)) (cons (reverse acc) toks))
+ ((%pp-nl? (car toks)) (cons (reverse acc) (cdr toks)))
+ (else (loop (cdr toks) (cons (car toks) acc))))))
+
+;; --- directive dispatch ---
+;; pmatch-based on the directive name bv. bv literals match by equal?.
+;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else`
+;; are C keywords promoted by lex; their KW symbol values map back to bv
+;; via symbol->string).
+(define (%pp-directive-name t)
+ (cond ((eq? (tok-kind t) 'IDENT) (tok-value t))
+ ((eq? (tok-kind t) 'KW) (symbol->string (tok-value t)))
+ (else #f)))
+
+(define (%pp-dispatch-directive hash-tok line state out)
+ (let ((line (%pp-skip-ws line)))
+ (cond
+ ((null? line) #t) ; bare `#` line — null directive
+ ((%pp-directive-name (car line))
+ (let ((name (%pp-directive-name (car line)))
+ (rest (cdr line))
+ (loc (tok-loc (car line))))
+ (pmatch name
+ ("define" (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t)))
+ ("undef" (cond ((%pp-active? state) (%pp-do-undef rest state)) (else #t)))
+ ("if" (%pp-do-if rest state))
+ ("ifdef" (%pp-do-ifdef rest state))
+ ("ifndef" (%pp-do-ifndef rest state))
+ ("elif" (%pp-do-elif rest state))
+ ("else" (%pp-do-else rest state))
+ ("endif" (%pp-do-endif rest state))
+ ("error" (cond ((%pp-active? state)
+ (%pp-do-error (cons (car line) rest) state))
+ (else #t)))
+ ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t)))
+ ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t)))
+ ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t)))
+ (else (die loc "unknown preprocessor directive" name)))))
+ (else
+ (die (tok-loc (car line)) "expected directive name after '#'"
+ (tok-kind (car line)))))))
+
+;; --- #define ---
+;; function-like vs object-like is decided by an immediately-adjacent `(`.
+;; "Adjacent" = column of `(` equals column of name + length of name.
+(define (%pp-do-define line state)
+ (cond
+ ((null? line) (die #f "#define requires a macro name"))
+ ((not (%pp-ident? (car line)))
+ (die (tok-loc (car line)) "#define: expected identifier"))
+ (else
+ (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line)))
+ (cond
+ ((and (not (null? rest))
+ (%pp-punct? (car rest) 'lparen)
+ (= (loc-col (tok-loc (car rest)))
+ (+ (loc-col (tok-loc nt))
+ (bytevector-length name))))
+ (%pp-define-fn name (cdr rest) (tok-loc nt) state))
+ (else
+ (let ((m (%macro 'obj '() rest)))
+ (pps-macros-set! state (alist-set name m (pps-macros state))))))))))
+
+(define (%pp-define-fn name post-lparen nloc state)
+ (let loop ((toks post-lparen) (params '()) (variadic? #f))
+ (cond
+ ((null? toks) (die nloc "#define: unterminated parameter list"))
+ ((%pp-punct? (car toks) 'rparen)
+ (let* ((body (cdr toks))
+ (kind (if variadic? 'fn-vararg 'fn))
+ (m (%macro kind (reverse params) body)))
+ (pps-macros-set! state (alist-set name m (pps-macros state)))))
+ ((%pp-punct? (car toks) 'ellipsis)
+ (let ((rest (cdr toks)))
+ (cond
+ ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'"))
+ ((%pp-punct? (car rest) 'rparen) (loop rest params #t))
+ (else (die (tok-loc (car rest)) "#define: garbage after '...'")))))
+ ((null? params)
+ (cond
+ ((%pp-ident? (car toks))
+ (loop (cdr toks) (cons (tok-value (car toks)) params) #f))
+ (else (die (tok-loc (car toks)) "#define: expected parameter name"))))
+ (else
+ (cond
+ ((%pp-punct? (car toks) 'comma)
+ (let ((after (cdr toks)))
+ (cond
+ ((null? after) (die (tok-loc (car toks)) "#define: trailing ','"))
+ ((%pp-punct? (car after) 'ellipsis)
+ (let ((aa (cdr after)))
+ (cond
+ ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen))
+ (loop aa params #t))
+ (else (die (tok-loc (car after))
+ "#define: '...' must precede ')'")))))
+ ((%pp-ident? (car after))
+ (loop (cdr after) (cons (tok-value (car after)) params) #f))
+ (else
+ (die (tok-loc (car after))
+ "#define: expected parameter name after ','")))))
+ (else (die (tok-loc (car toks))
+ "#define: expected ',' or ')' in parameter list")))))))
+
+;; --- #undef ---
+(define (%pp-do-undef line state)
+ (cond
+ ((null? line) (die #f "#undef requires a macro name"))
+ ((not (%pp-ident? (car line)))
+ (die (tok-loc (car line)) "#undef: expected identifier"))
+ (else
+ (pps-macros-set! state
+ (%pp-alist-drop (tok-value (car line)) (pps-macros state))))))
+
+(define (%pp-alist-drop key al)
+ (cond ((null? al) '())
+ ((bv= (car (car al)) key) (cdr al))
+ (else (cons (car al) (%pp-alist-drop key (cdr al))))))
+
+;; --- #if / #ifdef / #ifndef / #elif / #else / #endif ---
+(define (%pp-do-if line state)
+ (cond
+ ((not (%pp-active? state))
+ (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
+ (else
+ (let* ((v (pp-eval-cexpr line (pps-macros state)))
+ (a? (not (= v 0))))
+ (pps-cond-stack-set! state (cons (cons a? a?) (pps-cond-stack state)))))))
+
+(define (%pp-do-ifdef line state)
+ (cond
+ ((not (%pp-active? state))
+ (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
+ (else
+ (let ((d? (%pp-defined? (%pp-name-of-single line) state)))
+ (pps-cond-stack-set! state
+ (cons (cons d? d?) (pps-cond-stack state)))))))
+
+(define (%pp-do-ifndef line state)
+ (cond
+ ((not (%pp-active? state))
+ (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
+ (else
+ (let ((a? (not (%pp-defined? (%pp-name-of-single line) state))))
+ (pps-cond-stack-set! state
+ (cons (cons a? a?) (pps-cond-stack state)))))))
+
+(define (%pp-name-of-single line)
+ (cond
+ ((null? line) (die #f "#ifdef/#ifndef: missing identifier"))
+ ((not (%pp-ident? (car line)))
+ (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier"))
+ (else (tok-value (car line)))))
+
+(define (%pp-defined? name state)
+ (or (alist-ref name (pps-macros state))
+ (%pp-builtin? name)
+ #f))
+
+(define (%pp-do-elif line state)
+ (let ((cs (pps-cond-stack state)))
+ (cond
+ ((null? cs) (die #f "#elif outside #if"))
+ (else
+ (let* ((top (car cs)) (rest (cdr cs))
+ (taken? (cdr top))
+ (par? (%pp-parent-active? state)))
+ (cond
+ ((or (not par?) taken?)
+ (pps-cond-stack-set! state (cons (cons #f taken?) rest)))
+ (else
+ (let* ((v (pp-eval-cexpr line (pps-macros state)))
+ (a? (not (= v 0))))
+ (pps-cond-stack-set! state
+ (cons (cons a? (or a? taken?)) rest))))))))))
+
+(define (%pp-do-else line state)
+ (let ((cs (pps-cond-stack state)))
+ (cond
+ ((null? cs) (die #f "#else outside #if"))
+ (else
+ (let* ((top (car cs)) (rest (cdr cs))
+ (taken? (cdr top))
+ (par? (%pp-parent-active? state)))
+ (cond
+ ((not par?)
+ (pps-cond-stack-set! state (cons (cons #f taken?) rest)))
+ (taken?
+ (pps-cond-stack-set! state (cons (cons #f #t) rest)))
+ (else
+ (pps-cond-stack-set! state (cons (cons #t #t) rest)))))))))
+
+(define (%pp-do-endif line state)
+ (let ((cs (pps-cond-stack state)))
+ (cond ((null? cs) (die #f "#endif outside #if"))
+ (else (pps-cond-stack-set! state (cdr cs))))))
+
+;; --- #error ---
+;; line[0] is the directive name "error"; the rest is the user message.
+(define (%pp-do-error line state)
+ (let* ((msg-toks (if (null? line) '() (cdr line)))
+ (loc (if (null? line) #f (tok-loc (car line))))
+ (msg (%pp-toks->display msg-toks)))
+ (die loc "#error" msg)))
+
+(define (%pp-toks->display toks)
+ (let loop ((toks toks) (acc '()) (first? #t))
+ (cond
+ ((null? toks) (bv-cat (reverse acc)))
+ (else
+ (let ((p (%pp-tok->bv (car toks))))
+ (loop (cdr toks)
+ (if first? (cons p acc) (cons p (cons " " acc)))
+ #f))))))
+
+(define (%pp-tok->bv t)
+ (let ((k (tok-kind t)) (v (tok-value t)))
+ (cond
+ ((eq? k 'IDENT) v)
+ ((eq? k 'INT) (fixnum->bv v 10))
+ ((eq? k 'STR) (%pp-quote-bytes v 34))
+ ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39))
+ ((eq? k 'KW) (symbol->string v))
+ ((eq? k 'PUNCT) (symbol->string v))
+ (else "?"))))
+
+;; Reconstruct a string/char literal source spelling from cooked content.
+;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char).
+;; `delim` is 34 for STR, 39 for CHAR.
+(define (%pp-quote-bytes bv delim)
+ (let* ((n (bytevector-length bv))
+ (delim-bv (bv-of-byte delim)))
+ (let loop ((i 0) (acc (list delim-bv)))
+ (cond
+ ((= i n) (bv-cat (reverse (cons delim-bv acc))))
+ (else
+ (let ((b (bytevector-u8-ref bv i)))
+ (cond
+ ((or (= b delim) (= b 92))
+ (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc))))
+ (else
+ (loop (+ i 1) (cons (bv-of-byte b) acc))))))))))
+
+;; --- #line / #pragma / #include ---
+;; Approximate #line: subsequent toks have line = (orig-line + delta),
+;; where delta = (N - here-line - 1). Good enough for most cases.
+(define (%pp-do-line line state)
+ (cond
+ ((null? line) (die #f "#line requires a line number"))
+ ((not (%pp-int? (car line)))
+ (die (tok-loc (car line)) "#line: expected integer"))
+ (else
+ (let* ((nt (car line)) (n (tok-value nt))
+ (rest (cdr line))
+ (here (loc-line (tok-loc nt))))
+ (pps-line-delta-set! state (- n here 1))
+ (cond
+ ((null? rest) #t)
+ ((eq? (tok-kind (car rest)) 'STR)
+ (pps-cur-file-set! state (tok-value (car rest))))
+ (else (die (tok-loc (car rest))
+ "#line: expected string after number")))))))
+
+(define (%pp-do-pragma line state) #t)
+
+(define (%pp-do-include line state)
+ (die (if (null? line) #f (tok-loc (car line)))
+ "#include: file inclusion is handled upstream by pre-flatten"))
+
+;; --- macro expansion engine ---
+;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set:
+;; if the name is in t.hide, don't expand. Otherwise expand and rescan
+;; the produced body, with hide += {name}.
+(define (%pp-emit-expanded toks state out)
+ (let loop ((toks toks))
+ (cond
+ ((null? toks) #t)
+ (else
+ (let* ((t (car toks)) (rest (cdr toks)))
+ (cond
+ ((not (%pp-ident? t))
+ (buf-list-push! out (%pp-relocate t state))
+ (loop rest))
+ (else
+ (let ((name (tok-value t)))
+ (cond
+ ((%pp-bv-mem? name (tok-hide t))
+ (buf-list-push! out (%pp-relocate t state))
+ (loop rest))
+ ((%pp-builtin? name)
+ (buf-list-push-many! out
+ (%pp-expand-builtin name (tok-loc t) state))
+ (loop rest))
+ (else
+ (let ((m (alist-ref name (pps-macros state))))
+ (cond
+ ((not m)
+ (buf-list-push! out (%pp-relocate t state))
+ (loop rest))
+ (else
+ (%pp-apply-macro t m rest state out loop))))))))))))))
+
+(define (%pp-apply-macro t m rest state out cont)
+ (let ((kind (macro-kind m)) (name (tok-value t)))
+ (cond
+ ((eq? kind 'obj)
+ (let ((bodies (%pp-prepare-body (macro-body m)
+ (cons name (tok-hide t)))))
+ (%pp-emit-expanded bodies state out)
+ (cont rest)))
+ (else
+ (let ((after (%pp-skip-ws rest)))
+ (cond
+ ((or (null? after) (not (%pp-punct? (car after) 'lparen)))
+ (buf-list-push! out (%pp-relocate t state))
+ (cont rest))
+ (else
+ (let* ((ar (%pp-collect-args (cdr after) (tok-loc t)))
+ (args (car ar)) (rest2 (cdr ar))
+ (params (macro-params m))
+ (variadic? (eq? kind 'fn-vararg))
+ (env (%pp-bind-args params args variadic? (tok-loc t)))
+ (sub (%pp-substitute (macro-body m) env (tok-loc t)))
+ (bodies (%pp-prepare-body sub
+ (cons name (tok-hide t)))))
+ (%pp-emit-expanded bodies state out)
+ (cont rest2)))))))))
+
+(define (%pp-prepare-body body extra-hide)
+ (map (lambda (t)
+ (%pp-with-hide t (%pp-bv-union extra-hide (tok-hide t))))
+ body))
+
+;; Collect comma-separated args. `toks` starts AFTER `(`. Returns
+;; (args . rest), where args is a list of token-lists.
+(define (%pp-collect-args toks call-loc)
+ (let loop ((toks toks) (depth 0) (cur '()) (args '()))
+ (cond
+ ((null? toks) (die call-loc "macro call: unterminated argument list"))
+ ((%pp-eof? (car toks))
+ (die call-loc "macro call: unterminated argument list"))
+ ((and (= depth 0) (%pp-punct? (car toks) 'rparen))
+ (let ((args*
+ (cond
+ ;; Empty parens count as one empty argument; bind-args
+ ;; degenerates this back to "no args" for 0-param macros.
+ ((and (null? args) (null? cur)) (list '()))
+ (else (reverse (cons (reverse cur) args))))))
+ (cons args* (cdr toks))))
+ ((and (= depth 0) (%pp-punct? (car toks) 'comma))
+ (loop (cdr toks) 0 '() (cons (reverse cur) args)))
+ ((%pp-punct? (car toks) 'lparen)
+ (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args))
+ ((%pp-punct? (car toks) 'rparen)
+ (loop (cdr toks) (- depth 1) (cons (car toks) cur) args))
+ (else
+ (loop (cdr toks) depth (cons (car toks) cur) args)))))
+
+;; Bind formals → token-lists (alist by bv key). Variadic gathers
+;; trailing actuals into __VA_ARGS__, joined with synthetic commas.
+(define (%pp-bind-args params args variadic? call-loc)
+ (let* ((np (length params)) (na (length args)))
+ (cond
+ (variadic?
+ (cond
+ ((< na np) (die call-loc "macro call: too few arguments"))
+ (else
+ (let loop ((ps params) (as args) (acc '()))
+ (cond
+ ((null? ps)
+ (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc))
+ (else
+ (loop (cdr ps) (cdr as)
+ (alist-set (car ps) (car as) acc))))))))
+ (else
+ (cond
+ ((and (= np 0) (= na 1) (null? (car args))) '())
+ ((not (= np na)) (die call-loc "macro call: argument count mismatch"))
+ (else
+ (let loop ((ps params) (as args) (acc '()))
+ (cond
+ ((null? ps) acc)
+ (else (loop (cdr ps) (cdr as)
+ (alist-set (car ps) (car as) acc)))))))))))
+
+(define (%pp-join-comma argss)
+ (cond
+ ((null? argss) '())
+ ((null? (cdr argss)) (car argss))
+ (else
+ (append (car argss)
+ (cons (%pp-synth-comma) (%pp-join-comma (cdr argss)))))))
+
+(define (%pp-synth-comma)
+ (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '()))
+
+;; Body substitution: walk body; replace param IDENTs with arg toks,
+;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not
+;; pre-expand args before substitution; the rescan after substitution
+;; catches the same expansions in practice.
+(define (%pp-substitute body env call-loc)
+ (let loop ((body body) (out '()))
+ (cond
+ ((null? body) (reverse out))
+ (else
+ (let ((t (car body)) (rest (cdr body)))
+ (cond
+ ((%pp-punct? t 'hash)
+ (cond
+ ((or (null? rest) (not (%pp-ident? (car rest))))
+ (die (tok-loc t) "stringize: '#' must precede a parameter name"))
+ (else
+ (let* ((id (car rest)) (pn (tok-value id))
+ (pt (alist-ref pn env)))
+ (cond
+ ((not pt)
+ (die (tok-loc id) "stringize: '#' operand must be a parameter" pn))
+ (else
+ (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '())))
+ (loop (cdr rest) (cons s out)))))))))
+ ((%pp-punct? t 'paste)
+ (cond
+ ((null? out) (die (tok-loc t) "paste: '##' cannot start a body"))
+ ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body"))
+ (else
+ (let* ((lhs (car out))
+ (rt (car rest))
+ (rhs-list
+ (cond
+ ((and (%pp-ident? rt) (alist-ref (tok-value rt) env))
+ (alist-ref (tok-value rt) env))
+ (else (list rt)))))
+ (cond
+ ((null? rhs-list) (loop (cdr rest) out))
+ (else
+ (let* ((p (%pp-paste-tokens lhs (car rhs-list)))
+ (after (append (cdr rhs-list) (cdr rest))))
+ (loop after (cons p (cdr out))))))))))
+ ((%pp-ident? t)
+ (let* ((pn (tok-value t)) (pt (alist-ref pn env)))
+ (cond
+ ((not pt) (loop rest (cons t out)))
+ ((and (not (null? rest)) (%pp-punct? (car rest) 'paste))
+ (cond
+ ((null? pt) (loop (cdr rest) out))
+ (else (loop rest (append (reverse pt) out)))))
+ (else (loop rest (append (reverse pt) out))))))
+ (else (loop rest (cons t out)))))))))
+
+;; Paste two tokens textually; reparse the result.
+(define (%pp-paste-tokens lhs rhs)
+ (let ((lk (tok-kind lhs)) (rk (tok-kind rhs)))
+ (cond
+ ((and (eq? lk 'IDENT) (eq? rk 'IDENT))
+ (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs))
+ (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
+ ((and (eq? lk 'IDENT) (eq? rk 'INT))
+ (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10))
+ (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
+ ((and (eq? lk 'INT) (eq? rk 'INT))
+ (let* ((s (bytevector-append (fixnum->bv (tok-value lhs) 10)
+ (fixnum->bv (tok-value rhs) 10)))
+ (pr (bv->fixnum s 10)))
+ (cond
+ ((not (car pr)) (die (tok-loc lhs) "paste: cannot reparse as integer" s))
+ (else (%tok 'INT (cdr pr) (tok-loc lhs)
+ (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))))))
+ (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk)))))
+
+(define (%pp-relocate t state)
+ (cond
+ ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t)
+ (else
+ (let* ((l (tok-loc t))
+ (f (or (pps-cur-file state) (loc-file l)))
+ (ln (+ (loc-line l) (pps-line-delta state)))
+ (c (loc-col l)))
+ (%pp-with-loc t (%loc f ln c))))))
+
+;; --- pp-eval-cexpr: #if expression evaluator ---
+;; Steps: resolve `defined NAME`, macro-expand the rest, treat any
+;; remaining IDENT as 0, parse with recursive descent.
+;;
+;; Arena boundary (test_93 A→B→C pattern). Everything between the mark
+;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list,
+;; where `s2` runs the full macro-expansion engine), plus the recursive
+;; parser's (val . rest) cons cell at every level. The result is a
+;; fixnum, so no pre-allocated out cell is needed — `val` survives the
+;; rewind by virtue of being an immediate. The error path goes through
+;; `die` (which sys-exits), so no rewind there.
+(define (pp-eval-cexpr toks macros)
+ (let ((mark (heap-mark)))
+ (let* ((state (%pp-state macros '() #f 0))
+ (s1 (%pp-resolve-defined toks state))
+ (s2 (%pp-expand-line s1 state))
+ (s3 (%pp-idents-as-zero s2)))
+ (let* ((p (%pp-cx-expr s3))
+ (val (car p)) (rest (cdr p)))
+ (cond
+ ((null? rest) (heap-rewind! mark) val)
+ (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
+ (tok-kind (car rest)))))))))
+
+(define (%pp-expand-line toks state)
+ (let ((out (make-buf-list)))
+ (%pp-emit-expanded toks state out)
+ (buf-list-flush out)))
+
+(define (%pp-resolve-defined toks state)
+ (let loop ((toks toks) (acc '()))
+ (cond
+ ((null? toks) (reverse acc))
+ ((%pp-ident-name? (car toks) %pp-bv-defined)
+ (let ((rest (cdr toks)))
+ (cond
+ ((null? rest) (die (tok-loc (car toks)) "defined: missing operand"))
+ ((%pp-ident? (car rest))
+ (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0)))
+ (loop (cdr rest)
+ (cons (%tok 'INT v (tok-loc (car toks)) '()) acc))))
+ ((%pp-punct? (car rest) 'lparen)
+ (let ((after (cdr rest)))
+ (cond
+ ((or (null? after) (not (%pp-ident? (car after))))
+ (die (tok-loc (car toks)) "defined: expected identifier"))
+ (else
+ (let ((aa (cdr after)))
+ (cond
+ ((or (null? aa) (not (%pp-punct? (car aa) 'rparen)))
+ (die (tok-loc (car toks)) "defined: expected ')'"))
+ (else
+ (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0)))
+ (loop (cdr aa)
+ (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) ))
+ (else (die (tok-loc (car rest)) "defined: expected identifier or '('")))))
+ (else (loop (cdr toks) (cons (car toks) acc))))))
+
+(define (%pp-idents-as-zero toks)
+ (map (lambda (t)
+ (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '()))
+ (else t)))
+ toks))
+
+;; --- recursive-descent #if expression parser ---
+;; Returns (value . rest).
+(define (%pp-cx-expr toks) (%pp-cx-cond toks))
+
+(define (%pp-cx-cond toks)
+ (let* ((p (%pp-cx-lor toks))
+ (v (car p)) (rest (cdr p)))
+ (cond
+ ((and (not (null? rest)) (%pp-punct? (car rest) 'qmark))
+ (let* ((p2 (%pp-cx-expr (cdr rest)))
+ (vt (car p2)) (after (cdr p2)))
+ (cond
+ ((or (null? after) (not (%pp-punct? (car after) 'colon)))
+ (die (if (null? after) #f (tok-loc (car after))) "?: missing ':'"))
+ (else
+ (let* ((p3 (%pp-cx-cond (cdr after)))
+ (vf (car p3)) (rest3 (cdr p3)))
+ (cons (if (not (= v 0)) vt vf) rest3))))))
+ (else (cons v rest)))))
+
+(define (%pp-cx-binl next ops toks)
+ (let loop ((p (next toks)))
+ (let ((v (car p)) (rest (cdr p)))
+ (cond
+ ((null? rest) p)
+ (else
+ (let* ((tt (car rest))
+ (hit (and (eq? (tok-kind tt) 'PUNCT)
+ (alist-ref/eq (tok-value tt) ops))))
+ (cond
+ ((not hit) p)
+ (else
+ (let* ((p2 (next (cdr rest)))
+ (v2 (car p2)) (rest2 (cdr p2)))
+ (loop (cons (hit v v2) rest2)))))))))))
+
+(define (%pp-cx-lor toks)
+ (%pp-cx-binl %pp-cx-land
+ (list (cons 'lor (lambda (a b) (if (or (not (= a 0)) (not (= b 0))) 1 0))))
+ toks))
+(define (%pp-cx-land toks)
+ (%pp-cx-binl %pp-cx-bor
+ (list (cons 'land (lambda (a b) (if (and (not (= a 0)) (not (= b 0))) 1 0))))
+ toks))
+(define (%pp-cx-bor toks) (%pp-cx-binl %pp-cx-bxor (list (cons 'bar bit-or)) toks))
+(define (%pp-cx-bxor toks) (%pp-cx-binl %pp-cx-band (list (cons 'caret bit-xor)) toks))
+(define (%pp-cx-band toks) (%pp-cx-binl %pp-cx-eq (list (cons 'amp bit-and)) toks))
+(define (%pp-cx-eq toks)
+ (%pp-cx-binl %pp-cx-rel
+ (list (cons 'eq2 (lambda (a b) (if (= a b) 1 0)))
+ (cons 'ne (lambda (a b) (if (= a b) 0 1))))
+ toks))
+(define (%pp-cx-rel toks)
+ (%pp-cx-binl %pp-cx-shift
+ (list (cons 'lt (lambda (a b) (if (< a b) 1 0)))
+ (cons 'le (lambda (a b) (if (<= a b) 1 0)))
+ (cons 'gt (lambda (a b) (if (> a b) 1 0)))
+ (cons 'ge (lambda (a b) (if (>= a b) 1 0))))
+ toks))
+(define (%pp-cx-shift toks)
+ (%pp-cx-binl %pp-cx-add
+ (list (cons 'shl (lambda (a b) (arithmetic-shift a b)))
+ (cons 'shr (lambda (a b) (arithmetic-shift a (- 0 b)))))
+ toks))
+(define (%pp-cx-add toks)
+ (%pp-cx-binl %pp-cx-mul (list (cons 'plus +) (cons 'minus -)) toks))
+(define (%pp-cx-mul toks)
+ (%pp-cx-binl %pp-cx-unary
+ (list (cons 'star *) (cons 'slash quotient) (cons 'pct remainder))
+ toks))
+
+(define (%pp-cx-unary toks)
+ (cond
+ ((null? toks) (die #f "#if: unexpected end of expression"))
+ ((%pp-punct? (car toks) 'plus) (%pp-cx-unary (cdr toks)))
+ ((%pp-punct? (car toks) 'minus)
+ (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
+ (cons (- 0 v) r)))
+ ((%pp-punct? (car toks) 'bang)
+ (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
+ (cons (if (= v 0) 1 0) r)))
+ ((%pp-punct? (car toks) 'tilde)
+ (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
+ (cons (bit-not v) r)))
+ (else (%pp-cx-primary toks))))
+
+(define (%pp-cx-primary toks)
+ (cond
+ ((null? toks) (die #f "#if: expected expression"))
+ ((%pp-int? (car toks)) (cons (tok-value (car toks)) (cdr toks)))
+ ((eq? (tok-kind (car toks)) 'CHAR)
+ (cons (tok-value (car toks)) (cdr toks)))
+ ((%pp-punct? (car toks) 'lparen)
+ (let* ((p (%pp-cx-expr (cdr toks))) (v (car p)) (r (cdr p)))
+ (cond
+ ((or (null? r) (not (%pp-punct? (car r) 'rparen)))
+ (die (if (null? r) #f (tok-loc (car r))) "#if: missing ')'"))
+ (else (cons v (cdr r))))))
+ (else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks))))))
+;; cc/cg.scm — codegen state and emission API.
+;; Realization of docs/CC-INTERNALS.md §cg.scm.
+;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc;
+;; cg owns sign extension, signed/unsigned dispatch, pointer scaling.
+;;
+;; Output uses libp1pp's structured macros (%fn, %ifelse_nez,
+;; %loop_tag, %break, %continue) per docs/LIBP1PP.md.
+;;
+;; Frame layout (CC-CONTRACTS §3):
+;; [sp + 0 .. staging*8) outgoing-arg staging
+;; [sp + staging*8 ..) locals + spilled vstack values
+;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the
+;; staging size, only known at fn-end, can be filled in via a 0-arg
+;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block.
+
+(define (%cg-emit-buf cg)
+ (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg))))
+
+(define (%cg-emit cg bv)
+ (buf-push! (%cg-emit-buf cg) bv))
+
+(define (%cg-emit-many cg bvs)
+ (for-each (lambda (b) (%cg-emit cg b)) bvs))
+
+(define (%n n) (number->string n 10))
+
+;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...)
+;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off
+;; cg-globals means cg-globals only mutates when the user emits a real
+;; global, which is what parse-fn-body's rewind-safety check needs.
+(define (%cg-fn-set! cg key val)
+ (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg))))
+
+(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg)))
+
+(define (%cg-fresh-label cg prefix)
+ (let* ((n (cg-label-ctr cg))
+ (bv (bytevector-append prefix (%n n))))
+ (cg-label-ctr-set! cg (+ n 1))
+ bv))
+
+(define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L"))
+(define (%cg-fresh-lbl cg) (%cg-fresh-label cg "lbl_"))
+
+(define (%cg-bump-outgoing! cg n)
+ (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0))
+
+(define (%cg-slot-expr cg logical-off)
+ (let ((nm (%cg-fn-get cg '%fn-name)))
+ (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")"))))
+
+(define (%cg-mangle-global name-bv)
+ (bytevector-append "cc__" name-bv))
+
+(define (%cg-reg->bv r) (symbol->string r))
+
+(define (%cg-emit-li cg reg n)
+ (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n")))
+
+(define (%cg-emit-la cg reg label-bv)
+ (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n")))
+
+(define (%cg-emit-ld-slot cg reg logical-off)
+ (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, "
+ (%cg-slot-expr cg logical-off) ")\n")))
+
+(define (%cg-emit-st-slot cg reg logical-off)
+ (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, "
+ (%cg-slot-expr cg logical-off) ")\n")))
+
+(define (%cg-emit-ld cg reg base off)
+ (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv base) ", " (%n off) ")\n")))
+
+(define (%cg-emit-st cg reg base off)
+ (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", "
+ (%cg-reg->bv base) ", " (%n off) ")\n")))
+
+;; Width-aware load/store. Dispatches on ctype-size:
+;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by
+;; shli/sari 56 to materialize the canonical 64-bit form).
+;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops,
+;; and word ops require natural alignment which we can't promise
+;; for struct fields or non-word-aligned local slots). Loads
+;; gather bytes via %lb + shli/or; stores scatter via shri/%sb.
+;; Signed loads (i16/i32) sign-extend via shli/sari to canonical
+;; 64-bit form.
+;; 8 (or anything else for now): %ld / %st.
+;; Scratch convention: helpers may clobber t1; callers never pass
+;; reg=t1.
+
+(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes)
+ ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low.
+ ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv
+ ;; expression for offset k.
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", "
+ (off-expr-fn 0) ")\n"))
+ (let loop ((k 1))
+ (cond
+ ((= k n-bytes) 0)
+ (else
+ (%cg-emit-many cg (list
+ "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n"
+ "%shli(t1, t1, " (%n (* 8 k)) ")\n"
+ "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n"))
+ (loop (+ k 1))))))
+
+(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes)
+ ;; Emit n-bytes %sb scatters from reg via shri-shifted t1.
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", "
+ (off-expr-fn 0) ")\n"))
+ (let loop ((k 1))
+ (cond
+ ((= k n-bytes) 0)
+ (else
+ (%cg-emit-many cg (list
+ "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n"
+ "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n"))
+ (loop (+ k 1))))))
+
+(define (%cg-emit-sext cg reg shift-amount)
+ (%cg-emit-many cg (list
+ "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
+ (%n shift-amount) ")\n"
+ "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
+ (%n shift-amount) ")\n")))
+
+(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
+ (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
+ (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
+ (off-fn 0) ")\n"))
+ (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
+ ((= sz 2)
+ (%cg-emit-ldN-bytes cg reg "sp" off-fn 2)
+ (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ ((= sz 4)
+ (%cg-emit-ldN-bytes cg reg "sp" off-fn 4)
+ (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
+ (else (%cg-emit-ld-slot cg reg logical-off)))))
+
+(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
+ (let* ((sz (ctype-size ctype))
+ (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
+ (off-fn 0) ")\n")))
+ ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2))
+ ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4))
+ (else (%cg-emit-st-slot cg reg logical-off)))))
+
+(define (%cg-emit-ld-typed cg reg ctype base off)
+ (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
+ (base-bv (%cg-reg->bv base))
+ (off-fn (lambda (k) (%n (+ off k)))))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
+ base-bv ", " (off-fn 0) ")\n"))
+ (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
+ ((= sz 2)
+ (%cg-emit-ldN-bytes cg reg base-bv off-fn 2)
+ (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
+ ((= sz 4)
+ (%cg-emit-ldN-bytes cg reg base-bv off-fn 4)
+ (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
+ (else (%cg-emit-ld cg reg base off)))))
+
+(define (%cg-emit-st-typed cg reg ctype base off)
+ (let* ((sz (ctype-size ctype))
+ (base-bv (%cg-reg->bv base))
+ (off-fn (lambda (k) (%n (+ off k)))))
+ (cond
+ ((= sz 1)
+ (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
+ base-bv ", " (off-fn 0) ")\n")))
+ ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2))
+ ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4))
+ (else (%cg-emit-st cg reg base off)))))
+
+(define (%cg-load-opnd-into cg op reg)
+ ;; frame lval: load at type width. frame rval is a spilled word
+ ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load.
+ ;; global lval width > 1 byte-gathers must not alias dest with base —
+ ;; the first %lb would otherwise clobber the address before subsequent
+ ;; byte loads. Stage the address in t2.
+ (pmatch op
+ (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n))
+ (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off))
+ (%cg-emit-ld-slot-typed cg reg ty off))
+ (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off))
+ (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl))
+ (($ opnd? (kind global) (type ,ty) (ext ,lbl))
+ (%cg-emit-la cg 't2 lbl)
+ (%cg-emit-ld-typed cg reg ty 't2 0))
+ (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op)))))
+
+(define (%cg-spill-reg cg reg ty)
+ (let* ((off (cg-alloc-slot cg 8 8))
+ (op (%opnd 'frame ty off #f)))
+ (%cg-emit-st-slot cg reg off)
+ (cg-vstack-set! cg (cons op (cg-vstack cg)))
+ op))
+
+(define (%ctype-ptr? t)
+ (let ((k (ctype-kind t)))
+ (if (eq? k 'ptr) #t (eq? k 'arr))))
+
+(define (%ctype-pointee t)
+ (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t))
+ ((eq? (ctype-kind t) 'arr) (car (ctype-ext t)))
+ (else #f)))
+
+(define (%ctype-unsigned? t)
+ (let ((k (ctype-kind t)))
+ (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t)
+ ((eq? k 'u64) #t) ((eq? k 'bool) #t)
+ ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t)
+ (else #f))))
+
+(define (%ctype-size t) (ctype-size t))
+
+(define (%reg-by-idx i)
+ (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3)
+ (else (die #f "cg: param idx > 3 needs ldarg path" i))))
+
+;; --------------------------------------------------------------------
+;; Lifecycle
+;; --------------------------------------------------------------------
+
+(define (cg-init)
+ (%cg (make-buf/cap %BUF-CAP-TEXT) ; text
+ (make-buf/cap %BUF-CAP-DATA) ; data
+ (make-buf/cap %BUF-CAP-BSS) ; bss
+ '() ; vstack
+ 0 ; frame-hi
+ 0 ; label-ctr
+ '() ; str-pool
+ '() ; globals
+ '() ; fn-meta
+ (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn)
+ (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn)
+ 0 ; max-outgoing
+ #f)) ; in-fn?
+
+(define (cg-finish cg)
+ ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry)
+ ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't
+ ;; clobber a0/a1, so falling straight through to cc__main forwards
+ ;; them unchanged. The 16-byte frame is just enough for %enter's
+ ;; saved-fp/lr to fit; cc__main builds its own frame on top.
+ ;; (CC-CONTRACTS §J.1, §5.4.)
+ (let ((tb (cg-text cg)))
+ (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n")
+ (buf-push! tb "%fn(p1_main, 16, {\n")
+ (buf-push! tb "%call(&cc__main)\n")
+ (buf-push! tb "})\n"))
+ ;; Every P1pp translation unit must end with :ELF_end so the ELF
+ ;; header can compute file-size and ph_memsz boundaries.
+ (bv-cat (list (buf-flush (cg-text cg))
+ (buf-flush (cg-data cg))
+ (buf-flush (cg-bss cg))
+ ":ELF_end\n")))
+
+(define (cg-fn-begin cg name params return-type)
+ (cg-fn-begin/v cg name params return-type #f))
+
+;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte
+;; slots covering incoming arg indices 0..15, populating each from the
+;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for
+;; idx 4..15. va_start computes the address of the slot at index =
+;; named-arg count, so va_arg walks linearly through the rest.
+;; Indices 4..15 may be garbage when the caller passed fewer args; user
+;; code stops walking based on a count or sentinel before those slots
+;; are read. Limit of 15 variadic args (after named) is enough for
+;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more.
+(define (cg-fn-begin/v cg name params return-type variadic?)
+ (buf-reset! (cg-fn-buf cg))
+ (buf-reset! (cg-prologue-buf cg))
+ (cg-in-fn?-set! cg #t)
+ (cg-vstack-set! cg '())
+ (cg-frame-hi-set! cg 0)
+ ;; cg-label-ctr is NOT reset per-fn. Loop tags emit single-colon
+ ;; (global) labels via libp1pp's %loop_tag macro (`:L0_top`,
+ ;; `:L0_end`) — see P1/P1pp.P1pp:loop_tag — so two functions both
+ ;; using L0 would produce duplicate global labels and break linking.
+ ;; Switch dispatch labels (`sw_disp_L<N>`) inherit the same tag and
+ ;; are also single-colon. Keeping the counter monotonic across
+ ;; functions guarantees uniqueness without needing to mangle.
+ (cg-max-outgoing-set! cg 0)
+ (cg-fn-meta-set! cg '())
+ (%cg-fn-set! cg '%fn-name name)
+ (%cg-fn-set! cg '%fn-ret-type return-type)
+ (%cg-fn-set! cg '%indirect-slots '())
+ (%cg-fn-set! cg '%fn-variadic? variadic?)
+ (let ((ret-slot (cg-alloc-slot cg 8 8)))
+ (%cg-fn-set! cg '%fn-ret-slot ret-slot)
+ (cond
+ ((not (eq? (ctype-kind return-type) 'void))
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%li(t0, 0)\n"
+ "%st(t0, sp, "
+ (%cg-slot-expr cg ret-slot) ")\n"))))))
+ ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We
+ ;; return an alist (name-bv . sym) the parser binds into scope.
+ (let walk ((ps params) (idx 0) (out '()) (first-slot #f))
+ (cond
+ ((null? ps)
+ (cond
+ (variadic?
+ ;; Pad the incoming-arg window out to 16 slots. For idx 0..3
+ ;; the slot is filled from a-register; for idx 4..15 from
+ ;; LDARG slot (idx-4). va_start points at the slot whose
+ ;; index equals the named-arg count, and va_arg walks
+ ;; linearly from there through the rest of the window.
+ (let pad ((i idx) (vfirst #f) (fs first-slot))
+ (cond
+ ((>= i 16)
+ ;; If named-arg count was 0, vfirst is the very first
+ ;; slot of the save area (= fs).
+ (%cg-fn-set! cg '%fn-vararg-first-slot
+ (or vfirst fs))
+ (reverse out))
+ (else
+ (let ((off (cg-alloc-slot cg 8 8)))
+ (cond
+ ((< i 4)
+ (let ((ar (%reg-by-idx i)))
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%st(" (%cg-reg->bv ar)
+ ", sp, "
+ (%cg-slot-expr cg off) ")\n")))))
+ (else
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n"
+ "%st(t0, sp, "
+ (%cg-slot-expr cg off) ")\n")))))
+ (pad (+ i 1)
+ (or vfirst off)
+ (or fs off)))))))
+ (else (reverse out))))
+ (else
+ (let* ((p (car ps))
+ (nm (car p))
+ (ty (cdr p))
+ (off (cg-alloc-slot cg 8 8))
+ (psym (%sym nm 'param #f ty off)))
+ (cond
+ ((< idx 4)
+ (let ((ar (%reg-by-idx idx)))
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%st(" (%cg-reg->bv ar)
+ ", sp, " (%cg-slot-expr cg off) ")\n")))))
+ (else
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n"
+ "%st(t0, sp, " (%cg-slot-expr cg off) ")\n")))))
+ (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)
+ (or first-slot off)))))))
+
+(define (cg-fn-end cg)
+ ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain!
+ ;; (memcpy, no allocation). Header/footer pieces go through buf-push!
+ ;; on cg-text — also memcpy. Net result: zero net heap allocation in
+ ;; cg-fn-end other than the small (%n N) bvs for staging-bytes /
+ ;; frame-size, which the surrounding parse-fn-body's heap-rewind!
+ ;; reclaims.
+ (let* ((name (%cg-fn-get cg '%fn-name))
+ (ret-slot (%cg-fn-get cg '%fn-ret-slot))
+ (ret-type (%cg-fn-get cg '%fn-ret-type))
+ (locals-hi (cg-frame-hi cg))
+ (staging-bytes (* 8 (cg-max-outgoing cg)))
+ (raw-size (+ staging-bytes locals-hi))
+ (frame-size (align-up raw-size 16))
+ (mangled (%cg-mangle-global name))
+ (tb (cg-text cg)))
+ ;; Now that the body is fully emitted, leave fn dispatch so any
+ ;; trailing emits in this function (including the ret-block below)
+ ;; route to cg-text directly.
+ (cg-in-fn?-set! cg #f)
+ ;; staging-size macro
+ (buf-push! tb "%macro ")
+ (buf-push! tb name)
+ (buf-push! tb "__SO()\n")
+ (buf-push! tb (%n staging-bytes))
+ (buf-push! tb "\n%endm\n")
+ ;; %fn header
+ (buf-push! tb "%fn(")
+ (buf-push! tb mangled)
+ (buf-push! tb ", ")
+ (buf-push! tb (%n frame-size))
+ (buf-push! tb ", {\n")
+ ;; prologue + body, drained byte-for-byte
+ (buf-drain! tb (cg-prologue-buf cg))
+ (buf-drain! tb (cg-fn-buf cg))
+ ;; ret block
+ (buf-push! tb "::ret\n")
+ (cond
+ ((eq? (ctype-kind ret-type) 'void)
+ (buf-push! tb "%li(a0, 0)\n"))
+ (else
+ (buf-push! tb "%ld(a0, sp, ")
+ (buf-push! tb (%cg-slot-expr cg ret-slot))
+ (buf-push! tb ")\n")))
+ (buf-push! tb "})\n")
+ (cg-vstack-set! cg '())
+ (cg-frame-hi-set! cg 0)
+ (cg-max-outgoing-set! cg 0)
+ 0))
+
+;; --------------------------------------------------------------------
+;; Vstack
+;; --------------------------------------------------------------------
+(define (cg-push cg op)
+ (cg-vstack-set! cg (cons op (cg-vstack cg)))
+ op)
+
+(define (cg-pop cg)
+ (let ((s (cg-vstack cg)))
+ (cond ((null? s) (die #f "cg-pop: empty vstack"))
+ (else (cg-vstack-set! cg (cdr s)) (car s)))))
+
+(define (cg-top cg)
+ (let ((s (cg-vstack cg)))
+ (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s)))))
+
+(define (cg-depth cg) (length (cg-vstack cg)))
+
+;; Duplicate the top vstack entry. For lvals this is safe — the slot
+;; (or label, or indirect-marked frame) backing the lval keeps existing
+;; until the function ends. For rvals it duplicates the descriptor of
+;; the spilled value; both copies refer to the same already-emitted
+;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to
+;; preserve the lhs across a `cg-load` so the subsequent `cg-assign`
+;; still has its address.
+(define (cg-dup cg)
+ (let ((p (cg-top cg))) (cg-push cg p) p))
+
+;; --------------------------------------------------------------------
+;; Materialize
+;; --------------------------------------------------------------------
+(define (cg-push-imm cg ctype value)
+ (cg-push cg (%opnd 'imm ctype value #f)))
+
+(define (cg-push-string cg bv-content)
+ (let* ((label (cg-intern-string cg bv-content))
+ (cp-ty (%ctype 'ptr 8 8 %t-i8)))
+ (cg-push cg (%opnd 'global cp-ty label #f))))
+
+(define (cg-push-sym cg sym)
+ (pmatch sym
+ (($ sym? (kind fn) (type ,ty) (name ,nm))
+ (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f)))
+ (($ sym? (kind enum-const) (type ,ty) (slot ,v))
+ (cg-push cg (%opnd 'imm ty v #f)))
+ (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm))
+ (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
+ (($ sym? (kind var) (storage static) (type ,ty) (name ,nm))
+ (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
+ (($ sym? (kind var) (type ,ty) (slot ,off))
+ (cg-push cg (%opnd 'frame ty off #t)))
+ (($ sym? (kind param) (type ,ty) (slot ,off))
+ (cg-push cg (%opnd 'frame ty off #t)))
+ (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym)))))
+
+;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS
+;; (not the value). To distinguish from ordinary frame-lvals (whose
+;; slot holds the value directly), we tag indirect slots in
+;; %indirect-slots so cg-load and cg-assign can do the extra
+;; indirection.
+(define (%cg-mark-indirect! cg off)
+ (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
+ (%cg-fn-set! cg '%indirect-slots (cons off cur))))
+
+(define (%cg-indirect? cg off)
+ (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
+ (let loop ((xs cur))
+ (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs)))))))
+
+(define (cg-push-deref cg)
+ (let* ((p (cg-pop cg))
+ (pt (opnd-type p))
+ (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt))
+ ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt)))
+ (else #f))))
+ (cond
+ ((not pe) (die #f "cg-push-deref: not a pointer" pt))
+ (else
+ (%cg-load-opnd-into cg p 't0)
+ (let ((off (cg-alloc-slot cg 8 8)))
+ (%cg-emit-st-slot cg 't0 off)
+ (%cg-mark-indirect! cg off)
+ (cg-push cg (%opnd 'frame pe off #t)))))))
+
+;; --------------------------------------------------------------------
+;; Aggregate field access (§D.1–D.4)
+;; --------------------------------------------------------------------
+;; cg-push-field cg fname:
+;; pop a struct/union lval; look up `fname` in the struct's fields
+;; list (data.scm: ext = (tag complete? fields), where each field
+;; is (name-bv ctype offset)); push a new lval at the field's
+;; offset with the field's ctype.
+;;
+;; Three input cases:
+;; - direct frame lval at slot `off` -> frame lval at off+fo
+;; - indirect frame lval (slot holds addr) -> new indirect slot for
+;; addr+fo
+;; - global lval at label L -> indirect slot for
+;; la(L)+fo
+;; In all cases the resulting lval has the field's ctype.
+
+(define (%cg-find-field fields fname)
+ (let loop ((xs fields))
+ (cond
+ ((null? xs) #f)
+ ((bv= (car (car xs)) fname) (car xs))
+ (else (loop (cdr xs))))))
+
+(define (cg-push-field cg fname)
+ (let* ((s (cg-pop cg))
+ (sty (opnd-type s))
+ (k (ctype-kind sty)))
+ (cond
+ ((not (or (eq? k 'struct) (eq? k 'union)))
+ (die #f "cg-push-field: not a struct/union" k))
+ ((not (opnd-lval? s))
+ (die #f "cg-push-field: not an lvalue" k))
+ (else
+ (let* ((fields (car (cddr (ctype-ext sty))))
+ (f (%cg-find-field fields fname)))
+ (cond
+ ((not f) (die #f "cg-push-field: no such field" fname))
+ (else
+ (let* ((fty (cadr f)) (fo (car (cddr f))))
+ (pmatch s
+ ;; direct frame lval: just shift the slot offset.
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (not (%cg-indirect? cg off)))
+ (cg-push cg (%opnd 'frame fty (+ off fo) #t)))
+ ;; indirect frame lval: addr lives in the slot. Compute
+ ;; addr+fo into a new indirect slot.
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (cond
+ ((> fo 0)
+ (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
+ (let ((no (cg-alloc-slot cg 8 8)))
+ (%cg-emit-st-slot cg 't0 no)
+ (%cg-mark-indirect! cg no)
+ (cg-push cg (%opnd 'frame fty no #t))))
+ ;; global lval: load addr, add offset, indirect slot.
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (cond
+ ((> fo 0)
+ (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
+ (let ((no (cg-alloc-slot cg 8 8)))
+ (%cg-emit-st-slot cg 't0 no)
+ (%cg-mark-indirect! cg no)
+ (cg-push cg (%opnd 'frame fty no #t))))
+ (else
+ (die #f "cg-push-field: unsupported lval kind"
+ (opnd-kind s))))))))))))
+
+;; cg-decay-array:
+;; if top of vstack is an arr-typed lval, replace it with a ptr-rval
+;; to the first element. C arrays decay to T* in most contexts;
+;; parse calls this before rval-style operations. No-op otherwise.
+(define (cg-decay-array cg)
+ (let ((tp (cg-top cg)))
+ (cond
+ ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr))
+ (let* ((p (cg-pop cg))
+ (et (car (ctype-ext (opnd-type p))))
+ (pty (%ctype 'ptr 8 8 et)))
+ (pmatch p
+ ;; direct frame lval: address is sp+off.
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (not (%cg-indirect? cg off)))
+ (%cg-emit-many cg (list "%mov(t0, sp)\n"
+ "%addi(t0, t0, "
+ (%cg-slot-expr cg off) ")\n"))
+ (%cg-spill-reg cg 't0 pty))
+ ;; indirect frame lval (rare for arrays, but support it):
+ ;; the slot holds the address already.
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (%cg-spill-reg cg 't0 pty))
+ ;; global array: la(label) is the address.
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (%cg-spill-reg cg 't0 pty))
+ (else (die #f "cg-decay-array: unsupported lval kind"
+ (opnd-kind p))))))
+ (else tp))))
+
+;; --------------------------------------------------------------------
+;; Address & deref
+;; --------------------------------------------------------------------
+(define (cg-take-addr cg)
+ (let* ((p (cg-pop cg))
+ (ty (opnd-type p))
+ ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on
+ ;; the result scales by sizeof(T[N]) (the whole array), so
+ ;; &arr + 1 is one-past-end. Array-to-pointer decay happens
+ ;; on use via cg-decay-array, not at the & operator.
+ (pty (%ctype 'ptr 8 8 ty)))
+ (pmatch p
+ (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue"))
+ ;; The address itself lives at sp+slot — &*p degenerates to p.
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (%cg-indirect? cg off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (%cg-spill-reg cg 't0 pty))
+ ;; %mov(rd, sp) gives the portable-sp pointer (the backend
+ ;; handles any hidden frame-header offset). Then add slot.
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-many cg (list "%mov(t0, sp)\n"
+ "%addi(t0, t0, "
+ (%cg-slot-expr cg off) ")\n"))
+ (%cg-spill-reg cg 't0 pty))
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (%cg-spill-reg cg 't0 pty))
+ (else (die #f "cg-take-addr: non-addressable" (opnd-kind p))))))
+
+(define (cg-load cg)
+ (let* ((p (cg-pop cg)) (ty (opnd-type p)))
+ (cond
+ ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue"))
+ ;; Array lvalues decay to a ptr-rval addressing the first
+ ;; element (C array-to-pointer decay). We push the lval back
+ ;; and route through cg-decay-array for a single source of truth.
+ ((eq? (ctype-kind ty) 'arr)
+ (cg-push cg p) (cg-decay-array cg))
+ ((and (eq? (opnd-kind p) 'frame)
+ (%cg-indirect? cg (opnd-ext p)))
+ ;; Indirect frame-lval: slot holds the address. Stage the
+ ;; address in t2 so multi-byte gathers don't alias dest with
+ ;; base.
+ (%cg-emit-ld-slot cg 't2 (opnd-ext p))
+ (%cg-emit-ld-typed cg 't0 ty 't2 0)
+ (%cg-spill-reg cg 't0 ty))
+ (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty)))))
+
+;; --------------------------------------------------------------------
+;; Type conversions
+;; --------------------------------------------------------------------
+(define (cg-cast cg to-type)
+ (let* ((p (cg-pop cg))
+ (from-ty (opnd-type p))
+ (from-sz (%ctype-size from-ty))
+ (to-sz (%ctype-size to-type))
+ (to-kind (ctype-kind to-type)))
+ (cond
+ ((eq? to-kind 'bool)
+ (%cg-load-opnd-into cg p 't0)
+ (%cg-emit-many cg (list
+ "%ifelse_eqz(t0, { %li(t0, 0) }, { %li(t0, 1) })\n"))
+ (%cg-spill-reg cg 't0 to-type))
+ ((or (eq? to-kind 'ptr)
+ (and (or (eq? to-kind 'i64) (eq? to-kind 'u64))
+ (or (eq? (ctype-kind from-ty) 'ptr)
+ (eq? (ctype-kind from-ty) 'arr))))
+ (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
+ ((>= to-sz from-sz)
+ (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
+ (else
+ ;; Narrowing cast. Signed targets (i8/i16/i32) shli/sari to
+ ;; truncate-and-sign-extend in one step, so the slot holds the
+ ;; canonical 64-bit form and a subsequent widening cast (which
+ ;; is relabel-only) restores the value. Unsigned targets mask
+ ;; off high bits to zero-extend.
+ (%cg-load-opnd-into cg p 't0)
+ (cond
+ ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56))
+ ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48))
+ ((eq? to-kind 'i32) (%cg-emit-sext cg 't0 32))
+ ((= to-sz 1) (%cg-emit-many cg (list "%andi(t0, t0, 255)\n")))
+ ((= to-sz 2)
+ (%cg-emit-many cg (list "%li(t1, 65535)\n%and(t0, t0, t1)\n")))
+ ((= to-sz 4)
+ (%cg-emit-many cg (list "%li(t1, 4294967295)\n%and(t0, t0, t1)\n")))
+ (else 0))
+ (%cg-spill-reg cg 't0 to-type)))))
+
+(define (cg-promote cg)
+ (let* ((p (cg-pop cg))
+ (ty (opnd-type p))
+ (sz (%ctype-size ty)))
+ (cond
+ ((< sz 4)
+ (cond
+ ((%ctype-unsigned? ty)
+ (cg-push cg (%opnd (opnd-kind p) %t-u32 (opnd-ext p) (opnd-lval? p))))
+ (else
+ (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p))))))
+ (else (cg-push cg p)))))
+
+(define (cg-arith-conv cg)
+ ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to
+ ;; arithmetic operands. When either operand is a pointer (or array,
+ ;; which behaves as a pointer in arithmetic), the pair is a
+ ;; pointer-arith case — leave the types alone so cg-binop can detect
+ ;; the ptr operand and apply the right scaling.
+ (let* ((b (cg-pop cg))
+ (a (cg-pop cg))
+ (ta (opnd-type a))
+ (tb (opnd-type b))
+ (sa (%ctype-size ta))
+ (sb (%ctype-size tb)))
+ (cond
+ ;; Pointer/array arithmetic: leave types alone so cg-binop's
+ ;; ptr-aware add/sub branch fires with the correct pointee type
+ ;; (and doesn't see two pointers, which would skip scaling).
+ ((or (%ctype-ptr? ta) (%ctype-ptr? tb))
+ (cg-push cg a)
+ (cg-push cg b))
+ (else
+ (let ((common (cond
+ ((> sa sb) ta)
+ ((> sb sa) tb)
+ ((%ctype-unsigned? ta) ta)
+ ((%ctype-unsigned? tb) tb)
+ (else ta))))
+ (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a)))
+ (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? b))))))))
+
+;; --------------------------------------------------------------------
+;; Operators
+;; --------------------------------------------------------------------
+(define (%cg-emit-rrr cg op rd ra rb)
+ (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", "
+ (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n")))
+
+(define (%cg-emit-cmp cg cc ra rb rd)
+ (%cg-emit-many cg (list "%ifelse_" cc "("
+ (%cg-reg->bv ra) ", " (%cg-reg->bv rb)
+ ", { %li(" (%cg-reg->bv rd) ", 1) }, "
+ "{ %li(" (%cg-reg->bv rd) ", 0) })\n")))
+
+(define (cg-binop cg op)
+ (let* ((b (cg-pop cg))
+ (a (cg-pop cg))
+ (ta (opnd-type a))
+ (tb (opnd-type b))
+ (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb)))
+ (a-ptr? (%ctype-ptr? ta))
+ (b-ptr? (%ctype-ptr? tb))
+ (result-ty
+ (cond
+ ((or (eq? op 'eq) (eq? op 'ne)
+ (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge))
+ %t-i32)
+ ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64)
+ (a-ptr? ta)
+ (b-ptr? tb)
+ (else ta))))
+ (cond
+ ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?))
+ (%cg-load-opnd-into cg a 'a0)
+ (%cg-load-opnd-into cg b 'a1)
+ (let ((sz (%ctype-size (%ctype-pointee ta))))
+ (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n"))
+ (%cg-emit-rrr cg "mul" 'a1 'a1 't0))
+ (else 0)))
+ (%cg-emit-rrr cg (if (eq? op 'add) "add" "sub") 't0 'a0 'a1)
+ (%cg-spill-reg cg 't0 result-ty))
+ ((and b-ptr? (eq? op 'add) (not a-ptr?))
+ (%cg-load-opnd-into cg a 'a0)
+ (%cg-load-opnd-into cg b 'a1)
+ (let ((sz (%ctype-size (%ctype-pointee tb))))
+ (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n"))
+ (%cg-emit-rrr cg "mul" 'a0 'a0 't0))
+ (else 0)))
+ (%cg-emit-rrr cg "add" 't0 'a0 'a1)
+ (%cg-spill-reg cg 't0 result-ty))
+ ((and a-ptr? b-ptr? (eq? op 'sub))
+ (%cg-load-opnd-into cg a 'a0)
+ (%cg-load-opnd-into cg b 'a1)
+ (%cg-emit-rrr cg "sub" 't0 'a0 'a1)
+ (let ((sz (%ctype-size (%ctype-pointee ta))))
+ (cond ((> sz 1) (%cg-emit-many cg (list "%li(t1, " (%n sz) ")\n"))
+ (%cg-emit-rrr cg "div" 't0 't0 't1))
+ (else 0)))
+ (%cg-spill-reg cg 't0 result-ty))
+ (else
+ (%cg-load-opnd-into cg a 'a0)
+ (%cg-load-opnd-into cg b 'a1)
+ (cond
+ ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1))
+ ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1))
+ ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1))
+ ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1))
+ ((eq? op 'or) (%cg-emit-rrr cg "or" 't0 'a0 'a1))
+ ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1))
+ ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1))
+ ((eq? op 'shr)
+ (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1)
+ (%cg-emit-rrr cg "sar" 't0 'a0 'a1)))
+ ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1))
+ ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1))
+ ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0))
+ ((eq? op 'ne) (%cg-emit-cmp cg "ne" 'a0 'a1 't0))
+ ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0))
+ ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0))
+ ((eq? op 'le)
+ (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0)
+ (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n")))
+ ((eq? op 'ge)
+ (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0)
+ (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n")))
+ (else (die #f "cg-binop: unknown op" op)))
+ (%cg-spill-reg cg 't0 result-ty)))))
+
+;; Post-increment / post-decrement on the top-of-vstack lval.
+;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store.
+;; Uses cg-dup + cg-load to capture the old rval (which is then in a
+;; never-reused spill slot), then runs the regular dup+load+add+assign
+;; pattern for the store. Pointer scaling falls out of cg-binop add.
+(define (%cg-post-inc-dec cg op)
+ (cg-dup cg)
+ (cg-load cg)
+ (let ((old (cg-pop cg)))
+ (cg-dup cg)
+ (cg-load cg)
+ (cg-push-imm cg %t-i32 1)
+ (cg-binop cg op)
+ (cg-assign cg)
+ (cg-pop cg)
+ (cg-push cg old)))
+
+(define (cg-postinc cg) (%cg-post-inc-dec cg 'add))
+(define (cg-postdec cg) (%cg-post-inc-dec cg 'sub))
+
+(define (cg-unop cg op)
+ (let* ((p (cg-pop cg)) (ty (opnd-type p)))
+ (%cg-load-opnd-into cg p 't0)
+ (cond
+ ((eq? op 'neg)
+ (%cg-emit-many cg (list "%li(t1, 0)\n%sub(t0, t1, t0)\n"))
+ (%cg-spill-reg cg 't0 ty))
+ ((eq? op 'bnot)
+ (%cg-emit-many cg (list "%li(t1, -1)\n%xor(t0, t0, t1)\n"))
+ (%cg-spill-reg cg 't0 ty))
+ ((eq? op 'lnot)
+ (%cg-emit-many cg (list "%ifelse_eqz(t0, { %li(t0, 1) }, { %li(t0, 0) })\n"))
+ (%cg-spill-reg cg 't0 %t-i32))
+ (else (die #f "cg-unop: unknown op" op)))))
+
+(define (cg-assign cg)
+ ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek
+ ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2),
+ ;; emits the store, pushes the assigned value as the result rval.
+ (let* ((rhs0 (cg-pop cg))
+ (lhs (cg-pop cg))
+ (ty (opnd-type lhs)))
+ (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue")))
+ ;; Cast rhs to lhs's type (no-op when the types already match).
+ (cg-push cg rhs0)
+ (cg-cast cg ty)
+ (let ((rhs (cg-pop cg)))
+ (%cg-load-opnd-into cg rhs 'a0)
+ (pmatch lhs
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (%cg-indirect? cg off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (%cg-emit-st-typed cg 'a0 ty 't0 0))
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-st-slot-typed cg 'a0 ty off))
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (%cg-emit-st-typed cg 'a0 ty 't0 0))
+ (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs))))
+ (%cg-spill-reg cg 'a0 ty))))
+
+;; --------------------------------------------------------------------
+;; Calls
+;; --------------------------------------------------------------------
+(define (cg-call cg arity has-result?)
+ (let* ((args (let loop ((i 0) (acc '()))
+ (cond ((= i arity) acc)
+ (else (loop (+ i 1) (cons (cg-pop cg) acc))))))
+ (fn-op (cg-pop cg)))
+ (let stage ((xs args) (idx 0))
+ (cond
+ ((null? xs) 0)
+ ((< idx 4)
+ (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx))
+ (stage (cdr xs) (+ idx 1)))
+ (else
+ (%cg-load-opnd-into cg (car xs) 't0)
+ (%cg-emit-st cg 't0 'sp (* 8 (- idx 4)))
+ (stage (cdr xs) (+ idx 1)))))
+ (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0))
+ (cond
+ ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op)))
+ (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n")))
+ (else
+ (%cg-load-opnd-into cg fn-op 't0)
+ (%cg-emit-many cg (list "%callr(t0)\n"))))
+ (cond
+ (has-result?
+ (let* ((fty (opnd-type fn-op))
+ (rty (cond
+ ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty)))
+ ((eq? (ctype-kind fty) 'ptr)
+ (let ((p (ctype-ext fty)))
+ (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64)))
+ (else %t-i64))))
+ (%cg-spill-reg cg 'a0 rty)))
+ (else #f))))
+
+;; --------------------------------------------------------------------
+;; Return
+;; --------------------------------------------------------------------
+(define (cg-return cg)
+ (let ((ret-slot (%cg-fn-get cg '%fn-ret-slot))
+ (ret-type (%cg-fn-get cg '%fn-ret-type)))
+ (cond
+ ((eq? (ctype-kind ret-type) 'void)
+ (%cg-emit-many cg (list "%b(&::ret)\n")))
+ (else
+ (let ((p (cg-pop cg)))
+ (%cg-load-opnd-into cg p 'a0)
+ (%cg-emit-st-slot cg 'a0 ret-slot)
+ (%cg-emit-many cg (list "%b(&::ret)\n")))))))
+
+;; --------------------------------------------------------------------
+;; Structured control flow
+;; --------------------------------------------------------------------
+(define (cg-if cg then-thunk)
+ (let ((p (cg-pop cg)))
+ (%cg-load-opnd-into cg p 't0)
+ (%cg-emit-many cg (list "%if_nez(t0, {\n"))
+ (then-thunk)
+ (%cg-emit-many cg (list "})\n"))))
+
+(define (cg-ifelse cg then-thunk else-thunk)
+ (let ((p (cg-pop cg)))
+ (%cg-load-opnd-into cg p 't0)
+ (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
+ (then-thunk)
+ (%cg-emit-many cg (list "}, {\n"))
+ (else-thunk)
+ (%cg-emit-many cg (list "})\n"))))
+
+;; Conditionals-as-values: `cg-ifelse` is correct for if-statements
+;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends
+;; with one rval on top of the vstack — and after both branches run,
+;; we'd be left with TWO opnds, which breaks the type contract for
+;; the surrounding expression. `cg-ifelse-merge` solves that: pop the
+;; cond, allocate one result slot, and after each thunk runs, pop its
+;; rval and store into the slot. Push the slot as one frame rval.
+;; Both branches must push exactly one opnd; the result type is the
+;; type of the first thunk's pushed opnd (parser must arrange for
+;; both branches to push compatible types — either by passing
+;; pre-coerced operands or by injecting a `cg-cast` inside the thunk).
+(define (cg-ifelse-merge cg then-thunk else-thunk)
+ (let* ((cond-op (cg-pop cg))
+ (slot (cg-alloc-slot cg 8 8)))
+ (%cg-load-opnd-into cg cond-op 't0)
+ (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
+ (then-thunk)
+ (let* ((p (cg-pop cg))
+ (rty (opnd-type p)))
+ (%cg-load-opnd-into cg p 'a0)
+ (%cg-emit-st-slot cg 'a0 slot)
+ (%cg-emit-many cg (list "}, {\n"))
+ (else-thunk)
+ (let ((q (cg-pop cg)))
+ (%cg-load-opnd-into cg q 'a0)
+ (%cg-emit-st-slot cg 'a0 slot))
+ (%cg-emit-many cg (list "})\n"))
+ (cg-push cg (%opnd 'frame rty slot #f)))))
+
+(define (cg-loop cg head-thunk body-thunk)
+ ;; body-thunk receives the loop tag as its argument; parser uses
+ ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS
+ ;; §1.9 / §3.3.
+ (let ((tag (%cg-fresh-loop-tag cg)))
+ (%cg-emit-many cg (list "%loop_tag(" tag ", {\n"))
+ (head-thunk)
+ (cond
+ ((zero? (cg-depth cg)) 0)
+ (else
+ (let ((c (cg-pop cg)))
+ (%cg-load-opnd-into cg c 't0)
+ (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n")))))
+ (body-thunk tag)
+ (%cg-emit-many cg (list "})\n"))
+ tag))
+
+(define (cg-break cg tag)
+ (%cg-emit-many cg (list "%break(" tag ")\n")))
+
+(define (cg-continue cg tag)
+ (%cg-emit-many cg (list "%continue(" tag ")\n")))
+
+;; --------------------------------------------------------------------
+;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 4-slot
+;; saved-register area at known frame offsets; va_start sets ap to the
+;; address of the first slot past the named-arg count; va_arg reads
+;; *ap, advances ap by 8, and pushes the value as the requested type.
+;;
+;; ap is an lval (typically a `va_list` local). cg-va-start pops it,
+;; computes the address, stores into *ap (or the slot directly), and
+;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for
+;; the value, advances ap, stores back, pushes the loaded value.
+;;
+;; Limitation: only first 4 incoming args (named + variadic) live in
+;; the save area; variadic args at index >= 4 need LDARG and are not
+;; yet supported. See punchlist §G.2 for the gap.
+;; --------------------------------------------------------------------
+(define (%cg-vararg-first-slot cg)
+ (let ((s (%cg-fn-get cg '%fn-vararg-first-slot)))
+ (cond ((not s) (die #f "cg-va-start: not a variadic function"))
+ (else s))))
+
+(define (cg-va-start cg)
+ ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0,
+ ;; store through ap-lval. Pushes nothing.
+ (let* ((ap-lv (cg-pop cg))
+ (vsl (%cg-vararg-first-slot cg)))
+ (cond ((not (opnd-lval? ap-lv))
+ (die #f "cg-va-start: ap not lvalue")))
+ ;; Compute address into a0.
+ (%cg-emit-many cg (list "%mov(a0, sp)\n"
+ "%addi(a0, a0, "
+ (%cg-slot-expr cg vsl) ")\n"))
+ ;; Store a0 at ap-lval.
+ (cond
+ ((eq? (opnd-kind ap-lv) 'frame)
+ (cond
+ ((%cg-indirect? cg (opnd-ext ap-lv))
+ (%cg-emit-ld-slot cg 't0 (opnd-ext ap-lv))
+ (%cg-emit-st cg 'a0 't0 0))
+ (else (%cg-emit-st-slot cg 'a0 (opnd-ext ap-lv)))))
+ ((eq? (opnd-kind ap-lv) 'global)
+ (%cg-emit-la cg 't0 (opnd-ext ap-lv))
+ (%cg-emit-st cg 'a0 't0 0))
+ (else (die #f "cg-va-start: bad ap kind" (opnd-kind ap-lv))))))
+
+(define (cg-va-arg cg ctype)
+ ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1.
+ ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval
+ ;; of type ctype (caller cg-cast's if needed).
+ (let ((ap-lv (cg-pop cg)))
+ (cond ((not (opnd-lval? ap-lv))
+ (die #f "cg-va-arg: ap not lvalue")))
+ ;; Load ap into a0.
+ (pmatch ap-lv
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (%cg-indirect? cg off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (%cg-emit-ld cg 'a0 't0 0))
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-ld-slot cg 'a0 off))
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (%cg-emit-ld cg 'a0 't0 0))
+ (else (die #f "cg-va-arg: bad ap kind" (opnd-kind ap-lv))))
+ ;; Load value at [a0] into a1 (full 8 bytes; cg-cast on the rval
+ ;; the caller pushes will narrow if needed).
+ (%cg-emit-ld cg 'a1 'a0 0)
+ ;; Advance ap by 8.
+ (%cg-emit-many cg (list "%addi(a0, a0, 8)\n"))
+ ;; Store advanced ap back.
+ (pmatch ap-lv
+ (($ opnd? (kind frame) (ext ,off))
+ (guard (%cg-indirect? cg off))
+ (%cg-emit-ld-slot cg 't0 off)
+ (%cg-emit-st cg 'a0 't0 0))
+ (($ opnd? (kind frame) (ext ,off))
+ (%cg-emit-st-slot cg 'a0 off))
+ (($ opnd? (kind global) (ext ,lbl))
+ (%cg-emit-la cg 't0 lbl)
+ (%cg-emit-st cg 'a0 't0 0))
+ (else 0))
+ ;; Spill the loaded value (a1) to a fresh frame slot under ctype.
+ (%cg-spill-reg cg 'a1 ctype)))
+
+(define (cg-va-end cg)
+ ;; va_end is a no-op in this design. Pop and discard ap-lval.
+ (cg-pop cg)
+ 0)
+
+;; --------------------------------------------------------------------
+;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3).
+;; user_<name> namespace keeps the user's label space disjoint from
+;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through
+;; libp1pp's %scope mechanism, so forward references inside the same
+;; %fn block work without explicit forward declaration.
+;; --------------------------------------------------------------------
+(define (cg-emit-label cg name-bv)
+ (%cg-emit-many cg (list "::user_" name-bv "\n")))
+
+(define (cg-goto cg name-bv)
+ (%cg-emit-many cg (list "%b(&::user_" name-bv ")\n")))
+
+;; --------------------------------------------------------------------
+;; switch
+;; --------------------------------------------------------------------
+(define-record-type swctx
+ (%swctx ctrl-slot end-tag default-lbl)
+ swctx?
+ (ctrl-slot swctx-ctrl-slot)
+ (end-tag swctx-end-tag)
+ (default-lbl swctx-default-lbl swctx-default-lbl-set!))
+
+(define (cg-switch-begin cg)
+ (let* ((p (cg-pop cg))
+ (off (cg-alloc-slot cg 8 8))
+ (tag (%cg-fresh-loop-tag cg))
+ (disp-lbl (bytevector-append "sw_disp_" tag)))
+ (%cg-load-opnd-into cg p 't0)
+ (%cg-emit-st-slot cg 't0 off)
+ (%cg-emit-many cg (list "%loop_tag(" tag ", {\n"
+ "%b(&::" disp-lbl ")\n"))
+ (%swctx off tag #f)))
+
+(define (cg-switch-case cg sw const-int)
+ (let* ((lbl (%cg-fresh-lbl cg))
+ (key (string->symbol
+ (bytevector-append "%sw_cases__" (swctx-end-tag sw))))
+ (cur (or (%cg-fn-get cg key) '()))
+ (entry (cons const-int lbl)))
+ (%cg-fn-set! cg key (cons entry cur))
+ (%cg-emit-many cg (list "::" lbl "\n"))))
+
+(define (cg-switch-default cg sw)
+ (let ((lbl (%cg-fresh-lbl cg)))
+ (swctx-default-lbl-set! sw lbl)
+ (%cg-emit-many cg (list "::" lbl "\n"))))
+
+(define (cg-switch-end cg sw)
+ (let* ((tag (swctx-end-tag sw))
+ (key (string->symbol (bytevector-append "%sw_cases__" tag)))
+ (cases (reverse (or (%cg-fn-get cg key) '())))
+ (default-lbl (swctx-default-lbl sw))
+ (disp-lbl (bytevector-append "sw_disp_" tag)))
+ (%cg-emit-many cg (list "%break(" tag ")\n"
+ "::" disp-lbl "\n"))
+ (%cg-emit-many cg (list "%ld(t0, sp, "
+ (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n"))
+ (for-each
+ (lambda (c)
+ (%cg-emit-many cg (list "%li(t1, " (%n (car c)) ")\n"
+ "%beq(t0, t1, &::" (cdr c) ")\n")))
+ cases)
+ (cond
+ (default-lbl (%cg-emit-many cg (list "%b(&::" default-lbl ")\n")))
+ (else 0))
+ (%cg-emit-many cg (list "%break(" tag ")\n"
+ "})\n"))))
+
+;; --------------------------------------------------------------------
+;; Globals and data
+;; --------------------------------------------------------------------
+;; cg-emit-global: emit a global symbol into either .data (initialized)
+;; or .bss (zero-init).
+;;
+;; init can be:
+;; #f — zero-init in .bss (size from sym's ctype).
+;; (piece ...) — initialized in .data; pieces concatenated.
+;;
+;; Each piece is either:
+;; <bytevector> — raw bytes; emitted as N×!(byte) entries.
+;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label;
+;; emitted as `&<label> %(0)` (4B label ref +
+;; 4B zero pad).
+(define (%cg-init-piece->bv piece)
+ (cond
+ ((bytevector? piece)
+ (let ((n (bytevector-length piece)))
+ (let loop ((i 0) (acc '()))
+ (cond
+ ((= i n) (bv-cat (reverse acc)))
+ (else
+ (loop (+ i 1)
+ (cons (bv-cat (list "!("
+ (number->string
+ (bytevector-u8-ref piece i) 10)
+ ")\n"))
+ acc)))))))
+ ((and (pair? piece) (eq? (car piece) 'label-ref))
+ (bv-cat (list "&" (cdr piece) " %(0)\n")))
+ (else (die #f "cg-emit-global: bad init piece" piece))))
+
+(define (cg-emit-global cg sym init)
+ (let* ((nm (sym-name sym))
+ (lbl (%cg-mangle-global nm))
+ (sz (ctype-size (sym-type sym)))
+ (size (if (< sz 0) 8 sz)))
+ (cond
+ (init
+ (buf-push! (cg-data cg) (bv-cat (list "\n:" lbl "\n")))
+ (let walk ((ps init))
+ (cond
+ ((null? ps) 0)
+ (else
+ (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps)))
+ (walk (cdr ps))))))
+ (else
+ (buf-push! (cg-bss cg)
+ (bv-cat (list "\n:" lbl "\n"
+ (let zero-loop ((rem size) (acc '()))
+ (cond
+ ((<= rem 0) (bv-cat (reverse acc)))
+ ((>= rem 8)
+ (zero-loop (- rem 8) (cons "$(0)\n" acc)))
+ (else
+ (zero-loop (- rem 1) (cons "!(0)\n" acc))))))))))
+ (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg)))
+ 0))
+
+(define (cg-emit-extern cg sym)
+ (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg)))
+ 0)
+
+(define (cg-intern-string cg bv-content)
+ (let ((p (alist-ref bv-content (cg-str-pool cg))))
+ (cond
+ (p p)
+ (else
+ (let* ((n (length (cg-str-pool cg)))
+ (lbl (bytevector-append "cc__str_" (%n n))))
+ (cg-str-pool-set! cg
+ (alist-set bv-content lbl (cg-str-pool cg)))
+ (buf-push! (cg-data cg)
+ (bv-cat (list "\n:" lbl "\n"
+ "\"" bv-content "\"\n"
+ "!(0)\n")))
+ lbl)))))
+
+;; --------------------------------------------------------------------
+;; Frame
+;; --------------------------------------------------------------------
+(define (cg-alloc-slot cg bytes align)
+ (let* ((aligned (align-up (cg-frame-hi cg) align))
+ (new-hi (+ aligned bytes)))
+ (cg-frame-hi-set! cg new-hi)
+ aligned))
+;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1.
+
+(define (make-pstate toks cg)
+ (%pstate toks (list '()) (list '()) '() #f '() cg))
+
+(define (peek ps) (car (ps-toks ps)))
+(define (peek2 ps)
+ (let ((r (cdr (ps-toks ps))))
+ (if (null? r) (car (ps-toks ps)) (car r))))
+(define (advance ps)
+ (let ((t (peek ps))) (ps-toks-set! ps (cdr (ps-toks ps))) t))
+(define (at-kw? ps s)
+ (pmatch (peek ps)
+ (($ tok? (kind KW) (value ,v)) (eq? v s))
+ (else #f)))
+(define (at-punct? ps s)
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value ,v)) (eq? v s))
+ (else #f)))
+(define (expect-kw ps s)
+ (let ((t (peek ps)))
+ (pmatch t
+ (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps))
+ (else (die (tok-loc t) "expected kw" s)))))
+(define (expect-punct ps s)
+ (let ((t (peek ps)))
+ (pmatch t
+ (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps))
+ (else (die (tok-loc t) "expected punct" s)))))
+
+(define (scope-enter! ps)
+ (ps-scope-set! ps (cons '() (ps-scope ps)))
+ (ps-tags-set! ps (cons '() (ps-tags ps))))
+(define (scope-leave! ps)
+ (ps-scope-set! ps (cdr (ps-scope ps)))
+ (ps-tags-set! ps (cdr (ps-tags ps))))
+(define (scope-bind! ps n s)
+ (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f)))
+ (if (alist-ref n top) (die #f "dup decl" n)
+ (ps-scope-set! ps (cons (alist-set n s top) r)))))
+(define (scope-lookup ps n)
+ (let loop ((f (ps-scope ps)))
+ (cond ((null? f) #f)
+ (else
+ (let ((v (alist-ref n (car f))))
+ (if v v (loop (cdr f))))))))
+(define (tag-bind! ps n c)
+ (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f)))
+ (ps-tags-set! ps (cons (alist-set n c top) r))))
+(define (tag-lookup ps n)
+ (let loop ((f (ps-tags ps)))
+ (cond ((null? f) #f)
+ (else (let ((v (alist-ref n (car f))))
+ (if v v (loop (cdr f))))))))
+(define (typedef-add! ps n)
+ (ps-typedefs-set! ps (alist-set n #t (ps-typedefs ps))))
+(define (typedef? ps n)
+ (if (alist-ref n (ps-typedefs ps)) #t #f))
+
+(define (%mk-ptr p) (%ctype 'ptr 8 8 p))
+(define (%mk-arr e n)
+ (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e)))
+ (ctype-align e) (cons e n)))
+(define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v)))
+(define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr))
+(define (ctype-is-fn? t) (eq? (ctype-kind t) 'fn))
+(define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr))
+
+(define (eat-cv-quals! ps)
+ (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
+ (at-kw? ps 'restrict))
+ (advance ps) (eat-cv-quals! ps))
+ (else #t)))
+
+(define (parse-decl-spec ps)
+ (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f))
+ (let ((t (peek ps)))
+ (cond
+ ((or (at-kw? ps 'auto) (at-kw? ps 'register))
+ (advance ps) (loop sto sn lg b #t))
+ ((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t))
+ ((at-kw? ps 'extern) (advance ps) (loop 'extern sn lg b #t))
+ ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t))
+ ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
+ (at-kw? ps 'restrict) (at-kw? ps 'inline))
+ (advance ps) (loop sto sn lg b #t))
+ ((at-kw? ps 'signed) (advance ps) (loop sto 'signed lg b #t))
+ ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t))
+ ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t))
+ ((at-kw? ps 'long) (advance ps) (loop sto sn (+ lg 1) b #t))
+ ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t))
+ ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t))
+ ((at-kw? ps 'int) (advance ps) (loop sto sn lg 'int #t))
+ ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t))
+ ((or (at-kw? ps 'float) (at-kw? ps 'double)
+ (at-kw? ps '_Complex) (at-kw? ps '_Imaginary))
+ (die (tok-loc t) "no float" (tok-value t)))
+ ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local)
+ (at-kw? ps '_Alignas) (at-kw? ps '_Generic)
+ (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert))
+ (die (tok-loc t) "rejected" (tok-value t)))
+ ((at-kw? ps 'struct)
+ (loop sto sn lg (parse-aggregate-spec ps 'struct) #t))
+ ((at-kw? ps 'union)
+ (loop sto sn lg (parse-aggregate-spec ps 'union) #t))
+ ((at-kw? ps 'enum)
+ (loop sto sn lg (parse-enum-spec ps) #t))
+ ((and (not b) (eq? (tok-kind t) 'IDENT)
+ (typedef? ps (tok-value t)))
+ (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk))))
+ (if (and sm (eq? (sym-kind sm) 'typedef))
+ (loop sto sn lg (sym-type sm) #t)
+ (die (tok-loc tk) "typedef no sym" (tok-value tk)))))
+ (else
+ (cond ((not saw) (die (tok-loc t) "expected decl-spec"
+ (tok-value t)))
+ (else (cons sto (resolve-base t sn lg b)))))))))
+
+(define (resolve-base loc sn lg b)
+ (cond
+ ((eq? b 'void)
+ (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void))
+ ((eq? b 'bool)
+ (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool))
+ ((eq? b 'char)
+ (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8)))
+ ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg)))))
+ (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16))
+ ((= lg 0) (if (eq? sn 'unsigned) %t-u32 %t-i32))
+ (else (if (eq? sn 'unsigned) %t-u64 %t-i64))))
+ ((ctype? b)
+ (if (or sn (not (zero? lg))) (die loc "type+qual") b))
+ (else (die loc "unknown decl-spec"))))
+
+(define (parse-aggregate-spec ps kind)
+ (advance ps)
+ (let ((tag (pmatch (peek ps)
+ (($ tok? (kind IDENT)) (tok-value (advance ps)))
+ (else #f))))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (let* ((ex (and tag (tag-lookup ps tag)))
+ (ct (cond ((and ex (eq? (ctype-kind ex) kind)) ex)
+ (else (let ((c (%ctype kind -1 -1
+ (list (or tag #f) #f '()))))
+ (if tag (tag-bind! ps tag c)) c))))
+ (fields (parse-struct-fields ps)))
+ (expect-punct ps 'rbrace)
+ (complete-agg! ct kind tag fields) ct))
+ (tag (let ((ex (tag-lookup ps tag)))
+ (cond (ex ex)
+ (else (let ((c (%ctype kind -1 -1
+ (list tag #f '()))))
+ (tag-bind! ps tag c) c)))))
+ (else (die (tok-loc (peek ps)) "anon agg")))))
+
+(define (parse-struct-fields ps)
+ (let loop ((acc '()) (off 0))
+ (cond
+ ((at-punct? ps 'rbrace) (reverse acc))
+ (else
+ (let ((spec (parse-decl-spec ps)))
+ (let dl ((acc2 acc) (o2 off))
+ (let* ((p (parse-declarator ps (cdr spec)))
+ (nm (car p)) (ty (cdr p))
+ (al (max (ctype-align ty) 1))
+ (sz (ctype-size ty))
+ (oa (align-up o2 al)))
+ (cond
+ ((at-punct? ps 'comma)
+ (advance ps)
+ (dl (cons (list nm ty oa) acc2)
+ (+ oa (max sz 0))))
+ ((at-punct? ps 'semi)
+ (advance ps)
+ (loop (cons (list nm ty oa) acc2)
+ (+ oa (max sz 0))))
+ (else (die (tok-loc (peek ps)) "field"))))))))))
+
+(define (complete-agg! ct k tag fs)
+ (let* ((ma (let m ((xs fs) (a 1))
+ (if (null? xs) a
+ (m (cdr xs) (max a (ctype-align (cadr (car xs))))))))
+ (last (let l ((xs fs) (e 0))
+ (if (null? xs) e
+ (let* ((f (car xs)) (off (car (cddr f)))
+ (sz (ctype-size (cadr f))))
+ (l (cdr xs) (max e (+ off (max sz 0))))))))
+ (sz (cond ((eq? k 'union)
+ (let u ((xs fs) (s 0))
+ (if (null? xs) s
+ (u (cdr xs)
+ (max s (ctype-size (cadr (car xs))))))))
+ (else (align-up last ma)))))
+ (ctype-size-set! ct sz)
+ (ctype-align-set! ct ma)
+ (ctype-ext-set! ct (list tag #t fs))))
+
+(define (parse-enum-spec ps)
+ (advance ps)
+ (let ((tag (pmatch (peek ps)
+ (($ tok? (kind IDENT)) (tok-value (advance ps)))
+ (else #f))))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (let ((ct (%ctype 'enum 4 4 (list tag '()))))
+ (if tag (tag-bind! ps tag ct))
+ (let loop ((vs '()) (nv 0))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (ctype-ext-set! ct (list tag (reverse vs))) ct)
+ (else
+ (let* ((nt (advance ps)) (nm (tok-value nt))
+ (val (cond ((at-punct? ps 'assign)
+ (advance ps) (parse-const-int ps))
+ (else nv))))
+ (scope-bind! ps nm
+ (%sym nm 'enum-const #f %t-i32 val))
+ (cond ((at-punct? ps 'comma) (advance ps))
+ ((at-punct? ps 'rbrace) #t)
+ (else (die (tok-loc (peek ps)) "enum")))
+ (loop (cons (cons nm val) vs) (+ val 1))))))))
+ (tag (let ((e (tag-lookup ps tag)))
+ (cond (e e)
+ (else (let ((c (%ctype 'enum 4 4 (list tag '()))))
+ (tag-bind! ps tag c) c)))))
+ (else (die (tok-loc (peek ps)) "enum")))))
+
+(define (parse-const-int ps)
+ (let ((t (peek ps)))
+ (pmatch t
+ (($ tok? (kind INT)) (tok-value (advance ps)))
+ (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps)))
+ (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps))
+ (($ tok? (kind IDENT) (value ,n))
+ (let ((sm (scope-lookup ps n)))
+ (cond ((and sm (eq? (sym-kind sm) 'enum-const))
+ (advance ps) (sym-slot sm))
+ (else (die (tok-loc t) "const?" n)))))
+ (else (die (tok-loc t) "const?" (tok-value t))))))
+
+(define (parse-declarator ps base)
+ ((cdr (parse-decl-cont ps)) base
+ (lambda (n t) (cons n t))))
+
+(define (parse-decl-cont ps)
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value star))
+ (advance ps) (eat-cv-quals! ps)
+ (let* ((r (parse-decl-cont ps)) (rf (cdr r)))
+ (cons (car r) (lambda (b k) (rf (%mk-ptr b) k)))))
+ (($ tok? (kind PUNCT) (value lparen))
+ (guard (paren-is-group? ps))
+ (advance ps)
+ (let* ((i (parse-decl-cont ps)) (if- (cdr i)))
+ (expect-punct ps 'rparen)
+ (let ((s (parse-decl-suf-cont ps)))
+ (cons (car i) (lambda (b k) (if- (s b) k))))))
+ (($ tok? (kind IDENT) (value ,n))
+ (advance ps)
+ (let ((s (parse-decl-suf-cont ps)))
+ (cons n (lambda (b k) (k n (s b))))))
+ (else
+ (let ((s (parse-decl-suf-cont ps)))
+ (cons #f (lambda (b k) (k #f (s b))))))))
+
+(define (parse-decl-suf-cont ps)
+ ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first):
+ ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2)
+ ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as
+ ;; outermost). The recursive structure builds the inner suffix's
+ ;; result first, then this level wraps.
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value lbrack))
+ (advance ps)
+ (let* ((ln (cond ((at-punct? ps 'rbrack) -1)
+ (else (parse-const-int ps))))
+ (_ (expect-punct ps 'rbrack))
+ (r (parse-decl-suf-cont ps)))
+ (lambda (b) (%mk-arr (r b) ln))))
+ (($ tok? (kind PUNCT) (value lparen))
+ (advance ps)
+ (let* ((res (parse-fn-params ps))
+ (p (car res)) (v (cdr res)))
+ (expect-punct ps 'rparen)
+ (let ((r (parse-decl-suf-cont ps)))
+ (lambda (b) (%mk-fn (r b) p v)))))
+ (else (lambda (b) b))))
+
+(define (paren-is-group? ps)
+ (pmatch (peek2 ps)
+ (($ tok? (kind KW) (value ,v))
+ (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short)
+ (eq? v 'int) (eq? v 'long) (eq? v 'signed)
+ (eq? v 'unsigned) (eq? v '_Bool)
+ (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
+ (eq? v 'const) (eq? v 'volatile)
+ (eq? v 'restrict) (eq? v 'static)
+ (eq? v 'extern) (eq? v 'register)) #f)
+ (else #t)))
+ (($ tok? (kind IDENT) (value ,n))
+ (cond ((typedef? ps n) #f) (else #t)))
+ (($ tok? (kind PUNCT) (value rparen)) #f)
+ (($ tok? (kind PUNCT) (value star)) #t)
+ (($ tok? (kind PUNCT) (value lparen)) #t)
+ (($ tok? (kind PUNCT) (value lbrack)) #t)
+ (else #f)))
+
+(define (parse-fn-params ps)
+ (cond
+ ((at-punct? ps 'rparen) (cons '() #f))
+ ((and (at-kw? ps 'void)
+ (eq? (tok-kind (peek2 ps)) 'PUNCT)
+ (eq? (tok-value (peek2 ps)) 'rparen))
+ (advance ps) (cons '() #f))
+ (else
+ (let loop ((acc '()))
+ (cond
+ ((at-punct? ps 'ellipsis)
+ (advance ps) (cons (reverse acc) #t))
+ (else
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (nm (car p)) (ty (cdr p))
+ (ty2 (cond ((ctype-is-arr? ty)
+ (%mk-ptr (car (ctype-ext ty))))
+ ((ctype-is-fn? ty) (%mk-ptr ty))
+ (else ty))))
+ (cond
+ ((at-punct? ps 'comma)
+ (advance ps) (loop (cons (cons nm ty2) acc)))
+ ((at-punct? ps 'rparen)
+ (cons (reverse (cons (cons nm ty2) acc)) #f))
+ (else (die (tok-loc (peek ps)) "param"))))))))))
+
+(define (parse-translation-unit ps)
+ (cond
+ ((eq? (tok-kind (peek ps)) 'EOF) #t)
+ (else (parse-decl-or-fn ps) (parse-translation-unit ps))))
+
+(define (parse-decl-or-fn ps)
+ (let* ((sp (parse-decl-spec ps))
+ (sto (car sp)) (b (cdr sp)))
+ (cond
+ ((at-punct? ps 'semi) (advance ps) 'decl)
+ (else
+ (let* ((p (parse-declarator ps b))
+ (n (car p)) (t (cdr p)))
+ (cond
+ ((and (ctype-is-fn? t) (at-punct? ps 'lbrace))
+ (parse-fn-body ps n t) 'fn)
+ (else
+ (handle-decl ps sto n t)
+ (let lp ()
+ (cond
+ ((at-punct? ps 'comma)
+ (advance ps)
+ (let* ((p2 (parse-declarator ps b))
+ (n2 (car p2)) (t2 (cdr p2)))
+ (handle-decl ps sto n2 t2) (lp)))
+ (else (expect-punct ps 'semi) 'decl))))))))))
+
+(define (handle-decl ps sto n ty)
+ (cond
+ ((not n) (die #f "no name"))
+ ((eq? sto 'typedef)
+ (typedef-add! ps n)
+ (scope-bind! ps n (%sym n 'typedef #f ty #f)))
+ ((ctype-is-fn? ty)
+ (scope-bind! ps n
+ (%sym n 'fn (or sto 'extern) ty
+ (bytevector-append "cc__" n))))
+ ;; §I: block-scope `static` routes to a global with a name mangled
+ ;; on the enclosing function so two functions can each have their
+ ;; own `static int n;` without colliding. The sym's NAME holds the
+ ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__"
+ ;; onto sym-name to derive the emitted label); scope-bind!s key
+ ;; remains the original identifier for source-level lookup.
+ ((and (eq? sto 'static) (ps-fn-ctx ps))
+ (let* ((fname (fn-ctx-name (ps-fn-ctx ps)))
+ (mangled (bytevector-append fname "__" n))
+ (sm (%sym mangled 'var 'static ty
+ (bytevector-append "cc__" mangled))))
+ (scope-bind! ps n sm)
+ (cond
+ ((at-punct? ps 'assign)
+ (advance ps)
+ (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty)))
+ (else (cg-emit-global (ps-cg ps) sm #f)))))
+ (else
+ (cond
+ ((not (ps-fn-ctx ps))
+ (let ((sm (%sym n 'var (or sto 'extern) ty
+ (bytevector-append "cc__" n))))
+ (scope-bind! ps n sm)
+ (cond
+ ((at-punct? ps 'assign)
+ (advance ps)
+ (cg-emit-global (ps-cg ps) sm
+ (parse-init-global ps ty)))
+ ((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm))
+ (else (cg-emit-global (ps-cg ps) sm #f)))))
+ (else
+ (let* ((sz (max (ctype-size ty) 1))
+ (al (max (ctype-align ty) 1))
+ (sl (cg-alloc-slot (ps-cg ps) sz al))
+ (sm (%sym n 'var (or sto 'auto) ty sl)))
+ (scope-bind! ps n sm)
+ (cond
+ ((at-punct? ps 'assign)
+ (advance ps)
+ (cond
+ ;; Aggregate locals get the per-element store treatment.
+ ((or (at-punct? ps 'lbrace)
+ (and (eq? (ctype-kind ty) 'arr)
+ (eq? (tok-kind (peek ps)) 'STR)))
+ (parse-init-local-aggregate ps sm ty))
+ (else
+ (cg-push-sym (ps-cg ps) sm)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) ty)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps)))))
+ (else #t))))))))
+
+;; ====================================================================
+;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST).
+;;
+;; parse-init-global ps ty
+;; Reads the initializer following `=` for a file-scope or block-scope
+;; static var of static-storage type `ty` and returns a list of
+;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for
+;; the piece grammar.
+;;
+;; parse-init-local ps sm ty
+;; Reads the initializer for an auto-storage variable bound to slot
+;; sym `sm` and emits per-element store cg ops. Returns unspecified.
+;; ====================================================================
+
+(define (%int->le-bv n nbytes)
+ ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes
+ ;; >= sign-bit are filled by repeated >>8 (works for both signed and
+ ;; unsigned because we only keep the low N bytes).
+ (let ((out (make-bytevector nbytes 0)))
+ (let loop ((i 0) (v n))
+ (cond
+ ((= i nbytes) out)
+ (else
+ (bytevector-u8-set! out i (bit-and v 255))
+ (loop (+ i 1) (arithmetic-shift v -8)))))))
+
+(define (%const-init-piece ps ty)
+ ;; Parse a non-brace initializer expression for scalar type `ty` and
+ ;; return a single piece. Recognised forms:
+ ;; - INT (with optional unary +/-) -> N-byte LE bv
+ ;; - enum-const IDENT -> N-byte LE bv
+ ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name)
+ ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name)
+ ;; - STR (only for char* targets) -> (label-ref . string-pool-label)
+ (let ((t (peek ps)))
+ (cond
+ ;; Address initializer: &ident -> label-ref
+ ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp))
+ (advance ps)
+ (let ((it (peek ps)))
+ (cond
+ ((eq? (tok-kind it) 'IDENT)
+ (advance ps)
+ (let ((sm (scope-lookup ps (tok-value it))))
+ (cond
+ ((not sm) (die (tok-loc it) "init: undecl" (tok-value it)))
+ ((or (eq? (sym-kind sm) 'fn)
+ (and (eq? (sym-kind sm) 'var)
+ (or (eq? (sym-storage sm) 'static)
+ (eq? (sym-storage sm) 'extern))))
+ (cons 'label-ref (sym-slot sm)))
+ (else
+ (die (tok-loc it) "init: &x must reference a global"
+ (tok-value it))))))
+ (else (die (tok-loc it) "init: &?" (tok-value it))))))
+ ;; Function name or array name as a label-ref initializer.
+ ;; (Both decay to a pointer when used as a value.)
+ ((and (eq? (tok-kind t) 'IDENT)
+ (let ((sm (scope-lookup ps (tok-value t))))
+ (and sm
+ (or (eq? (sym-kind sm) 'fn)
+ (and (eq? (sym-kind sm) 'var)
+ (eq? (ctype-kind (sym-type sm)) 'arr)
+ (or (eq? (sym-storage sm) 'static)
+ (eq? (sym-storage sm) 'extern)))))))
+ (advance ps)
+ (let ((sm (scope-lookup ps (tok-value t))))
+ (cons 'label-ref (sym-slot sm))))
+ ;; Plain string literal as char* initializer.
+ ((eq? (tok-kind t) 'STR)
+ (advance ps)
+ (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t))))
+ (cons 'label-ref lbl)))
+ ;; Otherwise it's a const integer.
+ (else
+ (let ((v (parse-const-int ps)))
+ (%int->le-bv v (max (ctype-size ty) 1)))))))
+
+(define (%init-array-elem-type ty)
+ (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty)))
+ (else (die #f "init: not an array" ty))))
+
+(define (%init-array-decl-len ty)
+ ;; Declared array length (-1 = inferred).
+ (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1)))
+
+(define (%init-fix-array-size! ty count)
+ ;; Patch an inferred-length array to `count`.
+ (let ((elem (car (ctype-ext ty))))
+ (ctype-ext-set! ty (cons elem count))
+ (ctype-size-set! ty (* count (ctype-size elem)))))
+
+(define (%init-struct-fields ty)
+ ;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
+ (let ((ext (ctype-ext ty)))
+ (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext)))
+ (else (die #f "init: not a struct" ty)))))
+
+(define (%find-field fields nm)
+ (cond ((null? fields) #f)
+ ((equal? (car (car fields)) nm) (car fields))
+ (else (%find-field (cdr fields) nm))))
+
+(define (%pad-piece nbytes)
+ (make-bytevector nbytes 0))
+
+;; ----- Global initializers ---------------------------------------------
+(define (parse-init-global ps ty)
+ (pmatch (peek ps)
+ ;; String literal initializer for char[]
+ (($ tok? (kind STR) (value ,s))
+ (guard (and (eq? (ctype-kind ty) 'arr)
+ (let ((et (car (ctype-ext ty))))
+ (or (eq? et %t-i8) (eq? et %t-u8)))))
+ (advance ps)
+ (let* ((slen (bytevector-length s))
+ (decl (cdr (ctype-ext ty)))
+ (final (cond ((< decl 0) (+ slen 1)) (else decl))))
+ (cond ((< decl 0) (%init-fix-array-size! ty final)))
+ (let ((bv (make-bytevector final 0)))
+ (let loop ((i 0))
+ (cond
+ ((or (= i slen) (>= i final)) (list bv))
+ (else
+ (bytevector-u8-set! bv i (bytevector-u8-ref s i))
+ (loop (+ i 1))))))))
+ ;; Brace-form
+ (($ tok? (kind PUNCT) (value lbrace))
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind ty) 'arr)
+ (%parse-init-array-list ps ty))
+ ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
+ (%parse-init-struct-list ps ty))
+ (else
+ ;; Brace-wrapped scalar: { expr }
+ (let ((piece (%const-init-piece ps ty)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list piece)))))
+ ;; Bare scalar initializer
+ (else (list (%const-init-piece ps ty)))))
+
+(define (%parse-init-array-list ps ty)
+ ;; Element-list array initializer; assumes `{` already consumed.
+ (let* ((elem (%init-array-elem-type ty))
+ (esize (ctype-size elem))
+ (decl (%init-array-decl-len ty)))
+ (let lp ((acc '()) (count 0))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (cond ((< decl 0) (%init-fix-array-size! ty count)))
+ ;; Pad to declared length if longer than count.
+ (let* ((final (cond ((< decl 0) count) (else decl)))
+ (pad (- final count)))
+ (cond
+ ((> pad 0)
+ (reverse (cons (%pad-piece (* pad esize)) acc)))
+ (else (reverse acc)))))
+ (else
+ (let ((piece
+ (cond
+ ((at-punct? ps 'lbrace)
+ ;; Nested aggregate: brace-flatten via recursion.
+ (advance ps)
+ ;; element is itself struct/array
+ (cond
+ ((eq? (ctype-kind elem) 'arr)
+ (%parse-init-array-list ps elem))
+ ((or (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ (%parse-init-struct-list ps elem))
+ (else
+ (let ((p (%const-init-piece ps elem)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list p)))))
+ (else
+ (list (%const-init-piece ps elem))))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (append (reverse piece) acc) (+ count 1))))))))
+
+(define (%piece-bytesize p)
+ ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits
+ ;; one byte per element; a (label-ref . _) emits an 8-byte slot.
+ (cond
+ ((bytevector? p) (bytevector-length p))
+ ((and (pair? p) (eq? (car p) 'label-ref)) 8)
+ (else (die #f "init: unknown piece" p))))
+
+(define (%pieces-bytesize ps-list)
+ (let loop ((xs ps-list) (n 0))
+ (cond ((null? xs) n)
+ (else (loop (cdr xs) (+ n (%piece-bytesize (car xs))))))))
+
+(define (%merge-init-entries entries total-size)
+ ;; entries: list of (abs-offset . piece-list), in source order.
+ ;; Sort stably by offset (later writes to the same offset win, per C
+ ;; designated-init semantics) and emit pad pieces in any gaps and at
+ ;; the tail. Preserves label-ref pieces — we never merge them into a
+ ;; flat bv.
+ (let* ((sorted (%init-stable-sort-by-offset entries))
+ (out
+ (let walk ((xs sorted) (cursor 0) (acc '()))
+ (cond
+ ((null? xs)
+ (cond
+ ((< cursor total-size)
+ (reverse (cons (%pad-piece (- total-size cursor)) acc)))
+ (else (reverse acc))))
+ (else
+ (let* ((e (car xs))
+ (eoff (car e))
+ (epieces (cdr e))
+ (esize (%pieces-bytesize epieces))
+ (acc1 (cond
+ ((> eoff cursor)
+ (cons (%pad-piece (- eoff cursor)) acc))
+ (else acc)))
+ (acc2 (append (reverse epieces) acc1)))
+ (walk (cdr xs) (+ eoff esize) acc2)))))))
+ out))
+
+(define (%init-stable-sort-by-offset entries)
+ ;; Insertion sort, stable by source order for ties. n is small (one
+ ;; entry per initialized field) so O(n^2) is fine.
+ (let lp ((xs entries) (acc '()))
+ (cond
+ ((null? xs) acc)
+ (else
+ (let ((e (car xs)))
+ (lp (cdr xs)
+ (let ins ((ys acc) (head '()))
+ (cond
+ ((null? ys)
+ (append (reverse head) (list e)))
+ ((<= (car e) (car (car ys)))
+ (append (reverse head) (cons e ys)))
+ (else
+ (ins (cdr ys) (cons (car ys) head)))))))))))
+
+(define (%parse-init-struct-list ps ty)
+ ;; Struct/union initializer; assumes `{` already consumed. Supports
+ ;; positional and `.field = expr` forms — including out-of-order
+ ;; designators ({.y=5, .x=7}). Each entry records its absolute
+ ;; offset; %merge-init-entries sorts and pads at the closing brace.
+ (let* ((fields (%init-struct-fields ty))
+ (size (ctype-size ty)))
+ (let lp ((entries '()) (rest fields))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (%merge-init-entries (reverse entries) size))
+ (else
+ (let* ((designated? (at-punct? ps 'dot))
+ (target
+ (cond
+ (designated?
+ (advance ps)
+ (let ((nt (advance ps)))
+ (cond
+ ((not (eq? (tok-kind nt) 'IDENT))
+ (die (tok-loc nt) "init: .field expects ident")))
+ (let ((f (%find-field fields (tok-value nt))))
+ (cond
+ ((not f) (die (tok-loc nt) "init: no such field"
+ (tok-value nt))))
+ (expect-punct ps 'assign)
+ f)))
+ ((null? rest)
+ (die (tok-loc (peek ps)) "init: too many fields"))
+ (else (car rest))))
+ (fname (car target))
+ (fty (car (cdr target)))
+ (foff (car (cddr target)))
+ (piece-list
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind fty) 'arr)
+ (%parse-init-array-list ps fty))
+ ((or (eq? (ctype-kind fty) 'struct)
+ (eq? (ctype-kind fty) 'union))
+ (%parse-init-struct-list ps fty))
+ (else
+ (let ((p (%const-init-piece ps fty)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list p)))))
+ (else
+ (list (%const-init-piece ps fty))))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (cons (cons foff piece-list) entries)
+ (cond
+ (designated?
+ ;; designated init: drop fields up to and including target
+ (let drop ((xs fields))
+ (cond
+ ((null? xs) '())
+ ((equal? (car (car xs)) fname) (cdr xs))
+ (else (drop (cdr xs))))))
+ (else (cdr rest))))))))))
+
+;; ----- Local aggregate initializers ------------------------------------
+;; Emits per-element store sequences via cg ops into the slot of `sm`
+;; (a 'var sym whose slot is the frame offset). Assumes the assignment
+;; `=` has already been consumed.
+(define (parse-init-local-aggregate ps sm ty)
+ (pmatch (peek ps)
+ ;; Local char[] = "string" — fill from string bytes.
+ (($ tok? (kind STR) (value ,s))
+ (guard (and (eq? (ctype-kind ty) 'arr)
+ (let ((et (car (ctype-ext ty))))
+ (or (eq? et %t-i8) (eq? et %t-u8)))))
+ (advance ps)
+ (let* ((slen (bytevector-length s))
+ (decl (cdr (ctype-ext ty)))
+ (final (cond ((< decl 0) (+ slen 1)) (else decl))))
+ (cond ((< decl 0) (%init-fix-array-size! ty final)))
+ ;; Emit byte stores for each char in s, plus NUL for the
+ ;; trailing slot if final > slen.
+ (let loop ((i 0))
+ (cond
+ ((>= i final) #t)
+ (else
+ (let ((b (cond ((< i slen) (bytevector-u8-ref s i))
+ (else 0)))
+ (off (+ (sym-slot sm) i)))
+ (%push-frame-elem-lval ps off %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 b)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (loop (+ i 1))))))))
+ (($ tok? (kind PUNCT) (value lbrace))
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind ty) 'arr)
+ (%parse-init-local-array-list ps sm (sym-slot sm) ty))
+ ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
+ (%parse-init-local-struct-list ps sm (sym-slot sm) ty))
+ (else (die #f "init local: brace on scalar?"))))
+ (else (die (tok-loc (peek ps)) "init local aggregate?"))))
+
+(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk)
+ ;; Emit a single scalar store at slot[base + rel-off]. piece is the
+ ;; raw initializer expression — but here we want to actually evaluate
+ ;; it via parse-expr to allow non-const expressions for autos.
+ ;; Caller handles this; this helper handles the store-into-frame ops.
+ 0)
+
+(define (%push-frame-elem-lval ps base-off ty)
+ (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t)))
+
+(define (%parse-init-local-array-list ps sm base-off ty)
+ (let* ((elem (%init-array-elem-type ty))
+ (esize (ctype-size elem))
+ (decl (%init-array-decl-len ty)))
+ (let lp ((i 0))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (cond ((< decl 0) (%init-fix-array-size! ty i)))
+ ;; Zero out remaining slots if any (declared length > i).
+ (let ((final (cond ((< decl 0) i) (else decl))))
+ (let zlp ((k i))
+ (cond
+ ((>= k final) #t)
+ (else
+ (let ((off (+ base-off (* k esize))))
+ (cond
+ ((or (eq? (ctype-kind elem) 'arr)
+ (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ ;; Zero each byte in this aggregate slot.
+ (let zb ((j 0))
+ (cond
+ ((>= j esize) #t)
+ (else
+ (%push-frame-elem-lval ps (+ off j) %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (zb (+ j 1))))))
+ (else
+ (%push-frame-elem-lval ps off elem)
+ (cg-push-imm (ps-cg ps) elem 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps)))))
+ (zlp (+ k 1)))))))
+ (else
+ (let ((eoff (+ base-off (* i esize))))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind elem) 'arr)
+ (%parse-init-local-array-list ps sm eoff elem))
+ ((or (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ (%parse-init-local-struct-list ps sm eoff elem))
+ (else
+ (%push-frame-elem-lval ps eoff elem)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) elem)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace))))
+ (else
+ (%push-frame-elem-lval ps eoff elem)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) elem)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (+ i 1))))))))
+
+(define (%bv-in-list? bv xs)
+ (cond ((null? xs) #f)
+ ((equal? bv (car xs)) #t)
+ (else (%bv-in-list? bv (cdr xs)))))
+
+(define (%emit-zero-field ps base-off f)
+ ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b)
+ ;; and silently drops the rest. Compute absolute byte offsets via
+ ;; nested binary +.
+ (let* ((fty (car (cdr f)))
+ (foff (car (cddr f)))
+ (fsize (ctype-size fty))
+ (start-off (+ base-off foff)))
+ (let zb ((j 0))
+ (cond
+ ((>= j fsize) #t)
+ (else
+ (%push-frame-elem-lval ps (+ start-off j) %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (zb (+ j 1)))))))
+
+(define (%parse-init-local-struct-list ps sm base-off ty)
+ ;; Track each initialized field by name in `seen`; at the closing brace
+ ;; zero every field NOT in `seen`. The previous design tracked positional
+ ;; "remaining fields" via `rest`, which silently dropped earlier fields
+ ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x`
+ ;; uninitialized). C requires every unmentioned member of an aggregate
+ ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21).
+ (let ((fields (%init-struct-fields ty)))
+ (let lp ((rest fields) (seen '()))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (for-each
+ (lambda (f)
+ (cond ((not (%bv-in-list? (car f) seen))
+ (%emit-zero-field ps base-off f))))
+ fields))
+ (else
+ (let* ((designated? (at-punct? ps 'dot))
+ (target
+ (cond
+ (designated?
+ (advance ps)
+ (let ((nt (advance ps)))
+ (let ((f (%find-field fields (tok-value nt))))
+ (cond
+ ((not f) (die (tok-loc nt) "init: no such field"
+ (tok-value nt))))
+ (expect-punct ps 'assign)
+ f)))
+ ((null? rest)
+ (die (tok-loc (peek ps)) "init: too many fields"))
+ (else (car rest))))
+ (fname (car target))
+ (fty (car (cdr target)))
+ (foff (car (cddr target)))
+ (eoff (+ base-off foff)))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind fty) 'arr)
+ (%parse-init-local-array-list ps sm eoff fty))
+ ((or (eq? (ctype-kind fty) 'struct)
+ (eq? (ctype-kind fty) 'union))
+ (%parse-init-local-struct-list ps sm eoff fty))
+ (else
+ (%push-frame-elem-lval ps eoff fty)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) fty)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace))))
+ (else
+ (%push-frame-elem-lval ps eoff fty)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) fty)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (cond
+ (designated?
+ (let drop ((xs fields))
+ (cond
+ ((null? xs) '())
+ ((equal? (car (car xs)) fname) (cdr xs))
+ (else (drop (cdr xs))))))
+ (else (cdr rest)))
+ (cons fname seen))))))))
+
+
+;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm:
+;;
+;; A = parse-decl-or-fn (caller, arena-unaware)
+;; B = parse-fn-body (this fn — arena boundary)
+;; C = %parse-fn-body-inner (the real per-fn parse + cg work)
+;;
+;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they
+;; were allocated at cg-init (pre-mark) and only mutate via byte writes,
+;; so byte-level work survives heap-rewind!. Everything C allocates —
+;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch
+;; case alists — is post-mark and discarded.
+;;
+;; The fn-name binding into the surrounding scope (used so recursive
+;; calls can resolve the name during the body) is done BEFORE the mark
+;; so it survives. Inner scope frames are popped via scope-leave! before
+;; the rewind, so their cells become unreachable; rewind reclaims them.
+;;
+;; Rewind-safety guard: the body might add user-visible globals
+;; (block-statics), strings (literals), tags, or typedefs. Those entries
+;; are post-mark and would dangle on rewind. We snapshot the relevant
+;; alists before parsing and skip the rewind if any changed — paying
+;; full heap cost only for functions that genuinely mutate global state.
+(define (parse-fn-body ps name dt)
+ ;; Hoist the recursive-binding scope-bind! out of the marked region
+ ;; so the fn-sym cons survives rewind.
+ (cond ((not (scope-lookup ps name))
+ (scope-bind! ps name
+ (%sym name 'fn 'extern dt
+ (bytevector-append "cc__" name)))))
+ (let* ((cg (ps-cg ps))
+ (mark (heap-mark))
+ (globals-before (cg-globals cg))
+ (str-pool-before (cg-str-pool cg))
+ (typedefs-before (ps-typedefs ps))
+ (tags-before (ps-tags ps)))
+ (%parse-fn-body-inner ps name dt)
+ (cond
+ ((and (eq? globals-before (cg-globals cg))
+ (eq? str-pool-before (cg-str-pool cg))
+ (eq? typedefs-before (ps-typedefs ps))
+ (eq? tags-before (ps-tags ps)))
+ ;; cg-fn-meta points at post-mark alist conses (fn metadata,
+ ;; switch-case lists, indirect-slots). Drop the reference before
+ ;; rewinding so the cg record holds no dangling pointers — the
+ ;; next cg-fn-begin/v would reset it anyway, but if this is the
+ ;; last fn, leaving it set leaves a latent landmine.
+ (cg-fn-meta-set! cg '())
+ (heap-rewind! mark)
+ (debug-log "fn-rewound" name "heap" (heap-usage)))
+ (else
+ (debug-log "fn-kept" name "heap" (heap-usage))))))
+
+(define (%parse-fn-body-inner ps name dt)
+ (let* ((e (ctype-ext dt)) (ret (car e))
+ (par (cadr e)) (var (car (cddr e))))
+ (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var)))
+ (ps-fn-ctx-set! ps
+ (%fn-ctx name ret (map cdr psyms) var '()))
+ (scope-enter! ps)
+ (for-each (lambda (p) (scope-bind! ps (car p) (cdr p)))
+ psyms)
+ (expect-punct ps 'lbrace)
+ (parse-cstmt-body ps)
+ (expect-punct ps 'rbrace)
+ (scope-leave! ps)
+ (ps-fn-ctx-set! ps #f)
+ (cg-fn-end (ps-cg ps)))))
+
+(define (parse-stmt ps)
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps))
+ (($ tok? (kind KW) (value if)) (parse-if-stmt ps))
+ (($ tok? (kind KW) (value while)) (parse-while-stmt ps))
+ (($ tok? (kind KW) (value do)) (parse-do-stmt ps))
+ (($ tok? (kind KW) (value for)) (parse-for-stmt ps))
+ (($ tok? (kind KW) (value switch)) (parse-switch-stmt ps))
+ (($ tok? (kind KW) (value return)) (parse-return-stmt ps))
+ (($ tok? (kind KW) (value goto)) (parse-goto-stmt ps))
+ (($ tok? (kind KW) (value break))
+ (advance ps) (expect-punct ps 'semi) (do-break ps))
+ (($ tok? (kind KW) (value continue))
+ (advance ps) (expect-punct ps 'semi) (do-continue ps))
+ (($ tok? (kind KW) (value case)) (parse-case-stmt ps))
+ (($ tok? (kind KW) (value default)) (parse-default-stmt ps))
+ (($ tok? (kind IDENT))
+ (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT)
+ (eq? (tok-value (peek2 ps)) 'colon)))
+ (parse-labelled-stmt ps))
+ (else
+ (cond ((stmt-starts-decl? ps) (parse-local-decl ps))
+ (else (parse-expr-stmt ps))))))
+
+(define (stmt-starts-decl? ps)
+ (pmatch (peek ps)
+ (($ tok? (kind KW) (value ,v))
+ (or (eq? v 'auto) (eq? v 'register) (eq? v 'static)
+ (eq? v 'extern) (eq? v 'typedef) (eq? v 'const)
+ (eq? v 'volatile) (eq? v 'restrict) (eq? v 'inline)
+ (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
+ (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
+ (eq? v '_Bool) (eq? v 'struct) (eq? v 'union)
+ (eq? v 'enum)))
+ (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
+ (else #f)))
+
+(define (parse-local-decl ps)
+ (let* ((sp (parse-decl-spec ps))
+ (sto (car sp)) (b (cdr sp)))
+ (cond
+ ((at-punct? ps 'semi) (advance ps) #t)
+ (else
+ (let lp ()
+ (let* ((p (parse-declarator ps b))
+ (n (car p)) (t (cdr p)))
+ (handle-decl ps sto n t)
+ (cond ((at-punct? ps 'comma) (advance ps) (lp))
+ (else (expect-punct ps 'semi) #t))))))))
+
+(define (parse-cstmt ps)
+ (expect-punct ps 'lbrace)
+ (scope-enter! ps)
+ (parse-cstmt-body ps)
+ (scope-leave! ps)
+ (expect-punct ps 'rbrace) #t)
+
+(define (parse-cstmt-body ps)
+ (cond
+ ((at-punct? ps 'rbrace) #t)
+ ((eq? (tok-kind (peek ps)) 'EOF)
+ (die (tok-loc (peek ps)) "EOF in cstmt"))
+ (else (parse-stmt ps) (parse-cstmt-body ps))))
+
+(define (parse-compound-stmt ps) (parse-cstmt ps))
+
+(define (parse-if-stmt ps)
+ (expect-kw ps 'if)
+ (expect-punct ps 'lparen)
+ (parse-expr ps) (rval! ps)
+ (expect-punct ps 'rparen)
+ (cg-ifelse (ps-cg ps)
+ (lambda () (parse-stmt ps))
+ (lambda ()
+ (cond ((at-kw? ps 'else)
+ (advance ps) (parse-stmt ps))
+ (else #t)))))
+
+;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS
+;; §3.3); the parser threads it into break/continue via loop-ctx.
+
+(define (parse-while-stmt ps)
+ (expect-kw ps 'while)
+ (expect-punct ps 'lparen)
+ (cg-loop (ps-cg ps)
+ (lambda () (parse-expr ps) (rval! ps))
+ (lambda (tag)
+ (expect-punct ps 'rparen)
+ (push-loop-ctx! ps 'while tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps))) #t)
+
+(define (parse-do-stmt ps)
+ (expect-kw ps 'do)
+ ;; do-while needs its tag known *before* the body parses, so we
+ ;; capture it inside the body-thunk and stash it for pop-loop-ctx
+ ;; via a side cell.
+ (cg-loop (ps-cg ps)
+ (lambda () #t)
+ (lambda (tag)
+ (push-loop-ctx! ps 'do tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps)
+ (expect-kw ps 'while) (expect-punct ps 'lparen)
+ (parse-expr ps) (rval! ps)
+ (expect-punct ps 'rparen) (expect-punct ps 'semi)
+ (cg-unop (ps-cg ps) 'lnot)
+ (cg-if (ps-cg ps)
+ (lambda () (cg-break (ps-cg ps) tag)))))
+ #t)
+
+(define (parse-for-stmt ps)
+ (expect-kw ps 'for) (expect-punct ps 'lparen)
+ (scope-enter! ps)
+ (cond
+ ((at-punct? ps 'semi) (advance ps))
+ ((stmt-starts-decl? ps) (parse-local-decl ps))
+ (else (parse-expr ps) (cg-pop (ps-cg ps))
+ (expect-punct ps 'semi)))
+ (cg-loop (ps-cg ps)
+ (lambda ()
+ (cond ((at-punct? ps 'semi)
+ (cg-push-imm (ps-cg ps) %t-i32 1))
+ (else (parse-expr ps) (rval! ps)))
+ (expect-punct ps 'semi))
+ (lambda (tag)
+ (let ((stk (collect-til-rparen ps)))
+ (expect-punct ps 'rparen)
+ (push-loop-ctx! ps 'for tag #t)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps)
+ (cond
+ ((null? stk) #t)
+ (else
+ (let ((sv (ps-toks ps)))
+ (ps-toks-set! ps
+ (append stk (list (make-tok 'EOF #f #f))))
+ (parse-expr ps) (cg-pop (ps-cg ps))
+ (ps-toks-set! ps sv)))))))
+ (scope-leave! ps) #t)
+
+(define (collect-til-rparen ps)
+ (let loop ((acc '()) (d 0))
+ (let ((t (peek ps)))
+ (cond
+ ((eq? (tok-kind t) 'EOF)
+ (die (tok-loc t) "EOF in for-step"))
+ ((and (zero? d) (eq? (tok-kind t) 'PUNCT)
+ (eq? (tok-value t) 'rparen)) (reverse acc))
+ (else
+ (let ((nt (advance ps)))
+ (loop (cons nt acc)
+ (cond ((not (eq? (tok-kind nt) 'PUNCT)) d)
+ ((eq? (tok-value nt) 'lparen) (+ d 1))
+ ((eq? (tok-value nt) 'rparen) (- d 1))
+ (else d)))))))))
+
+(define (parse-switch-stmt ps)
+ (expect-kw ps 'switch) (expect-punct ps 'lparen)
+ (parse-expr ps) (rval! ps)
+ (expect-punct ps 'rparen)
+ ;; Switch's break-target tag is the swctx's end-tag — cg owns it,
+ ;; and we read it back so cg-break inside the switch body emits a
+ ;; tag cg actually labels.
+ (let* ((sw (cg-switch-begin (ps-cg ps)))
+ (tg (swctx-end-tag sw)))
+ (push-loop-ctx-sw! ps 'switch tg sw)
+ (parse-stmt ps)
+ (pop-loop-ctx! ps)
+ (cg-switch-end (ps-cg ps) sw)))
+
+(define (parse-case-stmt ps)
+ (expect-kw ps 'case)
+ (let ((v (parse-const-int ps)))
+ (expect-punct ps 'colon)
+ (cg-switch-case (ps-cg ps) (innermost-sw ps) v)
+ (parse-stmt ps)))
+
+(define (parse-default-stmt ps)
+ (expect-kw ps 'default) (expect-punct ps 'colon)
+ (cg-switch-default (ps-cg ps) (innermost-sw ps))
+ (parse-stmt ps))
+
+(define (parse-return-stmt ps)
+ (expect-kw ps 'return)
+ (cond
+ ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps)))
+ (else
+ (parse-expr ps) (rval! ps)
+ (let ((fc (ps-fn-ctx ps)))
+ (cond
+ ((and fc (not (eq? (ctype-kind (fn-ctx-return-type fc)) 'void)))
+ (cg-cast (ps-cg ps) (fn-ctx-return-type fc)))
+ (else #t)))
+ (cg-return (ps-cg ps))
+ (expect-punct ps 'semi))))
+
+(define (parse-goto-stmt ps)
+ (expect-kw ps 'goto)
+ (let ((t (advance ps)))
+ (cond ((eq? (tok-kind t) 'IDENT)
+ (cg-goto (ps-cg ps) (tok-value t)))
+ (else (die (tok-loc t) "label?"))))
+ (expect-punct ps 'semi))
+
+(define (parse-labelled-stmt ps)
+ (let ((t (advance ps)))
+ (expect-punct ps 'colon)
+ (cg-emit-label (ps-cg ps) (tok-value t))
+ (parse-stmt ps)))
+
+(define (parse-expr-stmt ps)
+ (cond
+ ((at-punct? ps 'semi) (advance ps) #t)
+ (else (parse-expr ps) (cg-pop (ps-cg ps))
+ (expect-punct ps 'semi))))
+
+(define (push-loop-ctx! ps k tg hc)
+ (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps))))
+(define (push-loop-ctx-sw! ps k tg sw)
+ (ps-loops-set! ps
+ (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps))))
+(define (pop-loop-ctx! ps)
+ (ps-loops-set! ps (cdr (ps-loops ps))))
+(define (do-break ps)
+ (let ((c (innermost-loop ps)))
+ (cond
+ ((not c) (die #f "break outside"))
+ ((eq? (loop-ctx-kind c) 'switch)
+ (cg-break (ps-cg ps) (car (loop-ctx-tag c))))
+ (else (cg-break (ps-cg ps) (loop-ctx-tag c))))))
+(define (do-continue ps)
+ (let ((c (innermost-cont ps)))
+ (cond ((not c) (die #f "cont outside"))
+ (else (cg-continue (ps-cg ps) (loop-ctx-tag c))))))
+(define (innermost-loop ps)
+ (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps)))))
+(define (innermost-cont ps)
+ (let lp ((xs (ps-loops ps)))
+ (cond ((null? xs) #f)
+ ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs)))
+ (else (car xs)))))
+(define (innermost-sw ps)
+ (let lp ((xs (ps-loops ps)))
+ (cond ((null? xs) (die #f "case outside switch"))
+ ((eq? (loop-ctx-kind (car xs)) 'switch)
+ (cdr (loop-ctx-tag (car xs))))
+ (else (lp (cdr xs))))))
+
+(define %binop-bp
+ (list
+ (cons 'comma (cons 1 2))
+ (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3))
+ (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3))
+ (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3))
+ (cons 'shl-eq (cons 4 3)) (cons 'shr-eq (cons 4 3))
+ (cons 'amp-eq (cons 4 3)) (cons 'caret-eq (cons 4 3))
+ (cons 'bar-eq (cons 4 3)) (cons 'qmark (cons 6 5))
+ (cons 'lor (cons 10 11)) (cons 'land (cons 20 21))
+ (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41))
+ (cons 'amp (cons 50 51))
+ (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61))
+ (cons 'lt (cons 70 71)) (cons 'le (cons 70 71))
+ (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71))
+ (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81))
+ (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91))
+ (cons 'star (cons 100 101)) (cons 'slash (cons 100 101))
+ (cons 'pct (cons 100 101))))
+
+(define (binop-bp-of s) (alist-ref/eq s %binop-bp))
+
+(define (punct-to-cgop s)
+ (cond ((eq? s 'plus) 'add) ((eq? s 'minus) 'sub)
+ ((eq? s 'star) 'mul) ((eq? s 'slash) 'div)
+ ((eq? s 'pct) 'rem) ((eq? s 'amp) 'and)
+ ((eq? s 'bar) 'or) ((eq? s 'caret) 'xor)
+ ((eq? s 'shl) 'shl) ((eq? s 'shr) 'shr)
+ ((eq? s 'eq2) 'eq) ((eq? s 'ne) 'ne)
+ ((eq? s 'lt) 'lt) ((eq? s 'le) 'le)
+ ((eq? s 'gt) 'gt) ((eq? s 'ge) 'ge)
+ (else (die #f "binop" s))))
+
+(define (compound-op s)
+ (cond ((eq? s 'plus-eq) 'add) ((eq? s 'minus-eq) 'sub)
+ ((eq? s 'star-eq) 'mul) ((eq? s 'slash-eq) 'div)
+ ((eq? s 'pct-eq) 'rem) ((eq? s 'shl-eq) 'shl)
+ ((eq? s 'shr-eq) 'shr) ((eq? s 'amp-eq) 'and)
+ ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq) 'or)
+ (else #f)))
+
+(define (parse-expr ps) (parse-expr-bp ps 0))
+
+(define (parse-expr-bp ps mn)
+ (parse-unary ps) (parse-binary-rhs ps mn))
+
+(define (parse-binary-rhs ps mn)
+ (let ((t (peek ps)))
+ (cond
+ ((not (eq? (tok-kind t) 'PUNCT)) #t)
+ (else
+ (let ((bp (binop-bp-of (tok-value t))))
+ (cond
+ ((not bp) #t)
+ ((< (car bp) mn) #t)
+ (else
+ (let ((op (tok-value t)) (rb (cdr bp)))
+ (advance ps)
+ (cond
+ ((eq? op 'comma)
+ ;; lhs has been parsed; discard it and evaluate rhs.
+ ;; Result of the comma expr is the rhs's rval.
+ (cg-pop (ps-cg ps))
+ (parse-expr-bp ps rb) (rval! ps))
+ ((eq? op 'assign)
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-assign (ps-cg ps)))
+ ((compound-op op)
+ (let ((b (compound-op op)))
+ (cg-dup (ps-cg ps))
+ (cg-load (ps-cg ps))
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-arith-conv (ps-cg ps))
+ (cg-binop (ps-cg ps) b)
+ (cg-assign (ps-cg ps))))
+ ((eq? op 'qmark)
+ (rval! ps)
+ (cg-ifelse-merge (ps-cg ps)
+ (lambda ()
+ (parse-expr-bp ps 0) (rval! ps))
+ (lambda ()
+ (expect-punct ps 'colon)
+ (parse-expr-bp ps rb) (rval! ps))))
+ ((eq? op 'land)
+ (rval! ps)
+ ;; Both branches must push i32 0/1. Right side is
+ ;; coerced via `cg-cast bool` so the merge slot
+ ;; carries i32 (per §H.2).
+ (cg-ifelse-merge (ps-cg ps)
+ (lambda ()
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-cast (ps-cg ps) %t-bool)
+ (cg-cast (ps-cg ps) %t-i32))
+ (lambda ()
+ (cg-push-imm (ps-cg ps) %t-i32 0))))
+ ((eq? op 'lor)
+ (rval! ps)
+ (cg-ifelse-merge (ps-cg ps)
+ (lambda ()
+ (cg-push-imm (ps-cg ps) %t-i32 1))
+ (lambda ()
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-cast (ps-cg ps) %t-bool)
+ (cg-cast (ps-cg ps) %t-i32))))
+ (else
+ (rval! ps) (cg-promote (ps-cg ps))
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-promote (ps-cg ps))
+ (cg-arith-conv (ps-cg ps))
+ (cg-binop (ps-cg ps) (punct-to-cgop op))))
+ (parse-binary-rhs ps mn)))))))))
+
+(define (parse-unary ps)
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value amp))
+ (advance ps) (parse-unary ps)
+ (cg-take-addr (ps-cg ps)))
+ (($ tok? (kind PUNCT) (value star))
+ (advance ps) (parse-unary ps) (rval! ps)
+ (cg-push-deref (ps-cg ps)))
+ (($ tok? (kind PUNCT) (value plus))
+ (advance ps) (parse-unary ps)
+ (rval! ps) (cg-promote (ps-cg ps)))
+ (($ tok? (kind PUNCT) (value minus))
+ (advance ps) (parse-unary ps)
+ (rval! ps) (cg-promote (ps-cg ps))
+ (cg-unop (ps-cg ps) 'neg))
+ (($ tok? (kind PUNCT) (value tilde))
+ (advance ps) (parse-unary ps)
+ (rval! ps) (cg-promote (ps-cg ps))
+ (cg-unop (ps-cg ps) 'bnot))
+ (($ tok? (kind PUNCT) (value bang))
+ (advance ps) (parse-unary ps) (rval! ps)
+ (cg-unop (ps-cg ps) 'lnot))
+ (($ tok? (kind PUNCT) (value inc))
+ (advance ps) (parse-unary ps)
+ (cg-dup (ps-cg ps))
+ (cg-load (ps-cg ps))
+ (cg-push-imm (ps-cg ps) %t-i32 1)
+ (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps)))
+ (($ tok? (kind PUNCT) (value dec))
+ (advance ps) (parse-unary ps)
+ (cg-dup (ps-cg ps))
+ (cg-load (ps-cg ps))
+ (cg-push-imm (ps-cg ps) %t-i32 1)
+ (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps)))
+ (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps))
+ (($ tok? (kind KW) (value sizeof))
+ (advance ps)
+ (cond
+ ((at-punct? ps 'lparen)
+ (advance ps)
+ (cond
+ ((token-is-decl? ps)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (cg-push-imm (ps-cg ps) %t-u64
+ (max (ctype-size ty) 0))))
+ (else
+ (parse-expr ps) (expect-punct ps 'rparen)
+ (let* ((tp (cg-top (ps-cg ps)))
+ (sz (max (ctype-size (opnd-type tp)) 0)))
+ (cg-pop (ps-cg ps))
+ (cg-push-imm (ps-cg ps) %t-u64 sz)))))
+ (else (parse-unary ps)
+ (let* ((tp (cg-top (ps-cg ps)))
+ (sz (max (ctype-size (opnd-type tp)) 0)))
+ (cg-pop (ps-cg ps))
+ (cg-push-imm (ps-cg ps) %t-u64 sz)))))
+ (else (parse-postfix ps))))
+
+(define (token-is-decl? ps)
+ (pmatch (peek ps)
+ (($ tok? (kind KW) (value ,v))
+ (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
+ (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
+ (eq? v '_Bool) (eq? v 'struct) (eq? v 'union)
+ (eq? v 'enum) (eq? v 'const) (eq? v 'volatile)
+ (eq? v 'restrict) (eq? v 'inline)))
+ (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
+ (else #f)))
+
+(define (parse-cast-or-unary ps)
+ (pmatch (peek2 ps)
+ (($ tok? (kind KW) (value ,v))
+ (guard (or (eq? v 'void) (eq? v 'char) (eq? v 'short)
+ (eq? v 'int) (eq? v 'long) (eq? v 'signed)
+ (eq? v 'unsigned) (eq? v '_Bool)
+ (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
+ (eq? v 'const) (eq? v 'volatile)
+ (eq? v 'restrict)))
+ (advance ps)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (parse-unary ps)
+ ;; Cast operand undergoes lvalue conversion first (C semantics):
+ ;; arrays decay to pointers, lvals become rvals. cg-cast then
+ ;; bit-casts the resulting rval to the target type.
+ (rval! ps)
+ (cg-cast (ps-cg ps) ty)))
+ (($ tok? (kind IDENT) (value ,n))
+ (guard (typedef? ps n))
+ (advance ps)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (parse-unary ps)
+ (rval! ps)
+ (cg-cast (ps-cg ps) ty)))
+ (else (advance ps) (parse-expr ps)
+ (expect-punct ps 'rparen)
+ (parse-postfix-rest ps))))
+
+(define (parse-postfix ps)
+ (parse-primary ps) (parse-postfix-rest ps))
+
+(define (parse-postfix-rest ps)
+ (let lp ()
+ (pmatch (peek ps)
+ (($ tok? (kind PUNCT) (value lbrack))
+ (advance ps) (rval! ps)
+ (parse-expr ps) (rval! ps)
+ (expect-punct ps 'rbrack)
+ (cg-binop (ps-cg ps) 'add)
+ (cg-push-deref (ps-cg ps)) (lp))
+ (($ tok? (kind PUNCT) (value lparen))
+ (advance ps) (rval-not-fn! ps)
+ (let* ((fn-ty (call-fn-type (ps-cg ps)))
+ (n (parse-call-args ps fn-ty))
+ ;; has-result? = #f for known void returns. Skips the
+ ;; wasted ST a0 → frame-slot spill that cg-call would
+ ;; otherwise emit for void calls.
+ (has-result?
+ (cond
+ ((not fn-ty) #t)
+ ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f)
+ (else #t))))
+ (expect-punct ps 'rparen)
+ (cg-call (ps-cg ps) n has-result?)
+ ;; Maintain parse's "one rval per expression" invariant so
+ ;; comma / parse-expr-stmt / for-init/step pop sites stay
+ ;; simple. The placeholder is vstack-only and never
+ ;; materialized (cg-pop is a vstack op, no emit).
+ (cond ((not has-result?)
+ (cg-push-imm (ps-cg ps) %t-i32 0)))
+ (lp)))
+ (($ tok? (kind PUNCT) (value dot))
+ (advance ps)
+ (pmatch (advance ps)
+ (($ tok? (kind IDENT) (value ,n))
+ (cg-push-field (ps-cg ps) n) (lp))
+ (($ tok? (loc ,l)) (die l "expected field name"))))
+ (($ tok? (kind PUNCT) (value arrow))
+ (advance ps)
+ (pmatch (advance ps)
+ (($ tok? (kind IDENT) (value ,n))
+ ;; ptr -> field: load the pointer to rval, deref to reach
+ ;; the struct lval, then push the field.
+ (rval! ps)
+ (cg-push-deref (ps-cg ps))
+ (cg-push-field (ps-cg ps) n) (lp))
+ (($ tok? (loc ,l)) (die l "expected field name"))))
+ (($ tok? (kind PUNCT) (value inc))
+ (advance ps)
+ (cg-postinc (ps-cg ps)) (lp))
+ (($ tok? (kind PUNCT) (value dec))
+ (advance ps)
+ (cg-postdec (ps-cg ps)) (lp))
+ (else #t))))
+
+;; call-fn-type cg -> ctype-or-#f
+;; The function operand sits at the top of the vstack when
+;; parse-call-args runs (just after rval-not-fn!). Its type may be
+;; `fn` directly (named callee) or `ptr -> fn` (function pointer).
+;; Returns the underlying `fn` ctype, or #f if the operand isn't
+;; recognizably callable (callsite still works — no per-arg cast).
+(define (call-fn-type cg)
+ (let* ((tp (cg-top cg)))
+ (cond
+ ((not tp) #f)
+ (else
+ (let* ((ty (opnd-type tp))
+ (k (ctype-kind ty)))
+ (cond
+ ((eq? k 'fn) ty)
+ ((eq? k 'ptr)
+ (let ((pe (ctype-ext ty)))
+ (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe)
+ (else #f))))
+ (else #f)))))))
+
+;; param-types-of fn-ty -> (params variadic?) with a #f fallback.
+(define (call-fn-param-info fn-ty)
+ (cond
+ ((not fn-ty) (cons '() #f))
+ (else
+ (let ((ext (ctype-ext fn-ty)))
+ (cons (cadr ext) (car (cddr ext)))))))
+
+;; parse-call-args ps fn-ty -> arg-count
+;; Casts each fixed arg to the declared param type (CC.md §K.5).
+;; For variadic args (index >= named-arg count, when variadic? = #t)
+;; applies cg-promote (CC.md §G.1).
+(define (parse-call-args ps fn-ty)
+ (cond
+ ((at-punct? ps 'rparen) 0)
+ (else
+ (let* ((info (call-fn-param-info fn-ty))
+ (params (car info))
+ (var? (cdr info))
+ (nfix (length params)))
+ (let lp ((n 0) (rem params))
+ (parse-expr-bp ps 4) (rval! ps)
+ (cond
+ ;; Fixed-arg: cast to declared param type. param entry shape
+ ;; is (name . ctype) per cg-fn-begin's contract.
+ ((not (null? rem))
+ (cg-cast (ps-cg ps) (cdr (car rem))))
+ ;; Variadic position (n >= nfix and var? is true): promote.
+ (var?
+ (cg-promote (ps-cg ps))))
+ (let ((m (+ n 1))
+ (rest (if (null? rem) '() (cdr rem))))
+ (cond ((at-punct? ps 'comma) (advance ps) (lp m rest))
+ (else m))))))))
+
+;; --------------------------------------------------------------------
+;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in
+;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'.
+;; va_start(ap, last) — last is parsed and discarded; cg only needs
+;; the variadic-first-slot offset, which it already tracks.
+;; va_arg(ap, T) — T is a type-name; result rval has that type.
+;; va_end(ap) — no-op codegen; just consumes ap.
+;;
+;; Pushes a single imm 0 for va_start / va_end so they fit as
+;; expression statements; va_arg pushes the rval.
+;; --------------------------------------------------------------------
+(define (parse-builtin-va-start ps)
+ (advance ps) ; IDENT
+ (expect-punct ps 'lparen)
+ (parse-expr-bp ps 4) ; ap (must be lval)
+ (expect-punct ps 'comma)
+ ;; "last" is parsed for syntactic completeness then dropped — cg
+ ;; doesn't need it; the variadic-first-slot was determined at
+ ;; cg-fn-begin/v time.
+ (parse-expr-bp ps 4) (cg-pop (ps-cg ps))
+ (expect-punct ps 'rparen)
+ (cg-va-start (ps-cg ps))
+ ;; Push a placeholder rval so the call expression has a value
+ ;; (matches va_start's "void" but our parser expects all
+ ;; expressions to leave one rval).
+ (cg-push-imm (ps-cg ps) %t-i32 0))
+
+(define (parse-builtin-va-arg ps)
+ (advance ps) ; IDENT
+ (expect-punct ps 'lparen)
+ (parse-expr-bp ps 4) ; ap (lval)
+ (expect-punct ps 'comma)
+ (let* ((sp (parse-decl-spec ps))
+ (p (parse-declarator ps (cdr sp)))
+ (ty (cdr p)))
+ (expect-punct ps 'rparen)
+ (cg-va-arg (ps-cg ps) ty)))
+
+(define (parse-builtin-va-end ps)
+ (advance ps) ; IDENT
+ (expect-punct ps 'lparen)
+ (parse-expr-bp ps 4) ; ap
+ (expect-punct ps 'rparen)
+ (cg-va-end (ps-cg ps))
+ (cg-push-imm (ps-cg ps) %t-i32 0))
+
+(define (parse-primary ps)
+ (let ((t (peek ps)))
+ (pmatch t
+ (($ tok? (kind INT) (value ,n))
+ (advance ps)
+ (cg-push-imm (ps-cg ps) %t-i32 n))
+ (($ tok? (kind CHAR) (value ,c))
+ (advance ps)
+ (cg-push-imm (ps-cg ps) %t-i8 c))
+ (($ tok? (kind STR) (value ,s))
+ (advance ps)
+ (cg-push-string (ps-cg ps) s))
+ (($ tok? (kind IDENT) (value ,n))
+ (cond
+ ((bv= n "__builtin_va_start") (parse-builtin-va-start ps))
+ ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps))
+ ((bv= n "__builtin_va_end") (parse-builtin-va-end ps))
+ (else
+ (let ((sm (scope-lookup ps n)))
+ (advance ps)
+ (cond
+ ((not sm) (die (tok-loc t) "undecl" n))
+ ((eq? (sym-kind sm) 'enum-const)
+ (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm)))
+ (else (cg-push-sym (ps-cg ps) sm)))))))
+ (($ tok? (kind PUNCT) (value lparen))
+ (advance ps) (parse-expr ps) (expect-punct ps 'rparen))
+ (else (die (tok-loc t) "unexp" (tok-value t))))))
+
+(define (rval! ps)
+ (let ((tp (cg-top (ps-cg ps))))
+ (cond ((and tp (opnd? tp) (opnd-lval? tp))
+ (cg-load (ps-cg ps)))
+ (else #t))))
+
+(define (rval-not-fn! ps)
+ (let ((tp (cg-top (ps-cg ps))))
+ (cond ((and tp (opnd? tp) (opnd-lval? tp)
+ (not (ctype-is-fn? (opnd-type tp))))
+ (cg-load (ps-cg ps)))
+ (else #t))))
+;; cc/main.scm — driver. Argv, file I/O, ties phases together.
+;;
+;; Realization of docs/CC-INTERNALS.md §main.scm.
+
+;; --------------------------------------------------------------------
+;; CLI: cc <input.c> <output.P1pp>
+;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.)
+;;
+;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
+;; is the catm'd compiler source path, argv[2..] are the user-facing
+;; positional args. cc-main strips the first two.
+;; --------------------------------------------------------------------
+
+(define (%cc-slurp path)
+ (let ((r (open-input path)))
+ (cond ((not (car r))
+ (die #f "cannot open input" path)))
+ (let* ((p (cdr r))
+ (rd (read-all p)))
+ (close p)
+ (cond ((not (car rd)) (die #f "read failed" path)))
+ (cdr rd))))
+
+(define (%cc-write path bv)
+ (let ((r (open-output path)))
+ (cond ((not (car r))
+ (die #f "cannot open output" path)))
+ (let ((p (cdr r)))
+ (write-bv-fd (port-fd p) bv)
+ (close p)
+ 0)))
+
+;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan
+;; argv for a sentinel "--cc-debug" flag. When present, debug-log
+;; prints heap usage between phases to fd 2.
+(define (%cc-flag? args flag)
+ (cond ((null? args) #f)
+ ((bv= (car args) flag) #t)
+ (else (%cc-flag? (cdr args) flag))))
+
+(define (%cc-strip-flag args flag)
+ (cond ((null? args) '())
+ ((bv= (car args) flag) (cdr args))
+ (else (cons (car args) (%cc-strip-flag (cdr args) flag)))))
+
+(define (cc-main av)
+ (let* ((raw (cdr (cdr av)))
+ (dbg (%cc-flag? raw "--cc-debug"))
+ (args (%cc-strip-flag raw "--cc-debug")))
+ (cond (dbg (debug-log-on!)))
+ (cond
+ ((or (null? args) (null? (cdr args)))
+ (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>")))
+ (let* ((in-path (car args))
+ (out-path (car (cdr args))))
+ (debug-log "phase=start" "heap" (heap-usage))
+ (let* ((src (%cc-slurp in-path))
+ (_1 (debug-log "phase=slurp" "heap" (heap-usage)
+ "src-bytes" (bytevector-length src)))
+ (toks (lex-tokenize src in-path))
+ (_2 (debug-log "phase=lex" "heap" (heap-usage)))
+ (expanded (pp-expand toks '()))
+ (_3 (debug-log "phase=pp" "heap" (heap-usage)))
+ (cg (cg-init))
+ (ps (make-pstate expanded cg)))
+ (parse-translation-unit ps)
+ (debug-log "phase=parse" "heap" (heap-usage))
+ (let ((out (cg-finish cg)))
+ (debug-log "phase=cg-finish" "heap" (heap-usage)
+ "out-bytes" (bytevector-length out))
+ (%cc-write out-path out))
+ 0))))
+
diff --git a/cc/cg.scm b/cc/cg.scm
@@ -1,1246 +0,0 @@
-;; cc/cg.scm — codegen state and emission API.
-;; Realization of docs/CC-INTERNALS.md §cg.scm.
-;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc;
-;; cg owns sign extension, signed/unsigned dispatch, pointer scaling.
-;;
-;; Output uses libp1pp's structured macros (%fn, %ifelse_nez,
-;; %loop_tag, %break, %continue) per docs/LIBP1PP.md.
-;;
-;; Frame layout (CC-CONTRACTS §3):
-;; [sp + 0 .. staging*8) outgoing-arg staging
-;; [sp + staging*8 ..) locals + spilled vstack values
-;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the
-;; staging size, only known at fn-end, can be filled in via a 0-arg
-;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block.
-
-(define (%cg-emit-buf cg)
- (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg))))
-
-(define (%cg-emit cg bv)
- (buf-push! (%cg-emit-buf cg) bv))
-
-(define (%cg-emit-many cg bvs)
- (for-each (lambda (b) (%cg-emit cg b)) bvs))
-
-(define (%n n) (number->string n 10))
-
-;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...)
-;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off
-;; cg-globals means cg-globals only mutates when the user emits a real
-;; global, which is what parse-fn-body's rewind-safety check needs.
-(define (%cg-fn-set! cg key val)
- (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg))))
-
-(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg)))
-
-(define (%cg-fresh-label cg prefix)
- (let* ((n (cg-label-ctr cg))
- (bv (bytevector-append prefix (%n n))))
- (cg-label-ctr-set! cg (+ n 1))
- bv))
-
-(define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L"))
-(define (%cg-fresh-lbl cg) (%cg-fresh-label cg "lbl_"))
-
-(define (%cg-bump-outgoing! cg n)
- (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0))
-
-(define (%cg-slot-expr cg logical-off)
- (let ((nm (%cg-fn-get cg '%fn-name)))
- (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")"))))
-
-(define (%cg-mangle-global name-bv)
- (bytevector-append "cc__" name-bv))
-
-(define (%cg-reg->bv r) (symbol->string r))
-
-(define (%cg-emit-li cg reg n)
- (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n")))
-
-(define (%cg-emit-la cg reg label-bv)
- (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n")))
-
-(define (%cg-emit-ld-slot cg reg logical-off)
- (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n")))
-
-(define (%cg-emit-st-slot cg reg logical-off)
- (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, "
- (%cg-slot-expr cg logical-off) ")\n")))
-
-(define (%cg-emit-ld cg reg base off)
- (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n")))
-
-(define (%cg-emit-st cg reg base off)
- (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", "
- (%cg-reg->bv base) ", " (%n off) ")\n")))
-
-;; Width-aware load/store. Dispatches on ctype-size:
-;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by
-;; shli/sari 56 to materialize the canonical 64-bit form).
-;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops,
-;; and word ops require natural alignment which we can't promise
-;; for struct fields or non-word-aligned local slots). Loads
-;; gather bytes via %lb + shli/or; stores scatter via shri/%sb.
-;; Signed loads (i16/i32) sign-extend via shli/sari to canonical
-;; 64-bit form.
-;; 8 (or anything else for now): %ld / %st.
-;; Scratch convention: helpers may clobber t1; callers never pass
-;; reg=t1.
-
-(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes)
- ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low.
- ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv
- ;; expression for offset k.
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", "
- (off-expr-fn 0) ")\n"))
- (let loop ((k 1))
- (cond
- ((= k n-bytes) 0)
- (else
- (%cg-emit-many cg (list
- "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n"
- "%shli(t1, t1, " (%n (* 8 k)) ")\n"
- "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n"))
- (loop (+ k 1))))))
-
-(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes)
- ;; Emit n-bytes %sb scatters from reg via shri-shifted t1.
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", "
- (off-expr-fn 0) ")\n"))
- (let loop ((k 1))
- (cond
- ((= k n-bytes) 0)
- (else
- (%cg-emit-many cg (list
- "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n"
- "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n"))
- (loop (+ k 1))))))
-
-(define (%cg-emit-sext cg reg shift-amount)
- (%cg-emit-many cg (list
- "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
- (%n shift-amount) ")\n"
- "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", "
- (%n shift-amount) ")\n")))
-
-(define (%cg-emit-ld-slot-typed cg reg ctype logical-off)
- (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
- (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
- (cond
- ((= sz 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, "
- (off-fn 0) ")\n"))
- (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
- ((= sz 2)
- (%cg-emit-ldN-bytes cg reg "sp" off-fn 2)
- (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
- ((= sz 4)
- (%cg-emit-ldN-bytes cg reg "sp" off-fn 4)
- (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
- (else (%cg-emit-ld-slot cg reg logical-off)))))
-
-(define (%cg-emit-st-slot-typed cg reg ctype logical-off)
- (let* ((sz (ctype-size ctype))
- (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k)))))
- (cond
- ((= sz 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, "
- (off-fn 0) ")\n")))
- ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2))
- ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4))
- (else (%cg-emit-st-slot cg reg logical-off)))))
-
-(define (%cg-emit-ld-typed cg reg ctype base off)
- (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype))
- (base-bv (%cg-reg->bv base))
- (off-fn (lambda (k) (%n (+ off k)))))
- (cond
- ((= sz 1)
- (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", "
- base-bv ", " (off-fn 0) ")\n"))
- (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56))))
- ((= sz 2)
- (%cg-emit-ldN-bytes cg reg base-bv off-fn 2)
- (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48))))
- ((= sz 4)
- (%cg-emit-ldN-bytes cg reg base-bv off-fn 4)
- (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32))))
- (else (%cg-emit-ld cg reg base off)))))
-
-(define (%cg-emit-st-typed cg reg ctype base off)
- (let* ((sz (ctype-size ctype))
- (base-bv (%cg-reg->bv base))
- (off-fn (lambda (k) (%n (+ off k)))))
- (cond
- ((= sz 1)
- (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", "
- base-bv ", " (off-fn 0) ")\n")))
- ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2))
- ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4))
- (else (%cg-emit-st cg reg base off)))))
-
-(define (%cg-load-opnd-into cg op reg)
- ;; frame lval: load at type width. frame rval is a spilled word
- ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load.
- ;; global lval width > 1 byte-gathers must not alias dest with base —
- ;; the first %lb would otherwise clobber the address before subsequent
- ;; byte loads. Stage the address in t2.
- (pmatch op
- (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n))
- (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off))
- (%cg-emit-ld-slot-typed cg reg ty off))
- (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off))
- (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl))
- (($ opnd? (kind global) (type ,ty) (ext ,lbl))
- (%cg-emit-la cg 't2 lbl)
- (%cg-emit-ld-typed cg reg ty 't2 0))
- (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op)))))
-
-(define (%cg-spill-reg cg reg ty)
- (let* ((off (cg-alloc-slot cg 8 8))
- (op (%opnd 'frame ty off #f)))
- (%cg-emit-st-slot cg reg off)
- (cg-vstack-set! cg (cons op (cg-vstack cg)))
- op))
-
-(define (%ctype-ptr? t)
- (let ((k (ctype-kind t)))
- (if (eq? k 'ptr) #t (eq? k 'arr))))
-
-(define (%ctype-pointee t)
- (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t))
- ((eq? (ctype-kind t) 'arr) (car (ctype-ext t)))
- (else #f)))
-
-(define (%ctype-unsigned? t)
- (let ((k (ctype-kind t)))
- (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t)
- ((eq? k 'u64) #t) ((eq? k 'bool) #t)
- ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t)
- (else #f))))
-
-(define (%ctype-size t) (ctype-size t))
-
-(define (%reg-by-idx i)
- (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3)
- (else (die #f "cg: param idx > 3 needs ldarg path" i))))
-
-;; --------------------------------------------------------------------
-;; Lifecycle
-;; --------------------------------------------------------------------
-
-(define (cg-init)
- (%cg (make-buf/cap %BUF-CAP-TEXT) ; text
- (make-buf/cap %BUF-CAP-DATA) ; data
- (make-buf/cap %BUF-CAP-BSS) ; bss
- '() ; vstack
- 0 ; frame-hi
- 0 ; label-ctr
- '() ; str-pool
- '() ; globals
- '() ; fn-meta
- (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn)
- (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn)
- 0 ; max-outgoing
- #f)) ; in-fn?
-
-(define (cg-finish cg)
- ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry)
- ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't
- ;; clobber a0/a1, so falling straight through to cc__main forwards
- ;; them unchanged. The 16-byte frame is just enough for %enter's
- ;; saved-fp/lr to fit; cc__main builds its own frame on top.
- ;; (CC-CONTRACTS §J.1, §5.4.)
- (let ((tb (cg-text cg)))
- (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n")
- (buf-push! tb "%fn(p1_main, 16, {\n")
- (buf-push! tb "%call(&cc__main)\n")
- (buf-push! tb "})\n"))
- ;; Every P1pp translation unit must end with :ELF_end so the ELF
- ;; header can compute file-size and ph_memsz boundaries.
- (bv-cat (list (buf-flush (cg-text cg))
- (buf-flush (cg-data cg))
- (buf-flush (cg-bss cg))
- ":ELF_end\n")))
-
-(define (cg-fn-begin cg name params return-type)
- (cg-fn-begin/v cg name params return-type #f))
-
-;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte
-;; slots covering incoming arg indices 0..15, populating each from the
-;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for
-;; idx 4..15. va_start computes the address of the slot at index =
-;; named-arg count, so va_arg walks linearly through the rest.
-;; Indices 4..15 may be garbage when the caller passed fewer args; user
-;; code stops walking based on a count or sentinel before those slots
-;; are read. Limit of 15 variadic args (after named) is enough for
-;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more.
-(define (cg-fn-begin/v cg name params return-type variadic?)
- (buf-reset! (cg-fn-buf cg))
- (buf-reset! (cg-prologue-buf cg))
- (cg-in-fn?-set! cg #t)
- (cg-vstack-set! cg '())
- (cg-frame-hi-set! cg 0)
- ;; cg-label-ctr is NOT reset per-fn. Loop tags emit single-colon
- ;; (global) labels via libp1pp's %loop_tag macro (`:L0_top`,
- ;; `:L0_end`) — see P1/P1pp.P1pp:loop_tag — so two functions both
- ;; using L0 would produce duplicate global labels and break linking.
- ;; Switch dispatch labels (`sw_disp_L<N>`) inherit the same tag and
- ;; are also single-colon. Keeping the counter monotonic across
- ;; functions guarantees uniqueness without needing to mangle.
- (cg-max-outgoing-set! cg 0)
- (cg-fn-meta-set! cg '())
- (%cg-fn-set! cg '%fn-name name)
- (%cg-fn-set! cg '%fn-ret-type return-type)
- (%cg-fn-set! cg '%indirect-slots '())
- (%cg-fn-set! cg '%fn-variadic? variadic?)
- (let ((ret-slot (cg-alloc-slot cg 8 8)))
- (%cg-fn-set! cg '%fn-ret-slot ret-slot)
- (cond
- ((not (eq? (ctype-kind return-type) 'void))
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%li(t0, 0)\n"
- "%st(t0, sp, "
- (%cg-slot-expr cg ret-slot) ")\n"))))))
- ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We
- ;; return an alist (name-bv . sym) the parser binds into scope.
- (let walk ((ps params) (idx 0) (out '()) (first-slot #f))
- (cond
- ((null? ps)
- (cond
- (variadic?
- ;; Pad the incoming-arg window out to 16 slots. For idx 0..3
- ;; the slot is filled from a-register; for idx 4..15 from
- ;; LDARG slot (idx-4). va_start points at the slot whose
- ;; index equals the named-arg count, and va_arg walks
- ;; linearly from there through the rest of the window.
- (let pad ((i idx) (vfirst #f) (fs first-slot))
- (cond
- ((>= i 16)
- ;; If named-arg count was 0, vfirst is the very first
- ;; slot of the save area (= fs).
- (%cg-fn-set! cg '%fn-vararg-first-slot
- (or vfirst fs))
- (reverse out))
- (else
- (let ((off (cg-alloc-slot cg 8 8)))
- (cond
- ((< i 4)
- (let ((ar (%reg-by-idx i)))
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%st(" (%cg-reg->bv ar)
- ", sp, "
- (%cg-slot-expr cg off) ")\n")))))
- (else
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n"
- "%st(t0, sp, "
- (%cg-slot-expr cg off) ")\n")))))
- (pad (+ i 1)
- (or vfirst off)
- (or fs off)))))))
- (else (reverse out))))
- (else
- (let* ((p (car ps))
- (nm (car p))
- (ty (cdr p))
- (off (cg-alloc-slot cg 8 8))
- (psym (%sym nm 'param #f ty off)))
- (cond
- ((< idx 4)
- (let ((ar (%reg-by-idx idx)))
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%st(" (%cg-reg->bv ar)
- ", sp, " (%cg-slot-expr cg off) ")\n")))))
- (else
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n"
- "%st(t0, sp, " (%cg-slot-expr cg off) ")\n")))))
- (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)
- (or first-slot off)))))))
-
-(define (cg-fn-end cg)
- ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain!
- ;; (memcpy, no allocation). Header/footer pieces go through buf-push!
- ;; on cg-text — also memcpy. Net result: zero net heap allocation in
- ;; cg-fn-end other than the small (%n N) bvs for staging-bytes /
- ;; frame-size, which the surrounding parse-fn-body's heap-rewind!
- ;; reclaims.
- (let* ((name (%cg-fn-get cg '%fn-name))
- (ret-slot (%cg-fn-get cg '%fn-ret-slot))
- (ret-type (%cg-fn-get cg '%fn-ret-type))
- (locals-hi (cg-frame-hi cg))
- (staging-bytes (* 8 (cg-max-outgoing cg)))
- (raw-size (+ staging-bytes locals-hi))
- (frame-size (align-up raw-size 16))
- (mangled (%cg-mangle-global name))
- (tb (cg-text cg)))
- ;; Now that the body is fully emitted, leave fn dispatch so any
- ;; trailing emits in this function (including the ret-block below)
- ;; route to cg-text directly.
- (cg-in-fn?-set! cg #f)
- ;; staging-size macro
- (buf-push! tb "%macro ")
- (buf-push! tb name)
- (buf-push! tb "__SO()\n")
- (buf-push! tb (%n staging-bytes))
- (buf-push! tb "\n%endm\n")
- ;; %fn header
- (buf-push! tb "%fn(")
- (buf-push! tb mangled)
- (buf-push! tb ", ")
- (buf-push! tb (%n frame-size))
- (buf-push! tb ", {\n")
- ;; prologue + body, drained byte-for-byte
- (buf-drain! tb (cg-prologue-buf cg))
- (buf-drain! tb (cg-fn-buf cg))
- ;; ret block
- (buf-push! tb "::ret\n")
- (cond
- ((eq? (ctype-kind ret-type) 'void)
- (buf-push! tb "%li(a0, 0)\n"))
- (else
- (buf-push! tb "%ld(a0, sp, ")
- (buf-push! tb (%cg-slot-expr cg ret-slot))
- (buf-push! tb ")\n")))
- (buf-push! tb "})\n")
- (cg-vstack-set! cg '())
- (cg-frame-hi-set! cg 0)
- (cg-max-outgoing-set! cg 0)
- 0))
-
-;; --------------------------------------------------------------------
-;; Vstack
-;; --------------------------------------------------------------------
-(define (cg-push cg op)
- (cg-vstack-set! cg (cons op (cg-vstack cg)))
- op)
-
-(define (cg-pop cg)
- (let ((s (cg-vstack cg)))
- (cond ((null? s) (die #f "cg-pop: empty vstack"))
- (else (cg-vstack-set! cg (cdr s)) (car s)))))
-
-(define (cg-top cg)
- (let ((s (cg-vstack cg)))
- (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s)))))
-
-(define (cg-depth cg) (length (cg-vstack cg)))
-
-;; Duplicate the top vstack entry. For lvals this is safe — the slot
-;; (or label, or indirect-marked frame) backing the lval keeps existing
-;; until the function ends. For rvals it duplicates the descriptor of
-;; the spilled value; both copies refer to the same already-emitted
-;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to
-;; preserve the lhs across a `cg-load` so the subsequent `cg-assign`
-;; still has its address.
-(define (cg-dup cg)
- (let ((p (cg-top cg))) (cg-push cg p) p))
-
-;; --------------------------------------------------------------------
-;; Materialize
-;; --------------------------------------------------------------------
-(define (cg-push-imm cg ctype value)
- (cg-push cg (%opnd 'imm ctype value #f)))
-
-(define (cg-push-string cg bv-content)
- (let* ((label (cg-intern-string cg bv-content))
- (cp-ty (%ctype 'ptr 8 8 %t-i8)))
- (cg-push cg (%opnd 'global cp-ty label #f))))
-
-(define (cg-push-sym cg sym)
- (pmatch sym
- (($ sym? (kind fn) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f)))
- (($ sym? (kind enum-const) (type ,ty) (slot ,v))
- (cg-push cg (%opnd 'imm ty v #f)))
- (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
- (($ sym? (kind var) (storage static) (type ,ty) (name ,nm))
- (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t)))
- (($ sym? (kind var) (type ,ty) (slot ,off))
- (cg-push cg (%opnd 'frame ty off #t)))
- (($ sym? (kind param) (type ,ty) (slot ,off))
- (cg-push cg (%opnd 'frame ty off #t)))
- (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym)))))
-
-;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS
-;; (not the value). To distinguish from ordinary frame-lvals (whose
-;; slot holds the value directly), we tag indirect slots in
-;; %indirect-slots so cg-load and cg-assign can do the extra
-;; indirection.
-(define (%cg-mark-indirect! cg off)
- (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
- (%cg-fn-set! cg '%indirect-slots (cons off cur))))
-
-(define (%cg-indirect? cg off)
- (let ((cur (or (%cg-fn-get cg '%indirect-slots) '())))
- (let loop ((xs cur))
- (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs)))))))
-
-(define (cg-push-deref cg)
- (let* ((p (cg-pop cg))
- (pt (opnd-type p))
- (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt))
- ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt)))
- (else #f))))
- (cond
- ((not pe) (die #f "cg-push-deref: not a pointer" pt))
- (else
- (%cg-load-opnd-into cg p 't0)
- (let ((off (cg-alloc-slot cg 8 8)))
- (%cg-emit-st-slot cg 't0 off)
- (%cg-mark-indirect! cg off)
- (cg-push cg (%opnd 'frame pe off #t)))))))
-
-;; --------------------------------------------------------------------
-;; Aggregate field access (§D.1–D.4)
-;; --------------------------------------------------------------------
-;; cg-push-field cg fname:
-;; pop a struct/union lval; look up `fname` in the struct's fields
-;; list (data.scm: ext = (tag complete? fields), where each field
-;; is (name-bv ctype offset)); push a new lval at the field's
-;; offset with the field's ctype.
-;;
-;; Three input cases:
-;; - direct frame lval at slot `off` -> frame lval at off+fo
-;; - indirect frame lval (slot holds addr) -> new indirect slot for
-;; addr+fo
-;; - global lval at label L -> indirect slot for
-;; la(L)+fo
-;; In all cases the resulting lval has the field's ctype.
-
-(define (%cg-find-field fields fname)
- (let loop ((xs fields))
- (cond
- ((null? xs) #f)
- ((bv= (car (car xs)) fname) (car xs))
- (else (loop (cdr xs))))))
-
-(define (cg-push-field cg fname)
- (let* ((s (cg-pop cg))
- (sty (opnd-type s))
- (k (ctype-kind sty)))
- (cond
- ((not (or (eq? k 'struct) (eq? k 'union)))
- (die #f "cg-push-field: not a struct/union" k))
- ((not (opnd-lval? s))
- (die #f "cg-push-field: not an lvalue" k))
- (else
- (let* ((fields (car (cddr (ctype-ext sty))))
- (f (%cg-find-field fields fname)))
- (cond
- ((not f) (die #f "cg-push-field: no such field" fname))
- (else
- (let* ((fty (cadr f)) (fo (car (cddr f))))
- (pmatch s
- ;; direct frame lval: just shift the slot offset.
- (($ opnd? (kind frame) (ext ,off))
- (guard (not (%cg-indirect? cg off)))
- (cg-push cg (%opnd 'frame fty (+ off fo) #t)))
- ;; indirect frame lval: addr lives in the slot. Compute
- ;; addr+fo into a new indirect slot.
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-ld-slot cg 't0 off)
- (cond
- ((> fo 0)
- (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
- (let ((no (cg-alloc-slot cg 8 8)))
- (%cg-emit-st-slot cg 't0 no)
- (%cg-mark-indirect! cg no)
- (cg-push cg (%opnd 'frame fty no #t))))
- ;; global lval: load addr, add offset, indirect slot.
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (cond
- ((> fo 0)
- (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n"))))
- (let ((no (cg-alloc-slot cg 8 8)))
- (%cg-emit-st-slot cg 't0 no)
- (%cg-mark-indirect! cg no)
- (cg-push cg (%opnd 'frame fty no #t))))
- (else
- (die #f "cg-push-field: unsupported lval kind"
- (opnd-kind s))))))))))))
-
-;; cg-decay-array:
-;; if top of vstack is an arr-typed lval, replace it with a ptr-rval
-;; to the first element. C arrays decay to T* in most contexts;
-;; parse calls this before rval-style operations. No-op otherwise.
-(define (cg-decay-array cg)
- (let ((tp (cg-top cg)))
- (cond
- ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr))
- (let* ((p (cg-pop cg))
- (et (car (ctype-ext (opnd-type p))))
- (pty (%ctype 'ptr 8 8 et)))
- (pmatch p
- ;; direct frame lval: address is sp+off.
- (($ opnd? (kind frame) (ext ,off))
- (guard (not (%cg-indirect? cg off)))
- (%cg-emit-many cg (list "%mov(t0, sp)\n"
- "%addi(t0, t0, "
- (%cg-slot-expr cg off) ")\n"))
- (%cg-spill-reg cg 't0 pty))
- ;; indirect frame lval (rare for arrays, but support it):
- ;; the slot holds the address already.
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-spill-reg cg 't0 pty))
- ;; global array: la(label) is the address.
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-spill-reg cg 't0 pty))
- (else (die #f "cg-decay-array: unsupported lval kind"
- (opnd-kind p))))))
- (else tp))))
-
-;; --------------------------------------------------------------------
-;; Address & deref
-;; --------------------------------------------------------------------
-(define (cg-take-addr cg)
- (let* ((p (cg-pop cg))
- (ty (opnd-type p))
- ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on
- ;; the result scales by sizeof(T[N]) (the whole array), so
- ;; &arr + 1 is one-past-end. Array-to-pointer decay happens
- ;; on use via cg-decay-array, not at the & operator.
- (pty (%ctype 'ptr 8 8 ty)))
- (pmatch p
- (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue"))
- ;; The address itself lives at sp+slot — &*p degenerates to p.
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-spill-reg cg 't0 pty))
- ;; %mov(rd, sp) gives the portable-sp pointer (the backend
- ;; handles any hidden frame-header offset). Then add slot.
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-many cg (list "%mov(t0, sp)\n"
- "%addi(t0, t0, "
- (%cg-slot-expr cg off) ")\n"))
- (%cg-spill-reg cg 't0 pty))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-spill-reg cg 't0 pty))
- (else (die #f "cg-take-addr: non-addressable" (opnd-kind p))))))
-
-(define (cg-load cg)
- (let* ((p (cg-pop cg)) (ty (opnd-type p)))
- (cond
- ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue"))
- ;; Array lvalues decay to a ptr-rval addressing the first
- ;; element (C array-to-pointer decay). We push the lval back
- ;; and route through cg-decay-array for a single source of truth.
- ((eq? (ctype-kind ty) 'arr)
- (cg-push cg p) (cg-decay-array cg))
- ((and (eq? (opnd-kind p) 'frame)
- (%cg-indirect? cg (opnd-ext p)))
- ;; Indirect frame-lval: slot holds the address. Stage the
- ;; address in t2 so multi-byte gathers don't alias dest with
- ;; base.
- (%cg-emit-ld-slot cg 't2 (opnd-ext p))
- (%cg-emit-ld-typed cg 't0 ty 't2 0)
- (%cg-spill-reg cg 't0 ty))
- (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty)))))
-
-;; --------------------------------------------------------------------
-;; Type conversions
-;; --------------------------------------------------------------------
-(define (cg-cast cg to-type)
- (let* ((p (cg-pop cg))
- (from-ty (opnd-type p))
- (from-sz (%ctype-size from-ty))
- (to-sz (%ctype-size to-type))
- (to-kind (ctype-kind to-type)))
- (cond
- ((eq? to-kind 'bool)
- (%cg-load-opnd-into cg p 't0)
- (%cg-emit-many cg (list
- "%ifelse_eqz(t0, { %li(t0, 0) }, { %li(t0, 1) })\n"))
- (%cg-spill-reg cg 't0 to-type))
- ((or (eq? to-kind 'ptr)
- (and (or (eq? to-kind 'i64) (eq? to-kind 'u64))
- (or (eq? (ctype-kind from-ty) 'ptr)
- (eq? (ctype-kind from-ty) 'arr))))
- (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
- ((>= to-sz from-sz)
- (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p))))
- (else
- ;; Narrowing cast. Signed targets (i8/i16/i32) shli/sari to
- ;; truncate-and-sign-extend in one step, so the slot holds the
- ;; canonical 64-bit form and a subsequent widening cast (which
- ;; is relabel-only) restores the value. Unsigned targets mask
- ;; off high bits to zero-extend.
- (%cg-load-opnd-into cg p 't0)
- (cond
- ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56))
- ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48))
- ((eq? to-kind 'i32) (%cg-emit-sext cg 't0 32))
- ((= to-sz 1) (%cg-emit-many cg (list "%andi(t0, t0, 255)\n")))
- ((= to-sz 2)
- (%cg-emit-many cg (list "%li(t1, 65535)\n%and(t0, t0, t1)\n")))
- ((= to-sz 4)
- (%cg-emit-many cg (list "%li(t1, 4294967295)\n%and(t0, t0, t1)\n")))
- (else 0))
- (%cg-spill-reg cg 't0 to-type)))))
-
-(define (cg-promote cg)
- (let* ((p (cg-pop cg))
- (ty (opnd-type p))
- (sz (%ctype-size ty)))
- (cond
- ((< sz 4)
- (cond
- ((%ctype-unsigned? ty)
- (cg-push cg (%opnd (opnd-kind p) %t-u32 (opnd-ext p) (opnd-lval? p))))
- (else
- (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p))))))
- (else (cg-push cg p)))))
-
-(define (cg-arith-conv cg)
- ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to
- ;; arithmetic operands. When either operand is a pointer (or array,
- ;; which behaves as a pointer in arithmetic), the pair is a
- ;; pointer-arith case — leave the types alone so cg-binop can detect
- ;; the ptr operand and apply the right scaling.
- (let* ((b (cg-pop cg))
- (a (cg-pop cg))
- (ta (opnd-type a))
- (tb (opnd-type b))
- (sa (%ctype-size ta))
- (sb (%ctype-size tb)))
- (cond
- ;; Pointer/array arithmetic: leave types alone so cg-binop's
- ;; ptr-aware add/sub branch fires with the correct pointee type
- ;; (and doesn't see two pointers, which would skip scaling).
- ((or (%ctype-ptr? ta) (%ctype-ptr? tb))
- (cg-push cg a)
- (cg-push cg b))
- (else
- (let ((common (cond
- ((> sa sb) ta)
- ((> sb sa) tb)
- ((%ctype-unsigned? ta) ta)
- ((%ctype-unsigned? tb) tb)
- (else ta))))
- (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a)))
- (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? b))))))))
-
-;; --------------------------------------------------------------------
-;; Operators
-;; --------------------------------------------------------------------
-(define (%cg-emit-rrr cg op rd ra rb)
- (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", "
- (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n")))
-
-(define (%cg-emit-cmp cg cc ra rb rd)
- (%cg-emit-many cg (list "%ifelse_" cc "("
- (%cg-reg->bv ra) ", " (%cg-reg->bv rb)
- ", { %li(" (%cg-reg->bv rd) ", 1) }, "
- "{ %li(" (%cg-reg->bv rd) ", 0) })\n")))
-
-(define (cg-binop cg op)
- (let* ((b (cg-pop cg))
- (a (cg-pop cg))
- (ta (opnd-type a))
- (tb (opnd-type b))
- (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb)))
- (a-ptr? (%ctype-ptr? ta))
- (b-ptr? (%ctype-ptr? tb))
- (result-ty
- (cond
- ((or (eq? op 'eq) (eq? op 'ne)
- (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge))
- %t-i32)
- ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64)
- (a-ptr? ta)
- (b-ptr? tb)
- (else ta))))
- (cond
- ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?))
- (%cg-load-opnd-into cg a 'a0)
- (%cg-load-opnd-into cg b 'a1)
- (let ((sz (%ctype-size (%ctype-pointee ta))))
- (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n"))
- (%cg-emit-rrr cg "mul" 'a1 'a1 't0))
- (else 0)))
- (%cg-emit-rrr cg (if (eq? op 'add) "add" "sub") 't0 'a0 'a1)
- (%cg-spill-reg cg 't0 result-ty))
- ((and b-ptr? (eq? op 'add) (not a-ptr?))
- (%cg-load-opnd-into cg a 'a0)
- (%cg-load-opnd-into cg b 'a1)
- (let ((sz (%ctype-size (%ctype-pointee tb))))
- (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n"))
- (%cg-emit-rrr cg "mul" 'a0 'a0 't0))
- (else 0)))
- (%cg-emit-rrr cg "add" 't0 'a0 'a1)
- (%cg-spill-reg cg 't0 result-ty))
- ((and a-ptr? b-ptr? (eq? op 'sub))
- (%cg-load-opnd-into cg a 'a0)
- (%cg-load-opnd-into cg b 'a1)
- (%cg-emit-rrr cg "sub" 't0 'a0 'a1)
- (let ((sz (%ctype-size (%ctype-pointee ta))))
- (cond ((> sz 1) (%cg-emit-many cg (list "%li(t1, " (%n sz) ")\n"))
- (%cg-emit-rrr cg "div" 't0 't0 't1))
- (else 0)))
- (%cg-spill-reg cg 't0 result-ty))
- (else
- (%cg-load-opnd-into cg a 'a0)
- (%cg-load-opnd-into cg b 'a1)
- (cond
- ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1))
- ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1))
- ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1))
- ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1))
- ((eq? op 'or) (%cg-emit-rrr cg "or" 't0 'a0 'a1))
- ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1))
- ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1))
- ((eq? op 'shr)
- (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1)
- (%cg-emit-rrr cg "sar" 't0 'a0 'a1)))
- ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1))
- ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1))
- ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0))
- ((eq? op 'ne) (%cg-emit-cmp cg "ne" 'a0 'a1 't0))
- ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0))
- ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0))
- ((eq? op 'le)
- (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0)
- (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n")))
- ((eq? op 'ge)
- (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0)
- (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n")))
- (else (die #f "cg-binop: unknown op" op)))
- (%cg-spill-reg cg 't0 result-ty)))))
-
-;; Post-increment / post-decrement on the top-of-vstack lval.
-;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store.
-;; Uses cg-dup + cg-load to capture the old rval (which is then in a
-;; never-reused spill slot), then runs the regular dup+load+add+assign
-;; pattern for the store. Pointer scaling falls out of cg-binop add.
-(define (%cg-post-inc-dec cg op)
- (cg-dup cg)
- (cg-load cg)
- (let ((old (cg-pop cg)))
- (cg-dup cg)
- (cg-load cg)
- (cg-push-imm cg %t-i32 1)
- (cg-binop cg op)
- (cg-assign cg)
- (cg-pop cg)
- (cg-push cg old)))
-
-(define (cg-postinc cg) (%cg-post-inc-dec cg 'add))
-(define (cg-postdec cg) (%cg-post-inc-dec cg 'sub))
-
-(define (cg-unop cg op)
- (let* ((p (cg-pop cg)) (ty (opnd-type p)))
- (%cg-load-opnd-into cg p 't0)
- (cond
- ((eq? op 'neg)
- (%cg-emit-many cg (list "%li(t1, 0)\n%sub(t0, t1, t0)\n"))
- (%cg-spill-reg cg 't0 ty))
- ((eq? op 'bnot)
- (%cg-emit-many cg (list "%li(t1, -1)\n%xor(t0, t0, t1)\n"))
- (%cg-spill-reg cg 't0 ty))
- ((eq? op 'lnot)
- (%cg-emit-many cg (list "%ifelse_eqz(t0, { %li(t0, 1) }, { %li(t0, 0) })\n"))
- (%cg-spill-reg cg 't0 %t-i32))
- (else (die #f "cg-unop: unknown op" op)))))
-
-(define (cg-assign cg)
- ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek
- ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2),
- ;; emits the store, pushes the assigned value as the result rval.
- (let* ((rhs0 (cg-pop cg))
- (lhs (cg-pop cg))
- (ty (opnd-type lhs)))
- (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue")))
- ;; Cast rhs to lhs's type (no-op when the types already match).
- (cg-push cg rhs0)
- (cg-cast cg ty)
- (let ((rhs (cg-pop cg)))
- (%cg-load-opnd-into cg rhs 'a0)
- (pmatch lhs
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-emit-st-typed cg 'a0 ty 't0 0))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-st-slot-typed cg 'a0 ty off))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-emit-st-typed cg 'a0 ty 't0 0))
- (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs))))
- (%cg-spill-reg cg 'a0 ty))))
-
-;; --------------------------------------------------------------------
-;; Calls
-;; --------------------------------------------------------------------
-(define (cg-call cg arity has-result?)
- (let* ((args (let loop ((i 0) (acc '()))
- (cond ((= i arity) acc)
- (else (loop (+ i 1) (cons (cg-pop cg) acc))))))
- (fn-op (cg-pop cg)))
- (let stage ((xs args) (idx 0))
- (cond
- ((null? xs) 0)
- ((< idx 4)
- (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx))
- (stage (cdr xs) (+ idx 1)))
- (else
- (%cg-load-opnd-into cg (car xs) 't0)
- (%cg-emit-st cg 't0 'sp (* 8 (- idx 4)))
- (stage (cdr xs) (+ idx 1)))))
- (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0))
- (cond
- ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op)))
- (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n")))
- (else
- (%cg-load-opnd-into cg fn-op 't0)
- (%cg-emit-many cg (list "%callr(t0)\n"))))
- (cond
- (has-result?
- (let* ((fty (opnd-type fn-op))
- (rty (cond
- ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty)))
- ((eq? (ctype-kind fty) 'ptr)
- (let ((p (ctype-ext fty)))
- (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64)))
- (else %t-i64))))
- (%cg-spill-reg cg 'a0 rty)))
- (else #f))))
-
-;; --------------------------------------------------------------------
-;; Return
-;; --------------------------------------------------------------------
-(define (cg-return cg)
- (let ((ret-slot (%cg-fn-get cg '%fn-ret-slot))
- (ret-type (%cg-fn-get cg '%fn-ret-type)))
- (cond
- ((eq? (ctype-kind ret-type) 'void)
- (%cg-emit-many cg (list "%b(&::ret)\n")))
- (else
- (let ((p (cg-pop cg)))
- (%cg-load-opnd-into cg p 'a0)
- (%cg-emit-st-slot cg 'a0 ret-slot)
- (%cg-emit-many cg (list "%b(&::ret)\n")))))))
-
-;; --------------------------------------------------------------------
-;; Structured control flow
-;; --------------------------------------------------------------------
-(define (cg-if cg then-thunk)
- (let ((p (cg-pop cg)))
- (%cg-load-opnd-into cg p 't0)
- (%cg-emit-many cg (list "%if_nez(t0, {\n"))
- (then-thunk)
- (%cg-emit-many cg (list "})\n"))))
-
-(define (cg-ifelse cg then-thunk else-thunk)
- (let ((p (cg-pop cg)))
- (%cg-load-opnd-into cg p 't0)
- (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
- (then-thunk)
- (%cg-emit-many cg (list "}, {\n"))
- (else-thunk)
- (%cg-emit-many cg (list "})\n"))))
-
-;; Conditionals-as-values: `cg-ifelse` is correct for if-statements
-;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends
-;; with one rval on top of the vstack — and after both branches run,
-;; we'd be left with TWO opnds, which breaks the type contract for
-;; the surrounding expression. `cg-ifelse-merge` solves that: pop the
-;; cond, allocate one result slot, and after each thunk runs, pop its
-;; rval and store into the slot. Push the slot as one frame rval.
-;; Both branches must push exactly one opnd; the result type is the
-;; type of the first thunk's pushed opnd (parser must arrange for
-;; both branches to push compatible types — either by passing
-;; pre-coerced operands or by injecting a `cg-cast` inside the thunk).
-(define (cg-ifelse-merge cg then-thunk else-thunk)
- (let* ((cond-op (cg-pop cg))
- (slot (cg-alloc-slot cg 8 8)))
- (%cg-load-opnd-into cg cond-op 't0)
- (%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
- (then-thunk)
- (let* ((p (cg-pop cg))
- (rty (opnd-type p)))
- (%cg-load-opnd-into cg p 'a0)
- (%cg-emit-st-slot cg 'a0 slot)
- (%cg-emit-many cg (list "}, {\n"))
- (else-thunk)
- (let ((q (cg-pop cg)))
- (%cg-load-opnd-into cg q 'a0)
- (%cg-emit-st-slot cg 'a0 slot))
- (%cg-emit-many cg (list "})\n"))
- (cg-push cg (%opnd 'frame rty slot #f)))))
-
-(define (cg-loop cg head-thunk body-thunk)
- ;; body-thunk receives the loop tag as its argument; parser uses
- ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS
- ;; §1.9 / §3.3.
- (let ((tag (%cg-fresh-loop-tag cg)))
- (%cg-emit-many cg (list "%loop_tag(" tag ", {\n"))
- (head-thunk)
- (cond
- ((zero? (cg-depth cg)) 0)
- (else
- (let ((c (cg-pop cg)))
- (%cg-load-opnd-into cg c 't0)
- (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n")))))
- (body-thunk tag)
- (%cg-emit-many cg (list "})\n"))
- tag))
-
-(define (cg-break cg tag)
- (%cg-emit-many cg (list "%break(" tag ")\n")))
-
-(define (cg-continue cg tag)
- (%cg-emit-many cg (list "%continue(" tag ")\n")))
-
-;; --------------------------------------------------------------------
-;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 4-slot
-;; saved-register area at known frame offsets; va_start sets ap to the
-;; address of the first slot past the named-arg count; va_arg reads
-;; *ap, advances ap by 8, and pushes the value as the requested type.
-;;
-;; ap is an lval (typically a `va_list` local). cg-va-start pops it,
-;; computes the address, stores into *ap (or the slot directly), and
-;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for
-;; the value, advances ap, stores back, pushes the loaded value.
-;;
-;; Limitation: only first 4 incoming args (named + variadic) live in
-;; the save area; variadic args at index >= 4 need LDARG and are not
-;; yet supported. See punchlist §G.2 for the gap.
-;; --------------------------------------------------------------------
-(define (%cg-vararg-first-slot cg)
- (let ((s (%cg-fn-get cg '%fn-vararg-first-slot)))
- (cond ((not s) (die #f "cg-va-start: not a variadic function"))
- (else s))))
-
-(define (cg-va-start cg)
- ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0,
- ;; store through ap-lval. Pushes nothing.
- (let* ((ap-lv (cg-pop cg))
- (vsl (%cg-vararg-first-slot cg)))
- (cond ((not (opnd-lval? ap-lv))
- (die #f "cg-va-start: ap not lvalue")))
- ;; Compute address into a0.
- (%cg-emit-many cg (list "%mov(a0, sp)\n"
- "%addi(a0, a0, "
- (%cg-slot-expr cg vsl) ")\n"))
- ;; Store a0 at ap-lval.
- (cond
- ((eq? (opnd-kind ap-lv) 'frame)
- (cond
- ((%cg-indirect? cg (opnd-ext ap-lv))
- (%cg-emit-ld-slot cg 't0 (opnd-ext ap-lv))
- (%cg-emit-st cg 'a0 't0 0))
- (else (%cg-emit-st-slot cg 'a0 (opnd-ext ap-lv)))))
- ((eq? (opnd-kind ap-lv) 'global)
- (%cg-emit-la cg 't0 (opnd-ext ap-lv))
- (%cg-emit-st cg 'a0 't0 0))
- (else (die #f "cg-va-start: bad ap kind" (opnd-kind ap-lv))))))
-
-(define (cg-va-arg cg ctype)
- ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1.
- ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval
- ;; of type ctype (caller cg-cast's if needed).
- (let ((ap-lv (cg-pop cg)))
- (cond ((not (opnd-lval? ap-lv))
- (die #f "cg-va-arg: ap not lvalue")))
- ;; Load ap into a0.
- (pmatch ap-lv
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-emit-ld cg 'a0 't0 0))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-ld-slot cg 'a0 off))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-emit-ld cg 'a0 't0 0))
- (else (die #f "cg-va-arg: bad ap kind" (opnd-kind ap-lv))))
- ;; Load value at [a0] into a1 (full 8 bytes; cg-cast on the rval
- ;; the caller pushes will narrow if needed).
- (%cg-emit-ld cg 'a1 'a0 0)
- ;; Advance ap by 8.
- (%cg-emit-many cg (list "%addi(a0, a0, 8)\n"))
- ;; Store advanced ap back.
- (pmatch ap-lv
- (($ opnd? (kind frame) (ext ,off))
- (guard (%cg-indirect? cg off))
- (%cg-emit-ld-slot cg 't0 off)
- (%cg-emit-st cg 'a0 't0 0))
- (($ opnd? (kind frame) (ext ,off))
- (%cg-emit-st-slot cg 'a0 off))
- (($ opnd? (kind global) (ext ,lbl))
- (%cg-emit-la cg 't0 lbl)
- (%cg-emit-st cg 'a0 't0 0))
- (else 0))
- ;; Spill the loaded value (a1) to a fresh frame slot under ctype.
- (%cg-spill-reg cg 'a1 ctype)))
-
-(define (cg-va-end cg)
- ;; va_end is a no-op in this design. Pop and discard ap-lval.
- (cg-pop cg)
- 0)
-
-;; --------------------------------------------------------------------
-;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3).
-;; user_<name> namespace keeps the user's label space disjoint from
-;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through
-;; libp1pp's %scope mechanism, so forward references inside the same
-;; %fn block work without explicit forward declaration.
-;; --------------------------------------------------------------------
-(define (cg-emit-label cg name-bv)
- (%cg-emit-many cg (list "::user_" name-bv "\n")))
-
-(define (cg-goto cg name-bv)
- (%cg-emit-many cg (list "%b(&::user_" name-bv ")\n")))
-
-;; --------------------------------------------------------------------
-;; switch
-;; --------------------------------------------------------------------
-(define-record-type swctx
- (%swctx ctrl-slot end-tag default-lbl)
- swctx?
- (ctrl-slot swctx-ctrl-slot)
- (end-tag swctx-end-tag)
- (default-lbl swctx-default-lbl swctx-default-lbl-set!))
-
-(define (cg-switch-begin cg)
- (let* ((p (cg-pop cg))
- (off (cg-alloc-slot cg 8 8))
- (tag (%cg-fresh-loop-tag cg))
- (disp-lbl (bytevector-append "sw_disp_" tag)))
- (%cg-load-opnd-into cg p 't0)
- (%cg-emit-st-slot cg 't0 off)
- (%cg-emit-many cg (list "%loop_tag(" tag ", {\n"
- "%b(&::" disp-lbl ")\n"))
- (%swctx off tag #f)))
-
-(define (cg-switch-case cg sw const-int)
- (let* ((lbl (%cg-fresh-lbl cg))
- (key (string->symbol
- (bytevector-append "%sw_cases__" (swctx-end-tag sw))))
- (cur (or (%cg-fn-get cg key) '()))
- (entry (cons const-int lbl)))
- (%cg-fn-set! cg key (cons entry cur))
- (%cg-emit-many cg (list "::" lbl "\n"))))
-
-(define (cg-switch-default cg sw)
- (let ((lbl (%cg-fresh-lbl cg)))
- (swctx-default-lbl-set! sw lbl)
- (%cg-emit-many cg (list "::" lbl "\n"))))
-
-(define (cg-switch-end cg sw)
- (let* ((tag (swctx-end-tag sw))
- (key (string->symbol (bytevector-append "%sw_cases__" tag)))
- (cases (reverse (or (%cg-fn-get cg key) '())))
- (default-lbl (swctx-default-lbl sw))
- (disp-lbl (bytevector-append "sw_disp_" tag)))
- (%cg-emit-many cg (list "%break(" tag ")\n"
- "::" disp-lbl "\n"))
- (%cg-emit-many cg (list "%ld(t0, sp, "
- (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n"))
- (for-each
- (lambda (c)
- (%cg-emit-many cg (list "%li(t1, " (%n (car c)) ")\n"
- "%beq(t0, t1, &::" (cdr c) ")\n")))
- cases)
- (cond
- (default-lbl (%cg-emit-many cg (list "%b(&::" default-lbl ")\n")))
- (else 0))
- (%cg-emit-many cg (list "%break(" tag ")\n"
- "})\n"))))
-
-;; --------------------------------------------------------------------
-;; Globals and data
-;; --------------------------------------------------------------------
-;; cg-emit-global: emit a global symbol into either .data (initialized)
-;; or .bss (zero-init).
-;;
-;; init can be:
-;; #f — zero-init in .bss (size from sym's ctype).
-;; (piece ...) — initialized in .data; pieces concatenated.
-;;
-;; Each piece is either:
-;; <bytevector> — raw bytes; emitted as N×!(byte) entries.
-;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label;
-;; emitted as `&<label> %(0)` (4B label ref +
-;; 4B zero pad).
-(define (%cg-init-piece->bv piece)
- (cond
- ((bytevector? piece)
- (let ((n (bytevector-length piece)))
- (let loop ((i 0) (acc '()))
- (cond
- ((= i n) (bv-cat (reverse acc)))
- (else
- (loop (+ i 1)
- (cons (bv-cat (list "!("
- (number->string
- (bytevector-u8-ref piece i) 10)
- ")\n"))
- acc)))))))
- ((and (pair? piece) (eq? (car piece) 'label-ref))
- (bv-cat (list "&" (cdr piece) " %(0)\n")))
- (else (die #f "cg-emit-global: bad init piece" piece))))
-
-(define (cg-emit-global cg sym init)
- (let* ((nm (sym-name sym))
- (lbl (%cg-mangle-global nm))
- (sz (ctype-size (sym-type sym)))
- (size (if (< sz 0) 8 sz)))
- (cond
- (init
- (buf-push! (cg-data cg) (bv-cat (list "\n:" lbl "\n")))
- (let walk ((ps init))
- (cond
- ((null? ps) 0)
- (else
- (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps)))
- (walk (cdr ps))))))
- (else
- (buf-push! (cg-bss cg)
- (bv-cat (list "\n:" lbl "\n"
- (let zero-loop ((rem size) (acc '()))
- (cond
- ((<= rem 0) (bv-cat (reverse acc)))
- ((>= rem 8)
- (zero-loop (- rem 8) (cons "$(0)\n" acc)))
- (else
- (zero-loop (- rem 1) (cons "!(0)\n" acc))))))))))
- (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg)))
- 0))
-
-(define (cg-emit-extern cg sym)
- (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg)))
- 0)
-
-(define (cg-intern-string cg bv-content)
- (let ((p (alist-ref bv-content (cg-str-pool cg))))
- (cond
- (p p)
- (else
- (let* ((n (length (cg-str-pool cg)))
- (lbl (bytevector-append "cc__str_" (%n n))))
- (cg-str-pool-set! cg
- (alist-set bv-content lbl (cg-str-pool cg)))
- (buf-push! (cg-data cg)
- (bv-cat (list "\n:" lbl "\n"
- "\"" bv-content "\"\n"
- "!(0)\n")))
- lbl)))))
-
-;; --------------------------------------------------------------------
-;; Frame
-;; --------------------------------------------------------------------
-(define (cg-alloc-slot cg bytes align)
- (let* ((aligned (align-up (cg-frame-hi cg) align))
- (new-hi (+ aligned bytes)))
- (cg-frame-hi-set! cg new-hi)
- aligned))
diff --git a/cc/data.scm b/cc/data.scm
@@ -1,239 +0,0 @@
-;; cc/data.scm — record types and symbol alphabets shared across modules.
-;;
-;; Concrete realization of:
-;; docs/CC-INTERNALS.md §data.scm
-;; docs/CC-CONTRACTS.md §1
-;;
-;; Adding a record or alphabet symbol requires updating the contract
-;; doc first.
-
-;; --------------------------------------------------------------------
-;; loc — source location for diagnostics
-;; --------------------------------------------------------------------
-(define-record-type loc
- (%loc file line col)
- loc?
- (file loc-file) ; bv
- (line loc-line) ; fixnum
- (col loc-col)) ; fixnum
-
-;; --------------------------------------------------------------------
-;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for
-;; PUNCT value symbols, §1.3 for KW value symbols.
-;; --------------------------------------------------------------------
-(define-record-type tok
- (%tok kind value loc hide)
- tok?
- (kind tok-kind) ; symbol from §1.1
- (value tok-value) ; bv | fixnum | symbol | #f
- (loc tok-loc) ; loc
- (hide tok-hide)) ; list of bv (macro names already expanded)
-
-(define (make-tok kind value loc)
- (%tok kind value loc '()))
-
-;; --------------------------------------------------------------------
-;; macro — preprocessor macro definition
-;; --------------------------------------------------------------------
-(define-record-type macro
- (%macro kind params body)
- macro?
- (kind macro-kind) ; 'obj | 'fn | 'fn-vararg
- (params macro-params) ; list of bv
- (body macro-body)) ; list of tok
-
-;; --------------------------------------------------------------------
-;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and
-;; CC-INTERNALS §data.scm for the ext payload table.
-;;
-;; Fields that mutate over a ctype's lifetime:
-;; size and align — set to -1/-1 on forward struct/union decl,
-;; fixed when the type is completed.
-;; ext — same; struct/union ext changes shape when
-;; the body is parsed.
-;; --------------------------------------------------------------------
-(define-record-type ctype
- (%ctype kind size align ext)
- ctype?
- (kind ctype-kind)
- (size ctype-size ctype-size-set!)
- (align ctype-align ctype-align-set!)
- (ext ctype-ext ctype-ext-set!))
-
-;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?.
-(define %t-void (%ctype 'void -1 -1 #f))
-(define %t-i8 (%ctype 'i8 1 1 #f))
-(define %t-u8 (%ctype 'u8 1 1 #f))
-(define %t-i16 (%ctype 'i16 2 2 #f))
-(define %t-u16 (%ctype 'u16 2 2 #f))
-(define %t-i32 (%ctype 'i32 4 4 #f))
-(define %t-u32 (%ctype 'u32 4 4 #f))
-(define %t-i64 (%ctype 'i64 8 8 #f))
-(define %t-u64 (%ctype 'u64 8 8 #f))
-(define %t-bool (%ctype 'bool 1 1 #f))
-
-;; --------------------------------------------------------------------
-;; sym — declared identifier (function, variable, typedef, …)
-;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage).
-;; --------------------------------------------------------------------
-(define-record-type sym
- (%sym name kind storage type slot)
- sym?
- (name sym-name) ; bv
- (kind sym-kind) ; symbol from §1.7
- (storage sym-storage) ; symbol from §1.8 or #f
- (type sym-type) ; ctype
- (slot sym-slot)) ; fixnum | bv | #f, per kind
-
-;; --------------------------------------------------------------------
-;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind),
-;; §1.10 (reg names).
-;; --------------------------------------------------------------------
-(define-record-type opnd
- (%opnd kind type ext lval?)
- opnd?
- (kind opnd-kind)
- (type opnd-type)
- (ext opnd-ext)
- (lval? opnd-lval?))
-
-;; --------------------------------------------------------------------
-;; loop-ctx — entry on parser's loop/switch context stack.
-;; See CC-CONTRACTS §1.9.
-;; --------------------------------------------------------------------
-(define-record-type loop-ctx
- (%loop-ctx kind tag has-continue?)
- loop-ctx?
- (kind loop-ctx-kind)
- (tag loop-ctx-tag)
- (has-continue? loop-ctx-has-continue?))
-
-;; --------------------------------------------------------------------
-;; fn-ctx — current-function context inside the parser.
-;; --------------------------------------------------------------------
-(define-record-type fn-ctx
- (%fn-ctx name return-type params variadic? labels)
- fn-ctx?
- (name fn-ctx-name)
- (return-type fn-ctx-return-type)
- (params fn-ctx-params)
- (variadic? fn-ctx-variadic?)
- (labels fn-ctx-labels fn-ctx-labels-set!))
-
-;; --------------------------------------------------------------------
-;; pstate — parser state. Owned by parse.scm; read-only to cg.
-;; --------------------------------------------------------------------
-(define-record-type pstate
- (%pstate toks scope tags loops fn-ctx typedefs cg)
- pstate?
- (toks ps-toks ps-toks-set!)
- (scope ps-scope ps-scope-set!)
- (tags ps-tags ps-tags-set!)
- (loops ps-loops ps-loops-set!)
- (fn-ctx ps-fn-ctx ps-fn-ctx-set!)
- (typedefs ps-typedefs ps-typedefs-set!)
- (cg ps-cg))
-
-;; --------------------------------------------------------------------
-;; cg — codegen state. Owned by cg.scm.
-;; --------------------------------------------------------------------
-;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across
-;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains
-;; them into cg-text via buf-drain!. No per-fn allocation, which is what
-;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely:
-;; the destination buf storage lives pre-mark, byte writes are stable
-;; across rewind, and the parse/cg scratch dies cleanly.
-;;
-;; in-fn? discriminates "currently inside a function body" so
-;; %cg-emit-buf can route emits to fn-buf during the body and cg-text
-;; outside it (entry stub, etc.).
-;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern).
-;; Stable except when user code adds a global — which is exactly what the
-;; parse-fn-body rewind-safety check probes.
-;;
-;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type,
-;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on
-;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept
-;; out of cg-globals so rewind-safety checks on cg-globals aren't
-;; tripped by every fn-begin.
-(define-record-type cg
- (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?)
- cg?
- (text cg-text)
- (data cg-data)
- (bss cg-bss)
- (vstack cg-vstack cg-vstack-set!)
- (frame-hi cg-frame-hi cg-frame-hi-set!)
- (label-ctr cg-label-ctr cg-label-ctr-set!)
- (str-pool cg-str-pool cg-str-pool-set!)
- (globals cg-globals cg-globals-set!)
- (fn-meta cg-fn-meta cg-fn-meta-set!)
- (fn-buf cg-fn-buf)
- (prologue-buf cg-prologue-buf)
- (max-outgoing cg-max-outgoing cg-max-outgoing-set!)
- (in-fn? cg-in-fn? cg-in-fn?-set!))
-
-;; --------------------------------------------------------------------
-;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1.
-;; --------------------------------------------------------------------
-
-;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol.
-(define %keyword-alist
- '(;; storage
- ("auto" . auto) ("register" . register) ("static" . static)
- ("extern" . extern) ("typedef" . typedef)
- ;; qualifiers (parsed and discarded by parse)
- ("const" . const) ("volatile" . volatile) ("restrict" . restrict)
- ("inline" . inline)
- ;; type specifiers
- ("void" . void) ("char" . char) ("short" . short)
- ("int" . int) ("long" . long)
- ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool)
- ;; rejected type specifiers (KW so diagnostics are crisp)
- ("float" . float) ("double" . double)
- ;; aggregates
- ("struct" . struct) ("union" . union) ("enum" . enum)
- ;; statements
- ("if" . if) ("else" . else)
- ("while" . while) ("do" . do) ("for" . for)
- ("switch" . switch) ("case" . case) ("default" . default)
- ("break" . break) ("continue" . continue)
- ("return" . return) ("goto" . goto)
- ;; operators
- ("sizeof" . sizeof)
- ;; reserved-and-rejected (KW so diagnostics are crisp)
- ("_Generic" . _Generic) ("_Atomic" . _Atomic)
- ("_Thread_local" . _Thread_local)
- ("_Alignof" . _Alignof) ("_Alignas" . _Alignas)
- ("_Static_assert" . _Static_assert)
- ("_Complex" . _Complex) ("_Imaginary" . _Imaginary)))
-
-;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol.
-;; Listed longest-match-first; the lexer scans this list in order.
-;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents.
-(define %punct-alist
- '(;; 4-byte
- ("%:%:" . paste)
- ;; 3-byte
- ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq)
- ;; 2-byte
- ("##" . paste) ("->" . arrow)
- ("++" . inc) ("--" . dec)
- ("<<" . shl) (">>" . shr)
- ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne)
- ("&&" . land) ("||" . lor)
- ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq)
- ("/=" . slash-eq) ("%=" . pct-eq)
- ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq)
- ;; digraphs (mapped to the standard equivalent symbol)
- ("<:" . lbrack) (":>" . rbrack)
- ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash)
- ;; 1-byte
- ("[" . lbrack) ("]" . rbrack)
- ("(" . lparen) (")" . rparen)
- ("{" . lbrace) ("}" . rbrace)
- ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark)
- ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct)
- ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang)
- ("<" . lt) (">" . gt) ("=" . assign)
- ("#" . hash)))
diff --git a/cc/lex.scm b/cc/lex.scm
@@ -1,900 +0,0 @@
-;; cc/lex.scm — bytestream → token list. Pure function; no I/O,
-;; no macro awareness.
-;;
-;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets
-;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate.
-;;
-;; Owner: <unassigned>
-;;
-;; Implementation notes:
-;;
-;; - The lexer walks `src` byte-by-byte, threading (pos, line, col)
-;; explicitly through every helper (no mutable state). Each token
-;; captures its starting loc; helpers return (tok npos nline ncol).
-;; - Trigraphs and `\<newline>` line splicing are handled via a single
-;; logical-byte primitive `%lex-peek`: it advances over splices and
-;; translates trigraphs in-place, so downstream code only ever sees
-;; the "translation phase 2" stream.
-;; - Comments are stripped at the same level as whitespace.
-;; - NL tokens are emitted at every physical newline so pp can use
-;; them to terminate directives.
-;;
-;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm):
-;;
-;; - Token-producing helpers wrap their inner work in a heap-mark /
-;; heap-rewind! arena. The slots that must survive the rewind
-;; (start-loc and the integer holders for npos/nline/ncol) are bound
-;; *before* the (set! mark (heap-mark)) so the let's env extensions
-;; live below the mark. The byte-run scanners' tail-call env frames
-;; and any %lex-peek 4-lists are above the mark and get reclaimed.
-;; For helpers that produce a fresh bytevector (ident, string), the
-;; bv is allocated post-rewind so it persists into the parent arena.
-;; - Numeric digit runs accumulate their value inline via
-;; %accum-int-while; they no longer materialize a per-byte cons list
-;; and then a separate %digits-value walk.
-
-;; --------------------------------------------------------------------
-;; Byte-class predicates (raw u8 values, not chars).
-;; --------------------------------------------------------------------
-(define (%digit? b) (if (< b 48) #f (if (< 57 b) #f #t))) ; '0'..'9'
-(define (%hex? b)
- (cond ((%digit? b) #t)
- ((if (< b 65) #f (if (< 70 b) #f #t)) #t) ; 'A'..'F'
- ((if (< b 97) #f (if (< 102 b) #f #t)) #t) ; 'a'..'f'
- (else #f)))
-(define (%octal? b) (if (< b 48) #f (if (< 55 b) #f #t))) ; '0'..'7'
-(define (%alpha? b)
- (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t) ; 'A'..'Z'
- ((if (< b 97) #f (if (< 122 b) #f #t)) #t) ; 'a'..'z'
- (else #f)))
-(define (%ident-start? b) (or (%alpha? b) (= b 95))) ; '_'
-(define (%ident-cont? b) (or (%ident-start? b) (%digit? b)))
-(define (%hspace? b) (or (= b 32) (= b 9) (= b 11) (= b 12))) ; SP TAB VT FF
-(define (%newline? b) (= b 10)) ; '\n'
-
-;; --------------------------------------------------------------------
-;; Logical byte access. %lex-peek returns
-;; (byte npos nline ncol)
-;; where (npos, nline, ncol) points *just past* the consumed physical
-;; bytes. On EOF it returns (#f pos line col).
-;;
-;; Two transformations folded in here:
-;;
-;; - Trigraphs: ??= ??( ??/ ??) ??' ??< ??! ??> ??-
-;; # [ \ ] ^ { | } ~
-;; The pair `??` followed by one of the nine trigraph completers
-;; produces the translated byte and advances 3 source bytes.
-;; - Line splice: a backslash immediately followed by `\n` is removed
-;; as a unit (incrementing line, resetting col to 1) and we recurse
-;; to fetch the next logical byte.
-;;
-;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and
-;; char literals do their own escape-handling.
-;; --------------------------------------------------------------------
-(define (%trigraph-byte b)
- ;; Map the third trigraph byte to its replacement, or #f.
- (cond ((= b 61) 35) ; '=' -> '#'
- ((= b 40) 91) ; '(' -> '['
- ((= b 47) 92) ; '/' -> '\\'
- ((= b 41) 93) ; ')' -> ']'
- ((= b 39) 94) ; '\'' -> '^'
- ((= b 60) 123) ; '<' -> '{'
- ((= b 33) 124) ; '!' -> '|'
- ((= b 62) 125) ; '>' -> '}'
- ((= b 45) 126) ; '-' -> '~'
- (else #f)))
-
-(define (%lex-peek src pos line col)
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n) (list #f pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ;; Trigraph: ?? + completer
- ((and (= b 63)
- (< (+ pos 2) n)
- (= (bytevector-u8-ref src (+ pos 1)) 63))
- (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2)))))
- (if tr
- (list tr (+ pos 3) line (+ col 3))
- (list b (+ pos 1) line (+ col 1)))))
- ;; Line splice: backslash + newline (consume both, no token)
- ((and (= b 92)
- (< (+ pos 1) n)
- (= (bytevector-u8-ref src (+ pos 1)) 10))
- (%lex-peek src (+ pos 2) (+ line 1) 1))
- ;; Newline: pass through but caller decides line/col bump
- ((%newline? b)
- (list b (+ pos 1) (+ line 1) 1))
- (else
- (list b (+ pos 1) line (+ col 1)))))))))
-
-;; Convenience accessors over the 4-list.
-(define (%pk-byte p) (car p))
-(define (%pk-pos p) (car (cdr p)))
-(define (%pk-line p) (car (cdr (cdr p))))
-(define (%pk-col p) (car (cdr (cdr (cdr p)))))
-
-;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with
-;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the
-;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and
-;; no list allocation is needed. Excludes the three bytes that %lex-peek
-;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump).
-(define (%fast-byte? b)
- (cond ((= b 63) #f)
- ((= b 92) #f)
- ((= b 10) #f)
- (else #t)))
-
-;; --------------------------------------------------------------------
-;; Whitespace + comment skipper. Returns (pos line col).
-;; Handles spaces/tabs, // line comments, /* block */ comments. Does
-;; *not* consume `\n` — newlines are tokens.
-;; --------------------------------------------------------------------
-(define (%skip-ws-and-comments src pos line col file)
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n) (list pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((and (%fast-byte? b) (%hspace? b))
- (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file))
- ((%fast-byte? b)
- ;; Fast-byte that isn't hspace. Only '/' is interesting;
- ;; everything else terminates the skip.
- (cond
- ((= b 47) (%maybe-comment src pos line col file))
- (else (list pos line col))))
- (else
- ;; Slow path: trigraph / splice / newline.
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((not b2) (list pos line col))
- ((%hspace? b2)
- (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p)
- file))
- ((= b2 47) (%maybe-comment src pos line col file))
- (else (list pos line col)))))))))))
-
-(define (%maybe-comment src pos line col file)
- ;; Source byte at pos resolves to '/'. Decide between // line comment,
- ;; /* block comment, or "leave the slash alone" (it's a punctuator).
- (let* ((p (%lex-peek src pos line col))
- (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (= b2 47))
- (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file))
- ((and b2 (= b2 42))
- (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q)
- file line col))
- (else (list pos line col)))))
-
-(define (%skip-line-comment src pos line col file)
- ;; Consume bytes until end-of-stream or until we *see* '\n' (do not
- ;; consume the newline itself; outer loop emits the NL).
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n) (%skip-ws-and-comments src pos line col file))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ;; '\n' terminates without consuming.
- ((= b 10) (%skip-ws-and-comments src pos line col file))
- ((%fast-byte? b)
- (%skip-line-comment src (+ pos 1) line (+ col 1) file))
- (else
- ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice.
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((not b2) (%skip-ws-and-comments src pos line col file))
- ((%newline? b2) (%skip-ws-and-comments src pos line col file))
- (else
- (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
- file)))))))))))
-
-(define (%skip-block-comment src pos line col file start-line start-col)
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n)
- (die (%loc file start-line start-col)
- "unterminated /* block comment"))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ;; Fast path for plain content bytes that aren't '*'.
- ((and (%fast-byte? b) (not (= b 42)))
- (%skip-block-comment src (+ pos 1) line (+ col 1)
- file start-line start-col))
- (else
- ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice).
- (let* ((p (%lex-peek src pos line col))
- (b1 (%pk-byte p)))
- (cond
- ((not b1)
- (die (%loc file start-line start-col)
- "unterminated /* block comment"))
- ((= b1 42)
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((not b2)
- (die (%loc file start-line start-col)
- "unterminated /* block comment"))
- ((= b2 47)
- (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q)
- file))
- (else
- ;; Re-scan starting at the byte after '*'; the '*' was
- ;; not the closer, but the next byte might itself be '*'.
- (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-line start-col)))))
- (else
- (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-line start-col)))))))))))
-
-;; --------------------------------------------------------------------
-;; Byte-run scanners.
-;;
-;; Tail-recursive walkers used by ident/number/string readers. None
-;; allocate per scanned byte on the fast path (only %lex-peek 4-lists
-;; on trigraph/splice/newline); the per-iteration env frames allocated
-;; by tail recursion are reclaimed by the caller's heap-rewind!.
-;;
-;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol)
-;; - %fill-while-bv: write matching bytes into a pre-sized bv.
-;; - %accum-int-while: accumulate a base-N integer over digit bytes.
-;; (val count npos nline ncol)
-;; - %accum-octal-bounded: same, but stops after k digits.
-;; --------------------------------------------------------------------
-(define (%scan-while pred src pos line col)
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (cnt 0))
- (cond
- ((>= pos n) (list cnt pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (if (pred b)
- (loop (+ pos 1) line (+ col 1) (+ cnt 1))
- (list cnt pos line col)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (if (and b2 (pred b2))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1))
- (list cnt pos line col)))))))))))
-
-(define (%fill-while-bv pred src pos line col bv idx)
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (idx idx))
- (cond
- ((>= pos n) idx)
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (cond
- ((pred b)
- (bytevector-u8-set! bv idx b)
- (loop (+ pos 1) line (+ col 1) (+ idx 1)))
- (else idx)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((and b2 (pred b2))
- (bytevector-u8-set! bv idx b2)
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))
- (else idx)))))))))))
-
-(define (%digit-val-byte b)
- ;; ASCII digit byte → integer value. Caller guarantees b is a valid
- ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F).
- (cond ((%digit? b) (- b 48))
- ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10))
- ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10))
- (else 0)))
-
-(define (%accum-int-while pred src pos line col base)
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0))
- (cond
- ((>= pos n) (list val cnt pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (if (pred b)
- (loop (+ pos 1) line (+ col 1)
- (+ (* val base) (%digit-val-byte b)) (+ cnt 1))
- (list val cnt pos line col)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (if (and b2 (pred b2))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p)
- (+ (* val base) (%digit-val-byte b2)) (+ cnt 1))
- (list val cnt pos line col)))))))))))
-
-(define (%accum-octal-bounded src pos line col k)
- ;; Up to k octal digits. Returns (val count npos nline ncol).
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0))
- (cond
- ((zero? k) (list val cnt pos line col))
- ((>= pos n) (list val cnt pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (if (%octal? b)
- (loop (+ pos 1) line (+ col 1) (- k 1)
- (+ (* val 8) (- b 48)) (+ cnt 1))
- (list val cnt pos line col)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (if (and b2 (%octal? b2))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1)
- (+ (* val 8) (- b2 48)) (+ cnt 1))
- (list val cnt pos line col)))))))))))
-
-;; --------------------------------------------------------------------
-;; Identifier / keyword reader.
-;;
-;; Returns (tok npos nline ncol). Caller has already verified that the
-;; first byte at `pos` satisfies %ident-start?.
-;;
-;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run,
-;; then we rewind, allocate `name` bv post-rewind so it survives, then
-;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The
-;; integer slots count/npos/nline/ncol are bound *before* the mark so
-;; they survive both rewinds.
-;; --------------------------------------------------------------------
-(define (lex-read-ident src pos file)
- ;; Public for tests. Threads line/col from a fresh start.
- (%lex-read-ident src pos 1 (+ pos 1) file))
-
-(define (%lex-read-ident src pos line col file)
- (let ((start-loc (%loc file line col))
- (count 0) (npos 0) (nline 0) (ncol 0)
- (mark 0))
- (set! mark (heap-mark))
- (let ((sres (%scan-while %ident-cont? src pos line col)))
- (set! count (car sres))
- (set! npos (car (cdr sres)))
- (set! nline (car (cdr (cdr sres))))
- (set! ncol (car (cdr (cdr (cdr sres))))))
- (heap-rewind! mark)
- (let ((name (make-bytevector count 0))
- (mark2 0))
- (set! mark2 (heap-mark))
- (%fill-while-bv %ident-cont? src pos line col name 0)
- (heap-rewind! mark2)
- (let ((kw (alist-ref name %keyword-alist)))
- (cons (if kw
- (make-tok 'KW kw start-loc)
- (make-tok 'IDENT name start-loc))
- (list npos nline ncol))))))
-
-;; --------------------------------------------------------------------
-;; Number reader.
-;;
-;; Decimal: [1-9][0-9]* (suffix: u U l L ll LL combinations)
-;; Hex: 0x[0-9a-fA-F]+ | 0X...
-;; Octal: 0[0-7]*
-;; Float: anything looking like 1.0, 1e3, .5 → die crisply.
-;;
-;; Returns (tok npos nline ncol) on success. Aborts via `die` on float.
-;;
-;; %accum-int-while folds digit collection and value computation into
-;; one walk — no per-byte cons cells, no separate digits-list pass.
-;; --------------------------------------------------------------------
-(define (lex-read-number src pos file)
- (%lex-read-number src pos 1 (+ pos 1) file))
-
-(define (%lex-read-number src pos line col file)
- (let* ((start-loc (%loc file line col))
- (p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ;; '0x' / '0X' hex prefix
- ((and (= b 48)
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X'
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (r (%accum-int-while %hex? src
- (%pk-pos q) (%pk-line q) (%pk-col q) 16))
- (val (car r))
- (cnt (car (cdr r)))
- (pos2 (car (cdr (cdr r))))
- (line2 (car (cdr (cdr (cdr r)))))
- (col2 (car (cdr (cdr (cdr (cdr r)))))))
- (if (zero? cnt)
- (die start-loc "expected hex digits after 0x")
- (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
- (cons (make-tok 'INT val start-loc) after)))))
- ;; '0' alone → octal sequence (could be just zero)
- ((= b 48)
- (let* ((r (%accum-int-while %octal? src
- (%pk-pos p) (%pk-line p) (%pk-col p) 8))
- (val (car r))
- (pos2 (car (cdr (cdr r))))
- (line2 (car (cdr (cdr (cdr r)))))
- (col2 (car (cdr (cdr (cdr (cdr r)))))))
- ;; Reject '.' / 'e' / 'E' immediately after the octal run — float.
- (%check-no-float src pos2 line2 col2 file start-loc)
- ;; Reject stray digits 8/9 in an octal context (e.g. 089).
- (let* ((p3 (%lex-peek src pos2 line2 col2))
- (b3 (%pk-byte p3)))
- (if (and b3 (%digit? b3))
- (die start-loc "invalid octal digit" (bv-of-byte b3))
- (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
- (cons (make-tok 'INT val start-loc) after))))))
- ;; '1'-'9' → decimal
- ((%digit? b)
- (let* ((r (%accum-int-while %digit? src pos line col 10))
- (val (car r))
- (pos2 (car (cdr (cdr r))))
- (line2 (car (cdr (cdr (cdr r)))))
- (col2 (car (cdr (cdr (cdr (cdr r)))))))
- (%check-no-float src pos2 line2 col2 file start-loc)
- (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
- (cons (make-tok 'INT val start-loc) after))))
- ;; '.' followed by a digit = float-style literal — reject.
- ((= b 46)
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (if (and b2 (%digit? b2))
- (die start-loc "floating-point literal not supported")
- ;; Otherwise '.' was a punctuator — caller wouldn't have
- ;; routed here unless it was a digit-led prefix.
- (die start-loc "internal: number reader on non-number"))))
- (else
- (die start-loc "internal: number reader on non-number")))))
-
-(define (%check-no-float src pos line col file start-loc)
- ;; If the byte at pos starts a fractional/exponent part, abort.
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) #t)
- ((= b 46) ; '.'
- (die start-loc "floating-point literal not supported"))
- ((or (= b 101) (= b 69)) ; 'e' / 'E'
- ;; Only a float exponent if followed by [+-]?digit.
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (%digit? b2))
- (die start-loc "floating-point literal not supported"))
- ((and b2 (or (= b2 43) (= b2 45)))
- (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q)))
- (b3 (%pk-byte r)))
- (if (and b3 (%digit? b3))
- (die start-loc "floating-point literal not supported")
- #t)))
- (else #t))))
- (else #t))))
-
-(define (%lex-strip-int-suffix src pos line col file)
- ;; Consume any combination of u U l L (the long can be doubled). We
- ;; don't validate orderings strictly; tcc.c uses the canonical forms.
- ;; Returns (npos nline ncol).
- (let loop ((pos pos) (line line) (col col))
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (list pos line col))
- ((or (= b 117) (= b 85) ; u U
- (= b 108) (= b 76)) ; l L
- (loop (%pk-pos p) (%pk-line p) (%pk-col p)))
- (else (list pos line col))))))
-
-;; --------------------------------------------------------------------
-;; Escape sequence reader.
-;;
-;; %scan-or-fill-escape decodes one escape sequence starting at `pos`
-;; (which points one past the leading `\\`). When `bv` is a bytevector,
-;; the resulting byte is written to (bv idx); when it is #f, no write
-;; occurs (used during the string-pass scan phase). Returns the 4-list
-;; (val npos nline ncol).
-;; --------------------------------------------------------------------
-(define (%scan-or-fill-escape src pos line col file start-loc bv idx)
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (die start-loc "unterminated escape sequence"))
- ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms).
- ((or (= b 120) (= b 88)) ; 'x' / 'X'
- (let* ((r (%accum-int-while %hex? src
- (%pk-pos p) (%pk-line p) (%pk-col p) 16))
- (val0 (car r))
- (cnt (car (cdr r)))
- (pos2 (car (cdr (cdr r))))
- (line2 (car (cdr (cdr (cdr r)))))
- (col2 (car (cdr (cdr (cdr (cdr r)))))))
- (cond
- ((zero? cnt) (die start-loc "expected hex digits after \\x"))
- (else
- (let ((val (bit-and val0 255)))
- (cond (bv (bytevector-u8-set! bv idx val))
- (else #f))
- (list val pos2 line2 col2))))))
- ;; \NNN — 1..3 octal digits.
- ((%octal? b)
- (let* ((r (%accum-octal-bounded src pos line col 3))
- (val0 (car r))
- (pos2 (car (cdr (cdr r))))
- (line2 (car (cdr (cdr (cdr r)))))
- (col2 (car (cdr (cdr (cdr (cdr r))))))
- (val (bit-and val0 255)))
- (cond (bv (bytevector-u8-set! bv idx val))
- (else #f))
- (list val pos2 line2 col2)))
- (else
- (let ((val (cond ((= b 110) 10) ; n
- ((= b 116) 9) ; t
- ((= b 114) 13) ; r
- ((= b 92) 92) ; \\
- ((= b 39) 39) ; '
- ((= b 34) 34) ; "
- ((= b 48) 0) ; 0 (already handled by octal but be safe)
- ((= b 97) 7) ; \a -> BEL
- ((= b 98) 8) ; \b
- ((= b 102) 12) ; \f
- ((= b 118) 11) ; \v
- ((= b 63) 63) ; \?
- (else
- (die start-loc "unknown escape" (bv-of-byte b))))))
- (cond (bv (bytevector-u8-set! bv idx val))
- (else #f))
- (list val (%pk-pos p) (%pk-line p) (%pk-col p)))))))
-
-;; --------------------------------------------------------------------
-;; String reader.
-;;
-;; Caller has verified src[pos] == '"' (raw byte 34). Returns
-;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended).
-;;
-;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes
-;; collapse to 1 byte each); after rewind we allocate the final bv and
-;; rerun with bv set so the bytes are written directly into it.
-;; --------------------------------------------------------------------
-(define (lex-read-string src pos file)
- (%lex-read-string src pos 1 (+ pos 1) file))
-
-(define (%lex-read-string src pos line col file)
- (let ((start-loc (%loc file line col))
- (cnt 0) (npos 0) (nline 0) (ncol 0)
- (mark 0))
- ;; '"' (34) is a fast-byte and never a trigraph result, so the
- ;; physical byte at `pos` is exactly the opening quote.
- (cond
- ((or (>= pos (bytevector-length src))
- (not (= (bytevector-u8-ref src pos) 34)))
- (die start-loc "internal: string reader on non-quote"))
- (else
- (set! mark (heap-mark))
- (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
- file start-loc #f)))
- (set! cnt (car sres))
- (set! npos (car (cdr sres)))
- (set! nline (car (cdr (cdr sres))))
- (set! ncol (car (cdr (cdr (cdr sres))))))
- (heap-rewind! mark)
- (let ((bv (make-bytevector cnt 0))
- (mark2 0))
- (set! mark2 (heap-mark))
- (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)
- (heap-rewind! mark2)
- (cons (make-tok 'STR bv start-loc)
- (list npos nline ncol)))))))
-
-(define (%string-pass src pos line col file start-loc bv)
- ;; Walk the string body (after opening "). When `bv` is #f, count
- ;; effective bytes; when it is a bytevector, write bytes into it at
- ;; index 0..count-1. Returns (count npos nline ncol).
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (idx 0))
- (cond
- ((>= pos n) (die start-loc "unterminated string literal"))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ;; Closing quote — fast byte but special.
- ((= b 34)
- (list idx (+ pos 1) line (+ col 1)))
- ((%fast-byte? b)
- (cond (bv (bytevector-u8-set! bv idx b))
- (else #f))
- (loop (+ pos 1) line (+ col 1) (+ idx 1)))
- (else
- ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'.
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((not b2)
- (die start-loc "unterminated string literal"))
- ((= b2 34)
- (list idx (%pk-pos p) (%pk-line p) (%pk-col p)))
- ((%newline? b2)
- (die start-loc "newline in string literal"))
- ((= b2 92)
- (let* ((er (%scan-or-fill-escape
- src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc bv idx))
- (epos (car (cdr er)))
- (eline (car (cdr (cdr er))))
- (ecol (car (cdr (cdr (cdr er))))))
- (loop epos eline ecol (+ idx 1))))
- (else
- (cond (bv (bytevector-u8-set! bv idx b2))
- (else #f))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))))))))))))
-
-;; --------------------------------------------------------------------
-;; Char reader.
-;;
-;; Caller has verified src[pos] == '\''. Multi-character constants
-;; ('AB') are rejected via die.
-;; --------------------------------------------------------------------
-(define (lex-read-char src pos file)
- (%lex-read-char src pos 1 (+ pos 1) file))
-
-(define (%lex-read-char src pos line col file)
- (let* ((start-loc (%loc file line col))
- (p0 (%lex-peek src pos line col))
- (b0 (%pk-byte p0)))
- (if (not (and b0 (= b0 39)))
- (die start-loc "internal: char reader on non-quote")
- (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0)
- file start-loc))))
-
-(define (%collect-char src pos line col file start-loc)
- ;; Read exactly one byte (handling escapes), then expect closing '\''.
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (die start-loc "unterminated char literal"))
- ((= b 39) (die start-loc "empty char literal"))
- ((%newline? b) (die start-loc "newline in char literal"))
- ((= b 92) ; escape
- (let* ((r (%scan-or-fill-escape src
- (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc #f 0))
- (val (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r))))))
- (%expect-char-close src pos2 line2 col2 file start-loc val)))
- (else
- (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc b)))))
-
-(define (%expect-char-close src pos line col file start-loc val)
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (die start-loc "unterminated char literal"))
- ((= b 39)
- (cons (make-tok 'CHAR val start-loc)
- (list (%pk-pos p) (%pk-line p) (%pk-col p))))
- (else
- (die start-loc "multi-character char constant not supported")))))
-
-;; --------------------------------------------------------------------
-;; Punctuator reader.
-;;
-;; Greedy longest-match against %punct-alist (cc/data.scm). The alist
-;; is already ordered longest-first. We additionally bucket entries by
-;; their first byte so %lex-read-punct only loops over the small set of
-;; patterns that can start at the current source byte.
-;; --------------------------------------------------------------------
-
-(define (%alist-ref-int k al)
- ;; Lookup in an int-keyed alist (linear scan, '= compare).
- (cond ((null? al) #f)
- ((= (car (car al)) k) (cdr (car al)))
- (else (%alist-ref-int k (cdr al)))))
-
-(define (%mem-int? k xs)
- (cond ((null? xs) #f)
- ((= (car xs) k) #t)
- (else (%mem-int? k (cdr xs)))))
-
-(define (%filter-by-first-byte b al)
- ;; Subset of `al` whose pattern starts with byte b, preserving order.
- (cond
- ((null? al) '())
- ((= (bytevector-u8-ref (car (car al)) 0) b)
- (cons (car al) (%filter-by-first-byte b (cdr al))))
- (else (%filter-by-first-byte b (cdr al)))))
-
-(define (%group-by-first-byte al)
- ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per
- ;; distinct first byte; sub-alist preserves longest-match-first
- ;; order from the source list.
- (let loop ((xs al) (seen '()) (out '()))
- (cond
- ((null? xs) (reverse out))
- (else
- (let* ((entry (car xs))
- (pat (car entry))
- (b (bytevector-u8-ref pat 0)))
- (cond
- ((%mem-int? b seen) (loop (cdr xs) seen out))
- (else
- (loop (cdr xs)
- (cons b seen)
- (cons (cons b (%filter-by-first-byte b al)) out)))))))))
-
-(define %punct-buckets (%group-by-first-byte %punct-alist))
-
-(define (lex-read-punct src pos file)
- (%lex-read-punct src pos 1 (+ pos 1) file))
-
-(define (%lex-read-punct src pos line col file)
- (let* ((start-loc (%loc file line col))
- (p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (die start-loc "unrecognized byte" "EOF"))
- (else
- (let ((bucket (%alist-ref-int b %punct-buckets)))
- (cond
- ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b)))
- (else (%punct-loop src pos line col file start-loc bucket))))))))
-
-(define (%punct-loop src pos line col file start-loc al)
- (cond
- ((null? al)
- (let* ((p (%lex-peek src pos line col)))
- (die start-loc "unrecognized byte"
- (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF"))))
- (else
- (let* ((entry (car al))
- (pat (car entry))
- (sym (cdr entry))
- (m (%match-bytes src pos line col pat 0)))
- (if m
- (cons (make-tok 'PUNCT sym start-loc) m)
- (%punct-loop src pos line col file start-loc (cdr al)))))))
-
-(define (%match-bytes src pos line col pat i)
- ;; If the next bytes from (pos line col), in logical-byte stream
- ;; order, equal `pat[i..]`, return (npos nline ncol) after the
- ;; match. Otherwise #f.
- (cond
- ((= i (bytevector-length pat)) (list pos line col))
- (else
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n) #f)
- (else
- (let ((b (bytevector-u8-ref src pos))
- (pb (bytevector-u8-ref pat i)))
- (cond
- ((%fast-byte? b)
- (if (= b pb)
- (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1))
- #f))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((not b2) #f)
- ((= b2 pb)
- (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p)
- pat (+ i 1)))
- (else #f))))))))))))
-
-;; --------------------------------------------------------------------
-;; lex-tokenize src file -> list of tok ending in EOF.
-;; --------------------------------------------------------------------
-(define (lex-tokenize src file)
- (%lex-loop src 0 1 1 file '() #t))
-
-;; bol? — `#t` when no token has been emitted on the current physical
-;; line yet (start of file, or only NL + whitespace seen since the last
-;; line break). pp recognizes a directive only when its leading `#` is
-;; at line-start; we forward that decision into the token stream by
-;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`.
-(define (%lex-loop src pos line col file acc bol?)
- (let* ((sw (%skip-ws-and-comments src pos line col file))
- (pos1 (car sw))
- (line1 (car (cdr sw)))
- (col1 (car (cdr (cdr sw))))
- (p (%lex-peek src pos1 line1 col1))
- (b (%pk-byte p)))
- (cond
- ;; EOF
- ((not b)
- (let* ((eof-tok (make-tok 'EOF #f (%loc file line1 col1))))
- (reverse (cons eof-tok acc))))
- ;; Newline → emit NL, reset bol?.
- ((%newline? b)
- (let ((nl (make-tok 'NL #f (%loc file line1 col1))))
- (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p)
- file (cons nl acc) #t)))
- ;; Line-leading `#` → emit HASH, but only the bare `#`. `##` is
- ;; never line-leading in valid C; if it appears, fall through to
- ;; normal punctuator handling so it lexes as `paste`.
- ((and bol? (= b 35))
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (= b2 35))
- (let* ((r (%lex-read-punct src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f)))
- (else
- (let ((tok (make-tok 'HASH #f (%loc file line1 col1))))
- (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p)
- file (cons tok acc) #f))))))
- ;; Identifier / keyword
- ((%ident-start? b)
- (let* ((r (%lex-read-ident src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f)))
- ;; Number (digit start)
- ((%digit? b)
- (let* ((r (%lex-read-number src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f)))
- ;; '.' might start a number (1.0 actually starts with digit; .5
- ;; would route here). We keep this as a punctuator unless followed
- ;; by a digit, in which case the lexer rejects per spec.
- ((= b 46)
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (%digit? b2))
- (die (%loc file line1 col1) "floating-point literal not supported"))
- (else
- (let* ((r (%lex-read-punct src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f))))))
- ;; String
- ((= b 34)
- (let* ((r (%lex-read-string src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f)))
- ;; Char
- ((= b 39)
- (let* ((r (%lex-read-char src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f)))
- ;; Punctuator (default)
- (else
- (let* ((r (%lex-read-punct src pos1 line1 col1 file))
- (tok (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r))))))
- (%lex-loop src npos nline ncol file (cons tok acc) #f))))))
diff --git a/cc/main.scm b/cc/main.scm
@@ -1,74 +1,5 @@
-;; cc/main.scm — driver. Argv, file I/O, ties phases together.
-;;
-;; Realization of docs/CC-INTERNALS.md §main.scm.
-
-;; --------------------------------------------------------------------
-;; CLI: cc <input.c> <output.P1pp>
-;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.)
-;;
-;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
-;; is the catm'd compiler source path, argv[2..] are the user-facing
-;; positional args. cc-main strips the first two.
-;; --------------------------------------------------------------------
-
-(define (%cc-slurp path)
- (let ((r (open-input path)))
- (cond ((not (car r))
- (die #f "cannot open input" path)))
- (let* ((p (cdr r))
- (rd (read-all p)))
- (close p)
- (cond ((not (car rd)) (die #f "read failed" path)))
- (cdr rd))))
-
-(define (%cc-write path bv)
- (let ((r (open-output path)))
- (cond ((not (car r))
- (die #f "cannot open output" path)))
- (let ((p (cdr r)))
- (write-bv-fd (port-fd p) bv)
- (close p)
- 0)))
-
-;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan
-;; argv for a sentinel "--cc-debug" flag. When present, debug-log
-;; prints heap usage between phases to fd 2.
-(define (%cc-flag? args flag)
- (cond ((null? args) #f)
- ((bv= (car args) flag) #t)
- (else (%cc-flag? (cdr args) flag))))
-
-(define (%cc-strip-flag args flag)
- (cond ((null? args) '())
- ((bv= (car args) flag) (cdr args))
- (else (cons (car args) (%cc-strip-flag (cdr args) flag)))))
-
-(define (cc-main av)
- (let* ((raw (cdr (cdr av)))
- (dbg (%cc-flag? raw "--cc-debug"))
- (args (%cc-strip-flag raw "--cc-debug")))
- (cond (dbg (debug-log-on!)))
- (cond
- ((or (null? args) (null? (cdr args)))
- (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>")))
- (let* ((in-path (car args))
- (out-path (car (cdr args))))
- (debug-log "phase=start" "heap" (heap-usage))
- (let* ((src (%cc-slurp in-path))
- (_1 (debug-log "phase=slurp" "heap" (heap-usage)
- "src-bytes" (bytevector-length src)))
- (toks (lex-tokenize src in-path))
- (_2 (debug-log "phase=lex" "heap" (heap-usage)))
- (expanded (pp-expand toks '()))
- (_3 (debug-log "phase=pp" "heap" (heap-usage)))
- (cg (cg-init))
- (ps (make-pstate expanded cg)))
- (parse-translation-unit ps)
- (debug-log "phase=parse" "heap" (heap-usage))
- (let ((out (cg-finish cg)))
- (debug-log "phase=cg-finish" "heap" (heap-usage)
- "out-bytes" (bytevector-length out))
- (%cc-write out-path out))
- 0))))
+;; cc/main.scm — production entry point. cc.scm defines cc-main; this
+;; one-liner fires it under (argv). Tests catm cc.scm without this file
+;; so they can run their own drivers without sys-exit pre-empting them.
(sys-exit (cc-main (argv)))
diff --git a/cc/parse.scm b/cc/parse.scm
@@ -1,1673 +0,0 @@
-;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1.
-
-(define (make-pstate toks cg)
- (%pstate toks (list '()) (list '()) '() #f '() cg))
-
-(define (peek ps) (car (ps-toks ps)))
-(define (peek2 ps)
- (let ((r (cdr (ps-toks ps))))
- (if (null? r) (car (ps-toks ps)) (car r))))
-(define (advance ps)
- (let ((t (peek ps))) (ps-toks-set! ps (cdr (ps-toks ps))) t))
-(define (at-kw? ps s)
- (pmatch (peek ps)
- (($ tok? (kind KW) (value ,v)) (eq? v s))
- (else #f)))
-(define (at-punct? ps s)
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value ,v)) (eq? v s))
- (else #f)))
-(define (expect-kw ps s)
- (let ((t (peek ps)))
- (pmatch t
- (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps))
- (else (die (tok-loc t) "expected kw" s)))))
-(define (expect-punct ps s)
- (let ((t (peek ps)))
- (pmatch t
- (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps))
- (else (die (tok-loc t) "expected punct" s)))))
-
-(define (scope-enter! ps)
- (ps-scope-set! ps (cons '() (ps-scope ps)))
- (ps-tags-set! ps (cons '() (ps-tags ps))))
-(define (scope-leave! ps)
- (ps-scope-set! ps (cdr (ps-scope ps)))
- (ps-tags-set! ps (cdr (ps-tags ps))))
-(define (scope-bind! ps n s)
- (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f)))
- (if (alist-ref n top) (die #f "dup decl" n)
- (ps-scope-set! ps (cons (alist-set n s top) r)))))
-(define (scope-lookup ps n)
- (let loop ((f (ps-scope ps)))
- (cond ((null? f) #f)
- (else
- (let ((v (alist-ref n (car f))))
- (if v v (loop (cdr f))))))))
-(define (tag-bind! ps n c)
- (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f)))
- (ps-tags-set! ps (cons (alist-set n c top) r))))
-(define (tag-lookup ps n)
- (let loop ((f (ps-tags ps)))
- (cond ((null? f) #f)
- (else (let ((v (alist-ref n (car f))))
- (if v v (loop (cdr f))))))))
-(define (typedef-add! ps n)
- (ps-typedefs-set! ps (alist-set n #t (ps-typedefs ps))))
-(define (typedef? ps n)
- (if (alist-ref n (ps-typedefs ps)) #t #f))
-
-(define (%mk-ptr p) (%ctype 'ptr 8 8 p))
-(define (%mk-arr e n)
- (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e)))
- (ctype-align e) (cons e n)))
-(define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v)))
-(define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr))
-(define (ctype-is-fn? t) (eq? (ctype-kind t) 'fn))
-(define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr))
-
-(define (eat-cv-quals! ps)
- (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
- (at-kw? ps 'restrict))
- (advance ps) (eat-cv-quals! ps))
- (else #t)))
-
-(define (parse-decl-spec ps)
- (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f))
- (let ((t (peek ps)))
- (cond
- ((or (at-kw? ps 'auto) (at-kw? ps 'register))
- (advance ps) (loop sto sn lg b #t))
- ((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t))
- ((at-kw? ps 'extern) (advance ps) (loop 'extern sn lg b #t))
- ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t))
- ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
- (at-kw? ps 'restrict) (at-kw? ps 'inline))
- (advance ps) (loop sto sn lg b #t))
- ((at-kw? ps 'signed) (advance ps) (loop sto 'signed lg b #t))
- ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t))
- ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t))
- ((at-kw? ps 'long) (advance ps) (loop sto sn (+ lg 1) b #t))
- ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t))
- ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t))
- ((at-kw? ps 'int) (advance ps) (loop sto sn lg 'int #t))
- ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t))
- ((or (at-kw? ps 'float) (at-kw? ps 'double)
- (at-kw? ps '_Complex) (at-kw? ps '_Imaginary))
- (die (tok-loc t) "no float" (tok-value t)))
- ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local)
- (at-kw? ps '_Alignas) (at-kw? ps '_Generic)
- (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert))
- (die (tok-loc t) "rejected" (tok-value t)))
- ((at-kw? ps 'struct)
- (loop sto sn lg (parse-aggregate-spec ps 'struct) #t))
- ((at-kw? ps 'union)
- (loop sto sn lg (parse-aggregate-spec ps 'union) #t))
- ((at-kw? ps 'enum)
- (loop sto sn lg (parse-enum-spec ps) #t))
- ((and (not b) (eq? (tok-kind t) 'IDENT)
- (typedef? ps (tok-value t)))
- (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk))))
- (if (and sm (eq? (sym-kind sm) 'typedef))
- (loop sto sn lg (sym-type sm) #t)
- (die (tok-loc tk) "typedef no sym" (tok-value tk)))))
- (else
- (cond ((not saw) (die (tok-loc t) "expected decl-spec"
- (tok-value t)))
- (else (cons sto (resolve-base t sn lg b)))))))))
-
-(define (resolve-base loc sn lg b)
- (cond
- ((eq? b 'void)
- (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void))
- ((eq? b 'bool)
- (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool))
- ((eq? b 'char)
- (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8)))
- ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg)))))
- (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16))
- ((= lg 0) (if (eq? sn 'unsigned) %t-u32 %t-i32))
- (else (if (eq? sn 'unsigned) %t-u64 %t-i64))))
- ((ctype? b)
- (if (or sn (not (zero? lg))) (die loc "type+qual") b))
- (else (die loc "unknown decl-spec"))))
-
-(define (parse-aggregate-spec ps kind)
- (advance ps)
- (let ((tag (pmatch (peek ps)
- (($ tok? (kind IDENT)) (tok-value (advance ps)))
- (else #f))))
- (cond
- ((at-punct? ps 'lbrace)
- (advance ps)
- (let* ((ex (and tag (tag-lookup ps tag)))
- (ct (cond ((and ex (eq? (ctype-kind ex) kind)) ex)
- (else (let ((c (%ctype kind -1 -1
- (list (or tag #f) #f '()))))
- (if tag (tag-bind! ps tag c)) c))))
- (fields (parse-struct-fields ps)))
- (expect-punct ps 'rbrace)
- (complete-agg! ct kind tag fields) ct))
- (tag (let ((ex (tag-lookup ps tag)))
- (cond (ex ex)
- (else (let ((c (%ctype kind -1 -1
- (list tag #f '()))))
- (tag-bind! ps tag c) c)))))
- (else (die (tok-loc (peek ps)) "anon agg")))))
-
-(define (parse-struct-fields ps)
- (let loop ((acc '()) (off 0))
- (cond
- ((at-punct? ps 'rbrace) (reverse acc))
- (else
- (let ((spec (parse-decl-spec ps)))
- (let dl ((acc2 acc) (o2 off))
- (let* ((p (parse-declarator ps (cdr spec)))
- (nm (car p)) (ty (cdr p))
- (al (max (ctype-align ty) 1))
- (sz (ctype-size ty))
- (oa (align-up o2 al)))
- (cond
- ((at-punct? ps 'comma)
- (advance ps)
- (dl (cons (list nm ty oa) acc2)
- (+ oa (max sz 0))))
- ((at-punct? ps 'semi)
- (advance ps)
- (loop (cons (list nm ty oa) acc2)
- (+ oa (max sz 0))))
- (else (die (tok-loc (peek ps)) "field"))))))))))
-
-(define (complete-agg! ct k tag fs)
- (let* ((ma (let m ((xs fs) (a 1))
- (if (null? xs) a
- (m (cdr xs) (max a (ctype-align (cadr (car xs))))))))
- (last (let l ((xs fs) (e 0))
- (if (null? xs) e
- (let* ((f (car xs)) (off (car (cddr f)))
- (sz (ctype-size (cadr f))))
- (l (cdr xs) (max e (+ off (max sz 0))))))))
- (sz (cond ((eq? k 'union)
- (let u ((xs fs) (s 0))
- (if (null? xs) s
- (u (cdr xs)
- (max s (ctype-size (cadr (car xs))))))))
- (else (align-up last ma)))))
- (ctype-size-set! ct sz)
- (ctype-align-set! ct ma)
- (ctype-ext-set! ct (list tag #t fs))))
-
-(define (parse-enum-spec ps)
- (advance ps)
- (let ((tag (pmatch (peek ps)
- (($ tok? (kind IDENT)) (tok-value (advance ps)))
- (else #f))))
- (cond
- ((at-punct? ps 'lbrace)
- (advance ps)
- (let ((ct (%ctype 'enum 4 4 (list tag '()))))
- (if tag (tag-bind! ps tag ct))
- (let loop ((vs '()) (nv 0))
- (cond
- ((at-punct? ps 'rbrace)
- (advance ps)
- (ctype-ext-set! ct (list tag (reverse vs))) ct)
- (else
- (let* ((nt (advance ps)) (nm (tok-value nt))
- (val (cond ((at-punct? ps 'assign)
- (advance ps) (parse-const-int ps))
- (else nv))))
- (scope-bind! ps nm
- (%sym nm 'enum-const #f %t-i32 val))
- (cond ((at-punct? ps 'comma) (advance ps))
- ((at-punct? ps 'rbrace) #t)
- (else (die (tok-loc (peek ps)) "enum")))
- (loop (cons (cons nm val) vs) (+ val 1))))))))
- (tag (let ((e (tag-lookup ps tag)))
- (cond (e e)
- (else (let ((c (%ctype 'enum 4 4 (list tag '()))))
- (tag-bind! ps tag c) c)))))
- (else (die (tok-loc (peek ps)) "enum")))))
-
-(define (parse-const-int ps)
- (let ((t (peek ps)))
- (pmatch t
- (($ tok? (kind INT)) (tok-value (advance ps)))
- (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps)))
- (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps))
- (($ tok? (kind IDENT) (value ,n))
- (let ((sm (scope-lookup ps n)))
- (cond ((and sm (eq? (sym-kind sm) 'enum-const))
- (advance ps) (sym-slot sm))
- (else (die (tok-loc t) "const?" n)))))
- (else (die (tok-loc t) "const?" (tok-value t))))))
-
-(define (parse-declarator ps base)
- ((cdr (parse-decl-cont ps)) base
- (lambda (n t) (cons n t))))
-
-(define (parse-decl-cont ps)
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value star))
- (advance ps) (eat-cv-quals! ps)
- (let* ((r (parse-decl-cont ps)) (rf (cdr r)))
- (cons (car r) (lambda (b k) (rf (%mk-ptr b) k)))))
- (($ tok? (kind PUNCT) (value lparen))
- (guard (paren-is-group? ps))
- (advance ps)
- (let* ((i (parse-decl-cont ps)) (if- (cdr i)))
- (expect-punct ps 'rparen)
- (let ((s (parse-decl-suf-cont ps)))
- (cons (car i) (lambda (b k) (if- (s b) k))))))
- (($ tok? (kind IDENT) (value ,n))
- (advance ps)
- (let ((s (parse-decl-suf-cont ps)))
- (cons n (lambda (b k) (k n (s b))))))
- (else
- (let ((s (parse-decl-suf-cont ps)))
- (cons #f (lambda (b k) (k #f (s b))))))))
-
-(define (parse-decl-suf-cont ps)
- ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first):
- ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2)
- ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as
- ;; outermost). The recursive structure builds the inner suffix's
- ;; result first, then this level wraps.
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value lbrack))
- (advance ps)
- (let* ((ln (cond ((at-punct? ps 'rbrack) -1)
- (else (parse-const-int ps))))
- (_ (expect-punct ps 'rbrack))
- (r (parse-decl-suf-cont ps)))
- (lambda (b) (%mk-arr (r b) ln))))
- (($ tok? (kind PUNCT) (value lparen))
- (advance ps)
- (let* ((res (parse-fn-params ps))
- (p (car res)) (v (cdr res)))
- (expect-punct ps 'rparen)
- (let ((r (parse-decl-suf-cont ps)))
- (lambda (b) (%mk-fn (r b) p v)))))
- (else (lambda (b) b))))
-
-(define (paren-is-group? ps)
- (pmatch (peek2 ps)
- (($ tok? (kind KW) (value ,v))
- (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short)
- (eq? v 'int) (eq? v 'long) (eq? v 'signed)
- (eq? v 'unsigned) (eq? v '_Bool)
- (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
- (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict) (eq? v 'static)
- (eq? v 'extern) (eq? v 'register)) #f)
- (else #t)))
- (($ tok? (kind IDENT) (value ,n))
- (cond ((typedef? ps n) #f) (else #t)))
- (($ tok? (kind PUNCT) (value rparen)) #f)
- (($ tok? (kind PUNCT) (value star)) #t)
- (($ tok? (kind PUNCT) (value lparen)) #t)
- (($ tok? (kind PUNCT) (value lbrack)) #t)
- (else #f)))
-
-(define (parse-fn-params ps)
- (cond
- ((at-punct? ps 'rparen) (cons '() #f))
- ((and (at-kw? ps 'void)
- (eq? (tok-kind (peek2 ps)) 'PUNCT)
- (eq? (tok-value (peek2 ps)) 'rparen))
- (advance ps) (cons '() #f))
- (else
- (let loop ((acc '()))
- (cond
- ((at-punct? ps 'ellipsis)
- (advance ps) (cons (reverse acc) #t))
- (else
- (let* ((sp (parse-decl-spec ps))
- (p (parse-declarator ps (cdr sp)))
- (nm (car p)) (ty (cdr p))
- (ty2 (cond ((ctype-is-arr? ty)
- (%mk-ptr (car (ctype-ext ty))))
- ((ctype-is-fn? ty) (%mk-ptr ty))
- (else ty))))
- (cond
- ((at-punct? ps 'comma)
- (advance ps) (loop (cons (cons nm ty2) acc)))
- ((at-punct? ps 'rparen)
- (cons (reverse (cons (cons nm ty2) acc)) #f))
- (else (die (tok-loc (peek ps)) "param"))))))))))
-
-(define (parse-translation-unit ps)
- (cond
- ((eq? (tok-kind (peek ps)) 'EOF) #t)
- (else (parse-decl-or-fn ps) (parse-translation-unit ps))))
-
-(define (parse-decl-or-fn ps)
- (let* ((sp (parse-decl-spec ps))
- (sto (car sp)) (b (cdr sp)))
- (cond
- ((at-punct? ps 'semi) (advance ps) 'decl)
- (else
- (let* ((p (parse-declarator ps b))
- (n (car p)) (t (cdr p)))
- (cond
- ((and (ctype-is-fn? t) (at-punct? ps 'lbrace))
- (parse-fn-body ps n t) 'fn)
- (else
- (handle-decl ps sto n t)
- (let lp ()
- (cond
- ((at-punct? ps 'comma)
- (advance ps)
- (let* ((p2 (parse-declarator ps b))
- (n2 (car p2)) (t2 (cdr p2)))
- (handle-decl ps sto n2 t2) (lp)))
- (else (expect-punct ps 'semi) 'decl))))))))))
-
-(define (handle-decl ps sto n ty)
- (cond
- ((not n) (die #f "no name"))
- ((eq? sto 'typedef)
- (typedef-add! ps n)
- (scope-bind! ps n (%sym n 'typedef #f ty #f)))
- ((ctype-is-fn? ty)
- (scope-bind! ps n
- (%sym n 'fn (or sto 'extern) ty
- (bytevector-append "cc__" n))))
- ;; §I: block-scope `static` routes to a global with a name mangled
- ;; on the enclosing function so two functions can each have their
- ;; own `static int n;` without colliding. The sym's NAME holds the
- ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__"
- ;; onto sym-name to derive the emitted label); scope-bind!s key
- ;; remains the original identifier for source-level lookup.
- ((and (eq? sto 'static) (ps-fn-ctx ps))
- (let* ((fname (fn-ctx-name (ps-fn-ctx ps)))
- (mangled (bytevector-append fname "__" n))
- (sm (%sym mangled 'var 'static ty
- (bytevector-append "cc__" mangled))))
- (scope-bind! ps n sm)
- (cond
- ((at-punct? ps 'assign)
- (advance ps)
- (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty)))
- (else (cg-emit-global (ps-cg ps) sm #f)))))
- (else
- (cond
- ((not (ps-fn-ctx ps))
- (let ((sm (%sym n 'var (or sto 'extern) ty
- (bytevector-append "cc__" n))))
- (scope-bind! ps n sm)
- (cond
- ((at-punct? ps 'assign)
- (advance ps)
- (cg-emit-global (ps-cg ps) sm
- (parse-init-global ps ty)))
- ((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm))
- (else (cg-emit-global (ps-cg ps) sm #f)))))
- (else
- (let* ((sz (max (ctype-size ty) 1))
- (al (max (ctype-align ty) 1))
- (sl (cg-alloc-slot (ps-cg ps) sz al))
- (sm (%sym n 'var (or sto 'auto) ty sl)))
- (scope-bind! ps n sm)
- (cond
- ((at-punct? ps 'assign)
- (advance ps)
- (cond
- ;; Aggregate locals get the per-element store treatment.
- ((or (at-punct? ps 'lbrace)
- (and (eq? (ctype-kind ty) 'arr)
- (eq? (tok-kind (peek ps)) 'STR)))
- (parse-init-local-aggregate ps sm ty))
- (else
- (cg-push-sym (ps-cg ps) sm)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) ty)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps)))))
- (else #t))))))))
-
-;; ====================================================================
-;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST).
-;;
-;; parse-init-global ps ty
-;; Reads the initializer following `=` for a file-scope or block-scope
-;; static var of static-storage type `ty` and returns a list of
-;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for
-;; the piece grammar.
-;;
-;; parse-init-local ps sm ty
-;; Reads the initializer for an auto-storage variable bound to slot
-;; sym `sm` and emits per-element store cg ops. Returns unspecified.
-;; ====================================================================
-
-(define (%int->le-bv n nbytes)
- ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes
- ;; >= sign-bit are filled by repeated >>8 (works for both signed and
- ;; unsigned because we only keep the low N bytes).
- (let ((out (make-bytevector nbytes 0)))
- (let loop ((i 0) (v n))
- (cond
- ((= i nbytes) out)
- (else
- (bytevector-u8-set! out i (bit-and v 255))
- (loop (+ i 1) (arithmetic-shift v -8)))))))
-
-(define (%const-init-piece ps ty)
- ;; Parse a non-brace initializer expression for scalar type `ty` and
- ;; return a single piece. Recognised forms:
- ;; - INT (with optional unary +/-) -> N-byte LE bv
- ;; - enum-const IDENT -> N-byte LE bv
- ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name)
- ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name)
- ;; - STR (only for char* targets) -> (label-ref . string-pool-label)
- (let ((t (peek ps)))
- (cond
- ;; Address initializer: &ident -> label-ref
- ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp))
- (advance ps)
- (let ((it (peek ps)))
- (cond
- ((eq? (tok-kind it) 'IDENT)
- (advance ps)
- (let ((sm (scope-lookup ps (tok-value it))))
- (cond
- ((not sm) (die (tok-loc it) "init: undecl" (tok-value it)))
- ((or (eq? (sym-kind sm) 'fn)
- (and (eq? (sym-kind sm) 'var)
- (or (eq? (sym-storage sm) 'static)
- (eq? (sym-storage sm) 'extern))))
- (cons 'label-ref (sym-slot sm)))
- (else
- (die (tok-loc it) "init: &x must reference a global"
- (tok-value it))))))
- (else (die (tok-loc it) "init: &?" (tok-value it))))))
- ;; Function name or array name as a label-ref initializer.
- ;; (Both decay to a pointer when used as a value.)
- ((and (eq? (tok-kind t) 'IDENT)
- (let ((sm (scope-lookup ps (tok-value t))))
- (and sm
- (or (eq? (sym-kind sm) 'fn)
- (and (eq? (sym-kind sm) 'var)
- (eq? (ctype-kind (sym-type sm)) 'arr)
- (or (eq? (sym-storage sm) 'static)
- (eq? (sym-storage sm) 'extern)))))))
- (advance ps)
- (let ((sm (scope-lookup ps (tok-value t))))
- (cons 'label-ref (sym-slot sm))))
- ;; Plain string literal as char* initializer.
- ((eq? (tok-kind t) 'STR)
- (advance ps)
- (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t))))
- (cons 'label-ref lbl)))
- ;; Otherwise it's a const integer.
- (else
- (let ((v (parse-const-int ps)))
- (%int->le-bv v (max (ctype-size ty) 1)))))))
-
-(define (%init-array-elem-type ty)
- (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty)))
- (else (die #f "init: not an array" ty))))
-
-(define (%init-array-decl-len ty)
- ;; Declared array length (-1 = inferred).
- (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1)))
-
-(define (%init-fix-array-size! ty count)
- ;; Patch an inferred-length array to `count`.
- (let ((elem (car (ctype-ext ty))))
- (ctype-ext-set! ty (cons elem count))
- (ctype-size-set! ty (* count (ctype-size elem)))))
-
-(define (%init-struct-fields ty)
- ;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
- (let ((ext (ctype-ext ty)))
- (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext)))
- (else (die #f "init: not a struct" ty)))))
-
-(define (%find-field fields nm)
- (cond ((null? fields) #f)
- ((equal? (car (car fields)) nm) (car fields))
- (else (%find-field (cdr fields) nm))))
-
-(define (%pad-piece nbytes)
- (make-bytevector nbytes 0))
-
-;; ----- Global initializers ---------------------------------------------
-(define (parse-init-global ps ty)
- (pmatch (peek ps)
- ;; String literal initializer for char[]
- (($ tok? (kind STR) (value ,s))
- (guard (and (eq? (ctype-kind ty) 'arr)
- (let ((et (car (ctype-ext ty))))
- (or (eq? et %t-i8) (eq? et %t-u8)))))
- (advance ps)
- (let* ((slen (bytevector-length s))
- (decl (cdr (ctype-ext ty)))
- (final (cond ((< decl 0) (+ slen 1)) (else decl))))
- (cond ((< decl 0) (%init-fix-array-size! ty final)))
- (let ((bv (make-bytevector final 0)))
- (let loop ((i 0))
- (cond
- ((or (= i slen) (>= i final)) (list bv))
- (else
- (bytevector-u8-set! bv i (bytevector-u8-ref s i))
- (loop (+ i 1))))))))
- ;; Brace-form
- (($ tok? (kind PUNCT) (value lbrace))
- (advance ps)
- (cond
- ((eq? (ctype-kind ty) 'arr)
- (%parse-init-array-list ps ty))
- ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
- (%parse-init-struct-list ps ty))
- (else
- ;; Brace-wrapped scalar: { expr }
- (let ((piece (%const-init-piece ps ty)))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace)
- (list piece)))))
- ;; Bare scalar initializer
- (else (list (%const-init-piece ps ty)))))
-
-(define (%parse-init-array-list ps ty)
- ;; Element-list array initializer; assumes `{` already consumed.
- (let* ((elem (%init-array-elem-type ty))
- (esize (ctype-size elem))
- (decl (%init-array-decl-len ty)))
- (let lp ((acc '()) (count 0))
- (cond
- ((at-punct? ps 'rbrace)
- (advance ps)
- (cond ((< decl 0) (%init-fix-array-size! ty count)))
- ;; Pad to declared length if longer than count.
- (let* ((final (cond ((< decl 0) count) (else decl)))
- (pad (- final count)))
- (cond
- ((> pad 0)
- (reverse (cons (%pad-piece (* pad esize)) acc)))
- (else (reverse acc)))))
- (else
- (let ((piece
- (cond
- ((at-punct? ps 'lbrace)
- ;; Nested aggregate: brace-flatten via recursion.
- (advance ps)
- ;; element is itself struct/array
- (cond
- ((eq? (ctype-kind elem) 'arr)
- (%parse-init-array-list ps elem))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-struct-list ps elem))
- (else
- (let ((p (%const-init-piece ps elem)))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace)
- (list p)))))
- (else
- (list (%const-init-piece ps elem))))))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (lp (append (reverse piece) acc) (+ count 1))))))))
-
-(define (%piece-bytesize p)
- ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits
- ;; one byte per element; a (label-ref . _) emits an 8-byte slot.
- (cond
- ((bytevector? p) (bytevector-length p))
- ((and (pair? p) (eq? (car p) 'label-ref)) 8)
- (else (die #f "init: unknown piece" p))))
-
-(define (%pieces-bytesize ps-list)
- (let loop ((xs ps-list) (n 0))
- (cond ((null? xs) n)
- (else (loop (cdr xs) (+ n (%piece-bytesize (car xs))))))))
-
-(define (%merge-init-entries entries total-size)
- ;; entries: list of (abs-offset . piece-list), in source order.
- ;; Sort stably by offset (later writes to the same offset win, per C
- ;; designated-init semantics) and emit pad pieces in any gaps and at
- ;; the tail. Preserves label-ref pieces — we never merge them into a
- ;; flat bv.
- (let* ((sorted (%init-stable-sort-by-offset entries))
- (out
- (let walk ((xs sorted) (cursor 0) (acc '()))
- (cond
- ((null? xs)
- (cond
- ((< cursor total-size)
- (reverse (cons (%pad-piece (- total-size cursor)) acc)))
- (else (reverse acc))))
- (else
- (let* ((e (car xs))
- (eoff (car e))
- (epieces (cdr e))
- (esize (%pieces-bytesize epieces))
- (acc1 (cond
- ((> eoff cursor)
- (cons (%pad-piece (- eoff cursor)) acc))
- (else acc)))
- (acc2 (append (reverse epieces) acc1)))
- (walk (cdr xs) (+ eoff esize) acc2)))))))
- out))
-
-(define (%init-stable-sort-by-offset entries)
- ;; Insertion sort, stable by source order for ties. n is small (one
- ;; entry per initialized field) so O(n^2) is fine.
- (let lp ((xs entries) (acc '()))
- (cond
- ((null? xs) acc)
- (else
- (let ((e (car xs)))
- (lp (cdr xs)
- (let ins ((ys acc) (head '()))
- (cond
- ((null? ys)
- (append (reverse head) (list e)))
- ((<= (car e) (car (car ys)))
- (append (reverse head) (cons e ys)))
- (else
- (ins (cdr ys) (cons (car ys) head)))))))))))
-
-(define (%parse-init-struct-list ps ty)
- ;; Struct/union initializer; assumes `{` already consumed. Supports
- ;; positional and `.field = expr` forms — including out-of-order
- ;; designators ({.y=5, .x=7}). Each entry records its absolute
- ;; offset; %merge-init-entries sorts and pads at the closing brace.
- (let* ((fields (%init-struct-fields ty))
- (size (ctype-size ty)))
- (let lp ((entries '()) (rest fields))
- (cond
- ((at-punct? ps 'rbrace)
- (advance ps)
- (%merge-init-entries (reverse entries) size))
- (else
- (let* ((designated? (at-punct? ps 'dot))
- (target
- (cond
- (designated?
- (advance ps)
- (let ((nt (advance ps)))
- (cond
- ((not (eq? (tok-kind nt) 'IDENT))
- (die (tok-loc nt) "init: .field expects ident")))
- (let ((f (%find-field fields (tok-value nt))))
- (cond
- ((not f) (die (tok-loc nt) "init: no such field"
- (tok-value nt))))
- (expect-punct ps 'assign)
- f)))
- ((null? rest)
- (die (tok-loc (peek ps)) "init: too many fields"))
- (else (car rest))))
- (fname (car target))
- (fty (car (cdr target)))
- (foff (car (cddr target)))
- (piece-list
- (cond
- ((at-punct? ps 'lbrace)
- (advance ps)
- (cond
- ((eq? (ctype-kind fty) 'arr)
- (%parse-init-array-list ps fty))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-struct-list ps fty))
- (else
- (let ((p (%const-init-piece ps fty)))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace)
- (list p)))))
- (else
- (list (%const-init-piece ps fty))))))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (lp (cons (cons foff piece-list) entries)
- (cond
- (designated?
- ;; designated init: drop fields up to and including target
- (let drop ((xs fields))
- (cond
- ((null? xs) '())
- ((equal? (car (car xs)) fname) (cdr xs))
- (else (drop (cdr xs))))))
- (else (cdr rest))))))))))
-
-;; ----- Local aggregate initializers ------------------------------------
-;; Emits per-element store sequences via cg ops into the slot of `sm`
-;; (a 'var sym whose slot is the frame offset). Assumes the assignment
-;; `=` has already been consumed.
-(define (parse-init-local-aggregate ps sm ty)
- (pmatch (peek ps)
- ;; Local char[] = "string" — fill from string bytes.
- (($ tok? (kind STR) (value ,s))
- (guard (and (eq? (ctype-kind ty) 'arr)
- (let ((et (car (ctype-ext ty))))
- (or (eq? et %t-i8) (eq? et %t-u8)))))
- (advance ps)
- (let* ((slen (bytevector-length s))
- (decl (cdr (ctype-ext ty)))
- (final (cond ((< decl 0) (+ slen 1)) (else decl))))
- (cond ((< decl 0) (%init-fix-array-size! ty final)))
- ;; Emit byte stores for each char in s, plus NUL for the
- ;; trailing slot if final > slen.
- (let loop ((i 0))
- (cond
- ((>= i final) #t)
- (else
- (let ((b (cond ((< i slen) (bytevector-u8-ref s i))
- (else 0)))
- (off (+ (sym-slot sm) i)))
- (%push-frame-elem-lval ps off %t-u8)
- (cg-push-imm (ps-cg ps) %t-u8 b)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps))
- (loop (+ i 1))))))))
- (($ tok? (kind PUNCT) (value lbrace))
- (advance ps)
- (cond
- ((eq? (ctype-kind ty) 'arr)
- (%parse-init-local-array-list ps sm (sym-slot sm) ty))
- ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
- (%parse-init-local-struct-list ps sm (sym-slot sm) ty))
- (else (die #f "init local: brace on scalar?"))))
- (else (die (tok-loc (peek ps)) "init local aggregate?"))))
-
-(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk)
- ;; Emit a single scalar store at slot[base + rel-off]. piece is the
- ;; raw initializer expression — but here we want to actually evaluate
- ;; it via parse-expr to allow non-const expressions for autos.
- ;; Caller handles this; this helper handles the store-into-frame ops.
- 0)
-
-(define (%push-frame-elem-lval ps base-off ty)
- (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t)))
-
-(define (%parse-init-local-array-list ps sm base-off ty)
- (let* ((elem (%init-array-elem-type ty))
- (esize (ctype-size elem))
- (decl (%init-array-decl-len ty)))
- (let lp ((i 0))
- (cond
- ((at-punct? ps 'rbrace)
- (advance ps)
- (cond ((< decl 0) (%init-fix-array-size! ty i)))
- ;; Zero out remaining slots if any (declared length > i).
- (let ((final (cond ((< decl 0) i) (else decl))))
- (let zlp ((k i))
- (cond
- ((>= k final) #t)
- (else
- (let ((off (+ base-off (* k esize))))
- (cond
- ((or (eq? (ctype-kind elem) 'arr)
- (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- ;; Zero each byte in this aggregate slot.
- (let zb ((j 0))
- (cond
- ((>= j esize) #t)
- (else
- (%push-frame-elem-lval ps (+ off j) %t-u8)
- (cg-push-imm (ps-cg ps) %t-u8 0)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps))
- (zb (+ j 1))))))
- (else
- (%push-frame-elem-lval ps off elem)
- (cg-push-imm (ps-cg ps) elem 0)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps)))))
- (zlp (+ k 1)))))))
- (else
- (let ((eoff (+ base-off (* i esize))))
- (cond
- ((at-punct? ps 'lbrace)
- (advance ps)
- (cond
- ((eq? (ctype-kind elem) 'arr)
- (%parse-init-local-array-list ps sm eoff elem))
- ((or (eq? (ctype-kind elem) 'struct)
- (eq? (ctype-kind elem) 'union))
- (%parse-init-local-struct-list ps sm eoff elem))
- (else
- (%push-frame-elem-lval ps eoff elem)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) elem)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace))))
- (else
- (%push-frame-elem-lval ps eoff elem)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) elem)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (lp (+ i 1))))))))
-
-(define (%bv-in-list? bv xs)
- (cond ((null? xs) #f)
- ((equal? bv (car xs)) #t)
- (else (%bv-in-list? bv (cdr xs)))))
-
-(define (%emit-zero-field ps base-off f)
- ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b)
- ;; and silently drops the rest. Compute absolute byte offsets via
- ;; nested binary +.
- (let* ((fty (car (cdr f)))
- (foff (car (cddr f)))
- (fsize (ctype-size fty))
- (start-off (+ base-off foff)))
- (let zb ((j 0))
- (cond
- ((>= j fsize) #t)
- (else
- (%push-frame-elem-lval ps (+ start-off j) %t-u8)
- (cg-push-imm (ps-cg ps) %t-u8 0)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps))
- (zb (+ j 1)))))))
-
-(define (%parse-init-local-struct-list ps sm base-off ty)
- ;; Track each initialized field by name in `seen`; at the closing brace
- ;; zero every field NOT in `seen`. The previous design tracked positional
- ;; "remaining fields" via `rest`, which silently dropped earlier fields
- ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x`
- ;; uninitialized). C requires every unmentioned member of an aggregate
- ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21).
- (let ((fields (%init-struct-fields ty)))
- (let lp ((rest fields) (seen '()))
- (cond
- ((at-punct? ps 'rbrace)
- (advance ps)
- (for-each
- (lambda (f)
- (cond ((not (%bv-in-list? (car f) seen))
- (%emit-zero-field ps base-off f))))
- fields))
- (else
- (let* ((designated? (at-punct? ps 'dot))
- (target
- (cond
- (designated?
- (advance ps)
- (let ((nt (advance ps)))
- (let ((f (%find-field fields (tok-value nt))))
- (cond
- ((not f) (die (tok-loc nt) "init: no such field"
- (tok-value nt))))
- (expect-punct ps 'assign)
- f)))
- ((null? rest)
- (die (tok-loc (peek ps)) "init: too many fields"))
- (else (car rest))))
- (fname (car target))
- (fty (car (cdr target)))
- (foff (car (cddr target)))
- (eoff (+ base-off foff)))
- (cond
- ((at-punct? ps 'lbrace)
- (advance ps)
- (cond
- ((eq? (ctype-kind fty) 'arr)
- (%parse-init-local-array-list ps sm eoff fty))
- ((or (eq? (ctype-kind fty) 'struct)
- (eq? (ctype-kind fty) 'union))
- (%parse-init-local-struct-list ps sm eoff fty))
- (else
- (%push-frame-elem-lval ps eoff fty)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) fty)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (expect-punct ps 'rbrace))))
- (else
- (%push-frame-elem-lval ps eoff fty)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) fty)
- (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
- (cond ((at-punct? ps 'comma) (advance ps)))
- (lp (cond
- (designated?
- (let drop ((xs fields))
- (cond
- ((null? xs) '())
- ((equal? (car (car xs)) fname) (cdr xs))
- (else (drop (cdr xs))))))
- (else (cdr rest)))
- (cons fname seen))))))))
-
-
-;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm:
-;;
-;; A = parse-decl-or-fn (caller, arena-unaware)
-;; B = parse-fn-body (this fn — arena boundary)
-;; C = %parse-fn-body-inner (the real per-fn parse + cg work)
-;;
-;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they
-;; were allocated at cg-init (pre-mark) and only mutate via byte writes,
-;; so byte-level work survives heap-rewind!. Everything C allocates —
-;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch
-;; case alists — is post-mark and discarded.
-;;
-;; The fn-name binding into the surrounding scope (used so recursive
-;; calls can resolve the name during the body) is done BEFORE the mark
-;; so it survives. Inner scope frames are popped via scope-leave! before
-;; the rewind, so their cells become unreachable; rewind reclaims them.
-;;
-;; Rewind-safety guard: the body might add user-visible globals
-;; (block-statics), strings (literals), tags, or typedefs. Those entries
-;; are post-mark and would dangle on rewind. We snapshot the relevant
-;; alists before parsing and skip the rewind if any changed — paying
-;; full heap cost only for functions that genuinely mutate global state.
-(define (parse-fn-body ps name dt)
- ;; Hoist the recursive-binding scope-bind! out of the marked region
- ;; so the fn-sym cons survives rewind.
- (cond ((not (scope-lookup ps name))
- (scope-bind! ps name
- (%sym name 'fn 'extern dt
- (bytevector-append "cc__" name)))))
- (let* ((cg (ps-cg ps))
- (mark (heap-mark))
- (globals-before (cg-globals cg))
- (str-pool-before (cg-str-pool cg))
- (typedefs-before (ps-typedefs ps))
- (tags-before (ps-tags ps)))
- (%parse-fn-body-inner ps name dt)
- (cond
- ((and (eq? globals-before (cg-globals cg))
- (eq? str-pool-before (cg-str-pool cg))
- (eq? typedefs-before (ps-typedefs ps))
- (eq? tags-before (ps-tags ps)))
- ;; cg-fn-meta points at post-mark alist conses (fn metadata,
- ;; switch-case lists, indirect-slots). Drop the reference before
- ;; rewinding so the cg record holds no dangling pointers — the
- ;; next cg-fn-begin/v would reset it anyway, but if this is the
- ;; last fn, leaving it set leaves a latent landmine.
- (cg-fn-meta-set! cg '())
- (heap-rewind! mark)
- (debug-log "fn-rewound" name "heap" (heap-usage)))
- (else
- (debug-log "fn-kept" name "heap" (heap-usage))))))
-
-(define (%parse-fn-body-inner ps name dt)
- (let* ((e (ctype-ext dt)) (ret (car e))
- (par (cadr e)) (var (car (cddr e))))
- (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var)))
- (ps-fn-ctx-set! ps
- (%fn-ctx name ret (map cdr psyms) var '()))
- (scope-enter! ps)
- (for-each (lambda (p) (scope-bind! ps (car p) (cdr p)))
- psyms)
- (expect-punct ps 'lbrace)
- (parse-cstmt-body ps)
- (expect-punct ps 'rbrace)
- (scope-leave! ps)
- (ps-fn-ctx-set! ps #f)
- (cg-fn-end (ps-cg ps)))))
-
-(define (parse-stmt ps)
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps))
- (($ tok? (kind KW) (value if)) (parse-if-stmt ps))
- (($ tok? (kind KW) (value while)) (parse-while-stmt ps))
- (($ tok? (kind KW) (value do)) (parse-do-stmt ps))
- (($ tok? (kind KW) (value for)) (parse-for-stmt ps))
- (($ tok? (kind KW) (value switch)) (parse-switch-stmt ps))
- (($ tok? (kind KW) (value return)) (parse-return-stmt ps))
- (($ tok? (kind KW) (value goto)) (parse-goto-stmt ps))
- (($ tok? (kind KW) (value break))
- (advance ps) (expect-punct ps 'semi) (do-break ps))
- (($ tok? (kind KW) (value continue))
- (advance ps) (expect-punct ps 'semi) (do-continue ps))
- (($ tok? (kind KW) (value case)) (parse-case-stmt ps))
- (($ tok? (kind KW) (value default)) (parse-default-stmt ps))
- (($ tok? (kind IDENT))
- (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT)
- (eq? (tok-value (peek2 ps)) 'colon)))
- (parse-labelled-stmt ps))
- (else
- (cond ((stmt-starts-decl? ps) (parse-local-decl ps))
- (else (parse-expr-stmt ps))))))
-
-(define (stmt-starts-decl? ps)
- (pmatch (peek ps)
- (($ tok? (kind KW) (value ,v))
- (or (eq? v 'auto) (eq? v 'register) (eq? v 'static)
- (eq? v 'extern) (eq? v 'typedef) (eq? v 'const)
- (eq? v 'volatile) (eq? v 'restrict) (eq? v 'inline)
- (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
- (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
- (eq? v '_Bool) (eq? v 'struct) (eq? v 'union)
- (eq? v 'enum)))
- (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
- (else #f)))
-
-(define (parse-local-decl ps)
- (let* ((sp (parse-decl-spec ps))
- (sto (car sp)) (b (cdr sp)))
- (cond
- ((at-punct? ps 'semi) (advance ps) #t)
- (else
- (let lp ()
- (let* ((p (parse-declarator ps b))
- (n (car p)) (t (cdr p)))
- (handle-decl ps sto n t)
- (cond ((at-punct? ps 'comma) (advance ps) (lp))
- (else (expect-punct ps 'semi) #t))))))))
-
-(define (parse-cstmt ps)
- (expect-punct ps 'lbrace)
- (scope-enter! ps)
- (parse-cstmt-body ps)
- (scope-leave! ps)
- (expect-punct ps 'rbrace) #t)
-
-(define (parse-cstmt-body ps)
- (cond
- ((at-punct? ps 'rbrace) #t)
- ((eq? (tok-kind (peek ps)) 'EOF)
- (die (tok-loc (peek ps)) "EOF in cstmt"))
- (else (parse-stmt ps) (parse-cstmt-body ps))))
-
-(define (parse-compound-stmt ps) (parse-cstmt ps))
-
-(define (parse-if-stmt ps)
- (expect-kw ps 'if)
- (expect-punct ps 'lparen)
- (parse-expr ps) (rval! ps)
- (expect-punct ps 'rparen)
- (cg-ifelse (ps-cg ps)
- (lambda () (parse-stmt ps))
- (lambda ()
- (cond ((at-kw? ps 'else)
- (advance ps) (parse-stmt ps))
- (else #t)))))
-
-;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS
-;; §3.3); the parser threads it into break/continue via loop-ctx.
-
-(define (parse-while-stmt ps)
- (expect-kw ps 'while)
- (expect-punct ps 'lparen)
- (cg-loop (ps-cg ps)
- (lambda () (parse-expr ps) (rval! ps))
- (lambda (tag)
- (expect-punct ps 'rparen)
- (push-loop-ctx! ps 'while tag #t)
- (parse-stmt ps)
- (pop-loop-ctx! ps))) #t)
-
-(define (parse-do-stmt ps)
- (expect-kw ps 'do)
- ;; do-while needs its tag known *before* the body parses, so we
- ;; capture it inside the body-thunk and stash it for pop-loop-ctx
- ;; via a side cell.
- (cg-loop (ps-cg ps)
- (lambda () #t)
- (lambda (tag)
- (push-loop-ctx! ps 'do tag #t)
- (parse-stmt ps)
- (pop-loop-ctx! ps)
- (expect-kw ps 'while) (expect-punct ps 'lparen)
- (parse-expr ps) (rval! ps)
- (expect-punct ps 'rparen) (expect-punct ps 'semi)
- (cg-unop (ps-cg ps) 'lnot)
- (cg-if (ps-cg ps)
- (lambda () (cg-break (ps-cg ps) tag)))))
- #t)
-
-(define (parse-for-stmt ps)
- (expect-kw ps 'for) (expect-punct ps 'lparen)
- (scope-enter! ps)
- (cond
- ((at-punct? ps 'semi) (advance ps))
- ((stmt-starts-decl? ps) (parse-local-decl ps))
- (else (parse-expr ps) (cg-pop (ps-cg ps))
- (expect-punct ps 'semi)))
- (cg-loop (ps-cg ps)
- (lambda ()
- (cond ((at-punct? ps 'semi)
- (cg-push-imm (ps-cg ps) %t-i32 1))
- (else (parse-expr ps) (rval! ps)))
- (expect-punct ps 'semi))
- (lambda (tag)
- (let ((stk (collect-til-rparen ps)))
- (expect-punct ps 'rparen)
- (push-loop-ctx! ps 'for tag #t)
- (parse-stmt ps)
- (pop-loop-ctx! ps)
- (cond
- ((null? stk) #t)
- (else
- (let ((sv (ps-toks ps)))
- (ps-toks-set! ps
- (append stk (list (make-tok 'EOF #f #f))))
- (parse-expr ps) (cg-pop (ps-cg ps))
- (ps-toks-set! ps sv)))))))
- (scope-leave! ps) #t)
-
-(define (collect-til-rparen ps)
- (let loop ((acc '()) (d 0))
- (let ((t (peek ps)))
- (cond
- ((eq? (tok-kind t) 'EOF)
- (die (tok-loc t) "EOF in for-step"))
- ((and (zero? d) (eq? (tok-kind t) 'PUNCT)
- (eq? (tok-value t) 'rparen)) (reverse acc))
- (else
- (let ((nt (advance ps)))
- (loop (cons nt acc)
- (cond ((not (eq? (tok-kind nt) 'PUNCT)) d)
- ((eq? (tok-value nt) 'lparen) (+ d 1))
- ((eq? (tok-value nt) 'rparen) (- d 1))
- (else d)))))))))
-
-(define (parse-switch-stmt ps)
- (expect-kw ps 'switch) (expect-punct ps 'lparen)
- (parse-expr ps) (rval! ps)
- (expect-punct ps 'rparen)
- ;; Switch's break-target tag is the swctx's end-tag — cg owns it,
- ;; and we read it back so cg-break inside the switch body emits a
- ;; tag cg actually labels.
- (let* ((sw (cg-switch-begin (ps-cg ps)))
- (tg (swctx-end-tag sw)))
- (push-loop-ctx-sw! ps 'switch tg sw)
- (parse-stmt ps)
- (pop-loop-ctx! ps)
- (cg-switch-end (ps-cg ps) sw)))
-
-(define (parse-case-stmt ps)
- (expect-kw ps 'case)
- (let ((v (parse-const-int ps)))
- (expect-punct ps 'colon)
- (cg-switch-case (ps-cg ps) (innermost-sw ps) v)
- (parse-stmt ps)))
-
-(define (parse-default-stmt ps)
- (expect-kw ps 'default) (expect-punct ps 'colon)
- (cg-switch-default (ps-cg ps) (innermost-sw ps))
- (parse-stmt ps))
-
-(define (parse-return-stmt ps)
- (expect-kw ps 'return)
- (cond
- ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps)))
- (else
- (parse-expr ps) (rval! ps)
- (let ((fc (ps-fn-ctx ps)))
- (cond
- ((and fc (not (eq? (ctype-kind (fn-ctx-return-type fc)) 'void)))
- (cg-cast (ps-cg ps) (fn-ctx-return-type fc)))
- (else #t)))
- (cg-return (ps-cg ps))
- (expect-punct ps 'semi))))
-
-(define (parse-goto-stmt ps)
- (expect-kw ps 'goto)
- (let ((t (advance ps)))
- (cond ((eq? (tok-kind t) 'IDENT)
- (cg-goto (ps-cg ps) (tok-value t)))
- (else (die (tok-loc t) "label?"))))
- (expect-punct ps 'semi))
-
-(define (parse-labelled-stmt ps)
- (let ((t (advance ps)))
- (expect-punct ps 'colon)
- (cg-emit-label (ps-cg ps) (tok-value t))
- (parse-stmt ps)))
-
-(define (parse-expr-stmt ps)
- (cond
- ((at-punct? ps 'semi) (advance ps) #t)
- (else (parse-expr ps) (cg-pop (ps-cg ps))
- (expect-punct ps 'semi))))
-
-(define (push-loop-ctx! ps k tg hc)
- (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps))))
-(define (push-loop-ctx-sw! ps k tg sw)
- (ps-loops-set! ps
- (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps))))
-(define (pop-loop-ctx! ps)
- (ps-loops-set! ps (cdr (ps-loops ps))))
-(define (do-break ps)
- (let ((c (innermost-loop ps)))
- (cond
- ((not c) (die #f "break outside"))
- ((eq? (loop-ctx-kind c) 'switch)
- (cg-break (ps-cg ps) (car (loop-ctx-tag c))))
- (else (cg-break (ps-cg ps) (loop-ctx-tag c))))))
-(define (do-continue ps)
- (let ((c (innermost-cont ps)))
- (cond ((not c) (die #f "cont outside"))
- (else (cg-continue (ps-cg ps) (loop-ctx-tag c))))))
-(define (innermost-loop ps)
- (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps)))))
-(define (innermost-cont ps)
- (let lp ((xs (ps-loops ps)))
- (cond ((null? xs) #f)
- ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs)))
- (else (car xs)))))
-(define (innermost-sw ps)
- (let lp ((xs (ps-loops ps)))
- (cond ((null? xs) (die #f "case outside switch"))
- ((eq? (loop-ctx-kind (car xs)) 'switch)
- (cdr (loop-ctx-tag (car xs))))
- (else (lp (cdr xs))))))
-
-(define %binop-bp
- (list
- (cons 'comma (cons 1 2))
- (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3))
- (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3))
- (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3))
- (cons 'shl-eq (cons 4 3)) (cons 'shr-eq (cons 4 3))
- (cons 'amp-eq (cons 4 3)) (cons 'caret-eq (cons 4 3))
- (cons 'bar-eq (cons 4 3)) (cons 'qmark (cons 6 5))
- (cons 'lor (cons 10 11)) (cons 'land (cons 20 21))
- (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41))
- (cons 'amp (cons 50 51))
- (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61))
- (cons 'lt (cons 70 71)) (cons 'le (cons 70 71))
- (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71))
- (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81))
- (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91))
- (cons 'star (cons 100 101)) (cons 'slash (cons 100 101))
- (cons 'pct (cons 100 101))))
-
-(define (binop-bp-of s) (alist-ref/eq s %binop-bp))
-
-(define (punct-to-cgop s)
- (cond ((eq? s 'plus) 'add) ((eq? s 'minus) 'sub)
- ((eq? s 'star) 'mul) ((eq? s 'slash) 'div)
- ((eq? s 'pct) 'rem) ((eq? s 'amp) 'and)
- ((eq? s 'bar) 'or) ((eq? s 'caret) 'xor)
- ((eq? s 'shl) 'shl) ((eq? s 'shr) 'shr)
- ((eq? s 'eq2) 'eq) ((eq? s 'ne) 'ne)
- ((eq? s 'lt) 'lt) ((eq? s 'le) 'le)
- ((eq? s 'gt) 'gt) ((eq? s 'ge) 'ge)
- (else (die #f "binop" s))))
-
-(define (compound-op s)
- (cond ((eq? s 'plus-eq) 'add) ((eq? s 'minus-eq) 'sub)
- ((eq? s 'star-eq) 'mul) ((eq? s 'slash-eq) 'div)
- ((eq? s 'pct-eq) 'rem) ((eq? s 'shl-eq) 'shl)
- ((eq? s 'shr-eq) 'shr) ((eq? s 'amp-eq) 'and)
- ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq) 'or)
- (else #f)))
-
-(define (parse-expr ps) (parse-expr-bp ps 0))
-
-(define (parse-expr-bp ps mn)
- (parse-unary ps) (parse-binary-rhs ps mn))
-
-(define (parse-binary-rhs ps mn)
- (let ((t (peek ps)))
- (cond
- ((not (eq? (tok-kind t) 'PUNCT)) #t)
- (else
- (let ((bp (binop-bp-of (tok-value t))))
- (cond
- ((not bp) #t)
- ((< (car bp) mn) #t)
- (else
- (let ((op (tok-value t)) (rb (cdr bp)))
- (advance ps)
- (cond
- ((eq? op 'comma)
- ;; lhs has been parsed; discard it and evaluate rhs.
- ;; Result of the comma expr is the rhs's rval.
- (cg-pop (ps-cg ps))
- (parse-expr-bp ps rb) (rval! ps))
- ((eq? op 'assign)
- (parse-expr-bp ps rb) (rval! ps)
- (cg-assign (ps-cg ps)))
- ((compound-op op)
- (let ((b (compound-op op)))
- (cg-dup (ps-cg ps))
- (cg-load (ps-cg ps))
- (parse-expr-bp ps rb) (rval! ps)
- (cg-arith-conv (ps-cg ps))
- (cg-binop (ps-cg ps) b)
- (cg-assign (ps-cg ps))))
- ((eq? op 'qmark)
- (rval! ps)
- (cg-ifelse-merge (ps-cg ps)
- (lambda ()
- (parse-expr-bp ps 0) (rval! ps))
- (lambda ()
- (expect-punct ps 'colon)
- (parse-expr-bp ps rb) (rval! ps))))
- ((eq? op 'land)
- (rval! ps)
- ;; Both branches must push i32 0/1. Right side is
- ;; coerced via `cg-cast bool` so the merge slot
- ;; carries i32 (per §H.2).
- (cg-ifelse-merge (ps-cg ps)
- (lambda ()
- (parse-expr-bp ps rb) (rval! ps)
- (cg-cast (ps-cg ps) %t-bool)
- (cg-cast (ps-cg ps) %t-i32))
- (lambda ()
- (cg-push-imm (ps-cg ps) %t-i32 0))))
- ((eq? op 'lor)
- (rval! ps)
- (cg-ifelse-merge (ps-cg ps)
- (lambda ()
- (cg-push-imm (ps-cg ps) %t-i32 1))
- (lambda ()
- (parse-expr-bp ps rb) (rval! ps)
- (cg-cast (ps-cg ps) %t-bool)
- (cg-cast (ps-cg ps) %t-i32))))
- (else
- (rval! ps) (cg-promote (ps-cg ps))
- (parse-expr-bp ps rb) (rval! ps)
- (cg-promote (ps-cg ps))
- (cg-arith-conv (ps-cg ps))
- (cg-binop (ps-cg ps) (punct-to-cgop op))))
- (parse-binary-rhs ps mn)))))))))
-
-(define (parse-unary ps)
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value amp))
- (advance ps) (parse-unary ps)
- (cg-take-addr (ps-cg ps)))
- (($ tok? (kind PUNCT) (value star))
- (advance ps) (parse-unary ps) (rval! ps)
- (cg-push-deref (ps-cg ps)))
- (($ tok? (kind PUNCT) (value plus))
- (advance ps) (parse-unary ps)
- (rval! ps) (cg-promote (ps-cg ps)))
- (($ tok? (kind PUNCT) (value minus))
- (advance ps) (parse-unary ps)
- (rval! ps) (cg-promote (ps-cg ps))
- (cg-unop (ps-cg ps) 'neg))
- (($ tok? (kind PUNCT) (value tilde))
- (advance ps) (parse-unary ps)
- (rval! ps) (cg-promote (ps-cg ps))
- (cg-unop (ps-cg ps) 'bnot))
- (($ tok? (kind PUNCT) (value bang))
- (advance ps) (parse-unary ps) (rval! ps)
- (cg-unop (ps-cg ps) 'lnot))
- (($ tok? (kind PUNCT) (value inc))
- (advance ps) (parse-unary ps)
- (cg-dup (ps-cg ps))
- (cg-load (ps-cg ps))
- (cg-push-imm (ps-cg ps) %t-i32 1)
- (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps)))
- (($ tok? (kind PUNCT) (value dec))
- (advance ps) (parse-unary ps)
- (cg-dup (ps-cg ps))
- (cg-load (ps-cg ps))
- (cg-push-imm (ps-cg ps) %t-i32 1)
- (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps)))
- (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps))
- (($ tok? (kind KW) (value sizeof))
- (advance ps)
- (cond
- ((at-punct? ps 'lparen)
- (advance ps)
- (cond
- ((token-is-decl? ps)
- (let* ((sp (parse-decl-spec ps))
- (p (parse-declarator ps (cdr sp)))
- (ty (cdr p)))
- (expect-punct ps 'rparen)
- (cg-push-imm (ps-cg ps) %t-u64
- (max (ctype-size ty) 0))))
- (else
- (parse-expr ps) (expect-punct ps 'rparen)
- (let* ((tp (cg-top (ps-cg ps)))
- (sz (max (ctype-size (opnd-type tp)) 0)))
- (cg-pop (ps-cg ps))
- (cg-push-imm (ps-cg ps) %t-u64 sz)))))
- (else (parse-unary ps)
- (let* ((tp (cg-top (ps-cg ps)))
- (sz (max (ctype-size (opnd-type tp)) 0)))
- (cg-pop (ps-cg ps))
- (cg-push-imm (ps-cg ps) %t-u64 sz)))))
- (else (parse-postfix ps))))
-
-(define (token-is-decl? ps)
- (pmatch (peek ps)
- (($ tok? (kind KW) (value ,v))
- (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int)
- (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned)
- (eq? v '_Bool) (eq? v 'struct) (eq? v 'union)
- (eq? v 'enum) (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict) (eq? v 'inline)))
- (($ tok? (kind IDENT) (value ,n)) (typedef? ps n))
- (else #f)))
-
-(define (parse-cast-or-unary ps)
- (pmatch (peek2 ps)
- (($ tok? (kind KW) (value ,v))
- (guard (or (eq? v 'void) (eq? v 'char) (eq? v 'short)
- (eq? v 'int) (eq? v 'long) (eq? v 'signed)
- (eq? v 'unsigned) (eq? v '_Bool)
- (eq? v 'struct) (eq? v 'union) (eq? v 'enum)
- (eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict)))
- (advance ps)
- (let* ((sp (parse-decl-spec ps))
- (p (parse-declarator ps (cdr sp)))
- (ty (cdr p)))
- (expect-punct ps 'rparen)
- (parse-unary ps)
- ;; Cast operand undergoes lvalue conversion first (C semantics):
- ;; arrays decay to pointers, lvals become rvals. cg-cast then
- ;; bit-casts the resulting rval to the target type.
- (rval! ps)
- (cg-cast (ps-cg ps) ty)))
- (($ tok? (kind IDENT) (value ,n))
- (guard (typedef? ps n))
- (advance ps)
- (let* ((sp (parse-decl-spec ps))
- (p (parse-declarator ps (cdr sp)))
- (ty (cdr p)))
- (expect-punct ps 'rparen)
- (parse-unary ps)
- (rval! ps)
- (cg-cast (ps-cg ps) ty)))
- (else (advance ps) (parse-expr ps)
- (expect-punct ps 'rparen)
- (parse-postfix-rest ps))))
-
-(define (parse-postfix ps)
- (parse-primary ps) (parse-postfix-rest ps))
-
-(define (parse-postfix-rest ps)
- (let lp ()
- (pmatch (peek ps)
- (($ tok? (kind PUNCT) (value lbrack))
- (advance ps) (rval! ps)
- (parse-expr ps) (rval! ps)
- (expect-punct ps 'rbrack)
- (cg-binop (ps-cg ps) 'add)
- (cg-push-deref (ps-cg ps)) (lp))
- (($ tok? (kind PUNCT) (value lparen))
- (advance ps) (rval-not-fn! ps)
- (let* ((fn-ty (call-fn-type (ps-cg ps)))
- (n (parse-call-args ps fn-ty))
- ;; has-result? = #f for known void returns. Skips the
- ;; wasted ST a0 → frame-slot spill that cg-call would
- ;; otherwise emit for void calls.
- (has-result?
- (cond
- ((not fn-ty) #t)
- ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f)
- (else #t))))
- (expect-punct ps 'rparen)
- (cg-call (ps-cg ps) n has-result?)
- ;; Maintain parse's "one rval per expression" invariant so
- ;; comma / parse-expr-stmt / for-init/step pop sites stay
- ;; simple. The placeholder is vstack-only and never
- ;; materialized (cg-pop is a vstack op, no emit).
- (cond ((not has-result?)
- (cg-push-imm (ps-cg ps) %t-i32 0)))
- (lp)))
- (($ tok? (kind PUNCT) (value dot))
- (advance ps)
- (pmatch (advance ps)
- (($ tok? (kind IDENT) (value ,n))
- (cg-push-field (ps-cg ps) n) (lp))
- (($ tok? (loc ,l)) (die l "expected field name"))))
- (($ tok? (kind PUNCT) (value arrow))
- (advance ps)
- (pmatch (advance ps)
- (($ tok? (kind IDENT) (value ,n))
- ;; ptr -> field: load the pointer to rval, deref to reach
- ;; the struct lval, then push the field.
- (rval! ps)
- (cg-push-deref (ps-cg ps))
- (cg-push-field (ps-cg ps) n) (lp))
- (($ tok? (loc ,l)) (die l "expected field name"))))
- (($ tok? (kind PUNCT) (value inc))
- (advance ps)
- (cg-postinc (ps-cg ps)) (lp))
- (($ tok? (kind PUNCT) (value dec))
- (advance ps)
- (cg-postdec (ps-cg ps)) (lp))
- (else #t))))
-
-;; call-fn-type cg -> ctype-or-#f
-;; The function operand sits at the top of the vstack when
-;; parse-call-args runs (just after rval-not-fn!). Its type may be
-;; `fn` directly (named callee) or `ptr -> fn` (function pointer).
-;; Returns the underlying `fn` ctype, or #f if the operand isn't
-;; recognizably callable (callsite still works — no per-arg cast).
-(define (call-fn-type cg)
- (let* ((tp (cg-top cg)))
- (cond
- ((not tp) #f)
- (else
- (let* ((ty (opnd-type tp))
- (k (ctype-kind ty)))
- (cond
- ((eq? k 'fn) ty)
- ((eq? k 'ptr)
- (let ((pe (ctype-ext ty)))
- (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe)
- (else #f))))
- (else #f)))))))
-
-;; param-types-of fn-ty -> (params variadic?) with a #f fallback.
-(define (call-fn-param-info fn-ty)
- (cond
- ((not fn-ty) (cons '() #f))
- (else
- (let ((ext (ctype-ext fn-ty)))
- (cons (cadr ext) (car (cddr ext)))))))
-
-;; parse-call-args ps fn-ty -> arg-count
-;; Casts each fixed arg to the declared param type (CC.md §K.5).
-;; For variadic args (index >= named-arg count, when variadic? = #t)
-;; applies cg-promote (CC.md §G.1).
-(define (parse-call-args ps fn-ty)
- (cond
- ((at-punct? ps 'rparen) 0)
- (else
- (let* ((info (call-fn-param-info fn-ty))
- (params (car info))
- (var? (cdr info))
- (nfix (length params)))
- (let lp ((n 0) (rem params))
- (parse-expr-bp ps 4) (rval! ps)
- (cond
- ;; Fixed-arg: cast to declared param type. param entry shape
- ;; is (name . ctype) per cg-fn-begin's contract.
- ((not (null? rem))
- (cg-cast (ps-cg ps) (cdr (car rem))))
- ;; Variadic position (n >= nfix and var? is true): promote.
- (var?
- (cg-promote (ps-cg ps))))
- (let ((m (+ n 1))
- (rest (if (null? rem) '() (cdr rem))))
- (cond ((at-punct? ps 'comma) (advance ps) (lp m rest))
- (else m))))))))
-
-;; --------------------------------------------------------------------
-;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in
-;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'.
-;; va_start(ap, last) — last is parsed and discarded; cg only needs
-;; the variadic-first-slot offset, which it already tracks.
-;; va_arg(ap, T) — T is a type-name; result rval has that type.
-;; va_end(ap) — no-op codegen; just consumes ap.
-;;
-;; Pushes a single imm 0 for va_start / va_end so they fit as
-;; expression statements; va_arg pushes the rval.
-;; --------------------------------------------------------------------
-(define (parse-builtin-va-start ps)
- (advance ps) ; IDENT
- (expect-punct ps 'lparen)
- (parse-expr-bp ps 4) ; ap (must be lval)
- (expect-punct ps 'comma)
- ;; "last" is parsed for syntactic completeness then dropped — cg
- ;; doesn't need it; the variadic-first-slot was determined at
- ;; cg-fn-begin/v time.
- (parse-expr-bp ps 4) (cg-pop (ps-cg ps))
- (expect-punct ps 'rparen)
- (cg-va-start (ps-cg ps))
- ;; Push a placeholder rval so the call expression has a value
- ;; (matches va_start's "void" but our parser expects all
- ;; expressions to leave one rval).
- (cg-push-imm (ps-cg ps) %t-i32 0))
-
-(define (parse-builtin-va-arg ps)
- (advance ps) ; IDENT
- (expect-punct ps 'lparen)
- (parse-expr-bp ps 4) ; ap (lval)
- (expect-punct ps 'comma)
- (let* ((sp (parse-decl-spec ps))
- (p (parse-declarator ps (cdr sp)))
- (ty (cdr p)))
- (expect-punct ps 'rparen)
- (cg-va-arg (ps-cg ps) ty)))
-
-(define (parse-builtin-va-end ps)
- (advance ps) ; IDENT
- (expect-punct ps 'lparen)
- (parse-expr-bp ps 4) ; ap
- (expect-punct ps 'rparen)
- (cg-va-end (ps-cg ps))
- (cg-push-imm (ps-cg ps) %t-i32 0))
-
-(define (parse-primary ps)
- (let ((t (peek ps)))
- (pmatch t
- (($ tok? (kind INT) (value ,n))
- (advance ps)
- (cg-push-imm (ps-cg ps) %t-i32 n))
- (($ tok? (kind CHAR) (value ,c))
- (advance ps)
- (cg-push-imm (ps-cg ps) %t-i8 c))
- (($ tok? (kind STR) (value ,s))
- (advance ps)
- (cg-push-string (ps-cg ps) s))
- (($ tok? (kind IDENT) (value ,n))
- (cond
- ((bv= n "__builtin_va_start") (parse-builtin-va-start ps))
- ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps))
- ((bv= n "__builtin_va_end") (parse-builtin-va-end ps))
- (else
- (let ((sm (scope-lookup ps n)))
- (advance ps)
- (cond
- ((not sm) (die (tok-loc t) "undecl" n))
- ((eq? (sym-kind sm) 'enum-const)
- (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm)))
- (else (cg-push-sym (ps-cg ps) sm)))))))
- (($ tok? (kind PUNCT) (value lparen))
- (advance ps) (parse-expr ps) (expect-punct ps 'rparen))
- (else (die (tok-loc t) "unexp" (tok-value t))))))
-
-(define (rval! ps)
- (let ((tp (cg-top (ps-cg ps))))
- (cond ((and tp (opnd? tp) (opnd-lval? tp))
- (cg-load (ps-cg ps)))
- (else #t))))
-
-(define (rval-not-fn! ps)
- (let ((tp (cg-top (ps-cg ps))))
- (cond ((and tp (opnd? tp) (opnd-lval? tp)
- (not (ctype-is-fn? (opnd-type tp))))
- (cg-load (ps-cg ps)))
- (else #t))))
diff --git a/cc/pp.scm b/cc/pp.scm
@@ -1,805 +0,0 @@
-;; cc/pp.scm — token list -> expanded token list.
-;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4.
-;; #include rejected (CC.md §Toolchain envelope).
-
-;; --- helpers (TODO: promote to util.scm if shared more broadly) ---
-(define (%pp-bv-mem? x xs)
- (cond ((null? xs) #f)
- ((bv= x (car xs)) #t)
- (else (%pp-bv-mem? x (cdr xs)))))
-
-(define (%pp-bv-union a b)
- (cond ((null? a) b)
- ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b))
- (else (cons (car a) (%pp-bv-union (cdr a) b)))))
-
-(define (%pp-with-hide t hide)
- (%tok (tok-kind t) (tok-value t) (tok-loc t) hide))
-(define (%pp-with-loc t loc)
- (%tok (tok-kind t) (tok-value t) loc (tok-hide t)))
-
-;; --- pp-state (private record) ---
-;; cond-stack: list of (active? . has-taken?). Outer-active gating is
-;; computed by walking the stack rather than encoding it in frames.
-(define-record-type pp-state
- (%pp-state macros cond-stack cur-file line-delta)
- pp-state?
- (macros pps-macros pps-macros-set!)
- (cond-stack pps-cond-stack pps-cond-stack-set!)
- (cur-file pps-cur-file pps-cur-file-set!)
- (line-delta pps-line-delta pps-line-delta-set!))
-
-(define (%pp-make-state defs) (%pp-state defs '() #f 0))
-
-(define (%pp-active? state)
- (let loop ((xs (pps-cond-stack state)))
- (cond ((null? xs) #t)
- ((not (car (car xs))) #f)
- (else (loop (cdr xs))))))
-
-;; Active for the *parent* of the top frame (used by elif/else).
-(define (%pp-parent-active? state)
- (let ((cs (pps-cond-stack state)))
- (cond ((null? cs) #t)
- (else
- (let loop ((xs (cdr cs)))
- (cond ((null? xs) #t)
- ((not (car (car xs))) #f)
- (else (loop (cdr xs)))))))))
-
-;; --- token classification ---
-(define (%pp-eof? t) (eq? (tok-kind t) 'EOF))
-(define (%pp-nl? t) (eq? (tok-kind t) 'NL))
-(define (%pp-hash? t) (eq? (tok-kind t) 'HASH))
-(define (%pp-ident? t) (eq? (tok-kind t) 'IDENT))
-(define (%pp-int? t) (eq? (tok-kind t) 'INT))
-(define (%pp-punct? t pname)
- (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname)))
-(define (%pp-ident-name? t name-bv)
- (and (%pp-ident? t) (bv= (tok-value t) name-bv)))
-(define (%pp-skip-ws toks) toks)
-
-;; --- built-in macro names ---
-(define %pp-bv-FILE "__FILE__")
-(define %pp-bv-LINE "__LINE__")
-(define %pp-bv-STDC "__STDC__")
-(define %pp-bv-LISPCC "__LISPCC__")
-(define %pp-bv-DATE "__DATE__")
-(define %pp-bv-TIME "__TIME__")
-(define %pp-bv-STDC-VERSION "__STDC_VERSION__")
-(define %pp-bv-STDC-HOSTED "__STDC_HOSTED__")
-(define %pp-bv-VA-ARGS "__VA_ARGS__")
-(define %pp-bv-defined "defined")
-
-;; Fixed values for reproducibility — we don't read the wall clock.
-(define %pp-bv-DATE-VALUE "Jan 1 1970")
-(define %pp-bv-TIME-VALUE "00:00:00")
-
-(define (%pp-builtin? name)
- (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE)
- (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC)
- (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME)
- (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED)))
-
-(define (%pp-expand-builtin name loc state)
- (let* ((file (or (pps-cur-file state) (loc-file loc)))
- (line (+ (loc-line loc) (pps-line-delta state)))
- (col (loc-col loc))
- (here (%loc file line col)))
- (cond
- ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '())))
- ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '())))
- ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '())))
- ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '())))
- ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE here '())))
- ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE here '())))
- ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 here '())))
- ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 here '())))
- (else (die loc "internal: not a builtin" name)))))
-
-;; --- buf-list: simple reversed-list buffer of toks ---
-(define-record-type buf-list
- (%buf-list xs)
- buf-list?
- (xs buf-list-xs buf-list-xs-set!))
-(define (make-buf-list) (%buf-list '()))
-(define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b))))
-(define (buf-list-push-many! b ts)
- (let loop ((ts ts))
- (cond ((null? ts) #t)
- (else (buf-list-push! b (car ts)) (loop (cdr ts))))))
-(define (buf-list-flush b) (reverse (buf-list-xs b)))
-
-;; Translation phase 6: concatenate adjacent string literals. The merged
-;; token keeps the loc and hide-set of the first; values are byte-appended.
-(define (%pp-merge-adjacent-strs toks)
- (let loop ((toks toks) (acc '()))
- (cond
- ((null? toks) (reverse acc))
- ((and (not (null? acc))
- (eq? (tok-kind (car toks)) 'STR)
- (eq? (tok-kind (car acc)) 'STR))
- (let* ((prev (car acc))
- (cur (car toks))
- (merged (%tok 'STR
- (bytevector-append (tok-value prev) (tok-value cur))
- (tok-loc prev)
- (tok-hide prev))))
- (loop (cdr toks) (cons merged (cdr acc)))))
- (else (loop (cdr toks) (cons (car toks) acc))))))
-
-;; --- pp-expand: top-level driver ---
-(define (pp-expand toks initial-defines)
- (let ((state (%pp-make-state initial-defines))
- (out (make-buf-list)))
- (let loop ((toks toks))
- (cond
- ((null? toks) (die #f "pp-expand: missing EOF token"))
- ((%pp-eof? (car toks))
- (cond ((not (null? (pps-cond-stack state)))
- (die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef"))
- (else
- (buf-list-push! out (car toks))
- (%pp-merge-adjacent-strs (buf-list-flush out)))))
- ((%pp-nl? (car toks)) (loop (cdr toks)))
- ((%pp-hash? (car toks))
- (let* ((lr (%pp-take-line (cdr toks)))
- (line (car lr)) (rest (cdr lr)))
- (%pp-dispatch-directive (car toks) line state out)
- (loop rest)))
- (else
- (let* ((lr (%pp-take-line toks))
- (line (car lr)) (rest (cdr lr)))
- (cond ((%pp-active? state)
- (%pp-emit-expanded line state out))
- (else #t))
- (loop rest)))))))
-
-;; Take tokens up to (not including) the next NL or EOF. NL is consumed;
-;; EOF is left in the stream so the driver sees it next.
-(define (%pp-take-line toks)
- (let loop ((toks toks) (acc '()))
- (cond
- ((null? toks) (cons (reverse acc) toks))
- ((%pp-eof? (car toks)) (cons (reverse acc) toks))
- ((%pp-nl? (car toks)) (cons (reverse acc) (cdr toks)))
- (else (loop (cdr toks) (cons (car toks) acc))))))
-
-;; --- directive dispatch ---
-;; pmatch-based on the directive name bv. bv literals match by equal?.
-;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else`
-;; are C keywords promoted by lex; their KW symbol values map back to bv
-;; via symbol->string).
-(define (%pp-directive-name t)
- (cond ((eq? (tok-kind t) 'IDENT) (tok-value t))
- ((eq? (tok-kind t) 'KW) (symbol->string (tok-value t)))
- (else #f)))
-
-(define (%pp-dispatch-directive hash-tok line state out)
- (let ((line (%pp-skip-ws line)))
- (cond
- ((null? line) #t) ; bare `#` line — null directive
- ((%pp-directive-name (car line))
- (let ((name (%pp-directive-name (car line)))
- (rest (cdr line))
- (loc (tok-loc (car line))))
- (pmatch name
- ("define" (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t)))
- ("undef" (cond ((%pp-active? state) (%pp-do-undef rest state)) (else #t)))
- ("if" (%pp-do-if rest state))
- ("ifdef" (%pp-do-ifdef rest state))
- ("ifndef" (%pp-do-ifndef rest state))
- ("elif" (%pp-do-elif rest state))
- ("else" (%pp-do-else rest state))
- ("endif" (%pp-do-endif rest state))
- ("error" (cond ((%pp-active? state)
- (%pp-do-error (cons (car line) rest) state))
- (else #t)))
- ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t)))
- ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t)))
- ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t)))
- (else (die loc "unknown preprocessor directive" name)))))
- (else
- (die (tok-loc (car line)) "expected directive name after '#'"
- (tok-kind (car line)))))))
-
-;; --- #define ---
-;; function-like vs object-like is decided by an immediately-adjacent `(`.
-;; "Adjacent" = column of `(` equals column of name + length of name.
-(define (%pp-do-define line state)
- (cond
- ((null? line) (die #f "#define requires a macro name"))
- ((not (%pp-ident? (car line)))
- (die (tok-loc (car line)) "#define: expected identifier"))
- (else
- (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line)))
- (cond
- ((and (not (null? rest))
- (%pp-punct? (car rest) 'lparen)
- (= (loc-col (tok-loc (car rest)))
- (+ (loc-col (tok-loc nt))
- (bytevector-length name))))
- (%pp-define-fn name (cdr rest) (tok-loc nt) state))
- (else
- (let ((m (%macro 'obj '() rest)))
- (pps-macros-set! state (alist-set name m (pps-macros state))))))))))
-
-(define (%pp-define-fn name post-lparen nloc state)
- (let loop ((toks post-lparen) (params '()) (variadic? #f))
- (cond
- ((null? toks) (die nloc "#define: unterminated parameter list"))
- ((%pp-punct? (car toks) 'rparen)
- (let* ((body (cdr toks))
- (kind (if variadic? 'fn-vararg 'fn))
- (m (%macro kind (reverse params) body)))
- (pps-macros-set! state (alist-set name m (pps-macros state)))))
- ((%pp-punct? (car toks) 'ellipsis)
- (let ((rest (cdr toks)))
- (cond
- ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'"))
- ((%pp-punct? (car rest) 'rparen) (loop rest params #t))
- (else (die (tok-loc (car rest)) "#define: garbage after '...'")))))
- ((null? params)
- (cond
- ((%pp-ident? (car toks))
- (loop (cdr toks) (cons (tok-value (car toks)) params) #f))
- (else (die (tok-loc (car toks)) "#define: expected parameter name"))))
- (else
- (cond
- ((%pp-punct? (car toks) 'comma)
- (let ((after (cdr toks)))
- (cond
- ((null? after) (die (tok-loc (car toks)) "#define: trailing ','"))
- ((%pp-punct? (car after) 'ellipsis)
- (let ((aa (cdr after)))
- (cond
- ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen))
- (loop aa params #t))
- (else (die (tok-loc (car after))
- "#define: '...' must precede ')'")))))
- ((%pp-ident? (car after))
- (loop (cdr after) (cons (tok-value (car after)) params) #f))
- (else
- (die (tok-loc (car after))
- "#define: expected parameter name after ','")))))
- (else (die (tok-loc (car toks))
- "#define: expected ',' or ')' in parameter list")))))))
-
-;; --- #undef ---
-(define (%pp-do-undef line state)
- (cond
- ((null? line) (die #f "#undef requires a macro name"))
- ((not (%pp-ident? (car line)))
- (die (tok-loc (car line)) "#undef: expected identifier"))
- (else
- (pps-macros-set! state
- (%pp-alist-drop (tok-value (car line)) (pps-macros state))))))
-
-(define (%pp-alist-drop key al)
- (cond ((null? al) '())
- ((bv= (car (car al)) key) (cdr al))
- (else (cons (car al) (%pp-alist-drop key (cdr al))))))
-
-;; --- #if / #ifdef / #ifndef / #elif / #else / #endif ---
-(define (%pp-do-if line state)
- (cond
- ((not (%pp-active? state))
- (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
- (else
- (let* ((v (pp-eval-cexpr line (pps-macros state)))
- (a? (not (= v 0))))
- (pps-cond-stack-set! state (cons (cons a? a?) (pps-cond-stack state)))))))
-
-(define (%pp-do-ifdef line state)
- (cond
- ((not (%pp-active? state))
- (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
- (else
- (let ((d? (%pp-defined? (%pp-name-of-single line) state)))
- (pps-cond-stack-set! state
- (cons (cons d? d?) (pps-cond-stack state)))))))
-
-(define (%pp-do-ifndef line state)
- (cond
- ((not (%pp-active? state))
- (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state))))
- (else
- (let ((a? (not (%pp-defined? (%pp-name-of-single line) state))))
- (pps-cond-stack-set! state
- (cons (cons a? a?) (pps-cond-stack state)))))))
-
-(define (%pp-name-of-single line)
- (cond
- ((null? line) (die #f "#ifdef/#ifndef: missing identifier"))
- ((not (%pp-ident? (car line)))
- (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier"))
- (else (tok-value (car line)))))
-
-(define (%pp-defined? name state)
- (or (alist-ref name (pps-macros state))
- (%pp-builtin? name)
- #f))
-
-(define (%pp-do-elif line state)
- (let ((cs (pps-cond-stack state)))
- (cond
- ((null? cs) (die #f "#elif outside #if"))
- (else
- (let* ((top (car cs)) (rest (cdr cs))
- (taken? (cdr top))
- (par? (%pp-parent-active? state)))
- (cond
- ((or (not par?) taken?)
- (pps-cond-stack-set! state (cons (cons #f taken?) rest)))
- (else
- (let* ((v (pp-eval-cexpr line (pps-macros state)))
- (a? (not (= v 0))))
- (pps-cond-stack-set! state
- (cons (cons a? (or a? taken?)) rest))))))))))
-
-(define (%pp-do-else line state)
- (let ((cs (pps-cond-stack state)))
- (cond
- ((null? cs) (die #f "#else outside #if"))
- (else
- (let* ((top (car cs)) (rest (cdr cs))
- (taken? (cdr top))
- (par? (%pp-parent-active? state)))
- (cond
- ((not par?)
- (pps-cond-stack-set! state (cons (cons #f taken?) rest)))
- (taken?
- (pps-cond-stack-set! state (cons (cons #f #t) rest)))
- (else
- (pps-cond-stack-set! state (cons (cons #t #t) rest)))))))))
-
-(define (%pp-do-endif line state)
- (let ((cs (pps-cond-stack state)))
- (cond ((null? cs) (die #f "#endif outside #if"))
- (else (pps-cond-stack-set! state (cdr cs))))))
-
-;; --- #error ---
-;; line[0] is the directive name "error"; the rest is the user message.
-(define (%pp-do-error line state)
- (let* ((msg-toks (if (null? line) '() (cdr line)))
- (loc (if (null? line) #f (tok-loc (car line))))
- (msg (%pp-toks->display msg-toks)))
- (die loc "#error" msg)))
-
-(define (%pp-toks->display toks)
- (let loop ((toks toks) (acc '()) (first? #t))
- (cond
- ((null? toks) (bv-cat (reverse acc)))
- (else
- (let ((p (%pp-tok->bv (car toks))))
- (loop (cdr toks)
- (if first? (cons p acc) (cons p (cons " " acc)))
- #f))))))
-
-(define (%pp-tok->bv t)
- (let ((k (tok-kind t)) (v (tok-value t)))
- (cond
- ((eq? k 'IDENT) v)
- ((eq? k 'INT) (fixnum->bv v 10))
- ((eq? k 'STR) (%pp-quote-bytes v 34))
- ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39))
- ((eq? k 'KW) (symbol->string v))
- ((eq? k 'PUNCT) (symbol->string v))
- (else "?"))))
-
-;; Reconstruct a string/char literal source spelling from cooked content.
-;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char).
-;; `delim` is 34 for STR, 39 for CHAR.
-(define (%pp-quote-bytes bv delim)
- (let* ((n (bytevector-length bv))
- (delim-bv (bv-of-byte delim)))
- (let loop ((i 0) (acc (list delim-bv)))
- (cond
- ((= i n) (bv-cat (reverse (cons delim-bv acc))))
- (else
- (let ((b (bytevector-u8-ref bv i)))
- (cond
- ((or (= b delim) (= b 92))
- (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc))))
- (else
- (loop (+ i 1) (cons (bv-of-byte b) acc))))))))))
-
-;; --- #line / #pragma / #include ---
-;; Approximate #line: subsequent toks have line = (orig-line + delta),
-;; where delta = (N - here-line - 1). Good enough for most cases.
-(define (%pp-do-line line state)
- (cond
- ((null? line) (die #f "#line requires a line number"))
- ((not (%pp-int? (car line)))
- (die (tok-loc (car line)) "#line: expected integer"))
- (else
- (let* ((nt (car line)) (n (tok-value nt))
- (rest (cdr line))
- (here (loc-line (tok-loc nt))))
- (pps-line-delta-set! state (- n here 1))
- (cond
- ((null? rest) #t)
- ((eq? (tok-kind (car rest)) 'STR)
- (pps-cur-file-set! state (tok-value (car rest))))
- (else (die (tok-loc (car rest))
- "#line: expected string after number")))))))
-
-(define (%pp-do-pragma line state) #t)
-
-(define (%pp-do-include line state)
- (die (if (null? line) #f (tok-loc (car line)))
- "#include: file inclusion is handled upstream by pre-flatten"))
-
-;; --- macro expansion engine ---
-;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set:
-;; if the name is in t.hide, don't expand. Otherwise expand and rescan
-;; the produced body, with hide += {name}.
-(define (%pp-emit-expanded toks state out)
- (let loop ((toks toks))
- (cond
- ((null? toks) #t)
- (else
- (let* ((t (car toks)) (rest (cdr toks)))
- (cond
- ((not (%pp-ident? t))
- (buf-list-push! out (%pp-relocate t state))
- (loop rest))
- (else
- (let ((name (tok-value t)))
- (cond
- ((%pp-bv-mem? name (tok-hide t))
- (buf-list-push! out (%pp-relocate t state))
- (loop rest))
- ((%pp-builtin? name)
- (buf-list-push-many! out
- (%pp-expand-builtin name (tok-loc t) state))
- (loop rest))
- (else
- (let ((m (alist-ref name (pps-macros state))))
- (cond
- ((not m)
- (buf-list-push! out (%pp-relocate t state))
- (loop rest))
- (else
- (%pp-apply-macro t m rest state out loop))))))))))))))
-
-(define (%pp-apply-macro t m rest state out cont)
- (let ((kind (macro-kind m)) (name (tok-value t)))
- (cond
- ((eq? kind 'obj)
- (let ((bodies (%pp-prepare-body (macro-body m)
- (cons name (tok-hide t)))))
- (%pp-emit-expanded bodies state out)
- (cont rest)))
- (else
- (let ((after (%pp-skip-ws rest)))
- (cond
- ((or (null? after) (not (%pp-punct? (car after) 'lparen)))
- (buf-list-push! out (%pp-relocate t state))
- (cont rest))
- (else
- (let* ((ar (%pp-collect-args (cdr after) (tok-loc t)))
- (args (car ar)) (rest2 (cdr ar))
- (params (macro-params m))
- (variadic? (eq? kind 'fn-vararg))
- (env (%pp-bind-args params args variadic? (tok-loc t)))
- (sub (%pp-substitute (macro-body m) env (tok-loc t)))
- (bodies (%pp-prepare-body sub
- (cons name (tok-hide t)))))
- (%pp-emit-expanded bodies state out)
- (cont rest2)))))))))
-
-(define (%pp-prepare-body body extra-hide)
- (map (lambda (t)
- (%pp-with-hide t (%pp-bv-union extra-hide (tok-hide t))))
- body))
-
-;; Collect comma-separated args. `toks` starts AFTER `(`. Returns
-;; (args . rest), where args is a list of token-lists.
-(define (%pp-collect-args toks call-loc)
- (let loop ((toks toks) (depth 0) (cur '()) (args '()))
- (cond
- ((null? toks) (die call-loc "macro call: unterminated argument list"))
- ((%pp-eof? (car toks))
- (die call-loc "macro call: unterminated argument list"))
- ((and (= depth 0) (%pp-punct? (car toks) 'rparen))
- (let ((args*
- (cond
- ;; Empty parens count as one empty argument; bind-args
- ;; degenerates this back to "no args" for 0-param macros.
- ((and (null? args) (null? cur)) (list '()))
- (else (reverse (cons (reverse cur) args))))))
- (cons args* (cdr toks))))
- ((and (= depth 0) (%pp-punct? (car toks) 'comma))
- (loop (cdr toks) 0 '() (cons (reverse cur) args)))
- ((%pp-punct? (car toks) 'lparen)
- (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args))
- ((%pp-punct? (car toks) 'rparen)
- (loop (cdr toks) (- depth 1) (cons (car toks) cur) args))
- (else
- (loop (cdr toks) depth (cons (car toks) cur) args)))))
-
-;; Bind formals → token-lists (alist by bv key). Variadic gathers
-;; trailing actuals into __VA_ARGS__, joined with synthetic commas.
-(define (%pp-bind-args params args variadic? call-loc)
- (let* ((np (length params)) (na (length args)))
- (cond
- (variadic?
- (cond
- ((< na np) (die call-loc "macro call: too few arguments"))
- (else
- (let loop ((ps params) (as args) (acc '()))
- (cond
- ((null? ps)
- (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc))
- (else
- (loop (cdr ps) (cdr as)
- (alist-set (car ps) (car as) acc))))))))
- (else
- (cond
- ((and (= np 0) (= na 1) (null? (car args))) '())
- ((not (= np na)) (die call-loc "macro call: argument count mismatch"))
- (else
- (let loop ((ps params) (as args) (acc '()))
- (cond
- ((null? ps) acc)
- (else (loop (cdr ps) (cdr as)
- (alist-set (car ps) (car as) acc)))))))))))
-
-(define (%pp-join-comma argss)
- (cond
- ((null? argss) '())
- ((null? (cdr argss)) (car argss))
- (else
- (append (car argss)
- (cons (%pp-synth-comma) (%pp-join-comma (cdr argss)))))))
-
-(define (%pp-synth-comma)
- (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '()))
-
-;; Body substitution: walk body; replace param IDENTs with arg toks,
-;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not
-;; pre-expand args before substitution; the rescan after substitution
-;; catches the same expansions in practice.
-(define (%pp-substitute body env call-loc)
- (let loop ((body body) (out '()))
- (cond
- ((null? body) (reverse out))
- (else
- (let ((t (car body)) (rest (cdr body)))
- (cond
- ((%pp-punct? t 'hash)
- (cond
- ((or (null? rest) (not (%pp-ident? (car rest))))
- (die (tok-loc t) "stringize: '#' must precede a parameter name"))
- (else
- (let* ((id (car rest)) (pn (tok-value id))
- (pt (alist-ref pn env)))
- (cond
- ((not pt)
- (die (tok-loc id) "stringize: '#' operand must be a parameter" pn))
- (else
- (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '())))
- (loop (cdr rest) (cons s out)))))))))
- ((%pp-punct? t 'paste)
- (cond
- ((null? out) (die (tok-loc t) "paste: '##' cannot start a body"))
- ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body"))
- (else
- (let* ((lhs (car out))
- (rt (car rest))
- (rhs-list
- (cond
- ((and (%pp-ident? rt) (alist-ref (tok-value rt) env))
- (alist-ref (tok-value rt) env))
- (else (list rt)))))
- (cond
- ((null? rhs-list) (loop (cdr rest) out))
- (else
- (let* ((p (%pp-paste-tokens lhs (car rhs-list)))
- (after (append (cdr rhs-list) (cdr rest))))
- (loop after (cons p (cdr out))))))))))
- ((%pp-ident? t)
- (let* ((pn (tok-value t)) (pt (alist-ref pn env)))
- (cond
- ((not pt) (loop rest (cons t out)))
- ((and (not (null? rest)) (%pp-punct? (car rest) 'paste))
- (cond
- ((null? pt) (loop (cdr rest) out))
- (else (loop rest (append (reverse pt) out)))))
- (else (loop rest (append (reverse pt) out))))))
- (else (loop rest (cons t out)))))))))
-
-;; Paste two tokens textually; reparse the result.
-(define (%pp-paste-tokens lhs rhs)
- (let ((lk (tok-kind lhs)) (rk (tok-kind rhs)))
- (cond
- ((and (eq? lk 'IDENT) (eq? rk 'IDENT))
- (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs))
- (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
- ((and (eq? lk 'IDENT) (eq? rk 'INT))
- (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10))
- (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))
- ((and (eq? lk 'INT) (eq? rk 'INT))
- (let* ((s (bytevector-append (fixnum->bv (tok-value lhs) 10)
- (fixnum->bv (tok-value rhs) 10)))
- (pr (bv->fixnum s 10)))
- (cond
- ((not (car pr)) (die (tok-loc lhs) "paste: cannot reparse as integer" s))
- (else (%tok 'INT (cdr pr) (tok-loc lhs)
- (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))))))
- (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk)))))
-
-(define (%pp-relocate t state)
- (cond
- ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t)
- (else
- (let* ((l (tok-loc t))
- (f (or (pps-cur-file state) (loc-file l)))
- (ln (+ (loc-line l) (pps-line-delta state)))
- (c (loc-col l)))
- (%pp-with-loc t (%loc f ln c))))))
-
-;; --- pp-eval-cexpr: #if expression evaluator ---
-;; Steps: resolve `defined NAME`, macro-expand the rest, treat any
-;; remaining IDENT as 0, parse with recursive descent.
-;;
-;; Arena boundary (test_93 A→B→C pattern). Everything between the mark
-;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list,
-;; where `s2` runs the full macro-expansion engine), plus the recursive
-;; parser's (val . rest) cons cell at every level. The result is a
-;; fixnum, so no pre-allocated out cell is needed — `val` survives the
-;; rewind by virtue of being an immediate. The error path goes through
-;; `die` (which sys-exits), so no rewind there.
-(define (pp-eval-cexpr toks macros)
- (let ((mark (heap-mark)))
- (let* ((state (%pp-state macros '() #f 0))
- (s1 (%pp-resolve-defined toks state))
- (s2 (%pp-expand-line s1 state))
- (s3 (%pp-idents-as-zero s2)))
- (let* ((p (%pp-cx-expr s3))
- (val (car p)) (rest (cdr p)))
- (cond
- ((null? rest) (heap-rewind! mark) val)
- (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
- (tok-kind (car rest)))))))))
-
-(define (%pp-expand-line toks state)
- (let ((out (make-buf-list)))
- (%pp-emit-expanded toks state out)
- (buf-list-flush out)))
-
-(define (%pp-resolve-defined toks state)
- (let loop ((toks toks) (acc '()))
- (cond
- ((null? toks) (reverse acc))
- ((%pp-ident-name? (car toks) %pp-bv-defined)
- (let ((rest (cdr toks)))
- (cond
- ((null? rest) (die (tok-loc (car toks)) "defined: missing operand"))
- ((%pp-ident? (car rest))
- (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0)))
- (loop (cdr rest)
- (cons (%tok 'INT v (tok-loc (car toks)) '()) acc))))
- ((%pp-punct? (car rest) 'lparen)
- (let ((after (cdr rest)))
- (cond
- ((or (null? after) (not (%pp-ident? (car after))))
- (die (tok-loc (car toks)) "defined: expected identifier"))
- (else
- (let ((aa (cdr after)))
- (cond
- ((or (null? aa) (not (%pp-punct? (car aa) 'rparen)))
- (die (tok-loc (car toks)) "defined: expected ')'"))
- (else
- (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0)))
- (loop (cdr aa)
- (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) ))
- (else (die (tok-loc (car rest)) "defined: expected identifier or '('")))))
- (else (loop (cdr toks) (cons (car toks) acc))))))
-
-(define (%pp-idents-as-zero toks)
- (map (lambda (t)
- (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '()))
- (else t)))
- toks))
-
-;; --- recursive-descent #if expression parser ---
-;; Returns (value . rest).
-(define (%pp-cx-expr toks) (%pp-cx-cond toks))
-
-(define (%pp-cx-cond toks)
- (let* ((p (%pp-cx-lor toks))
- (v (car p)) (rest (cdr p)))
- (cond
- ((and (not (null? rest)) (%pp-punct? (car rest) 'qmark))
- (let* ((p2 (%pp-cx-expr (cdr rest)))
- (vt (car p2)) (after (cdr p2)))
- (cond
- ((or (null? after) (not (%pp-punct? (car after) 'colon)))
- (die (if (null? after) #f (tok-loc (car after))) "?: missing ':'"))
- (else
- (let* ((p3 (%pp-cx-cond (cdr after)))
- (vf (car p3)) (rest3 (cdr p3)))
- (cons (if (not (= v 0)) vt vf) rest3))))))
- (else (cons v rest)))))
-
-(define (%pp-cx-binl next ops toks)
- (let loop ((p (next toks)))
- (let ((v (car p)) (rest (cdr p)))
- (cond
- ((null? rest) p)
- (else
- (let* ((tt (car rest))
- (hit (and (eq? (tok-kind tt) 'PUNCT)
- (alist-ref/eq (tok-value tt) ops))))
- (cond
- ((not hit) p)
- (else
- (let* ((p2 (next (cdr rest)))
- (v2 (car p2)) (rest2 (cdr p2)))
- (loop (cons (hit v v2) rest2)))))))))))
-
-(define (%pp-cx-lor toks)
- (%pp-cx-binl %pp-cx-land
- (list (cons 'lor (lambda (a b) (if (or (not (= a 0)) (not (= b 0))) 1 0))))
- toks))
-(define (%pp-cx-land toks)
- (%pp-cx-binl %pp-cx-bor
- (list (cons 'land (lambda (a b) (if (and (not (= a 0)) (not (= b 0))) 1 0))))
- toks))
-(define (%pp-cx-bor toks) (%pp-cx-binl %pp-cx-bxor (list (cons 'bar bit-or)) toks))
-(define (%pp-cx-bxor toks) (%pp-cx-binl %pp-cx-band (list (cons 'caret bit-xor)) toks))
-(define (%pp-cx-band toks) (%pp-cx-binl %pp-cx-eq (list (cons 'amp bit-and)) toks))
-(define (%pp-cx-eq toks)
- (%pp-cx-binl %pp-cx-rel
- (list (cons 'eq2 (lambda (a b) (if (= a b) 1 0)))
- (cons 'ne (lambda (a b) (if (= a b) 0 1))))
- toks))
-(define (%pp-cx-rel toks)
- (%pp-cx-binl %pp-cx-shift
- (list (cons 'lt (lambda (a b) (if (< a b) 1 0)))
- (cons 'le (lambda (a b) (if (<= a b) 1 0)))
- (cons 'gt (lambda (a b) (if (> a b) 1 0)))
- (cons 'ge (lambda (a b) (if (>= a b) 1 0))))
- toks))
-(define (%pp-cx-shift toks)
- (%pp-cx-binl %pp-cx-add
- (list (cons 'shl (lambda (a b) (arithmetic-shift a b)))
- (cons 'shr (lambda (a b) (arithmetic-shift a (- 0 b)))))
- toks))
-(define (%pp-cx-add toks)
- (%pp-cx-binl %pp-cx-mul (list (cons 'plus +) (cons 'minus -)) toks))
-(define (%pp-cx-mul toks)
- (%pp-cx-binl %pp-cx-unary
- (list (cons 'star *) (cons 'slash quotient) (cons 'pct remainder))
- toks))
-
-(define (%pp-cx-unary toks)
- (cond
- ((null? toks) (die #f "#if: unexpected end of expression"))
- ((%pp-punct? (car toks) 'plus) (%pp-cx-unary (cdr toks)))
- ((%pp-punct? (car toks) 'minus)
- (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
- (cons (- 0 v) r)))
- ((%pp-punct? (car toks) 'bang)
- (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
- (cons (if (= v 0) 1 0) r)))
- ((%pp-punct? (car toks) 'tilde)
- (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p)))
- (cons (bit-not v) r)))
- (else (%pp-cx-primary toks))))
-
-(define (%pp-cx-primary toks)
- (cond
- ((null? toks) (die #f "#if: expected expression"))
- ((%pp-int? (car toks)) (cons (tok-value (car toks)) (cdr toks)))
- ((eq? (tok-kind (car toks)) 'CHAR)
- (cons (tok-value (car toks)) (cdr toks)))
- ((%pp-punct? (car toks) 'lparen)
- (let* ((p (%pp-cx-expr (cdr toks))) (v (car p)) (r (cdr p)))
- (cond
- ((or (null? r) (not (%pp-punct? (car r) 'rparen)))
- (die (if (null? r) #f (tok-loc (car r))) "#if: missing ')'"))
- (else (cons v (cdr r))))))
- (else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks))))))
diff --git a/cc/util.scm b/cc/util.scm
@@ -1,286 +0,0 @@
-;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude.
-;;
-;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add
-;; helpers here freely; the listed signatures are the load-bearing
-;; surface other modules call.
-
-;; --------------------------------------------------------------------
-;; bytevector helpers (scheme1 strings ARE bytevectors)
-;; --------------------------------------------------------------------
-(define (bv= a b) (bytevector=? a b))
-
-(define (bv-prefix? p s)
- ;; Is s a bv that starts with the bytes of p?
- (let ((plen (bytevector-length p))
- (slen (bytevector-length s)))
- (if (< slen plen)
- #f
- (let loop ((i 0))
- (cond ((= i plen) #t)
- ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i))
- (loop (+ i 1)))
- (else #f))))))
-
-(define (bv-find bv byte from)
- ;; Linear scan for the first byte == `byte` at index >= from.
- ;; Returns the index, or #f if not found.
- (let ((n (bytevector-length bv)))
- (let loop ((i from))
- (cond ((>= i n) #f)
- ((= (bytevector-u8-ref bv i) byte) i)
- (else (loop (+ i 1)))))))
-
-(define (bv-slice bv start end)
- ;; Fresh copy of bytes in [start, end). bytevector-copy already does
- ;; this in scheme1 (3-arg form returns a fresh bv).
- (bytevector-copy bv start end))
-
-(define (bv-of-byte b) (make-bytevector 1 b))
-
-(define (bv-cat lst-of-bv)
- ;; Concat a list of bytevectors with one allocation. bytevector-append
- ;; is variadic, so apply does this in a single linear pass.
- (apply bytevector-append lst-of-bv))
-
-(define (bv->fixnum bv radix)
- ;; (ok . val) per CC-INTERNALS: (#t . val) on parse, (#f . #f) on fail.
- ;; string->number is pure and returns #f on parse failure (not the
- ;; (ok . val) convention, since it's not a syscall).
- (let ((n (string->number bv radix)))
- (if n (cons #t n) (cons #f #f))))
-
-(define (fixnum->bv n radix) (number->string n radix))
-
-;; --------------------------------------------------------------------
-;; lists / alists
-;; --------------------------------------------------------------------
-(define (alist-ref key al)
- ;; equal? compare (intended for bv keys). The prelude's `assoc` uses
- ;; eq?, so we roll our own for the equal? case.
- (cond ((null? al) #f)
- ((equal? (car (car al)) key) (cdr (car al)))
- (else (alist-ref key (cdr al)))))
-
-(define (alist-ref/eq key al)
- ;; eq? compare (for symbol keys). Reuses the prelude's assoc, which
- ;; is eq?-based.
- (let ((p (assoc key al)))
- (if p (cdr p) #f)))
-
-(define (alist-set key val al) (cons (cons key val) al))
-
-(define (alist-update key f al)
- ;; Functional update by equal? key. If found, replace its value with
- ;; (f old-val). If not found, prepend (cons key (f #f)) so callers
- ;; can use this as upsert-with-default.
- (let loop ((xs al) (acc '()))
- (cond ((null? xs)
- (cons (cons key (f #f)) (reverse acc)))
- ((equal? (car (car xs)) key)
- (append (reverse acc)
- (cons (cons key (f (cdr (car xs))))
- (cdr xs))))
- (else (loop (cdr xs) (cons (car xs) acc))))))
-
-(define (any p xs)
- (cond ((null? xs) #f)
- ((p (car xs)) #t)
- (else (any p (cdr xs)))))
-
-(define (every p xs)
- (cond ((null? xs) #t)
- ((p (car xs)) (every p (cdr xs)))
- (else #f)))
-
-(define (count p xs)
- (let loop ((xs xs) (n 0))
- (cond ((null? xs) n)
- ((p (car xs)) (loop (cdr xs) (+ n 1)))
- (else (loop (cdr xs) n)))))
-
-;; --------------------------------------------------------------------
-;; ints
-;; --------------------------------------------------------------------
-(define (min3 a b c) (min a (min b c)))
-(define (align-up n k)
- ;; round n up to the nearest multiple of k (k must be a power of 2)
- (let ((mask (- k 1)))
- (bit-and (+ n mask) (bit-not mask))))
-
-;; --------------------------------------------------------------------
-;; output buffer (fixed-size pre-allocated byte storage)
-;;
-;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`.
-;; buf-push! is bytevector-copy! into storage — zero allocation per
-;; push, no chunks list to chase. This is what makes per-function
-;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed-
-;; storage (allocated once, lives pre-mark), so byte-level mutations
-;; survive a rewind that discards the parse/cg scratch.
-;;
-;; Sizing knobs live in one place so they're easy to tune as inputs
-;; grow. cg-init picks per-buf caps; the per-fn bufs are reused
-;; across functions (reset, not re-allocated).
-;; --------------------------------------------------------------------
-
-;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a
-;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow
-;; die() reports off/len/cap so misses are easy to diagnose.
-;;
-;; Each cap is a power of two. scheme1's bv_capacity_for rounds the
-;; requested length up to the smallest power of two ≥ n, so asking for
-;; 2^k bytes consumes exactly 2^k of heap.
-(define %BUF-CAP-TEXT 8388608) ; 8 MiB: .text + entry stub
-(define %BUF-CAP-DATA 2097152) ; 2 MiB: .data (strings, globals)
-(define %BUF-CAP-BSS 2097152) ; 2 MiB: .bss
-(define %BUF-CAP-FN 262144) ; 256 KiB: per-fn body asm
-(define %BUF-CAP-PROLOGUE 16384) ; 16 KiB: per-fn prologue
-(define %BUF-CAP-DEFAULT 65536) ; 64 KiB: make-buf fallback
-
-(define-record-type buf
- (%buf storage offset cap)
- buf?
- (storage buf-storage) ; bv: pre-allocated, never resized
- (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far
- (cap buf-cap)) ; fixnum: storage capacity
-
-(define (make-buf/cap cap)
- (%buf (make-bytevector cap 0) 0 cap))
-
-(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT))
-
-(define (buf-push! b bv)
- (let* ((n (bytevector-length bv))
- (off (buf-offset b))
- (newoff (+ off n)))
- (cond
- ((> newoff (buf-cap b))
- (die #f "buf overflow" off n (buf-cap b))))
- (bytevector-copy! (buf-storage b) off bv 0 n)
- (buf-offset-set! b newoff)))
-
-(define (buf-flush b)
- ;; Snapshot the used prefix as a fresh bv. One allocation; the
- ;; underlying storage is unchanged.
- (bytevector-copy (buf-storage b) 0 (buf-offset b)))
-
-(define (buf-reset! b) (buf-offset-set! b 0))
-
-(define (buf-drain! dst src)
- ;; Copy src's used bytes into dst at dst's current write head; reset
- ;; src to empty. dst and src must be distinct bufs.
- (let* ((slen (buf-offset src))
- (doff (buf-offset dst))
- (newoff (+ doff slen)))
- (cond
- ((> newoff (buf-cap dst))
- (die #f "buf-drain overflow" doff slen (buf-cap dst))))
- (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen)
- (buf-offset-set! dst newoff)
- (buf-offset-set! src 0)))
-
-;; --------------------------------------------------------------------
-;; diagnostics + I/O
-;; --------------------------------------------------------------------
-(define (die loc msg . irritants)
- ;; Format per CC-CONTRACTS §2.3:
- ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ...
- ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted.
- ;; irritants are written via display semantics (no quoting); format's
- ;; ~a handles bv/fixnum/pair/symbol the same way display does.
- ;;
- ;; All output is built into a single bv and sent to fd 2 with one
- ;; sys-write loop, so a partial write doesn't interleave fragments
- ;; from a concurrent process.
- (let* ((prefix (if loc
- (format "~a:~d:~d: error: "
- (loc-file loc) (loc-line loc) (loc-col loc))
- "error: "))
- (head (bytevector-append prefix (format "~a" msg)))
- ;; Irritants get ": " before the first and " " between the rest.
- (tail (if (null? irritants)
- (list NL-BV)
- (let walk ((xs irritants) (sep ": ") (acc '()))
- (if (null? xs)
- (reverse (cons NL-BV acc))
- (walk (cdr xs)
- " "
- (cons (format "~a" (car xs))
- (cons sep acc)))))))
- (out (bv-cat (cons head tail))))
- (write-bv-fd 2 out)
- (sys-exit 1)))
-
-(define (slurp-fd fd)
- ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's
- ;; port layer uses); bv-concat-reverse builds the result in one
- ;; allocation so a multi-MB tcc.c stays linear.
- (let ((buf (make-bytevector BUFSIZE)))
- (let loop ((acc '()))
- (let ((r (sys-read fd buf 0 BUFSIZE)))
- (cond ((not (car r))
- (die #f "slurp-fd: sys-read failed" (cdr r)))
- ((zero? (cdr r))
- (bv-concat-reverse acc))
- (else
- (loop (cons (bytevector-copy buf 0 (cdr r)) acc))))))))
-
-(define (write-bv-fd fd bv)
- ;; Full write or die. sys-write may write fewer bytes than requested;
- ;; advance the offset and retry the unwritten tail.
- ;;
- ;; On failure we sys-exit directly instead of routing through `die`
- ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must
- ;; not recurse infinitely. Status 1 matches the contract for `die`.
- (let ((len (bytevector-length bv)))
- (let loop ((off 0))
- (if (= off len)
- #t
- (let ((r (sys-write fd bv off (- len off))))
- (cond ((not (car r)) (sys-exit 1))
- ((zero? (cdr r)) (sys-exit 1))
- (else (loop (+ off (cdr r))))))))))
-
-;; --------------------------------------------------------------------
-;; debug logging
-;;
-;; Cheap sticky on/off: the cc compiler is single-threaded and short-
-;; lived, so a top-level mutable flag is fine. Toggle via
-;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr)
-;; writes one line to fd 2 in the same display-style format as `die`,
-;; but doesn't abort. The intent is to trace heap usage between cc
-;; phases (lex/pp/parse/cg-finish) without compile-time conditionals.
-;; --------------------------------------------------------------------
-(define %debug-log-enabled #f)
-(define (debug-log-on!) (set! %debug-log-enabled #t))
-(define (debug-log-off!) (set! %debug-log-enabled #f))
-(define (debug-log? ) %debug-log-enabled)
-
-(define (debug-log msg . irritants)
- (cond
- (%debug-log-enabled
- (let* ((head (bytevector-append "[cc] " (format "~a" msg)))
- (tail (if (null? irritants)
- (list NL-BV)
- (let walk ((xs irritants) (sep ": ") (acc '()))
- (if (null? xs)
- (reverse (cons NL-BV acc))
- (walk (cdr xs)
- " "
- (cons (format "~a" (car xs))
- (cons sep acc)))))))
- (out (bv-cat (cons head tail))))
- (write-bv-fd 2 out)))
- (else #t)))
-
-;; --------------------------------------------------------------------
-;; fresh-name generator (used for cg label counters, etc.)
-;; --------------------------------------------------------------------
-(define (make-namer prefix)
- ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh
- ;; bv. The counter lives in the closure's lexical environment; scheme1
- ;; closures heap-capture by reference, so set! on ctr is sticky.
- (let ((ctr 0))
- (lambda ()
- (let ((s (bytevector-append prefix (number->string ctr 10))))
- (set! ctr (+ ctr 1))
- s))))
diff --git a/docs/CC-INTERNALS.md b/docs/CC-INTERNALS.md
@@ -4,16 +4,18 @@ Companion to [CC.md](CC.md). CC.md says what we accept; this doc says
how the implementation is organized so engineers can split work and
test independently.
-The compiler is one scheme1 program assembled from six files at build
-time:
+The compiler lives in one file, `cc/cc.scm`, that bundles every
+module section in order; `cc/main.scm` is the one-liner entry point
+that fires `cc-main`:
```
-build: catm cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm cc/cg.scm cc/parse.scm cc/main.scm > cc/cc.scm
-run: scheme1 < cc/cc.scm -- input.flat.c output.P1pp
+build: catm build/$ARCH/cc/cc.scm scheme1/prelude.scm cc/cc.scm cc/main.scm
+run: scheme1 build/$ARCH/cc/cc.scm input.flat.c output.P1pp
```
-(Driver shell-script details — argv plumbing, scheme1 prelude prepend
-— belong in scripts/, not in this doc.)
+Sections inside `cc.scm` are delimited by their original
+`;; cc/<name>.scm —` headers (util, data, lex, pp, cg, parse, main),
+so the layering below is still navigable by search.
## Module DAG
@@ -25,16 +27,16 @@ data ────────┼─► lex ──► pp ──► parse
└──────────────────────► cg ──► main
```
-- **util.scm** — leaf helpers; depends only on the scheme1 prelude.
-- **data.scm** — record-type definitions used across modules.
-- **lex.scm** — bytestream → token list. Pure function.
-- **pp.scm** — token list → expanded token list. Pure function.
-- **cg.scm** — codegen state and emission API. Mutates a `cg` record.
-- **parse.scm** — token list + cg → P1pp output. Mutates `pstate` and
+- **util** — leaf helpers; depends only on the scheme1 prelude.
+- **data** — record-type definitions used across modules.
+- **lex** — bytestream → token list. Pure function.
+- **pp** — token list → expanded token list. Pure function.
+- **cg** — codegen state and emission API. Mutates a `cg` record.
+- **parse** — token list + cg → P1pp output. Mutates `pstate` and
drives `cg`.
-- **main.scm** — argv handling, file I/O, ties phases together.
+- **main** — argv handling, file I/O, ties phases together.
-Cycles are forbidden. parse.scm calls cg.scm but never the reverse.
+Cycles are forbidden. The parse section calls cg but never the reverse.
## Feature workflow
@@ -46,12 +48,14 @@ When adding any new codegen-touching feature, **always** in this order:
`.expected-exit` (default `0`) and optional `.expected` stdout file.
At this point the test fails — that's the spec.
2. **Implement the cg primitive(s).** Add or fix the API surface in
- `cc/cg.scm`. Iterate until `make test SUITE=cc-cg` passes.
+ the cg section of `cc/cc.scm`. Iterate until `make test SUITE=cc-cg`
+ passes.
3. **cc fixture next.** Write a `tests/cc/<n>-name.c` that
exercises the same feature from the C source side, with a `main`
that exits with a known value. Add `.expected-exit` / `.expected`.
Test fails — that's the spec for the parser.
-4. **Implement the parse changes.** Wire C syntax through `cc/parse.scm`
+4. **Implement the parse changes.** Wire C syntax through the parse
+ section of `cc/cc.scm`
using the cg primitives from step 2. Iterate until
`make test SUITE=cc` passes.
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -248,7 +248,7 @@ _cc_unit_suite() {
}
run_cc_util_suite() {
- _cc_unit_suite cc-util expected "scheme1/prelude.scm cc/util.scm"
+ _cc_unit_suite cc-util expected "scheme1/prelude.scm cc/cc.scm"
}
# _cc_pipeline_suite <suite-name> <expected-ext> <layers>
@@ -289,7 +289,7 @@ _cc_pipeline_suite() {
run_cc_lex_suite() {
_cc_pipeline_suite cc-lex expected-toks \
- "scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm tests/cc-lex/_run-lex.scm"
+ "scheme1/prelude.scm cc/cc.scm tests/cc-lex/_run-lex.scm"
}
# Two passes: .c fixtures via the lex+pp pipeline; the lone .scm fixture
@@ -299,10 +299,10 @@ run_cc_lex_suite() {
run_cc_pp_suite() {
saved=$NAMES
_cc_pipeline_suite cc-pp expected-toks \
- "scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm tests/cc-pp/_run-pp.scm"
+ "scheme1/prelude.scm cc/cc.scm tests/cc-pp/_run-pp.scm"
NAMES=$saved
_cc_unit_suite cc-pp expected \
- "scheme1/prelude.scm cc/util.scm cc/data.scm cc/pp.scm"
+ "scheme1/prelude.scm cc/cc.scm"
}
# _cc_runtime_suite <suite-name> <fixture-ext> <layers> [<fixture-as-arg?>]
@@ -368,7 +368,7 @@ _cc_runtime_suite() {
run_cc_cg_suite() {
_cc_runtime_suite cc-cg scm \
- "scheme1/prelude.scm cc/util.scm cc/data.scm cc/cg.scm" 0
+ "scheme1/prelude.scm cc/cc.scm" 0
}
run_cc_suite() {