boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8a3e64ac5a9f5c9857b856242a5def558f1411ec
parent 1d24d51e2956de403af4ceca808e744b0a807a06
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 12:37:14 -0700

cc: combine files into 1

Diffstat:
MMakefile | 7+++----
Mcc/README.md | 17++++-------------
Acc/cc.scm | 5222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dcc/cg.scm | 1246-------------------------------------------------------------------------------
Dcc/data.scm | 239-------------------------------------------------------------------------------
Dcc/lex.scm | 900-------------------------------------------------------------------------------
Mcc/main.scm | 75+++------------------------------------------------------------------------
Dcc/parse.scm | 1673-------------------------------------------------------------------------------
Dcc/pp.scm | 805-------------------------------------------------------------------------------
Dcc/util.scm | 286-------------------------------------------------------------------------------
Mdocs/CC-INTERNALS.md | 36++++++++++++++++++++----------------
Mscripts/boot-run-tests.sh | 10+++++-----
12 files changed, 5257 insertions(+), 5259 deletions(-)

diff --git a/Makefile b/Makefile @@ -86,6 +86,7 @@ CLOC_FILES := M1pp/M1pp.P1 \ P1/P1pp.P1pp \ $(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1) \ scheme1/scheme1.P1pp \ + cc/cc.scm \ $(foreach a,$(CLOC_ARCHES), \ $(foreach f,$(CLOC_SEED_BASES),vendor/seed/$(a)/$(f))) @@ -161,10 +162,8 @@ SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1) # Catm'd cc compiler source. Per-arch only because catm runs in the # per-arch container; the resulting .scm is identical across arches but # we keep it under build/$arch/ for naming consistency. -CC_SRCS := scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm \ - cc/pp.scm cc/cg.scm cc/parse.scm cc/main.scm +CC_SRCS := scheme1/prelude.scm cc/cc.scm cc/main.scm CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm) -CLOC_FILES += $(CC_SRCS) m1pp: $(OUT_DIR)/m1pp pokem: $(OUT_DIR)/pokem @@ -198,7 +197,7 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS) $(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@ -# cc.scm: catm prelude + the six cc/*.scm files into one source the +# cc.scm: catm prelude + cc.scm + main.scm entry into one source the # scheme1 interpreter can run. Catm runs inside the per-arch container. $(CC_BINS): build/%/cc/cc.scm: $(CC_SRCS) build/%/.image build/%/tools/M0 mkdir -p $(@D) diff --git a/cc/README.md b/cc/README.md @@ -18,25 +18,16 @@ Read in this order: | File | Purpose | |------|---------| -| `util.scm` | Leaf helpers (bv, alist, buf, die). | -| `data.scm` | Records + symbol alphabets shared across modules. | -| `lex.scm` | Bytestream → token list. | -| `pp.scm` | Token list → expanded token list. | -| `cg.scm` | Codegen state + emission API. | -| `parse.scm` | Recursive-descent + Pratt; calls cg directly. | -| `main.scm` | argv handling + driver. | +| `cc.scm` | All compiler modules in one file: util, data, lex, pp, cg, parse, and the `cc-main` driver. Sections are still labeled with their original `;; cc/<name>.scm —` headers. | +| `main.scm` | One-liner production entry point: `(sys-exit (cc-main (argv)))`. Tests omit it so they can run their own drivers. | Build (single source for scheme1): ``` -catm cc/cc.scm \ - cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm \ - cc/cg.scm cc/parse.scm cc/main.scm +catm build/$ARCH/cc/cc.scm \ + scheme1/prelude.scm cc/cc.scm cc/main.scm ``` -Run via the existing `boot-run-scheme1.sh` wrapper, which prepends -`scheme1/prelude.scm` ahead of the catm'd compiler source. - ## Status Modules filled in past the scaffold; parser drives all six cg sections diff --git a/cc/cc.scm b/cc/cc.scm @@ -0,0 +1,5222 @@ +;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude. +;; +;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add +;; helpers here freely; the listed signatures are the load-bearing +;; surface other modules call. + +;; -------------------------------------------------------------------- +;; bytevector helpers (scheme1 strings ARE bytevectors) +;; -------------------------------------------------------------------- +(define (bv= a b) (bytevector=? a b)) + +(define (bv-prefix? p s) + ;; Is s a bv that starts with the bytes of p? + (let ((plen (bytevector-length p)) + (slen (bytevector-length s))) + (if (< slen plen) + #f + (let loop ((i 0)) + (cond ((= i plen) #t) + ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i)) + (loop (+ i 1))) + (else #f)))))) + +(define (bv-find bv byte from) + ;; Linear scan for the first byte == `byte` at index >= from. + ;; Returns the index, or #f if not found. + (let ((n (bytevector-length bv))) + (let loop ((i from)) + (cond ((>= i n) #f) + ((= (bytevector-u8-ref bv i) byte) i) + (else (loop (+ i 1))))))) + +(define (bv-slice bv start end) + ;; Fresh copy of bytes in [start, end). bytevector-copy already does + ;; this in scheme1 (3-arg form returns a fresh bv). + (bytevector-copy bv start end)) + +(define (bv-of-byte b) (make-bytevector 1 b)) + +(define (bv-cat lst-of-bv) + ;; Concat a list of bytevectors with one allocation. bytevector-append + ;; is variadic, so apply does this in a single linear pass. + (apply bytevector-append lst-of-bv)) + +(define (bv->fixnum bv radix) + ;; (ok . val) per CC-INTERNALS: (#t . val) on parse, (#f . #f) on fail. + ;; string->number is pure and returns #f on parse failure (not the + ;; (ok . val) convention, since it's not a syscall). + (let ((n (string->number bv radix))) + (if n (cons #t n) (cons #f #f)))) + +(define (fixnum->bv n radix) (number->string n radix)) + +;; -------------------------------------------------------------------- +;; lists / alists +;; -------------------------------------------------------------------- +(define (alist-ref key al) + ;; equal? compare (intended for bv keys). The prelude's `assoc` uses + ;; eq?, so we roll our own for the equal? case. + (cond ((null? al) #f) + ((equal? (car (car al)) key) (cdr (car al))) + (else (alist-ref key (cdr al))))) + +(define (alist-ref/eq key al) + ;; eq? compare (for symbol keys). Reuses the prelude's assoc, which + ;; is eq?-based. + (let ((p (assoc key al))) + (if p (cdr p) #f))) + +(define (alist-set key val al) (cons (cons key val) al)) + +(define (alist-update key f al) + ;; Functional update by equal? key. If found, replace its value with + ;; (f old-val). If not found, prepend (cons key (f #f)) so callers + ;; can use this as upsert-with-default. + (let loop ((xs al) (acc '())) + (cond ((null? xs) + (cons (cons key (f #f)) (reverse acc))) + ((equal? (car (car xs)) key) + (append (reverse acc) + (cons (cons key (f (cdr (car xs)))) + (cdr xs)))) + (else (loop (cdr xs) (cons (car xs) acc)))))) + +(define (any p xs) + (cond ((null? xs) #f) + ((p (car xs)) #t) + (else (any p (cdr xs))))) + +(define (every p xs) + (cond ((null? xs) #t) + ((p (car xs)) (every p (cdr xs))) + (else #f))) + +(define (count p xs) + (let loop ((xs xs) (n 0)) + (cond ((null? xs) n) + ((p (car xs)) (loop (cdr xs) (+ n 1))) + (else (loop (cdr xs) n))))) + +;; -------------------------------------------------------------------- +;; ints +;; -------------------------------------------------------------------- +(define (min3 a b c) (min a (min b c))) +(define (align-up n k) + ;; round n up to the nearest multiple of k (k must be a power of 2) + (let ((mask (- k 1))) + (bit-and (+ n mask) (bit-not mask)))) + +;; -------------------------------------------------------------------- +;; output buffer (fixed-size pre-allocated byte storage) +;; +;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`. +;; buf-push! is bytevector-copy! into storage — zero allocation per +;; push, no chunks list to chase. This is what makes per-function +;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed- +;; storage (allocated once, lives pre-mark), so byte-level mutations +;; survive a rewind that discards the parse/cg scratch. +;; +;; Sizing knobs live in one place so they're easy to tune as inputs +;; grow. cg-init picks per-buf caps; the per-fn bufs are reused +;; across functions (reset, not re-allocated). +;; -------------------------------------------------------------------- + +;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a +;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow +;; die() reports off/len/cap so misses are easy to diagnose. +;; +;; Each cap is a power of two. scheme1's bv_capacity_for rounds the +;; requested length up to the smallest power of two ≥ n, so asking for +;; 2^k bytes consumes exactly 2^k of heap. +(define %BUF-CAP-TEXT 8388608) ; 8 MiB: .text + entry stub +(define %BUF-CAP-DATA 2097152) ; 2 MiB: .data (strings, globals) +(define %BUF-CAP-BSS 2097152) ; 2 MiB: .bss +(define %BUF-CAP-FN 262144) ; 256 KiB: per-fn body asm +(define %BUF-CAP-PROLOGUE 16384) ; 16 KiB: per-fn prologue +(define %BUF-CAP-DEFAULT 65536) ; 64 KiB: make-buf fallback + +(define-record-type buf + (%buf storage offset cap) + buf? + (storage buf-storage) ; bv: pre-allocated, never resized + (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far + (cap buf-cap)) ; fixnum: storage capacity + +(define (make-buf/cap cap) + (%buf (make-bytevector cap 0) 0 cap)) + +(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT)) + +(define (buf-push! b bv) + (let* ((n (bytevector-length bv)) + (off (buf-offset b)) + (newoff (+ off n))) + (cond + ((> newoff (buf-cap b)) + (die #f "buf overflow" off n (buf-cap b)))) + (bytevector-copy! (buf-storage b) off bv 0 n) + (buf-offset-set! b newoff))) + +(define (buf-flush b) + ;; Snapshot the used prefix as a fresh bv. One allocation; the + ;; underlying storage is unchanged. + (bytevector-copy (buf-storage b) 0 (buf-offset b))) + +(define (buf-reset! b) (buf-offset-set! b 0)) + +(define (buf-drain! dst src) + ;; Copy src's used bytes into dst at dst's current write head; reset + ;; src to empty. dst and src must be distinct bufs. + (let* ((slen (buf-offset src)) + (doff (buf-offset dst)) + (newoff (+ doff slen))) + (cond + ((> newoff (buf-cap dst)) + (die #f "buf-drain overflow" doff slen (buf-cap dst)))) + (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen) + (buf-offset-set! dst newoff) + (buf-offset-set! src 0))) + +;; -------------------------------------------------------------------- +;; diagnostics + I/O +;; -------------------------------------------------------------------- +(define (die loc msg . irritants) + ;; Format per CC-CONTRACTS §2.3: + ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ... + ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted. + ;; irritants are written via display semantics (no quoting); format's + ;; ~a handles bv/fixnum/pair/symbol the same way display does. + ;; + ;; All output is built into a single bv and sent to fd 2 with one + ;; sys-write loop, so a partial write doesn't interleave fragments + ;; from a concurrent process. + (let* ((prefix (if loc + (format "~a:~d:~d: error: " + (loc-file loc) (loc-line loc) (loc-col loc)) + "error: ")) + (head (bytevector-append prefix (format "~a" msg))) + ;; Irritants get ": " before the first and " " between the rest. + (tail (if (null? irritants) + (list NL-BV) + (let walk ((xs irritants) (sep ": ") (acc '())) + (if (null? xs) + (reverse (cons NL-BV acc)) + (walk (cdr xs) + " " + (cons (format "~a" (car xs)) + (cons sep acc))))))) + (out (bv-cat (cons head tail)))) + (write-bv-fd 2 out) + (sys-exit 1))) + +(define (slurp-fd fd) + ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's + ;; port layer uses); bv-concat-reverse builds the result in one + ;; allocation so a multi-MB tcc.c stays linear. + (let ((buf (make-bytevector BUFSIZE))) + (let loop ((acc '())) + (let ((r (sys-read fd buf 0 BUFSIZE))) + (cond ((not (car r)) + (die #f "slurp-fd: sys-read failed" (cdr r))) + ((zero? (cdr r)) + (bv-concat-reverse acc)) + (else + (loop (cons (bytevector-copy buf 0 (cdr r)) acc)))))))) + +(define (write-bv-fd fd bv) + ;; Full write or die. sys-write may write fewer bytes than requested; + ;; advance the offset and retry the unwritten tail. + ;; + ;; On failure we sys-exit directly instead of routing through `die` + ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must + ;; not recurse infinitely. Status 1 matches the contract for `die`. + (let ((len (bytevector-length bv))) + (let loop ((off 0)) + (if (= off len) + #t + (let ((r (sys-write fd bv off (- len off)))) + (cond ((not (car r)) (sys-exit 1)) + ((zero? (cdr r)) (sys-exit 1)) + (else (loop (+ off (cdr r)))))))))) + +;; -------------------------------------------------------------------- +;; debug logging +;; +;; Cheap sticky on/off: the cc compiler is single-threaded and short- +;; lived, so a top-level mutable flag is fine. Toggle via +;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr) +;; writes one line to fd 2 in the same display-style format as `die`, +;; but doesn't abort. The intent is to trace heap usage between cc +;; phases (lex/pp/parse/cg-finish) without compile-time conditionals. +;; -------------------------------------------------------------------- +(define %debug-log-enabled #f) +(define (debug-log-on!) (set! %debug-log-enabled #t)) +(define (debug-log-off!) (set! %debug-log-enabled #f)) +(define (debug-log? ) %debug-log-enabled) + +(define (debug-log msg . irritants) + (cond + (%debug-log-enabled + (let* ((head (bytevector-append "[cc] " (format "~a" msg))) + (tail (if (null? irritants) + (list NL-BV) + (let walk ((xs irritants) (sep ": ") (acc '())) + (if (null? xs) + (reverse (cons NL-BV acc)) + (walk (cdr xs) + " " + (cons (format "~a" (car xs)) + (cons sep acc))))))) + (out (bv-cat (cons head tail)))) + (write-bv-fd 2 out))) + (else #t))) + +;; -------------------------------------------------------------------- +;; fresh-name generator (used for cg label counters, etc.) +;; -------------------------------------------------------------------- +(define (make-namer prefix) + ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh + ;; bv. The counter lives in the closure's lexical environment; scheme1 + ;; closures heap-capture by reference, so set! on ctr is sticky. + (let ((ctr 0)) + (lambda () + (let ((s (bytevector-append prefix (number->string ctr 10)))) + (set! ctr (+ ctr 1)) + s)))) +;; cc/data.scm — record types and symbol alphabets shared across modules. +;; +;; Concrete realization of: +;; docs/CC-INTERNALS.md §data.scm +;; docs/CC-CONTRACTS.md §1 +;; +;; Adding a record or alphabet symbol requires updating the contract +;; doc first. + +;; -------------------------------------------------------------------- +;; loc — source location for diagnostics +;; -------------------------------------------------------------------- +(define-record-type loc + (%loc file line col) + loc? + (file loc-file) ; bv + (line loc-line) ; fixnum + (col loc-col)) ; fixnum + +;; -------------------------------------------------------------------- +;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for +;; PUNCT value symbols, §1.3 for KW value symbols. +;; -------------------------------------------------------------------- +(define-record-type tok + (%tok kind value loc hide) + tok? + (kind tok-kind) ; symbol from §1.1 + (value tok-value) ; bv | fixnum | symbol | #f + (loc tok-loc) ; loc + (hide tok-hide)) ; list of bv (macro names already expanded) + +(define (make-tok kind value loc) + (%tok kind value loc '())) + +;; -------------------------------------------------------------------- +;; macro — preprocessor macro definition +;; -------------------------------------------------------------------- +(define-record-type macro + (%macro kind params body) + macro? + (kind macro-kind) ; 'obj | 'fn | 'fn-vararg + (params macro-params) ; list of bv + (body macro-body)) ; list of tok + +;; -------------------------------------------------------------------- +;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and +;; CC-INTERNALS §data.scm for the ext payload table. +;; +;; Fields that mutate over a ctype's lifetime: +;; size and align — set to -1/-1 on forward struct/union decl, +;; fixed when the type is completed. +;; ext — same; struct/union ext changes shape when +;; the body is parsed. +;; -------------------------------------------------------------------- +(define-record-type ctype + (%ctype kind size align ext) + ctype? + (kind ctype-kind) + (size ctype-size ctype-size-set!) + (align ctype-align ctype-align-set!) + (ext ctype-ext ctype-ext-set!)) + +;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?. +(define %t-void (%ctype 'void -1 -1 #f)) +(define %t-i8 (%ctype 'i8 1 1 #f)) +(define %t-u8 (%ctype 'u8 1 1 #f)) +(define %t-i16 (%ctype 'i16 2 2 #f)) +(define %t-u16 (%ctype 'u16 2 2 #f)) +(define %t-i32 (%ctype 'i32 4 4 #f)) +(define %t-u32 (%ctype 'u32 4 4 #f)) +(define %t-i64 (%ctype 'i64 8 8 #f)) +(define %t-u64 (%ctype 'u64 8 8 #f)) +(define %t-bool (%ctype 'bool 1 1 #f)) + +;; -------------------------------------------------------------------- +;; sym — declared identifier (function, variable, typedef, …) +;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage). +;; -------------------------------------------------------------------- +(define-record-type sym + (%sym name kind storage type slot) + sym? + (name sym-name) ; bv + (kind sym-kind) ; symbol from §1.7 + (storage sym-storage) ; symbol from §1.8 or #f + (type sym-type) ; ctype + (slot sym-slot)) ; fixnum | bv | #f, per kind + +;; -------------------------------------------------------------------- +;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind), +;; §1.10 (reg names). +;; -------------------------------------------------------------------- +(define-record-type opnd + (%opnd kind type ext lval?) + opnd? + (kind opnd-kind) + (type opnd-type) + (ext opnd-ext) + (lval? opnd-lval?)) + +;; -------------------------------------------------------------------- +;; loop-ctx — entry on parser's loop/switch context stack. +;; See CC-CONTRACTS §1.9. +;; -------------------------------------------------------------------- +(define-record-type loop-ctx + (%loop-ctx kind tag has-continue?) + loop-ctx? + (kind loop-ctx-kind) + (tag loop-ctx-tag) + (has-continue? loop-ctx-has-continue?)) + +;; -------------------------------------------------------------------- +;; fn-ctx — current-function context inside the parser. +;; -------------------------------------------------------------------- +(define-record-type fn-ctx + (%fn-ctx name return-type params variadic? labels) + fn-ctx? + (name fn-ctx-name) + (return-type fn-ctx-return-type) + (params fn-ctx-params) + (variadic? fn-ctx-variadic?) + (labels fn-ctx-labels fn-ctx-labels-set!)) + +;; -------------------------------------------------------------------- +;; pstate — parser state. Owned by parse.scm; read-only to cg. +;; -------------------------------------------------------------------- +(define-record-type pstate + (%pstate toks scope tags loops fn-ctx typedefs cg) + pstate? + (toks ps-toks ps-toks-set!) + (scope ps-scope ps-scope-set!) + (tags ps-tags ps-tags-set!) + (loops ps-loops ps-loops-set!) + (fn-ctx ps-fn-ctx ps-fn-ctx-set!) + (typedefs ps-typedefs ps-typedefs-set!) + (cg ps-cg)) + +;; -------------------------------------------------------------------- +;; cg — codegen state. Owned by cg.scm. +;; -------------------------------------------------------------------- +;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across +;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains +;; them into cg-text via buf-drain!. No per-fn allocation, which is what +;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely: +;; the destination buf storage lives pre-mark, byte writes are stable +;; across rewind, and the parse/cg scratch dies cleanly. +;; +;; in-fn? discriminates "currently inside a function body" so +;; %cg-emit-buf can route emits to fn-buf during the body and cg-text +;; outside it (entry stub, etc.). +;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern). +;; Stable except when user code adds a global — which is exactly what the +;; parse-fn-body rewind-safety check probes. +;; +;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type, +;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on +;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept +;; out of cg-globals so rewind-safety checks on cg-globals aren't +;; tripped by every fn-begin. +(define-record-type cg + (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?) + cg? + (text cg-text) + (data cg-data) + (bss cg-bss) + (vstack cg-vstack cg-vstack-set!) + (frame-hi cg-frame-hi cg-frame-hi-set!) + (label-ctr cg-label-ctr cg-label-ctr-set!) + (str-pool cg-str-pool cg-str-pool-set!) + (globals cg-globals cg-globals-set!) + (fn-meta cg-fn-meta cg-fn-meta-set!) + (fn-buf cg-fn-buf) + (prologue-buf cg-prologue-buf) + (max-outgoing cg-max-outgoing cg-max-outgoing-set!) + (in-fn? cg-in-fn? cg-in-fn?-set!)) + +;; -------------------------------------------------------------------- +;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1. +;; -------------------------------------------------------------------- + +;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol. +(define %keyword-alist + '(;; storage + ("auto" . auto) ("register" . register) ("static" . static) + ("extern" . extern) ("typedef" . typedef) + ;; qualifiers (parsed and discarded by parse) + ("const" . const) ("volatile" . volatile) ("restrict" . restrict) + ("inline" . inline) + ;; type specifiers + ("void" . void) ("char" . char) ("short" . short) + ("int" . int) ("long" . long) + ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool) + ;; rejected type specifiers (KW so diagnostics are crisp) + ("float" . float) ("double" . double) + ;; aggregates + ("struct" . struct) ("union" . union) ("enum" . enum) + ;; statements + ("if" . if) ("else" . else) + ("while" . while) ("do" . do) ("for" . for) + ("switch" . switch) ("case" . case) ("default" . default) + ("break" . break) ("continue" . continue) + ("return" . return) ("goto" . goto) + ;; operators + ("sizeof" . sizeof) + ;; reserved-and-rejected (KW so diagnostics are crisp) + ("_Generic" . _Generic) ("_Atomic" . _Atomic) + ("_Thread_local" . _Thread_local) + ("_Alignof" . _Alignof) ("_Alignas" . _Alignas) + ("_Static_assert" . _Static_assert) + ("_Complex" . _Complex) ("_Imaginary" . _Imaginary))) + +;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol. +;; Listed longest-match-first; the lexer scans this list in order. +;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents. +(define %punct-alist + '(;; 4-byte + ("%:%:" . paste) + ;; 3-byte + ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq) + ;; 2-byte + ("##" . paste) ("->" . arrow) + ("++" . inc) ("--" . dec) + ("<<" . shl) (">>" . shr) + ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne) + ("&&" . land) ("||" . lor) + ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq) + ("/=" . slash-eq) ("%=" . pct-eq) + ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq) + ;; digraphs (mapped to the standard equivalent symbol) + ("<:" . lbrack) (":>" . rbrack) + ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash) + ;; 1-byte + ("[" . lbrack) ("]" . rbrack) + ("(" . lparen) (")" . rparen) + ("{" . lbrace) ("}" . rbrace) + ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark) + ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct) + ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang) + ("<" . lt) (">" . gt) ("=" . assign) + ("#" . hash))) +;; cc/lex.scm — bytestream → token list. Pure function; no I/O, +;; no macro awareness. +;; +;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets +;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate. +;; +;; Owner: <unassigned> +;; +;; Implementation notes: +;; +;; - The lexer walks `src` byte-by-byte, threading (pos, line, col) +;; explicitly through every helper (no mutable state). Each token +;; captures its starting loc; helpers return (tok npos nline ncol). +;; - Trigraphs and `\<newline>` line splicing are handled via a single +;; logical-byte primitive `%lex-peek`: it advances over splices and +;; translates trigraphs in-place, so downstream code only ever sees +;; the "translation phase 2" stream. +;; - Comments are stripped at the same level as whitespace. +;; - NL tokens are emitted at every physical newline so pp can use +;; them to terminate directives. +;; +;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm): +;; +;; - Token-producing helpers wrap their inner work in a heap-mark / +;; heap-rewind! arena. The slots that must survive the rewind +;; (start-loc and the integer holders for npos/nline/ncol) are bound +;; *before* the (set! mark (heap-mark)) so the let's env extensions +;; live below the mark. The byte-run scanners' tail-call env frames +;; and any %lex-peek 4-lists are above the mark and get reclaimed. +;; For helpers that produce a fresh bytevector (ident, string), the +;; bv is allocated post-rewind so it persists into the parent arena. +;; - Numeric digit runs accumulate their value inline via +;; %accum-int-while; they no longer materialize a per-byte cons list +;; and then a separate %digits-value walk. + +;; -------------------------------------------------------------------- +;; Byte-class predicates (raw u8 values, not chars). +;; -------------------------------------------------------------------- +(define (%digit? b) (if (< b 48) #f (if (< 57 b) #f #t))) ; '0'..'9' +(define (%hex? b) + (cond ((%digit? b) #t) + ((if (< b 65) #f (if (< 70 b) #f #t)) #t) ; 'A'..'F' + ((if (< b 97) #f (if (< 102 b) #f #t)) #t) ; 'a'..'f' + (else #f))) +(define (%octal? b) (if (< b 48) #f (if (< 55 b) #f #t))) ; '0'..'7' +(define (%alpha? b) + (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t) ; 'A'..'Z' + ((if (< b 97) #f (if (< 122 b) #f #t)) #t) ; 'a'..'z' + (else #f))) +(define (%ident-start? b) (or (%alpha? b) (= b 95))) ; '_' +(define (%ident-cont? b) (or (%ident-start? b) (%digit? b))) +(define (%hspace? b) (or (= b 32) (= b 9) (= b 11) (= b 12))) ; SP TAB VT FF +(define (%newline? b) (= b 10)) ; '\n' + +;; -------------------------------------------------------------------- +;; Logical byte access. %lex-peek returns +;; (byte npos nline ncol) +;; where (npos, nline, ncol) points *just past* the consumed physical +;; bytes. On EOF it returns (#f pos line col). +;; +;; Two transformations folded in here: +;; +;; - Trigraphs: ??= ??( ??/ ??) ??' ??< ??! ??> ??- +;; # [ \ ] ^ { | } ~ +;; The pair `??` followed by one of the nine trigraph completers +;; produces the translated byte and advances 3 source bytes. +;; - Line splice: a backslash immediately followed by `\n` is removed +;; as a unit (incrementing line, resetting col to 1) and we recurse +;; to fetch the next logical byte. +;; +;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and +;; char literals do their own escape-handling. +;; -------------------------------------------------------------------- +(define (%trigraph-byte b) + ;; Map the third trigraph byte to its replacement, or #f. + (cond ((= b 61) 35) ; '=' -> '#' + ((= b 40) 91) ; '(' -> '[' + ((= b 47) 92) ; '/' -> '\\' + ((= b 41) 93) ; ')' -> ']' + ((= b 39) 94) ; '\'' -> '^' + ((= b 60) 123) ; '<' -> '{' + ((= b 33) 124) ; '!' -> '|' + ((= b 62) 125) ; '>' -> '}' + ((= b 45) 126) ; '-' -> '~' + (else #f))) + +(define (%lex-peek src pos line col) + (let ((n (bytevector-length src))) + (cond + ((>= pos n) (list #f pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ;; Trigraph: ?? + completer + ((and (= b 63) + (< (+ pos 2) n) + (= (bytevector-u8-ref src (+ pos 1)) 63)) + (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2))))) + (if tr + (list tr (+ pos 3) line (+ col 3)) + (list b (+ pos 1) line (+ col 1))))) + ;; Line splice: backslash + newline (consume both, no token) + ((and (= b 92) + (< (+ pos 1) n) + (= (bytevector-u8-ref src (+ pos 1)) 10)) + (%lex-peek src (+ pos 2) (+ line 1) 1)) + ;; Newline: pass through but caller decides line/col bump + ((%newline? b) + (list b (+ pos 1) (+ line 1) 1)) + (else + (list b (+ pos 1) line (+ col 1))))))))) + +;; Convenience accessors over the 4-list. +(define (%pk-byte p) (car p)) +(define (%pk-pos p) (car (cdr p))) +(define (%pk-line p) (car (cdr (cdr p)))) +(define (%pk-col p) (car (cdr (cdr (cdr p))))) + +;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with +;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the +;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and +;; no list allocation is needed. Excludes the three bytes that %lex-peek +;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump). +(define (%fast-byte? b) + (cond ((= b 63) #f) + ((= b 92) #f) + ((= b 10) #f) + (else #t))) + +;; -------------------------------------------------------------------- +;; Whitespace + comment skipper. Returns (pos line col). +;; Handles spaces/tabs, // line comments, /* block */ comments. Does +;; *not* consume `\n` — newlines are tokens. +;; -------------------------------------------------------------------- +(define (%skip-ws-and-comments src pos line col file) + (let ((n (bytevector-length src))) + (cond + ((>= pos n) (list pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((and (%fast-byte? b) (%hspace? b)) + (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file)) + ((%fast-byte? b) + ;; Fast-byte that isn't hspace. Only '/' is interesting; + ;; everything else terminates the skip. + (cond + ((= b 47) (%maybe-comment src pos line col file)) + (else (list pos line col)))) + (else + ;; Slow path: trigraph / splice / newline. + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((not b2) (list pos line col)) + ((%hspace? b2) + (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p) + file)) + ((= b2 47) (%maybe-comment src pos line col file)) + (else (list pos line col))))))))))) + +(define (%maybe-comment src pos line col file) + ;; Source byte at pos resolves to '/'. Decide between // line comment, + ;; /* block comment, or "leave the slash alone" (it's a punctuator). + (let* ((p (%lex-peek src pos line col)) + (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (cond + ((and b2 (= b2 47)) + (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file)) + ((and b2 (= b2 42)) + (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q) + file line col)) + (else (list pos line col))))) + +(define (%skip-line-comment src pos line col file) + ;; Consume bytes until end-of-stream or until we *see* '\n' (do not + ;; consume the newline itself; outer loop emits the NL). + (let ((n (bytevector-length src))) + (cond + ((>= pos n) (%skip-ws-and-comments src pos line col file)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ;; '\n' terminates without consuming. + ((= b 10) (%skip-ws-and-comments src pos line col file)) + ((%fast-byte? b) + (%skip-line-comment src (+ pos 1) line (+ col 1) file)) + (else + ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice. + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((not b2) (%skip-ws-and-comments src pos line col file)) + ((%newline? b2) (%skip-ws-and-comments src pos line col file)) + (else + (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p) + file))))))))))) + +(define (%skip-block-comment src pos line col file start-line start-col) + (let ((n (bytevector-length src))) + (cond + ((>= pos n) + (die (%loc file start-line start-col) + "unterminated /* block comment")) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ;; Fast path for plain content bytes that aren't '*'. + ((and (%fast-byte? b) (not (= b 42))) + (%skip-block-comment src (+ pos 1) line (+ col 1) + file start-line start-col)) + (else + ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice). + (let* ((p (%lex-peek src pos line col)) + (b1 (%pk-byte p))) + (cond + ((not b1) + (die (%loc file start-line start-col) + "unterminated /* block comment")) + ((= b1 42) + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (cond + ((not b2) + (die (%loc file start-line start-col) + "unterminated /* block comment")) + ((= b2 47) + (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q) + file)) + (else + ;; Re-scan starting at the byte after '*'; the '*' was + ;; not the closer, but the next byte might itself be '*'. + (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) + file start-line start-col))))) + (else + (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) + file start-line start-col))))))))))) + +;; -------------------------------------------------------------------- +;; Byte-run scanners. +;; +;; Tail-recursive walkers used by ident/number/string readers. None +;; allocate per scanned byte on the fast path (only %lex-peek 4-lists +;; on trigraph/splice/newline); the per-iteration env frames allocated +;; by tail recursion are reclaimed by the caller's heap-rewind!. +;; +;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol) +;; - %fill-while-bv: write matching bytes into a pre-sized bv. +;; - %accum-int-while: accumulate a base-N integer over digit bytes. +;; (val count npos nline ncol) +;; - %accum-octal-bounded: same, but stops after k digits. +;; -------------------------------------------------------------------- +(define (%scan-while pred src pos line col) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (cnt 0)) + (cond + ((>= pos n) (list cnt pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (if (pred b) + (loop (+ pos 1) line (+ col 1) (+ cnt 1)) + (list cnt pos line col))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (if (and b2 (pred b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1)) + (list cnt pos line col))))))))))) + +(define (%fill-while-bv pred src pos line col bv idx) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (idx idx)) + (cond + ((>= pos n) idx) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (cond + ((pred b) + (bytevector-u8-set! bv idx b) + (loop (+ pos 1) line (+ col 1) (+ idx 1))) + (else idx))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((and b2 (pred b2)) + (bytevector-u8-set! bv idx b2) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))) + (else idx))))))))))) + +(define (%digit-val-byte b) + ;; ASCII digit byte → integer value. Caller guarantees b is a valid + ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F). + (cond ((%digit? b) (- b 48)) + ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10)) + ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10)) + (else 0))) + +(define (%accum-int-while pred src pos line col base) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0)) + (cond + ((>= pos n) (list val cnt pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (if (pred b) + (loop (+ pos 1) line (+ col 1) + (+ (* val base) (%digit-val-byte b)) (+ cnt 1)) + (list val cnt pos line col))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (if (and b2 (pred b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) + (+ (* val base) (%digit-val-byte b2)) (+ cnt 1)) + (list val cnt pos line col))))))))))) + +(define (%accum-octal-bounded src pos line col k) + ;; Up to k octal digits. Returns (val count npos nline ncol). + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0)) + (cond + ((zero? k) (list val cnt pos line col)) + ((>= pos n) (list val cnt pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (if (%octal? b) + (loop (+ pos 1) line (+ col 1) (- k 1) + (+ (* val 8) (- b 48)) (+ cnt 1)) + (list val cnt pos line col))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (if (and b2 (%octal? b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1) + (+ (* val 8) (- b2 48)) (+ cnt 1)) + (list val cnt pos line col))))))))))) + +;; -------------------------------------------------------------------- +;; Identifier / keyword reader. +;; +;; Returns (tok npos nline ncol). Caller has already verified that the +;; first byte at `pos` satisfies %ident-start?. +;; +;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run, +;; then we rewind, allocate `name` bv post-rewind so it survives, then +;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The +;; integer slots count/npos/nline/ncol are bound *before* the mark so +;; they survive both rewinds. +;; -------------------------------------------------------------------- +(define (lex-read-ident src pos file) + ;; Public for tests. Threads line/col from a fresh start. + (%lex-read-ident src pos 1 (+ pos 1) file)) + +(define (%lex-read-ident src pos line col file) + (let ((start-loc (%loc file line col)) + (count 0) (npos 0) (nline 0) (ncol 0) + (mark 0)) + (set! mark (heap-mark)) + (let ((sres (%scan-while %ident-cont? src pos line col))) + (set! count (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))) + (heap-rewind! mark) + (let ((name (make-bytevector count 0)) + (mark2 0)) + (set! mark2 (heap-mark)) + (%fill-while-bv %ident-cont? src pos line col name 0) + (heap-rewind! mark2) + (let ((kw (alist-ref name %keyword-alist))) + (cons (if kw + (make-tok 'KW kw start-loc) + (make-tok 'IDENT name start-loc)) + (list npos nline ncol)))))) + +;; -------------------------------------------------------------------- +;; Number reader. +;; +;; Decimal: [1-9][0-9]* (suffix: u U l L ll LL combinations) +;; Hex: 0x[0-9a-fA-F]+ | 0X... +;; Octal: 0[0-7]* +;; Float: anything looking like 1.0, 1e3, .5 → die crisply. +;; +;; Returns (tok npos nline ncol) on success. Aborts via `die` on float. +;; +;; %accum-int-while folds digit collection and value computation into +;; one walk — no per-byte cons cells, no separate digits-list pass. +;; -------------------------------------------------------------------- +(define (lex-read-number src pos file) + (%lex-read-number src pos 1 (+ pos 1) file)) + +(define (%lex-read-number src pos line col file) + (let* ((start-loc (%loc file line col)) + (p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ;; '0x' / '0X' hex prefix + ((and (= b 48) + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X' + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (r (%accum-int-while %hex? src + (%pk-pos q) (%pk-line q) (%pk-col q) 16)) + (val (car r)) + (cnt (car (cdr r))) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + (if (zero? cnt) + (die start-loc "expected hex digits after 0x") + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) + (cons (make-tok 'INT val start-loc) after))))) + ;; '0' alone → octal sequence (could be just zero) + ((= b 48) + (let* ((r (%accum-int-while %octal? src + (%pk-pos p) (%pk-line p) (%pk-col p) 8)) + (val (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + ;; Reject '.' / 'e' / 'E' immediately after the octal run — float. + (%check-no-float src pos2 line2 col2 file start-loc) + ;; Reject stray digits 8/9 in an octal context (e.g. 089). + (let* ((p3 (%lex-peek src pos2 line2 col2)) + (b3 (%pk-byte p3))) + (if (and b3 (%digit? b3)) + (die start-loc "invalid octal digit" (bv-of-byte b3)) + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) + (cons (make-tok 'INT val start-loc) after)))))) + ;; '1'-'9' → decimal + ((%digit? b) + (let* ((r (%accum-int-while %digit? src pos line col 10)) + (val (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + (%check-no-float src pos2 line2 col2 file start-loc) + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) + (cons (make-tok 'INT val start-loc) after)))) + ;; '.' followed by a digit = float-style literal — reject. + ((= b 46) + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (if (and b2 (%digit? b2)) + (die start-loc "floating-point literal not supported") + ;; Otherwise '.' was a punctuator — caller wouldn't have + ;; routed here unless it was a digit-led prefix. + (die start-loc "internal: number reader on non-number")))) + (else + (die start-loc "internal: number reader on non-number"))))) + +(define (%check-no-float src pos line col file start-loc) + ;; If the byte at pos starts a fractional/exponent part, abort. + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) #t) + ((= b 46) ; '.' + (die start-loc "floating-point literal not supported")) + ((or (= b 101) (= b 69)) ; 'e' / 'E' + ;; Only a float exponent if followed by [+-]?digit. + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (cond + ((and b2 (%digit? b2)) + (die start-loc "floating-point literal not supported")) + ((and b2 (or (= b2 43) (= b2 45))) + (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q))) + (b3 (%pk-byte r))) + (if (and b3 (%digit? b3)) + (die start-loc "floating-point literal not supported") + #t))) + (else #t)))) + (else #t)))) + +(define (%lex-strip-int-suffix src pos line col file) + ;; Consume any combination of u U l L (the long can be doubled). We + ;; don't validate orderings strictly; tcc.c uses the canonical forms. + ;; Returns (npos nline ncol). + (let loop ((pos pos) (line line) (col col)) + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (list pos line col)) + ((or (= b 117) (= b 85) ; u U + (= b 108) (= b 76)) ; l L + (loop (%pk-pos p) (%pk-line p) (%pk-col p))) + (else (list pos line col)))))) + +;; -------------------------------------------------------------------- +;; Escape sequence reader. +;; +;; %scan-or-fill-escape decodes one escape sequence starting at `pos` +;; (which points one past the leading `\\`). When `bv` is a bytevector, +;; the resulting byte is written to (bv idx); when it is #f, no write +;; occurs (used during the string-pass scan phase). Returns the 4-list +;; (val npos nline ncol). +;; -------------------------------------------------------------------- +(define (%scan-or-fill-escape src pos line col file start-loc bv idx) + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (die start-loc "unterminated escape sequence")) + ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms). + ((or (= b 120) (= b 88)) ; 'x' / 'X' + (let* ((r (%accum-int-while %hex? src + (%pk-pos p) (%pk-line p) (%pk-col p) 16)) + (val0 (car r)) + (cnt (car (cdr r))) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + (cond + ((zero? cnt) (die start-loc "expected hex digits after \\x")) + (else + (let ((val (bit-and val0 255))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val pos2 line2 col2)))))) + ;; \NNN — 1..3 octal digits. + ((%octal? b) + (let* ((r (%accum-octal-bounded src pos line col 3)) + (val0 (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r)))))) + (val (bit-and val0 255))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val pos2 line2 col2))) + (else + (let ((val (cond ((= b 110) 10) ; n + ((= b 116) 9) ; t + ((= b 114) 13) ; r + ((= b 92) 92) ; \\ + ((= b 39) 39) ; ' + ((= b 34) 34) ; " + ((= b 48) 0) ; 0 (already handled by octal but be safe) + ((= b 97) 7) ; \a -> BEL + ((= b 98) 8) ; \b + ((= b 102) 12) ; \f + ((= b 118) 11) ; \v + ((= b 63) 63) ; \? + (else + (die start-loc "unknown escape" (bv-of-byte b)))))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val (%pk-pos p) (%pk-line p) (%pk-col p))))))) + +;; -------------------------------------------------------------------- +;; String reader. +;; +;; Caller has verified src[pos] == '"' (raw byte 34). Returns +;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended). +;; +;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes +;; collapse to 1 byte each); after rewind we allocate the final bv and +;; rerun with bv set so the bytes are written directly into it. +;; -------------------------------------------------------------------- +(define (lex-read-string src pos file) + (%lex-read-string src pos 1 (+ pos 1) file)) + +(define (%lex-read-string src pos line col file) + (let ((start-loc (%loc file line col)) + (cnt 0) (npos 0) (nline 0) (ncol 0) + (mark 0)) + ;; '"' (34) is a fast-byte and never a trigraph result, so the + ;; physical byte at `pos` is exactly the opening quote. + (cond + ((or (>= pos (bytevector-length src)) + (not (= (bytevector-u8-ref src pos) 34))) + (die start-loc "internal: string reader on non-quote")) + (else + (set! mark (heap-mark)) + (let ((sres (%string-pass src (+ pos 1) line (+ col 1) + file start-loc #f))) + (set! cnt (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))) + (heap-rewind! mark) + (let ((bv (make-bytevector cnt 0)) + (mark2 0)) + (set! mark2 (heap-mark)) + (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv) + (heap-rewind! mark2) + (cons (make-tok 'STR bv start-loc) + (list npos nline ncol))))))) + +(define (%string-pass src pos line col file start-loc bv) + ;; Walk the string body (after opening "). When `bv` is #f, count + ;; effective bytes; when it is a bytevector, write bytes into it at + ;; index 0..count-1. Returns (count npos nline ncol). + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (idx 0)) + (cond + ((>= pos n) (die start-loc "unterminated string literal")) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ;; Closing quote — fast byte but special. + ((= b 34) + (list idx (+ pos 1) line (+ col 1))) + ((%fast-byte? b) + (cond (bv (bytevector-u8-set! bv idx b)) + (else #f)) + (loop (+ pos 1) line (+ col 1) (+ idx 1))) + (else + ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'. + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((not b2) + (die start-loc "unterminated string literal")) + ((= b2 34) + (list idx (%pk-pos p) (%pk-line p) (%pk-col p))) + ((%newline? b2) + (die start-loc "newline in string literal")) + ((= b2 92) + (let* ((er (%scan-or-fill-escape + src (%pk-pos p) (%pk-line p) (%pk-col p) + file start-loc bv idx)) + (epos (car (cdr er))) + (eline (car (cdr (cdr er)))) + (ecol (car (cdr (cdr (cdr er)))))) + (loop epos eline ecol (+ idx 1)))) + (else + (cond (bv (bytevector-u8-set! bv idx b2)) + (else #f)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))))))))))))) + +;; -------------------------------------------------------------------- +;; Char reader. +;; +;; Caller has verified src[pos] == '\''. Multi-character constants +;; ('AB') are rejected via die. +;; -------------------------------------------------------------------- +(define (lex-read-char src pos file) + (%lex-read-char src pos 1 (+ pos 1) file)) + +(define (%lex-read-char src pos line col file) + (let* ((start-loc (%loc file line col)) + (p0 (%lex-peek src pos line col)) + (b0 (%pk-byte p0))) + (if (not (and b0 (= b0 39))) + (die start-loc "internal: char reader on non-quote") + (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0) + file start-loc)))) + +(define (%collect-char src pos line col file start-loc) + ;; Read exactly one byte (handling escapes), then expect closing '\''. + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (die start-loc "unterminated char literal")) + ((= b 39) (die start-loc "empty char literal")) + ((%newline? b) (die start-loc "newline in char literal")) + ((= b 92) ; escape + (let* ((r (%scan-or-fill-escape src + (%pk-pos p) (%pk-line p) (%pk-col p) + file start-loc #f 0)) + (val (car r)) + (pos2 (car (cdr r))) + (line2 (car (cdr (cdr r)))) + (col2 (car (cdr (cdr (cdr r)))))) + (%expect-char-close src pos2 line2 col2 file start-loc val))) + (else + (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p) + file start-loc b))))) + +(define (%expect-char-close src pos line col file start-loc val) + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (die start-loc "unterminated char literal")) + ((= b 39) + (cons (make-tok 'CHAR val start-loc) + (list (%pk-pos p) (%pk-line p) (%pk-col p)))) + (else + (die start-loc "multi-character char constant not supported"))))) + +;; -------------------------------------------------------------------- +;; Punctuator reader. +;; +;; Greedy longest-match against %punct-alist (cc/data.scm). The alist +;; is already ordered longest-first. We additionally bucket entries by +;; their first byte so %lex-read-punct only loops over the small set of +;; patterns that can start at the current source byte. +;; -------------------------------------------------------------------- + +(define (%alist-ref-int k al) + ;; Lookup in an int-keyed alist (linear scan, '= compare). + (cond ((null? al) #f) + ((= (car (car al)) k) (cdr (car al))) + (else (%alist-ref-int k (cdr al))))) + +(define (%mem-int? k xs) + (cond ((null? xs) #f) + ((= (car xs) k) #t) + (else (%mem-int? k (cdr xs))))) + +(define (%filter-by-first-byte b al) + ;; Subset of `al` whose pattern starts with byte b, preserving order. + (cond + ((null? al) '()) + ((= (bytevector-u8-ref (car (car al)) 0) b) + (cons (car al) (%filter-by-first-byte b (cdr al)))) + (else (%filter-by-first-byte b (cdr al))))) + +(define (%group-by-first-byte al) + ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per + ;; distinct first byte; sub-alist preserves longest-match-first + ;; order from the source list. + (let loop ((xs al) (seen '()) (out '())) + (cond + ((null? xs) (reverse out)) + (else + (let* ((entry (car xs)) + (pat (car entry)) + (b (bytevector-u8-ref pat 0))) + (cond + ((%mem-int? b seen) (loop (cdr xs) seen out)) + (else + (loop (cdr xs) + (cons b seen) + (cons (cons b (%filter-by-first-byte b al)) out))))))))) + +(define %punct-buckets (%group-by-first-byte %punct-alist)) + +(define (lex-read-punct src pos file) + (%lex-read-punct src pos 1 (+ pos 1) file)) + +(define (%lex-read-punct src pos line col file) + (let* ((start-loc (%loc file line col)) + (p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (die start-loc "unrecognized byte" "EOF")) + (else + (let ((bucket (%alist-ref-int b %punct-buckets))) + (cond + ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b))) + (else (%punct-loop src pos line col file start-loc bucket)))))))) + +(define (%punct-loop src pos line col file start-loc al) + (cond + ((null? al) + (let* ((p (%lex-peek src pos line col))) + (die start-loc "unrecognized byte" + (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF")))) + (else + (let* ((entry (car al)) + (pat (car entry)) + (sym (cdr entry)) + (m (%match-bytes src pos line col pat 0))) + (if m + (cons (make-tok 'PUNCT sym start-loc) m) + (%punct-loop src pos line col file start-loc (cdr al))))))) + +(define (%match-bytes src pos line col pat i) + ;; If the next bytes from (pos line col), in logical-byte stream + ;; order, equal `pat[i..]`, return (npos nline ncol) after the + ;; match. Otherwise #f. + (cond + ((= i (bytevector-length pat)) (list pos line col)) + (else + (let ((n (bytevector-length src))) + (cond + ((>= pos n) #f) + (else + (let ((b (bytevector-u8-ref src pos)) + (pb (bytevector-u8-ref pat i))) + (cond + ((%fast-byte? b) + (if (= b pb) + (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1)) + #f)) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((not b2) #f) + ((= b2 pb) + (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p) + pat (+ i 1))) + (else #f)))))))))))) + +;; -------------------------------------------------------------------- +;; lex-tokenize src file -> list of tok ending in EOF. +;; -------------------------------------------------------------------- +(define (lex-tokenize src file) + (%lex-loop src 0 1 1 file '() #t)) + +;; bol? — `#t` when no token has been emitted on the current physical +;; line yet (start of file, or only NL + whitespace seen since the last +;; line break). pp recognizes a directive only when its leading `#` is +;; at line-start; we forward that decision into the token stream by +;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`. +(define (%lex-loop src pos line col file acc bol?) + (let* ((sw (%skip-ws-and-comments src pos line col file)) + (pos1 (car sw)) + (line1 (car (cdr sw))) + (col1 (car (cdr (cdr sw)))) + (p (%lex-peek src pos1 line1 col1)) + (b (%pk-byte p))) + (cond + ;; EOF + ((not b) + (let* ((eof-tok (make-tok 'EOF #f (%loc file line1 col1)))) + (reverse (cons eof-tok acc)))) + ;; Newline → emit NL, reset bol?. + ((%newline? b) + (let ((nl (make-tok 'NL #f (%loc file line1 col1)))) + (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p) + file (cons nl acc) #t))) + ;; Line-leading `#` → emit HASH, but only the bare `#`. `##` is + ;; never line-leading in valid C; if it appears, fall through to + ;; normal punctuator handling so it lexes as `paste`. + ((and bol? (= b 35)) + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (cond + ((and b2 (= b2 35)) + (let* ((r (%lex-read-punct src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f))) + (else + (let ((tok (make-tok 'HASH #f (%loc file line1 col1)))) + (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p) + file (cons tok acc) #f)))))) + ;; Identifier / keyword + ((%ident-start? b) + (let* ((r (%lex-read-ident src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f))) + ;; Number (digit start) + ((%digit? b) + (let* ((r (%lex-read-number src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f))) + ;; '.' might start a number (1.0 actually starts with digit; .5 + ;; would route here). We keep this as a punctuator unless followed + ;; by a digit, in which case the lexer rejects per spec. + ((= b 46) + (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) + (b2 (%pk-byte q))) + (cond + ((and b2 (%digit? b2)) + (die (%loc file line1 col1) "floating-point literal not supported")) + (else + (let* ((r (%lex-read-punct src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f)))))) + ;; String + ((= b 34) + (let* ((r (%lex-read-string src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f))) + ;; Char + ((= b 39) + (let* ((r (%lex-read-char src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f))) + ;; Punctuator (default) + (else + (let* ((r (%lex-read-punct src pos1 line1 col1 file)) + (tok (car r)) + (npos (car (cdr r))) + (nline (car (cdr (cdr r)))) + (ncol (car (cdr (cdr (cdr r)))))) + (%lex-loop src npos nline ncol file (cons tok acc) #f)))))) +;; cc/pp.scm — token list -> expanded token list. +;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4. +;; #include rejected (CC.md §Toolchain envelope). + +;; --- helpers (TODO: promote to util.scm if shared more broadly) --- +(define (%pp-bv-mem? x xs) + (cond ((null? xs) #f) + ((bv= x (car xs)) #t) + (else (%pp-bv-mem? x (cdr xs))))) + +(define (%pp-bv-union a b) + (cond ((null? a) b) + ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b)) + (else (cons (car a) (%pp-bv-union (cdr a) b))))) + +(define (%pp-with-hide t hide) + (%tok (tok-kind t) (tok-value t) (tok-loc t) hide)) +(define (%pp-with-loc t loc) + (%tok (tok-kind t) (tok-value t) loc (tok-hide t))) + +;; --- pp-state (private record) --- +;; cond-stack: list of (active? . has-taken?). Outer-active gating is +;; computed by walking the stack rather than encoding it in frames. +(define-record-type pp-state + (%pp-state macros cond-stack cur-file line-delta) + pp-state? + (macros pps-macros pps-macros-set!) + (cond-stack pps-cond-stack pps-cond-stack-set!) + (cur-file pps-cur-file pps-cur-file-set!) + (line-delta pps-line-delta pps-line-delta-set!)) + +(define (%pp-make-state defs) (%pp-state defs '() #f 0)) + +(define (%pp-active? state) + (let loop ((xs (pps-cond-stack state))) + (cond ((null? xs) #t) + ((not (car (car xs))) #f) + (else (loop (cdr xs)))))) + +;; Active for the *parent* of the top frame (used by elif/else). +(define (%pp-parent-active? state) + (let ((cs (pps-cond-stack state))) + (cond ((null? cs) #t) + (else + (let loop ((xs (cdr cs))) + (cond ((null? xs) #t) + ((not (car (car xs))) #f) + (else (loop (cdr xs))))))))) + +;; --- token classification --- +(define (%pp-eof? t) (eq? (tok-kind t) 'EOF)) +(define (%pp-nl? t) (eq? (tok-kind t) 'NL)) +(define (%pp-hash? t) (eq? (tok-kind t) 'HASH)) +(define (%pp-ident? t) (eq? (tok-kind t) 'IDENT)) +(define (%pp-int? t) (eq? (tok-kind t) 'INT)) +(define (%pp-punct? t pname) + (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname))) +(define (%pp-ident-name? t name-bv) + (and (%pp-ident? t) (bv= (tok-value t) name-bv))) +(define (%pp-skip-ws toks) toks) + +;; --- built-in macro names --- +(define %pp-bv-FILE "__FILE__") +(define %pp-bv-LINE "__LINE__") +(define %pp-bv-STDC "__STDC__") +(define %pp-bv-LISPCC "__LISPCC__") +(define %pp-bv-DATE "__DATE__") +(define %pp-bv-TIME "__TIME__") +(define %pp-bv-STDC-VERSION "__STDC_VERSION__") +(define %pp-bv-STDC-HOSTED "__STDC_HOSTED__") +(define %pp-bv-VA-ARGS "__VA_ARGS__") +(define %pp-bv-defined "defined") + +;; Fixed values for reproducibility — we don't read the wall clock. +(define %pp-bv-DATE-VALUE "Jan 1 1970") +(define %pp-bv-TIME-VALUE "00:00:00") + +(define (%pp-builtin? name) + (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE) + (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC) + (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME) + (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED))) + +(define (%pp-expand-builtin name loc state) + (let* ((file (or (pps-cur-file state) (loc-file loc))) + (line (+ (loc-line loc) (pps-line-delta state))) + (col (loc-col loc)) + (here (%loc file line col))) + (cond + ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '()))) + ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '()))) + ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '()))) + ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '()))) + ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE here '()))) + ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE here '()))) + ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 here '()))) + ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 here '()))) + (else (die loc "internal: not a builtin" name))))) + +;; --- buf-list: simple reversed-list buffer of toks --- +(define-record-type buf-list + (%buf-list xs) + buf-list? + (xs buf-list-xs buf-list-xs-set!)) +(define (make-buf-list) (%buf-list '())) +(define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b)))) +(define (buf-list-push-many! b ts) + (let loop ((ts ts)) + (cond ((null? ts) #t) + (else (buf-list-push! b (car ts)) (loop (cdr ts)))))) +(define (buf-list-flush b) (reverse (buf-list-xs b))) + +;; Translation phase 6: concatenate adjacent string literals. The merged +;; token keeps the loc and hide-set of the first; values are byte-appended. +(define (%pp-merge-adjacent-strs toks) + (let loop ((toks toks) (acc '())) + (cond + ((null? toks) (reverse acc)) + ((and (not (null? acc)) + (eq? (tok-kind (car toks)) 'STR) + (eq? (tok-kind (car acc)) 'STR)) + (let* ((prev (car acc)) + (cur (car toks)) + (merged (%tok 'STR + (bytevector-append (tok-value prev) (tok-value cur)) + (tok-loc prev) + (tok-hide prev)))) + (loop (cdr toks) (cons merged (cdr acc))))) + (else (loop (cdr toks) (cons (car toks) acc)))))) + +;; --- pp-expand: top-level driver --- +(define (pp-expand toks initial-defines) + (let ((state (%pp-make-state initial-defines)) + (out (make-buf-list))) + (let loop ((toks toks)) + (cond + ((null? toks) (die #f "pp-expand: missing EOF token")) + ((%pp-eof? (car toks)) + (cond ((not (null? (pps-cond-stack state))) + (die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef")) + (else + (buf-list-push! out (car toks)) + (%pp-merge-adjacent-strs (buf-list-flush out))))) + ((%pp-nl? (car toks)) (loop (cdr toks))) + ((%pp-hash? (car toks)) + (let* ((lr (%pp-take-line (cdr toks))) + (line (car lr)) (rest (cdr lr))) + (%pp-dispatch-directive (car toks) line state out) + (loop rest))) + (else + (let* ((lr (%pp-take-line toks)) + (line (car lr)) (rest (cdr lr))) + (cond ((%pp-active? state) + (%pp-emit-expanded line state out)) + (else #t)) + (loop rest))))))) + +;; Take tokens up to (not including) the next NL or EOF. NL is consumed; +;; EOF is left in the stream so the driver sees it next. +(define (%pp-take-line toks) + (let loop ((toks toks) (acc '())) + (cond + ((null? toks) (cons (reverse acc) toks)) + ((%pp-eof? (car toks)) (cons (reverse acc) toks)) + ((%pp-nl? (car toks)) (cons (reverse acc) (cdr toks))) + (else (loop (cdr toks) (cons (car toks) acc)))))) + +;; --- directive dispatch --- +;; pmatch-based on the directive name bv. bv literals match by equal?. +;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else` +;; are C keywords promoted by lex; their KW symbol values map back to bv +;; via symbol->string). +(define (%pp-directive-name t) + (cond ((eq? (tok-kind t) 'IDENT) (tok-value t)) + ((eq? (tok-kind t) 'KW) (symbol->string (tok-value t))) + (else #f))) + +(define (%pp-dispatch-directive hash-tok line state out) + (let ((line (%pp-skip-ws line))) + (cond + ((null? line) #t) ; bare `#` line — null directive + ((%pp-directive-name (car line)) + (let ((name (%pp-directive-name (car line))) + (rest (cdr line)) + (loc (tok-loc (car line)))) + (pmatch name + ("define" (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t))) + ("undef" (cond ((%pp-active? state) (%pp-do-undef rest state)) (else #t))) + ("if" (%pp-do-if rest state)) + ("ifdef" (%pp-do-ifdef rest state)) + ("ifndef" (%pp-do-ifndef rest state)) + ("elif" (%pp-do-elif rest state)) + ("else" (%pp-do-else rest state)) + ("endif" (%pp-do-endif rest state)) + ("error" (cond ((%pp-active? state) + (%pp-do-error (cons (car line) rest) state)) + (else #t))) + ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t))) + ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t))) + ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t))) + (else (die loc "unknown preprocessor directive" name))))) + (else + (die (tok-loc (car line)) "expected directive name after '#'" + (tok-kind (car line))))))) + +;; --- #define --- +;; function-like vs object-like is decided by an immediately-adjacent `(`. +;; "Adjacent" = column of `(` equals column of name + length of name. +(define (%pp-do-define line state) + (cond + ((null? line) (die #f "#define requires a macro name")) + ((not (%pp-ident? (car line))) + (die (tok-loc (car line)) "#define: expected identifier")) + (else + (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line))) + (cond + ((and (not (null? rest)) + (%pp-punct? (car rest) 'lparen) + (= (loc-col (tok-loc (car rest))) + (+ (loc-col (tok-loc nt)) + (bytevector-length name)))) + (%pp-define-fn name (cdr rest) (tok-loc nt) state)) + (else + (let ((m (%macro 'obj '() rest))) + (pps-macros-set! state (alist-set name m (pps-macros state)))))))))) + +(define (%pp-define-fn name post-lparen nloc state) + (let loop ((toks post-lparen) (params '()) (variadic? #f)) + (cond + ((null? toks) (die nloc "#define: unterminated parameter list")) + ((%pp-punct? (car toks) 'rparen) + (let* ((body (cdr toks)) + (kind (if variadic? 'fn-vararg 'fn)) + (m (%macro kind (reverse params) body))) + (pps-macros-set! state (alist-set name m (pps-macros state))))) + ((%pp-punct? (car toks) 'ellipsis) + (let ((rest (cdr toks))) + (cond + ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'")) + ((%pp-punct? (car rest) 'rparen) (loop rest params #t)) + (else (die (tok-loc (car rest)) "#define: garbage after '...'"))))) + ((null? params) + (cond + ((%pp-ident? (car toks)) + (loop (cdr toks) (cons (tok-value (car toks)) params) #f)) + (else (die (tok-loc (car toks)) "#define: expected parameter name")))) + (else + (cond + ((%pp-punct? (car toks) 'comma) + (let ((after (cdr toks))) + (cond + ((null? after) (die (tok-loc (car toks)) "#define: trailing ','")) + ((%pp-punct? (car after) 'ellipsis) + (let ((aa (cdr after))) + (cond + ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen)) + (loop aa params #t)) + (else (die (tok-loc (car after)) + "#define: '...' must precede ')'"))))) + ((%pp-ident? (car after)) + (loop (cdr after) (cons (tok-value (car after)) params) #f)) + (else + (die (tok-loc (car after)) + "#define: expected parameter name after ','"))))) + (else (die (tok-loc (car toks)) + "#define: expected ',' or ')' in parameter list"))))))) + +;; --- #undef --- +(define (%pp-do-undef line state) + (cond + ((null? line) (die #f "#undef requires a macro name")) + ((not (%pp-ident? (car line))) + (die (tok-loc (car line)) "#undef: expected identifier")) + (else + (pps-macros-set! state + (%pp-alist-drop (tok-value (car line)) (pps-macros state)))))) + +(define (%pp-alist-drop key al) + (cond ((null? al) '()) + ((bv= (car (car al)) key) (cdr al)) + (else (cons (car al) (%pp-alist-drop key (cdr al)))))) + +;; --- #if / #ifdef / #ifndef / #elif / #else / #endif --- +(define (%pp-do-if line state) + (cond + ((not (%pp-active? state)) + (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) + (else + (let* ((v (pp-eval-cexpr line (pps-macros state))) + (a? (not (= v 0)))) + (pps-cond-stack-set! state (cons (cons a? a?) (pps-cond-stack state))))))) + +(define (%pp-do-ifdef line state) + (cond + ((not (%pp-active? state)) + (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) + (else + (let ((d? (%pp-defined? (%pp-name-of-single line) state))) + (pps-cond-stack-set! state + (cons (cons d? d?) (pps-cond-stack state))))))) + +(define (%pp-do-ifndef line state) + (cond + ((not (%pp-active? state)) + (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) + (else + (let ((a? (not (%pp-defined? (%pp-name-of-single line) state)))) + (pps-cond-stack-set! state + (cons (cons a? a?) (pps-cond-stack state))))))) + +(define (%pp-name-of-single line) + (cond + ((null? line) (die #f "#ifdef/#ifndef: missing identifier")) + ((not (%pp-ident? (car line))) + (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier")) + (else (tok-value (car line))))) + +(define (%pp-defined? name state) + (or (alist-ref name (pps-macros state)) + (%pp-builtin? name) + #f)) + +(define (%pp-do-elif line state) + (let ((cs (pps-cond-stack state))) + (cond + ((null? cs) (die #f "#elif outside #if")) + (else + (let* ((top (car cs)) (rest (cdr cs)) + (taken? (cdr top)) + (par? (%pp-parent-active? state))) + (cond + ((or (not par?) taken?) + (pps-cond-stack-set! state (cons (cons #f taken?) rest))) + (else + (let* ((v (pp-eval-cexpr line (pps-macros state))) + (a? (not (= v 0)))) + (pps-cond-stack-set! state + (cons (cons a? (or a? taken?)) rest)))))))))) + +(define (%pp-do-else line state) + (let ((cs (pps-cond-stack state))) + (cond + ((null? cs) (die #f "#else outside #if")) + (else + (let* ((top (car cs)) (rest (cdr cs)) + (taken? (cdr top)) + (par? (%pp-parent-active? state))) + (cond + ((not par?) + (pps-cond-stack-set! state (cons (cons #f taken?) rest))) + (taken? + (pps-cond-stack-set! state (cons (cons #f #t) rest))) + (else + (pps-cond-stack-set! state (cons (cons #t #t) rest))))))))) + +(define (%pp-do-endif line state) + (let ((cs (pps-cond-stack state))) + (cond ((null? cs) (die #f "#endif outside #if")) + (else (pps-cond-stack-set! state (cdr cs)))))) + +;; --- #error --- +;; line[0] is the directive name "error"; the rest is the user message. +(define (%pp-do-error line state) + (let* ((msg-toks (if (null? line) '() (cdr line))) + (loc (if (null? line) #f (tok-loc (car line)))) + (msg (%pp-toks->display msg-toks))) + (die loc "#error" msg))) + +(define (%pp-toks->display toks) + (let loop ((toks toks) (acc '()) (first? #t)) + (cond + ((null? toks) (bv-cat (reverse acc))) + (else + (let ((p (%pp-tok->bv (car toks)))) + (loop (cdr toks) + (if first? (cons p acc) (cons p (cons " " acc))) + #f)))))) + +(define (%pp-tok->bv t) + (let ((k (tok-kind t)) (v (tok-value t))) + (cond + ((eq? k 'IDENT) v) + ((eq? k 'INT) (fixnum->bv v 10)) + ((eq? k 'STR) (%pp-quote-bytes v 34)) + ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39)) + ((eq? k 'KW) (symbol->string v)) + ((eq? k 'PUNCT) (symbol->string v)) + (else "?")))) + +;; Reconstruct a string/char literal source spelling from cooked content. +;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char). +;; `delim` is 34 for STR, 39 for CHAR. +(define (%pp-quote-bytes bv delim) + (let* ((n (bytevector-length bv)) + (delim-bv (bv-of-byte delim))) + (let loop ((i 0) (acc (list delim-bv))) + (cond + ((= i n) (bv-cat (reverse (cons delim-bv acc)))) + (else + (let ((b (bytevector-u8-ref bv i))) + (cond + ((or (= b delim) (= b 92)) + (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc)))) + (else + (loop (+ i 1) (cons (bv-of-byte b) acc)))))))))) + +;; --- #line / #pragma / #include --- +;; Approximate #line: subsequent toks have line = (orig-line + delta), +;; where delta = (N - here-line - 1). Good enough for most cases. +(define (%pp-do-line line state) + (cond + ((null? line) (die #f "#line requires a line number")) + ((not (%pp-int? (car line))) + (die (tok-loc (car line)) "#line: expected integer")) + (else + (let* ((nt (car line)) (n (tok-value nt)) + (rest (cdr line)) + (here (loc-line (tok-loc nt)))) + (pps-line-delta-set! state (- n here 1)) + (cond + ((null? rest) #t) + ((eq? (tok-kind (car rest)) 'STR) + (pps-cur-file-set! state (tok-value (car rest)))) + (else (die (tok-loc (car rest)) + "#line: expected string after number"))))))) + +(define (%pp-do-pragma line state) #t) + +(define (%pp-do-include line state) + (die (if (null? line) #f (tok-loc (car line))) + "#include: file inclusion is handled upstream by pre-flatten")) + +;; --- macro expansion engine --- +;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set: +;; if the name is in t.hide, don't expand. Otherwise expand and rescan +;; the produced body, with hide += {name}. +(define (%pp-emit-expanded toks state out) + (let loop ((toks toks)) + (cond + ((null? toks) #t) + (else + (let* ((t (car toks)) (rest (cdr toks))) + (cond + ((not (%pp-ident? t)) + (buf-list-push! out (%pp-relocate t state)) + (loop rest)) + (else + (let ((name (tok-value t))) + (cond + ((%pp-bv-mem? name (tok-hide t)) + (buf-list-push! out (%pp-relocate t state)) + (loop rest)) + ((%pp-builtin? name) + (buf-list-push-many! out + (%pp-expand-builtin name (tok-loc t) state)) + (loop rest)) + (else + (let ((m (alist-ref name (pps-macros state)))) + (cond + ((not m) + (buf-list-push! out (%pp-relocate t state)) + (loop rest)) + (else + (%pp-apply-macro t m rest state out loop)))))))))))))) + +(define (%pp-apply-macro t m rest state out cont) + (let ((kind (macro-kind m)) (name (tok-value t))) + (cond + ((eq? kind 'obj) + (let ((bodies (%pp-prepare-body (macro-body m) + (cons name (tok-hide t))))) + (%pp-emit-expanded bodies state out) + (cont rest))) + (else + (let ((after (%pp-skip-ws rest))) + (cond + ((or (null? after) (not (%pp-punct? (car after) 'lparen))) + (buf-list-push! out (%pp-relocate t state)) + (cont rest)) + (else + (let* ((ar (%pp-collect-args (cdr after) (tok-loc t))) + (args (car ar)) (rest2 (cdr ar)) + (params (macro-params m)) + (variadic? (eq? kind 'fn-vararg)) + (env (%pp-bind-args params args variadic? (tok-loc t))) + (sub (%pp-substitute (macro-body m) env (tok-loc t))) + (bodies (%pp-prepare-body sub + (cons name (tok-hide t))))) + (%pp-emit-expanded bodies state out) + (cont rest2))))))))) + +(define (%pp-prepare-body body extra-hide) + (map (lambda (t) + (%pp-with-hide t (%pp-bv-union extra-hide (tok-hide t)))) + body)) + +;; Collect comma-separated args. `toks` starts AFTER `(`. Returns +;; (args . rest), where args is a list of token-lists. +(define (%pp-collect-args toks call-loc) + (let loop ((toks toks) (depth 0) (cur '()) (args '())) + (cond + ((null? toks) (die call-loc "macro call: unterminated argument list")) + ((%pp-eof? (car toks)) + (die call-loc "macro call: unterminated argument list")) + ((and (= depth 0) (%pp-punct? (car toks) 'rparen)) + (let ((args* + (cond + ;; Empty parens count as one empty argument; bind-args + ;; degenerates this back to "no args" for 0-param macros. + ((and (null? args) (null? cur)) (list '())) + (else (reverse (cons (reverse cur) args)))))) + (cons args* (cdr toks)))) + ((and (= depth 0) (%pp-punct? (car toks) 'comma)) + (loop (cdr toks) 0 '() (cons (reverse cur) args))) + ((%pp-punct? (car toks) 'lparen) + (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args)) + ((%pp-punct? (car toks) 'rparen) + (loop (cdr toks) (- depth 1) (cons (car toks) cur) args)) + (else + (loop (cdr toks) depth (cons (car toks) cur) args))))) + +;; Bind formals → token-lists (alist by bv key). Variadic gathers +;; trailing actuals into __VA_ARGS__, joined with synthetic commas. +(define (%pp-bind-args params args variadic? call-loc) + (let* ((np (length params)) (na (length args))) + (cond + (variadic? + (cond + ((< na np) (die call-loc "macro call: too few arguments")) + (else + (let loop ((ps params) (as args) (acc '())) + (cond + ((null? ps) + (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc)) + (else + (loop (cdr ps) (cdr as) + (alist-set (car ps) (car as) acc)))))))) + (else + (cond + ((and (= np 0) (= na 1) (null? (car args))) '()) + ((not (= np na)) (die call-loc "macro call: argument count mismatch")) + (else + (let loop ((ps params) (as args) (acc '())) + (cond + ((null? ps) acc) + (else (loop (cdr ps) (cdr as) + (alist-set (car ps) (car as) acc))))))))))) + +(define (%pp-join-comma argss) + (cond + ((null? argss) '()) + ((null? (cdr argss)) (car argss)) + (else + (append (car argss) + (cons (%pp-synth-comma) (%pp-join-comma (cdr argss))))))) + +(define (%pp-synth-comma) + (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '())) + +;; Body substitution: walk body; replace param IDENTs with arg toks, +;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not +;; pre-expand args before substitution; the rescan after substitution +;; catches the same expansions in practice. +(define (%pp-substitute body env call-loc) + (let loop ((body body) (out '())) + (cond + ((null? body) (reverse out)) + (else + (let ((t (car body)) (rest (cdr body))) + (cond + ((%pp-punct? t 'hash) + (cond + ((or (null? rest) (not (%pp-ident? (car rest)))) + (die (tok-loc t) "stringize: '#' must precede a parameter name")) + (else + (let* ((id (car rest)) (pn (tok-value id)) + (pt (alist-ref pn env))) + (cond + ((not pt) + (die (tok-loc id) "stringize: '#' operand must be a parameter" pn)) + (else + (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '()))) + (loop (cdr rest) (cons s out))))))))) + ((%pp-punct? t 'paste) + (cond + ((null? out) (die (tok-loc t) "paste: '##' cannot start a body")) + ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body")) + (else + (let* ((lhs (car out)) + (rt (car rest)) + (rhs-list + (cond + ((and (%pp-ident? rt) (alist-ref (tok-value rt) env)) + (alist-ref (tok-value rt) env)) + (else (list rt))))) + (cond + ((null? rhs-list) (loop (cdr rest) out)) + (else + (let* ((p (%pp-paste-tokens lhs (car rhs-list))) + (after (append (cdr rhs-list) (cdr rest)))) + (loop after (cons p (cdr out)))))))))) + ((%pp-ident? t) + (let* ((pn (tok-value t)) (pt (alist-ref pn env))) + (cond + ((not pt) (loop rest (cons t out))) + ((and (not (null? rest)) (%pp-punct? (car rest) 'paste)) + (cond + ((null? pt) (loop (cdr rest) out)) + (else (loop rest (append (reverse pt) out))))) + (else (loop rest (append (reverse pt) out)))))) + (else (loop rest (cons t out))))))))) + +;; Paste two tokens textually; reparse the result. +(define (%pp-paste-tokens lhs rhs) + (let ((lk (tok-kind lhs)) (rk (tok-kind rhs))) + (cond + ((and (eq? lk 'IDENT) (eq? rk 'IDENT)) + (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs)) + (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) + ((and (eq? lk 'IDENT) (eq? rk 'INT)) + (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10)) + (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) + ((and (eq? lk 'INT) (eq? rk 'INT)) + (let* ((s (bytevector-append (fixnum->bv (tok-value lhs) 10) + (fixnum->bv (tok-value rhs) 10))) + (pr (bv->fixnum s 10))) + (cond + ((not (car pr)) (die (tok-loc lhs) "paste: cannot reparse as integer" s)) + (else (%tok 'INT (cdr pr) (tok-loc lhs) + (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))))) + (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk))))) + +(define (%pp-relocate t state) + (cond + ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t) + (else + (let* ((l (tok-loc t)) + (f (or (pps-cur-file state) (loc-file l))) + (ln (+ (loc-line l) (pps-line-delta state))) + (c (loc-col l))) + (%pp-with-loc t (%loc f ln c)))))) + +;; --- pp-eval-cexpr: #if expression evaluator --- +;; Steps: resolve `defined NAME`, macro-expand the rest, treat any +;; remaining IDENT as 0, parse with recursive descent. +;; +;; Arena boundary (test_93 A→B→C pattern). Everything between the mark +;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list, +;; where `s2` runs the full macro-expansion engine), plus the recursive +;; parser's (val . rest) cons cell at every level. The result is a +;; fixnum, so no pre-allocated out cell is needed — `val` survives the +;; rewind by virtue of being an immediate. The error path goes through +;; `die` (which sys-exits), so no rewind there. +(define (pp-eval-cexpr toks macros) + (let ((mark (heap-mark))) + (let* ((state (%pp-state macros '() #f 0)) + (s1 (%pp-resolve-defined toks state)) + (s2 (%pp-expand-line s1 state)) + (s3 (%pp-idents-as-zero s2))) + (let* ((p (%pp-cx-expr s3)) + (val (car p)) (rest (cdr p))) + (cond + ((null? rest) (heap-rewind! mark) val) + (else (die (tok-loc (car rest)) "#if: garbage at end of expression" + (tok-kind (car rest))))))))) + +(define (%pp-expand-line toks state) + (let ((out (make-buf-list))) + (%pp-emit-expanded toks state out) + (buf-list-flush out))) + +(define (%pp-resolve-defined toks state) + (let loop ((toks toks) (acc '())) + (cond + ((null? toks) (reverse acc)) + ((%pp-ident-name? (car toks) %pp-bv-defined) + (let ((rest (cdr toks))) + (cond + ((null? rest) (die (tok-loc (car toks)) "defined: missing operand")) + ((%pp-ident? (car rest)) + (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0))) + (loop (cdr rest) + (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))) + ((%pp-punct? (car rest) 'lparen) + (let ((after (cdr rest))) + (cond + ((or (null? after) (not (%pp-ident? (car after)))) + (die (tok-loc (car toks)) "defined: expected identifier")) + (else + (let ((aa (cdr after))) + (cond + ((or (null? aa) (not (%pp-punct? (car aa) 'rparen))) + (die (tok-loc (car toks)) "defined: expected ')'")) + (else + (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0))) + (loop (cdr aa) + (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) )) + (else (die (tok-loc (car rest)) "defined: expected identifier or '('"))))) + (else (loop (cdr toks) (cons (car toks) acc)))))) + +(define (%pp-idents-as-zero toks) + (map (lambda (t) + (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '())) + (else t))) + toks)) + +;; --- recursive-descent #if expression parser --- +;; Returns (value . rest). +(define (%pp-cx-expr toks) (%pp-cx-cond toks)) + +(define (%pp-cx-cond toks) + (let* ((p (%pp-cx-lor toks)) + (v (car p)) (rest (cdr p))) + (cond + ((and (not (null? rest)) (%pp-punct? (car rest) 'qmark)) + (let* ((p2 (%pp-cx-expr (cdr rest))) + (vt (car p2)) (after (cdr p2))) + (cond + ((or (null? after) (not (%pp-punct? (car after) 'colon))) + (die (if (null? after) #f (tok-loc (car after))) "?: missing ':'")) + (else + (let* ((p3 (%pp-cx-cond (cdr after))) + (vf (car p3)) (rest3 (cdr p3))) + (cons (if (not (= v 0)) vt vf) rest3)))))) + (else (cons v rest))))) + +(define (%pp-cx-binl next ops toks) + (let loop ((p (next toks))) + (let ((v (car p)) (rest (cdr p))) + (cond + ((null? rest) p) + (else + (let* ((tt (car rest)) + (hit (and (eq? (tok-kind tt) 'PUNCT) + (alist-ref/eq (tok-value tt) ops)))) + (cond + ((not hit) p) + (else + (let* ((p2 (next (cdr rest))) + (v2 (car p2)) (rest2 (cdr p2))) + (loop (cons (hit v v2) rest2))))))))))) + +(define (%pp-cx-lor toks) + (%pp-cx-binl %pp-cx-land + (list (cons 'lor (lambda (a b) (if (or (not (= a 0)) (not (= b 0))) 1 0)))) + toks)) +(define (%pp-cx-land toks) + (%pp-cx-binl %pp-cx-bor + (list (cons 'land (lambda (a b) (if (and (not (= a 0)) (not (= b 0))) 1 0)))) + toks)) +(define (%pp-cx-bor toks) (%pp-cx-binl %pp-cx-bxor (list (cons 'bar bit-or)) toks)) +(define (%pp-cx-bxor toks) (%pp-cx-binl %pp-cx-band (list (cons 'caret bit-xor)) toks)) +(define (%pp-cx-band toks) (%pp-cx-binl %pp-cx-eq (list (cons 'amp bit-and)) toks)) +(define (%pp-cx-eq toks) + (%pp-cx-binl %pp-cx-rel + (list (cons 'eq2 (lambda (a b) (if (= a b) 1 0))) + (cons 'ne (lambda (a b) (if (= a b) 0 1)))) + toks)) +(define (%pp-cx-rel toks) + (%pp-cx-binl %pp-cx-shift + (list (cons 'lt (lambda (a b) (if (< a b) 1 0))) + (cons 'le (lambda (a b) (if (<= a b) 1 0))) + (cons 'gt (lambda (a b) (if (> a b) 1 0))) + (cons 'ge (lambda (a b) (if (>= a b) 1 0)))) + toks)) +(define (%pp-cx-shift toks) + (%pp-cx-binl %pp-cx-add + (list (cons 'shl (lambda (a b) (arithmetic-shift a b))) + (cons 'shr (lambda (a b) (arithmetic-shift a (- 0 b))))) + toks)) +(define (%pp-cx-add toks) + (%pp-cx-binl %pp-cx-mul (list (cons 'plus +) (cons 'minus -)) toks)) +(define (%pp-cx-mul toks) + (%pp-cx-binl %pp-cx-unary + (list (cons 'star *) (cons 'slash quotient) (cons 'pct remainder)) + toks)) + +(define (%pp-cx-unary toks) + (cond + ((null? toks) (die #f "#if: unexpected end of expression")) + ((%pp-punct? (car toks) 'plus) (%pp-cx-unary (cdr toks))) + ((%pp-punct? (car toks) 'minus) + (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) + (cons (- 0 v) r))) + ((%pp-punct? (car toks) 'bang) + (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) + (cons (if (= v 0) 1 0) r))) + ((%pp-punct? (car toks) 'tilde) + (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) + (cons (bit-not v) r))) + (else (%pp-cx-primary toks)))) + +(define (%pp-cx-primary toks) + (cond + ((null? toks) (die #f "#if: expected expression")) + ((%pp-int? (car toks)) (cons (tok-value (car toks)) (cdr toks))) + ((eq? (tok-kind (car toks)) 'CHAR) + (cons (tok-value (car toks)) (cdr toks))) + ((%pp-punct? (car toks) 'lparen) + (let* ((p (%pp-cx-expr (cdr toks))) (v (car p)) (r (cdr p))) + (cond + ((or (null? r) (not (%pp-punct? (car r) 'rparen))) + (die (if (null? r) #f (tok-loc (car r))) "#if: missing ')'")) + (else (cons v (cdr r)))))) + (else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks)))))) +;; cc/cg.scm — codegen state and emission API. +;; Realization of docs/CC-INTERNALS.md §cg.scm. +;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc; +;; cg owns sign extension, signed/unsigned dispatch, pointer scaling. +;; +;; Output uses libp1pp's structured macros (%fn, %ifelse_nez, +;; %loop_tag, %break, %continue) per docs/LIBP1PP.md. +;; +;; Frame layout (CC-CONTRACTS §3): +;; [sp + 0 .. staging*8) outgoing-arg staging +;; [sp + staging*8 ..) locals + spilled vstack values +;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the +;; staging size, only known at fn-end, can be filled in via a 0-arg +;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block. + +(define (%cg-emit-buf cg) + (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg)))) + +(define (%cg-emit cg bv) + (buf-push! (%cg-emit-buf cg) bv)) + +(define (%cg-emit-many cg bvs) + (for-each (lambda (b) (%cg-emit cg b)) bvs)) + +(define (%n n) (number->string n 10)) + +;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...) +;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off +;; cg-globals means cg-globals only mutates when the user emits a real +;; global, which is what parse-fn-body's rewind-safety check needs. +(define (%cg-fn-set! cg key val) + (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg)))) + +(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg))) + +(define (%cg-fresh-label cg prefix) + (let* ((n (cg-label-ctr cg)) + (bv (bytevector-append prefix (%n n)))) + (cg-label-ctr-set! cg (+ n 1)) + bv)) + +(define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L")) +(define (%cg-fresh-lbl cg) (%cg-fresh-label cg "lbl_")) + +(define (%cg-bump-outgoing! cg n) + (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0)) + +(define (%cg-slot-expr cg logical-off) + (let ((nm (%cg-fn-get cg '%fn-name))) + (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")")))) + +(define (%cg-mangle-global name-bv) + (bytevector-append "cc__" name-bv)) + +(define (%cg-reg->bv r) (symbol->string r)) + +(define (%cg-emit-li cg reg n) + (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n"))) + +(define (%cg-emit-la cg reg label-bv) + (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n"))) + +(define (%cg-emit-ld-slot cg reg logical-off) + (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, " + (%cg-slot-expr cg logical-off) ")\n"))) + +(define (%cg-emit-st-slot cg reg logical-off) + (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, " + (%cg-slot-expr cg logical-off) ")\n"))) + +(define (%cg-emit-ld cg reg base off) + (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", " + (%cg-reg->bv base) ", " (%n off) ")\n"))) + +(define (%cg-emit-st cg reg base off) + (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", " + (%cg-reg->bv base) ", " (%n off) ")\n"))) + +;; Width-aware load/store. Dispatches on ctype-size: +;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by +;; shli/sari 56 to materialize the canonical 64-bit form). +;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops, +;; and word ops require natural alignment which we can't promise +;; for struct fields or non-word-aligned local slots). Loads +;; gather bytes via %lb + shli/or; stores scatter via shri/%sb. +;; Signed loads (i16/i32) sign-extend via shli/sari to canonical +;; 64-bit form. +;; 8 (or anything else for now): %ld / %st. +;; Scratch convention: helpers may clobber t1; callers never pass +;; reg=t1. + +(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes) + ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low. + ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv + ;; expression for offset k. + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", " + (off-expr-fn 0) ")\n")) + (let loop ((k 1)) + (cond + ((= k n-bytes) 0) + (else + (%cg-emit-many cg (list + "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n" + "%shli(t1, t1, " (%n (* 8 k)) ")\n" + "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")) + (loop (+ k 1)))))) + +(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes) + ;; Emit n-bytes %sb scatters from reg via shri-shifted t1. + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", " + (off-expr-fn 0) ")\n")) + (let loop ((k 1)) + (cond + ((= k n-bytes) 0) + (else + (%cg-emit-many cg (list + "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n" + "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n")) + (loop (+ k 1)))))) + +(define (%cg-emit-sext cg reg shift-amount) + (%cg-emit-many cg (list + "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " + (%n shift-amount) ")\n" + "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " + (%n shift-amount) ")\n"))) + +(define (%cg-emit-ld-slot-typed cg reg ctype logical-off) + (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) + (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) + (cond + ((= sz 1) + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, " + (off-fn 0) ")\n")) + (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) + ((= sz 2) + (%cg-emit-ldN-bytes cg reg "sp" off-fn 2) + (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) + ((= sz 4) + (%cg-emit-ldN-bytes cg reg "sp" off-fn 4) + (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) + (else (%cg-emit-ld-slot cg reg logical-off))))) + +(define (%cg-emit-st-slot-typed cg reg ctype logical-off) + (let* ((sz (ctype-size ctype)) + (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) + (cond + ((= sz 1) + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, " + (off-fn 0) ")\n"))) + ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2)) + ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4)) + (else (%cg-emit-st-slot cg reg logical-off))))) + +(define (%cg-emit-ld-typed cg reg ctype base off) + (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) + (base-bv (%cg-reg->bv base)) + (off-fn (lambda (k) (%n (+ off k))))) + (cond + ((= sz 1) + (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " + base-bv ", " (off-fn 0) ")\n")) + (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) + ((= sz 2) + (%cg-emit-ldN-bytes cg reg base-bv off-fn 2) + (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) + ((= sz 4) + (%cg-emit-ldN-bytes cg reg base-bv off-fn 4) + (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) + (else (%cg-emit-ld cg reg base off))))) + +(define (%cg-emit-st-typed cg reg ctype base off) + (let* ((sz (ctype-size ctype)) + (base-bv (%cg-reg->bv base)) + (off-fn (lambda (k) (%n (+ off k))))) + (cond + ((= sz 1) + (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " + base-bv ", " (off-fn 0) ")\n"))) + ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2)) + ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4)) + (else (%cg-emit-st cg reg base off))))) + +(define (%cg-load-opnd-into cg op reg) + ;; frame lval: load at type width. frame rval is a spilled word + ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load. + ;; global lval width > 1 byte-gathers must not alias dest with base — + ;; the first %lb would otherwise clobber the address before subsequent + ;; byte loads. Stage the address in t2. + (pmatch op + (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n)) + (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off)) + (%cg-emit-ld-slot-typed cg reg ty off)) + (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off)) + (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl)) + (($ opnd? (kind global) (type ,ty) (ext ,lbl)) + (%cg-emit-la cg 't2 lbl) + (%cg-emit-ld-typed cg reg ty 't2 0)) + (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op))))) + +(define (%cg-spill-reg cg reg ty) + (let* ((off (cg-alloc-slot cg 8 8)) + (op (%opnd 'frame ty off #f))) + (%cg-emit-st-slot cg reg off) + (cg-vstack-set! cg (cons op (cg-vstack cg))) + op)) + +(define (%ctype-ptr? t) + (let ((k (ctype-kind t))) + (if (eq? k 'ptr) #t (eq? k 'arr)))) + +(define (%ctype-pointee t) + (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t)) + ((eq? (ctype-kind t) 'arr) (car (ctype-ext t))) + (else #f))) + +(define (%ctype-unsigned? t) + (let ((k (ctype-kind t))) + (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t) + ((eq? k 'u64) #t) ((eq? k 'bool) #t) + ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t) + (else #f)))) + +(define (%ctype-size t) (ctype-size t)) + +(define (%reg-by-idx i) + (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3) + (else (die #f "cg: param idx > 3 needs ldarg path" i)))) + +;; -------------------------------------------------------------------- +;; Lifecycle +;; -------------------------------------------------------------------- + +(define (cg-init) + (%cg (make-buf/cap %BUF-CAP-TEXT) ; text + (make-buf/cap %BUF-CAP-DATA) ; data + (make-buf/cap %BUF-CAP-BSS) ; bss + '() ; vstack + 0 ; frame-hi + 0 ; label-ctr + '() ; str-pool + '() ; globals + '() ; fn-meta + (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn) + (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn) + 0 ; max-outgoing + #f)) ; in-fn? + +(define (cg-finish cg) + ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry) + ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't + ;; clobber a0/a1, so falling straight through to cc__main forwards + ;; them unchanged. The 16-byte frame is just enough for %enter's + ;; saved-fp/lr to fit; cc__main builds its own frame on top. + ;; (CC-CONTRACTS §J.1, §5.4.) + (let ((tb (cg-text cg))) + (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n") + (buf-push! tb "%fn(p1_main, 16, {\n") + (buf-push! tb "%call(&cc__main)\n") + (buf-push! tb "})\n")) + ;; Every P1pp translation unit must end with :ELF_end so the ELF + ;; header can compute file-size and ph_memsz boundaries. + (bv-cat (list (buf-flush (cg-text cg)) + (buf-flush (cg-data cg)) + (buf-flush (cg-bss cg)) + ":ELF_end\n"))) + +(define (cg-fn-begin cg name params return-type) + (cg-fn-begin/v cg name params return-type #f)) + +;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte +;; slots covering incoming arg indices 0..15, populating each from the +;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for +;; idx 4..15. va_start computes the address of the slot at index = +;; named-arg count, so va_arg walks linearly through the rest. +;; Indices 4..15 may be garbage when the caller passed fewer args; user +;; code stops walking based on a count or sentinel before those slots +;; are read. Limit of 15 variadic args (after named) is enough for +;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more. +(define (cg-fn-begin/v cg name params return-type variadic?) + (buf-reset! (cg-fn-buf cg)) + (buf-reset! (cg-prologue-buf cg)) + (cg-in-fn?-set! cg #t) + (cg-vstack-set! cg '()) + (cg-frame-hi-set! cg 0) + ;; cg-label-ctr is NOT reset per-fn. Loop tags emit single-colon + ;; (global) labels via libp1pp's %loop_tag macro (`:L0_top`, + ;; `:L0_end`) — see P1/P1pp.P1pp:loop_tag — so two functions both + ;; using L0 would produce duplicate global labels and break linking. + ;; Switch dispatch labels (`sw_disp_L<N>`) inherit the same tag and + ;; are also single-colon. Keeping the counter monotonic across + ;; functions guarantees uniqueness without needing to mangle. + (cg-max-outgoing-set! cg 0) + (cg-fn-meta-set! cg '()) + (%cg-fn-set! cg '%fn-name name) + (%cg-fn-set! cg '%fn-ret-type return-type) + (%cg-fn-set! cg '%indirect-slots '()) + (%cg-fn-set! cg '%fn-variadic? variadic?) + (let ((ret-slot (cg-alloc-slot cg 8 8))) + (%cg-fn-set! cg '%fn-ret-slot ret-slot) + (cond + ((not (eq? (ctype-kind return-type) 'void)) + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%li(t0, 0)\n" + "%st(t0, sp, " + (%cg-slot-expr cg ret-slot) ")\n")))))) + ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We + ;; return an alist (name-bv . sym) the parser binds into scope. + (let walk ((ps params) (idx 0) (out '()) (first-slot #f)) + (cond + ((null? ps) + (cond + (variadic? + ;; Pad the incoming-arg window out to 16 slots. For idx 0..3 + ;; the slot is filled from a-register; for idx 4..15 from + ;; LDARG slot (idx-4). va_start points at the slot whose + ;; index equals the named-arg count, and va_arg walks + ;; linearly from there through the rest of the window. + (let pad ((i idx) (vfirst #f) (fs first-slot)) + (cond + ((>= i 16) + ;; If named-arg count was 0, vfirst is the very first + ;; slot of the save area (= fs). + (%cg-fn-set! cg '%fn-vararg-first-slot + (or vfirst fs)) + (reverse out)) + (else + (let ((off (cg-alloc-slot cg 8 8))) + (cond + ((< i 4) + (let ((ar (%reg-by-idx i))) + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%st(" (%cg-reg->bv ar) + ", sp, " + (%cg-slot-expr cg off) ")\n"))))) + (else + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n" + "%st(t0, sp, " + (%cg-slot-expr cg off) ")\n"))))) + (pad (+ i 1) + (or vfirst off) + (or fs off))))))) + (else (reverse out)))) + (else + (let* ((p (car ps)) + (nm (car p)) + (ty (cdr p)) + (off (cg-alloc-slot cg 8 8)) + (psym (%sym nm 'param #f ty off))) + (cond + ((< idx 4) + (let ((ar (%reg-by-idx idx))) + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%st(" (%cg-reg->bv ar) + ", sp, " (%cg-slot-expr cg off) ")\n"))))) + (else + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n" + "%st(t0, sp, " (%cg-slot-expr cg off) ")\n"))))) + (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) + (or first-slot off))))))) + +(define (cg-fn-end cg) + ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! + ;; (memcpy, no allocation). Header/footer pieces go through buf-push! + ;; on cg-text — also memcpy. Net result: zero net heap allocation in + ;; cg-fn-end other than the small (%n N) bvs for staging-bytes / + ;; frame-size, which the surrounding parse-fn-body's heap-rewind! + ;; reclaims. + (let* ((name (%cg-fn-get cg '%fn-name)) + (ret-slot (%cg-fn-get cg '%fn-ret-slot)) + (ret-type (%cg-fn-get cg '%fn-ret-type)) + (locals-hi (cg-frame-hi cg)) + (staging-bytes (* 8 (cg-max-outgoing cg))) + (raw-size (+ staging-bytes locals-hi)) + (frame-size (align-up raw-size 16)) + (mangled (%cg-mangle-global name)) + (tb (cg-text cg))) + ;; Now that the body is fully emitted, leave fn dispatch so any + ;; trailing emits in this function (including the ret-block below) + ;; route to cg-text directly. + (cg-in-fn?-set! cg #f) + ;; staging-size macro + (buf-push! tb "%macro ") + (buf-push! tb name) + (buf-push! tb "__SO()\n") + (buf-push! tb (%n staging-bytes)) + (buf-push! tb "\n%endm\n") + ;; %fn header + (buf-push! tb "%fn(") + (buf-push! tb mangled) + (buf-push! tb ", ") + (buf-push! tb (%n frame-size)) + (buf-push! tb ", {\n") + ;; prologue + body, drained byte-for-byte + (buf-drain! tb (cg-prologue-buf cg)) + (buf-drain! tb (cg-fn-buf cg)) + ;; ret block + (buf-push! tb "::ret\n") + (cond + ((eq? (ctype-kind ret-type) 'void) + (buf-push! tb "%li(a0, 0)\n")) + (else + (buf-push! tb "%ld(a0, sp, ") + (buf-push! tb (%cg-slot-expr cg ret-slot)) + (buf-push! tb ")\n"))) + (buf-push! tb "})\n") + (cg-vstack-set! cg '()) + (cg-frame-hi-set! cg 0) + (cg-max-outgoing-set! cg 0) + 0)) + +;; -------------------------------------------------------------------- +;; Vstack +;; -------------------------------------------------------------------- +(define (cg-push cg op) + (cg-vstack-set! cg (cons op (cg-vstack cg))) + op) + +(define (cg-pop cg) + (let ((s (cg-vstack cg))) + (cond ((null? s) (die #f "cg-pop: empty vstack")) + (else (cg-vstack-set! cg (cdr s)) (car s))))) + +(define (cg-top cg) + (let ((s (cg-vstack cg))) + (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s))))) + +(define (cg-depth cg) (length (cg-vstack cg))) + +;; Duplicate the top vstack entry. For lvals this is safe — the slot +;; (or label, or indirect-marked frame) backing the lval keeps existing +;; until the function ends. For rvals it duplicates the descriptor of +;; the spilled value; both copies refer to the same already-emitted +;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to +;; preserve the lhs across a `cg-load` so the subsequent `cg-assign` +;; still has its address. +(define (cg-dup cg) + (let ((p (cg-top cg))) (cg-push cg p) p)) + +;; -------------------------------------------------------------------- +;; Materialize +;; -------------------------------------------------------------------- +(define (cg-push-imm cg ctype value) + (cg-push cg (%opnd 'imm ctype value #f))) + +(define (cg-push-string cg bv-content) + (let* ((label (cg-intern-string cg bv-content)) + (cp-ty (%ctype 'ptr 8 8 %t-i8))) + (cg-push cg (%opnd 'global cp-ty label #f)))) + +(define (cg-push-sym cg sym) + (pmatch sym + (($ sym? (kind fn) (type ,ty) (name ,nm)) + (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f))) + (($ sym? (kind enum-const) (type ,ty) (slot ,v)) + (cg-push cg (%opnd 'imm ty v #f))) + (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm)) + (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) + (($ sym? (kind var) (storage static) (type ,ty) (name ,nm)) + (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) + (($ sym? (kind var) (type ,ty) (slot ,off)) + (cg-push cg (%opnd 'frame ty off #t))) + (($ sym? (kind param) (type ,ty) (slot ,off)) + (cg-push cg (%opnd 'frame ty off #t))) + (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym))))) + +;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS +;; (not the value). To distinguish from ordinary frame-lvals (whose +;; slot holds the value directly), we tag indirect slots in +;; %indirect-slots so cg-load and cg-assign can do the extra +;; indirection. +(define (%cg-mark-indirect! cg off) + (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) + (%cg-fn-set! cg '%indirect-slots (cons off cur)))) + +(define (%cg-indirect? cg off) + (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) + (let loop ((xs cur)) + (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs))))))) + +(define (cg-push-deref cg) + (let* ((p (cg-pop cg)) + (pt (opnd-type p)) + (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt)) + ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt))) + (else #f)))) + (cond + ((not pe) (die #f "cg-push-deref: not a pointer" pt)) + (else + (%cg-load-opnd-into cg p 't0) + (let ((off (cg-alloc-slot cg 8 8))) + (%cg-emit-st-slot cg 't0 off) + (%cg-mark-indirect! cg off) + (cg-push cg (%opnd 'frame pe off #t))))))) + +;; -------------------------------------------------------------------- +;; Aggregate field access (§D.1–D.4) +;; -------------------------------------------------------------------- +;; cg-push-field cg fname: +;; pop a struct/union lval; look up `fname` in the struct's fields +;; list (data.scm: ext = (tag complete? fields), where each field +;; is (name-bv ctype offset)); push a new lval at the field's +;; offset with the field's ctype. +;; +;; Three input cases: +;; - direct frame lval at slot `off` -> frame lval at off+fo +;; - indirect frame lval (slot holds addr) -> new indirect slot for +;; addr+fo +;; - global lval at label L -> indirect slot for +;; la(L)+fo +;; In all cases the resulting lval has the field's ctype. + +(define (%cg-find-field fields fname) + (let loop ((xs fields)) + (cond + ((null? xs) #f) + ((bv= (car (car xs)) fname) (car xs)) + (else (loop (cdr xs)))))) + +(define (cg-push-field cg fname) + (let* ((s (cg-pop cg)) + (sty (opnd-type s)) + (k (ctype-kind sty))) + (cond + ((not (or (eq? k 'struct) (eq? k 'union))) + (die #f "cg-push-field: not a struct/union" k)) + ((not (opnd-lval? s)) + (die #f "cg-push-field: not an lvalue" k)) + (else + (let* ((fields (car (cddr (ctype-ext sty)))) + (f (%cg-find-field fields fname))) + (cond + ((not f) (die #f "cg-push-field: no such field" fname)) + (else + (let* ((fty (cadr f)) (fo (car (cddr f)))) + (pmatch s + ;; direct frame lval: just shift the slot offset. + (($ opnd? (kind frame) (ext ,off)) + (guard (not (%cg-indirect? cg off))) + (cg-push cg (%opnd 'frame fty (+ off fo) #t))) + ;; indirect frame lval: addr lives in the slot. Compute + ;; addr+fo into a new indirect slot. + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-ld-slot cg 't0 off) + (cond + ((> fo 0) + (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) + (let ((no (cg-alloc-slot cg 8 8))) + (%cg-emit-st-slot cg 't0 no) + (%cg-mark-indirect! cg no) + (cg-push cg (%opnd 'frame fty no #t)))) + ;; global lval: load addr, add offset, indirect slot. + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (cond + ((> fo 0) + (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) + (let ((no (cg-alloc-slot cg 8 8))) + (%cg-emit-st-slot cg 't0 no) + (%cg-mark-indirect! cg no) + (cg-push cg (%opnd 'frame fty no #t)))) + (else + (die #f "cg-push-field: unsupported lval kind" + (opnd-kind s)))))))))))) + +;; cg-decay-array: +;; if top of vstack is an arr-typed lval, replace it with a ptr-rval +;; to the first element. C arrays decay to T* in most contexts; +;; parse calls this before rval-style operations. No-op otherwise. +(define (cg-decay-array cg) + (let ((tp (cg-top cg))) + (cond + ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr)) + (let* ((p (cg-pop cg)) + (et (car (ctype-ext (opnd-type p)))) + (pty (%ctype 'ptr 8 8 et))) + (pmatch p + ;; direct frame lval: address is sp+off. + (($ opnd? (kind frame) (ext ,off)) + (guard (not (%cg-indirect? cg off))) + (%cg-emit-many cg (list "%mov(t0, sp)\n" + "%addi(t0, t0, " + (%cg-slot-expr cg off) ")\n")) + (%cg-spill-reg cg 't0 pty)) + ;; indirect frame lval (rare for arrays, but support it): + ;; the slot holds the address already. + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-ld-slot cg 't0 off) + (%cg-spill-reg cg 't0 pty)) + ;; global array: la(label) is the address. + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (%cg-spill-reg cg 't0 pty)) + (else (die #f "cg-decay-array: unsupported lval kind" + (opnd-kind p)))))) + (else tp)))) + +;; -------------------------------------------------------------------- +;; Address & deref +;; -------------------------------------------------------------------- +(define (cg-take-addr cg) + (let* ((p (cg-pop cg)) + (ty (opnd-type p)) + ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on + ;; the result scales by sizeof(T[N]) (the whole array), so + ;; &arr + 1 is one-past-end. Array-to-pointer decay happens + ;; on use via cg-decay-array, not at the & operator. + (pty (%ctype 'ptr 8 8 ty))) + (pmatch p + (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue")) + ;; The address itself lives at sp+slot — &*p degenerates to p. + (($ opnd? (kind frame) (ext ,off)) + (guard (%cg-indirect? cg off)) + (%cg-emit-ld-slot cg 't0 off) + (%cg-spill-reg cg 't0 pty)) + ;; %mov(rd, sp) gives the portable-sp pointer (the backend + ;; handles any hidden frame-header offset). Then add slot. + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-many cg (list "%mov(t0, sp)\n" + "%addi(t0, t0, " + (%cg-slot-expr cg off) ")\n")) + (%cg-spill-reg cg 't0 pty)) + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (%cg-spill-reg cg 't0 pty)) + (else (die #f "cg-take-addr: non-addressable" (opnd-kind p)))))) + +(define (cg-load cg) + (let* ((p (cg-pop cg)) (ty (opnd-type p))) + (cond + ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue")) + ;; Array lvalues decay to a ptr-rval addressing the first + ;; element (C array-to-pointer decay). We push the lval back + ;; and route through cg-decay-array for a single source of truth. + ((eq? (ctype-kind ty) 'arr) + (cg-push cg p) (cg-decay-array cg)) + ((and (eq? (opnd-kind p) 'frame) + (%cg-indirect? cg (opnd-ext p))) + ;; Indirect frame-lval: slot holds the address. Stage the + ;; address in t2 so multi-byte gathers don't alias dest with + ;; base. + (%cg-emit-ld-slot cg 't2 (opnd-ext p)) + (%cg-emit-ld-typed cg 't0 ty 't2 0) + (%cg-spill-reg cg 't0 ty)) + (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty))))) + +;; -------------------------------------------------------------------- +;; Type conversions +;; -------------------------------------------------------------------- +(define (cg-cast cg to-type) + (let* ((p (cg-pop cg)) + (from-ty (opnd-type p)) + (from-sz (%ctype-size from-ty)) + (to-sz (%ctype-size to-type)) + (to-kind (ctype-kind to-type))) + (cond + ((eq? to-kind 'bool) + (%cg-load-opnd-into cg p 't0) + (%cg-emit-many cg (list + "%ifelse_eqz(t0, { %li(t0, 0) }, { %li(t0, 1) })\n")) + (%cg-spill-reg cg 't0 to-type)) + ((or (eq? to-kind 'ptr) + (and (or (eq? to-kind 'i64) (eq? to-kind 'u64)) + (or (eq? (ctype-kind from-ty) 'ptr) + (eq? (ctype-kind from-ty) 'arr)))) + (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) + ((>= to-sz from-sz) + (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) + (else + ;; Narrowing cast. Signed targets (i8/i16/i32) shli/sari to + ;; truncate-and-sign-extend in one step, so the slot holds the + ;; canonical 64-bit form and a subsequent widening cast (which + ;; is relabel-only) restores the value. Unsigned targets mask + ;; off high bits to zero-extend. + (%cg-load-opnd-into cg p 't0) + (cond + ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56)) + ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48)) + ((eq? to-kind 'i32) (%cg-emit-sext cg 't0 32)) + ((= to-sz 1) (%cg-emit-many cg (list "%andi(t0, t0, 255)\n"))) + ((= to-sz 2) + (%cg-emit-many cg (list "%li(t1, 65535)\n%and(t0, t0, t1)\n"))) + ((= to-sz 4) + (%cg-emit-many cg (list "%li(t1, 4294967295)\n%and(t0, t0, t1)\n"))) + (else 0)) + (%cg-spill-reg cg 't0 to-type))))) + +(define (cg-promote cg) + (let* ((p (cg-pop cg)) + (ty (opnd-type p)) + (sz (%ctype-size ty))) + (cond + ((< sz 4) + (cond + ((%ctype-unsigned? ty) + (cg-push cg (%opnd (opnd-kind p) %t-u32 (opnd-ext p) (opnd-lval? p)))) + (else + (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p)))))) + (else (cg-push cg p))))) + +(define (cg-arith-conv cg) + ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to + ;; arithmetic operands. When either operand is a pointer (or array, + ;; which behaves as a pointer in arithmetic), the pair is a + ;; pointer-arith case — leave the types alone so cg-binop can detect + ;; the ptr operand and apply the right scaling. + (let* ((b (cg-pop cg)) + (a (cg-pop cg)) + (ta (opnd-type a)) + (tb (opnd-type b)) + (sa (%ctype-size ta)) + (sb (%ctype-size tb))) + (cond + ;; Pointer/array arithmetic: leave types alone so cg-binop's + ;; ptr-aware add/sub branch fires with the correct pointee type + ;; (and doesn't see two pointers, which would skip scaling). + ((or (%ctype-ptr? ta) (%ctype-ptr? tb)) + (cg-push cg a) + (cg-push cg b)) + (else + (let ((common (cond + ((> sa sb) ta) + ((> sb sa) tb) + ((%ctype-unsigned? ta) ta) + ((%ctype-unsigned? tb) tb) + (else ta)))) + (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a))) + (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? b)))))))) + +;; -------------------------------------------------------------------- +;; Operators +;; -------------------------------------------------------------------- +(define (%cg-emit-rrr cg op rd ra rb) + (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", " + (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n"))) + +(define (%cg-emit-cmp cg cc ra rb rd) + (%cg-emit-many cg (list "%ifelse_" cc "(" + (%cg-reg->bv ra) ", " (%cg-reg->bv rb) + ", { %li(" (%cg-reg->bv rd) ", 1) }, " + "{ %li(" (%cg-reg->bv rd) ", 0) })\n"))) + +(define (cg-binop cg op) + (let* ((b (cg-pop cg)) + (a (cg-pop cg)) + (ta (opnd-type a)) + (tb (opnd-type b)) + (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb))) + (a-ptr? (%ctype-ptr? ta)) + (b-ptr? (%ctype-ptr? tb)) + (result-ty + (cond + ((or (eq? op 'eq) (eq? op 'ne) + (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) + %t-i32) + ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64) + (a-ptr? ta) + (b-ptr? tb) + (else ta)))) + (cond + ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?)) + (%cg-load-opnd-into cg a 'a0) + (%cg-load-opnd-into cg b 'a1) + (let ((sz (%ctype-size (%ctype-pointee ta)))) + (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) + (%cg-emit-rrr cg "mul" 'a1 'a1 't0)) + (else 0))) + (%cg-emit-rrr cg (if (eq? op 'add) "add" "sub") 't0 'a0 'a1) + (%cg-spill-reg cg 't0 result-ty)) + ((and b-ptr? (eq? op 'add) (not a-ptr?)) + (%cg-load-opnd-into cg a 'a0) + (%cg-load-opnd-into cg b 'a1) + (let ((sz (%ctype-size (%ctype-pointee tb)))) + (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) + (%cg-emit-rrr cg "mul" 'a0 'a0 't0)) + (else 0))) + (%cg-emit-rrr cg "add" 't0 'a0 'a1) + (%cg-spill-reg cg 't0 result-ty)) + ((and a-ptr? b-ptr? (eq? op 'sub)) + (%cg-load-opnd-into cg a 'a0) + (%cg-load-opnd-into cg b 'a1) + (%cg-emit-rrr cg "sub" 't0 'a0 'a1) + (let ((sz (%ctype-size (%ctype-pointee ta)))) + (cond ((> sz 1) (%cg-emit-many cg (list "%li(t1, " (%n sz) ")\n")) + (%cg-emit-rrr cg "div" 't0 't0 't1)) + (else 0))) + (%cg-spill-reg cg 't0 result-ty)) + (else + (%cg-load-opnd-into cg a 'a0) + (%cg-load-opnd-into cg b 'a1) + (cond + ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1)) + ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1)) + ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1)) + ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1)) + ((eq? op 'or) (%cg-emit-rrr cg "or" 't0 'a0 'a1)) + ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1)) + ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1)) + ((eq? op 'shr) + (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1) + (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) + ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1)) + ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1)) + ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0)) + ((eq? op 'ne) (%cg-emit-cmp cg "ne" 'a0 'a1 't0)) + ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0)) + ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0)) + ((eq? op 'le) + (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0) + (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n"))) + ((eq? op 'ge) + (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0) + (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n"))) + (else (die #f "cg-binop: unknown op" op))) + (%cg-spill-reg cg 't0 result-ty))))) + +;; Post-increment / post-decrement on the top-of-vstack lval. +;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store. +;; Uses cg-dup + cg-load to capture the old rval (which is then in a +;; never-reused spill slot), then runs the regular dup+load+add+assign +;; pattern for the store. Pointer scaling falls out of cg-binop add. +(define (%cg-post-inc-dec cg op) + (cg-dup cg) + (cg-load cg) + (let ((old (cg-pop cg))) + (cg-dup cg) + (cg-load cg) + (cg-push-imm cg %t-i32 1) + (cg-binop cg op) + (cg-assign cg) + (cg-pop cg) + (cg-push cg old))) + +(define (cg-postinc cg) (%cg-post-inc-dec cg 'add)) +(define (cg-postdec cg) (%cg-post-inc-dec cg 'sub)) + +(define (cg-unop cg op) + (let* ((p (cg-pop cg)) (ty (opnd-type p))) + (%cg-load-opnd-into cg p 't0) + (cond + ((eq? op 'neg) + (%cg-emit-many cg (list "%li(t1, 0)\n%sub(t0, t1, t0)\n")) + (%cg-spill-reg cg 't0 ty)) + ((eq? op 'bnot) + (%cg-emit-many cg (list "%li(t1, -1)\n%xor(t0, t0, t1)\n")) + (%cg-spill-reg cg 't0 ty)) + ((eq? op 'lnot) + (%cg-emit-many cg (list "%ifelse_eqz(t0, { %li(t0, 1) }, { %li(t0, 0) })\n")) + (%cg-spill-reg cg 't0 %t-i32)) + (else (die #f "cg-unop: unknown op" op))))) + +(define (cg-assign cg) + ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek + ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2), + ;; emits the store, pushes the assigned value as the result rval. + (let* ((rhs0 (cg-pop cg)) + (lhs (cg-pop cg)) + (ty (opnd-type lhs))) + (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue"))) + ;; Cast rhs to lhs's type (no-op when the types already match). + (cg-push cg rhs0) + (cg-cast cg ty) + (let ((rhs (cg-pop cg))) + (%cg-load-opnd-into cg rhs 'a0) + (pmatch lhs + (($ opnd? (kind frame) (ext ,off)) + (guard (%cg-indirect? cg off)) + (%cg-emit-ld-slot cg 't0 off) + (%cg-emit-st-typed cg 'a0 ty 't0 0)) + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-st-slot-typed cg 'a0 ty off)) + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (%cg-emit-st-typed cg 'a0 ty 't0 0)) + (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs)))) + (%cg-spill-reg cg 'a0 ty)))) + +;; -------------------------------------------------------------------- +;; Calls +;; -------------------------------------------------------------------- +(define (cg-call cg arity has-result?) + (let* ((args (let loop ((i 0) (acc '())) + (cond ((= i arity) acc) + (else (loop (+ i 1) (cons (cg-pop cg) acc)))))) + (fn-op (cg-pop cg))) + (let stage ((xs args) (idx 0)) + (cond + ((null? xs) 0) + ((< idx 4) + (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx)) + (stage (cdr xs) (+ idx 1))) + (else + (%cg-load-opnd-into cg (car xs) 't0) + (%cg-emit-st cg 't0 'sp (* 8 (- idx 4))) + (stage (cdr xs) (+ idx 1))))) + (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0)) + (cond + ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op))) + (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n"))) + (else + (%cg-load-opnd-into cg fn-op 't0) + (%cg-emit-many cg (list "%callr(t0)\n")))) + (cond + (has-result? + (let* ((fty (opnd-type fn-op)) + (rty (cond + ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty))) + ((eq? (ctype-kind fty) 'ptr) + (let ((p (ctype-ext fty))) + (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64))) + (else %t-i64)))) + (%cg-spill-reg cg 'a0 rty))) + (else #f)))) + +;; -------------------------------------------------------------------- +;; Return +;; -------------------------------------------------------------------- +(define (cg-return cg) + (let ((ret-slot (%cg-fn-get cg '%fn-ret-slot)) + (ret-type (%cg-fn-get cg '%fn-ret-type))) + (cond + ((eq? (ctype-kind ret-type) 'void) + (%cg-emit-many cg (list "%b(&::ret)\n"))) + (else + (let ((p (cg-pop cg))) + (%cg-load-opnd-into cg p 'a0) + (%cg-emit-st-slot cg 'a0 ret-slot) + (%cg-emit-many cg (list "%b(&::ret)\n"))))))) + +;; -------------------------------------------------------------------- +;; Structured control flow +;; -------------------------------------------------------------------- +(define (cg-if cg then-thunk) + (let ((p (cg-pop cg))) + (%cg-load-opnd-into cg p 't0) + (%cg-emit-many cg (list "%if_nez(t0, {\n")) + (then-thunk) + (%cg-emit-many cg (list "})\n")))) + +(define (cg-ifelse cg then-thunk else-thunk) + (let ((p (cg-pop cg))) + (%cg-load-opnd-into cg p 't0) + (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) + (then-thunk) + (%cg-emit-many cg (list "}, {\n")) + (else-thunk) + (%cg-emit-many cg (list "})\n")))) + +;; Conditionals-as-values: `cg-ifelse` is correct for if-statements +;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends +;; with one rval on top of the vstack — and after both branches run, +;; we'd be left with TWO opnds, which breaks the type contract for +;; the surrounding expression. `cg-ifelse-merge` solves that: pop the +;; cond, allocate one result slot, and after each thunk runs, pop its +;; rval and store into the slot. Push the slot as one frame rval. +;; Both branches must push exactly one opnd; the result type is the +;; type of the first thunk's pushed opnd (parser must arrange for +;; both branches to push compatible types — either by passing +;; pre-coerced operands or by injecting a `cg-cast` inside the thunk). +(define (cg-ifelse-merge cg then-thunk else-thunk) + (let* ((cond-op (cg-pop cg)) + (slot (cg-alloc-slot cg 8 8))) + (%cg-load-opnd-into cg cond-op 't0) + (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) + (then-thunk) + (let* ((p (cg-pop cg)) + (rty (opnd-type p))) + (%cg-load-opnd-into cg p 'a0) + (%cg-emit-st-slot cg 'a0 slot) + (%cg-emit-many cg (list "}, {\n")) + (else-thunk) + (let ((q (cg-pop cg))) + (%cg-load-opnd-into cg q 'a0) + (%cg-emit-st-slot cg 'a0 slot)) + (%cg-emit-many cg (list "})\n")) + (cg-push cg (%opnd 'frame rty slot #f))))) + +(define (cg-loop cg head-thunk body-thunk) + ;; body-thunk receives the loop tag as its argument; parser uses + ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS + ;; §1.9 / §3.3. + (let ((tag (%cg-fresh-loop-tag cg))) + (%cg-emit-many cg (list "%loop_tag(" tag ", {\n")) + (head-thunk) + (cond + ((zero? (cg-depth cg)) 0) + (else + (let ((c (cg-pop cg))) + (%cg-load-opnd-into cg c 't0) + (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n"))))) + (body-thunk tag) + (%cg-emit-many cg (list "})\n")) + tag)) + +(define (cg-break cg tag) + (%cg-emit-many cg (list "%break(" tag ")\n"))) + +(define (cg-continue cg tag) + (%cg-emit-many cg (list "%continue(" tag ")\n"))) + +;; -------------------------------------------------------------------- +;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 4-slot +;; saved-register area at known frame offsets; va_start sets ap to the +;; address of the first slot past the named-arg count; va_arg reads +;; *ap, advances ap by 8, and pushes the value as the requested type. +;; +;; ap is an lval (typically a `va_list` local). cg-va-start pops it, +;; computes the address, stores into *ap (or the slot directly), and +;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for +;; the value, advances ap, stores back, pushes the loaded value. +;; +;; Limitation: only first 4 incoming args (named + variadic) live in +;; the save area; variadic args at index >= 4 need LDARG and are not +;; yet supported. See punchlist §G.2 for the gap. +;; -------------------------------------------------------------------- +(define (%cg-vararg-first-slot cg) + (let ((s (%cg-fn-get cg '%fn-vararg-first-slot))) + (cond ((not s) (die #f "cg-va-start: not a variadic function")) + (else s)))) + +(define (cg-va-start cg) + ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0, + ;; store through ap-lval. Pushes nothing. + (let* ((ap-lv (cg-pop cg)) + (vsl (%cg-vararg-first-slot cg))) + (cond ((not (opnd-lval? ap-lv)) + (die #f "cg-va-start: ap not lvalue"))) + ;; Compute address into a0. + (%cg-emit-many cg (list "%mov(a0, sp)\n" + "%addi(a0, a0, " + (%cg-slot-expr cg vsl) ")\n")) + ;; Store a0 at ap-lval. + (cond + ((eq? (opnd-kind ap-lv) 'frame) + (cond + ((%cg-indirect? cg (opnd-ext ap-lv)) + (%cg-emit-ld-slot cg 't0 (opnd-ext ap-lv)) + (%cg-emit-st cg 'a0 't0 0)) + (else (%cg-emit-st-slot cg 'a0 (opnd-ext ap-lv))))) + ((eq? (opnd-kind ap-lv) 'global) + (%cg-emit-la cg 't0 (opnd-ext ap-lv)) + (%cg-emit-st cg 'a0 't0 0)) + (else (die #f "cg-va-start: bad ap kind" (opnd-kind ap-lv)))))) + +(define (cg-va-arg cg ctype) + ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1. + ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval + ;; of type ctype (caller cg-cast's if needed). + (let ((ap-lv (cg-pop cg))) + (cond ((not (opnd-lval? ap-lv)) + (die #f "cg-va-arg: ap not lvalue"))) + ;; Load ap into a0. + (pmatch ap-lv + (($ opnd? (kind frame) (ext ,off)) + (guard (%cg-indirect? cg off)) + (%cg-emit-ld-slot cg 't0 off) + (%cg-emit-ld cg 'a0 't0 0)) + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-ld-slot cg 'a0 off)) + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (%cg-emit-ld cg 'a0 't0 0)) + (else (die #f "cg-va-arg: bad ap kind" (opnd-kind ap-lv)))) + ;; Load value at [a0] into a1 (full 8 bytes; cg-cast on the rval + ;; the caller pushes will narrow if needed). + (%cg-emit-ld cg 'a1 'a0 0) + ;; Advance ap by 8. + (%cg-emit-many cg (list "%addi(a0, a0, 8)\n")) + ;; Store advanced ap back. + (pmatch ap-lv + (($ opnd? (kind frame) (ext ,off)) + (guard (%cg-indirect? cg off)) + (%cg-emit-ld-slot cg 't0 off) + (%cg-emit-st cg 'a0 't0 0)) + (($ opnd? (kind frame) (ext ,off)) + (%cg-emit-st-slot cg 'a0 off)) + (($ opnd? (kind global) (ext ,lbl)) + (%cg-emit-la cg 't0 lbl) + (%cg-emit-st cg 'a0 't0 0)) + (else 0)) + ;; Spill the loaded value (a1) to a fresh frame slot under ctype. + (%cg-spill-reg cg 'a1 ctype))) + +(define (cg-va-end cg) + ;; va_end is a no-op in this design. Pop and discard ap-lval. + (cg-pop cg) + 0) + +;; -------------------------------------------------------------------- +;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3). +;; user_<name> namespace keeps the user's label space disjoint from +;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through +;; libp1pp's %scope mechanism, so forward references inside the same +;; %fn block work without explicit forward declaration. +;; -------------------------------------------------------------------- +(define (cg-emit-label cg name-bv) + (%cg-emit-many cg (list "::user_" name-bv "\n"))) + +(define (cg-goto cg name-bv) + (%cg-emit-many cg (list "%b(&::user_" name-bv ")\n"))) + +;; -------------------------------------------------------------------- +;; switch +;; -------------------------------------------------------------------- +(define-record-type swctx + (%swctx ctrl-slot end-tag default-lbl) + swctx? + (ctrl-slot swctx-ctrl-slot) + (end-tag swctx-end-tag) + (default-lbl swctx-default-lbl swctx-default-lbl-set!)) + +(define (cg-switch-begin cg) + (let* ((p (cg-pop cg)) + (off (cg-alloc-slot cg 8 8)) + (tag (%cg-fresh-loop-tag cg)) + (disp-lbl (bytevector-append "sw_disp_" tag))) + (%cg-load-opnd-into cg p 't0) + (%cg-emit-st-slot cg 't0 off) + (%cg-emit-many cg (list "%loop_tag(" tag ", {\n" + "%b(&::" disp-lbl ")\n")) + (%swctx off tag #f))) + +(define (cg-switch-case cg sw const-int) + (let* ((lbl (%cg-fresh-lbl cg)) + (key (string->symbol + (bytevector-append "%sw_cases__" (swctx-end-tag sw)))) + (cur (or (%cg-fn-get cg key) '())) + (entry (cons const-int lbl))) + (%cg-fn-set! cg key (cons entry cur)) + (%cg-emit-many cg (list "::" lbl "\n")))) + +(define (cg-switch-default cg sw) + (let ((lbl (%cg-fresh-lbl cg))) + (swctx-default-lbl-set! sw lbl) + (%cg-emit-many cg (list "::" lbl "\n")))) + +(define (cg-switch-end cg sw) + (let* ((tag (swctx-end-tag sw)) + (key (string->symbol (bytevector-append "%sw_cases__" tag))) + (cases (reverse (or (%cg-fn-get cg key) '()))) + (default-lbl (swctx-default-lbl sw)) + (disp-lbl (bytevector-append "sw_disp_" tag))) + (%cg-emit-many cg (list "%break(" tag ")\n" + "::" disp-lbl "\n")) + (%cg-emit-many cg (list "%ld(t0, sp, " + (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n")) + (for-each + (lambda (c) + (%cg-emit-many cg (list "%li(t1, " (%n (car c)) ")\n" + "%beq(t0, t1, &::" (cdr c) ")\n"))) + cases) + (cond + (default-lbl (%cg-emit-many cg (list "%b(&::" default-lbl ")\n"))) + (else 0)) + (%cg-emit-many cg (list "%break(" tag ")\n" + "})\n")))) + +;; -------------------------------------------------------------------- +;; Globals and data +;; -------------------------------------------------------------------- +;; cg-emit-global: emit a global symbol into either .data (initialized) +;; or .bss (zero-init). +;; +;; init can be: +;; #f — zero-init in .bss (size from sym's ctype). +;; (piece ...) — initialized in .data; pieces concatenated. +;; +;; Each piece is either: +;; <bytevector> — raw bytes; emitted as N×!(byte) entries. +;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label; +;; emitted as `&<label> %(0)` (4B label ref + +;; 4B zero pad). +(define (%cg-init-piece->bv piece) + (cond + ((bytevector? piece) + (let ((n (bytevector-length piece))) + (let loop ((i 0) (acc '())) + (cond + ((= i n) (bv-cat (reverse acc))) + (else + (loop (+ i 1) + (cons (bv-cat (list "!(" + (number->string + (bytevector-u8-ref piece i) 10) + ")\n")) + acc))))))) + ((and (pair? piece) (eq? (car piece) 'label-ref)) + (bv-cat (list "&" (cdr piece) " %(0)\n"))) + (else (die #f "cg-emit-global: bad init piece" piece)))) + +(define (cg-emit-global cg sym init) + (let* ((nm (sym-name sym)) + (lbl (%cg-mangle-global nm)) + (sz (ctype-size (sym-type sym))) + (size (if (< sz 0) 8 sz))) + (cond + (init + (buf-push! (cg-data cg) (bv-cat (list "\n:" lbl "\n"))) + (let walk ((ps init)) + (cond + ((null? ps) 0) + (else + (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps))) + (walk (cdr ps)))))) + (else + (buf-push! (cg-bss cg) + (bv-cat (list "\n:" lbl "\n" + (let zero-loop ((rem size) (acc '())) + (cond + ((<= rem 0) (bv-cat (reverse acc))) + ((>= rem 8) + (zero-loop (- rem 8) (cons "$(0)\n" acc))) + (else + (zero-loop (- rem 1) (cons "!(0)\n" acc)))))))))) + (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg))) + 0)) + +(define (cg-emit-extern cg sym) + (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg))) + 0) + +(define (cg-intern-string cg bv-content) + (let ((p (alist-ref bv-content (cg-str-pool cg)))) + (cond + (p p) + (else + (let* ((n (length (cg-str-pool cg))) + (lbl (bytevector-append "cc__str_" (%n n)))) + (cg-str-pool-set! cg + (alist-set bv-content lbl (cg-str-pool cg))) + (buf-push! (cg-data cg) + (bv-cat (list "\n:" lbl "\n" + "\"" bv-content "\"\n" + "!(0)\n"))) + lbl))))) + +;; -------------------------------------------------------------------- +;; Frame +;; -------------------------------------------------------------------- +(define (cg-alloc-slot cg bytes align) + (let* ((aligned (align-up (cg-frame-hi cg) align)) + (new-hi (+ aligned bytes))) + (cg-frame-hi-set! cg new-hi) + aligned)) +;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1. + +(define (make-pstate toks cg) + (%pstate toks (list '()) (list '()) '() #f '() cg)) + +(define (peek ps) (car (ps-toks ps))) +(define (peek2 ps) + (let ((r (cdr (ps-toks ps)))) + (if (null? r) (car (ps-toks ps)) (car r)))) +(define (advance ps) + (let ((t (peek ps))) (ps-toks-set! ps (cdr (ps-toks ps))) t)) +(define (at-kw? ps s) + (pmatch (peek ps) + (($ tok? (kind KW) (value ,v)) (eq? v s)) + (else #f))) +(define (at-punct? ps s) + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value ,v)) (eq? v s)) + (else #f))) +(define (expect-kw ps s) + (let ((t (peek ps))) + (pmatch t + (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps)) + (else (die (tok-loc t) "expected kw" s))))) +(define (expect-punct ps s) + (let ((t (peek ps))) + (pmatch t + (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps)) + (else (die (tok-loc t) "expected punct" s))))) + +(define (scope-enter! ps) + (ps-scope-set! ps (cons '() (ps-scope ps))) + (ps-tags-set! ps (cons '() (ps-tags ps)))) +(define (scope-leave! ps) + (ps-scope-set! ps (cdr (ps-scope ps))) + (ps-tags-set! ps (cdr (ps-tags ps)))) +(define (scope-bind! ps n s) + (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f))) + (if (alist-ref n top) (die #f "dup decl" n) + (ps-scope-set! ps (cons (alist-set n s top) r))))) +(define (scope-lookup ps n) + (let loop ((f (ps-scope ps))) + (cond ((null? f) #f) + (else + (let ((v (alist-ref n (car f)))) + (if v v (loop (cdr f)))))))) +(define (tag-bind! ps n c) + (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f))) + (ps-tags-set! ps (cons (alist-set n c top) r)))) +(define (tag-lookup ps n) + (let loop ((f (ps-tags ps))) + (cond ((null? f) #f) + (else (let ((v (alist-ref n (car f)))) + (if v v (loop (cdr f)))))))) +(define (typedef-add! ps n) + (ps-typedefs-set! ps (alist-set n #t (ps-typedefs ps)))) +(define (typedef? ps n) + (if (alist-ref n (ps-typedefs ps)) #t #f)) + +(define (%mk-ptr p) (%ctype 'ptr 8 8 p)) +(define (%mk-arr e n) + (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e))) + (ctype-align e) (cons e n))) +(define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v))) +(define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr)) +(define (ctype-is-fn? t) (eq? (ctype-kind t) 'fn)) +(define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr)) + +(define (eat-cv-quals! ps) + (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile) + (at-kw? ps 'restrict)) + (advance ps) (eat-cv-quals! ps)) + (else #t))) + +(define (parse-decl-spec ps) + (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f)) + (let ((t (peek ps))) + (cond + ((or (at-kw? ps 'auto) (at-kw? ps 'register)) + (advance ps) (loop sto sn lg b #t)) + ((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t)) + ((at-kw? ps 'extern) (advance ps) (loop 'extern sn lg b #t)) + ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t)) + ((or (at-kw? ps 'const) (at-kw? ps 'volatile) + (at-kw? ps 'restrict) (at-kw? ps 'inline)) + (advance ps) (loop sto sn lg b #t)) + ((at-kw? ps 'signed) (advance ps) (loop sto 'signed lg b #t)) + ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t)) + ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t)) + ((at-kw? ps 'long) (advance ps) (loop sto sn (+ lg 1) b #t)) + ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t)) + ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t)) + ((at-kw? ps 'int) (advance ps) (loop sto sn lg 'int #t)) + ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t)) + ((or (at-kw? ps 'float) (at-kw? ps 'double) + (at-kw? ps '_Complex) (at-kw? ps '_Imaginary)) + (die (tok-loc t) "no float" (tok-value t))) + ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local) + (at-kw? ps '_Alignas) (at-kw? ps '_Generic) + (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert)) + (die (tok-loc t) "rejected" (tok-value t))) + ((at-kw? ps 'struct) + (loop sto sn lg (parse-aggregate-spec ps 'struct) #t)) + ((at-kw? ps 'union) + (loop sto sn lg (parse-aggregate-spec ps 'union) #t)) + ((at-kw? ps 'enum) + (loop sto sn lg (parse-enum-spec ps) #t)) + ((and (not b) (eq? (tok-kind t) 'IDENT) + (typedef? ps (tok-value t))) + (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk)))) + (if (and sm (eq? (sym-kind sm) 'typedef)) + (loop sto sn lg (sym-type sm) #t) + (die (tok-loc tk) "typedef no sym" (tok-value tk))))) + (else + (cond ((not saw) (die (tok-loc t) "expected decl-spec" + (tok-value t))) + (else (cons sto (resolve-base t sn lg b))))))))) + +(define (resolve-base loc sn lg b) + (cond + ((eq? b 'void) + (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void)) + ((eq? b 'bool) + (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool)) + ((eq? b 'char) + (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8))) + ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg))))) + (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16)) + ((= lg 0) (if (eq? sn 'unsigned) %t-u32 %t-i32)) + (else (if (eq? sn 'unsigned) %t-u64 %t-i64)))) + ((ctype? b) + (if (or sn (not (zero? lg))) (die loc "type+qual") b)) + (else (die loc "unknown decl-spec")))) + +(define (parse-aggregate-spec ps kind) + (advance ps) + (let ((tag (pmatch (peek ps) + (($ tok? (kind IDENT)) (tok-value (advance ps))) + (else #f)))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (let* ((ex (and tag (tag-lookup ps tag))) + (ct (cond ((and ex (eq? (ctype-kind ex) kind)) ex) + (else (let ((c (%ctype kind -1 -1 + (list (or tag #f) #f '())))) + (if tag (tag-bind! ps tag c)) c)))) + (fields (parse-struct-fields ps))) + (expect-punct ps 'rbrace) + (complete-agg! ct kind tag fields) ct)) + (tag (let ((ex (tag-lookup ps tag))) + (cond (ex ex) + (else (let ((c (%ctype kind -1 -1 + (list tag #f '())))) + (tag-bind! ps tag c) c))))) + (else (die (tok-loc (peek ps)) "anon agg"))))) + +(define (parse-struct-fields ps) + (let loop ((acc '()) (off 0)) + (cond + ((at-punct? ps 'rbrace) (reverse acc)) + (else + (let ((spec (parse-decl-spec ps))) + (let dl ((acc2 acc) (o2 off)) + (let* ((p (parse-declarator ps (cdr spec))) + (nm (car p)) (ty (cdr p)) + (al (max (ctype-align ty) 1)) + (sz (ctype-size ty)) + (oa (align-up o2 al))) + (cond + ((at-punct? ps 'comma) + (advance ps) + (dl (cons (list nm ty oa) acc2) + (+ oa (max sz 0)))) + ((at-punct? ps 'semi) + (advance ps) + (loop (cons (list nm ty oa) acc2) + (+ oa (max sz 0)))) + (else (die (tok-loc (peek ps)) "field")))))))))) + +(define (complete-agg! ct k tag fs) + (let* ((ma (let m ((xs fs) (a 1)) + (if (null? xs) a + (m (cdr xs) (max a (ctype-align (cadr (car xs)))))))) + (last (let l ((xs fs) (e 0)) + (if (null? xs) e + (let* ((f (car xs)) (off (car (cddr f))) + (sz (ctype-size (cadr f)))) + (l (cdr xs) (max e (+ off (max sz 0)))))))) + (sz (cond ((eq? k 'union) + (let u ((xs fs) (s 0)) + (if (null? xs) s + (u (cdr xs) + (max s (ctype-size (cadr (car xs)))))))) + (else (align-up last ma))))) + (ctype-size-set! ct sz) + (ctype-align-set! ct ma) + (ctype-ext-set! ct (list tag #t fs)))) + +(define (parse-enum-spec ps) + (advance ps) + (let ((tag (pmatch (peek ps) + (($ tok? (kind IDENT)) (tok-value (advance ps))) + (else #f)))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (let ((ct (%ctype 'enum 4 4 (list tag '())))) + (if tag (tag-bind! ps tag ct)) + (let loop ((vs '()) (nv 0)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (ctype-ext-set! ct (list tag (reverse vs))) ct) + (else + (let* ((nt (advance ps)) (nm (tok-value nt)) + (val (cond ((at-punct? ps 'assign) + (advance ps) (parse-const-int ps)) + (else nv)))) + (scope-bind! ps nm + (%sym nm 'enum-const #f %t-i32 val)) + (cond ((at-punct? ps 'comma) (advance ps)) + ((at-punct? ps 'rbrace) #t) + (else (die (tok-loc (peek ps)) "enum"))) + (loop (cons (cons nm val) vs) (+ val 1)))))))) + (tag (let ((e (tag-lookup ps tag))) + (cond (e e) + (else (let ((c (%ctype 'enum 4 4 (list tag '())))) + (tag-bind! ps tag c) c))))) + (else (die (tok-loc (peek ps)) "enum"))))) + +(define (parse-const-int ps) + (let ((t (peek ps))) + (pmatch t + (($ tok? (kind INT)) (tok-value (advance ps))) + (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps))) + (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps)) + (($ tok? (kind IDENT) (value ,n)) + (let ((sm (scope-lookup ps n))) + (cond ((and sm (eq? (sym-kind sm) 'enum-const)) + (advance ps) (sym-slot sm)) + (else (die (tok-loc t) "const?" n))))) + (else (die (tok-loc t) "const?" (tok-value t)))))) + +(define (parse-declarator ps base) + ((cdr (parse-decl-cont ps)) base + (lambda (n t) (cons n t)))) + +(define (parse-decl-cont ps) + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value star)) + (advance ps) (eat-cv-quals! ps) + (let* ((r (parse-decl-cont ps)) (rf (cdr r))) + (cons (car r) (lambda (b k) (rf (%mk-ptr b) k))))) + (($ tok? (kind PUNCT) (value lparen)) + (guard (paren-is-group? ps)) + (advance ps) + (let* ((i (parse-decl-cont ps)) (if- (cdr i))) + (expect-punct ps 'rparen) + (let ((s (parse-decl-suf-cont ps))) + (cons (car i) (lambda (b k) (if- (s b) k)))))) + (($ tok? (kind IDENT) (value ,n)) + (advance ps) + (let ((s (parse-decl-suf-cont ps))) + (cons n (lambda (b k) (k n (s b)))))) + (else + (let ((s (parse-decl-suf-cont ps))) + (cons #f (lambda (b k) (k #f (s b)))))))) + +(define (parse-decl-suf-cont ps) + ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first): + ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2) + ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as + ;; outermost). The recursive structure builds the inner suffix's + ;; result first, then this level wraps. + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value lbrack)) + (advance ps) + (let* ((ln (cond ((at-punct? ps 'rbrack) -1) + (else (parse-const-int ps)))) + (_ (expect-punct ps 'rbrack)) + (r (parse-decl-suf-cont ps))) + (lambda (b) (%mk-arr (r b) ln)))) + (($ tok? (kind PUNCT) (value lparen)) + (advance ps) + (let* ((res (parse-fn-params ps)) + (p (car res)) (v (cdr res))) + (expect-punct ps 'rparen) + (let ((r (parse-decl-suf-cont ps))) + (lambda (b) (%mk-fn (r b) p v))))) + (else (lambda (b) b)))) + +(define (paren-is-group? ps) + (pmatch (peek2 ps) + (($ tok? (kind KW) (value ,v)) + (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short) + (eq? v 'int) (eq? v 'long) (eq? v 'signed) + (eq? v 'unsigned) (eq? v '_Bool) + (eq? v 'struct) (eq? v 'union) (eq? v 'enum) + (eq? v 'const) (eq? v 'volatile) + (eq? v 'restrict) (eq? v 'static) + (eq? v 'extern) (eq? v 'register)) #f) + (else #t))) + (($ tok? (kind IDENT) (value ,n)) + (cond ((typedef? ps n) #f) (else #t))) + (($ tok? (kind PUNCT) (value rparen)) #f) + (($ tok? (kind PUNCT) (value star)) #t) + (($ tok? (kind PUNCT) (value lparen)) #t) + (($ tok? (kind PUNCT) (value lbrack)) #t) + (else #f))) + +(define (parse-fn-params ps) + (cond + ((at-punct? ps 'rparen) (cons '() #f)) + ((and (at-kw? ps 'void) + (eq? (tok-kind (peek2 ps)) 'PUNCT) + (eq? (tok-value (peek2 ps)) 'rparen)) + (advance ps) (cons '() #f)) + (else + (let loop ((acc '())) + (cond + ((at-punct? ps 'ellipsis) + (advance ps) (cons (reverse acc) #t)) + (else + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (nm (car p)) (ty (cdr p)) + (ty2 (cond ((ctype-is-arr? ty) + (%mk-ptr (car (ctype-ext ty)))) + ((ctype-is-fn? ty) (%mk-ptr ty)) + (else ty)))) + (cond + ((at-punct? ps 'comma) + (advance ps) (loop (cons (cons nm ty2) acc))) + ((at-punct? ps 'rparen) + (cons (reverse (cons (cons nm ty2) acc)) #f)) + (else (die (tok-loc (peek ps)) "param")))))))))) + +(define (parse-translation-unit ps) + (cond + ((eq? (tok-kind (peek ps)) 'EOF) #t) + (else (parse-decl-or-fn ps) (parse-translation-unit ps)))) + +(define (parse-decl-or-fn ps) + (let* ((sp (parse-decl-spec ps)) + (sto (car sp)) (b (cdr sp))) + (cond + ((at-punct? ps 'semi) (advance ps) 'decl) + (else + (let* ((p (parse-declarator ps b)) + (n (car p)) (t (cdr p))) + (cond + ((and (ctype-is-fn? t) (at-punct? ps 'lbrace)) + (parse-fn-body ps n t) 'fn) + (else + (handle-decl ps sto n t) + (let lp () + (cond + ((at-punct? ps 'comma) + (advance ps) + (let* ((p2 (parse-declarator ps b)) + (n2 (car p2)) (t2 (cdr p2))) + (handle-decl ps sto n2 t2) (lp))) + (else (expect-punct ps 'semi) 'decl)))))))))) + +(define (handle-decl ps sto n ty) + (cond + ((not n) (die #f "no name")) + ((eq? sto 'typedef) + (typedef-add! ps n) + (scope-bind! ps n (%sym n 'typedef #f ty #f))) + ((ctype-is-fn? ty) + (scope-bind! ps n + (%sym n 'fn (or sto 'extern) ty + (bytevector-append "cc__" n)))) + ;; §I: block-scope `static` routes to a global with a name mangled + ;; on the enclosing function so two functions can each have their + ;; own `static int n;` without colliding. The sym's NAME holds the + ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__" + ;; onto sym-name to derive the emitted label); scope-bind!s key + ;; remains the original identifier for source-level lookup. + ((and (eq? sto 'static) (ps-fn-ctx ps)) + (let* ((fname (fn-ctx-name (ps-fn-ctx ps))) + (mangled (bytevector-append fname "__" n)) + (sm (%sym mangled 'var 'static ty + (bytevector-append "cc__" mangled)))) + (scope-bind! ps n sm) + (cond + ((at-punct? ps 'assign) + (advance ps) + (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty))) + (else (cg-emit-global (ps-cg ps) sm #f))))) + (else + (cond + ((not (ps-fn-ctx ps)) + (let ((sm (%sym n 'var (or sto 'extern) ty + (bytevector-append "cc__" n)))) + (scope-bind! ps n sm) + (cond + ((at-punct? ps 'assign) + (advance ps) + (cg-emit-global (ps-cg ps) sm + (parse-init-global ps ty))) + ((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm)) + (else (cg-emit-global (ps-cg ps) sm #f))))) + (else + (let* ((sz (max (ctype-size ty) 1)) + (al (max (ctype-align ty) 1)) + (sl (cg-alloc-slot (ps-cg ps) sz al)) + (sm (%sym n 'var (or sto 'auto) ty sl))) + (scope-bind! ps n sm) + (cond + ((at-punct? ps 'assign) + (advance ps) + (cond + ;; Aggregate locals get the per-element store treatment. + ((or (at-punct? ps 'lbrace) + (and (eq? (ctype-kind ty) 'arr) + (eq? (tok-kind (peek ps)) 'STR))) + (parse-init-local-aggregate ps sm ty)) + (else + (cg-push-sym (ps-cg ps) sm) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) ty) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps))))) + (else #t)))))))) + +;; ==================================================================== +;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST). +;; +;; parse-init-global ps ty +;; Reads the initializer following `=` for a file-scope or block-scope +;; static var of static-storage type `ty` and returns a list of +;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for +;; the piece grammar. +;; +;; parse-init-local ps sm ty +;; Reads the initializer for an auto-storage variable bound to slot +;; sym `sm` and emits per-element store cg ops. Returns unspecified. +;; ==================================================================== + +(define (%int->le-bv n nbytes) + ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes + ;; >= sign-bit are filled by repeated >>8 (works for both signed and + ;; unsigned because we only keep the low N bytes). + (let ((out (make-bytevector nbytes 0))) + (let loop ((i 0) (v n)) + (cond + ((= i nbytes) out) + (else + (bytevector-u8-set! out i (bit-and v 255)) + (loop (+ i 1) (arithmetic-shift v -8))))))) + +(define (%const-init-piece ps ty) + ;; Parse a non-brace initializer expression for scalar type `ty` and + ;; return a single piece. Recognised forms: + ;; - INT (with optional unary +/-) -> N-byte LE bv + ;; - enum-const IDENT -> N-byte LE bv + ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name) + ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name) + ;; - STR (only for char* targets) -> (label-ref . string-pool-label) + (let ((t (peek ps))) + (cond + ;; Address initializer: &ident -> label-ref + ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp)) + (advance ps) + (let ((it (peek ps))) + (cond + ((eq? (tok-kind it) 'IDENT) + (advance ps) + (let ((sm (scope-lookup ps (tok-value it)))) + (cond + ((not sm) (die (tok-loc it) "init: undecl" (tok-value it))) + ((or (eq? (sym-kind sm) 'fn) + (and (eq? (sym-kind sm) 'var) + (or (eq? (sym-storage sm) 'static) + (eq? (sym-storage sm) 'extern)))) + (cons 'label-ref (sym-slot sm))) + (else + (die (tok-loc it) "init: &x must reference a global" + (tok-value it)))))) + (else (die (tok-loc it) "init: &?" (tok-value it)))))) + ;; Function name or array name as a label-ref initializer. + ;; (Both decay to a pointer when used as a value.) + ((and (eq? (tok-kind t) 'IDENT) + (let ((sm (scope-lookup ps (tok-value t)))) + (and sm + (or (eq? (sym-kind sm) 'fn) + (and (eq? (sym-kind sm) 'var) + (eq? (ctype-kind (sym-type sm)) 'arr) + (or (eq? (sym-storage sm) 'static) + (eq? (sym-storage sm) 'extern))))))) + (advance ps) + (let ((sm (scope-lookup ps (tok-value t)))) + (cons 'label-ref (sym-slot sm)))) + ;; Plain string literal as char* initializer. + ((eq? (tok-kind t) 'STR) + (advance ps) + (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t)))) + (cons 'label-ref lbl))) + ;; Otherwise it's a const integer. + (else + (let ((v (parse-const-int ps))) + (%int->le-bv v (max (ctype-size ty) 1))))))) + +(define (%init-array-elem-type ty) + (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty))) + (else (die #f "init: not an array" ty)))) + +(define (%init-array-decl-len ty) + ;; Declared array length (-1 = inferred). + (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1))) + +(define (%init-fix-array-size! ty count) + ;; Patch an inferred-length array to `count`. + (let ((elem (car (ctype-ext ty)))) + (ctype-ext-set! ty (cons elem count)) + (ctype-size-set! ty (* count (ctype-size elem))))) + +(define (%init-struct-fields ty) + ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. + (let ((ext (ctype-ext ty))) + (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext))) + (else (die #f "init: not a struct" ty))))) + +(define (%find-field fields nm) + (cond ((null? fields) #f) + ((equal? (car (car fields)) nm) (car fields)) + (else (%find-field (cdr fields) nm)))) + +(define (%pad-piece nbytes) + (make-bytevector nbytes 0)) + +;; ----- Global initializers --------------------------------------------- +(define (parse-init-global ps ty) + (pmatch (peek ps) + ;; String literal initializer for char[] + (($ tok? (kind STR) (value ,s)) + (guard (and (eq? (ctype-kind ty) 'arr) + (let ((et (car (ctype-ext ty)))) + (or (eq? et %t-i8) (eq? et %t-u8))))) + (advance ps) + (let* ((slen (bytevector-length s)) + (decl (cdr (ctype-ext ty))) + (final (cond ((< decl 0) (+ slen 1)) (else decl)))) + (cond ((< decl 0) (%init-fix-array-size! ty final))) + (let ((bv (make-bytevector final 0))) + (let loop ((i 0)) + (cond + ((or (= i slen) (>= i final)) (list bv)) + (else + (bytevector-u8-set! bv i (bytevector-u8-ref s i)) + (loop (+ i 1)))))))) + ;; Brace-form + (($ tok? (kind PUNCT) (value lbrace)) + (advance ps) + (cond + ((eq? (ctype-kind ty) 'arr) + (%parse-init-array-list ps ty)) + ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) + (%parse-init-struct-list ps ty)) + (else + ;; Brace-wrapped scalar: { expr } + (let ((piece (%const-init-piece ps ty))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list piece))))) + ;; Bare scalar initializer + (else (list (%const-init-piece ps ty))))) + +(define (%parse-init-array-list ps ty) + ;; Element-list array initializer; assumes `{` already consumed. + (let* ((elem (%init-array-elem-type ty)) + (esize (ctype-size elem)) + (decl (%init-array-decl-len ty))) + (let lp ((acc '()) (count 0)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (cond ((< decl 0) (%init-fix-array-size! ty count))) + ;; Pad to declared length if longer than count. + (let* ((final (cond ((< decl 0) count) (else decl))) + (pad (- final count))) + (cond + ((> pad 0) + (reverse (cons (%pad-piece (* pad esize)) acc))) + (else (reverse acc))))) + (else + (let ((piece + (cond + ((at-punct? ps 'lbrace) + ;; Nested aggregate: brace-flatten via recursion. + (advance ps) + ;; element is itself struct/array + (cond + ((eq? (ctype-kind elem) 'arr) + (%parse-init-array-list ps elem)) + ((or (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + (%parse-init-struct-list ps elem)) + (else + (let ((p (%const-init-piece ps elem))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list p))))) + (else + (list (%const-init-piece ps elem)))))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (append (reverse piece) acc) (+ count 1)))))))) + +(define (%piece-bytesize p) + ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits + ;; one byte per element; a (label-ref . _) emits an 8-byte slot. + (cond + ((bytevector? p) (bytevector-length p)) + ((and (pair? p) (eq? (car p) 'label-ref)) 8) + (else (die #f "init: unknown piece" p)))) + +(define (%pieces-bytesize ps-list) + (let loop ((xs ps-list) (n 0)) + (cond ((null? xs) n) + (else (loop (cdr xs) (+ n (%piece-bytesize (car xs)))))))) + +(define (%merge-init-entries entries total-size) + ;; entries: list of (abs-offset . piece-list), in source order. + ;; Sort stably by offset (later writes to the same offset win, per C + ;; designated-init semantics) and emit pad pieces in any gaps and at + ;; the tail. Preserves label-ref pieces — we never merge them into a + ;; flat bv. + (let* ((sorted (%init-stable-sort-by-offset entries)) + (out + (let walk ((xs sorted) (cursor 0) (acc '())) + (cond + ((null? xs) + (cond + ((< cursor total-size) + (reverse (cons (%pad-piece (- total-size cursor)) acc))) + (else (reverse acc)))) + (else + (let* ((e (car xs)) + (eoff (car e)) + (epieces (cdr e)) + (esize (%pieces-bytesize epieces)) + (acc1 (cond + ((> eoff cursor) + (cons (%pad-piece (- eoff cursor)) acc)) + (else acc))) + (acc2 (append (reverse epieces) acc1))) + (walk (cdr xs) (+ eoff esize) acc2))))))) + out)) + +(define (%init-stable-sort-by-offset entries) + ;; Insertion sort, stable by source order for ties. n is small (one + ;; entry per initialized field) so O(n^2) is fine. + (let lp ((xs entries) (acc '())) + (cond + ((null? xs) acc) + (else + (let ((e (car xs))) + (lp (cdr xs) + (let ins ((ys acc) (head '())) + (cond + ((null? ys) + (append (reverse head) (list e))) + ((<= (car e) (car (car ys))) + (append (reverse head) (cons e ys))) + (else + (ins (cdr ys) (cons (car ys) head))))))))))) + +(define (%parse-init-struct-list ps ty) + ;; Struct/union initializer; assumes `{` already consumed. Supports + ;; positional and `.field = expr` forms — including out-of-order + ;; designators ({.y=5, .x=7}). Each entry records its absolute + ;; offset; %merge-init-entries sorts and pads at the closing brace. + (let* ((fields (%init-struct-fields ty)) + (size (ctype-size ty))) + (let lp ((entries '()) (rest fields)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (%merge-init-entries (reverse entries) size)) + (else + (let* ((designated? (at-punct? ps 'dot)) + (target + (cond + (designated? + (advance ps) + (let ((nt (advance ps))) + (cond + ((not (eq? (tok-kind nt) 'IDENT)) + (die (tok-loc nt) "init: .field expects ident"))) + (let ((f (%find-field fields (tok-value nt)))) + (cond + ((not f) (die (tok-loc nt) "init: no such field" + (tok-value nt)))) + (expect-punct ps 'assign) + f))) + ((null? rest) + (die (tok-loc (peek ps)) "init: too many fields")) + (else (car rest)))) + (fname (car target)) + (fty (car (cdr target))) + (foff (car (cddr target))) + (piece-list + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind fty) 'arr) + (%parse-init-array-list ps fty)) + ((or (eq? (ctype-kind fty) 'struct) + (eq? (ctype-kind fty) 'union)) + (%parse-init-struct-list ps fty)) + (else + (let ((p (%const-init-piece ps fty))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list p))))) + (else + (list (%const-init-piece ps fty)))))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (cons (cons foff piece-list) entries) + (cond + (designated? + ;; designated init: drop fields up to and including target + (let drop ((xs fields)) + (cond + ((null? xs) '()) + ((equal? (car (car xs)) fname) (cdr xs)) + (else (drop (cdr xs)))))) + (else (cdr rest)))))))))) + +;; ----- Local aggregate initializers ------------------------------------ +;; Emits per-element store sequences via cg ops into the slot of `sm` +;; (a 'var sym whose slot is the frame offset). Assumes the assignment +;; `=` has already been consumed. +(define (parse-init-local-aggregate ps sm ty) + (pmatch (peek ps) + ;; Local char[] = "string" — fill from string bytes. + (($ tok? (kind STR) (value ,s)) + (guard (and (eq? (ctype-kind ty) 'arr) + (let ((et (car (ctype-ext ty)))) + (or (eq? et %t-i8) (eq? et %t-u8))))) + (advance ps) + (let* ((slen (bytevector-length s)) + (decl (cdr (ctype-ext ty))) + (final (cond ((< decl 0) (+ slen 1)) (else decl)))) + (cond ((< decl 0) (%init-fix-array-size! ty final))) + ;; Emit byte stores for each char in s, plus NUL for the + ;; trailing slot if final > slen. + (let loop ((i 0)) + (cond + ((>= i final) #t) + (else + (let ((b (cond ((< i slen) (bytevector-u8-ref s i)) + (else 0))) + (off (+ (sym-slot sm) i))) + (%push-frame-elem-lval ps off %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 b) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (loop (+ i 1)))))))) + (($ tok? (kind PUNCT) (value lbrace)) + (advance ps) + (cond + ((eq? (ctype-kind ty) 'arr) + (%parse-init-local-array-list ps sm (sym-slot sm) ty)) + ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) + (%parse-init-local-struct-list ps sm (sym-slot sm) ty)) + (else (die #f "init local: brace on scalar?")))) + (else (die (tok-loc (peek ps)) "init local aggregate?")))) + +(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk) + ;; Emit a single scalar store at slot[base + rel-off]. piece is the + ;; raw initializer expression — but here we want to actually evaluate + ;; it via parse-expr to allow non-const expressions for autos. + ;; Caller handles this; this helper handles the store-into-frame ops. + 0) + +(define (%push-frame-elem-lval ps base-off ty) + (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t))) + +(define (%parse-init-local-array-list ps sm base-off ty) + (let* ((elem (%init-array-elem-type ty)) + (esize (ctype-size elem)) + (decl (%init-array-decl-len ty))) + (let lp ((i 0)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (cond ((< decl 0) (%init-fix-array-size! ty i))) + ;; Zero out remaining slots if any (declared length > i). + (let ((final (cond ((< decl 0) i) (else decl)))) + (let zlp ((k i)) + (cond + ((>= k final) #t) + (else + (let ((off (+ base-off (* k esize)))) + (cond + ((or (eq? (ctype-kind elem) 'arr) + (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + ;; Zero each byte in this aggregate slot. + (let zb ((j 0)) + (cond + ((>= j esize) #t) + (else + (%push-frame-elem-lval ps (+ off j) %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (zb (+ j 1)))))) + (else + (%push-frame-elem-lval ps off elem) + (cg-push-imm (ps-cg ps) elem 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps))))) + (zlp (+ k 1))))))) + (else + (let ((eoff (+ base-off (* i esize)))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind elem) 'arr) + (%parse-init-local-array-list ps sm eoff elem)) + ((or (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + (%parse-init-local-struct-list ps sm eoff elem)) + (else + (%push-frame-elem-lval ps eoff elem) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) elem) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace)))) + (else + (%push-frame-elem-lval ps eoff elem) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) elem) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (+ i 1)))))))) + +(define (%bv-in-list? bv xs) + (cond ((null? xs) #f) + ((equal? bv (car xs)) #t) + (else (%bv-in-list? bv (cdr xs))))) + +(define (%emit-zero-field ps base-off f) + ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b) + ;; and silently drops the rest. Compute absolute byte offsets via + ;; nested binary +. + (let* ((fty (car (cdr f))) + (foff (car (cddr f))) + (fsize (ctype-size fty)) + (start-off (+ base-off foff))) + (let zb ((j 0)) + (cond + ((>= j fsize) #t) + (else + (%push-frame-elem-lval ps (+ start-off j) %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (zb (+ j 1))))))) + +(define (%parse-init-local-struct-list ps sm base-off ty) + ;; Track each initialized field by name in `seen`; at the closing brace + ;; zero every field NOT in `seen`. The previous design tracked positional + ;; "remaining fields" via `rest`, which silently dropped earlier fields + ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x` + ;; uninitialized). C requires every unmentioned member of an aggregate + ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21). + (let ((fields (%init-struct-fields ty))) + (let lp ((rest fields) (seen '())) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (for-each + (lambda (f) + (cond ((not (%bv-in-list? (car f) seen)) + (%emit-zero-field ps base-off f)))) + fields)) + (else + (let* ((designated? (at-punct? ps 'dot)) + (target + (cond + (designated? + (advance ps) + (let ((nt (advance ps))) + (let ((f (%find-field fields (tok-value nt)))) + (cond + ((not f) (die (tok-loc nt) "init: no such field" + (tok-value nt)))) + (expect-punct ps 'assign) + f))) + ((null? rest) + (die (tok-loc (peek ps)) "init: too many fields")) + (else (car rest)))) + (fname (car target)) + (fty (car (cdr target))) + (foff (car (cddr target))) + (eoff (+ base-off foff))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind fty) 'arr) + (%parse-init-local-array-list ps sm eoff fty)) + ((or (eq? (ctype-kind fty) 'struct) + (eq? (ctype-kind fty) 'union)) + (%parse-init-local-struct-list ps sm eoff fty)) + (else + (%push-frame-elem-lval ps eoff fty) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) fty) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace)))) + (else + (%push-frame-elem-lval ps eoff fty) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) fty) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (cond + (designated? + (let drop ((xs fields)) + (cond + ((null? xs) '()) + ((equal? (car (car xs)) fname) (cdr xs)) + (else (drop (cdr xs)))))) + (else (cdr rest))) + (cons fname seen)))))))) + + +;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm: +;; +;; A = parse-decl-or-fn (caller, arena-unaware) +;; B = parse-fn-body (this fn — arena boundary) +;; C = %parse-fn-body-inner (the real per-fn parse + cg work) +;; +;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they +;; were allocated at cg-init (pre-mark) and only mutate via byte writes, +;; so byte-level work survives heap-rewind!. Everything C allocates — +;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch +;; case alists — is post-mark and discarded. +;; +;; The fn-name binding into the surrounding scope (used so recursive +;; calls can resolve the name during the body) is done BEFORE the mark +;; so it survives. Inner scope frames are popped via scope-leave! before +;; the rewind, so their cells become unreachable; rewind reclaims them. +;; +;; Rewind-safety guard: the body might add user-visible globals +;; (block-statics), strings (literals), tags, or typedefs. Those entries +;; are post-mark and would dangle on rewind. We snapshot the relevant +;; alists before parsing and skip the rewind if any changed — paying +;; full heap cost only for functions that genuinely mutate global state. +(define (parse-fn-body ps name dt) + ;; Hoist the recursive-binding scope-bind! out of the marked region + ;; so the fn-sym cons survives rewind. + (cond ((not (scope-lookup ps name)) + (scope-bind! ps name + (%sym name 'fn 'extern dt + (bytevector-append "cc__" name))))) + (let* ((cg (ps-cg ps)) + (mark (heap-mark)) + (globals-before (cg-globals cg)) + (str-pool-before (cg-str-pool cg)) + (typedefs-before (ps-typedefs ps)) + (tags-before (ps-tags ps))) + (%parse-fn-body-inner ps name dt) + (cond + ((and (eq? globals-before (cg-globals cg)) + (eq? str-pool-before (cg-str-pool cg)) + (eq? typedefs-before (ps-typedefs ps)) + (eq? tags-before (ps-tags ps))) + ;; cg-fn-meta points at post-mark alist conses (fn metadata, + ;; switch-case lists, indirect-slots). Drop the reference before + ;; rewinding so the cg record holds no dangling pointers — the + ;; next cg-fn-begin/v would reset it anyway, but if this is the + ;; last fn, leaving it set leaves a latent landmine. + (cg-fn-meta-set! cg '()) + (heap-rewind! mark) + (debug-log "fn-rewound" name "heap" (heap-usage))) + (else + (debug-log "fn-kept" name "heap" (heap-usage)))))) + +(define (%parse-fn-body-inner ps name dt) + (let* ((e (ctype-ext dt)) (ret (car e)) + (par (cadr e)) (var (car (cddr e)))) + (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var))) + (ps-fn-ctx-set! ps + (%fn-ctx name ret (map cdr psyms) var '())) + (scope-enter! ps) + (for-each (lambda (p) (scope-bind! ps (car p) (cdr p))) + psyms) + (expect-punct ps 'lbrace) + (parse-cstmt-body ps) + (expect-punct ps 'rbrace) + (scope-leave! ps) + (ps-fn-ctx-set! ps #f) + (cg-fn-end (ps-cg ps))))) + +(define (parse-stmt ps) + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps)) + (($ tok? (kind KW) (value if)) (parse-if-stmt ps)) + (($ tok? (kind KW) (value while)) (parse-while-stmt ps)) + (($ tok? (kind KW) (value do)) (parse-do-stmt ps)) + (($ tok? (kind KW) (value for)) (parse-for-stmt ps)) + (($ tok? (kind KW) (value switch)) (parse-switch-stmt ps)) + (($ tok? (kind KW) (value return)) (parse-return-stmt ps)) + (($ tok? (kind KW) (value goto)) (parse-goto-stmt ps)) + (($ tok? (kind KW) (value break)) + (advance ps) (expect-punct ps 'semi) (do-break ps)) + (($ tok? (kind KW) (value continue)) + (advance ps) (expect-punct ps 'semi) (do-continue ps)) + (($ tok? (kind KW) (value case)) (parse-case-stmt ps)) + (($ tok? (kind KW) (value default)) (parse-default-stmt ps)) + (($ tok? (kind IDENT)) + (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT) + (eq? (tok-value (peek2 ps)) 'colon))) + (parse-labelled-stmt ps)) + (else + (cond ((stmt-starts-decl? ps) (parse-local-decl ps)) + (else (parse-expr-stmt ps)))))) + +(define (stmt-starts-decl? ps) + (pmatch (peek ps) + (($ tok? (kind KW) (value ,v)) + (or (eq? v 'auto) (eq? v 'register) (eq? v 'static) + (eq? v 'extern) (eq? v 'typedef) (eq? v 'const) + (eq? v 'volatile) (eq? v 'restrict) (eq? v 'inline) + (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) + (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) + (eq? v '_Bool) (eq? v 'struct) (eq? v 'union) + (eq? v 'enum))) + (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) + (else #f))) + +(define (parse-local-decl ps) + (let* ((sp (parse-decl-spec ps)) + (sto (car sp)) (b (cdr sp))) + (cond + ((at-punct? ps 'semi) (advance ps) #t) + (else + (let lp () + (let* ((p (parse-declarator ps b)) + (n (car p)) (t (cdr p))) + (handle-decl ps sto n t) + (cond ((at-punct? ps 'comma) (advance ps) (lp)) + (else (expect-punct ps 'semi) #t)))))))) + +(define (parse-cstmt ps) + (expect-punct ps 'lbrace) + (scope-enter! ps) + (parse-cstmt-body ps) + (scope-leave! ps) + (expect-punct ps 'rbrace) #t) + +(define (parse-cstmt-body ps) + (cond + ((at-punct? ps 'rbrace) #t) + ((eq? (tok-kind (peek ps)) 'EOF) + (die (tok-loc (peek ps)) "EOF in cstmt")) + (else (parse-stmt ps) (parse-cstmt-body ps)))) + +(define (parse-compound-stmt ps) (parse-cstmt ps)) + +(define (parse-if-stmt ps) + (expect-kw ps 'if) + (expect-punct ps 'lparen) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rparen) + (cg-ifelse (ps-cg ps) + (lambda () (parse-stmt ps)) + (lambda () + (cond ((at-kw? ps 'else) + (advance ps) (parse-stmt ps)) + (else #t))))) + +;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS +;; §3.3); the parser threads it into break/continue via loop-ctx. + +(define (parse-while-stmt ps) + (expect-kw ps 'while) + (expect-punct ps 'lparen) + (cg-loop (ps-cg ps) + (lambda () (parse-expr ps) (rval! ps)) + (lambda (tag) + (expect-punct ps 'rparen) + (push-loop-ctx! ps 'while tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps))) #t) + +(define (parse-do-stmt ps) + (expect-kw ps 'do) + ;; do-while needs its tag known *before* the body parses, so we + ;; capture it inside the body-thunk and stash it for pop-loop-ctx + ;; via a side cell. + (cg-loop (ps-cg ps) + (lambda () #t) + (lambda (tag) + (push-loop-ctx! ps 'do tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps) + (expect-kw ps 'while) (expect-punct ps 'lparen) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rparen) (expect-punct ps 'semi) + (cg-unop (ps-cg ps) 'lnot) + (cg-if (ps-cg ps) + (lambda () (cg-break (ps-cg ps) tag))))) + #t) + +(define (parse-for-stmt ps) + (expect-kw ps 'for) (expect-punct ps 'lparen) + (scope-enter! ps) + (cond + ((at-punct? ps 'semi) (advance ps)) + ((stmt-starts-decl? ps) (parse-local-decl ps)) + (else (parse-expr ps) (cg-pop (ps-cg ps)) + (expect-punct ps 'semi))) + (cg-loop (ps-cg ps) + (lambda () + (cond ((at-punct? ps 'semi) + (cg-push-imm (ps-cg ps) %t-i32 1)) + (else (parse-expr ps) (rval! ps))) + (expect-punct ps 'semi)) + (lambda (tag) + (let ((stk (collect-til-rparen ps))) + (expect-punct ps 'rparen) + (push-loop-ctx! ps 'for tag #t) + (parse-stmt ps) + (pop-loop-ctx! ps) + (cond + ((null? stk) #t) + (else + (let ((sv (ps-toks ps))) + (ps-toks-set! ps + (append stk (list (make-tok 'EOF #f #f)))) + (parse-expr ps) (cg-pop (ps-cg ps)) + (ps-toks-set! ps sv))))))) + (scope-leave! ps) #t) + +(define (collect-til-rparen ps) + (let loop ((acc '()) (d 0)) + (let ((t (peek ps))) + (cond + ((eq? (tok-kind t) 'EOF) + (die (tok-loc t) "EOF in for-step")) + ((and (zero? d) (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'rparen)) (reverse acc)) + (else + (let ((nt (advance ps))) + (loop (cons nt acc) + (cond ((not (eq? (tok-kind nt) 'PUNCT)) d) + ((eq? (tok-value nt) 'lparen) (+ d 1)) + ((eq? (tok-value nt) 'rparen) (- d 1)) + (else d))))))))) + +(define (parse-switch-stmt ps) + (expect-kw ps 'switch) (expect-punct ps 'lparen) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rparen) + ;; Switch's break-target tag is the swctx's end-tag — cg owns it, + ;; and we read it back so cg-break inside the switch body emits a + ;; tag cg actually labels. + (let* ((sw (cg-switch-begin (ps-cg ps))) + (tg (swctx-end-tag sw))) + (push-loop-ctx-sw! ps 'switch tg sw) + (parse-stmt ps) + (pop-loop-ctx! ps) + (cg-switch-end (ps-cg ps) sw))) + +(define (parse-case-stmt ps) + (expect-kw ps 'case) + (let ((v (parse-const-int ps))) + (expect-punct ps 'colon) + (cg-switch-case (ps-cg ps) (innermost-sw ps) v) + (parse-stmt ps))) + +(define (parse-default-stmt ps) + (expect-kw ps 'default) (expect-punct ps 'colon) + (cg-switch-default (ps-cg ps) (innermost-sw ps)) + (parse-stmt ps)) + +(define (parse-return-stmt ps) + (expect-kw ps 'return) + (cond + ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps))) + (else + (parse-expr ps) (rval! ps) + (let ((fc (ps-fn-ctx ps))) + (cond + ((and fc (not (eq? (ctype-kind (fn-ctx-return-type fc)) 'void))) + (cg-cast (ps-cg ps) (fn-ctx-return-type fc))) + (else #t))) + (cg-return (ps-cg ps)) + (expect-punct ps 'semi)))) + +(define (parse-goto-stmt ps) + (expect-kw ps 'goto) + (let ((t (advance ps))) + (cond ((eq? (tok-kind t) 'IDENT) + (cg-goto (ps-cg ps) (tok-value t))) + (else (die (tok-loc t) "label?")))) + (expect-punct ps 'semi)) + +(define (parse-labelled-stmt ps) + (let ((t (advance ps))) + (expect-punct ps 'colon) + (cg-emit-label (ps-cg ps) (tok-value t)) + (parse-stmt ps))) + +(define (parse-expr-stmt ps) + (cond + ((at-punct? ps 'semi) (advance ps) #t) + (else (parse-expr ps) (cg-pop (ps-cg ps)) + (expect-punct ps 'semi)))) + +(define (push-loop-ctx! ps k tg hc) + (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps)))) +(define (push-loop-ctx-sw! ps k tg sw) + (ps-loops-set! ps + (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps)))) +(define (pop-loop-ctx! ps) + (ps-loops-set! ps (cdr (ps-loops ps)))) +(define (do-break ps) + (let ((c (innermost-loop ps))) + (cond + ((not c) (die #f "break outside")) + ((eq? (loop-ctx-kind c) 'switch) + (cg-break (ps-cg ps) (car (loop-ctx-tag c)))) + (else (cg-break (ps-cg ps) (loop-ctx-tag c)))))) +(define (do-continue ps) + (let ((c (innermost-cont ps))) + (cond ((not c) (die #f "cont outside")) + (else (cg-continue (ps-cg ps) (loop-ctx-tag c)))))) +(define (innermost-loop ps) + (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps))))) +(define (innermost-cont ps) + (let lp ((xs (ps-loops ps))) + (cond ((null? xs) #f) + ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs))) + (else (car xs))))) +(define (innermost-sw ps) + (let lp ((xs (ps-loops ps))) + (cond ((null? xs) (die #f "case outside switch")) + ((eq? (loop-ctx-kind (car xs)) 'switch) + (cdr (loop-ctx-tag (car xs)))) + (else (lp (cdr xs)))))) + +(define %binop-bp + (list + (cons 'comma (cons 1 2)) + (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3)) + (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3)) + (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3)) + (cons 'shl-eq (cons 4 3)) (cons 'shr-eq (cons 4 3)) + (cons 'amp-eq (cons 4 3)) (cons 'caret-eq (cons 4 3)) + (cons 'bar-eq (cons 4 3)) (cons 'qmark (cons 6 5)) + (cons 'lor (cons 10 11)) (cons 'land (cons 20 21)) + (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41)) + (cons 'amp (cons 50 51)) + (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61)) + (cons 'lt (cons 70 71)) (cons 'le (cons 70 71)) + (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71)) + (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81)) + (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91)) + (cons 'star (cons 100 101)) (cons 'slash (cons 100 101)) + (cons 'pct (cons 100 101)))) + +(define (binop-bp-of s) (alist-ref/eq s %binop-bp)) + +(define (punct-to-cgop s) + (cond ((eq? s 'plus) 'add) ((eq? s 'minus) 'sub) + ((eq? s 'star) 'mul) ((eq? s 'slash) 'div) + ((eq? s 'pct) 'rem) ((eq? s 'amp) 'and) + ((eq? s 'bar) 'or) ((eq? s 'caret) 'xor) + ((eq? s 'shl) 'shl) ((eq? s 'shr) 'shr) + ((eq? s 'eq2) 'eq) ((eq? s 'ne) 'ne) + ((eq? s 'lt) 'lt) ((eq? s 'le) 'le) + ((eq? s 'gt) 'gt) ((eq? s 'ge) 'ge) + (else (die #f "binop" s)))) + +(define (compound-op s) + (cond ((eq? s 'plus-eq) 'add) ((eq? s 'minus-eq) 'sub) + ((eq? s 'star-eq) 'mul) ((eq? s 'slash-eq) 'div) + ((eq? s 'pct-eq) 'rem) ((eq? s 'shl-eq) 'shl) + ((eq? s 'shr-eq) 'shr) ((eq? s 'amp-eq) 'and) + ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq) 'or) + (else #f))) + +(define (parse-expr ps) (parse-expr-bp ps 0)) + +(define (parse-expr-bp ps mn) + (parse-unary ps) (parse-binary-rhs ps mn)) + +(define (parse-binary-rhs ps mn) + (let ((t (peek ps))) + (cond + ((not (eq? (tok-kind t) 'PUNCT)) #t) + (else + (let ((bp (binop-bp-of (tok-value t)))) + (cond + ((not bp) #t) + ((< (car bp) mn) #t) + (else + (let ((op (tok-value t)) (rb (cdr bp))) + (advance ps) + (cond + ((eq? op 'comma) + ;; lhs has been parsed; discard it and evaluate rhs. + ;; Result of the comma expr is the rhs's rval. + (cg-pop (ps-cg ps)) + (parse-expr-bp ps rb) (rval! ps)) + ((eq? op 'assign) + (parse-expr-bp ps rb) (rval! ps) + (cg-assign (ps-cg ps))) + ((compound-op op) + (let ((b (compound-op op))) + (cg-dup (ps-cg ps)) + (cg-load (ps-cg ps)) + (parse-expr-bp ps rb) (rval! ps) + (cg-arith-conv (ps-cg ps)) + (cg-binop (ps-cg ps) b) + (cg-assign (ps-cg ps)))) + ((eq? op 'qmark) + (rval! ps) + (cg-ifelse-merge (ps-cg ps) + (lambda () + (parse-expr-bp ps 0) (rval! ps)) + (lambda () + (expect-punct ps 'colon) + (parse-expr-bp ps rb) (rval! ps)))) + ((eq? op 'land) + (rval! ps) + ;; Both branches must push i32 0/1. Right side is + ;; coerced via `cg-cast bool` so the merge slot + ;; carries i32 (per §H.2). + (cg-ifelse-merge (ps-cg ps) + (lambda () + (parse-expr-bp ps rb) (rval! ps) + (cg-cast (ps-cg ps) %t-bool) + (cg-cast (ps-cg ps) %t-i32)) + (lambda () + (cg-push-imm (ps-cg ps) %t-i32 0)))) + ((eq? op 'lor) + (rval! ps) + (cg-ifelse-merge (ps-cg ps) + (lambda () + (cg-push-imm (ps-cg ps) %t-i32 1)) + (lambda () + (parse-expr-bp ps rb) (rval! ps) + (cg-cast (ps-cg ps) %t-bool) + (cg-cast (ps-cg ps) %t-i32)))) + (else + (rval! ps) (cg-promote (ps-cg ps)) + (parse-expr-bp ps rb) (rval! ps) + (cg-promote (ps-cg ps)) + (cg-arith-conv (ps-cg ps)) + (cg-binop (ps-cg ps) (punct-to-cgop op)))) + (parse-binary-rhs ps mn))))))))) + +(define (parse-unary ps) + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value amp)) + (advance ps) (parse-unary ps) + (cg-take-addr (ps-cg ps))) + (($ tok? (kind PUNCT) (value star)) + (advance ps) (parse-unary ps) (rval! ps) + (cg-push-deref (ps-cg ps))) + (($ tok? (kind PUNCT) (value plus)) + (advance ps) (parse-unary ps) + (rval! ps) (cg-promote (ps-cg ps))) + (($ tok? (kind PUNCT) (value minus)) + (advance ps) (parse-unary ps) + (rval! ps) (cg-promote (ps-cg ps)) + (cg-unop (ps-cg ps) 'neg)) + (($ tok? (kind PUNCT) (value tilde)) + (advance ps) (parse-unary ps) + (rval! ps) (cg-promote (ps-cg ps)) + (cg-unop (ps-cg ps) 'bnot)) + (($ tok? (kind PUNCT) (value bang)) + (advance ps) (parse-unary ps) (rval! ps) + (cg-unop (ps-cg ps) 'lnot)) + (($ tok? (kind PUNCT) (value inc)) + (advance ps) (parse-unary ps) + (cg-dup (ps-cg ps)) + (cg-load (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-i32 1) + (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps))) + (($ tok? (kind PUNCT) (value dec)) + (advance ps) (parse-unary ps) + (cg-dup (ps-cg ps)) + (cg-load (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-i32 1) + (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps))) + (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps)) + (($ tok? (kind KW) (value sizeof)) + (advance ps) + (cond + ((at-punct? ps 'lparen) + (advance ps) + (cond + ((token-is-decl? ps) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (cg-push-imm (ps-cg ps) %t-u64 + (max (ctype-size ty) 0)))) + (else + (parse-expr ps) (expect-punct ps 'rparen) + (let* ((tp (cg-top (ps-cg ps))) + (sz (max (ctype-size (opnd-type tp)) 0))) + (cg-pop (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-u64 sz))))) + (else (parse-unary ps) + (let* ((tp (cg-top (ps-cg ps))) + (sz (max (ctype-size (opnd-type tp)) 0))) + (cg-pop (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-u64 sz))))) + (else (parse-postfix ps)))) + +(define (token-is-decl? ps) + (pmatch (peek ps) + (($ tok? (kind KW) (value ,v)) + (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) + (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) + (eq? v '_Bool) (eq? v 'struct) (eq? v 'union) + (eq? v 'enum) (eq? v 'const) (eq? v 'volatile) + (eq? v 'restrict) (eq? v 'inline))) + (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) + (else #f))) + +(define (parse-cast-or-unary ps) + (pmatch (peek2 ps) + (($ tok? (kind KW) (value ,v)) + (guard (or (eq? v 'void) (eq? v 'char) (eq? v 'short) + (eq? v 'int) (eq? v 'long) (eq? v 'signed) + (eq? v 'unsigned) (eq? v '_Bool) + (eq? v 'struct) (eq? v 'union) (eq? v 'enum) + (eq? v 'const) (eq? v 'volatile) + (eq? v 'restrict))) + (advance ps) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (parse-unary ps) + ;; Cast operand undergoes lvalue conversion first (C semantics): + ;; arrays decay to pointers, lvals become rvals. cg-cast then + ;; bit-casts the resulting rval to the target type. + (rval! ps) + (cg-cast (ps-cg ps) ty))) + (($ tok? (kind IDENT) (value ,n)) + (guard (typedef? ps n)) + (advance ps) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (parse-unary ps) + (rval! ps) + (cg-cast (ps-cg ps) ty))) + (else (advance ps) (parse-expr ps) + (expect-punct ps 'rparen) + (parse-postfix-rest ps)))) + +(define (parse-postfix ps) + (parse-primary ps) (parse-postfix-rest ps)) + +(define (parse-postfix-rest ps) + (let lp () + (pmatch (peek ps) + (($ tok? (kind PUNCT) (value lbrack)) + (advance ps) (rval! ps) + (parse-expr ps) (rval! ps) + (expect-punct ps 'rbrack) + (cg-binop (ps-cg ps) 'add) + (cg-push-deref (ps-cg ps)) (lp)) + (($ tok? (kind PUNCT) (value lparen)) + (advance ps) (rval-not-fn! ps) + (let* ((fn-ty (call-fn-type (ps-cg ps))) + (n (parse-call-args ps fn-ty)) + ;; has-result? = #f for known void returns. Skips the + ;; wasted ST a0 → frame-slot spill that cg-call would + ;; otherwise emit for void calls. + (has-result? + (cond + ((not fn-ty) #t) + ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f) + (else #t)))) + (expect-punct ps 'rparen) + (cg-call (ps-cg ps) n has-result?) + ;; Maintain parse's "one rval per expression" invariant so + ;; comma / parse-expr-stmt / for-init/step pop sites stay + ;; simple. The placeholder is vstack-only and never + ;; materialized (cg-pop is a vstack op, no emit). + (cond ((not has-result?) + (cg-push-imm (ps-cg ps) %t-i32 0))) + (lp))) + (($ tok? (kind PUNCT) (value dot)) + (advance ps) + (pmatch (advance ps) + (($ tok? (kind IDENT) (value ,n)) + (cg-push-field (ps-cg ps) n) (lp)) + (($ tok? (loc ,l)) (die l "expected field name")))) + (($ tok? (kind PUNCT) (value arrow)) + (advance ps) + (pmatch (advance ps) + (($ tok? (kind IDENT) (value ,n)) + ;; ptr -> field: load the pointer to rval, deref to reach + ;; the struct lval, then push the field. + (rval! ps) + (cg-push-deref (ps-cg ps)) + (cg-push-field (ps-cg ps) n) (lp)) + (($ tok? (loc ,l)) (die l "expected field name")))) + (($ tok? (kind PUNCT) (value inc)) + (advance ps) + (cg-postinc (ps-cg ps)) (lp)) + (($ tok? (kind PUNCT) (value dec)) + (advance ps) + (cg-postdec (ps-cg ps)) (lp)) + (else #t)))) + +;; call-fn-type cg -> ctype-or-#f +;; The function operand sits at the top of the vstack when +;; parse-call-args runs (just after rval-not-fn!). Its type may be +;; `fn` directly (named callee) or `ptr -> fn` (function pointer). +;; Returns the underlying `fn` ctype, or #f if the operand isn't +;; recognizably callable (callsite still works — no per-arg cast). +(define (call-fn-type cg) + (let* ((tp (cg-top cg))) + (cond + ((not tp) #f) + (else + (let* ((ty (opnd-type tp)) + (k (ctype-kind ty))) + (cond + ((eq? k 'fn) ty) + ((eq? k 'ptr) + (let ((pe (ctype-ext ty))) + (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe) + (else #f)))) + (else #f))))))) + +;; param-types-of fn-ty -> (params variadic?) with a #f fallback. +(define (call-fn-param-info fn-ty) + (cond + ((not fn-ty) (cons '() #f)) + (else + (let ((ext (ctype-ext fn-ty))) + (cons (cadr ext) (car (cddr ext))))))) + +;; parse-call-args ps fn-ty -> arg-count +;; Casts each fixed arg to the declared param type (CC.md §K.5). +;; For variadic args (index >= named-arg count, when variadic? = #t) +;; applies cg-promote (CC.md §G.1). +(define (parse-call-args ps fn-ty) + (cond + ((at-punct? ps 'rparen) 0) + (else + (let* ((info (call-fn-param-info fn-ty)) + (params (car info)) + (var? (cdr info)) + (nfix (length params))) + (let lp ((n 0) (rem params)) + (parse-expr-bp ps 4) (rval! ps) + (cond + ;; Fixed-arg: cast to declared param type. param entry shape + ;; is (name . ctype) per cg-fn-begin's contract. + ((not (null? rem)) + (cg-cast (ps-cg ps) (cdr (car rem)))) + ;; Variadic position (n >= nfix and var? is true): promote. + (var? + (cg-promote (ps-cg ps)))) + (let ((m (+ n 1)) + (rest (if (null? rem) '() (cdr rem)))) + (cond ((at-punct? ps 'comma) (advance ps) (lp m rest)) + (else m)))))))) + +;; -------------------------------------------------------------------- +;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in +;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'. +;; va_start(ap, last) — last is parsed and discarded; cg only needs +;; the variadic-first-slot offset, which it already tracks. +;; va_arg(ap, T) — T is a type-name; result rval has that type. +;; va_end(ap) — no-op codegen; just consumes ap. +;; +;; Pushes a single imm 0 for va_start / va_end so they fit as +;; expression statements; va_arg pushes the rval. +;; -------------------------------------------------------------------- +(define (parse-builtin-va-start ps) + (advance ps) ; IDENT + (expect-punct ps 'lparen) + (parse-expr-bp ps 4) ; ap (must be lval) + (expect-punct ps 'comma) + ;; "last" is parsed for syntactic completeness then dropped — cg + ;; doesn't need it; the variadic-first-slot was determined at + ;; cg-fn-begin/v time. + (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) + (expect-punct ps 'rparen) + (cg-va-start (ps-cg ps)) + ;; Push a placeholder rval so the call expression has a value + ;; (matches va_start's "void" but our parser expects all + ;; expressions to leave one rval). + (cg-push-imm (ps-cg ps) %t-i32 0)) + +(define (parse-builtin-va-arg ps) + (advance ps) ; IDENT + (expect-punct ps 'lparen) + (parse-expr-bp ps 4) ; ap (lval) + (expect-punct ps 'comma) + (let* ((sp (parse-decl-spec ps)) + (p (parse-declarator ps (cdr sp))) + (ty (cdr p))) + (expect-punct ps 'rparen) + (cg-va-arg (ps-cg ps) ty))) + +(define (parse-builtin-va-end ps) + (advance ps) ; IDENT + (expect-punct ps 'lparen) + (parse-expr-bp ps 4) ; ap + (expect-punct ps 'rparen) + (cg-va-end (ps-cg ps)) + (cg-push-imm (ps-cg ps) %t-i32 0)) + +(define (parse-primary ps) + (let ((t (peek ps))) + (pmatch t + (($ tok? (kind INT) (value ,n)) + (advance ps) + (cg-push-imm (ps-cg ps) %t-i32 n)) + (($ tok? (kind CHAR) (value ,c)) + (advance ps) + (cg-push-imm (ps-cg ps) %t-i8 c)) + (($ tok? (kind STR) (value ,s)) + (advance ps) + (cg-push-string (ps-cg ps) s)) + (($ tok? (kind IDENT) (value ,n)) + (cond + ((bv= n "__builtin_va_start") (parse-builtin-va-start ps)) + ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps)) + ((bv= n "__builtin_va_end") (parse-builtin-va-end ps)) + (else + (let ((sm (scope-lookup ps n))) + (advance ps) + (cond + ((not sm) (die (tok-loc t) "undecl" n)) + ((eq? (sym-kind sm) 'enum-const) + (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm))) + (else (cg-push-sym (ps-cg ps) sm))))))) + (($ tok? (kind PUNCT) (value lparen)) + (advance ps) (parse-expr ps) (expect-punct ps 'rparen)) + (else (die (tok-loc t) "unexp" (tok-value t)))))) + +(define (rval! ps) + (let ((tp (cg-top (ps-cg ps)))) + (cond ((and tp (opnd? tp) (opnd-lval? tp)) + (cg-load (ps-cg ps))) + (else #t)))) + +(define (rval-not-fn! ps) + (let ((tp (cg-top (ps-cg ps)))) + (cond ((and tp (opnd? tp) (opnd-lval? tp) + (not (ctype-is-fn? (opnd-type tp)))) + (cg-load (ps-cg ps))) + (else #t)))) +;; cc/main.scm — driver. Argv, file I/O, ties phases together. +;; +;; Realization of docs/CC-INTERNALS.md §main.scm. + +;; -------------------------------------------------------------------- +;; CLI: cc <input.c> <output.P1pp> +;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.) +;; +;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] +;; is the catm'd compiler source path, argv[2..] are the user-facing +;; positional args. cc-main strips the first two. +;; -------------------------------------------------------------------- + +(define (%cc-slurp path) + (let ((r (open-input path))) + (cond ((not (car r)) + (die #f "cannot open input" path))) + (let* ((p (cdr r)) + (rd (read-all p))) + (close p) + (cond ((not (car rd)) (die #f "read failed" path))) + (cdr rd)))) + +(define (%cc-write path bv) + (let ((r (open-output path))) + (cond ((not (car r)) + (die #f "cannot open output" path))) + (let ((p (cdr r))) + (write-bv-fd (port-fd p) bv) + (close p) + 0))) + +;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan +;; argv for a sentinel "--cc-debug" flag. When present, debug-log +;; prints heap usage between phases to fd 2. +(define (%cc-flag? args flag) + (cond ((null? args) #f) + ((bv= (car args) flag) #t) + (else (%cc-flag? (cdr args) flag)))) + +(define (%cc-strip-flag args flag) + (cond ((null? args) '()) + ((bv= (car args) flag) (cdr args)) + (else (cons (car args) (%cc-strip-flag (cdr args) flag))))) + +(define (cc-main av) + (let* ((raw (cdr (cdr av))) + (dbg (%cc-flag? raw "--cc-debug")) + (args (%cc-strip-flag raw "--cc-debug"))) + (cond (dbg (debug-log-on!))) + (cond + ((or (null? args) (null? (cdr args))) + (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>"))) + (let* ((in-path (car args)) + (out-path (car (cdr args)))) + (debug-log "phase=start" "heap" (heap-usage)) + (let* ((src (%cc-slurp in-path)) + (_1 (debug-log "phase=slurp" "heap" (heap-usage) + "src-bytes" (bytevector-length src))) + (toks (lex-tokenize src in-path)) + (_2 (debug-log "phase=lex" "heap" (heap-usage))) + (expanded (pp-expand toks '())) + (_3 (debug-log "phase=pp" "heap" (heap-usage))) + (cg (cg-init)) + (ps (make-pstate expanded cg))) + (parse-translation-unit ps) + (debug-log "phase=parse" "heap" (heap-usage)) + (let ((out (cg-finish cg))) + (debug-log "phase=cg-finish" "heap" (heap-usage) + "out-bytes" (bytevector-length out)) + (%cc-write out-path out)) + 0)))) + diff --git a/cc/cg.scm b/cc/cg.scm @@ -1,1246 +0,0 @@ -;; cc/cg.scm — codegen state and emission API. -;; Realization of docs/CC-INTERNALS.md §cg.scm. -;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc; -;; cg owns sign extension, signed/unsigned dispatch, pointer scaling. -;; -;; Output uses libp1pp's structured macros (%fn, %ifelse_nez, -;; %loop_tag, %break, %continue) per docs/LIBP1PP.md. -;; -;; Frame layout (CC-CONTRACTS §3): -;; [sp + 0 .. staging*8) outgoing-arg staging -;; [sp + staging*8 ..) locals + spilled vstack values -;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the -;; staging size, only known at fn-end, can be filled in via a 0-arg -;; M1pp macro `<fn>__SO` defined just before the `%fn(...)` block. - -(define (%cg-emit-buf cg) - (cond ((cg-in-fn? cg) (cg-fn-buf cg)) (else (cg-text cg)))) - -(define (%cg-emit cg bv) - (buf-push! (%cg-emit-buf cg) bv)) - -(define (%cg-emit-many cg bvs) - (for-each (lambda (b) (%cg-emit cg b)) bvs)) - -(define (%n n) (number->string n 10)) - -;; Per-fn metadata (name, ret-slot, ret-type, switch-case lists, ...) -;; lives on cg-fn-meta, reset at every cg-fn-begin/v. Keeping it off -;; cg-globals means cg-globals only mutates when the user emits a real -;; global, which is what parse-fn-body's rewind-safety check needs. -(define (%cg-fn-set! cg key val) - (cg-fn-meta-set! cg (alist-update key (lambda (_) val) (cg-fn-meta cg)))) - -(define (%cg-fn-get cg key) (alist-ref/eq key (cg-fn-meta cg))) - -(define (%cg-fresh-label cg prefix) - (let* ((n (cg-label-ctr cg)) - (bv (bytevector-append prefix (%n n)))) - (cg-label-ctr-set! cg (+ n 1)) - bv)) - -(define (%cg-fresh-loop-tag cg) (%cg-fresh-label cg "L")) -(define (%cg-fresh-lbl cg) (%cg-fresh-label cg "lbl_")) - -(define (%cg-bump-outgoing! cg n) - (if (< (cg-max-outgoing cg) n) (cg-max-outgoing-set! cg n) 0)) - -(define (%cg-slot-expr cg logical-off) - (let ((nm (%cg-fn-get cg '%fn-name))) - (bv-cat (list "(+ %" nm "__SO " (%n logical-off) ")")))) - -(define (%cg-mangle-global name-bv) - (bytevector-append "cc__" name-bv)) - -(define (%cg-reg->bv r) (symbol->string r)) - -(define (%cg-emit-li cg reg n) - (%cg-emit-many cg (list "%li(" (%cg-reg->bv reg) ", " (%n n) ")\n"))) - -(define (%cg-emit-la cg reg label-bv) - (%cg-emit-many cg (list "%la(" (%cg-reg->bv reg) ", &" label-bv ")\n"))) - -(define (%cg-emit-ld-slot cg reg logical-off) - (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", sp, " - (%cg-slot-expr cg logical-off) ")\n"))) - -(define (%cg-emit-st-slot cg reg logical-off) - (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", sp, " - (%cg-slot-expr cg logical-off) ")\n"))) - -(define (%cg-emit-ld cg reg base off) - (%cg-emit-many cg (list "%ld(" (%cg-reg->bv reg) ", " - (%cg-reg->bv base) ", " (%n off) ")\n"))) - -(define (%cg-emit-st cg reg base off) - (%cg-emit-many cg (list "%st(" (%cg-reg->bv reg) ", " - (%cg-reg->bv base) ", " (%n off) ")\n"))) - -;; Width-aware load/store. Dispatches on ctype-size: -;; 1: %lb / %sb (LB zero-extends; for signed i8 we sign-extend by -;; shli/sari 56 to materialize the canonical 64-bit form). -;; 2/4: byte-decomposed (P1 has only 1-byte and 8-byte memory ops, -;; and word ops require natural alignment which we can't promise -;; for struct fields or non-word-aligned local slots). Loads -;; gather bytes via %lb + shli/or; stores scatter via shri/%sb. -;; Signed loads (i16/i32) sign-extend via shli/sari to canonical -;; 64-bit form. -;; 8 (or anything else for now): %ld / %st. -;; Scratch convention: helpers may clobber t1; callers never pass -;; reg=t1. - -(define (%cg-emit-ldN-bytes cg reg base-bv off-expr-fn n-bytes) - ;; Emit n-bytes %lb gathers into reg with shift+OR. byte 0 is low. - ;; off-expr-fn is a procedure: (off-expr-fn k) returns the bv - ;; expression for offset k. - (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " base-bv ", " - (off-expr-fn 0) ")\n")) - (let loop ((k 1)) - (cond - ((= k n-bytes) 0) - (else - (%cg-emit-many cg (list - "%lb(t1, " base-bv ", " (off-expr-fn k) ")\n" - "%shli(t1, t1, " (%n (* 8 k)) ")\n" - "%or(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", t1)\n")) - (loop (+ k 1)))))) - -(define (%cg-emit-stN-bytes cg reg base-bv off-expr-fn n-bytes) - ;; Emit n-bytes %sb scatters from reg via shri-shifted t1. - (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " base-bv ", " - (off-expr-fn 0) ")\n")) - (let loop ((k 1)) - (cond - ((= k n-bytes) 0) - (else - (%cg-emit-many cg (list - "%shri(t1, " (%cg-reg->bv reg) ", " (%n (* 8 k)) ")\n" - "%sb(t1, " base-bv ", " (off-expr-fn k) ")\n")) - (loop (+ k 1)))))) - -(define (%cg-emit-sext cg reg shift-amount) - (%cg-emit-many cg (list - "%shli(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " - (%n shift-amount) ")\n" - "%sari(" (%cg-reg->bv reg) ", " (%cg-reg->bv reg) ", " - (%n shift-amount) ")\n"))) - -(define (%cg-emit-ld-slot-typed cg reg ctype logical-off) - (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) - (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) - (cond - ((= sz 1) - (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", sp, " - (off-fn 0) ")\n")) - (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) - ((= sz 2) - (%cg-emit-ldN-bytes cg reg "sp" off-fn 2) - (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) - ((= sz 4) - (%cg-emit-ldN-bytes cg reg "sp" off-fn 4) - (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) - (else (%cg-emit-ld-slot cg reg logical-off))))) - -(define (%cg-emit-st-slot-typed cg reg ctype logical-off) - (let* ((sz (ctype-size ctype)) - (off-fn (lambda (k) (%cg-slot-expr cg (+ logical-off k))))) - (cond - ((= sz 1) - (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", sp, " - (off-fn 0) ")\n"))) - ((= sz 2) (%cg-emit-stN-bytes cg reg "sp" off-fn 2)) - ((= sz 4) (%cg-emit-stN-bytes cg reg "sp" off-fn 4)) - (else (%cg-emit-st-slot cg reg logical-off))))) - -(define (%cg-emit-ld-typed cg reg ctype base off) - (let* ((sz (ctype-size ctype)) (kind (ctype-kind ctype)) - (base-bv (%cg-reg->bv base)) - (off-fn (lambda (k) (%n (+ off k))))) - (cond - ((= sz 1) - (%cg-emit-many cg (list "%lb(" (%cg-reg->bv reg) ", " - base-bv ", " (off-fn 0) ")\n")) - (cond ((eq? kind 'i8) (%cg-emit-sext cg reg 56)))) - ((= sz 2) - (%cg-emit-ldN-bytes cg reg base-bv off-fn 2) - (cond ((eq? kind 'i16) (%cg-emit-sext cg reg 48)))) - ((= sz 4) - (%cg-emit-ldN-bytes cg reg base-bv off-fn 4) - (cond ((eq? kind 'i32) (%cg-emit-sext cg reg 32)))) - (else (%cg-emit-ld cg reg base off))))) - -(define (%cg-emit-st-typed cg reg ctype base off) - (let* ((sz (ctype-size ctype)) - (base-bv (%cg-reg->bv base)) - (off-fn (lambda (k) (%n (+ off k))))) - (cond - ((= sz 1) - (%cg-emit-many cg (list "%sb(" (%cg-reg->bv reg) ", " - base-bv ", " (off-fn 0) ")\n"))) - ((= sz 2) (%cg-emit-stN-bytes cg reg base-bv off-fn 2)) - ((= sz 4) (%cg-emit-stN-bytes cg reg base-bv off-fn 4)) - (else (%cg-emit-st cg reg base off))))) - -(define (%cg-load-opnd-into cg op reg) - ;; frame lval: load at type width. frame rval is a spilled word - ;; (alloc-slot 8 8 in %cg-spill-reg) — always 8-byte load. - ;; global lval width > 1 byte-gathers must not alias dest with base — - ;; the first %lb would otherwise clobber the address before subsequent - ;; byte loads. Stage the address in t2. - (pmatch op - (($ opnd? (kind imm) (ext ,n)) (%cg-emit-li cg reg n)) - (($ opnd? (kind frame) (lval? #t) (type ,ty) (ext ,off)) - (%cg-emit-ld-slot-typed cg reg ty off)) - (($ opnd? (kind frame) (ext ,off)) (%cg-emit-ld-slot cg reg off)) - (($ opnd? (kind global) (lval? #f) (ext ,lbl)) (%cg-emit-la cg reg lbl)) - (($ opnd? (kind global) (type ,ty) (ext ,lbl)) - (%cg-emit-la cg 't2 lbl) - (%cg-emit-ld-typed cg reg ty 't2 0)) - (else (die #f "cg internal: unknown opnd-kind" (opnd-kind op))))) - -(define (%cg-spill-reg cg reg ty) - (let* ((off (cg-alloc-slot cg 8 8)) - (op (%opnd 'frame ty off #f))) - (%cg-emit-st-slot cg reg off) - (cg-vstack-set! cg (cons op (cg-vstack cg))) - op)) - -(define (%ctype-ptr? t) - (let ((k (ctype-kind t))) - (if (eq? k 'ptr) #t (eq? k 'arr)))) - -(define (%ctype-pointee t) - (cond ((eq? (ctype-kind t) 'ptr) (ctype-ext t)) - ((eq? (ctype-kind t) 'arr) (car (ctype-ext t))) - (else #f))) - -(define (%ctype-unsigned? t) - (let ((k (ctype-kind t))) - (cond ((eq? k 'u8) #t) ((eq? k 'u16) #t) ((eq? k 'u32) #t) - ((eq? k 'u64) #t) ((eq? k 'bool) #t) - ((eq? k 'ptr) #t) ((eq? k 'arr) #t) ((eq? k 'fn) #t) - (else #f)))) - -(define (%ctype-size t) (ctype-size t)) - -(define (%reg-by-idx i) - (cond ((= i 0) 'a0) ((= i 1) 'a1) ((= i 2) 'a2) ((= i 3) 'a3) - (else (die #f "cg: param idx > 3 needs ldarg path" i)))) - -;; -------------------------------------------------------------------- -;; Lifecycle -;; -------------------------------------------------------------------- - -(define (cg-init) - (%cg (make-buf/cap %BUF-CAP-TEXT) ; text - (make-buf/cap %BUF-CAP-DATA) ; data - (make-buf/cap %BUF-CAP-BSS) ; bss - '() ; vstack - 0 ; frame-hi - 0 ; label-ctr - '() ; str-pool - '() ; globals - '() ; fn-meta - (make-buf/cap %BUF-CAP-FN) ; fn-buf (reused per fn) - (make-buf/cap %BUF-CAP-PROLOGUE) ; prologue-buf (reused per fn) - 0 ; max-outgoing - #f)) ; in-fn? - -(define (cg-finish cg) - ;; Entry stub. P1's program-entry contract (docs/P1.md §Program Entry) - ;; delivers argc in a0 and argv in a1 at p1_main. %call doesn't - ;; clobber a0/a1, so falling straight through to cc__main forwards - ;; them unchanged. The 16-byte frame is just enough for %enter's - ;; saved-fp/lr to fit; cc__main builds its own frame on top. - ;; (CC-CONTRACTS §J.1, §5.4.) - (let ((tb (cg-text cg))) - (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n") - (buf-push! tb "%fn(p1_main, 16, {\n") - (buf-push! tb "%call(&cc__main)\n") - (buf-push! tb "})\n")) - ;; Every P1pp translation unit must end with :ELF_end so the ELF - ;; header can compute file-size and ph_memsz boundaries. - (bv-cat (list (buf-flush (cg-text cg)) - (buf-flush (cg-data cg)) - (buf-flush (cg-bss cg)) - ":ELF_end\n"))) - -(define (cg-fn-begin cg name params return-type) - (cg-fn-begin/v cg name params return-type #f)) - -;; Variadic-aware variant. variadic? = #t reserves 16 contiguous 8-byte -;; slots covering incoming arg indices 0..15, populating each from the -;; appropriate source: a-register for idx 0..3, LDARG slot (idx-4) for -;; idx 4..15. va_start computes the address of the slot at index = -;; named-arg count, so va_arg walks linearly through the rest. -;; Indices 4..15 may be garbage when the caller passed fewer args; user -;; code stops walking based on a count or sentinel before those slots -;; are read. Limit of 15 variadic args (after named) is enough for -;; tcc.c's logging shapes; bump VARARG_WINDOW if you need more. -(define (cg-fn-begin/v cg name params return-type variadic?) - (buf-reset! (cg-fn-buf cg)) - (buf-reset! (cg-prologue-buf cg)) - (cg-in-fn?-set! cg #t) - (cg-vstack-set! cg '()) - (cg-frame-hi-set! cg 0) - ;; cg-label-ctr is NOT reset per-fn. Loop tags emit single-colon - ;; (global) labels via libp1pp's %loop_tag macro (`:L0_top`, - ;; `:L0_end`) — see P1/P1pp.P1pp:loop_tag — so two functions both - ;; using L0 would produce duplicate global labels and break linking. - ;; Switch dispatch labels (`sw_disp_L<N>`) inherit the same tag and - ;; are also single-colon. Keeping the counter monotonic across - ;; functions guarantees uniqueness without needing to mangle. - (cg-max-outgoing-set! cg 0) - (cg-fn-meta-set! cg '()) - (%cg-fn-set! cg '%fn-name name) - (%cg-fn-set! cg '%fn-ret-type return-type) - (%cg-fn-set! cg '%indirect-slots '()) - (%cg-fn-set! cg '%fn-variadic? variadic?) - (let ((ret-slot (cg-alloc-slot cg 8 8))) - (%cg-fn-set! cg '%fn-ret-slot ret-slot) - (cond - ((not (eq? (ctype-kind return-type) 'void)) - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%li(t0, 0)\n" - "%st(t0, sp, " - (%cg-slot-expr cg ret-slot) ")\n")))))) - ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We - ;; return an alist (name-bv . sym) the parser binds into scope. - (let walk ((ps params) (idx 0) (out '()) (first-slot #f)) - (cond - ((null? ps) - (cond - (variadic? - ;; Pad the incoming-arg window out to 16 slots. For idx 0..3 - ;; the slot is filled from a-register; for idx 4..15 from - ;; LDARG slot (idx-4). va_start points at the slot whose - ;; index equals the named-arg count, and va_arg walks - ;; linearly from there through the rest of the window. - (let pad ((i idx) (vfirst #f) (fs first-slot)) - (cond - ((>= i 16) - ;; If named-arg count was 0, vfirst is the very first - ;; slot of the save area (= fs). - (%cg-fn-set! cg '%fn-vararg-first-slot - (or vfirst fs)) - (reverse out)) - (else - (let ((off (cg-alloc-slot cg 8 8))) - (cond - ((< i 4) - (let ((ar (%reg-by-idx i))) - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%st(" (%cg-reg->bv ar) - ", sp, " - (%cg-slot-expr cg off) ")\n"))))) - (else - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n" - "%st(t0, sp, " - (%cg-slot-expr cg off) ")\n"))))) - (pad (+ i 1) - (or vfirst off) - (or fs off))))))) - (else (reverse out)))) - (else - (let* ((p (car ps)) - (nm (car p)) - (ty (cdr p)) - (off (cg-alloc-slot cg 8 8)) - (psym (%sym nm 'param #f ty off))) - (cond - ((< idx 4) - (let ((ar (%reg-by-idx idx))) - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%st(" (%cg-reg->bv ar) - ", sp, " (%cg-slot-expr cg off) ")\n"))))) - (else - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n" - "%st(t0, sp, " (%cg-slot-expr cg off) ")\n"))))) - (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) - (or first-slot off))))))) - -(define (cg-fn-end cg) - ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! - ;; (memcpy, no allocation). Header/footer pieces go through buf-push! - ;; on cg-text — also memcpy. Net result: zero net heap allocation in - ;; cg-fn-end other than the small (%n N) bvs for staging-bytes / - ;; frame-size, which the surrounding parse-fn-body's heap-rewind! - ;; reclaims. - (let* ((name (%cg-fn-get cg '%fn-name)) - (ret-slot (%cg-fn-get cg '%fn-ret-slot)) - (ret-type (%cg-fn-get cg '%fn-ret-type)) - (locals-hi (cg-frame-hi cg)) - (staging-bytes (* 8 (cg-max-outgoing cg))) - (raw-size (+ staging-bytes locals-hi)) - (frame-size (align-up raw-size 16)) - (mangled (%cg-mangle-global name)) - (tb (cg-text cg))) - ;; Now that the body is fully emitted, leave fn dispatch so any - ;; trailing emits in this function (including the ret-block below) - ;; route to cg-text directly. - (cg-in-fn?-set! cg #f) - ;; staging-size macro - (buf-push! tb "%macro ") - (buf-push! tb name) - (buf-push! tb "__SO()\n") - (buf-push! tb (%n staging-bytes)) - (buf-push! tb "\n%endm\n") - ;; %fn header - (buf-push! tb "%fn(") - (buf-push! tb mangled) - (buf-push! tb ", ") - (buf-push! tb (%n frame-size)) - (buf-push! tb ", {\n") - ;; prologue + body, drained byte-for-byte - (buf-drain! tb (cg-prologue-buf cg)) - (buf-drain! tb (cg-fn-buf cg)) - ;; ret block - (buf-push! tb "::ret\n") - (cond - ((eq? (ctype-kind ret-type) 'void) - (buf-push! tb "%li(a0, 0)\n")) - (else - (buf-push! tb "%ld(a0, sp, ") - (buf-push! tb (%cg-slot-expr cg ret-slot)) - (buf-push! tb ")\n"))) - (buf-push! tb "})\n") - (cg-vstack-set! cg '()) - (cg-frame-hi-set! cg 0) - (cg-max-outgoing-set! cg 0) - 0)) - -;; -------------------------------------------------------------------- -;; Vstack -;; -------------------------------------------------------------------- -(define (cg-push cg op) - (cg-vstack-set! cg (cons op (cg-vstack cg))) - op) - -(define (cg-pop cg) - (let ((s (cg-vstack cg))) - (cond ((null? s) (die #f "cg-pop: empty vstack")) - (else (cg-vstack-set! cg (cdr s)) (car s))))) - -(define (cg-top cg) - (let ((s (cg-vstack cg))) - (cond ((null? s) (die #f "cg-top: empty vstack")) (else (car s))))) - -(define (cg-depth cg) (length (cg-vstack cg))) - -;; Duplicate the top vstack entry. For lvals this is safe — the slot -;; (or label, or indirect-marked frame) backing the lval keeps existing -;; until the function ends. For rvals it duplicates the descriptor of -;; the spilled value; both copies refer to the same already-emitted -;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to -;; preserve the lhs across a `cg-load` so the subsequent `cg-assign` -;; still has its address. -(define (cg-dup cg) - (let ((p (cg-top cg))) (cg-push cg p) p)) - -;; -------------------------------------------------------------------- -;; Materialize -;; -------------------------------------------------------------------- -(define (cg-push-imm cg ctype value) - (cg-push cg (%opnd 'imm ctype value #f))) - -(define (cg-push-string cg bv-content) - (let* ((label (cg-intern-string cg bv-content)) - (cp-ty (%ctype 'ptr 8 8 %t-i8))) - (cg-push cg (%opnd 'global cp-ty label #f)))) - -(define (cg-push-sym cg sym) - (pmatch sym - (($ sym? (kind fn) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #f))) - (($ sym? (kind enum-const) (type ,ty) (slot ,v)) - (cg-push cg (%opnd 'imm ty v #f))) - (($ sym? (kind var) (storage extern) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) - (($ sym? (kind var) (storage static) (type ,ty) (name ,nm)) - (cg-push cg (%opnd 'global ty (%cg-mangle-global nm) #t))) - (($ sym? (kind var) (type ,ty) (slot ,off)) - (cg-push cg (%opnd 'frame ty off #t))) - (($ sym? (kind param) (type ,ty) (slot ,off)) - (cg-push cg (%opnd 'frame ty off #t))) - (else (die #f "cg-push-sym: unsupported sym-kind" (sym-kind sym))))) - -;; A cg-push-deref result is a frame-lval whose slot HOLDS THE ADDRESS -;; (not the value). To distinguish from ordinary frame-lvals (whose -;; slot holds the value directly), we tag indirect slots in -;; %indirect-slots so cg-load and cg-assign can do the extra -;; indirection. -(define (%cg-mark-indirect! cg off) - (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) - (%cg-fn-set! cg '%indirect-slots (cons off cur)))) - -(define (%cg-indirect? cg off) - (let ((cur (or (%cg-fn-get cg '%indirect-slots) '()))) - (let loop ((xs cur)) - (cond ((null? xs) #f) ((= (car xs) off) #t) (else (loop (cdr xs))))))) - -(define (cg-push-deref cg) - (let* ((p (cg-pop cg)) - (pt (opnd-type p)) - (pe (cond ((eq? (ctype-kind pt) 'ptr) (ctype-ext pt)) - ((eq? (ctype-kind pt) 'arr) (car (ctype-ext pt))) - (else #f)))) - (cond - ((not pe) (die #f "cg-push-deref: not a pointer" pt)) - (else - (%cg-load-opnd-into cg p 't0) - (let ((off (cg-alloc-slot cg 8 8))) - (%cg-emit-st-slot cg 't0 off) - (%cg-mark-indirect! cg off) - (cg-push cg (%opnd 'frame pe off #t))))))) - -;; -------------------------------------------------------------------- -;; Aggregate field access (§D.1–D.4) -;; -------------------------------------------------------------------- -;; cg-push-field cg fname: -;; pop a struct/union lval; look up `fname` in the struct's fields -;; list (data.scm: ext = (tag complete? fields), where each field -;; is (name-bv ctype offset)); push a new lval at the field's -;; offset with the field's ctype. -;; -;; Three input cases: -;; - direct frame lval at slot `off` -> frame lval at off+fo -;; - indirect frame lval (slot holds addr) -> new indirect slot for -;; addr+fo -;; - global lval at label L -> indirect slot for -;; la(L)+fo -;; In all cases the resulting lval has the field's ctype. - -(define (%cg-find-field fields fname) - (let loop ((xs fields)) - (cond - ((null? xs) #f) - ((bv= (car (car xs)) fname) (car xs)) - (else (loop (cdr xs)))))) - -(define (cg-push-field cg fname) - (let* ((s (cg-pop cg)) - (sty (opnd-type s)) - (k (ctype-kind sty))) - (cond - ((not (or (eq? k 'struct) (eq? k 'union))) - (die #f "cg-push-field: not a struct/union" k)) - ((not (opnd-lval? s)) - (die #f "cg-push-field: not an lvalue" k)) - (else - (let* ((fields (car (cddr (ctype-ext sty)))) - (f (%cg-find-field fields fname))) - (cond - ((not f) (die #f "cg-push-field: no such field" fname)) - (else - (let* ((fty (cadr f)) (fo (car (cddr f)))) - (pmatch s - ;; direct frame lval: just shift the slot offset. - (($ opnd? (kind frame) (ext ,off)) - (guard (not (%cg-indirect? cg off))) - (cg-push cg (%opnd 'frame fty (+ off fo) #t))) - ;; indirect frame lval: addr lives in the slot. Compute - ;; addr+fo into a new indirect slot. - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-ld-slot cg 't0 off) - (cond - ((> fo 0) - (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) - (let ((no (cg-alloc-slot cg 8 8))) - (%cg-emit-st-slot cg 't0 no) - (%cg-mark-indirect! cg no) - (cg-push cg (%opnd 'frame fty no #t)))) - ;; global lval: load addr, add offset, indirect slot. - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (cond - ((> fo 0) - (%cg-emit-many cg (list "%addi(t0, t0, " (%n fo) ")\n")))) - (let ((no (cg-alloc-slot cg 8 8))) - (%cg-emit-st-slot cg 't0 no) - (%cg-mark-indirect! cg no) - (cg-push cg (%opnd 'frame fty no #t)))) - (else - (die #f "cg-push-field: unsupported lval kind" - (opnd-kind s)))))))))))) - -;; cg-decay-array: -;; if top of vstack is an arr-typed lval, replace it with a ptr-rval -;; to the first element. C arrays decay to T* in most contexts; -;; parse calls this before rval-style operations. No-op otherwise. -(define (cg-decay-array cg) - (let ((tp (cg-top cg))) - (cond - ((and (opnd-lval? tp) (eq? (ctype-kind (opnd-type tp)) 'arr)) - (let* ((p (cg-pop cg)) - (et (car (ctype-ext (opnd-type p)))) - (pty (%ctype 'ptr 8 8 et))) - (pmatch p - ;; direct frame lval: address is sp+off. - (($ opnd? (kind frame) (ext ,off)) - (guard (not (%cg-indirect? cg off))) - (%cg-emit-many cg (list "%mov(t0, sp)\n" - "%addi(t0, t0, " - (%cg-slot-expr cg off) ")\n")) - (%cg-spill-reg cg 't0 pty)) - ;; indirect frame lval (rare for arrays, but support it): - ;; the slot holds the address already. - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-ld-slot cg 't0 off) - (%cg-spill-reg cg 't0 pty)) - ;; global array: la(label) is the address. - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (%cg-spill-reg cg 't0 pty)) - (else (die #f "cg-decay-array: unsupported lval kind" - (opnd-kind p)))))) - (else tp)))) - -;; -------------------------------------------------------------------- -;; Address & deref -;; -------------------------------------------------------------------- -(define (cg-take-addr cg) - (let* ((p (cg-pop cg)) - (ty (opnd-type p)) - ;; &arr yields T(*)[N] per strict C. Pointer arithmetic on - ;; the result scales by sizeof(T[N]) (the whole array), so - ;; &arr + 1 is one-past-end. Array-to-pointer decay happens - ;; on use via cg-decay-array, not at the & operator. - (pty (%ctype 'ptr 8 8 ty))) - (pmatch p - (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue")) - ;; The address itself lives at sp+slot — &*p degenerates to p. - (($ opnd? (kind frame) (ext ,off)) - (guard (%cg-indirect? cg off)) - (%cg-emit-ld-slot cg 't0 off) - (%cg-spill-reg cg 't0 pty)) - ;; %mov(rd, sp) gives the portable-sp pointer (the backend - ;; handles any hidden frame-header offset). Then add slot. - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-many cg (list "%mov(t0, sp)\n" - "%addi(t0, t0, " - (%cg-slot-expr cg off) ")\n")) - (%cg-spill-reg cg 't0 pty)) - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (%cg-spill-reg cg 't0 pty)) - (else (die #f "cg-take-addr: non-addressable" (opnd-kind p)))))) - -(define (cg-load cg) - (let* ((p (cg-pop cg)) (ty (opnd-type p))) - (cond - ((not (opnd-lval? p)) (die #f "cg-load: not an lvalue")) - ;; Array lvalues decay to a ptr-rval addressing the first - ;; element (C array-to-pointer decay). We push the lval back - ;; and route through cg-decay-array for a single source of truth. - ((eq? (ctype-kind ty) 'arr) - (cg-push cg p) (cg-decay-array cg)) - ((and (eq? (opnd-kind p) 'frame) - (%cg-indirect? cg (opnd-ext p))) - ;; Indirect frame-lval: slot holds the address. Stage the - ;; address in t2 so multi-byte gathers don't alias dest with - ;; base. - (%cg-emit-ld-slot cg 't2 (opnd-ext p)) - (%cg-emit-ld-typed cg 't0 ty 't2 0) - (%cg-spill-reg cg 't0 ty)) - (else (%cg-load-opnd-into cg p 't0) (%cg-spill-reg cg 't0 ty))))) - -;; -------------------------------------------------------------------- -;; Type conversions -;; -------------------------------------------------------------------- -(define (cg-cast cg to-type) - (let* ((p (cg-pop cg)) - (from-ty (opnd-type p)) - (from-sz (%ctype-size from-ty)) - (to-sz (%ctype-size to-type)) - (to-kind (ctype-kind to-type))) - (cond - ((eq? to-kind 'bool) - (%cg-load-opnd-into cg p 't0) - (%cg-emit-many cg (list - "%ifelse_eqz(t0, { %li(t0, 0) }, { %li(t0, 1) })\n")) - (%cg-spill-reg cg 't0 to-type)) - ((or (eq? to-kind 'ptr) - (and (or (eq? to-kind 'i64) (eq? to-kind 'u64)) - (or (eq? (ctype-kind from-ty) 'ptr) - (eq? (ctype-kind from-ty) 'arr)))) - (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) - ((>= to-sz from-sz) - (cg-push cg (%opnd (opnd-kind p) to-type (opnd-ext p) (opnd-lval? p)))) - (else - ;; Narrowing cast. Signed targets (i8/i16/i32) shli/sari to - ;; truncate-and-sign-extend in one step, so the slot holds the - ;; canonical 64-bit form and a subsequent widening cast (which - ;; is relabel-only) restores the value. Unsigned targets mask - ;; off high bits to zero-extend. - (%cg-load-opnd-into cg p 't0) - (cond - ((eq? to-kind 'i8) (%cg-emit-sext cg 't0 56)) - ((eq? to-kind 'i16) (%cg-emit-sext cg 't0 48)) - ((eq? to-kind 'i32) (%cg-emit-sext cg 't0 32)) - ((= to-sz 1) (%cg-emit-many cg (list "%andi(t0, t0, 255)\n"))) - ((= to-sz 2) - (%cg-emit-many cg (list "%li(t1, 65535)\n%and(t0, t0, t1)\n"))) - ((= to-sz 4) - (%cg-emit-many cg (list "%li(t1, 4294967295)\n%and(t0, t0, t1)\n"))) - (else 0)) - (%cg-spill-reg cg 't0 to-type))))) - -(define (cg-promote cg) - (let* ((p (cg-pop cg)) - (ty (opnd-type p)) - (sz (%ctype-size ty))) - (cond - ((< sz 4) - (cond - ((%ctype-unsigned? ty) - (cg-push cg (%opnd (opnd-kind p) %t-u32 (opnd-ext p) (opnd-lval? p)))) - (else - (cg-push cg (%opnd (opnd-kind p) %t-i32 (opnd-ext p) (opnd-lval? p)))))) - (else (cg-push cg p))))) - -(define (cg-arith-conv cg) - ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to - ;; arithmetic operands. When either operand is a pointer (or array, - ;; which behaves as a pointer in arithmetic), the pair is a - ;; pointer-arith case — leave the types alone so cg-binop can detect - ;; the ptr operand and apply the right scaling. - (let* ((b (cg-pop cg)) - (a (cg-pop cg)) - (ta (opnd-type a)) - (tb (opnd-type b)) - (sa (%ctype-size ta)) - (sb (%ctype-size tb))) - (cond - ;; Pointer/array arithmetic: leave types alone so cg-binop's - ;; ptr-aware add/sub branch fires with the correct pointee type - ;; (and doesn't see two pointers, which would skip scaling). - ((or (%ctype-ptr? ta) (%ctype-ptr? tb)) - (cg-push cg a) - (cg-push cg b)) - (else - (let ((common (cond - ((> sa sb) ta) - ((> sb sa) tb) - ((%ctype-unsigned? ta) ta) - ((%ctype-unsigned? tb) tb) - (else ta)))) - (cg-push cg (%opnd (opnd-kind a) common (opnd-ext a) (opnd-lval? a))) - (cg-push cg (%opnd (opnd-kind b) common (opnd-ext b) (opnd-lval? b)))))))) - -;; -------------------------------------------------------------------- -;; Operators -;; -------------------------------------------------------------------- -(define (%cg-emit-rrr cg op rd ra rb) - (%cg-emit-many cg (list "%" op "(" (%cg-reg->bv rd) ", " - (%cg-reg->bv ra) ", " (%cg-reg->bv rb) ")\n"))) - -(define (%cg-emit-cmp cg cc ra rb rd) - (%cg-emit-many cg (list "%ifelse_" cc "(" - (%cg-reg->bv ra) ", " (%cg-reg->bv rb) - ", { %li(" (%cg-reg->bv rd) ", 1) }, " - "{ %li(" (%cg-reg->bv rd) ", 0) })\n"))) - -(define (cg-binop cg op) - (let* ((b (cg-pop cg)) - (a (cg-pop cg)) - (ta (opnd-type a)) - (tb (opnd-type b)) - (unsigned? (or (%ctype-unsigned? ta) (%ctype-unsigned? tb))) - (a-ptr? (%ctype-ptr? ta)) - (b-ptr? (%ctype-ptr? tb)) - (result-ty - (cond - ((or (eq? op 'eq) (eq? op 'ne) - (eq? op 'lt) (eq? op 'le) (eq? op 'gt) (eq? op 'ge)) - %t-i32) - ((and a-ptr? b-ptr? (eq? op 'sub)) %t-i64) - (a-ptr? ta) - (b-ptr? tb) - (else ta)))) - (cond - ((and a-ptr? (or (eq? op 'add) (eq? op 'sub)) (not b-ptr?)) - (%cg-load-opnd-into cg a 'a0) - (%cg-load-opnd-into cg b 'a1) - (let ((sz (%ctype-size (%ctype-pointee ta)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) - (%cg-emit-rrr cg "mul" 'a1 'a1 't0)) - (else 0))) - (%cg-emit-rrr cg (if (eq? op 'add) "add" "sub") 't0 'a0 'a1) - (%cg-spill-reg cg 't0 result-ty)) - ((and b-ptr? (eq? op 'add) (not a-ptr?)) - (%cg-load-opnd-into cg a 'a0) - (%cg-load-opnd-into cg b 'a1) - (let ((sz (%ctype-size (%ctype-pointee tb)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t0, " (%n sz) ")\n")) - (%cg-emit-rrr cg "mul" 'a0 'a0 't0)) - (else 0))) - (%cg-emit-rrr cg "add" 't0 'a0 'a1) - (%cg-spill-reg cg 't0 result-ty)) - ((and a-ptr? b-ptr? (eq? op 'sub)) - (%cg-load-opnd-into cg a 'a0) - (%cg-load-opnd-into cg b 'a1) - (%cg-emit-rrr cg "sub" 't0 'a0 'a1) - (let ((sz (%ctype-size (%ctype-pointee ta)))) - (cond ((> sz 1) (%cg-emit-many cg (list "%li(t1, " (%n sz) ")\n")) - (%cg-emit-rrr cg "div" 't0 't0 't1)) - (else 0))) - (%cg-spill-reg cg 't0 result-ty)) - (else - (%cg-load-opnd-into cg a 'a0) - (%cg-load-opnd-into cg b 'a1) - (cond - ((eq? op 'add) (%cg-emit-rrr cg "add" 't0 'a0 'a1)) - ((eq? op 'sub) (%cg-emit-rrr cg "sub" 't0 'a0 'a1)) - ((eq? op 'mul) (%cg-emit-rrr cg "mul" 't0 'a0 'a1)) - ((eq? op 'and) (%cg-emit-rrr cg "and" 't0 'a0 'a1)) - ((eq? op 'or) (%cg-emit-rrr cg "or" 't0 'a0 'a1)) - ((eq? op 'xor) (%cg-emit-rrr cg "xor" 't0 'a0 'a1)) - ((eq? op 'shl) (%cg-emit-rrr cg "shl" 't0 'a0 'a1)) - ((eq? op 'shr) - (if unsigned? (%cg-emit-rrr cg "shr" 't0 'a0 'a1) - (%cg-emit-rrr cg "sar" 't0 'a0 'a1))) - ((eq? op 'div) (%cg-emit-rrr cg "div" 't0 'a0 'a1)) - ((eq? op 'rem) (%cg-emit-rrr cg "rem" 't0 'a0 'a1)) - ((eq? op 'eq) (%cg-emit-cmp cg "eq" 'a0 'a1 't0)) - ((eq? op 'ne) (%cg-emit-cmp cg "ne" 'a0 'a1 't0)) - ((eq? op 'lt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0)) - ((eq? op 'gt) (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0)) - ((eq? op 'le) - (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a1 'a0 't0) - (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n"))) - ((eq? op 'ge) - (%cg-emit-cmp cg (if unsigned? "ltu" "lt") 'a0 'a1 't0) - (%cg-emit-many cg (list "%li(t1, 1)\n%xor(t0, t0, t1)\n"))) - (else (die #f "cg-binop: unknown op" op))) - (%cg-spill-reg cg 't0 result-ty))))) - -;; Post-increment / post-decrement on the top-of-vstack lval. -;; Pushes the OLD value (per C semantics) and emits the +1 / -1 store. -;; Uses cg-dup + cg-load to capture the old rval (which is then in a -;; never-reused spill slot), then runs the regular dup+load+add+assign -;; pattern for the store. Pointer scaling falls out of cg-binop add. -(define (%cg-post-inc-dec cg op) - (cg-dup cg) - (cg-load cg) - (let ((old (cg-pop cg))) - (cg-dup cg) - (cg-load cg) - (cg-push-imm cg %t-i32 1) - (cg-binop cg op) - (cg-assign cg) - (cg-pop cg) - (cg-push cg old))) - -(define (cg-postinc cg) (%cg-post-inc-dec cg 'add)) -(define (cg-postdec cg) (%cg-post-inc-dec cg 'sub)) - -(define (cg-unop cg op) - (let* ((p (cg-pop cg)) (ty (opnd-type p))) - (%cg-load-opnd-into cg p 't0) - (cond - ((eq? op 'neg) - (%cg-emit-many cg (list "%li(t1, 0)\n%sub(t0, t1, t0)\n")) - (%cg-spill-reg cg 't0 ty)) - ((eq? op 'bnot) - (%cg-emit-many cg (list "%li(t1, -1)\n%xor(t0, t0, t1)\n")) - (%cg-spill-reg cg 't0 ty)) - ((eq? op 'lnot) - (%cg-emit-many cg (list "%ifelse_eqz(t0, { %li(t0, 1) }, { %li(t0, 0) })\n")) - (%cg-spill-reg cg 't0 %t-i32)) - (else (die #f "cg-unop: unknown op" op))))) - -(define (cg-assign cg) - ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek - ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2), - ;; emits the store, pushes the assigned value as the result rval. - (let* ((rhs0 (cg-pop cg)) - (lhs (cg-pop cg)) - (ty (opnd-type lhs))) - (cond ((not (opnd-lval? lhs)) (die #f "cg-assign: lhs not lvalue"))) - ;; Cast rhs to lhs's type (no-op when the types already match). - (cg-push cg rhs0) - (cg-cast cg ty) - (let ((rhs (cg-pop cg))) - (%cg-load-opnd-into cg rhs 'a0) - (pmatch lhs - (($ opnd? (kind frame) (ext ,off)) - (guard (%cg-indirect? cg off)) - (%cg-emit-ld-slot cg 't0 off) - (%cg-emit-st-typed cg 'a0 ty 't0 0)) - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-st-slot-typed cg 'a0 ty off)) - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (%cg-emit-st-typed cg 'a0 ty 't0 0)) - (else (die #f "cg-assign: unsupported lhs kind" (opnd-kind lhs)))) - (%cg-spill-reg cg 'a0 ty)))) - -;; -------------------------------------------------------------------- -;; Calls -;; -------------------------------------------------------------------- -(define (cg-call cg arity has-result?) - (let* ((args (let loop ((i 0) (acc '())) - (cond ((= i arity) acc) - (else (loop (+ i 1) (cons (cg-pop cg) acc)))))) - (fn-op (cg-pop cg))) - (let stage ((xs args) (idx 0)) - (cond - ((null? xs) 0) - ((< idx 4) - (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx)) - (stage (cdr xs) (+ idx 1))) - (else - (%cg-load-opnd-into cg (car xs) 't0) - (%cg-emit-st cg 't0 'sp (* 8 (- idx 4))) - (stage (cdr xs) (+ idx 1))))) - (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0)) - (cond - ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op))) - (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n"))) - (else - (%cg-load-opnd-into cg fn-op 't0) - (%cg-emit-many cg (list "%callr(t0)\n")))) - (cond - (has-result? - (let* ((fty (opnd-type fn-op)) - (rty (cond - ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty))) - ((eq? (ctype-kind fty) 'ptr) - (let ((p (ctype-ext fty))) - (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64))) - (else %t-i64)))) - (%cg-spill-reg cg 'a0 rty))) - (else #f)))) - -;; -------------------------------------------------------------------- -;; Return -;; -------------------------------------------------------------------- -(define (cg-return cg) - (let ((ret-slot (%cg-fn-get cg '%fn-ret-slot)) - (ret-type (%cg-fn-get cg '%fn-ret-type))) - (cond - ((eq? (ctype-kind ret-type) 'void) - (%cg-emit-many cg (list "%b(&::ret)\n"))) - (else - (let ((p (cg-pop cg))) - (%cg-load-opnd-into cg p 'a0) - (%cg-emit-st-slot cg 'a0 ret-slot) - (%cg-emit-many cg (list "%b(&::ret)\n"))))))) - -;; -------------------------------------------------------------------- -;; Structured control flow -;; -------------------------------------------------------------------- -(define (cg-if cg then-thunk) - (let ((p (cg-pop cg))) - (%cg-load-opnd-into cg p 't0) - (%cg-emit-many cg (list "%if_nez(t0, {\n")) - (then-thunk) - (%cg-emit-many cg (list "})\n")))) - -(define (cg-ifelse cg then-thunk else-thunk) - (let ((p (cg-pop cg))) - (%cg-load-opnd-into cg p 't0) - (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) - (then-thunk) - (%cg-emit-many cg (list "}, {\n")) - (else-thunk) - (%cg-emit-many cg (list "})\n")))) - -;; Conditionals-as-values: `cg-ifelse` is correct for if-statements -;; (thunks push nothing) but each thunk for ternary / `&&` / `||` ends -;; with one rval on top of the vstack — and after both branches run, -;; we'd be left with TWO opnds, which breaks the type contract for -;; the surrounding expression. `cg-ifelse-merge` solves that: pop the -;; cond, allocate one result slot, and after each thunk runs, pop its -;; rval and store into the slot. Push the slot as one frame rval. -;; Both branches must push exactly one opnd; the result type is the -;; type of the first thunk's pushed opnd (parser must arrange for -;; both branches to push compatible types — either by passing -;; pre-coerced operands or by injecting a `cg-cast` inside the thunk). -(define (cg-ifelse-merge cg then-thunk else-thunk) - (let* ((cond-op (cg-pop cg)) - (slot (cg-alloc-slot cg 8 8))) - (%cg-load-opnd-into cg cond-op 't0) - (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) - (then-thunk) - (let* ((p (cg-pop cg)) - (rty (opnd-type p))) - (%cg-load-opnd-into cg p 'a0) - (%cg-emit-st-slot cg 'a0 slot) - (%cg-emit-many cg (list "}, {\n")) - (else-thunk) - (let ((q (cg-pop cg))) - (%cg-load-opnd-into cg q 'a0) - (%cg-emit-st-slot cg 'a0 slot)) - (%cg-emit-many cg (list "})\n")) - (cg-push cg (%opnd 'frame rty slot #f))))) - -(define (cg-loop cg head-thunk body-thunk) - ;; body-thunk receives the loop tag as its argument; parser uses - ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS - ;; §1.9 / §3.3. - (let ((tag (%cg-fresh-loop-tag cg))) - (%cg-emit-many cg (list "%loop_tag(" tag ", {\n")) - (head-thunk) - (cond - ((zero? (cg-depth cg)) 0) - (else - (let ((c (cg-pop cg))) - (%cg-load-opnd-into cg c 't0) - (%cg-emit-many cg (list "%if_eqz(t0, { %break(" tag ") })\n"))))) - (body-thunk tag) - (%cg-emit-many cg (list "})\n")) - tag)) - -(define (cg-break cg tag) - (%cg-emit-many cg (list "%break(" tag ")\n"))) - -(define (cg-continue cg tag) - (%cg-emit-many cg (list "%continue(" tag ")\n"))) - -;; -------------------------------------------------------------------- -;; Variadic receive (§G.2). Layout: cg-fn-begin/v reserves a 4-slot -;; saved-register area at known frame offsets; va_start sets ap to the -;; address of the first slot past the named-arg count; va_arg reads -;; *ap, advances ap by 8, and pushes the value as the requested type. -;; -;; ap is an lval (typically a `va_list` local). cg-va-start pops it, -;; computes the address, stores into *ap (or the slot directly), and -;; pushes nothing. cg-va-arg pops ap-lval, loads ap, dereferences for -;; the value, advances ap, stores back, pushes the loaded value. -;; -;; Limitation: only first 4 incoming args (named + variadic) live in -;; the save area; variadic args at index >= 4 need LDARG and are not -;; yet supported. See punchlist §G.2 for the gap. -;; -------------------------------------------------------------------- -(define (%cg-vararg-first-slot cg) - (let ((s (%cg-fn-get cg '%fn-vararg-first-slot))) - (cond ((not s) (die #f "cg-va-start: not a variadic function")) - (else s)))) - -(define (cg-va-start cg) - ;; Pop ap-lval. Materialize "&sp + vararg-first-slot" into a0, - ;; store through ap-lval. Pushes nothing. - (let* ((ap-lv (cg-pop cg)) - (vsl (%cg-vararg-first-slot cg))) - (cond ((not (opnd-lval? ap-lv)) - (die #f "cg-va-start: ap not lvalue"))) - ;; Compute address into a0. - (%cg-emit-many cg (list "%mov(a0, sp)\n" - "%addi(a0, a0, " - (%cg-slot-expr cg vsl) ")\n")) - ;; Store a0 at ap-lval. - (cond - ((eq? (opnd-kind ap-lv) 'frame) - (cond - ((%cg-indirect? cg (opnd-ext ap-lv)) - (%cg-emit-ld-slot cg 't0 (opnd-ext ap-lv)) - (%cg-emit-st cg 'a0 't0 0)) - (else (%cg-emit-st-slot cg 'a0 (opnd-ext ap-lv))))) - ((eq? (opnd-kind ap-lv) 'global) - (%cg-emit-la cg 't0 (opnd-ext ap-lv)) - (%cg-emit-st cg 'a0 't0 0)) - (else (die #f "cg-va-start: bad ap kind" (opnd-kind ap-lv)))))) - -(define (cg-va-arg cg ctype) - ;; Pop ap-lval. Load ap into a0. Read 8 bytes at [a0] into a1. - ;; Advance a0 by 8 and store back through ap-lval. Push a1 as rval - ;; of type ctype (caller cg-cast's if needed). - (let ((ap-lv (cg-pop cg))) - (cond ((not (opnd-lval? ap-lv)) - (die #f "cg-va-arg: ap not lvalue"))) - ;; Load ap into a0. - (pmatch ap-lv - (($ opnd? (kind frame) (ext ,off)) - (guard (%cg-indirect? cg off)) - (%cg-emit-ld-slot cg 't0 off) - (%cg-emit-ld cg 'a0 't0 0)) - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-ld-slot cg 'a0 off)) - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (%cg-emit-ld cg 'a0 't0 0)) - (else (die #f "cg-va-arg: bad ap kind" (opnd-kind ap-lv)))) - ;; Load value at [a0] into a1 (full 8 bytes; cg-cast on the rval - ;; the caller pushes will narrow if needed). - (%cg-emit-ld cg 'a1 'a0 0) - ;; Advance ap by 8. - (%cg-emit-many cg (list "%addi(a0, a0, 8)\n")) - ;; Store advanced ap back. - (pmatch ap-lv - (($ opnd? (kind frame) (ext ,off)) - (guard (%cg-indirect? cg off)) - (%cg-emit-ld-slot cg 't0 off) - (%cg-emit-st cg 'a0 't0 0)) - (($ opnd? (kind frame) (ext ,off)) - (%cg-emit-st-slot cg 'a0 off)) - (($ opnd? (kind global) (ext ,lbl)) - (%cg-emit-la cg 't0 lbl) - (%cg-emit-st cg 'a0 't0 0)) - (else 0)) - ;; Spill the loaded value (a1) to a fresh frame slot under ctype. - (%cg-spill-reg cg 'a1 ctype))) - -(define (cg-va-end cg) - ;; va_end is a no-op in this design. Pop and discard ap-lval. - (cg-pop cg) - 0) - -;; -------------------------------------------------------------------- -;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3). -;; user_<name> namespace keeps the user's label space disjoint from -;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through -;; libp1pp's %scope mechanism, so forward references inside the same -;; %fn block work without explicit forward declaration. -;; -------------------------------------------------------------------- -(define (cg-emit-label cg name-bv) - (%cg-emit-many cg (list "::user_" name-bv "\n"))) - -(define (cg-goto cg name-bv) - (%cg-emit-many cg (list "%b(&::user_" name-bv ")\n"))) - -;; -------------------------------------------------------------------- -;; switch -;; -------------------------------------------------------------------- -(define-record-type swctx - (%swctx ctrl-slot end-tag default-lbl) - swctx? - (ctrl-slot swctx-ctrl-slot) - (end-tag swctx-end-tag) - (default-lbl swctx-default-lbl swctx-default-lbl-set!)) - -(define (cg-switch-begin cg) - (let* ((p (cg-pop cg)) - (off (cg-alloc-slot cg 8 8)) - (tag (%cg-fresh-loop-tag cg)) - (disp-lbl (bytevector-append "sw_disp_" tag))) - (%cg-load-opnd-into cg p 't0) - (%cg-emit-st-slot cg 't0 off) - (%cg-emit-many cg (list "%loop_tag(" tag ", {\n" - "%b(&::" disp-lbl ")\n")) - (%swctx off tag #f))) - -(define (cg-switch-case cg sw const-int) - (let* ((lbl (%cg-fresh-lbl cg)) - (key (string->symbol - (bytevector-append "%sw_cases__" (swctx-end-tag sw)))) - (cur (or (%cg-fn-get cg key) '())) - (entry (cons const-int lbl))) - (%cg-fn-set! cg key (cons entry cur)) - (%cg-emit-many cg (list "::" lbl "\n")))) - -(define (cg-switch-default cg sw) - (let ((lbl (%cg-fresh-lbl cg))) - (swctx-default-lbl-set! sw lbl) - (%cg-emit-many cg (list "::" lbl "\n")))) - -(define (cg-switch-end cg sw) - (let* ((tag (swctx-end-tag sw)) - (key (string->symbol (bytevector-append "%sw_cases__" tag))) - (cases (reverse (or (%cg-fn-get cg key) '()))) - (default-lbl (swctx-default-lbl sw)) - (disp-lbl (bytevector-append "sw_disp_" tag))) - (%cg-emit-many cg (list "%break(" tag ")\n" - "::" disp-lbl "\n")) - (%cg-emit-many cg (list "%ld(t0, sp, " - (%cg-slot-expr cg (swctx-ctrl-slot sw)) ")\n")) - (for-each - (lambda (c) - (%cg-emit-many cg (list "%li(t1, " (%n (car c)) ")\n" - "%beq(t0, t1, &::" (cdr c) ")\n"))) - cases) - (cond - (default-lbl (%cg-emit-many cg (list "%b(&::" default-lbl ")\n"))) - (else 0)) - (%cg-emit-many cg (list "%break(" tag ")\n" - "})\n")))) - -;; -------------------------------------------------------------------- -;; Globals and data -;; -------------------------------------------------------------------- -;; cg-emit-global: emit a global symbol into either .data (initialized) -;; or .bss (zero-init). -;; -;; init can be: -;; #f — zero-init in .bss (size from sym's ctype). -;; (piece ...) — initialized in .data; pieces concatenated. -;; -;; Each piece is either: -;; <bytevector> — raw bytes; emitted as N×!(byte) entries. -;; (label-ref . <label-bv>) — 8-byte pointer slot containing &label; -;; emitted as `&<label> %(0)` (4B label ref + -;; 4B zero pad). -(define (%cg-init-piece->bv piece) - (cond - ((bytevector? piece) - (let ((n (bytevector-length piece))) - (let loop ((i 0) (acc '())) - (cond - ((= i n) (bv-cat (reverse acc))) - (else - (loop (+ i 1) - (cons (bv-cat (list "!(" - (number->string - (bytevector-u8-ref piece i) 10) - ")\n")) - acc))))))) - ((and (pair? piece) (eq? (car piece) 'label-ref)) - (bv-cat (list "&" (cdr piece) " %(0)\n"))) - (else (die #f "cg-emit-global: bad init piece" piece)))) - -(define (cg-emit-global cg sym init) - (let* ((nm (sym-name sym)) - (lbl (%cg-mangle-global nm)) - (sz (ctype-size (sym-type sym))) - (size (if (< sz 0) 8 sz))) - (cond - (init - (buf-push! (cg-data cg) (bv-cat (list "\n:" lbl "\n"))) - (let walk ((ps init)) - (cond - ((null? ps) 0) - (else - (buf-push! (cg-data cg) (%cg-init-piece->bv (car ps))) - (walk (cdr ps)))))) - (else - (buf-push! (cg-bss cg) - (bv-cat (list "\n:" lbl "\n" - (let zero-loop ((rem size) (acc '())) - (cond - ((<= rem 0) (bv-cat (reverse acc))) - ((>= rem 8) - (zero-loop (- rem 8) (cons "$(0)\n" acc))) - (else - (zero-loop (- rem 1) (cons "!(0)\n" acc)))))))))) - (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg))) - 0)) - -(define (cg-emit-extern cg sym) - (cg-globals-set! cg (alist-set (sym-name sym) sym (cg-globals cg))) - 0) - -(define (cg-intern-string cg bv-content) - (let ((p (alist-ref bv-content (cg-str-pool cg)))) - (cond - (p p) - (else - (let* ((n (length (cg-str-pool cg))) - (lbl (bytevector-append "cc__str_" (%n n)))) - (cg-str-pool-set! cg - (alist-set bv-content lbl (cg-str-pool cg))) - (buf-push! (cg-data cg) - (bv-cat (list "\n:" lbl "\n" - "\"" bv-content "\"\n" - "!(0)\n"))) - lbl))))) - -;; -------------------------------------------------------------------- -;; Frame -;; -------------------------------------------------------------------- -(define (cg-alloc-slot cg bytes align) - (let* ((aligned (align-up (cg-frame-hi cg) align)) - (new-hi (+ aligned bytes))) - (cg-frame-hi-set! cg new-hi) - aligned)) diff --git a/cc/data.scm b/cc/data.scm @@ -1,239 +0,0 @@ -;; cc/data.scm — record types and symbol alphabets shared across modules. -;; -;; Concrete realization of: -;; docs/CC-INTERNALS.md §data.scm -;; docs/CC-CONTRACTS.md §1 -;; -;; Adding a record or alphabet symbol requires updating the contract -;; doc first. - -;; -------------------------------------------------------------------- -;; loc — source location for diagnostics -;; -------------------------------------------------------------------- -(define-record-type loc - (%loc file line col) - loc? - (file loc-file) ; bv - (line loc-line) ; fixnum - (col loc-col)) ; fixnum - -;; -------------------------------------------------------------------- -;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for -;; PUNCT value symbols, §1.3 for KW value symbols. -;; -------------------------------------------------------------------- -(define-record-type tok - (%tok kind value loc hide) - tok? - (kind tok-kind) ; symbol from §1.1 - (value tok-value) ; bv | fixnum | symbol | #f - (loc tok-loc) ; loc - (hide tok-hide)) ; list of bv (macro names already expanded) - -(define (make-tok kind value loc) - (%tok kind value loc '())) - -;; -------------------------------------------------------------------- -;; macro — preprocessor macro definition -;; -------------------------------------------------------------------- -(define-record-type macro - (%macro kind params body) - macro? - (kind macro-kind) ; 'obj | 'fn | 'fn-vararg - (params macro-params) ; list of bv - (body macro-body)) ; list of tok - -;; -------------------------------------------------------------------- -;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and -;; CC-INTERNALS §data.scm for the ext payload table. -;; -;; Fields that mutate over a ctype's lifetime: -;; size and align — set to -1/-1 on forward struct/union decl, -;; fixed when the type is completed. -;; ext — same; struct/union ext changes shape when -;; the body is parsed. -;; -------------------------------------------------------------------- -(define-record-type ctype - (%ctype kind size align ext) - ctype? - (kind ctype-kind) - (size ctype-size ctype-size-set!) - (align ctype-align ctype-align-set!) - (ext ctype-ext ctype-ext-set!)) - -;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?. -(define %t-void (%ctype 'void -1 -1 #f)) -(define %t-i8 (%ctype 'i8 1 1 #f)) -(define %t-u8 (%ctype 'u8 1 1 #f)) -(define %t-i16 (%ctype 'i16 2 2 #f)) -(define %t-u16 (%ctype 'u16 2 2 #f)) -(define %t-i32 (%ctype 'i32 4 4 #f)) -(define %t-u32 (%ctype 'u32 4 4 #f)) -(define %t-i64 (%ctype 'i64 8 8 #f)) -(define %t-u64 (%ctype 'u64 8 8 #f)) -(define %t-bool (%ctype 'bool 1 1 #f)) - -;; -------------------------------------------------------------------- -;; sym — declared identifier (function, variable, typedef, …) -;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage). -;; -------------------------------------------------------------------- -(define-record-type sym - (%sym name kind storage type slot) - sym? - (name sym-name) ; bv - (kind sym-kind) ; symbol from §1.7 - (storage sym-storage) ; symbol from §1.8 or #f - (type sym-type) ; ctype - (slot sym-slot)) ; fixnum | bv | #f, per kind - -;; -------------------------------------------------------------------- -;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind), -;; §1.10 (reg names). -;; -------------------------------------------------------------------- -(define-record-type opnd - (%opnd kind type ext lval?) - opnd? - (kind opnd-kind) - (type opnd-type) - (ext opnd-ext) - (lval? opnd-lval?)) - -;; -------------------------------------------------------------------- -;; loop-ctx — entry on parser's loop/switch context stack. -;; See CC-CONTRACTS §1.9. -;; -------------------------------------------------------------------- -(define-record-type loop-ctx - (%loop-ctx kind tag has-continue?) - loop-ctx? - (kind loop-ctx-kind) - (tag loop-ctx-tag) - (has-continue? loop-ctx-has-continue?)) - -;; -------------------------------------------------------------------- -;; fn-ctx — current-function context inside the parser. -;; -------------------------------------------------------------------- -(define-record-type fn-ctx - (%fn-ctx name return-type params variadic? labels) - fn-ctx? - (name fn-ctx-name) - (return-type fn-ctx-return-type) - (params fn-ctx-params) - (variadic? fn-ctx-variadic?) - (labels fn-ctx-labels fn-ctx-labels-set!)) - -;; -------------------------------------------------------------------- -;; pstate — parser state. Owned by parse.scm; read-only to cg. -;; -------------------------------------------------------------------- -(define-record-type pstate - (%pstate toks scope tags loops fn-ctx typedefs cg) - pstate? - (toks ps-toks ps-toks-set!) - (scope ps-scope ps-scope-set!) - (tags ps-tags ps-tags-set!) - (loops ps-loops ps-loops-set!) - (fn-ctx ps-fn-ctx ps-fn-ctx-set!) - (typedefs ps-typedefs ps-typedefs-set!) - (cg ps-cg)) - -;; -------------------------------------------------------------------- -;; cg — codegen state. Owned by cg.scm. -;; -------------------------------------------------------------------- -;; fn-buf and prologue-buf are pre-allocated (cg-init) and reused across -;; functions — cg-fn-begin/v calls buf-reset! on them, cg-fn-end drains -;; them into cg-text via buf-drain!. No per-fn allocation, which is what -;; lets parse-fn-body wrap the body in heap-mark/heap-rewind! safely: -;; the destination buf storage lives pre-mark, byte writes are stable -;; across rewind, and the parse/cg scratch dies cleanly. -;; -;; in-fn? discriminates "currently inside a function body" so -;; %cg-emit-buf can route emits to fn-buf during the body and cg-text -;; outside it (entry stub, etc.). -;; cg-globals: user-visible globals only (cg-emit-global / cg-emit-extern). -;; Stable except when user code adds a global — which is exactly what the -;; parse-fn-body rewind-safety check probes. -;; -;; cg-fn-meta: transient per-function state (fn-name, ret-slot, ret-type, -;; vararg-first-slot, indirect-slots, switch-case lists, ...). Reset on -;; cg-fn-begin/v; reads via %cg-fn-get / writes via %cg-fn-set!. Kept -;; out of cg-globals so rewind-safety checks on cg-globals aren't -;; tripped by every fn-begin. -(define-record-type cg - (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-meta fn-buf prologue-buf max-outgoing in-fn?) - cg? - (text cg-text) - (data cg-data) - (bss cg-bss) - (vstack cg-vstack cg-vstack-set!) - (frame-hi cg-frame-hi cg-frame-hi-set!) - (label-ctr cg-label-ctr cg-label-ctr-set!) - (str-pool cg-str-pool cg-str-pool-set!) - (globals cg-globals cg-globals-set!) - (fn-meta cg-fn-meta cg-fn-meta-set!) - (fn-buf cg-fn-buf) - (prologue-buf cg-prologue-buf) - (max-outgoing cg-max-outgoing cg-max-outgoing-set!) - (in-fn? cg-in-fn? cg-in-fn?-set!)) - -;; -------------------------------------------------------------------- -;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1. -;; -------------------------------------------------------------------- - -;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol. -(define %keyword-alist - '(;; storage - ("auto" . auto) ("register" . register) ("static" . static) - ("extern" . extern) ("typedef" . typedef) - ;; qualifiers (parsed and discarded by parse) - ("const" . const) ("volatile" . volatile) ("restrict" . restrict) - ("inline" . inline) - ;; type specifiers - ("void" . void) ("char" . char) ("short" . short) - ("int" . int) ("long" . long) - ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool) - ;; rejected type specifiers (KW so diagnostics are crisp) - ("float" . float) ("double" . double) - ;; aggregates - ("struct" . struct) ("union" . union) ("enum" . enum) - ;; statements - ("if" . if) ("else" . else) - ("while" . while) ("do" . do) ("for" . for) - ("switch" . switch) ("case" . case) ("default" . default) - ("break" . break) ("continue" . continue) - ("return" . return) ("goto" . goto) - ;; operators - ("sizeof" . sizeof) - ;; reserved-and-rejected (KW so diagnostics are crisp) - ("_Generic" . _Generic) ("_Atomic" . _Atomic) - ("_Thread_local" . _Thread_local) - ("_Alignof" . _Alignof) ("_Alignas" . _Alignas) - ("_Static_assert" . _Static_assert) - ("_Complex" . _Complex) ("_Imaginary" . _Imaginary))) - -;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol. -;; Listed longest-match-first; the lexer scans this list in order. -;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents. -(define %punct-alist - '(;; 4-byte - ("%:%:" . paste) - ;; 3-byte - ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq) - ;; 2-byte - ("##" . paste) ("->" . arrow) - ("++" . inc) ("--" . dec) - ("<<" . shl) (">>" . shr) - ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne) - ("&&" . land) ("||" . lor) - ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq) - ("/=" . slash-eq) ("%=" . pct-eq) - ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq) - ;; digraphs (mapped to the standard equivalent symbol) - ("<:" . lbrack) (":>" . rbrack) - ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash) - ;; 1-byte - ("[" . lbrack) ("]" . rbrack) - ("(" . lparen) (")" . rparen) - ("{" . lbrace) ("}" . rbrace) - ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark) - ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct) - ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang) - ("<" . lt) (">" . gt) ("=" . assign) - ("#" . hash))) diff --git a/cc/lex.scm b/cc/lex.scm @@ -1,900 +0,0 @@ -;; cc/lex.scm — bytestream → token list. Pure function; no I/O, -;; no macro awareness. -;; -;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets -;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate. -;; -;; Owner: <unassigned> -;; -;; Implementation notes: -;; -;; - The lexer walks `src` byte-by-byte, threading (pos, line, col) -;; explicitly through every helper (no mutable state). Each token -;; captures its starting loc; helpers return (tok npos nline ncol). -;; - Trigraphs and `\<newline>` line splicing are handled via a single -;; logical-byte primitive `%lex-peek`: it advances over splices and -;; translates trigraphs in-place, so downstream code only ever sees -;; the "translation phase 2" stream. -;; - Comments are stripped at the same level as whitespace. -;; - NL tokens are emitted at every physical newline so pp can use -;; them to terminate directives. -;; -;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm): -;; -;; - Token-producing helpers wrap their inner work in a heap-mark / -;; heap-rewind! arena. The slots that must survive the rewind -;; (start-loc and the integer holders for npos/nline/ncol) are bound -;; *before* the (set! mark (heap-mark)) so the let's env extensions -;; live below the mark. The byte-run scanners' tail-call env frames -;; and any %lex-peek 4-lists are above the mark and get reclaimed. -;; For helpers that produce a fresh bytevector (ident, string), the -;; bv is allocated post-rewind so it persists into the parent arena. -;; - Numeric digit runs accumulate their value inline via -;; %accum-int-while; they no longer materialize a per-byte cons list -;; and then a separate %digits-value walk. - -;; -------------------------------------------------------------------- -;; Byte-class predicates (raw u8 values, not chars). -;; -------------------------------------------------------------------- -(define (%digit? b) (if (< b 48) #f (if (< 57 b) #f #t))) ; '0'..'9' -(define (%hex? b) - (cond ((%digit? b) #t) - ((if (< b 65) #f (if (< 70 b) #f #t)) #t) ; 'A'..'F' - ((if (< b 97) #f (if (< 102 b) #f #t)) #t) ; 'a'..'f' - (else #f))) -(define (%octal? b) (if (< b 48) #f (if (< 55 b) #f #t))) ; '0'..'7' -(define (%alpha? b) - (cond ((if (< b 65) #f (if (< 90 b) #f #t)) #t) ; 'A'..'Z' - ((if (< b 97) #f (if (< 122 b) #f #t)) #t) ; 'a'..'z' - (else #f))) -(define (%ident-start? b) (or (%alpha? b) (= b 95))) ; '_' -(define (%ident-cont? b) (or (%ident-start? b) (%digit? b))) -(define (%hspace? b) (or (= b 32) (= b 9) (= b 11) (= b 12))) ; SP TAB VT FF -(define (%newline? b) (= b 10)) ; '\n' - -;; -------------------------------------------------------------------- -;; Logical byte access. %lex-peek returns -;; (byte npos nline ncol) -;; where (npos, nline, ncol) points *just past* the consumed physical -;; bytes. On EOF it returns (#f pos line col). -;; -;; Two transformations folded in here: -;; -;; - Trigraphs: ??= ??( ??/ ??) ??' ??< ??! ??> ??- -;; # [ \ ] ^ { | } ~ -;; The pair `??` followed by one of the nine trigraph completers -;; produces the translated byte and advances 3 source bytes. -;; - Line splice: a backslash immediately followed by `\n` is removed -;; as a unit (incrementing line, resetting col to 1) and we recurse -;; to fetch the next logical byte. -;; -;; Other escapes (e.g. `\<not-newline>`) are returned as-is — string and -;; char literals do their own escape-handling. -;; -------------------------------------------------------------------- -(define (%trigraph-byte b) - ;; Map the third trigraph byte to its replacement, or #f. - (cond ((= b 61) 35) ; '=' -> '#' - ((= b 40) 91) ; '(' -> '[' - ((= b 47) 92) ; '/' -> '\\' - ((= b 41) 93) ; ')' -> ']' - ((= b 39) 94) ; '\'' -> '^' - ((= b 60) 123) ; '<' -> '{' - ((= b 33) 124) ; '!' -> '|' - ((= b 62) 125) ; '>' -> '}' - ((= b 45) 126) ; '-' -> '~' - (else #f))) - -(define (%lex-peek src pos line col) - (let ((n (bytevector-length src))) - (cond - ((>= pos n) (list #f pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ;; Trigraph: ?? + completer - ((and (= b 63) - (< (+ pos 2) n) - (= (bytevector-u8-ref src (+ pos 1)) 63)) - (let ((tr (%trigraph-byte (bytevector-u8-ref src (+ pos 2))))) - (if tr - (list tr (+ pos 3) line (+ col 3)) - (list b (+ pos 1) line (+ col 1))))) - ;; Line splice: backslash + newline (consume both, no token) - ((and (= b 92) - (< (+ pos 1) n) - (= (bytevector-u8-ref src (+ pos 1)) 10)) - (%lex-peek src (+ pos 2) (+ line 1) 1)) - ;; Newline: pass through but caller decides line/col bump - ((%newline? b) - (list b (+ pos 1) (+ line 1) 1)) - (else - (list b (+ pos 1) line (+ col 1))))))))) - -;; Convenience accessors over the 4-list. -(define (%pk-byte p) (car p)) -(define (%pk-pos p) (car (cdr p))) -(define (%pk-line p) (car (cdr (cdr p)))) -(define (%pk-col p) (car (cdr (cdr (cdr p))))) - -;; Fast-byte test. When (%fast-byte? b) is #t, reading b directly with -;; bytevector-u8-ref is exactly equivalent to %lex-peek's result: the -;; logical byte is b, npos = pos+1, nline unchanged, ncol = col+1, and -;; no list allocation is needed. Excludes the three bytes that %lex-peek -;; can transform: '?' (trigraph), '\\' (line splice), '\n' (line bump). -(define (%fast-byte? b) - (cond ((= b 63) #f) - ((= b 92) #f) - ((= b 10) #f) - (else #t))) - -;; -------------------------------------------------------------------- -;; Whitespace + comment skipper. Returns (pos line col). -;; Handles spaces/tabs, // line comments, /* block */ comments. Does -;; *not* consume `\n` — newlines are tokens. -;; -------------------------------------------------------------------- -(define (%skip-ws-and-comments src pos line col file) - (let ((n (bytevector-length src))) - (cond - ((>= pos n) (list pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((and (%fast-byte? b) (%hspace? b)) - (%skip-ws-and-comments src (+ pos 1) line (+ col 1) file)) - ((%fast-byte? b) - ;; Fast-byte that isn't hspace. Only '/' is interesting; - ;; everything else terminates the skip. - (cond - ((= b 47) (%maybe-comment src pos line col file)) - (else (list pos line col)))) - (else - ;; Slow path: trigraph / splice / newline. - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((not b2) (list pos line col)) - ((%hspace? b2) - (%skip-ws-and-comments src (%pk-pos p) (%pk-line p) (%pk-col p) - file)) - ((= b2 47) (%maybe-comment src pos line col file)) - (else (list pos line col))))))))))) - -(define (%maybe-comment src pos line col file) - ;; Source byte at pos resolves to '/'. Decide between // line comment, - ;; /* block comment, or "leave the slash alone" (it's a punctuator). - (let* ((p (%lex-peek src pos line col)) - (q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (cond - ((and b2 (= b2 47)) - (%skip-line-comment src (%pk-pos q) (%pk-line q) (%pk-col q) file)) - ((and b2 (= b2 42)) - (%skip-block-comment src (%pk-pos q) (%pk-line q) (%pk-col q) - file line col)) - (else (list pos line col))))) - -(define (%skip-line-comment src pos line col file) - ;; Consume bytes until end-of-stream or until we *see* '\n' (do not - ;; consume the newline itself; outer loop emits the NL). - (let ((n (bytevector-length src))) - (cond - ((>= pos n) (%skip-ws-and-comments src pos line col file)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ;; '\n' terminates without consuming. - ((= b 10) (%skip-ws-and-comments src pos line col file)) - ((%fast-byte? b) - (%skip-line-comment src (+ pos 1) line (+ col 1) file)) - (else - ;; Slow path: ?/\ — let %lex-peek handle trigraph/splice. - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((not b2) (%skip-ws-and-comments src pos line col file)) - ((%newline? b2) (%skip-ws-and-comments src pos line col file)) - (else - (%skip-line-comment src (%pk-pos p) (%pk-line p) (%pk-col p) - file))))))))))) - -(define (%skip-block-comment src pos line col file start-line start-col) - (let ((n (bytevector-length src))) - (cond - ((>= pos n) - (die (%loc file start-line start-col) - "unterminated /* block comment")) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ;; Fast path for plain content bytes that aren't '*'. - ((and (%fast-byte? b) (not (= b 42))) - (%skip-block-comment src (+ pos 1) line (+ col 1) - file start-line start-col)) - (else - ;; Slow path: '*', '\n', '?' (trigraph), '\\' (splice). - (let* ((p (%lex-peek src pos line col)) - (b1 (%pk-byte p))) - (cond - ((not b1) - (die (%loc file start-line start-col) - "unterminated /* block comment")) - ((= b1 42) - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (cond - ((not b2) - (die (%loc file start-line start-col) - "unterminated /* block comment")) - ((= b2 47) - (%skip-ws-and-comments src (%pk-pos q) (%pk-line q) (%pk-col q) - file)) - (else - ;; Re-scan starting at the byte after '*'; the '*' was - ;; not the closer, but the next byte might itself be '*'. - (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-line start-col))))) - (else - (%skip-block-comment src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-line start-col))))))))))) - -;; -------------------------------------------------------------------- -;; Byte-run scanners. -;; -;; Tail-recursive walkers used by ident/number/string readers. None -;; allocate per scanned byte on the fast path (only %lex-peek 4-lists -;; on trigraph/splice/newline); the per-iteration env frames allocated -;; by tail recursion are reclaimed by the caller's heap-rewind!. -;; -;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol) -;; - %fill-while-bv: write matching bytes into a pre-sized bv. -;; - %accum-int-while: accumulate a base-N integer over digit bytes. -;; (val count npos nline ncol) -;; - %accum-octal-bounded: same, but stops after k digits. -;; -------------------------------------------------------------------- -(define (%scan-while pred src pos line col) - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (cnt 0)) - (cond - ((>= pos n) (list cnt pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (if (pred b) - (loop (+ pos 1) line (+ col 1) (+ cnt 1)) - (list cnt pos line col))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (if (and b2 (pred b2)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1)) - (list cnt pos line col))))))))))) - -(define (%fill-while-bv pred src pos line col bv idx) - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (idx idx)) - (cond - ((>= pos n) idx) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (cond - ((pred b) - (bytevector-u8-set! bv idx b) - (loop (+ pos 1) line (+ col 1) (+ idx 1))) - (else idx))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((and b2 (pred b2)) - (bytevector-u8-set! bv idx b2) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))) - (else idx))))))))))) - -(define (%digit-val-byte b) - ;; ASCII digit byte → integer value. Caller guarantees b is a valid - ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F). - (cond ((%digit? b) (- b 48)) - ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10)) - ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10)) - (else 0))) - -(define (%accum-int-while pred src pos line col base) - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0)) - (cond - ((>= pos n) (list val cnt pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (if (pred b) - (loop (+ pos 1) line (+ col 1) - (+ (* val base) (%digit-val-byte b)) (+ cnt 1)) - (list val cnt pos line col))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (if (and b2 (pred b2)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) - (+ (* val base) (%digit-val-byte b2)) (+ cnt 1)) - (list val cnt pos line col))))))))))) - -(define (%accum-octal-bounded src pos line col k) - ;; Up to k octal digits. Returns (val count npos nline ncol). - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0)) - (cond - ((zero? k) (list val cnt pos line col)) - ((>= pos n) (list val cnt pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (if (%octal? b) - (loop (+ pos 1) line (+ col 1) (- k 1) - (+ (* val 8) (- b 48)) (+ cnt 1)) - (list val cnt pos line col))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (if (and b2 (%octal? b2)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1) - (+ (* val 8) (- b2 48)) (+ cnt 1)) - (list val cnt pos line col))))))))))) - -;; -------------------------------------------------------------------- -;; Identifier / keyword reader. -;; -;; Returns (tok npos nline ncol). Caller has already verified that the -;; first byte at `pos` satisfies %ident-start?. -;; -;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run, -;; then we rewind, allocate `name` bv post-rewind so it survives, then -;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The -;; integer slots count/npos/nline/ncol are bound *before* the mark so -;; they survive both rewinds. -;; -------------------------------------------------------------------- -(define (lex-read-ident src pos file) - ;; Public for tests. Threads line/col from a fresh start. - (%lex-read-ident src pos 1 (+ pos 1) file)) - -(define (%lex-read-ident src pos line col file) - (let ((start-loc (%loc file line col)) - (count 0) (npos 0) (nline 0) (ncol 0) - (mark 0)) - (set! mark (heap-mark)) - (let ((sres (%scan-while %ident-cont? src pos line col))) - (set! count (car sres)) - (set! npos (car (cdr sres))) - (set! nline (car (cdr (cdr sres)))) - (set! ncol (car (cdr (cdr (cdr sres)))))) - (heap-rewind! mark) - (let ((name (make-bytevector count 0)) - (mark2 0)) - (set! mark2 (heap-mark)) - (%fill-while-bv %ident-cont? src pos line col name 0) - (heap-rewind! mark2) - (let ((kw (alist-ref name %keyword-alist))) - (cons (if kw - (make-tok 'KW kw start-loc) - (make-tok 'IDENT name start-loc)) - (list npos nline ncol)))))) - -;; -------------------------------------------------------------------- -;; Number reader. -;; -;; Decimal: [1-9][0-9]* (suffix: u U l L ll LL combinations) -;; Hex: 0x[0-9a-fA-F]+ | 0X... -;; Octal: 0[0-7]* -;; Float: anything looking like 1.0, 1e3, .5 → die crisply. -;; -;; Returns (tok npos nline ncol) on success. Aborts via `die` on float. -;; -;; %accum-int-while folds digit collection and value computation into -;; one walk — no per-byte cons cells, no separate digits-list pass. -;; -------------------------------------------------------------------- -(define (lex-read-number src pos file) - (%lex-read-number src pos 1 (+ pos 1) file)) - -(define (%lex-read-number src pos line col file) - (let* ((start-loc (%loc file line col)) - (p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ;; '0x' / '0X' hex prefix - ((and (= b 48) - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X' - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (r (%accum-int-while %hex? src - (%pk-pos q) (%pk-line q) (%pk-col q) 16)) - (val (car r)) - (cnt (car (cdr r))) - (pos2 (car (cdr (cdr r)))) - (line2 (car (cdr (cdr (cdr r))))) - (col2 (car (cdr (cdr (cdr (cdr r))))))) - (if (zero? cnt) - (die start-loc "expected hex digits after 0x") - (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) - (cons (make-tok 'INT val start-loc) after))))) - ;; '0' alone → octal sequence (could be just zero) - ((= b 48) - (let* ((r (%accum-int-while %octal? src - (%pk-pos p) (%pk-line p) (%pk-col p) 8)) - (val (car r)) - (pos2 (car (cdr (cdr r)))) - (line2 (car (cdr (cdr (cdr r))))) - (col2 (car (cdr (cdr (cdr (cdr r))))))) - ;; Reject '.' / 'e' / 'E' immediately after the octal run — float. - (%check-no-float src pos2 line2 col2 file start-loc) - ;; Reject stray digits 8/9 in an octal context (e.g. 089). - (let* ((p3 (%lex-peek src pos2 line2 col2)) - (b3 (%pk-byte p3))) - (if (and b3 (%digit? b3)) - (die start-loc "invalid octal digit" (bv-of-byte b3)) - (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) - (cons (make-tok 'INT val start-loc) after)))))) - ;; '1'-'9' → decimal - ((%digit? b) - (let* ((r (%accum-int-while %digit? src pos line col 10)) - (val (car r)) - (pos2 (car (cdr (cdr r)))) - (line2 (car (cdr (cdr (cdr r))))) - (col2 (car (cdr (cdr (cdr (cdr r))))))) - (%check-no-float src pos2 line2 col2 file start-loc) - (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) - (cons (make-tok 'INT val start-loc) after)))) - ;; '.' followed by a digit = float-style literal — reject. - ((= b 46) - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (if (and b2 (%digit? b2)) - (die start-loc "floating-point literal not supported") - ;; Otherwise '.' was a punctuator — caller wouldn't have - ;; routed here unless it was a digit-led prefix. - (die start-loc "internal: number reader on non-number")))) - (else - (die start-loc "internal: number reader on non-number"))))) - -(define (%check-no-float src pos line col file start-loc) - ;; If the byte at pos starts a fractional/exponent part, abort. - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) #t) - ((= b 46) ; '.' - (die start-loc "floating-point literal not supported")) - ((or (= b 101) (= b 69)) ; 'e' / 'E' - ;; Only a float exponent if followed by [+-]?digit. - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (cond - ((and b2 (%digit? b2)) - (die start-loc "floating-point literal not supported")) - ((and b2 (or (= b2 43) (= b2 45))) - (let* ((r (%lex-peek src (%pk-pos q) (%pk-line q) (%pk-col q))) - (b3 (%pk-byte r))) - (if (and b3 (%digit? b3)) - (die start-loc "floating-point literal not supported") - #t))) - (else #t)))) - (else #t)))) - -(define (%lex-strip-int-suffix src pos line col file) - ;; Consume any combination of u U l L (the long can be doubled). We - ;; don't validate orderings strictly; tcc.c uses the canonical forms. - ;; Returns (npos nline ncol). - (let loop ((pos pos) (line line) (col col)) - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (list pos line col)) - ((or (= b 117) (= b 85) ; u U - (= b 108) (= b 76)) ; l L - (loop (%pk-pos p) (%pk-line p) (%pk-col p))) - (else (list pos line col)))))) - -;; -------------------------------------------------------------------- -;; Escape sequence reader. -;; -;; %scan-or-fill-escape decodes one escape sequence starting at `pos` -;; (which points one past the leading `\\`). When `bv` is a bytevector, -;; the resulting byte is written to (bv idx); when it is #f, no write -;; occurs (used during the string-pass scan phase). Returns the 4-list -;; (val npos nline ncol). -;; -------------------------------------------------------------------- -(define (%scan-or-fill-escape src pos line col file start-loc bv idx) - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (die start-loc "unterminated escape sequence")) - ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms). - ((or (= b 120) (= b 88)) ; 'x' / 'X' - (let* ((r (%accum-int-while %hex? src - (%pk-pos p) (%pk-line p) (%pk-col p) 16)) - (val0 (car r)) - (cnt (car (cdr r))) - (pos2 (car (cdr (cdr r)))) - (line2 (car (cdr (cdr (cdr r))))) - (col2 (car (cdr (cdr (cdr (cdr r))))))) - (cond - ((zero? cnt) (die start-loc "expected hex digits after \\x")) - (else - (let ((val (bit-and val0 255))) - (cond (bv (bytevector-u8-set! bv idx val)) - (else #f)) - (list val pos2 line2 col2)))))) - ;; \NNN — 1..3 octal digits. - ((%octal? b) - (let* ((r (%accum-octal-bounded src pos line col 3)) - (val0 (car r)) - (pos2 (car (cdr (cdr r)))) - (line2 (car (cdr (cdr (cdr r))))) - (col2 (car (cdr (cdr (cdr (cdr r)))))) - (val (bit-and val0 255))) - (cond (bv (bytevector-u8-set! bv idx val)) - (else #f)) - (list val pos2 line2 col2))) - (else - (let ((val (cond ((= b 110) 10) ; n - ((= b 116) 9) ; t - ((= b 114) 13) ; r - ((= b 92) 92) ; \\ - ((= b 39) 39) ; ' - ((= b 34) 34) ; " - ((= b 48) 0) ; 0 (already handled by octal but be safe) - ((= b 97) 7) ; \a -> BEL - ((= b 98) 8) ; \b - ((= b 102) 12) ; \f - ((= b 118) 11) ; \v - ((= b 63) 63) ; \? - (else - (die start-loc "unknown escape" (bv-of-byte b)))))) - (cond (bv (bytevector-u8-set! bv idx val)) - (else #f)) - (list val (%pk-pos p) (%pk-line p) (%pk-col p))))))) - -;; -------------------------------------------------------------------- -;; String reader. -;; -;; Caller has verified src[pos] == '"' (raw byte 34). Returns -;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended). -;; -;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes -;; collapse to 1 byte each); after rewind we allocate the final bv and -;; rerun with bv set so the bytes are written directly into it. -;; -------------------------------------------------------------------- -(define (lex-read-string src pos file) - (%lex-read-string src pos 1 (+ pos 1) file)) - -(define (%lex-read-string src pos line col file) - (let ((start-loc (%loc file line col)) - (cnt 0) (npos 0) (nline 0) (ncol 0) - (mark 0)) - ;; '"' (34) is a fast-byte and never a trigraph result, so the - ;; physical byte at `pos` is exactly the opening quote. - (cond - ((or (>= pos (bytevector-length src)) - (not (= (bytevector-u8-ref src pos) 34))) - (die start-loc "internal: string reader on non-quote")) - (else - (set! mark (heap-mark)) - (let ((sres (%string-pass src (+ pos 1) line (+ col 1) - file start-loc #f))) - (set! cnt (car sres)) - (set! npos (car (cdr sres))) - (set! nline (car (cdr (cdr sres)))) - (set! ncol (car (cdr (cdr (cdr sres)))))) - (heap-rewind! mark) - (let ((bv (make-bytevector cnt 0)) - (mark2 0)) - (set! mark2 (heap-mark)) - (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv) - (heap-rewind! mark2) - (cons (make-tok 'STR bv start-loc) - (list npos nline ncol))))))) - -(define (%string-pass src pos line col file start-loc bv) - ;; Walk the string body (after opening "). When `bv` is #f, count - ;; effective bytes; when it is a bytevector, write bytes into it at - ;; index 0..count-1. Returns (count npos nline ncol). - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (idx 0)) - (cond - ((>= pos n) (die start-loc "unterminated string literal")) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ;; Closing quote — fast byte but special. - ((= b 34) - (list idx (+ pos 1) line (+ col 1))) - ((%fast-byte? b) - (cond (bv (bytevector-u8-set! bv idx b)) - (else #f)) - (loop (+ pos 1) line (+ col 1) (+ idx 1))) - (else - ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'. - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((not b2) - (die start-loc "unterminated string literal")) - ((= b2 34) - (list idx (%pk-pos p) (%pk-line p) (%pk-col p))) - ((%newline? b2) - (die start-loc "newline in string literal")) - ((= b2 92) - (let* ((er (%scan-or-fill-escape - src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc bv idx)) - (epos (car (cdr er))) - (eline (car (cdr (cdr er)))) - (ecol (car (cdr (cdr (cdr er)))))) - (loop epos eline ecol (+ idx 1)))) - (else - (cond (bv (bytevector-u8-set! bv idx b2)) - (else #f)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))))))))))))) - -;; -------------------------------------------------------------------- -;; Char reader. -;; -;; Caller has verified src[pos] == '\''. Multi-character constants -;; ('AB') are rejected via die. -;; -------------------------------------------------------------------- -(define (lex-read-char src pos file) - (%lex-read-char src pos 1 (+ pos 1) file)) - -(define (%lex-read-char src pos line col file) - (let* ((start-loc (%loc file line col)) - (p0 (%lex-peek src pos line col)) - (b0 (%pk-byte p0))) - (if (not (and b0 (= b0 39))) - (die start-loc "internal: char reader on non-quote") - (%collect-char src (%pk-pos p0) (%pk-line p0) (%pk-col p0) - file start-loc)))) - -(define (%collect-char src pos line col file start-loc) - ;; Read exactly one byte (handling escapes), then expect closing '\''. - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (die start-loc "unterminated char literal")) - ((= b 39) (die start-loc "empty char literal")) - ((%newline? b) (die start-loc "newline in char literal")) - ((= b 92) ; escape - (let* ((r (%scan-or-fill-escape src - (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc #f 0)) - (val (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r)))))) - (%expect-char-close src pos2 line2 col2 file start-loc val))) - (else - (%expect-char-close src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc b))))) - -(define (%expect-char-close src pos line col file start-loc val) - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (die start-loc "unterminated char literal")) - ((= b 39) - (cons (make-tok 'CHAR val start-loc) - (list (%pk-pos p) (%pk-line p) (%pk-col p)))) - (else - (die start-loc "multi-character char constant not supported"))))) - -;; -------------------------------------------------------------------- -;; Punctuator reader. -;; -;; Greedy longest-match against %punct-alist (cc/data.scm). The alist -;; is already ordered longest-first. We additionally bucket entries by -;; their first byte so %lex-read-punct only loops over the small set of -;; patterns that can start at the current source byte. -;; -------------------------------------------------------------------- - -(define (%alist-ref-int k al) - ;; Lookup in an int-keyed alist (linear scan, '= compare). - (cond ((null? al) #f) - ((= (car (car al)) k) (cdr (car al))) - (else (%alist-ref-int k (cdr al))))) - -(define (%mem-int? k xs) - (cond ((null? xs) #f) - ((= (car xs) k) #t) - (else (%mem-int? k (cdr xs))))) - -(define (%filter-by-first-byte b al) - ;; Subset of `al` whose pattern starts with byte b, preserving order. - (cond - ((null? al) '()) - ((= (bytevector-u8-ref (car (car al)) 0) b) - (cons (car al) (%filter-by-first-byte b (cdr al)))) - (else (%filter-by-first-byte b (cdr al))))) - -(define (%group-by-first-byte al) - ;; Build ((first-byte . sub-alist) ...) over `al`, one bucket per - ;; distinct first byte; sub-alist preserves longest-match-first - ;; order from the source list. - (let loop ((xs al) (seen '()) (out '())) - (cond - ((null? xs) (reverse out)) - (else - (let* ((entry (car xs)) - (pat (car entry)) - (b (bytevector-u8-ref pat 0))) - (cond - ((%mem-int? b seen) (loop (cdr xs) seen out)) - (else - (loop (cdr xs) - (cons b seen) - (cons (cons b (%filter-by-first-byte b al)) out))))))))) - -(define %punct-buckets (%group-by-first-byte %punct-alist)) - -(define (lex-read-punct src pos file) - (%lex-read-punct src pos 1 (+ pos 1) file)) - -(define (%lex-read-punct src pos line col file) - (let* ((start-loc (%loc file line col)) - (p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (die start-loc "unrecognized byte" "EOF")) - (else - (let ((bucket (%alist-ref-int b %punct-buckets))) - (cond - ((not bucket) (die start-loc "unrecognized byte" (bv-of-byte b))) - (else (%punct-loop src pos line col file start-loc bucket)))))))) - -(define (%punct-loop src pos line col file start-loc al) - (cond - ((null? al) - (let* ((p (%lex-peek src pos line col))) - (die start-loc "unrecognized byte" - (if (%pk-byte p) (bv-of-byte (%pk-byte p)) "EOF")))) - (else - (let* ((entry (car al)) - (pat (car entry)) - (sym (cdr entry)) - (m (%match-bytes src pos line col pat 0))) - (if m - (cons (make-tok 'PUNCT sym start-loc) m) - (%punct-loop src pos line col file start-loc (cdr al))))))) - -(define (%match-bytes src pos line col pat i) - ;; If the next bytes from (pos line col), in logical-byte stream - ;; order, equal `pat[i..]`, return (npos nline ncol) after the - ;; match. Otherwise #f. - (cond - ((= i (bytevector-length pat)) (list pos line col)) - (else - (let ((n (bytevector-length src))) - (cond - ((>= pos n) #f) - (else - (let ((b (bytevector-u8-ref src pos)) - (pb (bytevector-u8-ref pat i))) - (cond - ((%fast-byte? b) - (if (= b pb) - (%match-bytes src (+ pos 1) line (+ col 1) pat (+ i 1)) - #f)) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((not b2) #f) - ((= b2 pb) - (%match-bytes src (%pk-pos p) (%pk-line p) (%pk-col p) - pat (+ i 1))) - (else #f)))))))))))) - -;; -------------------------------------------------------------------- -;; lex-tokenize src file -> list of tok ending in EOF. -;; -------------------------------------------------------------------- -(define (lex-tokenize src file) - (%lex-loop src 0 1 1 file '() #t)) - -;; bol? — `#t` when no token has been emitted on the current physical -;; line yet (start of file, or only NL + whitespace seen since the last -;; line break). pp recognizes a directive only when its leading `#` is -;; at line-start; we forward that decision into the token stream by -;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`. -(define (%lex-loop src pos line col file acc bol?) - (let* ((sw (%skip-ws-and-comments src pos line col file)) - (pos1 (car sw)) - (line1 (car (cdr sw))) - (col1 (car (cdr (cdr sw)))) - (p (%lex-peek src pos1 line1 col1)) - (b (%pk-byte p))) - (cond - ;; EOF - ((not b) - (let* ((eof-tok (make-tok 'EOF #f (%loc file line1 col1)))) - (reverse (cons eof-tok acc)))) - ;; Newline → emit NL, reset bol?. - ((%newline? b) - (let ((nl (make-tok 'NL #f (%loc file line1 col1)))) - (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p) - file (cons nl acc) #t))) - ;; Line-leading `#` → emit HASH, but only the bare `#`. `##` is - ;; never line-leading in valid C; if it appears, fall through to - ;; normal punctuator handling so it lexes as `paste`. - ((and bol? (= b 35)) - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (cond - ((and b2 (= b2 35)) - (let* ((r (%lex-read-punct src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f))) - (else - (let ((tok (make-tok 'HASH #f (%loc file line1 col1)))) - (%lex-loop src (%pk-pos p) (%pk-line p) (%pk-col p) - file (cons tok acc) #f)))))) - ;; Identifier / keyword - ((%ident-start? b) - (let* ((r (%lex-read-ident src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f))) - ;; Number (digit start) - ((%digit? b) - (let* ((r (%lex-read-number src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f))) - ;; '.' might start a number (1.0 actually starts with digit; .5 - ;; would route here). We keep this as a punctuator unless followed - ;; by a digit, in which case the lexer rejects per spec. - ((= b 46) - (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (b2 (%pk-byte q))) - (cond - ((and b2 (%digit? b2)) - (die (%loc file line1 col1) "floating-point literal not supported")) - (else - (let* ((r (%lex-read-punct src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f)))))) - ;; String - ((= b 34) - (let* ((r (%lex-read-string src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f))) - ;; Char - ((= b 39) - (let* ((r (%lex-read-char src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f))) - ;; Punctuator (default) - (else - (let* ((r (%lex-read-punct src pos1 line1 col1 file)) - (tok (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r)))))) - (%lex-loop src npos nline ncol file (cons tok acc) #f)))))) diff --git a/cc/main.scm b/cc/main.scm @@ -1,74 +1,5 @@ -;; cc/main.scm — driver. Argv, file I/O, ties phases together. -;; -;; Realization of docs/CC-INTERNALS.md §main.scm. - -;; -------------------------------------------------------------------- -;; CLI: cc <input.c> <output.P1pp> -;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.) -;; -;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] -;; is the catm'd compiler source path, argv[2..] are the user-facing -;; positional args. cc-main strips the first two. -;; -------------------------------------------------------------------- - -(define (%cc-slurp path) - (let ((r (open-input path))) - (cond ((not (car r)) - (die #f "cannot open input" path))) - (let* ((p (cdr r)) - (rd (read-all p))) - (close p) - (cond ((not (car rd)) (die #f "read failed" path))) - (cdr rd)))) - -(define (%cc-write path bv) - (let ((r (open-output path))) - (cond ((not (car r)) - (die #f "cannot open output" path))) - (let ((p (cdr r))) - (write-bv-fd (port-fd p) bv) - (close p) - 0))) - -;; CC_DEBUG=1 in the env doesn't fly here (no getenv); instead, scan -;; argv for a sentinel "--cc-debug" flag. When present, debug-log -;; prints heap usage between phases to fd 2. -(define (%cc-flag? args flag) - (cond ((null? args) #f) - ((bv= (car args) flag) #t) - (else (%cc-flag? (cdr args) flag)))) - -(define (%cc-strip-flag args flag) - (cond ((null? args) '()) - ((bv= (car args) flag) (cdr args)) - (else (cons (car args) (%cc-strip-flag (cdr args) flag))))) - -(define (cc-main av) - (let* ((raw (cdr (cdr av))) - (dbg (%cc-flag? raw "--cc-debug")) - (args (%cc-strip-flag raw "--cc-debug"))) - (cond (dbg (debug-log-on!))) - (cond - ((or (null? args) (null? (cdr args))) - (die #f "usage: cc [--cc-debug] <input.c> <output.P1pp>"))) - (let* ((in-path (car args)) - (out-path (car (cdr args)))) - (debug-log "phase=start" "heap" (heap-usage)) - (let* ((src (%cc-slurp in-path)) - (_1 (debug-log "phase=slurp" "heap" (heap-usage) - "src-bytes" (bytevector-length src))) - (toks (lex-tokenize src in-path)) - (_2 (debug-log "phase=lex" "heap" (heap-usage))) - (expanded (pp-expand toks '())) - (_3 (debug-log "phase=pp" "heap" (heap-usage))) - (cg (cg-init)) - (ps (make-pstate expanded cg))) - (parse-translation-unit ps) - (debug-log "phase=parse" "heap" (heap-usage)) - (let ((out (cg-finish cg))) - (debug-log "phase=cg-finish" "heap" (heap-usage) - "out-bytes" (bytevector-length out)) - (%cc-write out-path out)) - 0)))) +;; cc/main.scm — production entry point. cc.scm defines cc-main; this +;; one-liner fires it under (argv). Tests catm cc.scm without this file +;; so they can run their own drivers without sys-exit pre-empting them. (sys-exit (cc-main (argv))) diff --git a/cc/parse.scm b/cc/parse.scm @@ -1,1673 +0,0 @@ -;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1. - -(define (make-pstate toks cg) - (%pstate toks (list '()) (list '()) '() #f '() cg)) - -(define (peek ps) (car (ps-toks ps))) -(define (peek2 ps) - (let ((r (cdr (ps-toks ps)))) - (if (null? r) (car (ps-toks ps)) (car r)))) -(define (advance ps) - (let ((t (peek ps))) (ps-toks-set! ps (cdr (ps-toks ps))) t)) -(define (at-kw? ps s) - (pmatch (peek ps) - (($ tok? (kind KW) (value ,v)) (eq? v s)) - (else #f))) -(define (at-punct? ps s) - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value ,v)) (eq? v s)) - (else #f))) -(define (expect-kw ps s) - (let ((t (peek ps))) - (pmatch t - (($ tok? (kind KW) (value ,v)) (guard (eq? v s)) (advance ps)) - (else (die (tok-loc t) "expected kw" s))))) -(define (expect-punct ps s) - (let ((t (peek ps))) - (pmatch t - (($ tok? (kind PUNCT) (value ,v)) (guard (eq? v s)) (advance ps)) - (else (die (tok-loc t) "expected punct" s))))) - -(define (scope-enter! ps) - (ps-scope-set! ps (cons '() (ps-scope ps))) - (ps-tags-set! ps (cons '() (ps-tags ps)))) -(define (scope-leave! ps) - (ps-scope-set! ps (cdr (ps-scope ps))) - (ps-tags-set! ps (cdr (ps-tags ps)))) -(define (scope-bind! ps n s) - (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f))) - (if (alist-ref n top) (die #f "dup decl" n) - (ps-scope-set! ps (cons (alist-set n s top) r))))) -(define (scope-lookup ps n) - (let loop ((f (ps-scope ps))) - (cond ((null? f) #f) - (else - (let ((v (alist-ref n (car f)))) - (if v v (loop (cdr f)))))))) -(define (tag-bind! ps n c) - (let* ((f (ps-tags ps)) (top (car f)) (r (cdr f))) - (ps-tags-set! ps (cons (alist-set n c top) r)))) -(define (tag-lookup ps n) - (let loop ((f (ps-tags ps))) - (cond ((null? f) #f) - (else (let ((v (alist-ref n (car f)))) - (if v v (loop (cdr f)))))))) -(define (typedef-add! ps n) - (ps-typedefs-set! ps (alist-set n #t (ps-typedefs ps)))) -(define (typedef? ps n) - (if (alist-ref n (ps-typedefs ps)) #t #f)) - -(define (%mk-ptr p) (%ctype 'ptr 8 8 p)) -(define (%mk-arr e n) - (%ctype 'arr (if (< n 0) -1 (* n (ctype-size e))) - (ctype-align e) (cons e n))) -(define (%mk-fn r p v) (%ctype 'fn -1 -1 (list r p v))) -(define (ctype-is-ptr? t) (eq? (ctype-kind t) 'ptr)) -(define (ctype-is-fn? t) (eq? (ctype-kind t) 'fn)) -(define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr)) - -(define (eat-cv-quals! ps) - (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile) - (at-kw? ps 'restrict)) - (advance ps) (eat-cv-quals! ps)) - (else #t))) - -(define (parse-decl-spec ps) - (let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f)) - (let ((t (peek ps))) - (cond - ((or (at-kw? ps 'auto) (at-kw? ps 'register)) - (advance ps) (loop sto sn lg b #t)) - ((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t)) - ((at-kw? ps 'extern) (advance ps) (loop 'extern sn lg b #t)) - ((at-kw? ps 'typedef) (advance ps) (loop 'typedef sn lg b #t)) - ((or (at-kw? ps 'const) (at-kw? ps 'volatile) - (at-kw? ps 'restrict) (at-kw? ps 'inline)) - (advance ps) (loop sto sn lg b #t)) - ((at-kw? ps 'signed) (advance ps) (loop sto 'signed lg b #t)) - ((at-kw? ps 'unsigned) (advance ps) (loop sto 'unsigned lg b #t)) - ((at-kw? ps 'short) (advance ps) (loop sto sn -1 b #t)) - ((at-kw? ps 'long) (advance ps) (loop sto sn (+ lg 1) b #t)) - ((at-kw? ps 'void) (advance ps) (loop sto sn lg 'void #t)) - ((at-kw? ps 'char) (advance ps) (loop sto sn lg 'char #t)) - ((at-kw? ps 'int) (advance ps) (loop sto sn lg 'int #t)) - ((at-kw? ps '_Bool) (advance ps) (loop sto sn lg 'bool #t)) - ((or (at-kw? ps 'float) (at-kw? ps 'double) - (at-kw? ps '_Complex) (at-kw? ps '_Imaginary)) - (die (tok-loc t) "no float" (tok-value t))) - ((or (at-kw? ps '_Atomic) (at-kw? ps '_Thread_local) - (at-kw? ps '_Alignas) (at-kw? ps '_Generic) - (at-kw? ps '_Alignof) (at-kw? ps '_Static_assert)) - (die (tok-loc t) "rejected" (tok-value t))) - ((at-kw? ps 'struct) - (loop sto sn lg (parse-aggregate-spec ps 'struct) #t)) - ((at-kw? ps 'union) - (loop sto sn lg (parse-aggregate-spec ps 'union) #t)) - ((at-kw? ps 'enum) - (loop sto sn lg (parse-enum-spec ps) #t)) - ((and (not b) (eq? (tok-kind t) 'IDENT) - (typedef? ps (tok-value t))) - (let* ((tk (advance ps)) (sm (scope-lookup ps (tok-value tk)))) - (if (and sm (eq? (sym-kind sm) 'typedef)) - (loop sto sn lg (sym-type sm) #t) - (die (tok-loc tk) "typedef no sym" (tok-value tk))))) - (else - (cond ((not saw) (die (tok-loc t) "expected decl-spec" - (tok-value t))) - (else (cons sto (resolve-base t sn lg b))))))))) - -(define (resolve-base loc sn lg b) - (cond - ((eq? b 'void) - (if (or sn (not (zero? lg))) (die loc "void+qual") %t-void)) - ((eq? b 'bool) - (if (or sn (not (zero? lg))) (die loc "bool+qual") %t-bool)) - ((eq? b 'char) - (cond ((eq? sn 'unsigned) %t-u8) (else %t-i8))) - ((or (eq? b 'int) (and (not b) (or sn (not (zero? lg))))) - (cond ((= lg -1) (if (eq? sn 'unsigned) %t-u16 %t-i16)) - ((= lg 0) (if (eq? sn 'unsigned) %t-u32 %t-i32)) - (else (if (eq? sn 'unsigned) %t-u64 %t-i64)))) - ((ctype? b) - (if (or sn (not (zero? lg))) (die loc "type+qual") b)) - (else (die loc "unknown decl-spec")))) - -(define (parse-aggregate-spec ps kind) - (advance ps) - (let ((tag (pmatch (peek ps) - (($ tok? (kind IDENT)) (tok-value (advance ps))) - (else #f)))) - (cond - ((at-punct? ps 'lbrace) - (advance ps) - (let* ((ex (and tag (tag-lookup ps tag))) - (ct (cond ((and ex (eq? (ctype-kind ex) kind)) ex) - (else (let ((c (%ctype kind -1 -1 - (list (or tag #f) #f '())))) - (if tag (tag-bind! ps tag c)) c)))) - (fields (parse-struct-fields ps))) - (expect-punct ps 'rbrace) - (complete-agg! ct kind tag fields) ct)) - (tag (let ((ex (tag-lookup ps tag))) - (cond (ex ex) - (else (let ((c (%ctype kind -1 -1 - (list tag #f '())))) - (tag-bind! ps tag c) c))))) - (else (die (tok-loc (peek ps)) "anon agg"))))) - -(define (parse-struct-fields ps) - (let loop ((acc '()) (off 0)) - (cond - ((at-punct? ps 'rbrace) (reverse acc)) - (else - (let ((spec (parse-decl-spec ps))) - (let dl ((acc2 acc) (o2 off)) - (let* ((p (parse-declarator ps (cdr spec))) - (nm (car p)) (ty (cdr p)) - (al (max (ctype-align ty) 1)) - (sz (ctype-size ty)) - (oa (align-up o2 al))) - (cond - ((at-punct? ps 'comma) - (advance ps) - (dl (cons (list nm ty oa) acc2) - (+ oa (max sz 0)))) - ((at-punct? ps 'semi) - (advance ps) - (loop (cons (list nm ty oa) acc2) - (+ oa (max sz 0)))) - (else (die (tok-loc (peek ps)) "field")))))))))) - -(define (complete-agg! ct k tag fs) - (let* ((ma (let m ((xs fs) (a 1)) - (if (null? xs) a - (m (cdr xs) (max a (ctype-align (cadr (car xs)))))))) - (last (let l ((xs fs) (e 0)) - (if (null? xs) e - (let* ((f (car xs)) (off (car (cddr f))) - (sz (ctype-size (cadr f)))) - (l (cdr xs) (max e (+ off (max sz 0)))))))) - (sz (cond ((eq? k 'union) - (let u ((xs fs) (s 0)) - (if (null? xs) s - (u (cdr xs) - (max s (ctype-size (cadr (car xs)))))))) - (else (align-up last ma))))) - (ctype-size-set! ct sz) - (ctype-align-set! ct ma) - (ctype-ext-set! ct (list tag #t fs)))) - -(define (parse-enum-spec ps) - (advance ps) - (let ((tag (pmatch (peek ps) - (($ tok? (kind IDENT)) (tok-value (advance ps))) - (else #f)))) - (cond - ((at-punct? ps 'lbrace) - (advance ps) - (let ((ct (%ctype 'enum 4 4 (list tag '())))) - (if tag (tag-bind! ps tag ct)) - (let loop ((vs '()) (nv 0)) - (cond - ((at-punct? ps 'rbrace) - (advance ps) - (ctype-ext-set! ct (list tag (reverse vs))) ct) - (else - (let* ((nt (advance ps)) (nm (tok-value nt)) - (val (cond ((at-punct? ps 'assign) - (advance ps) (parse-const-int ps)) - (else nv)))) - (scope-bind! ps nm - (%sym nm 'enum-const #f %t-i32 val)) - (cond ((at-punct? ps 'comma) (advance ps)) - ((at-punct? ps 'rbrace) #t) - (else (die (tok-loc (peek ps)) "enum"))) - (loop (cons (cons nm val) vs) (+ val 1)))))))) - (tag (let ((e (tag-lookup ps tag))) - (cond (e e) - (else (let ((c (%ctype 'enum 4 4 (list tag '())))) - (tag-bind! ps tag c) c))))) - (else (die (tok-loc (peek ps)) "enum"))))) - -(define (parse-const-int ps) - (let ((t (peek ps))) - (pmatch t - (($ tok? (kind INT)) (tok-value (advance ps))) - (($ tok? (kind PUNCT) (value minus)) (advance ps) (- 0 (parse-const-int ps))) - (($ tok? (kind PUNCT) (value plus)) (advance ps) (parse-const-int ps)) - (($ tok? (kind IDENT) (value ,n)) - (let ((sm (scope-lookup ps n))) - (cond ((and sm (eq? (sym-kind sm) 'enum-const)) - (advance ps) (sym-slot sm)) - (else (die (tok-loc t) "const?" n))))) - (else (die (tok-loc t) "const?" (tok-value t)))))) - -(define (parse-declarator ps base) - ((cdr (parse-decl-cont ps)) base - (lambda (n t) (cons n t)))) - -(define (parse-decl-cont ps) - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value star)) - (advance ps) (eat-cv-quals! ps) - (let* ((r (parse-decl-cont ps)) (rf (cdr r))) - (cons (car r) (lambda (b k) (rf (%mk-ptr b) k))))) - (($ tok? (kind PUNCT) (value lparen)) - (guard (paren-is-group? ps)) - (advance ps) - (let* ((i (parse-decl-cont ps)) (if- (cdr i))) - (expect-punct ps 'rparen) - (let ((s (parse-decl-suf-cont ps))) - (cons (car i) (lambda (b k) (if- (s b) k)))))) - (($ tok? (kind IDENT) (value ,n)) - (advance ps) - (let ((s (parse-decl-suf-cont ps))) - (cons n (lambda (b k) (k n (s b)))))) - (else - (let ((s (parse-decl-suf-cont ps))) - (cons #f (lambda (b k) (k #f (s b)))))))) - -(define (parse-decl-suf-cont ps) - ;; C declarator suffixes apply RIGHT-TO-LEFT (innermost first): - ;; int a[2][3] ⇒ arr (arr int 3) 2 (outer dim 2) - ;; not arr (arr int 2) 3 (which would treat the leftmost suffix as - ;; outermost). The recursive structure builds the inner suffix's - ;; result first, then this level wraps. - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value lbrack)) - (advance ps) - (let* ((ln (cond ((at-punct? ps 'rbrack) -1) - (else (parse-const-int ps)))) - (_ (expect-punct ps 'rbrack)) - (r (parse-decl-suf-cont ps))) - (lambda (b) (%mk-arr (r b) ln)))) - (($ tok? (kind PUNCT) (value lparen)) - (advance ps) - (let* ((res (parse-fn-params ps)) - (p (car res)) (v (cdr res))) - (expect-punct ps 'rparen) - (let ((r (parse-decl-suf-cont ps))) - (lambda (b) (%mk-fn (r b) p v))))) - (else (lambda (b) b)))) - -(define (paren-is-group? ps) - (pmatch (peek2 ps) - (($ tok? (kind KW) (value ,v)) - (cond ((or (eq? v 'void) (eq? v 'char) (eq? v 'short) - (eq? v 'int) (eq? v 'long) (eq? v 'signed) - (eq? v 'unsigned) (eq? v '_Bool) - (eq? v 'struct) (eq? v 'union) (eq? v 'enum) - (eq? v 'const) (eq? v 'volatile) - (eq? v 'restrict) (eq? v 'static) - (eq? v 'extern) (eq? v 'register)) #f) - (else #t))) - (($ tok? (kind IDENT) (value ,n)) - (cond ((typedef? ps n) #f) (else #t))) - (($ tok? (kind PUNCT) (value rparen)) #f) - (($ tok? (kind PUNCT) (value star)) #t) - (($ tok? (kind PUNCT) (value lparen)) #t) - (($ tok? (kind PUNCT) (value lbrack)) #t) - (else #f))) - -(define (parse-fn-params ps) - (cond - ((at-punct? ps 'rparen) (cons '() #f)) - ((and (at-kw? ps 'void) - (eq? (tok-kind (peek2 ps)) 'PUNCT) - (eq? (tok-value (peek2 ps)) 'rparen)) - (advance ps) (cons '() #f)) - (else - (let loop ((acc '())) - (cond - ((at-punct? ps 'ellipsis) - (advance ps) (cons (reverse acc) #t)) - (else - (let* ((sp (parse-decl-spec ps)) - (p (parse-declarator ps (cdr sp))) - (nm (car p)) (ty (cdr p)) - (ty2 (cond ((ctype-is-arr? ty) - (%mk-ptr (car (ctype-ext ty)))) - ((ctype-is-fn? ty) (%mk-ptr ty)) - (else ty)))) - (cond - ((at-punct? ps 'comma) - (advance ps) (loop (cons (cons nm ty2) acc))) - ((at-punct? ps 'rparen) - (cons (reverse (cons (cons nm ty2) acc)) #f)) - (else (die (tok-loc (peek ps)) "param")))))))))) - -(define (parse-translation-unit ps) - (cond - ((eq? (tok-kind (peek ps)) 'EOF) #t) - (else (parse-decl-or-fn ps) (parse-translation-unit ps)))) - -(define (parse-decl-or-fn ps) - (let* ((sp (parse-decl-spec ps)) - (sto (car sp)) (b (cdr sp))) - (cond - ((at-punct? ps 'semi) (advance ps) 'decl) - (else - (let* ((p (parse-declarator ps b)) - (n (car p)) (t (cdr p))) - (cond - ((and (ctype-is-fn? t) (at-punct? ps 'lbrace)) - (parse-fn-body ps n t) 'fn) - (else - (handle-decl ps sto n t) - (let lp () - (cond - ((at-punct? ps 'comma) - (advance ps) - (let* ((p2 (parse-declarator ps b)) - (n2 (car p2)) (t2 (cdr p2))) - (handle-decl ps sto n2 t2) (lp))) - (else (expect-punct ps 'semi) 'decl)))))))))) - -(define (handle-decl ps sto n ty) - (cond - ((not n) (die #f "no name")) - ((eq? sto 'typedef) - (typedef-add! ps n) - (scope-bind! ps n (%sym n 'typedef #f ty #f))) - ((ctype-is-fn? ty) - (scope-bind! ps n - (%sym n 'fn (or sto 'extern) ty - (bytevector-append "cc__" n)))) - ;; §I: block-scope `static` routes to a global with a name mangled - ;; on the enclosing function so two functions can each have their - ;; own `static int n;` without colliding. The sym's NAME holds the - ;; mangled form (cg-push-sym / cg-emit-global both prefix "cc__" - ;; onto sym-name to derive the emitted label); scope-bind!s key - ;; remains the original identifier for source-level lookup. - ((and (eq? sto 'static) (ps-fn-ctx ps)) - (let* ((fname (fn-ctx-name (ps-fn-ctx ps))) - (mangled (bytevector-append fname "__" n)) - (sm (%sym mangled 'var 'static ty - (bytevector-append "cc__" mangled)))) - (scope-bind! ps n sm) - (cond - ((at-punct? ps 'assign) - (advance ps) - (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty))) - (else (cg-emit-global (ps-cg ps) sm #f))))) - (else - (cond - ((not (ps-fn-ctx ps)) - (let ((sm (%sym n 'var (or sto 'extern) ty - (bytevector-append "cc__" n)))) - (scope-bind! ps n sm) - (cond - ((at-punct? ps 'assign) - (advance ps) - (cg-emit-global (ps-cg ps) sm - (parse-init-global ps ty))) - ((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm)) - (else (cg-emit-global (ps-cg ps) sm #f))))) - (else - (let* ((sz (max (ctype-size ty) 1)) - (al (max (ctype-align ty) 1)) - (sl (cg-alloc-slot (ps-cg ps) sz al)) - (sm (%sym n 'var (or sto 'auto) ty sl))) - (scope-bind! ps n sm) - (cond - ((at-punct? ps 'assign) - (advance ps) - (cond - ;; Aggregate locals get the per-element store treatment. - ((or (at-punct? ps 'lbrace) - (and (eq? (ctype-kind ty) 'arr) - (eq? (tok-kind (peek ps)) 'STR))) - (parse-init-local-aggregate ps sm ty)) - (else - (cg-push-sym (ps-cg ps) sm) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) ty) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps))))) - (else #t)))))))) - -;; ==================================================================== -;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST). -;; -;; parse-init-global ps ty -;; Reads the initializer following `=` for a file-scope or block-scope -;; static var of static-storage type `ty` and returns a list of -;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for -;; the piece grammar. -;; -;; parse-init-local ps sm ty -;; Reads the initializer for an auto-storage variable bound to slot -;; sym `sm` and emits per-element store cg ops. Returns unspecified. -;; ==================================================================== - -(define (%int->le-bv n nbytes) - ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes - ;; >= sign-bit are filled by repeated >>8 (works for both signed and - ;; unsigned because we only keep the low N bytes). - (let ((out (make-bytevector nbytes 0))) - (let loop ((i 0) (v n)) - (cond - ((= i nbytes) out) - (else - (bytevector-u8-set! out i (bit-and v 255)) - (loop (+ i 1) (arithmetic-shift v -8))))))) - -(define (%const-init-piece ps ty) - ;; Parse a non-brace initializer expression for scalar type `ty` and - ;; return a single piece. Recognised forms: - ;; - INT (with optional unary +/-) -> N-byte LE bv - ;; - enum-const IDENT -> N-byte LE bv - ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name) - ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name) - ;; - STR (only for char* targets) -> (label-ref . string-pool-label) - (let ((t (peek ps))) - (cond - ;; Address initializer: &ident -> label-ref - ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp)) - (advance ps) - (let ((it (peek ps))) - (cond - ((eq? (tok-kind it) 'IDENT) - (advance ps) - (let ((sm (scope-lookup ps (tok-value it)))) - (cond - ((not sm) (die (tok-loc it) "init: undecl" (tok-value it))) - ((or (eq? (sym-kind sm) 'fn) - (and (eq? (sym-kind sm) 'var) - (or (eq? (sym-storage sm) 'static) - (eq? (sym-storage sm) 'extern)))) - (cons 'label-ref (sym-slot sm))) - (else - (die (tok-loc it) "init: &x must reference a global" - (tok-value it)))))) - (else (die (tok-loc it) "init: &?" (tok-value it)))))) - ;; Function name or array name as a label-ref initializer. - ;; (Both decay to a pointer when used as a value.) - ((and (eq? (tok-kind t) 'IDENT) - (let ((sm (scope-lookup ps (tok-value t)))) - (and sm - (or (eq? (sym-kind sm) 'fn) - (and (eq? (sym-kind sm) 'var) - (eq? (ctype-kind (sym-type sm)) 'arr) - (or (eq? (sym-storage sm) 'static) - (eq? (sym-storage sm) 'extern))))))) - (advance ps) - (let ((sm (scope-lookup ps (tok-value t)))) - (cons 'label-ref (sym-slot sm)))) - ;; Plain string literal as char* initializer. - ((eq? (tok-kind t) 'STR) - (advance ps) - (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t)))) - (cons 'label-ref lbl))) - ;; Otherwise it's a const integer. - (else - (let ((v (parse-const-int ps))) - (%int->le-bv v (max (ctype-size ty) 1))))))) - -(define (%init-array-elem-type ty) - (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty))) - (else (die #f "init: not an array" ty)))) - -(define (%init-array-decl-len ty) - ;; Declared array length (-1 = inferred). - (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1))) - -(define (%init-fix-array-size! ty count) - ;; Patch an inferred-length array to `count`. - (let ((elem (car (ctype-ext ty)))) - (ctype-ext-set! ty (cons elem count)) - (ctype-size-set! ty (* count (ctype-size elem))))) - -(define (%init-struct-fields ty) - ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. - (let ((ext (ctype-ext ty))) - (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext))) - (else (die #f "init: not a struct" ty))))) - -(define (%find-field fields nm) - (cond ((null? fields) #f) - ((equal? (car (car fields)) nm) (car fields)) - (else (%find-field (cdr fields) nm)))) - -(define (%pad-piece nbytes) - (make-bytevector nbytes 0)) - -;; ----- Global initializers --------------------------------------------- -(define (parse-init-global ps ty) - (pmatch (peek ps) - ;; String literal initializer for char[] - (($ tok? (kind STR) (value ,s)) - (guard (and (eq? (ctype-kind ty) 'arr) - (let ((et (car (ctype-ext ty)))) - (or (eq? et %t-i8) (eq? et %t-u8))))) - (advance ps) - (let* ((slen (bytevector-length s)) - (decl (cdr (ctype-ext ty))) - (final (cond ((< decl 0) (+ slen 1)) (else decl)))) - (cond ((< decl 0) (%init-fix-array-size! ty final))) - (let ((bv (make-bytevector final 0))) - (let loop ((i 0)) - (cond - ((or (= i slen) (>= i final)) (list bv)) - (else - (bytevector-u8-set! bv i (bytevector-u8-ref s i)) - (loop (+ i 1)))))))) - ;; Brace-form - (($ tok? (kind PUNCT) (value lbrace)) - (advance ps) - (cond - ((eq? (ctype-kind ty) 'arr) - (%parse-init-array-list ps ty)) - ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) - (%parse-init-struct-list ps ty)) - (else - ;; Brace-wrapped scalar: { expr } - (let ((piece (%const-init-piece ps ty))) - (cond ((at-punct? ps 'comma) (advance ps))) - (expect-punct ps 'rbrace) - (list piece))))) - ;; Bare scalar initializer - (else (list (%const-init-piece ps ty))))) - -(define (%parse-init-array-list ps ty) - ;; Element-list array initializer; assumes `{` already consumed. - (let* ((elem (%init-array-elem-type ty)) - (esize (ctype-size elem)) - (decl (%init-array-decl-len ty))) - (let lp ((acc '()) (count 0)) - (cond - ((at-punct? ps 'rbrace) - (advance ps) - (cond ((< decl 0) (%init-fix-array-size! ty count))) - ;; Pad to declared length if longer than count. - (let* ((final (cond ((< decl 0) count) (else decl))) - (pad (- final count))) - (cond - ((> pad 0) - (reverse (cons (%pad-piece (* pad esize)) acc))) - (else (reverse acc))))) - (else - (let ((piece - (cond - ((at-punct? ps 'lbrace) - ;; Nested aggregate: brace-flatten via recursion. - (advance ps) - ;; element is itself struct/array - (cond - ((eq? (ctype-kind elem) 'arr) - (%parse-init-array-list ps elem)) - ((or (eq? (ctype-kind elem) 'struct) - (eq? (ctype-kind elem) 'union)) - (%parse-init-struct-list ps elem)) - (else - (let ((p (%const-init-piece ps elem))) - (cond ((at-punct? ps 'comma) (advance ps))) - (expect-punct ps 'rbrace) - (list p))))) - (else - (list (%const-init-piece ps elem)))))) - (cond ((at-punct? ps 'comma) (advance ps))) - (lp (append (reverse piece) acc) (+ count 1)))))))) - -(define (%piece-bytesize p) - ;; Output width of one piece (cf. %cg-init-piece->bv): a bv emits - ;; one byte per element; a (label-ref . _) emits an 8-byte slot. - (cond - ((bytevector? p) (bytevector-length p)) - ((and (pair? p) (eq? (car p) 'label-ref)) 8) - (else (die #f "init: unknown piece" p)))) - -(define (%pieces-bytesize ps-list) - (let loop ((xs ps-list) (n 0)) - (cond ((null? xs) n) - (else (loop (cdr xs) (+ n (%piece-bytesize (car xs)))))))) - -(define (%merge-init-entries entries total-size) - ;; entries: list of (abs-offset . piece-list), in source order. - ;; Sort stably by offset (later writes to the same offset win, per C - ;; designated-init semantics) and emit pad pieces in any gaps and at - ;; the tail. Preserves label-ref pieces — we never merge them into a - ;; flat bv. - (let* ((sorted (%init-stable-sort-by-offset entries)) - (out - (let walk ((xs sorted) (cursor 0) (acc '())) - (cond - ((null? xs) - (cond - ((< cursor total-size) - (reverse (cons (%pad-piece (- total-size cursor)) acc))) - (else (reverse acc)))) - (else - (let* ((e (car xs)) - (eoff (car e)) - (epieces (cdr e)) - (esize (%pieces-bytesize epieces)) - (acc1 (cond - ((> eoff cursor) - (cons (%pad-piece (- eoff cursor)) acc)) - (else acc))) - (acc2 (append (reverse epieces) acc1))) - (walk (cdr xs) (+ eoff esize) acc2))))))) - out)) - -(define (%init-stable-sort-by-offset entries) - ;; Insertion sort, stable by source order for ties. n is small (one - ;; entry per initialized field) so O(n^2) is fine. - (let lp ((xs entries) (acc '())) - (cond - ((null? xs) acc) - (else - (let ((e (car xs))) - (lp (cdr xs) - (let ins ((ys acc) (head '())) - (cond - ((null? ys) - (append (reverse head) (list e))) - ((<= (car e) (car (car ys))) - (append (reverse head) (cons e ys))) - (else - (ins (cdr ys) (cons (car ys) head))))))))))) - -(define (%parse-init-struct-list ps ty) - ;; Struct/union initializer; assumes `{` already consumed. Supports - ;; positional and `.field = expr` forms — including out-of-order - ;; designators ({.y=5, .x=7}). Each entry records its absolute - ;; offset; %merge-init-entries sorts and pads at the closing brace. - (let* ((fields (%init-struct-fields ty)) - (size (ctype-size ty))) - (let lp ((entries '()) (rest fields)) - (cond - ((at-punct? ps 'rbrace) - (advance ps) - (%merge-init-entries (reverse entries) size)) - (else - (let* ((designated? (at-punct? ps 'dot)) - (target - (cond - (designated? - (advance ps) - (let ((nt (advance ps))) - (cond - ((not (eq? (tok-kind nt) 'IDENT)) - (die (tok-loc nt) "init: .field expects ident"))) - (let ((f (%find-field fields (tok-value nt)))) - (cond - ((not f) (die (tok-loc nt) "init: no such field" - (tok-value nt)))) - (expect-punct ps 'assign) - f))) - ((null? rest) - (die (tok-loc (peek ps)) "init: too many fields")) - (else (car rest)))) - (fname (car target)) - (fty (car (cdr target))) - (foff (car (cddr target))) - (piece-list - (cond - ((at-punct? ps 'lbrace) - (advance ps) - (cond - ((eq? (ctype-kind fty) 'arr) - (%parse-init-array-list ps fty)) - ((or (eq? (ctype-kind fty) 'struct) - (eq? (ctype-kind fty) 'union)) - (%parse-init-struct-list ps fty)) - (else - (let ((p (%const-init-piece ps fty))) - (cond ((at-punct? ps 'comma) (advance ps))) - (expect-punct ps 'rbrace) - (list p))))) - (else - (list (%const-init-piece ps fty)))))) - (cond ((at-punct? ps 'comma) (advance ps))) - (lp (cons (cons foff piece-list) entries) - (cond - (designated? - ;; designated init: drop fields up to and including target - (let drop ((xs fields)) - (cond - ((null? xs) '()) - ((equal? (car (car xs)) fname) (cdr xs)) - (else (drop (cdr xs)))))) - (else (cdr rest)))))))))) - -;; ----- Local aggregate initializers ------------------------------------ -;; Emits per-element store sequences via cg ops into the slot of `sm` -;; (a 'var sym whose slot is the frame offset). Assumes the assignment -;; `=` has already been consumed. -(define (parse-init-local-aggregate ps sm ty) - (pmatch (peek ps) - ;; Local char[] = "string" — fill from string bytes. - (($ tok? (kind STR) (value ,s)) - (guard (and (eq? (ctype-kind ty) 'arr) - (let ((et (car (ctype-ext ty)))) - (or (eq? et %t-i8) (eq? et %t-u8))))) - (advance ps) - (let* ((slen (bytevector-length s)) - (decl (cdr (ctype-ext ty))) - (final (cond ((< decl 0) (+ slen 1)) (else decl)))) - (cond ((< decl 0) (%init-fix-array-size! ty final))) - ;; Emit byte stores for each char in s, plus NUL for the - ;; trailing slot if final > slen. - (let loop ((i 0)) - (cond - ((>= i final) #t) - (else - (let ((b (cond ((< i slen) (bytevector-u8-ref s i)) - (else 0))) - (off (+ (sym-slot sm) i))) - (%push-frame-elem-lval ps off %t-u8) - (cg-push-imm (ps-cg ps) %t-u8 b) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps)) - (loop (+ i 1)))))))) - (($ tok? (kind PUNCT) (value lbrace)) - (advance ps) - (cond - ((eq? (ctype-kind ty) 'arr) - (%parse-init-local-array-list ps sm (sym-slot sm) ty)) - ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) - (%parse-init-local-struct-list ps sm (sym-slot sm) ty)) - (else (die #f "init local: brace on scalar?")))) - (else (die (tok-loc (peek ps)) "init local aggregate?")))) - -(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk) - ;; Emit a single scalar store at slot[base + rel-off]. piece is the - ;; raw initializer expression — but here we want to actually evaluate - ;; it via parse-expr to allow non-const expressions for autos. - ;; Caller handles this; this helper handles the store-into-frame ops. - 0) - -(define (%push-frame-elem-lval ps base-off ty) - (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t))) - -(define (%parse-init-local-array-list ps sm base-off ty) - (let* ((elem (%init-array-elem-type ty)) - (esize (ctype-size elem)) - (decl (%init-array-decl-len ty))) - (let lp ((i 0)) - (cond - ((at-punct? ps 'rbrace) - (advance ps) - (cond ((< decl 0) (%init-fix-array-size! ty i))) - ;; Zero out remaining slots if any (declared length > i). - (let ((final (cond ((< decl 0) i) (else decl)))) - (let zlp ((k i)) - (cond - ((>= k final) #t) - (else - (let ((off (+ base-off (* k esize)))) - (cond - ((or (eq? (ctype-kind elem) 'arr) - (eq? (ctype-kind elem) 'struct) - (eq? (ctype-kind elem) 'union)) - ;; Zero each byte in this aggregate slot. - (let zb ((j 0)) - (cond - ((>= j esize) #t) - (else - (%push-frame-elem-lval ps (+ off j) %t-u8) - (cg-push-imm (ps-cg ps) %t-u8 0) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps)) - (zb (+ j 1)))))) - (else - (%push-frame-elem-lval ps off elem) - (cg-push-imm (ps-cg ps) elem 0) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps))))) - (zlp (+ k 1))))))) - (else - (let ((eoff (+ base-off (* i esize)))) - (cond - ((at-punct? ps 'lbrace) - (advance ps) - (cond - ((eq? (ctype-kind elem) 'arr) - (%parse-init-local-array-list ps sm eoff elem)) - ((or (eq? (ctype-kind elem) 'struct) - (eq? (ctype-kind elem) 'union)) - (%parse-init-local-struct-list ps sm eoff elem)) - (else - (%push-frame-elem-lval ps eoff elem) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) elem) - (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) - (cond ((at-punct? ps 'comma) (advance ps))) - (expect-punct ps 'rbrace)))) - (else - (%push-frame-elem-lval ps eoff elem) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) elem) - (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) - (cond ((at-punct? ps 'comma) (advance ps))) - (lp (+ i 1)))))))) - -(define (%bv-in-list? bv xs) - (cond ((null? xs) #f) - ((equal? bv (car xs)) #t) - (else (%bv-in-list? bv (cdr xs))))) - -(define (%emit-zero-field ps base-off f) - ;; Note: scheme1's `+` is binary-only — `(+ a b c)` returns (+ a b) - ;; and silently drops the rest. Compute absolute byte offsets via - ;; nested binary +. - (let* ((fty (car (cdr f))) - (foff (car (cddr f))) - (fsize (ctype-size fty)) - (start-off (+ base-off foff))) - (let zb ((j 0)) - (cond - ((>= j fsize) #t) - (else - (%push-frame-elem-lval ps (+ start-off j) %t-u8) - (cg-push-imm (ps-cg ps) %t-u8 0) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps)) - (zb (+ j 1))))))) - -(define (%parse-init-local-struct-list ps sm base-off ty) - ;; Track each initialized field by name in `seen`; at the closing brace - ;; zero every field NOT in `seen`. The previous design tracked positional - ;; "remaining fields" via `rest`, which silently dropped earlier fields - ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x` - ;; uninitialized). C requires every unmentioned member of an aggregate - ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21). - (let ((fields (%init-struct-fields ty))) - (let lp ((rest fields) (seen '())) - (cond - ((at-punct? ps 'rbrace) - (advance ps) - (for-each - (lambda (f) - (cond ((not (%bv-in-list? (car f) seen)) - (%emit-zero-field ps base-off f)))) - fields)) - (else - (let* ((designated? (at-punct? ps 'dot)) - (target - (cond - (designated? - (advance ps) - (let ((nt (advance ps))) - (let ((f (%find-field fields (tok-value nt)))) - (cond - ((not f) (die (tok-loc nt) "init: no such field" - (tok-value nt)))) - (expect-punct ps 'assign) - f))) - ((null? rest) - (die (tok-loc (peek ps)) "init: too many fields")) - (else (car rest)))) - (fname (car target)) - (fty (car (cdr target))) - (foff (car (cddr target))) - (eoff (+ base-off foff))) - (cond - ((at-punct? ps 'lbrace) - (advance ps) - (cond - ((eq? (ctype-kind fty) 'arr) - (%parse-init-local-array-list ps sm eoff fty)) - ((or (eq? (ctype-kind fty) 'struct) - (eq? (ctype-kind fty) 'union)) - (%parse-init-local-struct-list ps sm eoff fty)) - (else - (%push-frame-elem-lval ps eoff fty) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) fty) - (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) - (cond ((at-punct? ps 'comma) (advance ps))) - (expect-punct ps 'rbrace)))) - (else - (%push-frame-elem-lval ps eoff fty) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) fty) - (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) - (cond ((at-punct? ps 'comma) (advance ps))) - (lp (cond - (designated? - (let drop ((xs fields)) - (cond - ((null? xs) '()) - ((equal? (car (car xs)) fname) (cdr xs)) - (else (drop (cdr xs)))))) - (else (cdr rest))) - (cons fname seen)))))))) - - -;; A → B → C arena pattern from tests/scheme1/93-heap-mark-rewind.scm: -;; -;; A = parse-decl-or-fn (caller, arena-unaware) -;; B = parse-fn-body (this fn — arena boundary) -;; C = %parse-fn-body-inner (the real per-fn parse + cg work) -;; -;; B's "out" is the cg's fixed-storage bufs (cg-text and friends): they -;; were allocated at cg-init (pre-mark) and only mutate via byte writes, -;; so byte-level work survives heap-rewind!. Everything C allocates — -;; vstack opnds, intermediate bvs, ctype scratch, scope frames, switch -;; case alists — is post-mark and discarded. -;; -;; The fn-name binding into the surrounding scope (used so recursive -;; calls can resolve the name during the body) is done BEFORE the mark -;; so it survives. Inner scope frames are popped via scope-leave! before -;; the rewind, so their cells become unreachable; rewind reclaims them. -;; -;; Rewind-safety guard: the body might add user-visible globals -;; (block-statics), strings (literals), tags, or typedefs. Those entries -;; are post-mark and would dangle on rewind. We snapshot the relevant -;; alists before parsing and skip the rewind if any changed — paying -;; full heap cost only for functions that genuinely mutate global state. -(define (parse-fn-body ps name dt) - ;; Hoist the recursive-binding scope-bind! out of the marked region - ;; so the fn-sym cons survives rewind. - (cond ((not (scope-lookup ps name)) - (scope-bind! ps name - (%sym name 'fn 'extern dt - (bytevector-append "cc__" name))))) - (let* ((cg (ps-cg ps)) - (mark (heap-mark)) - (globals-before (cg-globals cg)) - (str-pool-before (cg-str-pool cg)) - (typedefs-before (ps-typedefs ps)) - (tags-before (ps-tags ps))) - (%parse-fn-body-inner ps name dt) - (cond - ((and (eq? globals-before (cg-globals cg)) - (eq? str-pool-before (cg-str-pool cg)) - (eq? typedefs-before (ps-typedefs ps)) - (eq? tags-before (ps-tags ps))) - ;; cg-fn-meta points at post-mark alist conses (fn metadata, - ;; switch-case lists, indirect-slots). Drop the reference before - ;; rewinding so the cg record holds no dangling pointers — the - ;; next cg-fn-begin/v would reset it anyway, but if this is the - ;; last fn, leaving it set leaves a latent landmine. - (cg-fn-meta-set! cg '()) - (heap-rewind! mark) - (debug-log "fn-rewound" name "heap" (heap-usage))) - (else - (debug-log "fn-kept" name "heap" (heap-usage)))))) - -(define (%parse-fn-body-inner ps name dt) - (let* ((e (ctype-ext dt)) (ret (car e)) - (par (cadr e)) (var (car (cddr e)))) - (let ((psyms (cg-fn-begin/v (ps-cg ps) name par ret var))) - (ps-fn-ctx-set! ps - (%fn-ctx name ret (map cdr psyms) var '())) - (scope-enter! ps) - (for-each (lambda (p) (scope-bind! ps (car p) (cdr p))) - psyms) - (expect-punct ps 'lbrace) - (parse-cstmt-body ps) - (expect-punct ps 'rbrace) - (scope-leave! ps) - (ps-fn-ctx-set! ps #f) - (cg-fn-end (ps-cg ps))))) - -(define (parse-stmt ps) - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value lbrace)) (parse-cstmt ps)) - (($ tok? (kind KW) (value if)) (parse-if-stmt ps)) - (($ tok? (kind KW) (value while)) (parse-while-stmt ps)) - (($ tok? (kind KW) (value do)) (parse-do-stmt ps)) - (($ tok? (kind KW) (value for)) (parse-for-stmt ps)) - (($ tok? (kind KW) (value switch)) (parse-switch-stmt ps)) - (($ tok? (kind KW) (value return)) (parse-return-stmt ps)) - (($ tok? (kind KW) (value goto)) (parse-goto-stmt ps)) - (($ tok? (kind KW) (value break)) - (advance ps) (expect-punct ps 'semi) (do-break ps)) - (($ tok? (kind KW) (value continue)) - (advance ps) (expect-punct ps 'semi) (do-continue ps)) - (($ tok? (kind KW) (value case)) (parse-case-stmt ps)) - (($ tok? (kind KW) (value default)) (parse-default-stmt ps)) - (($ tok? (kind IDENT)) - (guard (and (eq? (tok-kind (peek2 ps)) 'PUNCT) - (eq? (tok-value (peek2 ps)) 'colon))) - (parse-labelled-stmt ps)) - (else - (cond ((stmt-starts-decl? ps) (parse-local-decl ps)) - (else (parse-expr-stmt ps)))))) - -(define (stmt-starts-decl? ps) - (pmatch (peek ps) - (($ tok? (kind KW) (value ,v)) - (or (eq? v 'auto) (eq? v 'register) (eq? v 'static) - (eq? v 'extern) (eq? v 'typedef) (eq? v 'const) - (eq? v 'volatile) (eq? v 'restrict) (eq? v 'inline) - (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) - (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) - (eq? v '_Bool) (eq? v 'struct) (eq? v 'union) - (eq? v 'enum))) - (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) - (else #f))) - -(define (parse-local-decl ps) - (let* ((sp (parse-decl-spec ps)) - (sto (car sp)) (b (cdr sp))) - (cond - ((at-punct? ps 'semi) (advance ps) #t) - (else - (let lp () - (let* ((p (parse-declarator ps b)) - (n (car p)) (t (cdr p))) - (handle-decl ps sto n t) - (cond ((at-punct? ps 'comma) (advance ps) (lp)) - (else (expect-punct ps 'semi) #t)))))))) - -(define (parse-cstmt ps) - (expect-punct ps 'lbrace) - (scope-enter! ps) - (parse-cstmt-body ps) - (scope-leave! ps) - (expect-punct ps 'rbrace) #t) - -(define (parse-cstmt-body ps) - (cond - ((at-punct? ps 'rbrace) #t) - ((eq? (tok-kind (peek ps)) 'EOF) - (die (tok-loc (peek ps)) "EOF in cstmt")) - (else (parse-stmt ps) (parse-cstmt-body ps)))) - -(define (parse-compound-stmt ps) (parse-cstmt ps)) - -(define (parse-if-stmt ps) - (expect-kw ps 'if) - (expect-punct ps 'lparen) - (parse-expr ps) (rval! ps) - (expect-punct ps 'rparen) - (cg-ifelse (ps-cg ps) - (lambda () (parse-stmt ps)) - (lambda () - (cond ((at-kw? ps 'else) - (advance ps) (parse-stmt ps)) - (else #t))))) - -;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS -;; §3.3); the parser threads it into break/continue via loop-ctx. - -(define (parse-while-stmt ps) - (expect-kw ps 'while) - (expect-punct ps 'lparen) - (cg-loop (ps-cg ps) - (lambda () (parse-expr ps) (rval! ps)) - (lambda (tag) - (expect-punct ps 'rparen) - (push-loop-ctx! ps 'while tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps))) #t) - -(define (parse-do-stmt ps) - (expect-kw ps 'do) - ;; do-while needs its tag known *before* the body parses, so we - ;; capture it inside the body-thunk and stash it for pop-loop-ctx - ;; via a side cell. - (cg-loop (ps-cg ps) - (lambda () #t) - (lambda (tag) - (push-loop-ctx! ps 'do tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps) - (expect-kw ps 'while) (expect-punct ps 'lparen) - (parse-expr ps) (rval! ps) - (expect-punct ps 'rparen) (expect-punct ps 'semi) - (cg-unop (ps-cg ps) 'lnot) - (cg-if (ps-cg ps) - (lambda () (cg-break (ps-cg ps) tag))))) - #t) - -(define (parse-for-stmt ps) - (expect-kw ps 'for) (expect-punct ps 'lparen) - (scope-enter! ps) - (cond - ((at-punct? ps 'semi) (advance ps)) - ((stmt-starts-decl? ps) (parse-local-decl ps)) - (else (parse-expr ps) (cg-pop (ps-cg ps)) - (expect-punct ps 'semi))) - (cg-loop (ps-cg ps) - (lambda () - (cond ((at-punct? ps 'semi) - (cg-push-imm (ps-cg ps) %t-i32 1)) - (else (parse-expr ps) (rval! ps))) - (expect-punct ps 'semi)) - (lambda (tag) - (let ((stk (collect-til-rparen ps))) - (expect-punct ps 'rparen) - (push-loop-ctx! ps 'for tag #t) - (parse-stmt ps) - (pop-loop-ctx! ps) - (cond - ((null? stk) #t) - (else - (let ((sv (ps-toks ps))) - (ps-toks-set! ps - (append stk (list (make-tok 'EOF #f #f)))) - (parse-expr ps) (cg-pop (ps-cg ps)) - (ps-toks-set! ps sv))))))) - (scope-leave! ps) #t) - -(define (collect-til-rparen ps) - (let loop ((acc '()) (d 0)) - (let ((t (peek ps))) - (cond - ((eq? (tok-kind t) 'EOF) - (die (tok-loc t) "EOF in for-step")) - ((and (zero? d) (eq? (tok-kind t) 'PUNCT) - (eq? (tok-value t) 'rparen)) (reverse acc)) - (else - (let ((nt (advance ps))) - (loop (cons nt acc) - (cond ((not (eq? (tok-kind nt) 'PUNCT)) d) - ((eq? (tok-value nt) 'lparen) (+ d 1)) - ((eq? (tok-value nt) 'rparen) (- d 1)) - (else d))))))))) - -(define (parse-switch-stmt ps) - (expect-kw ps 'switch) (expect-punct ps 'lparen) - (parse-expr ps) (rval! ps) - (expect-punct ps 'rparen) - ;; Switch's break-target tag is the swctx's end-tag — cg owns it, - ;; and we read it back so cg-break inside the switch body emits a - ;; tag cg actually labels. - (let* ((sw (cg-switch-begin (ps-cg ps))) - (tg (swctx-end-tag sw))) - (push-loop-ctx-sw! ps 'switch tg sw) - (parse-stmt ps) - (pop-loop-ctx! ps) - (cg-switch-end (ps-cg ps) sw))) - -(define (parse-case-stmt ps) - (expect-kw ps 'case) - (let ((v (parse-const-int ps))) - (expect-punct ps 'colon) - (cg-switch-case (ps-cg ps) (innermost-sw ps) v) - (parse-stmt ps))) - -(define (parse-default-stmt ps) - (expect-kw ps 'default) (expect-punct ps 'colon) - (cg-switch-default (ps-cg ps) (innermost-sw ps)) - (parse-stmt ps)) - -(define (parse-return-stmt ps) - (expect-kw ps 'return) - (cond - ((at-punct? ps 'semi) (advance ps) (cg-return (ps-cg ps))) - (else - (parse-expr ps) (rval! ps) - (let ((fc (ps-fn-ctx ps))) - (cond - ((and fc (not (eq? (ctype-kind (fn-ctx-return-type fc)) 'void))) - (cg-cast (ps-cg ps) (fn-ctx-return-type fc))) - (else #t))) - (cg-return (ps-cg ps)) - (expect-punct ps 'semi)))) - -(define (parse-goto-stmt ps) - (expect-kw ps 'goto) - (let ((t (advance ps))) - (cond ((eq? (tok-kind t) 'IDENT) - (cg-goto (ps-cg ps) (tok-value t))) - (else (die (tok-loc t) "label?")))) - (expect-punct ps 'semi)) - -(define (parse-labelled-stmt ps) - (let ((t (advance ps))) - (expect-punct ps 'colon) - (cg-emit-label (ps-cg ps) (tok-value t)) - (parse-stmt ps))) - -(define (parse-expr-stmt ps) - (cond - ((at-punct? ps 'semi) (advance ps) #t) - (else (parse-expr ps) (cg-pop (ps-cg ps)) - (expect-punct ps 'semi)))) - -(define (push-loop-ctx! ps k tg hc) - (ps-loops-set! ps (cons (%loop-ctx k tg hc) (ps-loops ps)))) -(define (push-loop-ctx-sw! ps k tg sw) - (ps-loops-set! ps - (cons (%loop-ctx k (cons tg sw) #f) (ps-loops ps)))) -(define (pop-loop-ctx! ps) - (ps-loops-set! ps (cdr (ps-loops ps)))) -(define (do-break ps) - (let ((c (innermost-loop ps))) - (cond - ((not c) (die #f "break outside")) - ((eq? (loop-ctx-kind c) 'switch) - (cg-break (ps-cg ps) (car (loop-ctx-tag c)))) - (else (cg-break (ps-cg ps) (loop-ctx-tag c)))))) -(define (do-continue ps) - (let ((c (innermost-cont ps))) - (cond ((not c) (die #f "cont outside")) - (else (cg-continue (ps-cg ps) (loop-ctx-tag c)))))) -(define (innermost-loop ps) - (cond ((null? (ps-loops ps)) #f) (else (car (ps-loops ps))))) -(define (innermost-cont ps) - (let lp ((xs (ps-loops ps))) - (cond ((null? xs) #f) - ((eq? (loop-ctx-kind (car xs)) 'switch) (lp (cdr xs))) - (else (car xs))))) -(define (innermost-sw ps) - (let lp ((xs (ps-loops ps))) - (cond ((null? xs) (die #f "case outside switch")) - ((eq? (loop-ctx-kind (car xs)) 'switch) - (cdr (loop-ctx-tag (car xs)))) - (else (lp (cdr xs)))))) - -(define %binop-bp - (list - (cons 'comma (cons 1 2)) - (cons 'assign (cons 4 3)) (cons 'plus-eq (cons 4 3)) - (cons 'minus-eq (cons 4 3)) (cons 'star-eq (cons 4 3)) - (cons 'slash-eq (cons 4 3)) (cons 'pct-eq (cons 4 3)) - (cons 'shl-eq (cons 4 3)) (cons 'shr-eq (cons 4 3)) - (cons 'amp-eq (cons 4 3)) (cons 'caret-eq (cons 4 3)) - (cons 'bar-eq (cons 4 3)) (cons 'qmark (cons 6 5)) - (cons 'lor (cons 10 11)) (cons 'land (cons 20 21)) - (cons 'bar (cons 30 31)) (cons 'caret (cons 40 41)) - (cons 'amp (cons 50 51)) - (cons 'eq2 (cons 60 61)) (cons 'ne (cons 60 61)) - (cons 'lt (cons 70 71)) (cons 'le (cons 70 71)) - (cons 'gt (cons 70 71)) (cons 'ge (cons 70 71)) - (cons 'shl (cons 80 81)) (cons 'shr (cons 80 81)) - (cons 'plus (cons 90 91)) (cons 'minus (cons 90 91)) - (cons 'star (cons 100 101)) (cons 'slash (cons 100 101)) - (cons 'pct (cons 100 101)))) - -(define (binop-bp-of s) (alist-ref/eq s %binop-bp)) - -(define (punct-to-cgop s) - (cond ((eq? s 'plus) 'add) ((eq? s 'minus) 'sub) - ((eq? s 'star) 'mul) ((eq? s 'slash) 'div) - ((eq? s 'pct) 'rem) ((eq? s 'amp) 'and) - ((eq? s 'bar) 'or) ((eq? s 'caret) 'xor) - ((eq? s 'shl) 'shl) ((eq? s 'shr) 'shr) - ((eq? s 'eq2) 'eq) ((eq? s 'ne) 'ne) - ((eq? s 'lt) 'lt) ((eq? s 'le) 'le) - ((eq? s 'gt) 'gt) ((eq? s 'ge) 'ge) - (else (die #f "binop" s)))) - -(define (compound-op s) - (cond ((eq? s 'plus-eq) 'add) ((eq? s 'minus-eq) 'sub) - ((eq? s 'star-eq) 'mul) ((eq? s 'slash-eq) 'div) - ((eq? s 'pct-eq) 'rem) ((eq? s 'shl-eq) 'shl) - ((eq? s 'shr-eq) 'shr) ((eq? s 'amp-eq) 'and) - ((eq? s 'caret-eq) 'xor) ((eq? s 'bar-eq) 'or) - (else #f))) - -(define (parse-expr ps) (parse-expr-bp ps 0)) - -(define (parse-expr-bp ps mn) - (parse-unary ps) (parse-binary-rhs ps mn)) - -(define (parse-binary-rhs ps mn) - (let ((t (peek ps))) - (cond - ((not (eq? (tok-kind t) 'PUNCT)) #t) - (else - (let ((bp (binop-bp-of (tok-value t)))) - (cond - ((not bp) #t) - ((< (car bp) mn) #t) - (else - (let ((op (tok-value t)) (rb (cdr bp))) - (advance ps) - (cond - ((eq? op 'comma) - ;; lhs has been parsed; discard it and evaluate rhs. - ;; Result of the comma expr is the rhs's rval. - (cg-pop (ps-cg ps)) - (parse-expr-bp ps rb) (rval! ps)) - ((eq? op 'assign) - (parse-expr-bp ps rb) (rval! ps) - (cg-assign (ps-cg ps))) - ((compound-op op) - (let ((b (compound-op op))) - (cg-dup (ps-cg ps)) - (cg-load (ps-cg ps)) - (parse-expr-bp ps rb) (rval! ps) - (cg-arith-conv (ps-cg ps)) - (cg-binop (ps-cg ps) b) - (cg-assign (ps-cg ps)))) - ((eq? op 'qmark) - (rval! ps) - (cg-ifelse-merge (ps-cg ps) - (lambda () - (parse-expr-bp ps 0) (rval! ps)) - (lambda () - (expect-punct ps 'colon) - (parse-expr-bp ps rb) (rval! ps)))) - ((eq? op 'land) - (rval! ps) - ;; Both branches must push i32 0/1. Right side is - ;; coerced via `cg-cast bool` so the merge slot - ;; carries i32 (per §H.2). - (cg-ifelse-merge (ps-cg ps) - (lambda () - (parse-expr-bp ps rb) (rval! ps) - (cg-cast (ps-cg ps) %t-bool) - (cg-cast (ps-cg ps) %t-i32)) - (lambda () - (cg-push-imm (ps-cg ps) %t-i32 0)))) - ((eq? op 'lor) - (rval! ps) - (cg-ifelse-merge (ps-cg ps) - (lambda () - (cg-push-imm (ps-cg ps) %t-i32 1)) - (lambda () - (parse-expr-bp ps rb) (rval! ps) - (cg-cast (ps-cg ps) %t-bool) - (cg-cast (ps-cg ps) %t-i32)))) - (else - (rval! ps) (cg-promote (ps-cg ps)) - (parse-expr-bp ps rb) (rval! ps) - (cg-promote (ps-cg ps)) - (cg-arith-conv (ps-cg ps)) - (cg-binop (ps-cg ps) (punct-to-cgop op)))) - (parse-binary-rhs ps mn))))))))) - -(define (parse-unary ps) - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value amp)) - (advance ps) (parse-unary ps) - (cg-take-addr (ps-cg ps))) - (($ tok? (kind PUNCT) (value star)) - (advance ps) (parse-unary ps) (rval! ps) - (cg-push-deref (ps-cg ps))) - (($ tok? (kind PUNCT) (value plus)) - (advance ps) (parse-unary ps) - (rval! ps) (cg-promote (ps-cg ps))) - (($ tok? (kind PUNCT) (value minus)) - (advance ps) (parse-unary ps) - (rval! ps) (cg-promote (ps-cg ps)) - (cg-unop (ps-cg ps) 'neg)) - (($ tok? (kind PUNCT) (value tilde)) - (advance ps) (parse-unary ps) - (rval! ps) (cg-promote (ps-cg ps)) - (cg-unop (ps-cg ps) 'bnot)) - (($ tok? (kind PUNCT) (value bang)) - (advance ps) (parse-unary ps) (rval! ps) - (cg-unop (ps-cg ps) 'lnot)) - (($ tok? (kind PUNCT) (value inc)) - (advance ps) (parse-unary ps) - (cg-dup (ps-cg ps)) - (cg-load (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-i32 1) - (cg-binop (ps-cg ps) 'add) (cg-assign (ps-cg ps))) - (($ tok? (kind PUNCT) (value dec)) - (advance ps) (parse-unary ps) - (cg-dup (ps-cg ps)) - (cg-load (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-i32 1) - (cg-binop (ps-cg ps) 'sub) (cg-assign (ps-cg ps))) - (($ tok? (kind PUNCT) (value lparen)) (parse-cast-or-unary ps)) - (($ tok? (kind KW) (value sizeof)) - (advance ps) - (cond - ((at-punct? ps 'lparen) - (advance ps) - (cond - ((token-is-decl? ps) - (let* ((sp (parse-decl-spec ps)) - (p (parse-declarator ps (cdr sp))) - (ty (cdr p))) - (expect-punct ps 'rparen) - (cg-push-imm (ps-cg ps) %t-u64 - (max (ctype-size ty) 0)))) - (else - (parse-expr ps) (expect-punct ps 'rparen) - (let* ((tp (cg-top (ps-cg ps))) - (sz (max (ctype-size (opnd-type tp)) 0))) - (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 sz))))) - (else (parse-unary ps) - (let* ((tp (cg-top (ps-cg ps))) - (sz (max (ctype-size (opnd-type tp)) 0))) - (cg-pop (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-u64 sz))))) - (else (parse-postfix ps)))) - -(define (token-is-decl? ps) - (pmatch (peek ps) - (($ tok? (kind KW) (value ,v)) - (or (eq? v 'void) (eq? v 'char) (eq? v 'short) (eq? v 'int) - (eq? v 'long) (eq? v 'signed) (eq? v 'unsigned) - (eq? v '_Bool) (eq? v 'struct) (eq? v 'union) - (eq? v 'enum) (eq? v 'const) (eq? v 'volatile) - (eq? v 'restrict) (eq? v 'inline))) - (($ tok? (kind IDENT) (value ,n)) (typedef? ps n)) - (else #f))) - -(define (parse-cast-or-unary ps) - (pmatch (peek2 ps) - (($ tok? (kind KW) (value ,v)) - (guard (or (eq? v 'void) (eq? v 'char) (eq? v 'short) - (eq? v 'int) (eq? v 'long) (eq? v 'signed) - (eq? v 'unsigned) (eq? v '_Bool) - (eq? v 'struct) (eq? v 'union) (eq? v 'enum) - (eq? v 'const) (eq? v 'volatile) - (eq? v 'restrict))) - (advance ps) - (let* ((sp (parse-decl-spec ps)) - (p (parse-declarator ps (cdr sp))) - (ty (cdr p))) - (expect-punct ps 'rparen) - (parse-unary ps) - ;; Cast operand undergoes lvalue conversion first (C semantics): - ;; arrays decay to pointers, lvals become rvals. cg-cast then - ;; bit-casts the resulting rval to the target type. - (rval! ps) - (cg-cast (ps-cg ps) ty))) - (($ tok? (kind IDENT) (value ,n)) - (guard (typedef? ps n)) - (advance ps) - (let* ((sp (parse-decl-spec ps)) - (p (parse-declarator ps (cdr sp))) - (ty (cdr p))) - (expect-punct ps 'rparen) - (parse-unary ps) - (rval! ps) - (cg-cast (ps-cg ps) ty))) - (else (advance ps) (parse-expr ps) - (expect-punct ps 'rparen) - (parse-postfix-rest ps)))) - -(define (parse-postfix ps) - (parse-primary ps) (parse-postfix-rest ps)) - -(define (parse-postfix-rest ps) - (let lp () - (pmatch (peek ps) - (($ tok? (kind PUNCT) (value lbrack)) - (advance ps) (rval! ps) - (parse-expr ps) (rval! ps) - (expect-punct ps 'rbrack) - (cg-binop (ps-cg ps) 'add) - (cg-push-deref (ps-cg ps)) (lp)) - (($ tok? (kind PUNCT) (value lparen)) - (advance ps) (rval-not-fn! ps) - (let* ((fn-ty (call-fn-type (ps-cg ps))) - (n (parse-call-args ps fn-ty)) - ;; has-result? = #f for known void returns. Skips the - ;; wasted ST a0 → frame-slot spill that cg-call would - ;; otherwise emit for void calls. - (has-result? - (cond - ((not fn-ty) #t) - ((eq? (ctype-kind (car (ctype-ext fn-ty))) 'void) #f) - (else #t)))) - (expect-punct ps 'rparen) - (cg-call (ps-cg ps) n has-result?) - ;; Maintain parse's "one rval per expression" invariant so - ;; comma / parse-expr-stmt / for-init/step pop sites stay - ;; simple. The placeholder is vstack-only and never - ;; materialized (cg-pop is a vstack op, no emit). - (cond ((not has-result?) - (cg-push-imm (ps-cg ps) %t-i32 0))) - (lp))) - (($ tok? (kind PUNCT) (value dot)) - (advance ps) - (pmatch (advance ps) - (($ tok? (kind IDENT) (value ,n)) - (cg-push-field (ps-cg ps) n) (lp)) - (($ tok? (loc ,l)) (die l "expected field name")))) - (($ tok? (kind PUNCT) (value arrow)) - (advance ps) - (pmatch (advance ps) - (($ tok? (kind IDENT) (value ,n)) - ;; ptr -> field: load the pointer to rval, deref to reach - ;; the struct lval, then push the field. - (rval! ps) - (cg-push-deref (ps-cg ps)) - (cg-push-field (ps-cg ps) n) (lp)) - (($ tok? (loc ,l)) (die l "expected field name")))) - (($ tok? (kind PUNCT) (value inc)) - (advance ps) - (cg-postinc (ps-cg ps)) (lp)) - (($ tok? (kind PUNCT) (value dec)) - (advance ps) - (cg-postdec (ps-cg ps)) (lp)) - (else #t)))) - -;; call-fn-type cg -> ctype-or-#f -;; The function operand sits at the top of the vstack when -;; parse-call-args runs (just after rval-not-fn!). Its type may be -;; `fn` directly (named callee) or `ptr -> fn` (function pointer). -;; Returns the underlying `fn` ctype, or #f if the operand isn't -;; recognizably callable (callsite still works — no per-arg cast). -(define (call-fn-type cg) - (let* ((tp (cg-top cg))) - (cond - ((not tp) #f) - (else - (let* ((ty (opnd-type tp)) - (k (ctype-kind ty))) - (cond - ((eq? k 'fn) ty) - ((eq? k 'ptr) - (let ((pe (ctype-ext ty))) - (cond ((and pe (eq? (ctype-kind pe) 'fn)) pe) - (else #f)))) - (else #f))))))) - -;; param-types-of fn-ty -> (params variadic?) with a #f fallback. -(define (call-fn-param-info fn-ty) - (cond - ((not fn-ty) (cons '() #f)) - (else - (let ((ext (ctype-ext fn-ty))) - (cons (cadr ext) (car (cddr ext))))))) - -;; parse-call-args ps fn-ty -> arg-count -;; Casts each fixed arg to the declared param type (CC.md §K.5). -;; For variadic args (index >= named-arg count, when variadic? = #t) -;; applies cg-promote (CC.md §G.1). -(define (parse-call-args ps fn-ty) - (cond - ((at-punct? ps 'rparen) 0) - (else - (let* ((info (call-fn-param-info fn-ty)) - (params (car info)) - (var? (cdr info)) - (nfix (length params))) - (let lp ((n 0) (rem params)) - (parse-expr-bp ps 4) (rval! ps) - (cond - ;; Fixed-arg: cast to declared param type. param entry shape - ;; is (name . ctype) per cg-fn-begin's contract. - ((not (null? rem)) - (cg-cast (ps-cg ps) (cdr (car rem)))) - ;; Variadic position (n >= nfix and var? is true): promote. - (var? - (cg-promote (ps-cg ps)))) - (let ((m (+ n 1)) - (rest (if (null? rem) '() (cdr rem)))) - (cond ((at-punct? ps 'comma) (advance ps) (lp m rest)) - (else m)))))))) - -;; -------------------------------------------------------------------- -;; __builtin_va_* (§G.2). va_list / va_start / va_arg / va_end in -;; <stdarg.h> alias these. Each is parsed as: name '(' args ')'. -;; va_start(ap, last) — last is parsed and discarded; cg only needs -;; the variadic-first-slot offset, which it already tracks. -;; va_arg(ap, T) — T is a type-name; result rval has that type. -;; va_end(ap) — no-op codegen; just consumes ap. -;; -;; Pushes a single imm 0 for va_start / va_end so they fit as -;; expression statements; va_arg pushes the rval. -;; -------------------------------------------------------------------- -(define (parse-builtin-va-start ps) - (advance ps) ; IDENT - (expect-punct ps 'lparen) - (parse-expr-bp ps 4) ; ap (must be lval) - (expect-punct ps 'comma) - ;; "last" is parsed for syntactic completeness then dropped — cg - ;; doesn't need it; the variadic-first-slot was determined at - ;; cg-fn-begin/v time. - (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) - (expect-punct ps 'rparen) - (cg-va-start (ps-cg ps)) - ;; Push a placeholder rval so the call expression has a value - ;; (matches va_start's "void" but our parser expects all - ;; expressions to leave one rval). - (cg-push-imm (ps-cg ps) %t-i32 0)) - -(define (parse-builtin-va-arg ps) - (advance ps) ; IDENT - (expect-punct ps 'lparen) - (parse-expr-bp ps 4) ; ap (lval) - (expect-punct ps 'comma) - (let* ((sp (parse-decl-spec ps)) - (p (parse-declarator ps (cdr sp))) - (ty (cdr p))) - (expect-punct ps 'rparen) - (cg-va-arg (ps-cg ps) ty))) - -(define (parse-builtin-va-end ps) - (advance ps) ; IDENT - (expect-punct ps 'lparen) - (parse-expr-bp ps 4) ; ap - (expect-punct ps 'rparen) - (cg-va-end (ps-cg ps)) - (cg-push-imm (ps-cg ps) %t-i32 0)) - -(define (parse-primary ps) - (let ((t (peek ps))) - (pmatch t - (($ tok? (kind INT) (value ,n)) - (advance ps) - (cg-push-imm (ps-cg ps) %t-i32 n)) - (($ tok? (kind CHAR) (value ,c)) - (advance ps) - (cg-push-imm (ps-cg ps) %t-i8 c)) - (($ tok? (kind STR) (value ,s)) - (advance ps) - (cg-push-string (ps-cg ps) s)) - (($ tok? (kind IDENT) (value ,n)) - (cond - ((bv= n "__builtin_va_start") (parse-builtin-va-start ps)) - ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps)) - ((bv= n "__builtin_va_end") (parse-builtin-va-end ps)) - (else - (let ((sm (scope-lookup ps n))) - (advance ps) - (cond - ((not sm) (die (tok-loc t) "undecl" n)) - ((eq? (sym-kind sm) 'enum-const) - (cg-push-imm (ps-cg ps) %t-i32 (sym-slot sm))) - (else (cg-push-sym (ps-cg ps) sm))))))) - (($ tok? (kind PUNCT) (value lparen)) - (advance ps) (parse-expr ps) (expect-punct ps 'rparen)) - (else (die (tok-loc t) "unexp" (tok-value t)))))) - -(define (rval! ps) - (let ((tp (cg-top (ps-cg ps)))) - (cond ((and tp (opnd? tp) (opnd-lval? tp)) - (cg-load (ps-cg ps))) - (else #t)))) - -(define (rval-not-fn! ps) - (let ((tp (cg-top (ps-cg ps)))) - (cond ((and tp (opnd? tp) (opnd-lval? tp) - (not (ctype-is-fn? (opnd-type tp)))) - (cg-load (ps-cg ps))) - (else #t)))) diff --git a/cc/pp.scm b/cc/pp.scm @@ -1,805 +0,0 @@ -;; cc/pp.scm — token list -> expanded token list. -;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4. -;; #include rejected (CC.md §Toolchain envelope). - -;; --- helpers (TODO: promote to util.scm if shared more broadly) --- -(define (%pp-bv-mem? x xs) - (cond ((null? xs) #f) - ((bv= x (car xs)) #t) - (else (%pp-bv-mem? x (cdr xs))))) - -(define (%pp-bv-union a b) - (cond ((null? a) b) - ((%pp-bv-mem? (car a) b) (%pp-bv-union (cdr a) b)) - (else (cons (car a) (%pp-bv-union (cdr a) b))))) - -(define (%pp-with-hide t hide) - (%tok (tok-kind t) (tok-value t) (tok-loc t) hide)) -(define (%pp-with-loc t loc) - (%tok (tok-kind t) (tok-value t) loc (tok-hide t))) - -;; --- pp-state (private record) --- -;; cond-stack: list of (active? . has-taken?). Outer-active gating is -;; computed by walking the stack rather than encoding it in frames. -(define-record-type pp-state - (%pp-state macros cond-stack cur-file line-delta) - pp-state? - (macros pps-macros pps-macros-set!) - (cond-stack pps-cond-stack pps-cond-stack-set!) - (cur-file pps-cur-file pps-cur-file-set!) - (line-delta pps-line-delta pps-line-delta-set!)) - -(define (%pp-make-state defs) (%pp-state defs '() #f 0)) - -(define (%pp-active? state) - (let loop ((xs (pps-cond-stack state))) - (cond ((null? xs) #t) - ((not (car (car xs))) #f) - (else (loop (cdr xs)))))) - -;; Active for the *parent* of the top frame (used by elif/else). -(define (%pp-parent-active? state) - (let ((cs (pps-cond-stack state))) - (cond ((null? cs) #t) - (else - (let loop ((xs (cdr cs))) - (cond ((null? xs) #t) - ((not (car (car xs))) #f) - (else (loop (cdr xs))))))))) - -;; --- token classification --- -(define (%pp-eof? t) (eq? (tok-kind t) 'EOF)) -(define (%pp-nl? t) (eq? (tok-kind t) 'NL)) -(define (%pp-hash? t) (eq? (tok-kind t) 'HASH)) -(define (%pp-ident? t) (eq? (tok-kind t) 'IDENT)) -(define (%pp-int? t) (eq? (tok-kind t) 'INT)) -(define (%pp-punct? t pname) - (and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) pname))) -(define (%pp-ident-name? t name-bv) - (and (%pp-ident? t) (bv= (tok-value t) name-bv))) -(define (%pp-skip-ws toks) toks) - -;; --- built-in macro names --- -(define %pp-bv-FILE "__FILE__") -(define %pp-bv-LINE "__LINE__") -(define %pp-bv-STDC "__STDC__") -(define %pp-bv-LISPCC "__LISPCC__") -(define %pp-bv-DATE "__DATE__") -(define %pp-bv-TIME "__TIME__") -(define %pp-bv-STDC-VERSION "__STDC_VERSION__") -(define %pp-bv-STDC-HOSTED "__STDC_HOSTED__") -(define %pp-bv-VA-ARGS "__VA_ARGS__") -(define %pp-bv-defined "defined") - -;; Fixed values for reproducibility — we don't read the wall clock. -(define %pp-bv-DATE-VALUE "Jan 1 1970") -(define %pp-bv-TIME-VALUE "00:00:00") - -(define (%pp-builtin? name) - (or (bv= name %pp-bv-FILE) (bv= name %pp-bv-LINE) - (bv= name %pp-bv-STDC) (bv= name %pp-bv-LISPCC) - (bv= name %pp-bv-DATE) (bv= name %pp-bv-TIME) - (bv= name %pp-bv-STDC-VERSION) (bv= name %pp-bv-STDC-HOSTED))) - -(define (%pp-expand-builtin name loc state) - (let* ((file (or (pps-cur-file state) (loc-file loc))) - (line (+ (loc-line loc) (pps-line-delta state))) - (col (loc-col loc)) - (here (%loc file line col))) - (cond - ((bv= name %pp-bv-FILE) (list (%tok 'STR file here '()))) - ((bv= name %pp-bv-LINE) (list (%tok 'INT line here '()))) - ((bv= name %pp-bv-STDC) (list (%tok 'INT 1 here '()))) - ((bv= name %pp-bv-LISPCC) (list (%tok 'INT 1 here '()))) - ((bv= name %pp-bv-DATE) (list (%tok 'STR %pp-bv-DATE-VALUE here '()))) - ((bv= name %pp-bv-TIME) (list (%tok 'STR %pp-bv-TIME-VALUE here '()))) - ((bv= name %pp-bv-STDC-VERSION) (list (%tok 'INT 199901 here '()))) - ((bv= name %pp-bv-STDC-HOSTED) (list (%tok 'INT 1 here '()))) - (else (die loc "internal: not a builtin" name))))) - -;; --- buf-list: simple reversed-list buffer of toks --- -(define-record-type buf-list - (%buf-list xs) - buf-list? - (xs buf-list-xs buf-list-xs-set!)) -(define (make-buf-list) (%buf-list '())) -(define (buf-list-push! b t) (buf-list-xs-set! b (cons t (buf-list-xs b)))) -(define (buf-list-push-many! b ts) - (let loop ((ts ts)) - (cond ((null? ts) #t) - (else (buf-list-push! b (car ts)) (loop (cdr ts)))))) -(define (buf-list-flush b) (reverse (buf-list-xs b))) - -;; Translation phase 6: concatenate adjacent string literals. The merged -;; token keeps the loc and hide-set of the first; values are byte-appended. -(define (%pp-merge-adjacent-strs toks) - (let loop ((toks toks) (acc '())) - (cond - ((null? toks) (reverse acc)) - ((and (not (null? acc)) - (eq? (tok-kind (car toks)) 'STR) - (eq? (tok-kind (car acc)) 'STR)) - (let* ((prev (car acc)) - (cur (car toks)) - (merged (%tok 'STR - (bytevector-append (tok-value prev) (tok-value cur)) - (tok-loc prev) - (tok-hide prev)))) - (loop (cdr toks) (cons merged (cdr acc))))) - (else (loop (cdr toks) (cons (car toks) acc)))))) - -;; --- pp-expand: top-level driver --- -(define (pp-expand toks initial-defines) - (let ((state (%pp-make-state initial-defines)) - (out (make-buf-list))) - (let loop ((toks toks)) - (cond - ((null? toks) (die #f "pp-expand: missing EOF token")) - ((%pp-eof? (car toks)) - (cond ((not (null? (pps-cond-stack state))) - (die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef")) - (else - (buf-list-push! out (car toks)) - (%pp-merge-adjacent-strs (buf-list-flush out))))) - ((%pp-nl? (car toks)) (loop (cdr toks))) - ((%pp-hash? (car toks)) - (let* ((lr (%pp-take-line (cdr toks))) - (line (car lr)) (rest (cdr lr))) - (%pp-dispatch-directive (car toks) line state out) - (loop rest))) - (else - (let* ((lr (%pp-take-line toks)) - (line (car lr)) (rest (cdr lr))) - (cond ((%pp-active? state) - (%pp-emit-expanded line state out)) - (else #t)) - (loop rest))))))) - -;; Take tokens up to (not including) the next NL or EOF. NL is consumed; -;; EOF is left in the stream so the driver sees it next. -(define (%pp-take-line toks) - (let loop ((toks toks) (acc '())) - (cond - ((null? toks) (cons (reverse acc) toks)) - ((%pp-eof? (car toks)) (cons (reverse acc) toks)) - ((%pp-nl? (car toks)) (cons (reverse acc) (cdr toks))) - (else (loop (cdr toks) (cons (car toks) acc)))))) - -;; --- directive dispatch --- -;; pmatch-based on the directive name bv. bv literals match by equal?. -;; Directive name can arrive as IDENT (most cases) or KW (`if` and `else` -;; are C keywords promoted by lex; their KW symbol values map back to bv -;; via symbol->string). -(define (%pp-directive-name t) - (cond ((eq? (tok-kind t) 'IDENT) (tok-value t)) - ((eq? (tok-kind t) 'KW) (symbol->string (tok-value t))) - (else #f))) - -(define (%pp-dispatch-directive hash-tok line state out) - (let ((line (%pp-skip-ws line))) - (cond - ((null? line) #t) ; bare `#` line — null directive - ((%pp-directive-name (car line)) - (let ((name (%pp-directive-name (car line))) - (rest (cdr line)) - (loc (tok-loc (car line)))) - (pmatch name - ("define" (cond ((%pp-active? state) (%pp-do-define rest state)) (else #t))) - ("undef" (cond ((%pp-active? state) (%pp-do-undef rest state)) (else #t))) - ("if" (%pp-do-if rest state)) - ("ifdef" (%pp-do-ifdef rest state)) - ("ifndef" (%pp-do-ifndef rest state)) - ("elif" (%pp-do-elif rest state)) - ("else" (%pp-do-else rest state)) - ("endif" (%pp-do-endif rest state)) - ("error" (cond ((%pp-active? state) - (%pp-do-error (cons (car line) rest) state)) - (else #t))) - ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t))) - ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t))) - ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t))) - (else (die loc "unknown preprocessor directive" name))))) - (else - (die (tok-loc (car line)) "expected directive name after '#'" - (tok-kind (car line))))))) - -;; --- #define --- -;; function-like vs object-like is decided by an immediately-adjacent `(`. -;; "Adjacent" = column of `(` equals column of name + length of name. -(define (%pp-do-define line state) - (cond - ((null? line) (die #f "#define requires a macro name")) - ((not (%pp-ident? (car line))) - (die (tok-loc (car line)) "#define: expected identifier")) - (else - (let* ((nt (car line)) (name (tok-value nt)) (rest (cdr line))) - (cond - ((and (not (null? rest)) - (%pp-punct? (car rest) 'lparen) - (= (loc-col (tok-loc (car rest))) - (+ (loc-col (tok-loc nt)) - (bytevector-length name)))) - (%pp-define-fn name (cdr rest) (tok-loc nt) state)) - (else - (let ((m (%macro 'obj '() rest))) - (pps-macros-set! state (alist-set name m (pps-macros state)))))))))) - -(define (%pp-define-fn name post-lparen nloc state) - (let loop ((toks post-lparen) (params '()) (variadic? #f)) - (cond - ((null? toks) (die nloc "#define: unterminated parameter list")) - ((%pp-punct? (car toks) 'rparen) - (let* ((body (cdr toks)) - (kind (if variadic? 'fn-vararg 'fn)) - (m (%macro kind (reverse params) body))) - (pps-macros-set! state (alist-set name m (pps-macros state))))) - ((%pp-punct? (car toks) 'ellipsis) - (let ((rest (cdr toks))) - (cond - ((null? rest) (die (tok-loc (car toks)) "#define: '...' must precede ')'")) - ((%pp-punct? (car rest) 'rparen) (loop rest params #t)) - (else (die (tok-loc (car rest)) "#define: garbage after '...'"))))) - ((null? params) - (cond - ((%pp-ident? (car toks)) - (loop (cdr toks) (cons (tok-value (car toks)) params) #f)) - (else (die (tok-loc (car toks)) "#define: expected parameter name")))) - (else - (cond - ((%pp-punct? (car toks) 'comma) - (let ((after (cdr toks))) - (cond - ((null? after) (die (tok-loc (car toks)) "#define: trailing ','")) - ((%pp-punct? (car after) 'ellipsis) - (let ((aa (cdr after))) - (cond - ((and (not (null? aa)) (%pp-punct? (car aa) 'rparen)) - (loop aa params #t)) - (else (die (tok-loc (car after)) - "#define: '...' must precede ')'"))))) - ((%pp-ident? (car after)) - (loop (cdr after) (cons (tok-value (car after)) params) #f)) - (else - (die (tok-loc (car after)) - "#define: expected parameter name after ','"))))) - (else (die (tok-loc (car toks)) - "#define: expected ',' or ')' in parameter list"))))))) - -;; --- #undef --- -(define (%pp-do-undef line state) - (cond - ((null? line) (die #f "#undef requires a macro name")) - ((not (%pp-ident? (car line))) - (die (tok-loc (car line)) "#undef: expected identifier")) - (else - (pps-macros-set! state - (%pp-alist-drop (tok-value (car line)) (pps-macros state)))))) - -(define (%pp-alist-drop key al) - (cond ((null? al) '()) - ((bv= (car (car al)) key) (cdr al)) - (else (cons (car al) (%pp-alist-drop key (cdr al)))))) - -;; --- #if / #ifdef / #ifndef / #elif / #else / #endif --- -(define (%pp-do-if line state) - (cond - ((not (%pp-active? state)) - (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) - (else - (let* ((v (pp-eval-cexpr line (pps-macros state))) - (a? (not (= v 0)))) - (pps-cond-stack-set! state (cons (cons a? a?) (pps-cond-stack state))))))) - -(define (%pp-do-ifdef line state) - (cond - ((not (%pp-active? state)) - (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) - (else - (let ((d? (%pp-defined? (%pp-name-of-single line) state))) - (pps-cond-stack-set! state - (cons (cons d? d?) (pps-cond-stack state))))))) - -(define (%pp-do-ifndef line state) - (cond - ((not (%pp-active? state)) - (pps-cond-stack-set! state (cons (cons #f #f) (pps-cond-stack state)))) - (else - (let ((a? (not (%pp-defined? (%pp-name-of-single line) state)))) - (pps-cond-stack-set! state - (cons (cons a? a?) (pps-cond-stack state))))))) - -(define (%pp-name-of-single line) - (cond - ((null? line) (die #f "#ifdef/#ifndef: missing identifier")) - ((not (%pp-ident? (car line))) - (die (tok-loc (car line)) "#ifdef/#ifndef: expected identifier")) - (else (tok-value (car line))))) - -(define (%pp-defined? name state) - (or (alist-ref name (pps-macros state)) - (%pp-builtin? name) - #f)) - -(define (%pp-do-elif line state) - (let ((cs (pps-cond-stack state))) - (cond - ((null? cs) (die #f "#elif outside #if")) - (else - (let* ((top (car cs)) (rest (cdr cs)) - (taken? (cdr top)) - (par? (%pp-parent-active? state))) - (cond - ((or (not par?) taken?) - (pps-cond-stack-set! state (cons (cons #f taken?) rest))) - (else - (let* ((v (pp-eval-cexpr line (pps-macros state))) - (a? (not (= v 0)))) - (pps-cond-stack-set! state - (cons (cons a? (or a? taken?)) rest)))))))))) - -(define (%pp-do-else line state) - (let ((cs (pps-cond-stack state))) - (cond - ((null? cs) (die #f "#else outside #if")) - (else - (let* ((top (car cs)) (rest (cdr cs)) - (taken? (cdr top)) - (par? (%pp-parent-active? state))) - (cond - ((not par?) - (pps-cond-stack-set! state (cons (cons #f taken?) rest))) - (taken? - (pps-cond-stack-set! state (cons (cons #f #t) rest))) - (else - (pps-cond-stack-set! state (cons (cons #t #t) rest))))))))) - -(define (%pp-do-endif line state) - (let ((cs (pps-cond-stack state))) - (cond ((null? cs) (die #f "#endif outside #if")) - (else (pps-cond-stack-set! state (cdr cs)))))) - -;; --- #error --- -;; line[0] is the directive name "error"; the rest is the user message. -(define (%pp-do-error line state) - (let* ((msg-toks (if (null? line) '() (cdr line))) - (loc (if (null? line) #f (tok-loc (car line)))) - (msg (%pp-toks->display msg-toks))) - (die loc "#error" msg))) - -(define (%pp-toks->display toks) - (let loop ((toks toks) (acc '()) (first? #t)) - (cond - ((null? toks) (bv-cat (reverse acc))) - (else - (let ((p (%pp-tok->bv (car toks)))) - (loop (cdr toks) - (if first? (cons p acc) (cons p (cons " " acc))) - #f)))))) - -(define (%pp-tok->bv t) - (let ((k (tok-kind t)) (v (tok-value t))) - (cond - ((eq? k 'IDENT) v) - ((eq? k 'INT) (fixnum->bv v 10)) - ((eq? k 'STR) (%pp-quote-bytes v 34)) - ((eq? k 'CHAR) (%pp-quote-bytes (bv-of-byte v) 39)) - ((eq? k 'KW) (symbol->string v)) - ((eq? k 'PUNCT) (symbol->string v)) - (else "?")))) - -;; Reconstruct a string/char literal source spelling from cooked content. -;; Per C11 6.10.3.2: insert `\` before each `"` and `\` (or `'` for char). -;; `delim` is 34 for STR, 39 for CHAR. -(define (%pp-quote-bytes bv delim) - (let* ((n (bytevector-length bv)) - (delim-bv (bv-of-byte delim))) - (let loop ((i 0) (acc (list delim-bv))) - (cond - ((= i n) (bv-cat (reverse (cons delim-bv acc)))) - (else - (let ((b (bytevector-u8-ref bv i))) - (cond - ((or (= b delim) (= b 92)) - (loop (+ i 1) (cons (bv-of-byte b) (cons "\\" acc)))) - (else - (loop (+ i 1) (cons (bv-of-byte b) acc)))))))))) - -;; --- #line / #pragma / #include --- -;; Approximate #line: subsequent toks have line = (orig-line + delta), -;; where delta = (N - here-line - 1). Good enough for most cases. -(define (%pp-do-line line state) - (cond - ((null? line) (die #f "#line requires a line number")) - ((not (%pp-int? (car line))) - (die (tok-loc (car line)) "#line: expected integer")) - (else - (let* ((nt (car line)) (n (tok-value nt)) - (rest (cdr line)) - (here (loc-line (tok-loc nt)))) - (pps-line-delta-set! state (- n here 1)) - (cond - ((null? rest) #t) - ((eq? (tok-kind (car rest)) 'STR) - (pps-cur-file-set! state (tok-value (car rest)))) - (else (die (tok-loc (car rest)) - "#line: expected string after number"))))))) - -(define (%pp-do-pragma line state) #t) - -(define (%pp-do-include line state) - (die (if (null? line) #f (tok-loc (car line))) - "#include: file inclusion is handled upstream by pre-flatten")) - -;; --- macro expansion engine --- -;; Walk toks; for each IDENT, look up in macros / builtins. Hide-set: -;; if the name is in t.hide, don't expand. Otherwise expand and rescan -;; the produced body, with hide += {name}. -(define (%pp-emit-expanded toks state out) - (let loop ((toks toks)) - (cond - ((null? toks) #t) - (else - (let* ((t (car toks)) (rest (cdr toks))) - (cond - ((not (%pp-ident? t)) - (buf-list-push! out (%pp-relocate t state)) - (loop rest)) - (else - (let ((name (tok-value t))) - (cond - ((%pp-bv-mem? name (tok-hide t)) - (buf-list-push! out (%pp-relocate t state)) - (loop rest)) - ((%pp-builtin? name) - (buf-list-push-many! out - (%pp-expand-builtin name (tok-loc t) state)) - (loop rest)) - (else - (let ((m (alist-ref name (pps-macros state)))) - (cond - ((not m) - (buf-list-push! out (%pp-relocate t state)) - (loop rest)) - (else - (%pp-apply-macro t m rest state out loop)))))))))))))) - -(define (%pp-apply-macro t m rest state out cont) - (let ((kind (macro-kind m)) (name (tok-value t))) - (cond - ((eq? kind 'obj) - (let ((bodies (%pp-prepare-body (macro-body m) - (cons name (tok-hide t))))) - (%pp-emit-expanded bodies state out) - (cont rest))) - (else - (let ((after (%pp-skip-ws rest))) - (cond - ((or (null? after) (not (%pp-punct? (car after) 'lparen))) - (buf-list-push! out (%pp-relocate t state)) - (cont rest)) - (else - (let* ((ar (%pp-collect-args (cdr after) (tok-loc t))) - (args (car ar)) (rest2 (cdr ar)) - (params (macro-params m)) - (variadic? (eq? kind 'fn-vararg)) - (env (%pp-bind-args params args variadic? (tok-loc t))) - (sub (%pp-substitute (macro-body m) env (tok-loc t))) - (bodies (%pp-prepare-body sub - (cons name (tok-hide t))))) - (%pp-emit-expanded bodies state out) - (cont rest2))))))))) - -(define (%pp-prepare-body body extra-hide) - (map (lambda (t) - (%pp-with-hide t (%pp-bv-union extra-hide (tok-hide t)))) - body)) - -;; Collect comma-separated args. `toks` starts AFTER `(`. Returns -;; (args . rest), where args is a list of token-lists. -(define (%pp-collect-args toks call-loc) - (let loop ((toks toks) (depth 0) (cur '()) (args '())) - (cond - ((null? toks) (die call-loc "macro call: unterminated argument list")) - ((%pp-eof? (car toks)) - (die call-loc "macro call: unterminated argument list")) - ((and (= depth 0) (%pp-punct? (car toks) 'rparen)) - (let ((args* - (cond - ;; Empty parens count as one empty argument; bind-args - ;; degenerates this back to "no args" for 0-param macros. - ((and (null? args) (null? cur)) (list '())) - (else (reverse (cons (reverse cur) args)))))) - (cons args* (cdr toks)))) - ((and (= depth 0) (%pp-punct? (car toks) 'comma)) - (loop (cdr toks) 0 '() (cons (reverse cur) args))) - ((%pp-punct? (car toks) 'lparen) - (loop (cdr toks) (+ depth 1) (cons (car toks) cur) args)) - ((%pp-punct? (car toks) 'rparen) - (loop (cdr toks) (- depth 1) (cons (car toks) cur) args)) - (else - (loop (cdr toks) depth (cons (car toks) cur) args))))) - -;; Bind formals → token-lists (alist by bv key). Variadic gathers -;; trailing actuals into __VA_ARGS__, joined with synthetic commas. -(define (%pp-bind-args params args variadic? call-loc) - (let* ((np (length params)) (na (length args))) - (cond - (variadic? - (cond - ((< na np) (die call-loc "macro call: too few arguments")) - (else - (let loop ((ps params) (as args) (acc '())) - (cond - ((null? ps) - (alist-set %pp-bv-VA-ARGS (%pp-join-comma as) acc)) - (else - (loop (cdr ps) (cdr as) - (alist-set (car ps) (car as) acc)))))))) - (else - (cond - ((and (= np 0) (= na 1) (null? (car args))) '()) - ((not (= np na)) (die call-loc "macro call: argument count mismatch")) - (else - (let loop ((ps params) (as args) (acc '())) - (cond - ((null? ps) acc) - (else (loop (cdr ps) (cdr as) - (alist-set (car ps) (car as) acc))))))))))) - -(define (%pp-join-comma argss) - (cond - ((null? argss) '()) - ((null? (cdr argss)) (car argss)) - (else - (append (car argss) - (cons (%pp-synth-comma) (%pp-join-comma (cdr argss))))))) - -(define (%pp-synth-comma) - (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '())) - -;; Body substitution: walk body; replace param IDENTs with arg toks, -;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not -;; pre-expand args before substitution; the rescan after substitution -;; catches the same expansions in practice. -(define (%pp-substitute body env call-loc) - (let loop ((body body) (out '())) - (cond - ((null? body) (reverse out)) - (else - (let ((t (car body)) (rest (cdr body))) - (cond - ((%pp-punct? t 'hash) - (cond - ((or (null? rest) (not (%pp-ident? (car rest)))) - (die (tok-loc t) "stringize: '#' must precede a parameter name")) - (else - (let* ((id (car rest)) (pn (tok-value id)) - (pt (alist-ref pn env))) - (cond - ((not pt) - (die (tok-loc id) "stringize: '#' operand must be a parameter" pn)) - (else - (let ((s (%tok 'STR (%pp-toks->display pt) (tok-loc t) '()))) - (loop (cdr rest) (cons s out))))))))) - ((%pp-punct? t 'paste) - (cond - ((null? out) (die (tok-loc t) "paste: '##' cannot start a body")) - ((null? rest) (die (tok-loc t) "paste: '##' cannot end a body")) - (else - (let* ((lhs (car out)) - (rt (car rest)) - (rhs-list - (cond - ((and (%pp-ident? rt) (alist-ref (tok-value rt) env)) - (alist-ref (tok-value rt) env)) - (else (list rt))))) - (cond - ((null? rhs-list) (loop (cdr rest) out)) - (else - (let* ((p (%pp-paste-tokens lhs (car rhs-list))) - (after (append (cdr rhs-list) (cdr rest)))) - (loop after (cons p (cdr out)))))))))) - ((%pp-ident? t) - (let* ((pn (tok-value t)) (pt (alist-ref pn env))) - (cond - ((not pt) (loop rest (cons t out))) - ((and (not (null? rest)) (%pp-punct? (car rest) 'paste)) - (cond - ((null? pt) (loop (cdr rest) out)) - (else (loop rest (append (reverse pt) out))))) - (else (loop rest (append (reverse pt) out)))))) - (else (loop rest (cons t out))))))))) - -;; Paste two tokens textually; reparse the result. -(define (%pp-paste-tokens lhs rhs) - (let ((lk (tok-kind lhs)) (rk (tok-kind rhs))) - (cond - ((and (eq? lk 'IDENT) (eq? rk 'IDENT)) - (%tok 'IDENT (bytevector-append (tok-value lhs) (tok-value rhs)) - (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) - ((and (eq? lk 'IDENT) (eq? rk 'INT)) - (%tok 'IDENT (bytevector-append (tok-value lhs) (fixnum->bv (tok-value rhs) 10)) - (tok-loc lhs) (%pp-bv-union (tok-hide lhs) (tok-hide rhs)))) - ((and (eq? lk 'INT) (eq? rk 'INT)) - (let* ((s (bytevector-append (fixnum->bv (tok-value lhs) 10) - (fixnum->bv (tok-value rhs) 10))) - (pr (bv->fixnum s 10))) - (cond - ((not (car pr)) (die (tok-loc lhs) "paste: cannot reparse as integer" s)) - (else (%tok 'INT (cdr pr) (tok-loc lhs) - (%pp-bv-union (tok-hide lhs) (tok-hide rhs))))))) - (else (die (tok-loc lhs) "paste: unsupported token kinds" lk rk))))) - -(define (%pp-relocate t state) - (cond - ((and (= (pps-line-delta state) 0) (not (pps-cur-file state))) t) - (else - (let* ((l (tok-loc t)) - (f (or (pps-cur-file state) (loc-file l))) - (ln (+ (loc-line l) (pps-line-delta state))) - (c (loc-col l))) - (%pp-with-loc t (%loc f ln c)))))) - -;; --- pp-eval-cexpr: #if expression evaluator --- -;; Steps: resolve `defined NAME`, macro-expand the rest, treat any -;; remaining IDENT as 0, parse with recursive descent. -;; -;; Arena boundary (test_93 A→B→C pattern). Everything between the mark -;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list, -;; where `s2` runs the full macro-expansion engine), plus the recursive -;; parser's (val . rest) cons cell at every level. The result is a -;; fixnum, so no pre-allocated out cell is needed — `val` survives the -;; rewind by virtue of being an immediate. The error path goes through -;; `die` (which sys-exits), so no rewind there. -(define (pp-eval-cexpr toks macros) - (let ((mark (heap-mark))) - (let* ((state (%pp-state macros '() #f 0)) - (s1 (%pp-resolve-defined toks state)) - (s2 (%pp-expand-line s1 state)) - (s3 (%pp-idents-as-zero s2))) - (let* ((p (%pp-cx-expr s3)) - (val (car p)) (rest (cdr p))) - (cond - ((null? rest) (heap-rewind! mark) val) - (else (die (tok-loc (car rest)) "#if: garbage at end of expression" - (tok-kind (car rest))))))))) - -(define (%pp-expand-line toks state) - (let ((out (make-buf-list))) - (%pp-emit-expanded toks state out) - (buf-list-flush out))) - -(define (%pp-resolve-defined toks state) - (let loop ((toks toks) (acc '())) - (cond - ((null? toks) (reverse acc)) - ((%pp-ident-name? (car toks) %pp-bv-defined) - (let ((rest (cdr toks))) - (cond - ((null? rest) (die (tok-loc (car toks)) "defined: missing operand")) - ((%pp-ident? (car rest)) - (let ((v (if (%pp-defined? (tok-value (car rest)) state) 1 0))) - (loop (cdr rest) - (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))) - ((%pp-punct? (car rest) 'lparen) - (let ((after (cdr rest))) - (cond - ((or (null? after) (not (%pp-ident? (car after)))) - (die (tok-loc (car toks)) "defined: expected identifier")) - (else - (let ((aa (cdr after))) - (cond - ((or (null? aa) (not (%pp-punct? (car aa) 'rparen))) - (die (tok-loc (car toks)) "defined: expected ')'")) - (else - (let ((v (if (%pp-defined? (tok-value (car after)) state) 1 0))) - (loop (cdr aa) - (cons (%tok 'INT v (tok-loc (car toks)) '()) acc)))))))) )) - (else (die (tok-loc (car rest)) "defined: expected identifier or '('"))))) - (else (loop (cdr toks) (cons (car toks) acc)))))) - -(define (%pp-idents-as-zero toks) - (map (lambda (t) - (cond ((%pp-ident? t) (%tok 'INT 0 (tok-loc t) '())) - (else t))) - toks)) - -;; --- recursive-descent #if expression parser --- -;; Returns (value . rest). -(define (%pp-cx-expr toks) (%pp-cx-cond toks)) - -(define (%pp-cx-cond toks) - (let* ((p (%pp-cx-lor toks)) - (v (car p)) (rest (cdr p))) - (cond - ((and (not (null? rest)) (%pp-punct? (car rest) 'qmark)) - (let* ((p2 (%pp-cx-expr (cdr rest))) - (vt (car p2)) (after (cdr p2))) - (cond - ((or (null? after) (not (%pp-punct? (car after) 'colon))) - (die (if (null? after) #f (tok-loc (car after))) "?: missing ':'")) - (else - (let* ((p3 (%pp-cx-cond (cdr after))) - (vf (car p3)) (rest3 (cdr p3))) - (cons (if (not (= v 0)) vt vf) rest3)))))) - (else (cons v rest))))) - -(define (%pp-cx-binl next ops toks) - (let loop ((p (next toks))) - (let ((v (car p)) (rest (cdr p))) - (cond - ((null? rest) p) - (else - (let* ((tt (car rest)) - (hit (and (eq? (tok-kind tt) 'PUNCT) - (alist-ref/eq (tok-value tt) ops)))) - (cond - ((not hit) p) - (else - (let* ((p2 (next (cdr rest))) - (v2 (car p2)) (rest2 (cdr p2))) - (loop (cons (hit v v2) rest2))))))))))) - -(define (%pp-cx-lor toks) - (%pp-cx-binl %pp-cx-land - (list (cons 'lor (lambda (a b) (if (or (not (= a 0)) (not (= b 0))) 1 0)))) - toks)) -(define (%pp-cx-land toks) - (%pp-cx-binl %pp-cx-bor - (list (cons 'land (lambda (a b) (if (and (not (= a 0)) (not (= b 0))) 1 0)))) - toks)) -(define (%pp-cx-bor toks) (%pp-cx-binl %pp-cx-bxor (list (cons 'bar bit-or)) toks)) -(define (%pp-cx-bxor toks) (%pp-cx-binl %pp-cx-band (list (cons 'caret bit-xor)) toks)) -(define (%pp-cx-band toks) (%pp-cx-binl %pp-cx-eq (list (cons 'amp bit-and)) toks)) -(define (%pp-cx-eq toks) - (%pp-cx-binl %pp-cx-rel - (list (cons 'eq2 (lambda (a b) (if (= a b) 1 0))) - (cons 'ne (lambda (a b) (if (= a b) 0 1)))) - toks)) -(define (%pp-cx-rel toks) - (%pp-cx-binl %pp-cx-shift - (list (cons 'lt (lambda (a b) (if (< a b) 1 0))) - (cons 'le (lambda (a b) (if (<= a b) 1 0))) - (cons 'gt (lambda (a b) (if (> a b) 1 0))) - (cons 'ge (lambda (a b) (if (>= a b) 1 0)))) - toks)) -(define (%pp-cx-shift toks) - (%pp-cx-binl %pp-cx-add - (list (cons 'shl (lambda (a b) (arithmetic-shift a b))) - (cons 'shr (lambda (a b) (arithmetic-shift a (- 0 b))))) - toks)) -(define (%pp-cx-add toks) - (%pp-cx-binl %pp-cx-mul (list (cons 'plus +) (cons 'minus -)) toks)) -(define (%pp-cx-mul toks) - (%pp-cx-binl %pp-cx-unary - (list (cons 'star *) (cons 'slash quotient) (cons 'pct remainder)) - toks)) - -(define (%pp-cx-unary toks) - (cond - ((null? toks) (die #f "#if: unexpected end of expression")) - ((%pp-punct? (car toks) 'plus) (%pp-cx-unary (cdr toks))) - ((%pp-punct? (car toks) 'minus) - (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) - (cons (- 0 v) r))) - ((%pp-punct? (car toks) 'bang) - (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) - (cons (if (= v 0) 1 0) r))) - ((%pp-punct? (car toks) 'tilde) - (let* ((p (%pp-cx-unary (cdr toks))) (v (car p)) (r (cdr p))) - (cons (bit-not v) r))) - (else (%pp-cx-primary toks)))) - -(define (%pp-cx-primary toks) - (cond - ((null? toks) (die #f "#if: expected expression")) - ((%pp-int? (car toks)) (cons (tok-value (car toks)) (cdr toks))) - ((eq? (tok-kind (car toks)) 'CHAR) - (cons (tok-value (car toks)) (cdr toks))) - ((%pp-punct? (car toks) 'lparen) - (let* ((p (%pp-cx-expr (cdr toks))) (v (car p)) (r (cdr p))) - (cond - ((or (null? r) (not (%pp-punct? (car r) 'rparen))) - (die (if (null? r) #f (tok-loc (car r))) "#if: missing ')'")) - (else (cons v (cdr r)))))) - (else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks)))))) diff --git a/cc/util.scm b/cc/util.scm @@ -1,286 +0,0 @@ -;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude. -;; -;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add -;; helpers here freely; the listed signatures are the load-bearing -;; surface other modules call. - -;; -------------------------------------------------------------------- -;; bytevector helpers (scheme1 strings ARE bytevectors) -;; -------------------------------------------------------------------- -(define (bv= a b) (bytevector=? a b)) - -(define (bv-prefix? p s) - ;; Is s a bv that starts with the bytes of p? - (let ((plen (bytevector-length p)) - (slen (bytevector-length s))) - (if (< slen plen) - #f - (let loop ((i 0)) - (cond ((= i plen) #t) - ((= (bytevector-u8-ref p i) (bytevector-u8-ref s i)) - (loop (+ i 1))) - (else #f)))))) - -(define (bv-find bv byte from) - ;; Linear scan for the first byte == `byte` at index >= from. - ;; Returns the index, or #f if not found. - (let ((n (bytevector-length bv))) - (let loop ((i from)) - (cond ((>= i n) #f) - ((= (bytevector-u8-ref bv i) byte) i) - (else (loop (+ i 1))))))) - -(define (bv-slice bv start end) - ;; Fresh copy of bytes in [start, end). bytevector-copy already does - ;; this in scheme1 (3-arg form returns a fresh bv). - (bytevector-copy bv start end)) - -(define (bv-of-byte b) (make-bytevector 1 b)) - -(define (bv-cat lst-of-bv) - ;; Concat a list of bytevectors with one allocation. bytevector-append - ;; is variadic, so apply does this in a single linear pass. - (apply bytevector-append lst-of-bv)) - -(define (bv->fixnum bv radix) - ;; (ok . val) per CC-INTERNALS: (#t . val) on parse, (#f . #f) on fail. - ;; string->number is pure and returns #f on parse failure (not the - ;; (ok . val) convention, since it's not a syscall). - (let ((n (string->number bv radix))) - (if n (cons #t n) (cons #f #f)))) - -(define (fixnum->bv n radix) (number->string n radix)) - -;; -------------------------------------------------------------------- -;; lists / alists -;; -------------------------------------------------------------------- -(define (alist-ref key al) - ;; equal? compare (intended for bv keys). The prelude's `assoc` uses - ;; eq?, so we roll our own for the equal? case. - (cond ((null? al) #f) - ((equal? (car (car al)) key) (cdr (car al))) - (else (alist-ref key (cdr al))))) - -(define (alist-ref/eq key al) - ;; eq? compare (for symbol keys). Reuses the prelude's assoc, which - ;; is eq?-based. - (let ((p (assoc key al))) - (if p (cdr p) #f))) - -(define (alist-set key val al) (cons (cons key val) al)) - -(define (alist-update key f al) - ;; Functional update by equal? key. If found, replace its value with - ;; (f old-val). If not found, prepend (cons key (f #f)) so callers - ;; can use this as upsert-with-default. - (let loop ((xs al) (acc '())) - (cond ((null? xs) - (cons (cons key (f #f)) (reverse acc))) - ((equal? (car (car xs)) key) - (append (reverse acc) - (cons (cons key (f (cdr (car xs)))) - (cdr xs)))) - (else (loop (cdr xs) (cons (car xs) acc)))))) - -(define (any p xs) - (cond ((null? xs) #f) - ((p (car xs)) #t) - (else (any p (cdr xs))))) - -(define (every p xs) - (cond ((null? xs) #t) - ((p (car xs)) (every p (cdr xs))) - (else #f))) - -(define (count p xs) - (let loop ((xs xs) (n 0)) - (cond ((null? xs) n) - ((p (car xs)) (loop (cdr xs) (+ n 1))) - (else (loop (cdr xs) n))))) - -;; -------------------------------------------------------------------- -;; ints -;; -------------------------------------------------------------------- -(define (min3 a b c) (min a (min b c))) -(define (align-up n k) - ;; round n up to the nearest multiple of k (k must be a power of 2) - (let ((mask (- k 1))) - (bit-and (+ n mask) (bit-not mask)))) - -;; -------------------------------------------------------------------- -;; output buffer (fixed-size pre-allocated byte storage) -;; -;; Every buf owns one bytevector of `cap` bytes, plus a write `offset`. -;; buf-push! is bytevector-copy! into storage — zero allocation per -;; push, no chunks list to chase. This is what makes per-function -;; heap-mark/heap-rewind! safe in cg: the destination buf is fixed- -;; storage (allocated once, lives pre-mark), so byte-level mutations -;; survive a rewind that discards the parse/cg scratch. -;; -;; Sizing knobs live in one place so they're easy to tune as inputs -;; grow. cg-init picks per-buf caps; the per-fn bufs are reused -;; across functions (reset, not re-allocated). -;; -------------------------------------------------------------------- - -;; Tuning constants — total fixed pre-allocation ≈ 12.27 MiB on a -;; 64 MiB heap. Bump these when a workload overflows; the buf-overflow -;; die() reports off/len/cap so misses are easy to diagnose. -;; -;; Each cap is a power of two. scheme1's bv_capacity_for rounds the -;; requested length up to the smallest power of two ≥ n, so asking for -;; 2^k bytes consumes exactly 2^k of heap. -(define %BUF-CAP-TEXT 8388608) ; 8 MiB: .text + entry stub -(define %BUF-CAP-DATA 2097152) ; 2 MiB: .data (strings, globals) -(define %BUF-CAP-BSS 2097152) ; 2 MiB: .bss -(define %BUF-CAP-FN 262144) ; 256 KiB: per-fn body asm -(define %BUF-CAP-PROLOGUE 16384) ; 16 KiB: per-fn prologue -(define %BUF-CAP-DEFAULT 65536) ; 64 KiB: make-buf fallback - -(define-record-type buf - (%buf storage offset cap) - buf? - (storage buf-storage) ; bv: pre-allocated, never resized - (offset buf-offset buf-offset-set!) ; fixnum: bytes written so far - (cap buf-cap)) ; fixnum: storage capacity - -(define (make-buf/cap cap) - (%buf (make-bytevector cap 0) 0 cap)) - -(define (make-buf) (make-buf/cap %BUF-CAP-DEFAULT)) - -(define (buf-push! b bv) - (let* ((n (bytevector-length bv)) - (off (buf-offset b)) - (newoff (+ off n))) - (cond - ((> newoff (buf-cap b)) - (die #f "buf overflow" off n (buf-cap b)))) - (bytevector-copy! (buf-storage b) off bv 0 n) - (buf-offset-set! b newoff))) - -(define (buf-flush b) - ;; Snapshot the used prefix as a fresh bv. One allocation; the - ;; underlying storage is unchanged. - (bytevector-copy (buf-storage b) 0 (buf-offset b))) - -(define (buf-reset! b) (buf-offset-set! b 0)) - -(define (buf-drain! dst src) - ;; Copy src's used bytes into dst at dst's current write head; reset - ;; src to empty. dst and src must be distinct bufs. - (let* ((slen (buf-offset src)) - (doff (buf-offset dst)) - (newoff (+ doff slen))) - (cond - ((> newoff (buf-cap dst)) - (die #f "buf-drain overflow" doff slen (buf-cap dst)))) - (bytevector-copy! (buf-storage dst) doff (buf-storage src) 0 slen) - (buf-offset-set! dst newoff) - (buf-offset-set! src 0))) - -;; -------------------------------------------------------------------- -;; diagnostics + I/O -;; -------------------------------------------------------------------- -(define (die loc msg . irritants) - ;; Format per CC-CONTRACTS §2.3: - ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ... - ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted. - ;; irritants are written via display semantics (no quoting); format's - ;; ~a handles bv/fixnum/pair/symbol the same way display does. - ;; - ;; All output is built into a single bv and sent to fd 2 with one - ;; sys-write loop, so a partial write doesn't interleave fragments - ;; from a concurrent process. - (let* ((prefix (if loc - (format "~a:~d:~d: error: " - (loc-file loc) (loc-line loc) (loc-col loc)) - "error: ")) - (head (bytevector-append prefix (format "~a" msg))) - ;; Irritants get ": " before the first and " " between the rest. - (tail (if (null? irritants) - (list NL-BV) - (let walk ((xs irritants) (sep ": ") (acc '())) - (if (null? xs) - (reverse (cons NL-BV acc)) - (walk (cdr xs) - " " - (cons (format "~a" (car xs)) - (cons sep acc))))))) - (out (bv-cat (cons head tail)))) - (write-bv-fd 2 out) - (sys-exit 1))) - -(define (slurp-fd fd) - ;; Read fd to EOF. Uses BUFSIZE chunks (same constant the prelude's - ;; port layer uses); bv-concat-reverse builds the result in one - ;; allocation so a multi-MB tcc.c stays linear. - (let ((buf (make-bytevector BUFSIZE))) - (let loop ((acc '())) - (let ((r (sys-read fd buf 0 BUFSIZE))) - (cond ((not (car r)) - (die #f "slurp-fd: sys-read failed" (cdr r))) - ((zero? (cdr r)) - (bv-concat-reverse acc)) - (else - (loop (cons (bytevector-copy buf 0 (cdr r)) acc)))))))) - -(define (write-bv-fd fd bv) - ;; Full write or die. sys-write may write fewer bytes than requested; - ;; advance the offset and retry the unwritten tail. - ;; - ;; On failure we sys-exit directly instead of routing through `die` - ;; — `die` itself uses write-bv-fd, so a write failure to fd 2 must - ;; not recurse infinitely. Status 1 matches the contract for `die`. - (let ((len (bytevector-length bv))) - (let loop ((off 0)) - (if (= off len) - #t - (let ((r (sys-write fd bv off (- len off)))) - (cond ((not (car r)) (sys-exit 1)) - ((zero? (cdr r)) (sys-exit 1)) - (else (loop (+ off (cdr r)))))))))) - -;; -------------------------------------------------------------------- -;; debug logging -;; -;; Cheap sticky on/off: the cc compiler is single-threaded and short- -;; lived, so a top-level mutable flag is fine. Toggle via -;; (debug-log-on!) / (debug-log-off!). When on, (debug-log msg . irr) -;; writes one line to fd 2 in the same display-style format as `die`, -;; but doesn't abort. The intent is to trace heap usage between cc -;; phases (lex/pp/parse/cg-finish) without compile-time conditionals. -;; -------------------------------------------------------------------- -(define %debug-log-enabled #f) -(define (debug-log-on!) (set! %debug-log-enabled #t)) -(define (debug-log-off!) (set! %debug-log-enabled #f)) -(define (debug-log? ) %debug-log-enabled) - -(define (debug-log msg . irritants) - (cond - (%debug-log-enabled - (let* ((head (bytevector-append "[cc] " (format "~a" msg))) - (tail (if (null? irritants) - (list NL-BV) - (let walk ((xs irritants) (sep ": ") (acc '())) - (if (null? xs) - (reverse (cons NL-BV acc)) - (walk (cdr xs) - " " - (cons (format "~a" (car xs)) - (cons sep acc))))))) - (out (bv-cat (cons head tail)))) - (write-bv-fd 2 out))) - (else #t))) - -;; -------------------------------------------------------------------- -;; fresh-name generator (used for cg label counters, etc.) -;; -------------------------------------------------------------------- -(define (make-namer prefix) - ;; Returns a thunk; each call yields prefix0, prefix1, ... as a fresh - ;; bv. The counter lives in the closure's lexical environment; scheme1 - ;; closures heap-capture by reference, so set! on ctr is sticky. - (let ((ctr 0)) - (lambda () - (let ((s (bytevector-append prefix (number->string ctr 10)))) - (set! ctr (+ ctr 1)) - s)))) diff --git a/docs/CC-INTERNALS.md b/docs/CC-INTERNALS.md @@ -4,16 +4,18 @@ Companion to [CC.md](CC.md). CC.md says what we accept; this doc says how the implementation is organized so engineers can split work and test independently. -The compiler is one scheme1 program assembled from six files at build -time: +The compiler lives in one file, `cc/cc.scm`, that bundles every +module section in order; `cc/main.scm` is the one-liner entry point +that fires `cc-main`: ``` -build: catm cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm cc/cg.scm cc/parse.scm cc/main.scm > cc/cc.scm -run: scheme1 < cc/cc.scm -- input.flat.c output.P1pp +build: catm build/$ARCH/cc/cc.scm scheme1/prelude.scm cc/cc.scm cc/main.scm +run: scheme1 build/$ARCH/cc/cc.scm input.flat.c output.P1pp ``` -(Driver shell-script details — argv plumbing, scheme1 prelude prepend -— belong in scripts/, not in this doc.) +Sections inside `cc.scm` are delimited by their original +`;; cc/<name>.scm —` headers (util, data, lex, pp, cg, parse, main), +so the layering below is still navigable by search. ## Module DAG @@ -25,16 +27,16 @@ data ────────┼─► lex ──► pp ──► parse └──────────────────────► cg ──► main ``` -- **util.scm** — leaf helpers; depends only on the scheme1 prelude. -- **data.scm** — record-type definitions used across modules. -- **lex.scm** — bytestream → token list. Pure function. -- **pp.scm** — token list → expanded token list. Pure function. -- **cg.scm** — codegen state and emission API. Mutates a `cg` record. -- **parse.scm** — token list + cg → P1pp output. Mutates `pstate` and +- **util** — leaf helpers; depends only on the scheme1 prelude. +- **data** — record-type definitions used across modules. +- **lex** — bytestream → token list. Pure function. +- **pp** — token list → expanded token list. Pure function. +- **cg** — codegen state and emission API. Mutates a `cg` record. +- **parse** — token list + cg → P1pp output. Mutates `pstate` and drives `cg`. -- **main.scm** — argv handling, file I/O, ties phases together. +- **main** — argv handling, file I/O, ties phases together. -Cycles are forbidden. parse.scm calls cg.scm but never the reverse. +Cycles are forbidden. The parse section calls cg but never the reverse. ## Feature workflow @@ -46,12 +48,14 @@ When adding any new codegen-touching feature, **always** in this order: `.expected-exit` (default `0`) and optional `.expected` stdout file. At this point the test fails — that's the spec. 2. **Implement the cg primitive(s).** Add or fix the API surface in - `cc/cg.scm`. Iterate until `make test SUITE=cc-cg` passes. + the cg section of `cc/cc.scm`. Iterate until `make test SUITE=cc-cg` + passes. 3. **cc fixture next.** Write a `tests/cc/<n>-name.c` that exercises the same feature from the C source side, with a `main` that exits with a known value. Add `.expected-exit` / `.expected`. Test fails — that's the spec for the parser. -4. **Implement the parse changes.** Wire C syntax through `cc/parse.scm` +4. **Implement the parse changes.** Wire C syntax through the parse + section of `cc/cc.scm` using the cg primitives from step 2. Iterate until `make test SUITE=cc` passes. diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -248,7 +248,7 @@ _cc_unit_suite() { } run_cc_util_suite() { - _cc_unit_suite cc-util expected "scheme1/prelude.scm cc/util.scm" + _cc_unit_suite cc-util expected "scheme1/prelude.scm cc/cc.scm" } # _cc_pipeline_suite <suite-name> <expected-ext> <layers> @@ -289,7 +289,7 @@ _cc_pipeline_suite() { run_cc_lex_suite() { _cc_pipeline_suite cc-lex expected-toks \ - "scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm tests/cc-lex/_run-lex.scm" + "scheme1/prelude.scm cc/cc.scm tests/cc-lex/_run-lex.scm" } # Two passes: .c fixtures via the lex+pp pipeline; the lone .scm fixture @@ -299,10 +299,10 @@ run_cc_lex_suite() { run_cc_pp_suite() { saved=$NAMES _cc_pipeline_suite cc-pp expected-toks \ - "scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm tests/cc-pp/_run-pp.scm" + "scheme1/prelude.scm cc/cc.scm tests/cc-pp/_run-pp.scm" NAMES=$saved _cc_unit_suite cc-pp expected \ - "scheme1/prelude.scm cc/util.scm cc/data.scm cc/pp.scm" + "scheme1/prelude.scm cc/cc.scm" } # _cc_runtime_suite <suite-name> <fixture-ext> <layers> [<fixture-as-arg?>] @@ -368,7 +368,7 @@ _cc_runtime_suite() { run_cc_cg_suite() { _cc_runtime_suite cc-cg scm \ - "scheme1/prelude.scm cc/util.scm cc/data.scm cc/cg.scm" 0 + "scheme1/prelude.scm cc/cc.scm" 0 } run_cc_suite() {