boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 6bc7fe027448d93fba459b87e00097a972817b6b
parent a86b719a54e31357eadbcd172dc3bd1776912ed4
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 21:58:31 -0700

cc: scaffold compiler tree and test dirs

Six-file source layout under cc/, with public function signatures
stubbed via (error "TBD: <name>") so parallel agents can find their
work. data.scm is the only file with real content — it carries every
record-type definition and the canonical keyword/punctuator alists
referenced from CC-CONTRACTS.md §1.

Test fixtures seeded with one minimal golden per suite (cc-lex,
cc-pp, cc-parse, cc-cg, cc-e2e). The cc-e2e fixture is the phase-1
milestone from CC-CONTRACTS.md §6.

Build wiring (Makefile target, run-tests.sh extension) deferred —
will land alongside the first working module so the runner has
something to exercise.

Diffstat:
Acc/README.md | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Acc/cg.scm | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acc/data.scm | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acc/lex.scm | 39+++++++++++++++++++++++++++++++++++++++
Acc/main.scm | 19+++++++++++++++++++
Acc/parse.scm | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acc/pp.scm | 46++++++++++++++++++++++++++++++++++++++++++++++
Acc/util.scm | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/cc-cg/00-fn-empty.scm | 13+++++++++++++
Atests/cc-e2e/00-return-argc.c | 8++++++++
Atests/cc-e2e/00-return-argc.expected-exit | 1+
Atests/cc-lex/00-empty.c | 0
Atests/cc-lex/00-empty.expected-toks | 1+
Atests/cc-lex/01-keywords.c | 1+
Atests/cc-lex/01-keywords.expected-toks | 11+++++++++++
Atests/cc-parse/00-empty-main.c | 1+
Atests/cc-parse/00-empty-main.expected-trace | 4++++
Atests/cc-pp/00-noop.c | 1+
Atests/cc-pp/00-noop.expected-toks | 6++++++
19 files changed, 661 insertions(+), 0 deletions(-)

diff --git a/cc/README.md b/cc/README.md @@ -0,0 +1,50 @@ +# cc — C compiler in scheme1 + +A single-file C compiler written in scheme1 (see [LISP.md](../docs/LISP.md)) +that emits P1pp text, replacing MesCC at the live-bootstrap `tcc-mes` stage. + +## Specs + +Read in this order: + +1. [docs/CC.md](../docs/CC.md) — accepted C subset. +2. [docs/CC-INTERNALS.md](../docs/CC-INTERNALS.md) — module interfaces. +3. [docs/CC-CONTRACTS.md](../docs/CC-CONTRACTS.md) — frozen alphabets, ABI, + test formats, mangling, phase-1 milestone. + +## Files + +| File | Purpose | +|------|---------| +| `util.scm` | Leaf helpers (bv, alist, buf, die). | +| `data.scm` | Records + symbol alphabets shared across modules. | +| `lex.scm` | Bytestream → token list. | +| `pp.scm` | Token list → expanded token list. | +| `cg.scm` | Codegen state + emission API. | +| `parse.scm` | Recursive-descent + Pratt; calls cg directly. | +| `main.scm` | argv handling + driver. | + +Build (single source for scheme1): + +``` +catm cc/cc.scm \ + cc/util.scm cc/data.scm cc/lex.scm cc/pp.scm \ + cc/cg.scm cc/parse.scm cc/main.scm +``` + +Run via the existing `boot-run-scheme1.sh` wrapper, which prepends +`scheme1/prelude.scm` ahead of the catm'd compiler source. + +## Status + +Scaffolded. Every public function body is `(error "TBD: <name>")`. +Engineers fill in their assigned module; the contracts in +CC-CONTRACTS.md keep the interfaces stable. + +## Tests + +- `tests/cc-lex/` — lexer goldens (token serialization) +- `tests/cc-pp/` — preprocessor goldens +- `tests/cc-parse/` — parser → cg-trace mock +- `tests/cc-cg/` — direct cg API tests +- `tests/cc-e2e/` — `.c` → ELF → run diff --git a/cc/cg.scm b/cc/cg.scm @@ -0,0 +1,97 @@ +;; cc/cg.scm — codegen state and emission API. +;; +;; Realization of docs/CC-INTERNALS.md §cg.scm. Conversion- +;; responsibility split per CC-CONTRACTS §4: parse owns +;; promotion/arith-conv/lval-to-rval; cg owns sign extension, +;; signed/unsigned variant selection, and pointer scaling. +;; +;; Output goes through libp1pp's structured macros (%fn, %ifelse_nez, +;; %loop_tag, %break, %continue) — see docs/LIBP1PP.md. +;; +;; Owner: <unassigned> + +;; -------------------------------------------------------------------- +;; Lifecycle +;; -------------------------------------------------------------------- +(define (cg-init) (error "TBD: cg-init")) +(define (cg-finish cg) (error "TBD: cg-finish")) + +(define (cg-fn-begin cg name params return-type) (error "TBD: cg-fn-begin")) +(define (cg-fn-end cg) (error "TBD: cg-fn-end")) + +;; -------------------------------------------------------------------- +;; Vstack — push / pop / inspect +;; -------------------------------------------------------------------- +(define (cg-push cg op) (error "TBD: cg-push")) +(define (cg-pop cg) (error "TBD: cg-pop")) +(define (cg-top cg) (error "TBD: cg-top")) +(define (cg-depth cg) (error "TBD: cg-depth")) + +;; -------------------------------------------------------------------- +;; Materialize +;; -------------------------------------------------------------------- +(define (cg-push-imm cg ctype value) (error "TBD: cg-push-imm")) +(define (cg-push-string cg bv-content) (error "TBD: cg-push-string")) +(define (cg-push-sym cg sym) (error "TBD: cg-push-sym")) +(define (cg-push-deref cg) (error "TBD: cg-push-deref")) + +;; -------------------------------------------------------------------- +;; Address & deref operators +;; -------------------------------------------------------------------- +(define (cg-take-addr cg) (error "TBD: cg-take-addr")) +(define (cg-load cg) (error "TBD: cg-load")) + +;; -------------------------------------------------------------------- +;; Type conversions +;; -------------------------------------------------------------------- +(define (cg-cast cg to-type) (error "TBD: cg-cast")) +(define (cg-promote cg) (error "TBD: cg-promote")) +(define (cg-arith-conv cg) (error "TBD: cg-arith-conv")) + +;; -------------------------------------------------------------------- +;; Operators (CC-CONTRACTS §1.11) +;; -------------------------------------------------------------------- +(define (cg-binop cg op) (error "TBD: cg-binop")) +(define (cg-unop cg op) (error "TBD: cg-unop")) +(define (cg-assign cg) (error "TBD: cg-assign")) + +;; -------------------------------------------------------------------- +;; Calls — pops fn and args from vstack; pushes result if has-result? +;; -------------------------------------------------------------------- +(define (cg-call cg arity has-result?) (error "TBD: cg-call")) + +;; -------------------------------------------------------------------- +;; Return — pops rval, stores into return slot, branches to ::ret +;; -------------------------------------------------------------------- +(define (cg-return cg) (error "TBD: cg-return")) + +;; -------------------------------------------------------------------- +;; Structured control flow — thunks let parse recurse into the body. +;; -------------------------------------------------------------------- +(define (cg-if cg then-thunk) (error "TBD: cg-if")) +(define (cg-ifelse cg then-thunk else-thunk) (error "TBD: cg-ifelse")) + +(define (cg-loop cg head-thunk body-thunk) (error "TBD: cg-loop")) +(define (cg-loop-end cg tag) (error "TBD: cg-loop-end")) +(define (cg-break cg tag) (error "TBD: cg-break")) +(define (cg-continue cg tag) (error "TBD: cg-continue")) + +;; -------------------------------------------------------------------- +;; switch +;; -------------------------------------------------------------------- +(define (cg-switch-begin cg) (error "TBD: cg-switch-begin")) +(define (cg-switch-case cg swctx const-int) (error "TBD: cg-switch-case")) +(define (cg-switch-default cg swctx) (error "TBD: cg-switch-default")) +(define (cg-switch-end cg swctx) (error "TBD: cg-switch-end")) + +;; -------------------------------------------------------------------- +;; Globals and data +;; -------------------------------------------------------------------- +(define (cg-emit-global cg sym init-bv-or-#f) (error "TBD: cg-emit-global")) +(define (cg-emit-extern cg sym) (error "TBD: cg-emit-extern")) +(define (cg-intern-string cg bv-content) (error "TBD: cg-intern-string")) + +;; -------------------------------------------------------------------- +;; Frame — used internally by cg and by parse for locals +;; -------------------------------------------------------------------- +(define (cg-alloc-slot cg bytes align) (error "TBD: cg-alloc-slot")) diff --git a/cc/data.scm b/cc/data.scm @@ -0,0 +1,218 @@ +;; cc/data.scm — record types and symbol alphabets shared across modules. +;; +;; Concrete realization of: +;; docs/CC-INTERNALS.md §data.scm +;; docs/CC-CONTRACTS.md §1 +;; +;; Adding a record or alphabet symbol requires updating the contract +;; doc first. + +;; -------------------------------------------------------------------- +;; loc — source location for diagnostics +;; -------------------------------------------------------------------- +(define-record-type loc + (%loc file line col) + loc? + (file loc-file) ; bv + (line loc-line) ; fixnum + (col loc-col)) ; fixnum + +;; -------------------------------------------------------------------- +;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for +;; PUNCT value symbols, §1.3 for KW value symbols. +;; -------------------------------------------------------------------- +(define-record-type tok + (%tok kind value loc hide) + tok? + (kind tok-kind) ; symbol from §1.1 + (value tok-value) ; bv | fixnum | symbol | #f + (loc tok-loc) ; loc + (hide tok-hide)) ; list of bv (macro names already expanded) + +(define (make-tok kind value loc) + (%tok kind value loc '())) + +;; -------------------------------------------------------------------- +;; macro — preprocessor macro definition +;; -------------------------------------------------------------------- +(define-record-type macro + (%macro kind params body) + macro? + (kind macro-kind) ; 'obj | 'fn | 'fn-vararg + (params macro-params) ; list of bv + (body macro-body)) ; list of tok + +;; -------------------------------------------------------------------- +;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and +;; CC-INTERNALS §data.scm for the ext payload table. +;; +;; Fields that mutate over a ctype's lifetime: +;; size and align — set to -1/-1 on forward struct/union decl, +;; fixed when the type is completed. +;; ext — same; struct/union ext changes shape when +;; the body is parsed. +;; -------------------------------------------------------------------- +(define-record-type ctype + (%ctype kind size align ext) + ctype? + (kind ctype-kind) + (size ctype-size ctype-size-set!) + (align ctype-align ctype-align-set!) + (ext ctype-ext ctype-ext-set!)) + +;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?. +(define %t-void (%ctype 'void -1 -1 #f)) +(define %t-i8 (%ctype 'i8 1 1 #f)) +(define %t-u8 (%ctype 'u8 1 1 #f)) +(define %t-i16 (%ctype 'i16 2 2 #f)) +(define %t-u16 (%ctype 'u16 2 2 #f)) +(define %t-i32 (%ctype 'i32 4 4 #f)) +(define %t-u32 (%ctype 'u32 4 4 #f)) +(define %t-i64 (%ctype 'i64 8 8 #f)) +(define %t-u64 (%ctype 'u64 8 8 #f)) +(define %t-bool (%ctype 'bool 1 1 #f)) + +;; -------------------------------------------------------------------- +;; sym — declared identifier (function, variable, typedef, …) +;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage). +;; -------------------------------------------------------------------- +(define-record-type sym + (%sym name kind storage type slot) + sym? + (name sym-name) ; bv + (kind sym-kind) ; symbol from §1.7 + (storage sym-storage) ; symbol from §1.8 or #f + (type sym-type) ; ctype + (slot sym-slot)) ; fixnum | bv | #f, per kind + +;; -------------------------------------------------------------------- +;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind), +;; §1.10 (reg names). +;; -------------------------------------------------------------------- +(define-record-type opnd + (%opnd kind type ext lval?) + opnd? + (kind opnd-kind) + (type opnd-type) + (ext opnd-ext) + (lval? opnd-lval?)) + +;; -------------------------------------------------------------------- +;; loop-ctx — entry on parser's loop/switch context stack. +;; See CC-CONTRACTS §1.9. +;; -------------------------------------------------------------------- +(define-record-type loop-ctx + (%loop-ctx kind tag has-continue?) + loop-ctx? + (kind loop-ctx-kind) + (tag loop-ctx-tag) + (has-continue? loop-ctx-has-continue?)) + +;; -------------------------------------------------------------------- +;; fn-ctx — current-function context inside the parser. +;; -------------------------------------------------------------------- +(define-record-type fn-ctx + (%fn-ctx name return-type params variadic? labels) + fn-ctx? + (name fn-ctx-name) + (return-type fn-ctx-return-type) + (params fn-ctx-params) + (variadic? fn-ctx-variadic?) + (labels fn-ctx-labels fn-ctx-labels-set!)) + +;; -------------------------------------------------------------------- +;; pstate — parser state. Owned by parse.scm; read-only to cg. +;; -------------------------------------------------------------------- +(define-record-type pstate + (%pstate toks scope tags loops fn-ctx typedefs cg) + pstate? + (toks ps-toks ps-toks-set!) + (scope ps-scope ps-scope-set!) + (tags ps-tags ps-tags-set!) + (loops ps-loops ps-loops-set!) + (fn-ctx ps-fn-ctx ps-fn-ctx-set!) + (typedefs ps-typedefs ps-typedefs-set!) + (cg ps-cg)) + +;; -------------------------------------------------------------------- +;; cg — codegen state. Owned by cg.scm. +;; -------------------------------------------------------------------- +(define-record-type cg + (%cg text data bss vstack frame-hi label-ctr str-pool globals fn-buf prologue-buf max-outgoing) + cg? + (text cg-text) + (data cg-data) + (bss cg-bss) + (vstack cg-vstack cg-vstack-set!) + (frame-hi cg-frame-hi cg-frame-hi-set!) + (label-ctr cg-label-ctr cg-label-ctr-set!) + (str-pool cg-str-pool cg-str-pool-set!) + (globals cg-globals cg-globals-set!) + (fn-buf cg-fn-buf cg-fn-buf-set!) + (prologue-buf cg-prologue-buf cg-prologue-buf-set!) + (max-outgoing cg-max-outgoing cg-max-outgoing-set!)) + +;; -------------------------------------------------------------------- +;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1. +;; -------------------------------------------------------------------- + +;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol. +(define %keyword-alist + '(;; storage + ("auto" . auto) ("register" . register) ("static" . static) + ("extern" . extern) ("typedef" . typedef) + ;; qualifiers (parsed and discarded by parse) + ("const" . const) ("volatile" . volatile) ("restrict" . restrict) + ("inline" . inline) + ;; type specifiers + ("void" . void) ("char" . char) ("short" . short) + ("int" . int) ("long" . long) + ("signed" . signed) ("unsigned" . unsigned) ("_Bool" . _Bool) + ;; rejected type specifiers (KW so diagnostics are crisp) + ("float" . float) ("double" . double) + ;; aggregates + ("struct" . struct) ("union" . union) ("enum" . enum) + ;; statements + ("if" . if) ("else" . else) + ("while" . while) ("do" . do) ("for" . for) + ("switch" . switch) ("case" . case) ("default" . default) + ("break" . break) ("continue" . continue) + ("return" . return) ("goto" . goto) + ;; operators + ("sizeof" . sizeof) + ;; reserved-and-rejected (KW so diagnostics are crisp) + ("_Generic" . _Generic) ("_Atomic" . _Atomic) + ("_Thread_local" . _Thread_local) + ("_Alignof" . _Alignof) ("_Alignas" . _Alignas) + ("_Static_assert" . _Static_assert) + ("_Complex" . _Complex) ("_Imaginary" . _Imaginary))) + +;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol. +;; Listed longest-match-first; the lexer scans this list in order. +;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents. +(define %punct-alist + '(;; 4-byte + ("%:%:" . paste) + ;; 3-byte + ("..." . ellipsis) ("<<=" . shl-eq) (">>=" . shr-eq) + ;; 2-byte + ("##" . paste) ("->" . arrow) + ("++" . inc) ("--" . dec) + ("<<" . shl) (">>" . shr) + ("<=" . le) (">=" . ge) ("==" . eq2) ("!=" . ne) + ("&&" . land) ("||" . lor) + ("+=" . plus-eq) ("-=" . minus-eq) ("*=" . star-eq) + ("/=" . slash-eq) ("%=" . pct-eq) + ("&=" . amp-eq) ("^=" . caret-eq) ("|=" . bar-eq) + ;; digraphs (mapped to the standard equivalent symbol) + ("<:" . lbrack) (":>" . rbrack) + ("<%" . lbrace) ("%>" . rbrace) ("%:" . hash) + ;; 1-byte + ("[" . lbrack) ("]" . rbrack) + ("(" . lparen) (")" . rparen) + ("{" . lbrace) ("}" . rbrace) + ("." . dot) ("," . comma) (";" . semi) (":" . colon) ("?" . qmark) + ("+" . plus) ("-" . minus) ("*" . star) ("/" . slash) ("%" . pct) + ("&" . amp) ("|" . bar) ("^" . caret) ("~" . tilde) ("!" . bang) + ("<" . lt) (">" . gt) ("=" . assign) + ("#" . hash))) diff --git a/cc/lex.scm b/cc/lex.scm @@ -0,0 +1,39 @@ +;; cc/lex.scm — bytestream → token list. Pure function; no I/O, +;; no macro awareness. +;; +;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets +;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate. +;; +;; Owner: <unassigned> + +;; -------------------------------------------------------------------- +;; lex-tokenize +;; src : bv (the C bytestream; may be very large) +;; file : bv (filename, recorded into tok-loc) +;; -> : list of tok ending in a single (tok kind=EOF); never #f +;; aborts via util's `die` on unrecognized byte sequences +;; -------------------------------------------------------------------- +(define (lex-tokenize src file) + (error "TBD: lex-tokenize")) + +;; -------------------------------------------------------------------- +;; Internal helpers exposed for unit tests in tests/cc-lex/. +;; Each consumes one logical token starting at byte offset `pos` and +;; returns a (tok . pos') pair; pos' is the byte just past the token. +;; -------------------------------------------------------------------- +(define (lex-read-number src pos file) + (error "TBD: lex-read-number")) + +(define (lex-read-string src pos file) + (error "TBD: lex-read-string")) + +(define (lex-read-char src pos file) + (error "TBD: lex-read-char")) + +(define (lex-read-ident src pos file) + ;; Produces IDENT or KW after consulting %keyword-alist (cc/data.scm). + (error "TBD: lex-read-ident")) + +(define (lex-read-punct src pos file) + ;; Greedy longest-match against %punct-alist (cc/data.scm). + (error "TBD: lex-read-punct")) diff --git a/cc/main.scm b/cc/main.scm @@ -0,0 +1,19 @@ +;; cc/main.scm — driver. argv handling, file I/O, ties phases together. +;; +;; Realization of docs/CC-INTERNALS.md §main.scm. +;; +;; Owner: <unassigned> + +;; -------------------------------------------------------------------- +;; cc-main : argv (list of bv) -> exit-status (fixnum) +;; argv[0] is the program name (per scheme1's `argv` primitive). +;; Recognized CLI: <input.flat.c> -o <output.P1pp> [-D NAME[=VAL]] ... +;; -------------------------------------------------------------------- +(define (cc-main argv) + (error "TBD: cc-main")) + +;; -------------------------------------------------------------------- +;; Top-level invocation. main.scm is the last file in the catm order, +;; so execution begins here once the runtime has finished loading. +;; -------------------------------------------------------------------- +(sys-exit (cc-main (argv))) diff --git a/cc/parse.scm b/cc/parse.scm @@ -0,0 +1,74 @@ +;; cc/parse.scm — recursive-descent + Pratt expression parser. +;; xcc-style direct emit: no AST. Calls cg as it parses. +;; +;; Realization of docs/CC-INTERNALS.md §parse.scm. Conversion- +;; responsibility split per CC-CONTRACTS §4. +;; +;; Owner: <unassigned> + +;; -------------------------------------------------------------------- +;; parse-translation-unit — single entry point +;; ps : pstate with ps-toks set (output of pp-expand) and ps-cg +;; initialized via (cg-init). All other ps-fields start empty. +;; Mutates ps until ps-toks reaches a single (tok kind=EOF). +;; -------------------------------------------------------------------- +(define (parse-translation-unit ps) + (error "TBD: parse-translation-unit")) + +;; -------------------------------------------------------------------- +;; Top-level alternates +;; -------------------------------------------------------------------- +(define (parse-decl-or-fn ps) (error "TBD: parse-decl-or-fn")) +(define (parse-decl-spec ps) (error "TBD: parse-decl-spec")) +(define (parse-declarator ps base-type) (error "TBD: parse-declarator")) +(define (parse-init-list ps target-type) (error "TBD: parse-init-list")) +(define (parse-fn-body ps name decl-type) (error "TBD: parse-fn-body")) + +;; -------------------------------------------------------------------- +;; Statements +;; -------------------------------------------------------------------- +(define (parse-stmt ps) (error "TBD: parse-stmt")) +(define (parse-compound-stmt ps) (error "TBD: parse-compound-stmt")) +(define (parse-if-stmt ps) (error "TBD: parse-if-stmt")) +(define (parse-while-stmt ps) (error "TBD: parse-while-stmt")) +(define (parse-do-stmt ps) (error "TBD: parse-do-stmt")) +(define (parse-for-stmt ps) (error "TBD: parse-for-stmt")) +(define (parse-switch-stmt ps) (error "TBD: parse-switch-stmt")) +(define (parse-return-stmt ps) (error "TBD: parse-return-stmt")) +(define (parse-goto-stmt ps) (error "TBD: parse-goto-stmt")) +(define (parse-expr-stmt ps) (error "TBD: parse-expr-stmt")) + +;; -------------------------------------------------------------------- +;; Expressions — Pratt climber +;; -------------------------------------------------------------------- +(define (parse-expr ps) (parse-expr-bp ps 0)) +(define (parse-expr-bp ps min-bp) (error "TBD: parse-expr-bp")) +(define (parse-primary ps) (error "TBD: parse-primary")) +(define (parse-postfix ps) (error "TBD: parse-postfix")) +(define (parse-unary ps) (error "TBD: parse-unary")) +(define (parse-cast-or-unary ps) (error "TBD: parse-cast-or-unary")) + +;; -------------------------------------------------------------------- +;; Token stream — peek / advance / expect (private convention) +;; -------------------------------------------------------------------- +(define (peek ps) (error "TBD: peek")) +(define (peek2 ps) (error "TBD: peek2")) +(define (advance ps) (error "TBD: advance")) +(define (expect-kw ps sym) (error "TBD: expect-kw")) +(define (expect-punct ps sym) (error "TBD: expect-punct")) +(define (at-kw? ps sym) (error "TBD: at-kw?")) +(define (at-punct? ps sym) (error "TBD: at-punct?")) + +;; -------------------------------------------------------------------- +;; Scope helpers +;; -------------------------------------------------------------------- +(define (scope-enter! ps) (error "TBD: scope-enter!")) +(define (scope-leave! ps) (error "TBD: scope-leave!")) +(define (scope-bind! ps name sym) (error "TBD: scope-bind!")) +(define (scope-lookup ps name) (error "TBD: scope-lookup")) + +(define (tag-bind! ps name ctype) (error "TBD: tag-bind!")) +(define (tag-lookup ps name) (error "TBD: tag-lookup")) + +(define (typedef-add! ps name) (error "TBD: typedef-add!")) +(define (typedef? ps name) (error "TBD: typedef?")) diff --git a/cc/pp.scm b/cc/pp.scm @@ -0,0 +1,46 @@ +;; cc/pp.scm — token list → expanded token list. +;; +;; Realization of docs/CC-INTERNALS.md §pp.scm. Hide-set discipline +;; per C11 6.10.3.4. #include is rejected (see CC.md §Toolchain +;; envelope). +;; +;; Owner: <unassigned> + +;; -------------------------------------------------------------------- +;; pp-expand +;; toks : list of tok (output of lex-tokenize) +;; initial-defines : alist (bv . macro) — typically from -D flags +;; -> : list of tok with HASH and NL stripped +;; KW / IDENT / INT / STR / CHAR / PUNCT / EOF only +;; aborts via die on directive errors +;; -------------------------------------------------------------------- +(define (pp-expand toks initial-defines) + (error "TBD: pp-expand")) + +;; -------------------------------------------------------------------- +;; Constant-expression evaluator for #if / #elif. Tokens are the +;; expression body *after* macro expansion; identifiers that aren't +;; macros are zero per the C standard. +;; -------------------------------------------------------------------- +(define (pp-eval-cexpr toks macros) + (error "TBD: pp-eval-cexpr")) + +;; -------------------------------------------------------------------- +;; Directive handlers — internal. Each takes the token list of one +;; logical directive line (between HASH and the next NL) plus a +;; private pp-state record (defined inside this module, not exposed). +;; -------------------------------------------------------------------- +(define (%pp-do-define line state) (error "TBD: %pp-do-define")) +(define (%pp-do-undef line state) (error "TBD: %pp-do-undef")) +(define (%pp-do-if line state) (error "TBD: %pp-do-if")) +(define (%pp-do-ifdef line state) (error "TBD: %pp-do-ifdef")) +(define (%pp-do-ifndef line state) (error "TBD: %pp-do-ifndef")) +(define (%pp-do-elif line state) (error "TBD: %pp-do-elif")) +(define (%pp-do-else line state) (error "TBD: %pp-do-else")) +(define (%pp-do-endif line state) (error "TBD: %pp-do-endif")) +(define (%pp-do-error line state) (error "TBD: %pp-do-error")) +(define (%pp-do-line line state) (error "TBD: %pp-do-line")) +(define (%pp-do-pragma line state) (error "TBD: %pp-do-pragma")) +(define (%pp-do-include line state) + ;; Always dies — pre-flatten step is upstream of cc. + (error "TBD: %pp-do-include (must die per CC.md)")) diff --git a/cc/util.scm b/cc/util.scm @@ -0,0 +1,71 @@ +;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude. +;; +;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add +;; helpers here freely; the listed signatures are the load-bearing +;; surface other modules call. + +;; -------------------------------------------------------------------- +;; bytevector helpers (scheme1 strings ARE bytevectors) +;; -------------------------------------------------------------------- +(define (bv= a b) (bytevector=? a b)) + +(define (bv-prefix? p s) (error "TBD: bv-prefix?")) +(define (bv-find bv byte from) (error "TBD: bv-find")) +(define (bv-slice bv start end) (error "TBD: bv-slice")) +(define (bv-of-byte b) (make-bytevector 1 b)) +(define (bv-cat lst-of-bv) (error "TBD: bv-cat")) +(define (bv->fixnum bv radix) (error "TBD: bv->fixnum")) +(define (fixnum->bv n radix) (number->string n radix)) + +;; -------------------------------------------------------------------- +;; lists / alists +;; -------------------------------------------------------------------- +(define (alist-ref key al) (error "TBD: alist-ref")) +(define (alist-ref/eq key al) (error "TBD: alist-ref/eq")) +(define (alist-set key val al) (cons (cons key val) al)) +(define (alist-update key f al) (error "TBD: alist-update")) + +(define (any p xs) (error "TBD: any")) +(define (every p xs) (error "TBD: every")) +(define (count p xs) (error "TBD: count")) + +;; -------------------------------------------------------------------- +;; ints +;; -------------------------------------------------------------------- +(define (min3 a b c) (min a (min b c))) +(define (align-up n k) + ;; round n up to the nearest multiple of k (k must be a power of 2) + (let ((mask (- k 1))) + (bit-and (+ n mask) (bit-not mask)))) + +;; -------------------------------------------------------------------- +;; output buffer (reversed list of bv chunks; flush concats once) +;; -------------------------------------------------------------------- +(define-record-type buf + (%buf chunks) + buf? + (chunks buf-chunks buf-chunks-set!)) + +(define (make-buf) (%buf '())) + +(define (buf-push! b bv) + (buf-chunks-set! b (cons bv (buf-chunks b)))) + +(define (buf-flush b) (error "TBD: buf-flush")) + +;; -------------------------------------------------------------------- +;; diagnostics + I/O +;; -------------------------------------------------------------------- +(define (die loc msg . irritants) + ;; Format per CC-INTERNALS §Errors: + ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ... + ;; Writes to fd 2 then sys-exits 1. + (error "TBD: die")) + +(define (slurp-fd fd) (error "TBD: slurp-fd")) +(define (write-bv-fd fd bv) (error "TBD: write-bv-fd")) + +;; -------------------------------------------------------------------- +;; fresh-name generator (used for cg label counters, etc.) +;; -------------------------------------------------------------------- +(define (make-namer prefix) (error "TBD: make-namer")) diff --git a/tests/cc-cg/00-fn-empty.scm b/tests/cc-cg/00-fn-empty.scm @@ -0,0 +1,13 @@ +;; tests/cc-cg/00-fn-empty.scm — minimal direct-cg test. +;; Builds a function that just returns 0, snapshots the P1pp output. +;; +;; Run shape (TBD pending test runner): +;; scheme1 <prelude.scm + cc/*.scm + this file> +;; diff stdout 00-fn-empty.expected-p1pp + +(let ((cg (cg-init))) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-imm cg %t-i32 0) + (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg))) diff --git a/tests/cc-e2e/00-return-argc.c b/tests/cc-e2e/00-return-argc.c @@ -0,0 +1,8 @@ +/* Phase-1 milestone — see docs/CC-CONTRACTS.md §6. + * Returns argc, so: + * ./00-return-argc -> exit 1 (argv[0] only) + * ./00-return-argc a b -> exit 3 + */ +int main(int argc, char **argv) { + return argc; +} diff --git a/tests/cc-e2e/00-return-argc.expected-exit b/tests/cc-e2e/00-return-argc.expected-exit @@ -0,0 +1 @@ +1 diff --git a/tests/cc-lex/00-empty.c b/tests/cc-lex/00-empty.c diff --git a/tests/cc-lex/00-empty.expected-toks b/tests/cc-lex/00-empty.expected-toks @@ -0,0 +1 @@ +(EOF #f "00-empty.c" 1 1) diff --git a/tests/cc-lex/01-keywords.c b/tests/cc-lex/01-keywords.c @@ -0,0 +1 @@ +int main(void) { return 0; } diff --git a/tests/cc-lex/01-keywords.expected-toks b/tests/cc-lex/01-keywords.expected-toks @@ -0,0 +1,11 @@ +(KW int "01-keywords.c" 1 1) +(IDENT "main" "01-keywords.c" 1 5) +(PUNCT lparen "01-keywords.c" 1 9) +(KW void "01-keywords.c" 1 10) +(PUNCT rparen "01-keywords.c" 1 14) +(PUNCT lbrace "01-keywords.c" 1 16) +(KW return "01-keywords.c" 1 18) +(INT 0 "01-keywords.c" 1 25) +(PUNCT semi "01-keywords.c" 1 26) +(PUNCT rbrace "01-keywords.c" 1 28) +(EOF #f "01-keywords.c" 2 1) diff --git a/tests/cc-parse/00-empty-main.c b/tests/cc-parse/00-empty-main.c @@ -0,0 +1 @@ +int main(void) { return 0; } diff --git a/tests/cc-parse/00-empty-main.expected-trace b/tests/cc-parse/00-empty-main.expected-trace @@ -0,0 +1,4 @@ +(fn-begin "main" () i32) +(push-imm i32 0) +(return) +(fn-end) diff --git a/tests/cc-pp/00-noop.c b/tests/cc-pp/00-noop.c @@ -0,0 +1 @@ +int x = 0; diff --git a/tests/cc-pp/00-noop.expected-toks b/tests/cc-pp/00-noop.expected-toks @@ -0,0 +1,6 @@ +(KW int "00-noop.c" 1 1) +(IDENT "x" "00-noop.c" 1 5) +(PUNCT assign "00-noop.c" 1 7) +(INT 0 "00-noop.c" 1 9) +(PUNCT semi "00-noop.c" 1 10) +(EOF #f "00-noop.c" 2 1)