commit 19a45576c25b82c868654d936291e1365c4f1ef4
parent 6aa3cb14986ed02faabb6ecb9cd19d697a95566f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 18:37:07 -0700
parse/cg/decl: scaffold C front-end vertical, pass spine corpus
Recursive-descent parser, value-stack codegen, and DeclTable land as
real implementations. Six spine cases (6_5_01..03 + 6_8_01..03) pass
across all four test paths (D/R/E/J); cases_err/6_5_undeclared still
fails with a precise diagnostic.
Module shape per DESIGN.md §5: parser drives DeclTable for C semantics
and CG for executable code, single-pass with one token of lookahead.
Operators are named per C11 §6.5 productions and statements per §6.8 so
each new feature is a localized addition at its named function. cg_store
follows C semantics (leaves assigned value on the stack); a new
cg_inc_dec helper keeps inc/dec inside CG instead of juggling 3-element
rotates from the parser.
Diffstat:
6 files changed, 2346 insertions(+), 28 deletions(-)
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -37,41 +37,18 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) {
/* ============================================================
* Parser
- * ============================================================ */
+ * ============================================================
+ * parse_c lives in src/parse/parse.c. The asm parser is still a stub
+ * pending its own corpus rows; reaching it from a CFREE_LANG_ASM input
+ * raises a clean diagnostic. */
-void parse_c(Compiler* c, Pp* p, DeclTable* d, CG* g, Debug* dbg) {
- (void)p;
- (void)d;
- (void)g;
- (void)dbg;
- unimplemented(c, "parse_c");
-}
void parse_asm(Compiler* c, Lexer* l, MCEmitter* m) {
(void)l;
(void)m;
unimplemented(c, "parse_asm");
}
-/* ============================================================
- * Declarations
- * ============================================================ */
-
-DeclTable* decl_new(Compiler* c, ObjBuilder* o) {
- (void)o;
- unimplemented(c, "decl");
-}
-void decl_free(DeclTable* d) { (void)d; }
-
-/* ============================================================
- * Codegen + arch target
- * ============================================================ */
-
-CG* cg_new(Compiler* c, CGTarget* t, Debug* d) {
- (void)t;
- (void)d;
- unimplemented(c, "cg");
-}
-void cg_free(CG* g) { (void)g; }
+/* DeclTable lives in src/decl/decl.c. CG lives in src/cg/cg.c. */
/* mc_new / mc_free live in src/arch/mc.c.
* cgtarget_new / cgtarget_finalize / cgtarget_free live in src/arch/<target>.c
diff --git a/src/cg/cg.c b/src/cg/cg.c
@@ -0,0 +1,948 @@
+/* Single-pass code generator with a TCC-style value stack.
+ *
+ * The parser pushes values (lvalues, immediates, register rvalues) and
+ * issues operations; cg materializes operands and dispatches to CGTarget.
+ * No AST. At -O0 the wrapped target backend is a real CGTarget; at -O1+
+ * opt_cgtarget records the same calls into IR for cross-function passes.
+ *
+ * Value stack semantics:
+ * - SValue.op carries an Operand whose `kind` decides what the value is.
+ * - OPK_IMM / OPK_REG are rvalues (can be consumed by binop/cmp/store).
+ * - OPK_LOCAL / OPK_GLOBAL / OPK_INDIRECT are lvalues. cg_load promotes
+ * them to OPK_REG via target->load + a fresh scratch register.
+ *
+ * This is the spine slice — enough for §6.5/§6.8 fixtures: scalar i32
+ * locals, integer arithmetic, comparisons, control flow, and return.
+ * Aggregates, atomics, calls, and the asm/setjmp/intrinsic methods are
+ * placeholders pending their corpus rows. The interface in cg.h is the
+ * commitment; this file fills in the slice that's exercised today. */
+
+#include "cg/cg.h"
+
+#include <string.h>
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "debug/debug.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+/* ============================================================
+ * Value stack
+ * ============================================================ */
+
+/* One entry of the code generator's value stack: a backend Operand paired
+ * with the C type recorded for it. Several consumers fall back to
+ * `op.type` when `type` is NULL. */
+typedef struct SValue {
+ Operand op; /* IMM/REG (rvalue) or LOCAL/GLOBAL/INDIRECT (lvalue) */
+ const Type* type; /* C semantic type of the value (post-promotion) */
+} SValue;
+
+#define CG_STACK_INITIAL 16u
+
+/* Code generator state: collaborators fixed at construction, the
+ * per-function context installed by cg_func_begin, and the value stack. */
+struct CG {
+ Compiler* c;
+ CGTarget* target;
+ Debug* debug;
+ TargetABI* abi;
+ Pool* pool;
+
+ /* Function scope */
+ const CGFuncDesc* fn_desc;
+ ObjSymId fn_sym;
+ ObjSecId fn_text_sec;
+ /* Emitter position sampled just before target->func_begin; 0 when the
+ * target has no mc. */
+ u32 fn_begin_pos;
+ const Type* fn_ret_type;
+ const ABIFuncInfo* fn_abi;
+
+ /* Most recent location passed to cg_set_loc; used for panics. */
+ SrcLoc cur_loc;
+
+ /* Value stack — grown via heap; arena would also work but heap is fine
+ * since it's freed in cg_free. */
+ SValue* stack;
+ u32 sp; /* number of live entries */
+ u32 cap; /* allocated entries */
+};
+
+/* Make room for at least `want` entries on the value stack.
+ *
+ * Capacity starts at CG_STACK_INITIAL and doubles until it covers `want`.
+ * The live entries [0, sp) are copied into the new buffer and the old one
+ * is returned to the compiler heap. Does nothing when the current
+ * capacity is already large enough. */
+static void stack_grow(CG* g, u32 want) {
+  Heap* heap = g->c->env->heap;
+  u32 new_cap = g->cap;
+  SValue* fresh;
+
+  if (new_cap >= want) return;
+  do {
+    new_cap = (new_cap == 0u) ? CG_STACK_INITIAL : new_cap * 2u;
+  } while (new_cap < want);
+
+  fresh = (SValue*)heap->alloc(heap, sizeof(SValue) * new_cap,
+                               _Alignof(SValue));
+  if (g->stack != NULL) {
+    memcpy(fresh, g->stack, sizeof(SValue) * g->sp);
+    heap->free(heap, g->stack, sizeof(SValue) * g->cap);
+  }
+  g->stack = fresh;
+  g->cap = new_cap;
+}
+
+/* Append `v` to the value stack, growing the backing buffer if needed. */
+static void push(CG* g, SValue v) {
+  u32 slot = g->sp;
+  stack_grow(g, slot + 1);
+  g->stack[slot] = v;
+  g->sp = slot + 1;
+}
+
+/* Remove and return the top stack entry; panics on an empty stack. */
+static SValue pop(CG* g) {
+  u32 depth = g->sp;
+  if (depth == 0) {
+    compiler_panic(g->c, g->cur_loc, "cg: stack underflow");
+  }
+  g->sp = depth - 1;
+  return g->stack[depth - 1];
+}
+
+/* Return a pointer to the top stack entry without popping it; panics on
+ * an empty stack. The pointer is into the stack buffer, so a later push
+ * that grows the stack leaves it dangling. */
+static SValue* top(CG* g) {
+  u32 depth = g->sp;
+  if (depth == 0) {
+    compiler_panic(g->c, g->cur_loc, "cg: stack empty");
+  }
+  return g->stack + (depth - 1);
+}
+
+/* ============================================================
+ * Operand sugar
+ * ============================================================ */
+
+/* Register class for a value of type `ty`: RC_FP for float, double and
+ * long double; RC_INT for everything else, including a NULL type. */
+static u8 type_class(const Type* ty) {
+  if (ty == NULL) return RC_INT;
+  switch (ty->kind) {
+    case TY_FLOAT:
+    case TY_DOUBLE:
+    case TY_LDOUBLE:
+      return RC_FP;
+    default:
+      return RC_INT;
+  }
+}
+
+/* Build an immediate operand holding `v`, typed `ty`. All other bytes of
+ * the Operand are zeroed. */
+static Operand op_imm(i64 v, const Type* ty) {
+  Operand imm;
+  memset(&imm, 0, sizeof imm);
+  imm.kind = OPK_IMM;
+  imm.type = ty;
+  imm.cls = type_class(ty);
+  imm.v.imm = v;
+  return imm;
+}
+
+/* Build a register operand for `r`, typed `ty`; the register class is
+ * derived from the type. All other bytes of the Operand are zeroed. */
+static Operand op_reg(Reg r, const Type* ty) {
+  Operand reg;
+  memset(&reg, 0, sizeof reg);
+  reg.kind = OPK_REG;
+  reg.type = ty;
+  reg.cls = type_class(ty);
+  reg.v.reg = r;
+  return reg;
+}
+
+/* Build an lvalue operand naming frame slot `s`, typed `ty`. The class
+ * is always RC_INT regardless of `ty`. */
+static Operand op_local(FrameSlot s, const Type* ty) {
+  Operand loc;
+  memset(&loc, 0, sizeof loc);
+  loc.kind = OPK_LOCAL;
+  loc.type = ty;
+  loc.cls = RC_INT;
+  loc.v.frame_slot = s;
+  return loc;
+}
+
+/* Build an lvalue operand naming `sym + addend`, typed `ty`. The class
+ * is always RC_INT regardless of `ty`. */
+static Operand op_global(ObjSymId sym, i64 addend, const Type* ty) {
+  Operand glob;
+  memset(&glob, 0, sizeof glob);
+  glob.kind = OPK_GLOBAL;
+  glob.type = ty;
+  glob.cls = RC_INT;
+  glob.v.global.addend = addend;
+  glob.v.global.sym = sym;
+  return glob;
+}
+
+/* ============================================================
+ * MemAccess derivation
+ * ============================================================ */
+
+/* Describe a memory access of type `ty` for the backend.
+ *
+ * Size and alignment come from the target ABI; the volatile/atomic
+ * qualifiers of `ty` become MF_VOLATILE / MF_ATOMIC. `alias_local` is
+ * recorded only when `alias_kind` is ALIAS_LOCAL. */
+static MemAccess derive_mem(CG* g, const Type* ty, AliasKind alias_kind,
+                            i32 alias_local) {
+  MemAccess acc;
+  memset(&acc, 0, sizeof acc);
+  acc.type = ty;
+  acc.size = abi_sizeof(g->abi, ty);
+  acc.align = abi_alignof(g->abi, ty);
+  acc.flags = MF_NONE;
+  if (ty) {
+    if (ty->qual & Q_VOLATILE) acc.flags |= MF_VOLATILE;
+    if (ty->qual & Q_ATOMIC) acc.flags |= MF_ATOMIC;
+  }
+  acc.alias.kind = (u8)alias_kind;
+  if (alias_kind == ALIAS_LOCAL) acc.alias.v.local_id = alias_local;
+  return acc;
+}
+
+/* Map an lvalue operand to its alias root: locals and globals have a
+ * known root; indirect (and any other) operands are ALIAS_UNKNOWN. */
+static AliasKind alias_for_lvalue(const Operand* o) {
+  if (o->kind == OPK_LOCAL) return ALIAS_LOCAL;
+  if (o->kind == OPK_GLOBAL) return ALIAS_GLOBAL;
+  return ALIAS_UNKNOWN;
+}
+
+/* ============================================================
+ * Construction
+ * ============================================================ */
+
+/* Allocate a zeroed CG on the compiler heap and wire it to its
+ * collaborators.
+ *
+ * The ABI and type pool are taken from `c`. When a target is supplied,
+ * `d` is also installed on the target and, if present, on its mc so
+ * per-instruction emit calls can attribute line rows (cg owns this
+ * hookup per DESIGN §11). Release with cg_free. */
+CG* cg_new(Compiler* c, CGTarget* t, Debug* d) {
+  Heap* heap = c->env->heap;
+  CG* cg = (CG*)heap->alloc(heap, sizeof(CG), _Alignof(CG));
+  memset(cg, 0, sizeof(CG));
+  cg->c = c;
+  cg->abi = c->abi;
+  cg->pool = c->global;
+  cg->target = t;
+  cg->debug = d;
+  if (t != NULL) {
+    t->debug = d;
+    if (t->mc != NULL) t->mc->debug = d;
+  }
+  return cg;
+}
+
+/* Release the value stack and the CG itself. NULL is accepted. */
+void cg_free(CG* g) {
+  Heap* heap;
+  if (g == NULL) return;
+  heap = g->c->env->heap;
+  if (g->stack != NULL) {
+    heap->free(heap, g->stack, sizeof(SValue) * g->cap);
+  }
+  heap->free(heap, g, sizeof(CG));
+}
+
+/* ============================================================
+ * Function lifecycle
+ * ============================================================ */
+
+/* Open a function: cache the descriptor's symbol, text section, return
+ * type and ABI info, reset the value stack, notify Debug, then hand the
+ * descriptor to the target. */
+void cg_func_begin(CG* g, const CGFuncDesc* fd) {
+  CGTarget* tgt = g->target;
+  const Type* ret_ty = NULL;
+
+  if (fd->fn_type) ret_ty = fd->fn_type->fn.ret;
+
+  g->sp = 0;
+  g->fn_desc = fd;
+  g->fn_sym = fd->sym;
+  g->fn_text_sec = fd->text_section_id;
+  g->fn_ret_type = ret_ty;
+  g->fn_abi = fd->abi;
+
+  /* Class-1 DWARF: a new subprogram opens. doc/DWARF.md §3.1 makes this
+   * the parser's job; we forward through cg as a convenience hook. */
+  if (g->debug) {
+    debug_func_begin(g->debug, fd->sym, DEBUG_TYPE_NONE, fd->loc);
+  }
+
+  /* Sample the emitter position before the target emits anything for this
+   * function; cg_func_end reports it as the start of the PC range. */
+  if (tgt->mc) {
+    g->fn_begin_pos = tgt->mc->pos(tgt->mc);
+  } else {
+    g->fn_begin_pos = 0u;
+  }
+  tgt->func_begin(tgt, fd);
+}
+
+/* Close the current function.
+ *
+ * Finishes the function in the target, then closes the Debug subprogram
+ * that cg_func_begin opened. The PC range is reported only when the
+ * target has an mc to sample a position from, but debug_func_end is
+ * always paired with the debug_func_begin issued in cg_func_begin, which
+ * is conditional on `g->debug` alone. */
+void cg_func_end(CG* g) {
+  CGTarget* T = g->target;
+  T->func_end(T);
+  if (g->debug) {
+    if (T->mc) {
+      u32 end_pos = T->mc->pos(T->mc);
+      debug_func_pc_range(g->debug, g->fn_text_sec, g->fn_begin_pos,
+                          end_pos);
+    }
+    debug_func_end(g->debug);
+  }
+  g->fn_desc = NULL;
+}
+
+/* ============================================================
+ * Locals / parameters
+ * ============================================================ */
+
+/* Ask the target for a frame slot matching `d` and return it. */
+FrameSlot cg_local(CG* g, const FrameSlotDesc* d) {
+  CGTarget* tgt = g->target;
+  return tgt->frame_slot(tgt, d);
+}
+
+/* Forward a parameter description to the target. */
+void cg_param(CG* g, const CGParamDesc* d) {
+  CGTarget* tgt = g->target;
+  tgt->param(tgt, d);
+}
+
+/* Intentionally a no-op: declaration binding is handled by the parser at
+ * this slice, so cg has nothing to record. */
+void cg_bind_decl(CG* g, DeclId id) {
+  (void)id;
+  (void)g;
+}
+
+/* ============================================================
+ * Pushes
+ * ============================================================ */
+
+/* Push the integer constant `v` of type `ty` as an immediate rvalue. */
+void cg_push_int(CG* g, i64 v, const Type* ty) {
+  SValue imm;
+  imm.type = ty;
+  imm.op = op_imm(v, ty);
+  push(g, imm);
+}
+
+/* Push a constant given as raw bytes.
+ *
+ * The constant is materialized into a freshly allocated register through
+ * target->load_const, so the pushed value is a plain REG rvalue. Whether
+ * that uses a constant pool or an immediate encoding is the backend's
+ * choice. */
+void cg_push_const(CG* g, ConstBytes cb) {
+  CGTarget* tgt = g->target;
+  Reg reg = tgt->alloc_reg(tgt, type_class(cb.type), cb.type);
+  SValue result;
+
+  result.op = op_reg(reg, cb.type);
+  result.type = cb.type;
+  tgt->load_const(tgt, result.op, cb);
+  push(g, result);
+}
+
+/* Push a floating constant from a host `double`.
+ *
+ * Convenience path that sidesteps exact-bit literal materialization;
+ * conforming literal parsing should prefer cg_push_const. The value is
+ * narrowed to `float` when `ty` is TY_FLOAT and kept as `double`
+ * otherwise, then handed to cg_push_const as raw bytes.
+ *
+ * The staging buffer is 8 bytes. A target type whose ABI size exceeds
+ * that (e.g. a 16-byte long double) cannot be represented here and would
+ * make the backend read past the buffer, so it is rejected with a panic
+ * instead. The buffer and the ConstBytes record are zeroed first so no
+ * indeterminate bytes or fields reach the backend. */
+void cg_push_float(CG* g, double v, const Type* ty) {
+  union {
+    double d;
+    float f;
+    u8 b[8];
+  } u;
+  ConstBytes cb;
+
+  memset(&u, 0, sizeof u);
+  memset(&cb, 0, sizeof cb);
+  cb.type = ty;
+  cb.size = abi_sizeof(g->abi, ty);
+  cb.align = abi_alignof(g->abi, ty);
+  if (cb.size > sizeof u.b) {
+    compiler_panic(g->c, g->cur_loc,
+                   "cg_push_float: type wider than double; use cg_push_const");
+  }
+  if (ty && ty->kind == TY_FLOAT) {
+    u.f = (float)v;
+  } else {
+    u.d = v;
+  }
+  /* `u` outlives the call: cg_push_const consumes the bytes before
+   * returning. */
+  cb.bytes = u.b;
+  cg_push_const(g, cb);
+}
+
+/* Intended to place the string bytes in .rodata and push a pointer to
+ * them. Unused by the spine corpus, so for now every call panics. */
+void cg_push_str(CG* g, Sym str_id, const Type* ty) {
+  (void)ty;
+  (void)str_id;
+  compiler_panic(g->c, g->cur_loc, "cg_push_str: not implemented in v1 slice");
+}
+
+/* Push frame slot `s` as an untyped lvalue.
+ *
+ * cg does not record slot types; the declared type lives in the parser's
+ * scope record. This base entry therefore pushes a NULL type and exists
+ * for completeness — the parser uses the type-aware variant instead. */
+void cg_push_local(CG* g, FrameSlot s) {
+  SValue lv;
+  lv.type = NULL;
+  lv.op = op_local(s, NULL);
+  push(g, lv);
+}
+
+/* Type-aware variant of cg_push_local used by the parser: pushes frame
+ * slot `s` as an lvalue of type `ty`. It is not declared in the public
+ * header, so the prototype is repeated here ahead of the definition. */
+void cg_push_local_typed(CG* g, FrameSlot s, const Type* ty);
+void cg_push_local_typed(CG* g, FrameSlot s, const Type* ty) {
+  SValue lv;
+  lv.type = ty;
+  lv.op = op_local(s, ty);
+  push(g, lv);
+}
+
+/* Push symbol `sym` (addend 0) as an lvalue of type `ty`. */
+void cg_push_global(CG* g, ObjSymId sym, const Type* ty) {
+  SValue lv;
+  lv.type = ty;
+  lv.op = op_global(sym, 0, ty);
+  push(g, lv);
+}
+
+/* ============================================================
+ * Stack manipulation
+ * ============================================================ */
+
+/* Duplicate the top stack entry. The entry is copied by value before
+ * push runs, so a stack reallocation inside push is harmless. */
+void cg_dup(CG* g) {
+  SValue copy = *top(g);
+  push(g, copy);
+}
+
+/* Exchange the two topmost stack entries; panics if fewer than two are
+ * present. */
+void cg_swap(CG* g) {
+  SValue* upper;
+  SValue tmp;
+  if (g->sp < 2) compiler_panic(g->c, g->cur_loc, "cg_swap: need 2 values");
+  upper = &g->stack[g->sp - 1];
+  tmp = upper[0];
+  upper[0] = upper[-1];
+  upper[-1] = tmp;
+}
+
+/* Discard the top stack entry; panics on an empty stack. */
+void cg_drop(CG* g) {
+  SValue discarded = pop(g);
+  (void)discarded;
+}
+
+/* ============================================================
+ * load / store / addr
+ * ============================================================ */
+
+/* Nonzero when `o` names storage (local, global or indirect) rather than
+ * holding a value. */
+static int is_lvalue(const Operand* o) {
+  switch (o->kind) {
+    case OPK_LOCAL:
+    case OPK_GLOBAL:
+    case OPK_INDIRECT:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Turn the top of stack into an rvalue.
+ *
+ * An rvalue is pushed back untouched, so the parser may call cg_load
+ * eagerly. An lvalue is read through target->load into a freshly
+ * allocated register, and that register replaces it on the stack. The
+ * SValue's type is used, falling back to the operand's type. */
+void cg_load(CG* g) {
+  CGTarget* tgt = g->target;
+  SValue v = pop(g);
+  const Type* ty;
+  AliasKind root;
+  i32 slot_id = 0;
+  MemAccess acc;
+  SValue loaded;
+
+  if (!is_lvalue(&v.op)) {
+    push(g, v);
+    return;
+  }
+
+  ty = v.type ? v.type : v.op.type;
+  loaded.op = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  loaded.type = ty;
+
+  root = alias_for_lvalue(&v.op);
+  if (root == ALIAS_LOCAL) slot_id = (i32)v.op.v.frame_slot;
+  acc = derive_mem(g, ty, root, slot_id);
+
+  tgt->load(tgt, loaded.op, v.op, acc);
+  push(g, loaded);
+}
+
+/* Replace the lvalue on top of the stack with its address.
+ *
+ * The result is an integer-class register typed pointer-to-T, filled in
+ * by target->addr_of. Panics when the top of stack is not an lvalue. */
+void cg_addr(CG* g) {
+  CGTarget* tgt = g->target;
+  SValue v = pop(g);
+  SValue result;
+
+  if (!is_lvalue(&v.op)) {
+    compiler_panic(g->c, g->cur_loc, "cg_addr: operand is not an lvalue");
+  }
+
+  result.type = type_ptr(g->pool, v.type ? v.type : v.op.type);
+  result.op = op_reg(tgt->alloc_reg(tgt, RC_INT, result.type), result.type);
+  tgt->addr_of(tgt, result.op, v.op);
+  push(g, result);
+}
+
+/* Store the top of stack into the lvalue beneath it.
+ *
+ * Pops the value, then the destination; panics if the destination is not
+ * an lvalue. A REG or IMM source is stored as-is; any other source is
+ * first loaded into a fresh register. The stored operand is pushed back,
+ * typed with the destination's type. No conversion is performed here. */
+void cg_store(CG* g) {
+ /* stack: [..., lv, rv] → [..., rv]
+ *
+ * C semantics: the value of an assignment expression is the value
+ * stored. Leaving rv on top of the stack lets the parser fall through
+ * to the next operator naturally; statement-context callers cg_drop
+ * the leftover. */
+ SValue rv = pop(g);
+ SValue lv = pop(g);
+ CGTarget* T = g->target;
+ if (!is_lvalue(&lv.op)) {
+ compiler_panic(g->c, g->cur_loc, "cg_store: destination is not an lvalue");
+ }
+ {
+ const Type* ty = lv.type ? lv.type : lv.op.type;
+ AliasKind ak = alias_for_lvalue(&lv.op);
+ i32 alias_local = (ak == ALIAS_LOCAL) ? (i32)lv.op.v.frame_slot : 0;
+ MemAccess ma = derive_mem(g, ty, ak, alias_local);
+ /* IMM is a legal source for store; otherwise force into a reg. */
+ Operand src = rv.op;
+ if (src.kind != OPK_REG && src.kind != OPK_IMM) {
+ /* The temporary register takes the destination's type and class; the
+ * read itself is described with the source's own type when it has one. */
+ Reg r = T->alloc_reg(T, type_class(ty), ty);
+ Operand dst = op_reg(r, ty);
+ MemAccess mr;
+ AliasKind sak = alias_for_lvalue(&src);
+ i32 saloc = (sak == ALIAS_LOCAL) ? (i32)src.v.frame_slot : 0;
+ mr = derive_mem(g, rv.type ? rv.type : ty, sak, saloc);
+ T->load(T, dst, src, mr);
+ src = dst;
+ }
+ T->store(T, lv.op, src, ma);
+ {
+ /* Result of the assignment expression: the stored operand. */
+ SValue out;
+ out.op = src;
+ out.type = ty;
+ push(g, out);
+ }
+ }
+}
+
+/* ============================================================
+ * Aggregates / bitfields — placeholders
+ * ============================================================ */
+
+/* Aggregate and bitfield entry points. None is implemented in this slice:
+ * each ignores its access descriptor and panics at the current source
+ * location. */
+void cg_copy_aggregate(CG* g, AggregateAccess a) {
+ (void)a;
+ compiler_panic(g->c, g->cur_loc, "cg_copy_aggregate: not in v1 slice");
+}
+void cg_set_aggregate(CG* g, AggregateAccess a) {
+ (void)a;
+ compiler_panic(g->c, g->cur_loc, "cg_set_aggregate: not in v1 slice");
+}
+void cg_bitfield_load(CG* g, BitFieldAccess b) {
+ (void)b;
+ compiler_panic(g->c, g->cur_loc, "cg_bitfield_load: not in v1 slice");
+}
+void cg_bitfield_store(CG* g, BitFieldAccess b) {
+ (void)b;
+ compiler_panic(g->c, g->cur_loc, "cg_bitfield_store: not in v1 slice");
+}
+
+/* ============================================================
+ * Arithmetic / compare / convert
+ * ============================================================ */
+
+/* Return a register operand holding `v`, viewed as type `ty`.
+ *
+ * A register operand is returned unchanged. An immediate is materialized
+ * with target->load_imm and an lvalue is read with target->load, each
+ * into a freshly allocated register. Any other operand kind panics. */
+static Operand force_reg(CG* g, SValue v, const Type* ty) {
+  CGTarget* tgt = g->target;
+  Operand out;
+
+  if (v.op.kind == OPK_REG) return v.op;
+  if (v.op.kind != OPK_IMM && !is_lvalue(&v.op)) {
+    compiler_panic(g->c, g->cur_loc, "cg: cannot force operand to register");
+  }
+
+  out = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  if (v.op.kind == OPK_IMM) {
+    tgt->load_imm(tgt, out, v.op.v.imm);
+  } else {
+    AliasKind root = alias_for_lvalue(&v.op);
+    i32 slot_id = (root == ALIAS_LOCAL) ? (i32)v.op.v.frame_slot : 0;
+    MemAccess acc = derive_mem(g, ty, root, slot_id);
+    tgt->load(tgt, out, v.op, acc);
+  }
+  return out;
+}
+
+/* Binary operation. stack: [a, b] → [a OP b].
+ *
+ * Both operands are forced into registers and the result lands in a
+ * fresh register. The result type is the left operand's type (the right
+ * operand's if the left has none); the parser is expected to have
+ * coerced the operands already. */
+void cg_binop(CG* g, BinOp op) {
+  CGTarget* tgt = g->target;
+  SValue rhs = pop(g);
+  SValue lhs = pop(g);
+  const Type* ty = lhs.type ? lhs.type : rhs.type;
+  Operand lhs_reg = force_reg(g, lhs, ty);
+  Operand rhs_reg = force_reg(g, rhs, ty);
+  SValue result;
+
+  result.op = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  result.type = ty;
+  tgt->binop(tgt, op, result.op, lhs_reg, rhs_reg);
+  push(g, result);
+}
+
+/* Unary operation. stack: [a] → [OP a]. The operand is forced into a
+ * register and the result, of the same type, lands in a fresh register. */
+void cg_unop(CG* g, UnOp op) {
+  CGTarget* tgt = g->target;
+  SValue operand = pop(g);
+  const Type* ty = operand.type ? operand.type : operand.op.type;
+  Operand src = force_reg(g, operand, ty);
+  SValue result;
+
+  result.op = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  result.type = ty;
+  tgt->unop(tgt, op, result.op, src);
+  push(g, result);
+}
+
+/* Comparison. stack: [a, b] → [int result 0/1].
+ *
+ * Operands are compared in the left operand's type (the right operand's
+ * if the left has none); the result is an `int` in a fresh integer
+ * register. */
+void cg_cmp(CG* g, CmpOp op) {
+  CGTarget* tgt = g->target;
+  SValue rhs = pop(g);
+  SValue lhs = pop(g);
+  const Type* operand_ty = lhs.type ? lhs.type : rhs.type;
+  const Type* int_ty = type_prim(g->pool, TY_INT);
+  Operand lhs_reg = force_reg(g, lhs, operand_ty);
+  Operand rhs_reg = force_reg(g, rhs, operand_ty);
+  SValue result;
+
+  result.op = op_reg(tgt->alloc_reg(tgt, RC_INT, int_ty), int_ty);
+  result.type = int_ty;
+  tgt->cmp(tgt, op, result.op, lhs_reg, rhs_reg);
+  push(g, result);
+}
+
+/* In-place increment/decrement. stack: [lv] → [resultval].
+ *
+ * The update is materialized inside cg because driving it from outside
+ * with dup/swap would need a 3-element rotate the stack API does not
+ * expose. The current value is loaded, combined with the immediate 1
+ * using `op`, and stored back. With `post` nonzero the pushed result is
+ * the value before the update, otherwise the value after it. Panics when
+ * the top of stack is not an lvalue. */
+void cg_inc_dec(CG* g, BinOp op, int post) {
+  CGTarget* tgt = g->target;
+  SValue target = pop(g);
+  const Type* ty;
+  AliasKind root;
+  i32 slot_id = 0;
+  MemAccess acc;
+  Operand before;
+  Operand after;
+  Operand step;
+  SValue result;
+
+  if (!is_lvalue(&target.op)) {
+    compiler_panic(g->c, g->cur_loc,
+                   "cg_inc_dec: target is not an lvalue");
+  }
+
+  ty = target.type ? target.type : target.op.type;
+  root = alias_for_lvalue(&target.op);
+  if (root == ALIAS_LOCAL) slot_id = (i32)target.op.v.frame_slot;
+  acc = derive_mem(g, ty, root, slot_id);
+
+  /* before = *lv */
+  before = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  tgt->load(tgt, before, target.op, acc);
+
+  /* after = before OP 1 */
+  after = op_reg(tgt->alloc_reg(tgt, type_class(ty), ty), ty);
+  step = op_imm(1, ty);
+  tgt->binop(tgt, op, after, before, step);
+
+  /* *lv = after */
+  tgt->store(tgt, target.op, after, acc);
+
+  if (post) {
+    result.op = before;
+  } else {
+    result.op = after;
+  }
+  result.type = ty;
+  push(g, result);
+}
+
+/* Convert the top of stack to `dst_ty`.
+ *
+ * When the source type is the very same Type object as `dst_ty` the value
+ * is pushed back untouched. Otherwise the source is forced into a
+ * register, a ConvKind is chosen from the source/destination categories
+ * and ABI sizes, and target->convert writes a fresh register of
+ * `dst_ty`. */
+void cg_convert(CG* g, const Type* dst_ty) {
+ SValue v = pop(g);
+ CGTarget* T = g->target;
+ const Type* sty = v.type ? v.type : v.op.type;
+ ConvKind ck;
+ Operand src;
+ Reg rr;
+ Operand dst;
+ /* Trivial: same type. */
+ if (sty == dst_ty) {
+ push(g, v);
+ return;
+ }
+ src = force_reg(g, v, sty);
+ rr = T->alloc_reg(T, type_class(dst_ty), dst_ty);
+ dst = op_reg(rr, dst_ty);
+ /* Pick a ConvKind from src/dst kinds. v1 spine only sees integer↔integer
+ * (sign/zero ext + trunc); float and bitcast follow the same dispatch. */
+ {
+ int s_int = type_is_int(sty);
+ int d_int = type_is_int(dst_ty);
+ int s_flt = sty && (sty->kind == TY_FLOAT || sty->kind == TY_DOUBLE ||
+ sty->kind == TY_LDOUBLE);
+ int d_flt = dst_ty && (dst_ty->kind == TY_FLOAT || dst_ty->kind == TY_DOUBLE ||
+ dst_ty->kind == TY_LDOUBLE);
+ u32 s_sz = sty ? abi_sizeof(g->abi, sty) : 0;
+ u32 d_sz = dst_ty ? abi_sizeof(g->abi, dst_ty) : 0;
+ int s_signed = sty ? abi_type_info(g->abi, sty).signed_ : 0;
+ if (s_int && d_int) {
+ /* Narrowing truncates; widening extends by the SOURCE's signedness;
+ * equal sizes only reinterpret. */
+ if (d_sz < s_sz) {
+ ck = CV_TRUNC;
+ } else if (d_sz > s_sz) {
+ ck = s_signed ? CV_SEXT : CV_ZEXT;
+ } else {
+ ck = CV_BITCAST;
+ }
+ } else if (s_int && d_flt) {
+ ck = s_signed ? CV_ITOF_S : CV_ITOF_U;
+ } else if (s_flt && d_int) {
+ int d_signed = abi_type_info(g->abi, dst_ty).signed_;
+ ck = d_signed ? CV_FTOI_S : CV_FTOI_U;
+ } else if (s_flt && d_flt) {
+ /* NOTE(review): equal-size float pairs (e.g. double vs. an 8-byte
+ * long double) take the CV_FTRUNC arm — confirm the backend treats
+ * that as a plain move. */
+ ck = (d_sz > s_sz) ? CV_FEXT : CV_FTRUNC;
+ } else {
+ ck = CV_BITCAST;
+ }
+ }
+ T->convert(T, ck, dst, src);
+ {
+ SValue sv;
+ sv.op = dst;
+ sv.type = dst_ty;
+ push(g, sv);
+ }
+}
+
+/* ============================================================
+ * Calls / return
+ * ============================================================ */
+
+/* Emit a call. stack: [..., callee, arg0..argN-1] → [result], or nothing
+ * for a void return.
+ *
+ * Arguments are popped last-to-first and recorded in declaration order.
+ * An lvalue argument is loaded into a register (spine: scalars only);
+ * REG and IMM arguments are passed through. A GLOBAL callee is used
+ * directly; any other callee is forced into a register for an indirect
+ * call. A non-void return gets a fresh register, which is pushed after
+ * target->call. Panics when the stack holds fewer than nargs + 1 values.
+ *
+ * NOTE(review): each argument indexes fn_type->fn.params and abi->params
+ * by position; confirm that callers never pass more arguments than there
+ * are declared parameters (e.g. variadic calls). */
+void cg_call(CG* g, u32 nargs, const Type* fn_type) {
+  CGTarget* tgt = g->target;
+  const ABIFuncInfo* info = abi_func_info(g->abi, fn_type);
+  const Type* ret_ty;
+  CGABIValue* args = NULL;
+  CGABIValue ret_val;
+  CGCallDesc call;
+  Operand callee_op;
+  SValue callee;
+  int has_result;
+  u32 remaining;
+
+  if (g->sp < (u32)nargs + 1u) {
+    compiler_panic(g->c, g->cur_loc, "cg_call: stack underflow");
+  }
+  if (nargs) {
+    args = arena_array(g->c->tu, CGABIValue, nargs);
+    memset(args, 0, sizeof(CGABIValue) * nargs);
+  }
+
+  /* The last argument is on top, so walk the slots from the back. */
+  for (remaining = nargs; remaining > 0; --remaining) {
+    u32 idx = remaining - 1u;
+    SValue arg = pop(g);
+    const Type* aty = fn_type->fn.params ? fn_type->fn.params[idx] : arg.type;
+    Operand src = arg.op;
+
+    if (is_lvalue(&arg.op)) {
+      /* The backend may eventually want an address (byval/indirect)
+       * here; the spine only has scalars, so load the value. */
+      Operand dst = op_reg(tgt->alloc_reg(tgt, type_class(aty), aty), aty);
+      AliasKind root = alias_for_lvalue(&arg.op);
+      i32 slot_id = (root == ALIAS_LOCAL) ? (i32)arg.op.v.frame_slot : 0;
+      MemAccess acc = derive_mem(g, aty, root, slot_id);
+      tgt->load(tgt, dst, arg.op, acc);
+      src = dst;
+    }
+
+    args[idx].type = aty;
+    args[idx].abi = &info->params[idx];
+    args[idx].storage = src;
+    args[idx].parts = NULL;
+    args[idx].nparts = 0;
+  }
+
+  callee = pop(g);
+  if (callee.op.kind == OPK_GLOBAL) {
+    callee_op = callee.op;
+  } else {
+    /* Indirect call: the target address must live in a register. */
+    callee_op = force_reg(g, callee, fn_type);
+  }
+
+  memset(&call, 0, sizeof call);
+  call.fn_type = fn_type;
+  call.abi = info;
+  call.callee = callee_op;
+  call.args = args;
+  call.nargs = nargs;
+  call.flags = CG_CALL_NONE;
+
+  /* Return storage: a register of the right class for scalar returns;
+   * struct returns would set parts/storage differently. */
+  ret_ty = fn_type->fn.ret;
+  has_result = ret_ty && ret_ty->kind != TY_VOID;
+  memset(&ret_val, 0, sizeof ret_val);
+  ret_val.type = ret_ty;
+  ret_val.abi = &info->ret;
+  if (has_result) {
+    ret_val.storage =
+        op_reg(tgt->alloc_reg(tgt, type_class(ret_ty), ret_ty), ret_ty);
+  }
+  call.ret = ret_val;
+
+  tgt->call(tgt, &call);
+
+  if (has_result) {
+    SValue result;
+    result.op = ret_val.storage;
+    result.type = ret_ty;
+    push(g, result);
+  }
+}
+
+void cg_tail_call(CG* g, u32 nargs, const Type* fn_type) {
+ /* Sibling-call form. Not implemented in this slice: the arguments are
+ * ignored and every call panics. Routing through cg_call with
+ * CG_CALL_TAIL is the intended design but is not done here yet. */
+ (void)nargs;
+ (void)fn_type;
+ compiler_panic(g->c, g->cur_loc, "cg_tail_call: not in v1 slice");
+}
+
+/* Emit a return from the current function.
+ *
+ * Without a value the target is given NULL. With a value, the top of
+ * stack is popped, forced into a register of the function's return type,
+ * and passed to the target together with the return slot of the ABI info
+ * cached by cg_func_begin. */
+void cg_ret(CG* g, int has_value) {
+  CGTarget* tgt = g->target;
+  const ABIFuncInfo* info = g->fn_abi;
+  CGABIValue ret_val;
+  Operand ret_reg;
+  SValue v;
+
+  if (!has_value) {
+    tgt->ret(tgt, NULL);
+    return;
+  }
+
+  v = pop(g);
+  ret_reg = force_reg(g, v, g->fn_ret_type);
+  memset(&ret_val, 0, sizeof ret_val);
+  ret_val.type = g->fn_ret_type;
+  ret_val.abi = &info->ret;
+  ret_val.storage = ret_reg;
+  tgt->ret(tgt, &ret_val);
+}
+
+/* ============================================================
+ * alloca / variadics / setjmp / atomics — placeholders
+ * ============================================================ */
+
+/* alloca, variadic, setjmp/longjmp and atomic entry points. None is
+ * implemented in this slice: each ignores its arguments and panics at the
+ * current source location. */
+void cg_alloca(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_alloca: not in v1 slice");
+}
+void cg_va_start_(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_va_start: not in v1 slice");
+}
+void cg_va_arg_(CG* g, const Type* t) {
+ (void)t;
+ compiler_panic(g->c, g->cur_loc, "cg_va_arg: not in v1 slice");
+}
+void cg_va_end_(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_va_end: not in v1 slice");
+}
+void cg_va_copy_(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_va_copy: not in v1 slice");
+}
+void cg_setjmp(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_setjmp: not in v1 slice");
+}
+void cg_longjmp(CG* g) {
+ compiler_panic(g->c, g->cur_loc, "cg_longjmp: not in v1 slice");
+}
+void cg_atomic_load(CG* g, MemOrder o) {
+ (void)o;
+ compiler_panic(g->c, g->cur_loc, "cg_atomic_load: not in v1 slice");
+}
+void cg_atomic_store(CG* g, MemOrder o) {
+ (void)o;
+ compiler_panic(g->c, g->cur_loc, "cg_atomic_store: not in v1 slice");
+}
+void cg_atomic_rmw(CG* g, AtomicOp a, MemOrder o) {
+ (void)a;
+ (void)o;
+ compiler_panic(g->c, g->cur_loc, "cg_atomic_rmw: not in v1 slice");
+}
+void cg_atomic_cas(CG* g, MemOrder s, MemOrder f) {
+ (void)s;
+ (void)f;
+ compiler_panic(g->c, g->cur_loc, "cg_atomic_cas: not in v1 slice");
+}
+void cg_fence(CG* g, MemOrder o) {
+ (void)o;
+ compiler_panic(g->c, g->cur_loc, "cg_fence: not in v1 slice");
+}
+
+/* ============================================================
+ * Control flow — flat labels
+ * ============================================================ */
+
+/* Create a new label in the target and return it as a CGLabel. */
+CGLabel cg_label_new(CG* g) {
+  CGTarget* tgt = g->target;
+  return (CGLabel)tgt->label_new(tgt);
+}
+
+/* Bind label `l` to the current position in the target. */
+void cg_label_place(CG* g, CGLabel l) {
+  CGTarget* tgt = g->target;
+  tgt->label_place(tgt, (Label)l);
+}
+
+/* Emit an unconditional jump to label `l`. */
+void cg_jump(CG* g, CGLabel l) {
+  CGTarget* tgt = g->target;
+  tgt->jump(tgt, (Label)l);
+}
+
+/* Pop a value and branch to `l` when it is nonzero.
+ *
+ * Mirrors cg_branch_false: a known immediate is folded instead of being
+ * materialized and compared. A nonzero immediate becomes an
+ * unconditional jump and a zero immediate emits nothing, so a loop tail
+ * such as `do { ... } while (0)` produces no compare. Any other value is
+ * forced into a register and tested with cmp_branch(CMP_NE, val, 0). */
+void cg_branch_true(CG* g, CGLabel l) {
+  SValue v = pop(g);
+  CGTarget* T = g->target;
+  const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT);
+  if (v.op.kind == OPK_IMM) {
+    if (v.op.v.imm != 0) {
+      T->jump(T, (Label)l);
+    }
+    return;
+  }
+  {
+    Operand a = force_reg(g, v, ty);
+    Operand zero = op_imm(0, ty);
+    T->cmp_branch(T, CMP_NE, a, zero, (Label)l);
+  }
+}
+
+/* Pop a value and branch to `l` when it is zero.
+ *
+ * A known immediate is folded: zero becomes an unconditional jump and
+ * nonzero emits nothing. The aarch64 cmp_branch accepts immediates too,
+ * but folding here keeps the emitted code clean and lets `if (1) ...`
+ * skip the compare entirely. Any other value is forced into a register
+ * and tested with cmp_branch(CMP_EQ, val, 0). */
+void cg_branch_false(CG* g, CGLabel l) {
+  CGTarget* tgt = g->target;
+  SValue cond = pop(g);
+  const Type* ty = cond.type ? cond.type : type_prim(g->pool, TY_INT);
+  Operand cond_reg;
+  Operand zero;
+
+  if (cond.op.kind == OPK_IMM) {
+    if (cond.op.v.imm == 0) tgt->jump(tgt, (Label)l);
+    return;
+  }
+
+  cond_reg = force_reg(g, cond, ty);
+  zero = op_imm(0, ty);
+  tgt->cmp_branch(tgt, CMP_EQ, cond_reg, zero, (Label)l);
+}
+
+/* ============================================================
+ * Structured control flow — passthrough to target
+ * ============================================================ */
+
+/* Open a structured scope in the target.
+ *
+ * The config's kind, labels and result type are copied into a scope
+ * descriptor. For SCOPE_IF the condition is popped from the value stack
+ * and forced into a register first. Returns the target's scope handle. */
+CGScope cg_scope_begin(CG* g, CGScopeConfig cfg) {
+  CGScopeDesc desc;
+
+  memset(&desc, 0, sizeof desc);
+  desc.kind = (u8)cfg.kind;
+  desc.result_type = cfg.result_type;
+  desc.break_label = (Label)cfg.break_label;
+  desc.continue_label = (Label)cfg.continue_label;
+
+  if (cfg.kind == SCOPE_IF) {
+    SValue cond = pop(g);
+    const Type* cond_ty = cond.type;
+    if (!cond_ty) cond_ty = type_prim(g->pool, TY_INT);
+    desc.cond = force_reg(g, cond, cond_ty);
+  }
+  return (CGScope)g->target->scope_begin(g->target, &desc);
+}
+
+/* Switch scope `s` to its else arm in the target. */
+void cg_scope_else(CG* g, CGScope s) {
+  CGTarget* tgt = g->target;
+  tgt->scope_else(tgt, (CGScope)s);
+}
+
+/* Close scope `s` in the target. */
+void cg_scope_end(CG* g, CGScope s) {
+  CGTarget* tgt = g->target;
+  tgt->scope_end(tgt, (CGScope)s);
+}
+
+/* Emit a break out of scope `s`. */
+void cg_break(CG* g, CGScope s) {
+  CGTarget* tgt = g->target;
+  tgt->break_to(tgt, (CGScope)s);
+}
+
+/* Emit a continue targeting scope `s`. */
+void cg_continue(CG* g, CGScope s) {
+  CGTarget* tgt = g->target;
+  tgt->continue_to(tgt, (CGScope)s);
+}
+
+/* ============================================================
+ * Source location
+ * ============================================================ */
+
+/* Record `loc` as the current source location (used by panics) and
+ * forward it to the target's optional set_loc hook and to Debug, when
+ * each is present. */
+void cg_set_loc(CG* g, SrcLoc loc) {
+  CGTarget* tgt = g->target;
+  g->cur_loc = loc;
+  if (tgt->set_loc) {
+    tgt->set_loc(tgt, loc);
+  }
+  if (g->debug) {
+    debug_set_pending_loc(g->debug, loc);
+  }
+}
+
+/* ============================================================
+ * Inline asm — placeholder
+ * ============================================================ */
+
+/* Inline assembly entry point. Not implemented in this slice: the
+ * template, constraints and clobbers are ignored and every call panics. */
+void cg_inline_asm(CG* g, const char* tmpl, const AsmConstraint* outs, u32 nout,
+ const AsmConstraint* ins, u32 nin, const Sym* clobbers,
+ u32 nclob) {
+ (void)tmpl;
+ (void)outs;
+ (void)nout;
+ (void)ins;
+ (void)nin;
+ (void)clobbers;
+ (void)nclob;
+ compiler_panic(g->c, g->cur_loc, "cg_inline_asm: not in v1 slice");
+}
diff --git a/src/cg/cg.h b/src/cg/cg.h
@@ -54,6 +54,15 @@ void cg_unop(CG*, UnOp);
void cg_cmp(CG*, CmpOp);
void cg_convert(CG*, const Type* dst); /* picks ConvKind from src/dst */
+/* Increment/decrement an lvalue in place. Pops the lvalue from the value
+ * stack, performs `*lv = *lv +/- 1`, and pushes the result rvalue. With
+ * `post=1` the pushed value is the OLD value (post-inc/dec); with
+ * `post=0` it is the NEW value (pre-inc/dec). `op` is BO_IADD or BO_ISUB.
+ * The step is always the integer constant 1: for non-integer types
+ * (pointer arithmetic) producing a correctly scaled step remains the
+ * parser's responsibility. The spine slice deals only with integer
+ * locals. */
+void cg_inc_dec(CG*, BinOp op, int post);
+
/* Direct vs indirect: callee on the stack distinguishes itself by
* SValue/operand kind. CG obtains ABIFuncInfo from Compiler.abi, materializes
* CGABIValue argument/return parts, then calls CGTarget.call with a CGCallDesc.
diff --git a/src/decl/decl.c b/src/decl/decl.c
@@ -0,0 +1,187 @@
+/* DeclTable — C declaration semantics above ObjBuilder.
+ *
+ * Maps DeclId → Decl record. Allocates an ObjSymId for any non-typedef,
+ * non-auto/register decl with linkage; the parser may also pre-create a
+ * symbol (forward reference) and pass it in via Decl.obj_sym.
+ *
+ * Identifier *lookup* is not handled here — that lives on the parser's
+ * scope stack so block scopes and shadowing fall out naturally. DeclTable
+ * is just the C-language layer above ObjBuilder: storage class, linkage,
+ * static-locals, tentative defs, and global initializers.
+ *
+ * v1 surface is intentionally minimal: just enough for the spine corpus
+ * (functions; ints; static locals) plus the hooks DESIGN.md §5.3.1
+ * commits to. Tentative-definition coalescing, COMDAT, and aliases are
+ * stubs at the API edge; their full semantics arrive with the multi-TU
+ * corpus. */
+
+#include "decl/decl.h"
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+
+/* Growable array of Decl records indexed by DeclId. */
+struct DeclTable {
+ Compiler* c; /* owning compiler; its env->heap backs `slots` */
+ ObjBuilder* ob; /* object builder that symbols are minted against */
+ Decl* slots; /* index 0 reserved as DECL_NONE */
+ u32 nslots; /* number of slots in use, including the reserved slot 0 */
+ u32 cap; /* allocated capacity of `slots`, in elements */
+};
+
+#define DECL_INITIAL_CAP 16u
+
+/* Ensure the slot array can hold at least `want` entries. Capacity starts
+ * at DECL_INITIAL_CAP and doubles until it fits; live entries are copied
+ * into the new buffer and the old one is released. Newly added capacity is
+ * left uninitialized.
+ * NOTE(review): the result of h->alloc is used unchecked — confirm the heap
+ * never returns NULL. */
+static void decls_grow(DeclTable* t, u32 want) {
+  Heap* heap;
+  Decl* fresh;
+  u32 new_cap = t->cap;
+  if (want <= new_cap) return;
+  if (new_cap == 0) new_cap = DECL_INITIAL_CAP;
+  while (new_cap < want) new_cap *= 2u;
+  heap = t->c->env->heap;
+  fresh = (Decl*)heap->alloc(heap, sizeof(Decl) * new_cap, _Alignof(Decl));
+  if (t->slots) {
+    memcpy(fresh, t->slots, sizeof(Decl) * t->nslots);
+    heap->free(heap, t->slots, sizeof(Decl) * t->cap);
+  }
+  t->cap = new_cap;
+  t->slots = fresh;
+}
+
+/* Create an empty DeclTable bound to compiler `c` and object builder `ob`.
+ * Slot 0 is allocated and zeroed up front so that DeclId 0 (DECL_NONE)
+ * never names a real declaration. */
+DeclTable* decl_new(Compiler* c, ObjBuilder* ob) {
+  Heap* heap = c->env->heap;
+  DeclTable* tbl =
+      (DeclTable*)heap->alloc(heap, sizeof(DeclTable), _Alignof(DeclTable));
+  memset(tbl, 0, sizeof(DeclTable));
+  tbl->ob = ob;
+  tbl->c = c;
+  decls_grow(tbl, 1);
+  /* decls_grow does not clear new storage; zero the reserved slot. */
+  memset(tbl->slots, 0, sizeof tbl->slots[0]);
+  tbl->nslots = 1;
+  return tbl;
+}
+
+/* Release a DeclTable and its slot array. A NULL table is a no-op. */
+void decl_free(DeclTable* t) {
+  if (t) {
+    Heap* heap = t->c->env->heap;
+    if (t->slots) {
+      heap->free(heap, t->slots, sizeof(Decl) * t->cap);
+    }
+    heap->free(heap, t, sizeof(*t));
+  }
+}
+
+/* Return the ObjBuilder the table was created against, or NULL for a NULL
+ * table. */
+ObjBuilder* decl_obj(DeclTable* t) {
+  if (!t) return NULL;
+  return t->ob;
+}
+
+/* Append a copy of `*in` to the table and return its new DeclId; the
+ * stored record's `id` field is overwritten with that id.
+ *
+ * A named decl whose storage is not typedef/auto/register and that arrives
+ * without an object symbol gets one minted here: bound global for external
+ * linkage and local otherwise, kind function for TY_FUNC types and object
+ * otherwise, not yet placed in any section. A symbol pre-created by the
+ * caller (e.g. for a forward reference) is kept as is. */
+DeclId decl_declare(DeclTable* t, const Decl* in) {
+  DeclId id;
+  Decl* d;
+  int wants_sym;
+  decls_grow(t, t->nslots + 1);
+  id = (DeclId)t->nslots;
+  t->nslots += 1;
+  d = &t->slots[id];
+  *d = *in;
+  d->id = id;
+  wants_sym = d->obj_sym == OBJ_SYM_NONE && d->name &&
+              d->storage != DS_TYPEDEF && d->storage != DS_AUTO &&
+              d->storage != DS_REGISTER;
+  if (wants_sym) {
+    SymBind bind = SB_LOCAL;
+    SymKind kind = SK_OBJ;
+    if (d->linkage == DL_EXTERNAL) bind = SB_GLOBAL;
+    if (d->type && d->type->kind == TY_FUNC) kind = SK_FUNC;
+    d->obj_sym = obj_symbol_ex(t->ob, d->name, bind, (SymVis)d->visibility,
+                               kind, OBJ_SEC_NONE, 0, 0, 0);
+  }
+  return id;
+}
+
+/* Look up a declaration record. Returns NULL for a NULL table, DECL_NONE,
+ * or an id at or beyond the number of slots in use. */
+const Decl* decl_get(const DeclTable* t, DeclId id) {
+  if (t && id != DECL_NONE && id < t->nslots) {
+    return &t->slots[id];
+  }
+  return NULL;
+}
+
+/* Object symbol bound to `id`, or OBJ_SYM_NONE when the id is invalid. */
+ObjSymId decl_obj_sym(const DeclTable* t, DeclId id) {
+  const Decl* rec = decl_get(t, id);
+  if (!rec) return OBJ_SYM_NONE;
+  return rec->obj_sym;
+}
+
+/* Explicit decl-side definition of a function symbol. Backends already call
+ * obj_symbol_define from func_end; this hook serves callers where no
+ * CGTarget func_end runs (e.g. asm-defined functions). Invalid ids and
+ * decls without an object symbol are ignored. */
+void decl_define_function(DeclTable* t, DeclId id, ObjSecId text_section_id,
+                          u64 value, u64 size) {
+  const Decl* rec = decl_get(t, id);
+  if (rec && rec->obj_sym != OBJ_SYM_NONE) {
+    obj_symbol_define(t->ob, rec->obj_sym, text_section_id, value, size);
+  }
+}
+
+/* Define the storage for object `id`: `size` bytes with alignment `align`
+ * (0 is treated as 1), initialized from the `ninit` items in `init`.
+ *
+ * If every item is INIT_ZERO (including ninit == 0) the object is reserved
+ * in ".bss"; otherwise space is reserved in ".data" and each item is applied
+ * at its offset. Invalid ids and decls without an object symbol are
+ * ignored. */
+void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align,
+ const InitItem* init, u32 ninit) {
+ const Decl* d = decl_get(t, id);
+ ObjSecId sec_id;
+ Sym sec_name;
+ int has_nonzero;
+ u32 i;
+ if (!d || d->obj_sym == OBJ_SYM_NONE) return;
+ /* Any item other than INIT_ZERO forces the object into .data. */
+ has_nonzero = 0;
+ for (i = 0; i < ninit; ++i) {
+ if (init[i].kind != INIT_ZERO) {
+ has_nonzero = 1;
+ break;
+ }
+ }
+ if (!has_nonzero) {
+ sec_name = pool_intern_cstr(t->c->global, ".bss");
+ sec_id = obj_section(t->ob, sec_name, SEC_BSS, SF_ALLOC | SF_WRITE,
+ align ? align : 1u);
+ /* NOTE(review): `size` is u64 but is narrowed to u32 here — confirm
+ * objects of 4 GiB or more cannot reach this path. */
+ obj_reserve_bss(t->ob, sec_id, (u32)size, align ? align : 1u);
+ /* NOTE(review): the symbol is defined at section offset 0 regardless of
+ * what was reserved before it, whereas the .data path below uses the
+ * section position as the base. Confirm a second .bss object does not
+ * alias the first. */
+ obj_symbol_define(t->ob, d->obj_sym, sec_id, 0, size);
+ return;
+ }
+ sec_name = pool_intern_cstr(t->c->global, ".data");
+ sec_id = obj_section(t->ob, sec_name, SEC_DATA, SF_ALLOC | SF_WRITE,
+ align ? align : 1u);
+ {
+ /* NOTE(review): `base` is the section position before the reservation;
+ * `align` is only passed to obj_section, so whether `base` itself is
+ * aligned depends on obj_reserve — confirm. */
+ u32 base = obj_pos(t->ob, sec_id);
+ obj_reserve(t->ob, sec_id, size);
+ obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size);
+ for (i = 0; i < ninit; ++i) {
+ const InitItem* it = &init[i];
+ switch (it->kind) {
+ case INIT_BYTES:
+ /* Copy literal bytes over the reserved space. */
+ obj_patch(t->ob, sec_id, base + it->offset, it->v.bytes.bytes,
+ it->v.bytes.size);
+ break;
+ case INIT_FILL: {
+ /* Repeat one byte value across it->size bytes, one patch per byte. */
+ u32 j;
+ u8 b = it->v.fill.byte;
+ for (j = 0; j < it->size; ++j) {
+ obj_patch(t->ob, sec_id, base + it->offset + j, &b, 1);
+ }
+ break;
+ }
+ case INIT_RELOC:
+ /* Record a relocation against the item's target at this offset. */
+ obj_reloc(t->ob, sec_id, base + it->offset, it->v.reloc.kind,
+ it->v.reloc.target, it->v.reloc.addend);
+ break;
+ case INIT_ZERO:
+ default:
+ /* Nothing is written for zero items or unknown kinds. */
+ break;
+ }
+ }
+ }
+}
+
+/* Mark `id` as a tentative definition and define it as an object with no
+ * initializer items (which decl_define_object places in .bss). DECL_NONE
+ * and out-of-range ids are ignored. */
+void decl_define_tentative(DeclTable* t, DeclId id, u64 size, u32 align) {
+  if (id != DECL_NONE && id < t->nslots) {
+    t->slots[id].flags |= DF_TENTATIVE;
+    decl_define_object(t, id, size, align, NULL, 0);
+  }
+}
+
+/* Alias definition is a stub at this slice: all arguments are ignored and
+ * nothing is recorded. */
+void decl_define_alias(DeclTable* t, DeclId self, DeclId target) {
+  (void)target;
+  (void)self;
+  (void)t;
+}
diff --git a/src/decl/decl.h b/src/decl/decl.h
@@ -81,6 +81,11 @@ typedef struct InitItem {
DeclTable* decl_new(Compiler*, ObjBuilder*);
void decl_free(DeclTable*);
+/* The ObjBuilder a DeclTable was created against. Useful for the parser
+ * (which receives a DeclTable, not the builder) when it needs to create
+ * sections or look up object-level facts. */
+ObjBuilder* decl_obj(DeclTable*);
+
DeclId decl_declare(DeclTable*, const Decl*);
const Decl* decl_get(const DeclTable*, DeclId);
ObjSymId decl_obj_sym(const DeclTable*, DeclId);
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -0,0 +1,1192 @@
+/* C11 recursive-descent parser. No AST; the parser drives DeclTable for C
+ * declaration semantics and CG for executable code in a single pass.
+ *
+ * Module shape (DESIGN §5):
+ * - lex / pp produce a token stream; we keep one token of lookahead.
+ * - The parser maintains its own scope stack (block/file scope) for
+ * identifier resolution. DeclTable owns DeclId/ObjSymId allocation.
+ * - Statements drive CG: cg_func_begin/end, cg_local, cg_set_loc,
+ * cg_label_*, cg_branch_*, cg_jump, cg_ret. Expressions drive CG's
+ * value stack: cg_push_*, cg_load, cg_store, cg_binop, cg_cmp.
+ * - One Tok of lookahead is enough for C11; at decision points we use
+ * the keyword/punctuator directly.
+ *
+ * v1 slice: single-TU; functions returning int; int locals (with comma-
+ * separated initializers); compound, if/else, while, for, return,
+ * expression statements; expressions covering the §6.5 spine
+ * (additive/multiplicative/relational/equality, unary, parens, post/pre
+ * inc-dec, simple assignment + compound assignment). The grammar is
+ * organized so each higher-level production gets its own function — the
+ * full C grammar slots in the same shape, one production at a time. */
+
+#include "parse/parse.h"
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "cg/cg.h"
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "debug/debug.h"
+#include "decl/decl.h"
+#include "lex/lex.h"
+#include "obj/obj.h"
+#include "pp/pp.h"
+#include "type/type.h"
+
+/* Type-aware push for locals — exposed by cg.c, not in cg.h. */
+extern void cg_push_local_typed(CG*, FrameSlot, const Type*);
+
+/* ============================================================
+ * Keywords
+ * ============================================================
+ * Lex emits TOK_IDENT; the parser bucketizes idents into keywords by
+ * comparing the interned Sym against a fixed table populated at parser
+ * init. The table covers C11 plus a handful of common GCC-style
+ * extensions the runtime headers use. Adding a new keyword is one entry
+ * here plus one parser branch; the lexer never changes. */
+/* Keyword ids. The enumerator order must stay in step with kw_names[],
+ * which is indexed by these values; KW_NONE (0) means "not a keyword" and
+ * KW_COUNT is the table size. */
+typedef enum CKw {
+ KW_NONE = 0,
+ KW_AUTO,
+ KW_BREAK,
+ KW_CASE,
+ KW_CHAR,
+ KW_CONST,
+ KW_CONTINUE,
+ KW_DEFAULT,
+ KW_DO,
+ KW_DOUBLE,
+ KW_ELSE,
+ KW_ENUM,
+ KW_EXTERN,
+ KW_FLOAT,
+ KW_FOR,
+ KW_GOTO,
+ KW_IF,
+ KW_INLINE,
+ KW_INT,
+ KW_LONG,
+ KW_REGISTER,
+ KW_RESTRICT,
+ KW_RETURN,
+ KW_SHORT,
+ KW_SIGNED,
+ KW_SIZEOF,
+ KW_STATIC,
+ KW_STRUCT,
+ KW_SWITCH,
+ KW_TYPEDEF,
+ KW_UNION,
+ KW_UNSIGNED,
+ KW_VOID,
+ KW_VOLATILE,
+ KW_WHILE,
+ KW_BOOL, /* _Bool */
+ KW_COMPLEX, /* _Complex */
+ KW_IMAGINARY, /* _Imaginary */
+ KW_ALIGNAS, /* _Alignas */
+ KW_ALIGNOF, /* _Alignof */
+ KW_ATOMIC, /* _Atomic */
+ KW_GENERIC, /* _Generic */
+ KW_NORETURN, /* _Noreturn */
+ KW_STATIC_ASSERT, /* _Static_assert */
+ KW_THREAD_LOCAL, /* _Thread_local */
+ KW_COUNT
+} CKw;
+
+/* Keyword spellings indexed by CKw; entry 0 (KW_NONE) is NULL. Entries
+ * appear in the same order as the enumerators above.
+ * NOTE(review): presumably interned into Parser.kw_sym at parser init — the
+ * population code is not in this part of the file. */
+static const char* const kw_names[KW_COUNT] = {
+ NULL, "auto", "break", "case", "char",
+ "const", "continue", "default", "do", "double",
+ "else", "enum", "extern", "float", "for",
+ "goto", "if", "inline", "int", "long",
+ "register", "restrict", "return", "short", "signed",
+ "sizeof", "static", "struct", "switch", "typedef",
+ "union", "unsigned", "void", "volatile", "while",
+ "_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof",
+ "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local",
+};
+
+/* ============================================================
+ * Scope stack
+ * ============================================================
+ * One ScopeEntry per declared identifier; chained in declaration order
+ * within a Scope. Block scopes are pushed/popped around every compound
+ * statement, parameter list, and `for`-init. Lookup walks parent chains. */
+
+/* What a scope entry names; selects which member of SymEntry.v is valid
+ * and how parse_primary pushes the identifier. */
+typedef enum SymEntryKind {
+ SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */
+ SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */
+ SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */
+ SEK_TYPEDEF, /* typedef name */
+ SEK_ENUM_CST, /* enumeration constant */
+} SymEntryKind;
+
+/* One declared identifier within a Scope. */
+typedef struct SymEntry SymEntry;
+struct SymEntry {
+ Sym name; /* interned identifier */
+ u8 kind; /* SymEntryKind */
+ u8 pad[3];
+ const Type* type; /* declared type of the entity */
+ union {
+ FrameSlot slot; /* SEK_LOCAL */
+ ObjSymId sym; /* SEK_GLOBAL, SEK_FUNC */
+ i64 enum_value; /* SEK_ENUM_CST */
+ } v;
+ SymEntry* next; /* next entry in the same scope (declared earlier) */
+};
+
+/* One level of the scope stack. scope_define prepends to `entries`, so the
+ * most recent declaration is found first; lookup continues into `parent`. */
+typedef struct Scope Scope;
+struct Scope {
+ SymEntry* entries; /* LIFO */
+ Scope* parent; /* enclosing scope; NULL at the root */
+};
+
+/* ============================================================
+ * Parser context
+ * ============================================================ */
+
+/* All state for one parse: the collaborating modules, the single token of
+ * lookahead, the keyword table, the scope stack, and the enclosing
+ * loop context used by break/continue. */
+typedef struct Parser {
+ Compiler* c; /* owning compiler; diagnostics and the tu arena */
+ Pp* pp; /* token source (pp_next) */
+ DeclTable* decls;
+ CG* cg; /* code generator driven by statements/expressions */
+ Debug* debug;
+ TargetABI* abi; /* size/alignment queries for locals */
+ Pool* pool; /* string/type interning pool */
+
+ Tok cur; /* one token of lookahead */
+
+ Sym kw_sym[KW_COUNT]; /* interned keyword spellings, indexed by CKw */
+
+ Scope* scope; /* top of stack; file scope is the root */
+
+ ObjSecId text_sec;
+
+ /* Loop/switch context for break/continue. CGLabel 0 means none. */
+ CGLabel cur_break;
+ CGLabel cur_continue;
+} Parser;
+
+/* ============================================================
+ * Diagnostics
+ * ============================================================ */
+
+/* Source location carried by token `t`. */
+static SrcLoc tok_loc(const Tok* t) {
+  return t->loc;
+}
+
+static _Noreturn void perr(Parser* p, const char* fmt, ...) {
+ va_list ap;
+ SrcLoc loc = tok_loc(&p->cur);
+ va_start(ap, fmt);
+ compiler_panicv(p->c, loc, fmt, ap);
+}
+
+/* ============================================================
+ * Token helpers
+ * ============================================================ */
+
+/* Replace the lookahead with the next token from the preprocessor. */
+static void advance(Parser* p) {
+  p->cur = pp_next(p->pp);
+}
+
+/* Nonzero when `t` is the punctuator `punct`. */
+static int is_punct(const Tok* t, u32 punct) {
+  if (t->kind != TOK_PUNCT) return 0;
+  return t->v.punct == punct;
+}
+
+/* Nonzero when `t` is a preprocessor-hash token. */
+static int is_pp_hash(const Tok* t) {
+  return t->kind == TOK_PP_HASH;
+}
+
+/* Nonzero when `t` is an identifier whose interned symbol matches the
+ * keyword `k`. */
+static int is_kw(const Parser* p, const Tok* t, CKw k) {
+  if (t->kind != TOK_IDENT) return 0;
+  return t->v.ident == p->kw_sym[k];
+}
+
+/* Classify an interned identifier: the matching keyword id, or KW_NONE.
+ * A linear scan is fine because the keyword table is small. */
+static CKw ident_kw(const Parser* p, Sym name) {
+  int k;
+  for (k = 1; k < KW_COUNT; ++k) {
+    if (p->kw_sym[k] == name) {
+      return (CKw)k;
+    }
+  }
+  return KW_NONE;
+}
+
+/* Consume the lookahead if it is the punctuator `punct`. Returns 1 when a
+ * token was consumed, 0 otherwise. */
+static int accept_punct(Parser* p, u32 punct) {
+  if (!is_punct(&p->cur, punct)) return 0;
+  advance(p);
+  return 1;
+}
+
+/* Consume the lookahead if it is the keyword `k`. Returns 1 when a token
+ * was consumed, 0 otherwise. */
+static int accept_kw(Parser* p, CKw k) {
+  if (!is_kw(p, &p->cur, k)) return 0;
+  advance(p);
+  return 1;
+}
+
+/* Consume the punctuator `punct` or fail with "expected <what>". */
+static void expect_punct(Parser* p, u32 punct, const char* what) {
+  if (accept_punct(p, punct)) return;
+  perr(p, "expected %s", what);
+}
+
+/* expect_kw is not needed at this slice — `void` consumption goes through
+ * accept_kw already. The prototype below is kept commented out as a
+ * documentation hook for the next slice that needs it (e.g.
+ * `_Static_assert`).
+ *
+ * static void expect_kw(Parser*, CKw, const char* what); */
+
+/* ============================================================
+ * Scopes
+ * ============================================================ */
+
+/* Allocate an empty scope from the translation-unit arena, chained to
+ * `parent`. Fails with a parse error when the arena is exhausted. */
+static Scope* scope_new(Parser* p, Scope* parent) {
+  Scope* fresh = arena_new(p->c->tu, Scope);
+  if (!fresh) {
+    perr(p, "out of memory in scope_new");
+  }
+  fresh->parent = parent;
+  fresh->entries = NULL;
+  return fresh;
+}
+
+/* Enter a new innermost scope nested in the current one. */
+static void scope_push(Parser* p) {
+  Scope* inner = scope_new(p, p->scope);
+  p->scope = inner;
+}
+
+/* Leave the innermost scope. A no-op when no scope is active. The popped
+ * scope is not freed here; it was allocated from the tu arena. */
+static void scope_pop(Parser* p) {
+  Scope* top = p->scope;
+  if (top) {
+    p->scope = top->parent;
+  }
+}
+
+/* Bind `name` in the innermost scope and return the new entry. The entry
+ * is zeroed apart from name/kind/type, so the caller fills in the `v`
+ * member that matches `kind`. New entries go at the head of the list, so
+ * the latest binding of a name is found first. */
+static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind,
+                              const Type* type) {
+  Scope* top;
+  SymEntry* ent = arena_new(p->c->tu, SymEntry);
+  if (!ent) {
+    perr(p, "out of memory in scope_define");
+  }
+  memset(ent, 0, sizeof *ent);
+  ent->type = type;
+  ent->kind = (u8)kind;
+  ent->name = name;
+  top = p->scope;
+  ent->next = top->entries;
+  top->entries = ent;
+  return ent;
+}
+
+/* Resolve `name` by searching the innermost scope first and then each
+ * enclosing scope. Returns NULL when the name is not bound anywhere. */
+static SymEntry* scope_lookup(Parser* p, Sym name) {
+  Scope* level = p->scope;
+  while (level) {
+    SymEntry* ent = level->entries;
+    while (ent) {
+      if (ent->name == name) return ent;
+      ent = ent->next;
+    }
+    level = level->parent;
+  }
+  return NULL;
+}
+
+/* ============================================================
+ * Type helpers
+ * ============================================================ */
+
+/* The primitive `int` type from the parser's pool. */
+static const Type* ty_int(Parser* p) {
+  return type_prim(p->pool, TY_INT);
+}
+
+/* ============================================================
+ * Literal parsing
+ * ============================================================
+ * v1 spine sees only decimal integer literals without suffixes; this
+ * routine accepts the common 0x/0/decimal forms with optional u/l/ll
+ * suffixes so the §6.5 corpus rows that aren't yet exercised still
+ * land usefully. Final type selection uses int for now and grows when
+ * the §6.4.4.1 corpus catches up. */
+/* Decode the spelling of numeric token `t` as an integer constant.
+ *
+ * Accepts 0x/0X hex, 0b/0B binary, leading-0 octal and decimal digits,
+ * followed by an optional run of u/U/l/L suffix characters. The suffix is
+ * not interpreted (the caller types every literal as int for now), but any
+ * other trailing character is rejected.
+ *
+ * The value is accumulated unsigned so that overflow is detected rather
+ * than being undefined behavior: a literal that does not fit in 64 bits is
+ * a parse error. Values above the i64 range are returned as the
+ * corresponding 64-bit pattern. A hex or binary prefix with no digits is
+ * also a parse error. */
+static i64 parse_int_literal(Parser* p, const Tok* t) {
+  size_t len = 0;
+  const char* s = pool_str(p->pool, t->spelling, &len);
+  size_t i = 0;
+  size_t first_digit;
+  u64 base = 10;
+  u64 acc = 0;
+  if (!s) perr(p, "bad numeric literal");
+  if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    i = 2;
+  } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
+    base = 2;
+    i = 2;
+  } else if (len >= 1 && s[0] == '0') {
+    /* The leading 0 is consumed as the prefix, so a bare "0" yields 0. */
+    base = 8;
+    i = 1;
+  }
+  first_digit = i;
+  for (; i < len; ++i) {
+    int c = (unsigned char)s[i];
+    u64 dv;
+    /* Stop at suffix characters (u/U/l/L). */
+    if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break;
+    if (c >= '0' && c <= '9')
+      dv = (u64)(c - '0');
+    else if (c >= 'a' && c <= 'f')
+      dv = (u64)(c - 'a' + 10);
+    else if (c >= 'A' && c <= 'F')
+      dv = (u64)(c - 'A' + 10);
+    else
+      perr(p, "bad digit in numeric literal");
+    if (dv >= base) perr(p, "digit out of range for base");
+    /* Reject before multiplying: acc * base + dv must fit in 64 bits. */
+    if (acc > (~(u64)0 - dv) / base) perr(p, "integer literal too large");
+    acc = acc * base + dv;
+  }
+  if ((base == 16 || base == 2) && i == first_digit) {
+    perr(p, "missing digits after base prefix in numeric literal");
+  }
+  /* Whatever follows the digits must consist of suffix characters only. */
+  for (; i < len; ++i) {
+    char c = s[i];
+    if (c != 'u' && c != 'U' && c != 'l' && c != 'L') {
+      perr(p, "bad suffix on numeric literal");
+    }
+  }
+  return (i64)acc;
+}
+
+/* ============================================================
+ * Expressions — precedence climbing
+ * ============================================================
+ * Pratt-style climber: each level consumes its operators and recurses
+ * into the next-tighter level. Each function leaves the result on the CG
+ * stack. The grammar follows C11 §6.5 top-down; only the productions
+ * needed by the spine are wired today, with the remaining ones marked
+ * with TODO comments at the call sites where they'll slot in.
+ *
+ * Level (loose → tight):
+ * assignment = `=` `+=` `-=` `*=` `/=` `%=` `&=` `|=` `^=` `<<=` `>>=`
+ * conditional = `? :` (TODO)
+ * logical_or = `||` (TODO)
+ * logical_and = `&&` (TODO)
+ * bit_or = `|`
+ * bit_xor = `^`
+ * bit_and = `&`
+ * equality = `==` `!=`
+ * relational = `<` `<=` `>` `>=`
+ * shift = `<<` `>>`
+ * additive = `+` `-`
+ * multiplicative = `*` `/` `%`
+ * cast = `(type) cast` (TODO)
+ * unary = `+ - ! ~ * & ++ --` `sizeof` (partial)
+ * postfix = `[] () . -> ++ --` (partial)
+ * primary = ident | num | `(` expr `)` | strlit | charlit
+ */
+
+static void parse_expr(Parser* p);
+static void parse_assign_expr(Parser* p);
+static void parse_unary(Parser* p);
+static void parse_postfix(Parser* p);
+
+/* Make the top of the CG value stack an rvalue. An lvalue is replaced by
+ * its loaded value; cg_load is idempotent on rvalues, so calling this on a
+ * value that is already an rvalue is harmless. */
+static void to_rvalue(Parser* p) {
+  cg_load(p->cg);
+}
+
+/* primary-expression: integer constant | `(` expression `)` | identifier |
+ * character constant. Pushes exactly one value on the CG stack or fails
+ * with a diagnostic. */
+static void parse_primary(Parser* p) {
+ Tok t = p->cur;
+ if (t.kind == TOK_NUM) {
+ /* Every numeric literal is typed int at this slice. */
+ i64 v = parse_int_literal(p, &t);
+ advance(p);
+ cg_push_int(p->cg, v, ty_int(p));
+ return;
+ }
+ if (is_punct(&t, '(')) {
+ /* Parenthesized expression: the inner value stays on the stack as is. */
+ advance(p);
+ parse_expr(p);
+ expect_punct(p, ')', "')'");
+ return;
+ }
+ if (t.kind == TOK_IDENT) {
+ SymEntry* e;
+ /* Reject keywords used as expressions. */
+ if (ident_kw(p, t.v.ident) != KW_NONE) {
+ perr(p, "unexpected keyword in expression");
+ }
+ e = scope_lookup(p, t.v.ident);
+ if (!e) {
+ /* Reported at the identifier's own location, with its spelling. */
+ size_t nlen = 0;
+ const char* nm = pool_str(p->pool, t.v.ident, &nlen);
+ compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen,
+ nm ? nm : "?");
+ }
+ advance(p);
+ /* How the identifier is pushed depends on what it names. */
+ switch (e->kind) {
+ case SEK_LOCAL:
+ cg_push_local_typed(p->cg, e->v.slot, e->type);
+ return;
+ case SEK_GLOBAL:
+ case SEK_FUNC:
+ cg_push_global(p->cg, e->v.sym, e->type);
+ return;
+ case SEK_ENUM_CST:
+ cg_push_int(p->cg, e->v.enum_value, e->type);
+ return;
+ case SEK_TYPEDEF:
+ default:
+ perr(p, "identifier is not a value");
+ }
+ }
+ if (t.kind == TOK_CHR) {
+ /* Minimal char-literal: take the first decoded byte from the lit table.
+ * Spine doesn't use char literals, so this is best-effort.
+ * NOTE(review): as written the LitInfo is fetched but never read, so
+ * every character constant evaluates to 0 — decode `li` or diagnose
+ * before char literals are relied on. */
+ const LitInfo* li = pp_lit(p->pp, t.lit);
+ i64 v = 0;
+ (void)li;
+ advance(p);
+ cg_push_int(p->cg, v, ty_int(p));
+ return;
+ }
+ perr(p, "expected expression");
+}
+
+/* postfix-expression: a primary followed by any number of `++` / `--`.
+ * Call, subscript and member access are recognized only to report that
+ * they are unsupported at this slice. */
+static void parse_postfix(Parser* p) {
+  parse_primary(p);
+  for (;;) {
+    int inc = is_punct(&p->cur, P_INC);
+    if (inc || is_punct(&p->cur, P_DEC)) {
+      advance(p);
+      cg_inc_dec(p->cg, inc ? BO_IADD : BO_ISUB, /*post=*/1);
+      continue;
+    }
+    if (is_punct(&p->cur, '(') || is_punct(&p->cur, '[') ||
+        is_punct(&p->cur, '.') || is_punct(&p->cur, P_ARROW)) {
+      perr(p, "call/subscript/member access not supported in v1 slice");
+    }
+    return;
+  }
+}
+
+/* unary-expression: prefix `+ - ! ~ ++ --` applied to a unary-expression,
+ * otherwise a postfix-expression. The arithmetic/logical prefixes leave an
+ * rvalue; the postfix fallthrough may leave an lvalue, and callers convert
+ * with to_rvalue when they need the value. */
+static void parse_unary(Parser* p) {
+  if (is_punct(&p->cur, '+')) {
+    /* Unary plus only forces an rvalue. */
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+  } else if (is_punct(&p->cur, '-')) {
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_unop(p->cg, UO_NEG);
+  } else if (is_punct(&p->cur, '!')) {
+    /* Logical not is emitted as a comparison against zero. */
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_push_int(p->cg, 0, ty_int(p));
+    cg_cmp(p->cg, CMP_EQ);
+  } else if (is_punct(&p->cur, '~')) {
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_unop(p->cg, UO_BNOT);
+  } else if (is_punct(&p->cur, P_INC) || is_punct(&p->cur, P_DEC)) {
+    /* Pick the operation before the operator token is consumed. */
+    BinOp step = is_punct(&p->cur, P_INC) ? BO_IADD : BO_ISUB;
+    advance(p);
+    parse_unary(p);
+    cg_inc_dec(p->cg, step, /*post=*/0);
+  } else {
+    parse_postfix(p);
+  }
+}
+
+/* Binary operator levels: one function per precedence level. Each parses
+ * its operands with the next-tighter level's function and maps the
+ * operators it accepts to a BinOp/CmpOp inline, which keeps the call
+ * graph readable. */
+
+/* multiplicative-expression: unary { (`*` | `/` | `%`) unary }, left
+ * associative. Division and remainder use the signed operations. */
+static void parse_mul(Parser* p) {
+  BinOp op;
+  parse_unary(p);
+  for (;;) {
+    if (is_punct(&p->cur, '*'))
+      op = BO_IMUL;
+    else if (is_punct(&p->cur, '/'))
+      op = BO_SDIV;
+    else if (is_punct(&p->cur, '%'))
+      op = BO_SREM;
+    else
+      return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_unary(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, op);
+  }
+}
+
+/* additive-expression: mul { (`+` | `-`) mul }, left associative. */
+static void parse_add(Parser* p) {
+  BinOp op;
+  parse_mul(p);
+  for (;;) {
+    if (is_punct(&p->cur, '+'))
+      op = BO_IADD;
+    else if (is_punct(&p->cur, '-'))
+      op = BO_ISUB;
+    else
+      return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_mul(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, op);
+  }
+}
+
+/* shift-expression: add { (`<<` | `>>`) add }, left associative. Right
+ * shift uses the signed (BO_SHR_S) operation. */
+static void parse_shift(Parser* p) {
+  BinOp op;
+  parse_add(p);
+  for (;;) {
+    if (is_punct(&p->cur, P_SHL))
+      op = BO_SHL;
+    else if (is_punct(&p->cur, P_SHR))
+      op = BO_SHR_S;
+    else
+      return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_add(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, op);
+  }
+}
+
+/* relational-expression: shift { (`<` | `>` | `<=` | `>=`) shift }, left
+ * associative, using signed comparisons. */
+static void parse_rel(Parser* p) {
+  CmpOp cmp;
+  parse_shift(p);
+  for (;;) {
+    if (is_punct(&p->cur, '<'))
+      cmp = CMP_LT_S;
+    else if (is_punct(&p->cur, '>'))
+      cmp = CMP_GT_S;
+    else if (is_punct(&p->cur, P_LE))
+      cmp = CMP_LE_S;
+    else if (is_punct(&p->cur, P_GE))
+      cmp = CMP_GE_S;
+    else
+      return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_shift(p);
+    to_rvalue(p); /* right operand */
+    cg_cmp(p->cg, cmp);
+  }
+}
+
+/* equality-expression: rel { (`==` | `!=`) rel }, left associative. */
+static void parse_eq(Parser* p) {
+  CmpOp cmp;
+  parse_rel(p);
+  for (;;) {
+    if (is_punct(&p->cur, P_EQ))
+      cmp = CMP_EQ;
+    else if (is_punct(&p->cur, P_NE))
+      cmp = CMP_NE;
+    else
+      return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_rel(p);
+    to_rvalue(p); /* right operand */
+    cg_cmp(p->cg, cmp);
+  }
+}
+
+/* AND-expression: eq { `&` eq }, left associative. */
+static void parse_band(Parser* p) {
+  parse_eq(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '&')) return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_eq(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, BO_AND);
+  }
+}
+
+/* exclusive-OR-expression: band { `^` band }, left associative. */
+static void parse_bxor(Parser* p) {
+  parse_band(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '^')) return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_band(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, BO_XOR);
+  }
+}
+
+/* inclusive-OR-expression: bxor { `|` bxor }, left associative. */
+static void parse_bor(Parser* p) {
+  parse_bxor(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '|')) return;
+    advance(p);
+    to_rvalue(p); /* left operand */
+    parse_bxor(p);
+    to_rvalue(p); /* right operand */
+    cg_binop(p->cg, BO_OR);
+  }
+}
+
+/* Logical && / || / ?: are short-circuiting and need labels. The spine
+ * doesn't need them yet (the relevant corpus rows are the §6.5_1[2,3,4]
+ * group); they slot in here when those rows graduate. */
+
+/* Map a compound-assignment punctuator to the binary operation it applies.
+ * Returns 1 and sets *out when `t` is one of `+= -= *= /= %= &= |= ^= <<=
+ * >>=`; returns 0 and leaves *out untouched otherwise. */
+static int compound_assign_op(const Tok* t, BinOp* out) {
+  BinOp op;
+  if (is_punct(t, P_ADD_ASSIGN))
+    op = BO_IADD;
+  else if (is_punct(t, P_SUB_ASSIGN))
+    op = BO_ISUB;
+  else if (is_punct(t, P_MUL_ASSIGN))
+    op = BO_IMUL;
+  else if (is_punct(t, P_DIV_ASSIGN))
+    op = BO_SDIV;
+  else if (is_punct(t, P_MOD_ASSIGN))
+    op = BO_SREM;
+  else if (is_punct(t, P_AND_ASSIGN))
+    op = BO_AND;
+  else if (is_punct(t, P_OR_ASSIGN))
+    op = BO_OR;
+  else if (is_punct(t, P_XOR_ASSIGN))
+    op = BO_XOR;
+  else if (is_punct(t, P_SHL_ASSIGN))
+    op = BO_SHL;
+  else if (is_punct(t, P_SHR_ASSIGN))
+    op = BO_SHR_S;
+  else
+    return 0;
+  *out = op;
+  return 1;
+}
+
+/* assignment-expression: the left side is parsed at the inclusive-OR level
+ * (conditional/logical levels are not wired yet); if an assignment operator
+ * follows, the right side is parsed recursively, making assignment right
+ * associative.
+ *
+ * Exactly one value is left on the CG stack in every case. cg_store follows
+ * C semantics and leaves the assigned value on the stack, so the result of
+ * an assignment is usable as an expression; statement-level callers drop
+ * it. Without an assignment operator the operand is left as parsed (it may
+ * still be an lvalue). */
+static void parse_assign_expr(Parser* p) {
+  BinOp compound = (BinOp)0;
+  int is_simple_assign;
+  parse_bor(p);
+  is_simple_assign = is_punct(&p->cur, '=');
+  if (!is_simple_assign && !compound_assign_op(&p->cur, &compound)) {
+    return;
+  }
+  advance(p);
+  if (is_simple_assign) {
+    /* Stack: [lv]. Parse the right side as an rvalue and store:
+     *   rhs, rvalue   [lv, y]
+     *   store         [y] */
+    parse_assign_expr(p);
+    to_rvalue(p);
+    cg_store(p->cg);
+    return;
+  }
+  /* Compound `x op= y`. Stack: [lv]. Sequence:
+   *   dup           [lv, lv]
+   *   load          [lv, x]
+   *   rhs, rvalue   [lv, x, y]
+   *   binop         [lv, x_op_y]
+   *   store         [x_op_y] */
+  cg_dup(p->cg);
+  cg_load(p->cg);
+  parse_assign_expr(p);
+  to_rvalue(p);
+  cg_binop(p->cg, compound);
+  cg_store(p->cg);
+}
+
+/* expression: assignment-expression { `,` assignment-expression }. Each
+ * left operand's value is dropped; the value of the last operand is what
+ * remains on the CG stack. */
+static void parse_expr(Parser* p) {
+  parse_assign_expr(p);
+  for (;;) {
+    if (!is_punct(&p->cur, ',')) return;
+    advance(p);
+    cg_drop(p->cg); /* discard the left operand */
+    parse_assign_expr(p);
+  }
+}
+
+/* ============================================================
+ * Declarations (slice: `int` / `void` only, no struct/union/enum/typedef)
+ * ============================================================ */
+
+/* Result of parsing a declaration-specifier sequence. */
+typedef struct DeclSpecs {
+ const Type* type; /* base type; NULL only when no specifier was consumed */
+ DeclStorage storage; /* DS_AUTO unless `static` or `extern` was seen */
+ u32 flags; /* DeclFlag */
+} DeclSpecs;
+
+/* Parse a run of declaration specifiers into `*out`.
+ *
+ * v1 understands `int` and `void` as type specifiers and `static` /
+ * `extern` as storage classes. `const`, `volatile`, `restrict`, `inline`,
+ * `_Noreturn`, `register` and `auto` are consumed and otherwise ignored. A
+ * second type specifier is a parse error. If specifiers were seen but none
+ * named a type, the type defaults to int (tolerated at the scaffold level
+ * although C99/C11 require a type).
+ *
+ * Returns 1 if at least one specifier was consumed, 0 if the lookahead does
+ * not start a declaration (in which case no token is consumed). */
+static int parse_decl_specs(Parser* p, DeclSpecs* out) {
+  int seen = 0;
+  out->flags = DF_NONE;
+  out->storage = DS_AUTO;
+  out->type = NULL;
+  for (;;) {
+    const Tok* t = &p->cur;
+    if (is_kw(p, t, KW_INT)) {
+      if (out->type) perr(p, "conflicting type specifiers");
+      out->type = type_prim(p->pool, TY_INT);
+    } else if (is_kw(p, t, KW_VOID)) {
+      if (out->type) perr(p, "conflicting type specifiers");
+      out->type = type_void(p->pool);
+    } else if (is_kw(p, t, KW_STATIC)) {
+      out->storage = DS_STATIC;
+    } else if (is_kw(p, t, KW_EXTERN)) {
+      out->storage = DS_EXTERN;
+    } else if (!(is_kw(p, t, KW_CONST) || is_kw(p, t, KW_VOLATILE) ||
+                 is_kw(p, t, KW_RESTRICT) || is_kw(p, t, KW_INLINE) ||
+                 is_kw(p, t, KW_NORETURN) || is_kw(p, t, KW_REGISTER) ||
+                 is_kw(p, t, KW_AUTO))) {
+      /* Not a specifier: the sequence ends here. */
+      break;
+    }
+    /* Every recognized specifier is consumed the same way. */
+    advance(p);
+    seen = 1;
+  }
+  if (seen && !out->type) {
+    out->type = ty_int(p);
+  }
+  return seen;
+}
+
+/* Forward decl for parse_compound_stmt (mutually recursive with statement
+ * dispatch). */
+static void parse_stmt(Parser* p);
+static void parse_compound_stmt(Parser* p);
+
+/* Allocate a frame slot for a local variable of `type` and bind `name`
+ * into the current scope. `loc` is the declarator's source location; it is
+ * recorded on the slot and used for diagnostics.
+ *
+ * A named local may be declared only once per scope (C11 §6.7p3): if the
+ * innermost scope already binds `name`, a redefinition error is reported
+ * at `loc` instead of silently shadowing the earlier declaration. Bindings
+ * in enclosing scopes are not consulted, so ordinary shadowing still works.
+ *
+ * Returns the new frame slot. */
+static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) {
+  FrameSlotDesc fsd;
+  FrameSlot s;
+  SymEntry* e;
+  if (name) {
+    for (e = p->scope->entries; e; e = e->next) {
+      if (e->name == name) {
+        size_t nlen = 0;
+        const char* nm = pool_str(p->pool, name, &nlen);
+        compiler_panic(p->c, loc, "redefinition of '%.*s'", (int)nlen,
+                       nm ? nm : "?");
+      }
+    }
+  }
+  memset(&fsd, 0, sizeof fsd);
+  fsd.type = type;
+  fsd.name = name;
+  fsd.loc = loc;
+  fsd.size = abi_sizeof(p->abi, type);
+  fsd.align = abi_alignof(p->abi, type);
+  fsd.kind = FS_LOCAL;
+  fsd.flags = FSF_NONE;
+  s = cg_local(p->cg, &fsd);
+  e = scope_define(p, name, SEK_LOCAL, type);
+  e->v.slot = s;
+  return s;
+}
+
+/* Parse one init-declarator once the decl-specs have been consumed.
+ * Spine grammar: declarator = IDENT, optionally followed by `=`
+ * assignment-expression. Only local declarations are handled at this
+ * slice; pointer/array/function declarators are TODO and will wrap the
+ * IDENT here.
+ *
+ * The local is created (and so is in scope) before its initializer is
+ * parsed. The initializer is emitted as a store to the new slot; cg_store
+ * leaves the assigned value on the stack (C semantics), so it is dropped
+ * afterwards because a declaration is statement context. */
+static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
+  Sym name;
+  SrcLoc loc;
+  FrameSlot slot;
+  if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+    perr(p, "expected declarator name");
+  }
+  name = p->cur.v.ident;
+  loc = p->cur.loc;
+  advance(p);
+  slot = make_local(p, name, specs->type, loc);
+  if (!accept_punct(p, '=')) {
+    return;
+  }
+  cg_set_loc(p->cg, loc);
+  cg_push_local_typed(p->cg, slot, specs->type);
+  parse_assign_expr(p);
+  to_rvalue(p);
+  cg_store(p->cg);
+  cg_drop(p->cg);
+}
+
+/* Parse a comma-separated list of init-declarators sharing `specs`, then
+ * the terminating `;`. */
+static void parse_local_decl(Parser* p, const DeclSpecs* specs) {
+  do {
+    parse_init_declarator(p, specs);
+  } while (accept_punct(p, ','));
+  expect_punct(p, ';', "';' after declaration");
+}
+
+/* ============================================================
+ * Statements
+ * ============================================================ */
+
+/* if-statement, entered after the `if` keyword has been consumed.
+ * Emitted shape:
+ *     cond; branch_false L_else; then-body
+ *     [jump L_end; L_else: else-body; L_end:]   with an else clause
+ *     [L_else:]                                 without one */
+static void parse_if_stmt(Parser* p) {
+  CGLabel else_lbl = cg_label_new(p->cg);
+  CGLabel end_lbl = cg_label_new(p->cg);
+  expect_punct(p, '(', "'('");
+  parse_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ')', "')'");
+  cg_branch_false(p->cg, else_lbl);
+  parse_stmt(p);
+  if (!accept_kw(p, KW_ELSE)) {
+    cg_label_place(p->cg, else_lbl);
+    return;
+  }
+  cg_jump(p->cg, end_lbl);
+  cg_label_place(p->cg, else_lbl);
+  parse_stmt(p);
+  cg_label_place(p->cg, end_lbl);
+}
+
+/* while-statement, entered after the `while` keyword has been consumed.
+ * Emitted shape:
+ *     L_top: cond; branch_false L_end; body; jump L_top; L_end:
+ * Inside the body `break` targets L_end and `continue` targets L_top; the
+ * enclosing loop's targets are restored afterwards. */
+static void parse_while_stmt(Parser* p) {
+  CGLabel top = cg_label_new(p->cg);
+  CGLabel done = cg_label_new(p->cg);
+  CGLabel outer_break = p->cur_break;
+  CGLabel outer_continue = p->cur_continue;
+
+  expect_punct(p, '(', "'('");
+  cg_label_place(p->cg, top);
+  parse_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ')', "')'");
+  cg_branch_false(p->cg, done);
+
+  p->cur_break = done;
+  p->cur_continue = top;
+  parse_stmt(p);
+  p->cur_continue = outer_continue;
+  p->cur_break = outer_break;
+
+  cg_jump(p->cg, top);
+  cg_label_place(p->cg, done);
+}
+
+/* for-statement, entered after the `for` keyword has been consumed.
+ *
+ * The step clause appears in the source before the body but must run after
+ * it, and a single-pass parser cannot replay tokens. The step code is
+ * therefore emitted in source order and jumped over on the way into the
+ * body. Emitted shape:
+ *
+ *     init
+ *   L_top:  cond; branch_false L_end     (no branch when cond is empty)
+ *           jump L_body
+ *   L_step: step; jump L_top
+ *   L_body: body; jump L_step
+ *   L_end:
+ *
+ * Inside the body `break` targets L_end and `continue` targets L_step. */
+static void parse_for_stmt(Parser* p) {
+ CGLabel L_top = cg_label_new(p->cg);
+ CGLabel L_step = cg_label_new(p->cg);
+ CGLabel L_end = cg_label_new(p->cg);
+ CGLabel saved_break = p->cur_break;
+ CGLabel saved_continue = p->cur_continue;
+
+ /* `for` introduces a block scope spanning init/cond/step/body so that
+ * an init declaration `int i=...` is visible only inside the loop. */
+ scope_push(p);
+ expect_punct(p, '(', "'('");
+
+ /* init: declaration | expr | ; */
+ if (!accept_punct(p, ';')) {
+ DeclSpecs specs;
+ if (parse_decl_specs(p, &specs)) {
+ parse_local_decl(p, &specs); /* consumes the trailing ';' */
+ } else {
+ /* Expression init: evaluated for side effects, value discarded. */
+ parse_expr(p);
+ cg_drop(p->cg);
+ expect_punct(p, ';', "';'");
+ }
+ }
+
+ cg_label_place(p->cg, L_top);
+ /* cond: optional. Empty cond means "always true". */
+ if (!is_punct(&p->cur, ';')) {
+ parse_expr(p);
+ to_rvalue(p);
+ cg_branch_false(p->cg, L_end);
+ }
+ expect_punct(p, ';', "';'");
+
+ /* Need to remember the step expression — but since this is single-pass,
+ * we can't replay tokens. Standard trick: emit body before step by
+ * jumping over the step on first entry, then placing the step where
+ * the back-edge arrives. */
+ {
+ CGLabel L_body = cg_label_new(p->cg);
+ cg_jump(p->cg, L_body);
+ cg_label_place(p->cg, L_step);
+ /* step: optional. */
+ if (!is_punct(&p->cur, ')')) {
+ parse_expr(p);
+ cg_drop(p->cg);
+ }
+ /* After the step, re-test the condition. */
+ cg_jump(p->cg, L_top);
+ expect_punct(p, ')', "')'");
+ cg_label_place(p->cg, L_body);
+
+ /* Install this loop's break/continue targets for the body only. */
+ p->cur_break = L_end;
+ p->cur_continue = L_step;
+ parse_stmt(p);
+ p->cur_break = saved_break;
+ p->cur_continue = saved_continue;
+
+ /* Falling off the end of the body runs the step next. */
+ cg_jump(p->cg, L_step);
+ cg_label_place(p->cg, L_end);
+ }
+ scope_pop(p);
+}
+
+/* return-statement, entered after the `return` keyword has been consumed.
+ * `return;` emits cg_ret with 0; `return expr;` evaluates the expression
+ * to an rvalue and emits cg_ret with 1 once the `;` has been consumed. */
+static void parse_return_stmt(Parser* p) {
+  if (!accept_punct(p, ';')) {
+    parse_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ';', "';' after return value");
+    cg_ret(p->cg, 1);
+    return;
+  }
+  cg_ret(p->cg, 0);
+}
+
+/* break-statement, entered after the `break` keyword has been consumed.
+ * Jumps to the innermost break target; a target of 0 means there is no
+ * enclosing loop or switch, which is an error. */
+static void parse_break_stmt(Parser* p) {
+  CGLabel target = p->cur_break;
+  if (target == 0) {
+    perr(p, "'break' outside of loop or switch");
+  }
+  cg_jump(p->cg, target);
+  expect_punct(p, ';', "';' after break");
+}
+
+/* continue-statement, entered after the `continue` keyword has been
+ * consumed. p->cur_continue is the label a continue jumps to; a value of
+ * 0 is treated as "not inside a loop" and reported via perr (presumably
+ * perr does not return — confirm against its definition). */
+static void parse_continue_stmt(Parser* p) {
+  CGLabel target = p->cur_continue;
+
+  if (target == 0) {
+    perr(p, "'continue' outside of loop");
+  }
+  /* The jump is emitted before the terminating ';' is checked. */
+  cg_jump(p->cg, target);
+  expect_punct(p, ';', "';' after continue");
+}
+
+/* compound-statement: '{' block-item* '}'.
+ *
+ * Pushes a scope after the opening brace and pops it after the closing
+ * brace. Each block item is tried as a declaration first: when
+ * parse_decl_specs reports that it parsed specifiers, the rest is handed
+ * to parse_local_decl; otherwise the item is parsed as a statement.
+ * (This relies on parse_decl_specs leaving the token stream untouched
+ * when it returns 0 — confirm against its definition.)
+ *
+ * The loop also stops at TOK_EOF so an unterminated block is reported by
+ * the closing expect_punct instead of looping. */
+static void parse_compound_stmt(Parser* p) {
+  expect_punct(p, '{', "'{'");
+  scope_push(p);
+  while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) {
+    DeclSpecs specs;
+
+    /* Drain stray newlines & pp-hash artifacts. (PP normally consumes
+     * these, but we tolerate them here as a no-op safety net.) */
+    if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) {
+      advance(p);
+      continue;
+    }
+
+    /* block-item: declaration | statement */
+    if (parse_decl_specs(p, &specs)) {
+      parse_local_decl(p, &specs);
+    } else {
+      parse_stmt(p);
+    }
+  }
+  expect_punct(p, '}', "'}'");
+  scope_pop(p);
+}
+
+/* statement dispatcher.
+ *
+ * Records the current token's location with the code generator, then
+ * selects a production from the current token:
+ *
+ *   '{'                      compound statement (brace left for the callee)
+ *   ';'                      null statement, consumed here
+ *   if / while / for /
+ *   return / break / continue
+ *                            keyword consumed here, then the matching
+ *                            parse_*_stmt is called
+ *   anything else            expression statement: the value is dropped
+ *                            and a ';' is required
+ *
+ * The tests run in the order listed. */
+static void parse_stmt(Parser* p) {
+  cg_set_loc(p->cg, tok_loc(&p->cur));
+
+  if (is_punct(&p->cur, '{')) {
+    parse_compound_stmt(p);
+  } else if (is_punct(&p->cur, ';')) {
+    advance(p);
+  } else if (is_kw(p, &p->cur, KW_IF)) {
+    advance(p);
+    parse_if_stmt(p);
+  } else if (is_kw(p, &p->cur, KW_WHILE)) {
+    advance(p);
+    parse_while_stmt(p);
+  } else if (is_kw(p, &p->cur, KW_FOR)) {
+    advance(p);
+    parse_for_stmt(p);
+  } else if (is_kw(p, &p->cur, KW_RETURN)) {
+    advance(p);
+    parse_return_stmt(p);
+  } else if (is_kw(p, &p->cur, KW_BREAK)) {
+    advance(p);
+    parse_break_stmt(p);
+  } else if (is_kw(p, &p->cur, KW_CONTINUE)) {
+    advance(p);
+    parse_continue_stmt(p);
+  } else {
+    /* expression-statement */
+    parse_expr(p);
+    cg_drop(p->cg);
+    expect_punct(p, ';', "';' after expression");
+  }
+}
+
+/* ============================================================
+ * External (top-level) declarations
+ * ============================================================ */
+
+/* function-definition, entered once the declaration specifiers and the
+ * function name have been consumed; the current token is expected to be
+ * '('.
+ *
+ * This slice accepts only `<type> IDENT ( void ) { ... }` and
+ * `<type> IDENT ( ) { ... }`; any other parameter list is reported via
+ * perr. The rest of the §6.7.6 declarator surface (parameters, varargs,
+ * pointer/array returns) is not handled here.
+ *
+ *   specs      declaration specifiers: supplies the return type, and the
+ *              storage class (DS_STATIC gives DS_STATIC/DL_INTERNAL,
+ *              anything else gives DS_EXTERN/DL_EXTERNAL)
+ *   fname      function name
+ *   fname_loc  location stored on the Decl and the CGFuncDesc, and set as
+ *              the codegen location before the function is opened
+ *
+ * Steps, in order: build the function type and its ABI info, declare it
+ * in the DeclTable, bind the name in the current scope (the caller's,
+ * since no scope has been pushed yet), then open a scope, emit the
+ * function through CG, and pop the scope. */
+static void parse_function_definition(Parser* p, const DeclSpecs* specs,
+                                      Sym fname, SrcLoc fname_loc) {
+  int is_static = (specs->storage == DS_STATIC);
+  const Type* fn_ty;
+  const ABIFuncInfo* abi;
+  Decl decl_in;
+  ObjSymId fsym;
+  SymEntry* entry;
+  CGFuncDesc fd;
+
+  /* Parameter list: `void` or nothing before the ')'. */
+  expect_punct(p, '(', "'('");
+  if (!accept_kw(p, KW_VOID) && !is_punct(&p->cur, ')')) {
+    perr(p, "only `(void)` parameter list is supported in v1 slice");
+  }
+  expect_punct(p, ')', "')'");
+
+  /* No parameter types, zero parameters, final argument 0 (as before;
+   * presumably the "variadic" flag — confirm against type_func). */
+  fn_ty = type_func(p->pool, specs->type, NULL, 0, 0);
+  abi = abi_func_info(p->abi, fn_ty);
+
+  /* Register the declaration and fetch its object symbol. Nothing here
+   * verifies the binding decl_declare gave the symbol. */
+  memset(&decl_in, 0, sizeof decl_in);
+  decl_in.name = fname;
+  decl_in.type = fn_ty;
+  decl_in.loc = fname_loc;
+  decl_in.visibility = SV_DEFAULT;
+  if (is_static) {
+    decl_in.storage = DS_STATIC;
+    decl_in.linkage = DL_INTERNAL;
+  } else {
+    decl_in.storage = DS_EXTERN;
+    decl_in.linkage = DL_EXTERNAL;
+  }
+  fsym = decl_obj_sym(p->decls, decl_declare(p->decls, &decl_in));
+
+  /* Bind the name before the body scope opens so calls resolve. */
+  entry = scope_define(p, fname, SEK_FUNC, fn_ty);
+  entry->v.sym = fsym;
+
+  /* Describe the function for CG: no parameters, default text section,
+   * no section group. */
+  memset(&fd, 0, sizeof fd);
+  fd.loc = fname_loc;
+  fd.sym = fsym;
+  fd.fn_type = fn_ty;
+  fd.abi = abi;
+  fd.text_section_id = p->text_sec;
+  fd.group_id = OBJ_GROUP_NONE;
+  fd.params = NULL;
+  fd.nparams = 0;
+
+  /* Parameter scope (empty in this slice); parse_compound_stmt pushes a
+   * further scope of its own for the body. */
+  scope_push(p);
+  cg_set_loc(p->cg, fname_loc);
+  cg_func_begin(p->cg, &fd);
+  parse_compound_stmt(p);
+
+  /* Trailing fall-through return, emitted unconditionally after the
+   * body: a plain return for void (or a missing type), otherwise a
+   * return of integer 0 typed as the return type. */
+  if (!specs->type || specs->type->kind == TY_VOID) {
+    cg_ret(p->cg, 0);
+  } else {
+    cg_push_int(p->cg, 0, specs->type);
+    cg_ret(p->cg, 1);
+  }
+  cg_func_end(p->cg);
+  scope_pop(p);
+}
+
+/* external-declaration.
+ *
+ * Parses declaration specifiers, then a declarator that in this slice
+ * must be a single non-keyword identifier (pointer/array layers are not
+ * handled). If a '(' follows the name, the rest is parsed as a function
+ * definition. Anything else — i.e. a global object declaration such as
+ * `int g;` or `int g = 7;` — is rejected with a diagnostic; no
+ * declaration is registered and no storage is reserved for it. */
+static void parse_external_decl(Parser* p) {
+  DeclSpecs specs;
+  Sym name;
+  SrcLoc loc;
+
+  if (!parse_decl_specs(p, &specs)) {
+    perr(p, "expected declaration");
+  }
+
+  /* Declarator: an identifier that is not a keyword spelling. */
+  if (!(p->cur.kind == TOK_IDENT &&
+        ident_kw(p, p->cur.v.ident) == KW_NONE)) {
+    perr(p, "expected declarator");
+  }
+
+  /* Capture location and name before stepping past the identifier. */
+  loc = tok_loc(&p->cur);
+  name = p->cur.v.ident;
+  advance(p);
+
+  if (is_punct(&p->cur, '(')) {
+    parse_function_definition(p, &specs, name, loc);
+  } else {
+    perr(p, "global object declarations not supported in v1 slice");
+  }
+}
+
+/* translation-unit: external-declaration*, up to TOK_EOF.
+ * Newline tokens and pp-hash tokens found between declarations are
+ * skipped one at a time; every other token starts an external
+ * declaration. */
+static void parse_translation_unit(Parser* p) {
+  while (p->cur.kind != TOK_EOF) {
+    if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) {
+      advance(p);
+    } else {
+      parse_external_decl(p);
+    }
+  }
+}
+
+/* ============================================================
+ * Entry point
+ * ============================================================ */
+
+/* C front-end entry point.
+ *
+ * Builds a zeroed Parser on the stack, wires it to the caller's
+ * collaborators, and parses the whole translation unit.
+ *
+ *   c      compiler context; also supplies the ABI (c->abi) and the
+ *          pool used for interning and types (c->global)
+ *   pp     preprocessor that yields the token stream
+ *   decls  declaration table; its object (decl_obj) receives the text
+ *          section
+ *   cg     code generator
+ *   debug  stored on the parser; not otherwise used in this function
+ */
+void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) {
+  Parser p;
+  Sym text_name;
+  CKw kw;
+
+  memset(&p, 0, sizeof p);
+  p.c = c;
+  p.abi = c->abi;
+  p.pool = c->global;
+  p.pp = pp;
+  p.decls = decls;
+  p.cg = cg;
+  p.debug = debug;
+
+  /* Intern keyword spellings. The lexer doesn't know about keywords; this
+   * is the canonical bucketization site (DESIGN §5.1 / lex.h §6.4).
+   * Index 0 is skipped (presumably KW_NONE — confirm against CKw). */
+  kw = (CKw)1;
+  while (kw < KW_COUNT) {
+    p.kw_sym[kw] = pool_intern_cstr(p.pool, kw_names[kw]);
+    ++kw;
+  }
+
+  /* File scope: created with a NULL second argument. */
+  p.scope = scope_new(&p, NULL);
+
+  /* Default text section, ".text". -ffunction-sections / explicit
+   * attribute(section) cases will replace this per-function. The final
+   * 4u is presumably the section alignment — confirm against
+   * obj_section. */
+  text_name = pool_intern_cstr(p.pool, ".text");
+  p.text_sec = obj_section(decl_obj(decls), text_name, SEC_TEXT,
+                           SF_ALLOC | SF_EXEC, 4u);
+
+  /* Prime the one-token lookahead. PP yields preprocessed C tokens;
+   * directives have already been consumed. */
+  p.cur = pp_next(p.pp);
+
+  parse_translation_unit(&p);
+}