kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 19a45576c25b82c868654d936291e1365c4f1ef4
parent 6aa3cb14986ed02faabb6ecb9cd19d697a95566f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 18:37:07 -0700

parse/cg/decl: scaffold C front-end vertical, pass spine corpus

Recursive-descent parser, value-stack codegen, and DeclTable land as
real implementations. Six spine cases (6_5_01..03 + 6_8_01..03) pass
across all four test paths (D/R/E/J); cases_err/6_5_undeclared still
fails with a precise diagnostic.

Module shape per DESIGN.md §5: parser drives DeclTable for C semantics
and CG for executable code, single-pass with one token of lookahead.
Operators are named per C11 §6.5 productions and statements per §6.8 so
each new feature is a localized addition at its named function. cg_store
follows C semantics (leaves assigned value on the stack); a new
cg_inc_dec helper keeps inc/dec inside CG instead of juggling 3-element
rotates from the parser.

Diffstat:
Msrc/api/stubs.c | 33+++++----------------------------
Asrc/cg/cg.c | 948+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/cg/cg.h | 9+++++++++
Asrc/decl/decl.c | 187+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/decl/decl.h | 5+++++
Asrc/parse/parse.c | 1192+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 2346 insertions(+), 28 deletions(-)

diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -37,41 +37,18 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) { /* ============================================================ * Parser - * ============================================================ */ + * ============================================================ + * parse_c lives in src/parse/parse.c. The asm parser is still a stub + * pending its own corpus rows; reaching it from a CFREE_LANG_ASM input + * raises a clean diagnostic. */ -void parse_c(Compiler* c, Pp* p, DeclTable* d, CG* g, Debug* dbg) { - (void)p; - (void)d; - (void)g; - (void)dbg; - unimplemented(c, "parse_c"); -} void parse_asm(Compiler* c, Lexer* l, MCEmitter* m) { (void)l; (void)m; unimplemented(c, "parse_asm"); } -/* ============================================================ - * Declarations - * ============================================================ */ - -DeclTable* decl_new(Compiler* c, ObjBuilder* o) { - (void)o; - unimplemented(c, "decl"); -} -void decl_free(DeclTable* d) { (void)d; } - -/* ============================================================ - * Codegen + arch target - * ============================================================ */ - -CG* cg_new(Compiler* c, CGTarget* t, Debug* d) { - (void)t; - (void)d; - unimplemented(c, "cg"); -} -void cg_free(CG* g) { (void)g; } +/* DeclTable lives in src/decl/decl.c. CG lives in src/cg/cg.c. */ /* mc_new / mc_free live in src/arch/mc.c. * cgtarget_new / cgtarget_finalize / cgtarget_free live in src/arch/<target>.c diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -0,0 +1,948 @@ +/* Single-pass code generator with a TCC-style value stack. + * + * The parser pushes values (lvalues, immediates, register rvalues) and + * issues operations; cg materializes operands and dispatches to CGTarget. + * No AST. At -O0 the wrapped target backend is a real CGTarget; at -O1+ + * opt_cgtarget records the same calls into IR for cross-function passes. + * + * Value stack semantics: + * - SValue.op carries an Operand whose `kind` decides what the value is. + * - OPK_IMM / OPK_REG are rvalues (can be consumed by binop/cmp/store). + * - OPK_LOCAL / OPK_GLOBAL / OPK_INDIRECT are lvalues. cg_load promotes + * them to OPK_REG via target->load + a fresh scratch register. + * + * This is the spine slice — enough for §6.5/§6.8 fixtures: scalar i32 + * locals, integer arithmetic, comparisons, control flow, and return. + * Aggregates, atomics, calls, and the asm/setjmp/intrinsic methods are + * placeholders pending their corpus rows. The interface in cg.h is the + * commitment; this file fills in the slice that's exercised today. */ + +#include "cg/cg.h" + +#include <string.h> + +#include "abi/abi.h" +#include "arch/arch.h" +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "debug/debug.h" +#include "obj/obj.h" +#include "type/type.h" + +/* ============================================================ + * Value stack + * ============================================================ */ + +typedef struct SValue { + Operand op; /* IMM/REG (rvalue) or LOCAL/GLOBAL/INDIRECT (lvalue) */ + const Type* type; /* C semantic type of the value (post-promotion) */ +} SValue; + +#define CG_STACK_INITIAL 16u + +struct CG { + Compiler* c; + CGTarget* target; + Debug* debug; + TargetABI* abi; + Pool* pool; + + /* Function scope */ + const CGFuncDesc* fn_desc; + ObjSymId fn_sym; + ObjSecId fn_text_sec; + u32 fn_begin_pos; + const Type* fn_ret_type; + const ABIFuncInfo* fn_abi; + + SrcLoc cur_loc; + + /* Value stack — grown via heap; arena would also work but heap is fine + * since it's freed in cg_free. */ + SValue* stack; + u32 sp; + u32 cap; +}; + +static void stack_grow(CG* g, u32 want) { + Heap* h = g->c->env->heap; + u32 cap = g->cap; + SValue* nb; + if (cap >= want) return; + while (cap < want) cap = cap ? cap * 2u : CG_STACK_INITIAL; + nb = (SValue*)h->alloc(h, sizeof(SValue) * cap, _Alignof(SValue)); + if (g->stack) { + memcpy(nb, g->stack, sizeof(SValue) * g->sp); + h->free(h, g->stack, sizeof(SValue) * g->cap); + } + g->stack = nb; + g->cap = cap; +} + +static void push(CG* g, SValue v) { + stack_grow(g, g->sp + 1); + g->stack[g->sp++] = v; +} + +static SValue pop(CG* g) { + if (g->sp == 0) { + compiler_panic(g->c, g->cur_loc, "cg: stack underflow"); + } + return g->stack[--g->sp]; +} + +static SValue* top(CG* g) { + if (g->sp == 0) { + compiler_panic(g->c, g->cur_loc, "cg: stack empty"); + } + return &g->stack[g->sp - 1]; +} + +/* ============================================================ + * Operand sugar + * ============================================================ */ + +static u8 type_class(const Type* ty) { + if (ty && (ty->kind == TY_FLOAT || ty->kind == TY_DOUBLE || + ty->kind == TY_LDOUBLE)) { + return RC_FP; + } + return RC_INT; +} + +static Operand op_imm(i64 v, const Type* ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_IMM; + o.cls = type_class(ty); + o.type = ty; + o.v.imm = v; + return o; +} + +static Operand op_reg(Reg r, const Type* ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_REG; + o.cls = type_class(ty); + o.type = ty; + o.v.reg = r; + return o; +} + +static Operand op_local(FrameSlot s, const Type* ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_LOCAL; + o.cls = RC_INT; + o.type = ty; + o.v.frame_slot = s; + return o; +} + +static Operand op_global(ObjSymId sym, i64 addend, const Type* ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_GLOBAL; + o.cls = RC_INT; + o.type = ty; + o.v.global.sym = sym; + o.v.global.addend = addend; + return o; +} + +/* ============================================================ + * MemAccess derivation + * ============================================================ */ + +static MemAccess derive_mem(CG* g, const Type* ty, AliasKind alias_kind, + i32 alias_local) { + MemAccess m; + memset(&m, 0, sizeof m); + m.type = ty; + m.size = abi_sizeof(g->abi, ty); + m.align = abi_alignof(g->abi, ty); + m.flags = MF_NONE; + if (ty && (ty->qual & Q_VOLATILE)) m.flags |= MF_VOLATILE; + if (ty && (ty->qual & Q_ATOMIC)) m.flags |= MF_ATOMIC; + m.alias.kind = (u8)alias_kind; + if (alias_kind == ALIAS_LOCAL) { + m.alias.v.local_id = alias_local; + } + return m; +} + +/* Pick an alias root from an lvalue Operand. */ +static AliasKind alias_for_lvalue(const Operand* o) { + switch (o->kind) { + case OPK_LOCAL: + return ALIAS_LOCAL; + case OPK_GLOBAL: + return ALIAS_GLOBAL; + case OPK_INDIRECT: + default: + return ALIAS_UNKNOWN; + } +} + +/* ============================================================ + * Construction + * ============================================================ */ + +CG* cg_new(Compiler* c, CGTarget* t, Debug* d) { + Heap* h = c->env->heap; + CG* g = (CG*)h->alloc(h, sizeof(CG), _Alignof(CG)); + memset(g, 0, sizeof *g); + g->c = c; + g->target = t; + g->debug = d; + g->abi = c->abi; + g->pool = c->global; + /* Wire Debug into the backend so per-instruction emit calls can attribute + * line rows. cg owns this hookup per DESIGN §11. */ + if (t) t->debug = d; + if (t && t->mc) t->mc->debug = d; + return g; +} + +void cg_free(CG* g) { + Heap* h; + if (!g) return; + h = g->c->env->heap; + if (g->stack) h->free(h, g->stack, sizeof(SValue) * g->cap); + h->free(h, g, sizeof *g); +} + +/* ============================================================ + * Function lifecycle + * ============================================================ */ + +void cg_func_begin(CG* g, const CGFuncDesc* fd) { + CGTarget* T = g->target; + g->fn_desc = fd; + g->fn_sym = fd->sym; + g->fn_text_sec = fd->text_section_id; + g->fn_ret_type = fd->fn_type ? fd->fn_type->fn.ret : NULL; + g->fn_abi = fd->abi; + g->sp = 0; + + /* Class-1 DWARF: a new subprogram opens. doc/DWARF.md §3.1 makes this + * the parser's job; we forward through cg as a convenience hook. */ + if (g->debug) { + debug_func_begin(g->debug, fd->sym, DEBUG_TYPE_NONE, fd->loc); + } + + g->fn_begin_pos = T->mc ? T->mc->pos(T->mc) : 0u; + T->func_begin(T, fd); +} + +void cg_func_end(CG* g) { + CGTarget* T = g->target; + T->func_end(T); + if (g->debug && T->mc) { + u32 end_pos = T->mc->pos(T->mc); + debug_func_pc_range(g->debug, g->fn_text_sec, g->fn_begin_pos, end_pos); + debug_func_end(g->debug); + } + g->fn_desc = NULL; +} + +/* ============================================================ + * Locals / parameters + * ============================================================ */ + +FrameSlot cg_local(CG* g, const FrameSlotDesc* d) { + return g->target->frame_slot(g->target, d); +} + +void cg_param(CG* g, const CGParamDesc* d) { g->target->param(g->target, d); } + +void cg_bind_decl(CG* g, DeclId id) { + /* Decl binding is parser territory at this slice; nothing for cg to do. */ + (void)g; + (void)id; +} + +/* ============================================================ + * Pushes + * ============================================================ */ + +void cg_push_int(CG* g, i64 v, const Type* ty) { + SValue sv; + sv.op = op_imm(v, ty); + sv.type = ty; + push(g, sv); +} + +void cg_push_const(CG* g, ConstBytes cb) { + /* Materialize into a fresh register through target->load_const so the + * stack value is plain rvalue REG. The constant pool / immediate-encoding + * choice is the backend's. */ + CGTarget* T = g->target; + Reg r = T->alloc_reg(T, type_class(cb.type), cb.type); + Operand dst = op_reg(r, cb.type); + T->load_const(T, dst, cb); + { + SValue sv; + sv.op = dst; + sv.type = cb.type; + push(g, sv); + } +} + +void cg_push_float(CG* g, double v, const Type* ty) { + /* Convenience path that sidesteps exact-bit literal materialization. + * Conforming literal parsing should prefer cg_push_const. */ + CGTarget* T = g->target; + union { + double d; + float f; + u8 b[8]; + } u; + ConstBytes cb; + cb.type = ty; + cb.size = abi_sizeof(g->abi, ty); + cb.align = abi_alignof(g->abi, ty); + if (ty && ty->kind == TY_FLOAT) { + u.f = (float)v; + } else { + u.d = v; + } + cb.bytes = u.b; + cg_push_const(g, cb); + (void)T; +} + +void cg_push_str(CG* g, Sym str_id, const Type* ty) { + /* Place the string bytes in .rodata and push a pointer. v1 unused by + * the spine corpus; left as a clean stub. */ + (void)g; + (void)str_id; + (void)ty; + compiler_panic(g->c, g->cur_loc, "cg_push_str: not implemented in v1 slice"); +} + +void cg_push_local(CG* g, FrameSlot s) { + /* The slot's type isn't recorded in cg directly — we trust the parser's + * declared local type. Spine: local types come back through the parser's + * scope record, not through cg, so the push uses NULL type and the + * subsequent cg_load supplies the right type. The parser actually pushes + * via the type-aware variant; this base entry is here for completeness. */ + SValue sv; + sv.op = op_local(s, NULL); + sv.type = NULL; + push(g, sv); +} + +/* Type-aware variants used by the parser. Not in the public header; the + * parser calls these directly via a small extension below. */ +void cg_push_local_typed(CG* g, FrameSlot s, const Type* ty); +void cg_push_local_typed(CG* g, FrameSlot s, const Type* ty) { + SValue sv; + sv.op = op_local(s, ty); + sv.type = ty; + push(g, sv); +} + +void cg_push_global(CG* g, ObjSymId sym, const Type* ty) { + SValue sv; + sv.op = op_global(sym, 0, ty); + sv.type = ty; + push(g, sv); +} + +/* ============================================================ + * Stack manipulation + * ============================================================ */ + +void cg_dup(CG* g) { + SValue v = *top(g); + push(g, v); +} + +void cg_swap(CG* g) { + SValue a; + SValue b; + if (g->sp < 2) compiler_panic(g->c, g->cur_loc, "cg_swap: need 2 values"); + a = g->stack[g->sp - 1]; + b = g->stack[g->sp - 2]; + g->stack[g->sp - 1] = b; + g->stack[g->sp - 2] = a; +} + +void cg_drop(CG* g) { (void)pop(g); } + +/* ============================================================ + * load / store / addr + * ============================================================ */ + +static int is_lvalue(const Operand* o) { + return o->kind == OPK_LOCAL || o->kind == OPK_GLOBAL || + o->kind == OPK_INDIRECT; +} + +void cg_load(CG* g) { + SValue v = pop(g); + CGTarget* T = g->target; + if (!is_lvalue(&v.op)) { + /* Already an rvalue — passing-through is correct (cg_load is idempotent + * on rvalues so the parser can call it eagerly). */ + push(g, v); + return; + } + { + const Type* ty = v.type ? v.type : v.op.type; + Reg r = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(r, ty); + MemAccess ma; + AliasKind ak = alias_for_lvalue(&v.op); + i32 alias_local = (ak == ALIAS_LOCAL) ? (i32)v.op.v.frame_slot : 0; + ma = derive_mem(g, ty, ak, alias_local); + T->load(T, dst, v.op, ma); + { + SValue rv; + rv.op = dst; + rv.type = ty; + push(g, rv); + } + } +} + +void cg_addr(CG* g) { + SValue v = pop(g); + CGTarget* T = g->target; + if (!is_lvalue(&v.op)) { + compiler_panic(g->c, g->cur_loc, "cg_addr: operand is not an lvalue"); + } + { + const Type* pty = type_ptr(g->pool, v.type ? v.type : v.op.type); + Reg r = T->alloc_reg(T, RC_INT, pty); + Operand dst = op_reg(r, pty); + T->addr_of(T, dst, v.op); + { + SValue rv; + rv.op = dst; + rv.type = pty; + push(g, rv); + } + } +} + +void cg_store(CG* g) { + /* stack: [..., lv, rv] → [..., rv] + * + * C semantics: the value of an assignment expression is the value + * stored. Leaving rv on top of the stack lets the parser fall through + * to the next operator naturally; statement-context callers cg_drop + * the leftover. */ + SValue rv = pop(g); + SValue lv = pop(g); + CGTarget* T = g->target; + if (!is_lvalue(&lv.op)) { + compiler_panic(g->c, g->cur_loc, "cg_store: destination is not an lvalue"); + } + { + const Type* ty = lv.type ? lv.type : lv.op.type; + AliasKind ak = alias_for_lvalue(&lv.op); + i32 alias_local = (ak == ALIAS_LOCAL) ? (i32)lv.op.v.frame_slot : 0; + MemAccess ma = derive_mem(g, ty, ak, alias_local); + /* IMM is a legal source for store; otherwise force into a reg. */ + Operand src = rv.op; + if (src.kind != OPK_REG && src.kind != OPK_IMM) { + Reg r = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(r, ty); + MemAccess mr; + AliasKind sak = alias_for_lvalue(&src); + i32 saloc = (sak == ALIAS_LOCAL) ? (i32)src.v.frame_slot : 0; + mr = derive_mem(g, rv.type ? rv.type : ty, sak, saloc); + T->load(T, dst, src, mr); + src = dst; + } + T->store(T, lv.op, src, ma); + { + SValue out; + out.op = src; + out.type = ty; + push(g, out); + } + } +} + +/* ============================================================ + * Aggregates / bitfields — placeholders + * ============================================================ */ + +void cg_copy_aggregate(CG* g, AggregateAccess a) { + (void)a; + compiler_panic(g->c, g->cur_loc, "cg_copy_aggregate: not in v1 slice"); +} +void cg_set_aggregate(CG* g, AggregateAccess a) { + (void)a; + compiler_panic(g->c, g->cur_loc, "cg_set_aggregate: not in v1 slice"); +} +void cg_bitfield_load(CG* g, BitFieldAccess b) { + (void)b; + compiler_panic(g->c, g->cur_loc, "cg_bitfield_load: not in v1 slice"); +} +void cg_bitfield_store(CG* g, BitFieldAccess b) { + (void)b; + compiler_panic(g->c, g->cur_loc, "cg_bitfield_store: not in v1 slice"); +} + +/* ============================================================ + * Arithmetic / compare / convert + * ============================================================ */ + +/* Force an SValue into a register operand of the given type. */ +static Operand force_reg(CG* g, SValue v, const Type* ty) { + CGTarget* T = g->target; + if (v.op.kind == OPK_REG) return v.op; + if (v.op.kind == OPK_IMM) { + Reg r = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(r, ty); + T->load_imm(T, dst, v.op.v.imm); + return dst; + } + if (is_lvalue(&v.op)) { + Reg r = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(r, ty); + AliasKind ak = alias_for_lvalue(&v.op); + i32 al = (ak == ALIAS_LOCAL) ? (i32)v.op.v.frame_slot : 0; + MemAccess ma = derive_mem(g, ty, ak, al); + T->load(T, dst, v.op, ma); + return dst; + } + compiler_panic(g->c, g->cur_loc, "cg: cannot force operand to register"); +} + +void cg_binop(CG* g, BinOp op) { + /* stack: [a, b] → [a OP b] */ + SValue b = pop(g); + SValue a = pop(g); + CGTarget* T = g->target; + /* Result type is `a`'s type at this slice (parser already coerced). */ + const Type* ty = a.type ? a.type : b.type; + Operand ra = force_reg(g, a, ty); + Operand rb = force_reg(g, b, ty); + Reg rr = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(rr, ty); + T->binop(T, op, dst, ra, rb); + { + SValue sv; + sv.op = dst; + sv.type = ty; + push(g, sv); + } +} + +void cg_unop(CG* g, UnOp op) { + SValue a = pop(g); + CGTarget* T = g->target; + const Type* ty = a.type ? a.type : a.op.type; + Operand ra = force_reg(g, a, ty); + Reg rr = T->alloc_reg(T, type_class(ty), ty); + Operand dst = op_reg(rr, ty); + T->unop(T, op, dst, ra); + { + SValue sv; + sv.op = dst; + sv.type = ty; + push(g, sv); + } +} + +void cg_cmp(CG* g, CmpOp op) { + /* stack: [a, b] → [i32 result 0/1] */ + SValue b = pop(g); + SValue a = pop(g); + CGTarget* T = g->target; + const Type* opty = a.type ? a.type : b.type; + const Type* i32 = type_prim(g->pool, TY_INT); + Operand ra = force_reg(g, a, opty); + Operand rb = force_reg(g, b, opty); + Reg rr = T->alloc_reg(T, RC_INT, i32); + Operand dst = op_reg(rr, i32); + T->cmp(T, op, dst, ra, rb); + { + SValue sv; + sv.op = dst; + sv.type = i32; + push(g, sv); + } +} + +void cg_inc_dec(CG* g, BinOp op, int post) { + /* stack: [lv] → [resultval]. Materialize the in-place update inside cg + * because juggling lv + old + new through dup/swap from outside requires + * a 3-element rotate the stack API doesn't expose. */ + CGTarget* T = g->target; + SValue lv; + const Type* ty; + AliasKind ak; + i32 alias_local; + MemAccess ma; + Reg r_old; + Reg r_new; + Operand o_old; + Operand o_new; + Operand o_one; + + lv = pop(g); + if (!is_lvalue(&lv.op)) { + compiler_panic(g->c, g->cur_loc, + "cg_inc_dec: target is not an lvalue"); + } + ty = lv.type ? lv.type : lv.op.type; + ak = alias_for_lvalue(&lv.op); + alias_local = (ak == ALIAS_LOCAL) ? (i32)lv.op.v.frame_slot : 0; + ma = derive_mem(g, ty, ak, alias_local); + + /* Load current value into r_old, compute r_new = r_old +/- 1, store back. */ + r_old = T->alloc_reg(T, type_class(ty), ty); + o_old = op_reg(r_old, ty); + T->load(T, o_old, lv.op, ma); + + r_new = T->alloc_reg(T, type_class(ty), ty); + o_new = op_reg(r_new, ty); + o_one = op_imm(1, ty); + T->binop(T, op, o_new, o_old, o_one); + + T->store(T, lv.op, o_new, ma); + + { + SValue sv; + sv.op = post ? o_old : o_new; + sv.type = ty; + push(g, sv); + } +} + +void cg_convert(CG* g, const Type* dst_ty) { + SValue v = pop(g); + CGTarget* T = g->target; + const Type* sty = v.type ? v.type : v.op.type; + ConvKind ck; + Operand src; + Reg rr; + Operand dst; + /* Trivial: same type. */ + if (sty == dst_ty) { + push(g, v); + return; + } + src = force_reg(g, v, sty); + rr = T->alloc_reg(T, type_class(dst_ty), dst_ty); + dst = op_reg(rr, dst_ty); + /* Pick a ConvKind from src/dst kinds. v1 spine only sees integer↔integer + * (sign/zero ext + trunc); float and bitcast follow the same dispatch. */ + { + int s_int = type_is_int(sty); + int d_int = type_is_int(dst_ty); + int s_flt = sty && (sty->kind == TY_FLOAT || sty->kind == TY_DOUBLE || + sty->kind == TY_LDOUBLE); + int d_flt = dst_ty && (dst_ty->kind == TY_FLOAT || dst_ty->kind == TY_DOUBLE || + dst_ty->kind == TY_LDOUBLE); + u32 s_sz = sty ? abi_sizeof(g->abi, sty) : 0; + u32 d_sz = dst_ty ? abi_sizeof(g->abi, dst_ty) : 0; + int s_signed = sty ? abi_type_info(g->abi, sty).signed_ : 0; + if (s_int && d_int) { + if (d_sz < s_sz) { + ck = CV_TRUNC; + } else if (d_sz > s_sz) { + ck = s_signed ? CV_SEXT : CV_ZEXT; + } else { + ck = CV_BITCAST; + } + } else if (s_int && d_flt) { + ck = s_signed ? CV_ITOF_S : CV_ITOF_U; + } else if (s_flt && d_int) { + int d_signed = abi_type_info(g->abi, dst_ty).signed_; + ck = d_signed ? CV_FTOI_S : CV_FTOI_U; + } else if (s_flt && d_flt) { + ck = (d_sz > s_sz) ? CV_FEXT : CV_FTRUNC; + } else { + ck = CV_BITCAST; + } + } + T->convert(T, ck, dst, src); + { + SValue sv; + sv.op = dst; + sv.type = dst_ty; + push(g, sv); + } +} + +/* ============================================================ + * Calls / return + * ============================================================ */ + +void cg_call(CG* g, u32 nargs, const Type* fn_type) { + /* stack: [..., callee, arg0..argN-1] → [result] (or nothing if void) */ + CGTarget* T = g->target; + const ABIFuncInfo* abi = abi_func_info(g->abi, fn_type); + CGABIValue* avs = NULL; + CGABIValue ret_v; + CGCallDesc desc; + Operand callee_op; + SValue callee; + u32 i; + + if (g->sp < (u32)nargs + 1u) { + compiler_panic(g->c, g->cur_loc, "cg_call: stack underflow"); + } + if (nargs) { + avs = arena_array(g->c->tu, CGABIValue, nargs); + memset(avs, 0, sizeof(CGABIValue) * nargs); + } + + /* Pop args in reverse so we can fill avs[i] in declaration order. */ + for (i = 0; i < nargs; ++i) { + u32 idx = nargs - 1u - i; + SValue arg = pop(g); + const Type* aty = fn_type->fn.params ? fn_type->fn.params[idx] : arg.type; + Operand src; + /* Materialize into an Operand the backend can route through ABI parts. + * For simple scalars REG/IMM is enough; aggregates would force LOCAL. */ + if (arg.op.kind == OPK_LOCAL || arg.op.kind == OPK_GLOBAL || + arg.op.kind == OPK_INDIRECT) { + /* lvalue: backend may need an address (byval/indirect) or a loaded + * value. Spine: scalars only — load to register. */ + Reg r = T->alloc_reg(T, type_class(aty), aty); + Operand dst = op_reg(r, aty); + AliasKind ak = alias_for_lvalue(&arg.op); + i32 al = (ak == ALIAS_LOCAL) ? (i32)arg.op.v.frame_slot : 0; + MemAccess ma = derive_mem(g, aty, ak, al); + T->load(T, dst, arg.op, ma); + src = dst; + } else if (arg.op.kind == OPK_IMM) { + src = arg.op; + } else { + src = arg.op; + } + avs[idx].type = aty; + avs[idx].abi = &abi->params[idx]; + avs[idx].storage = src; + avs[idx].parts = NULL; + avs[idx].nparts = 0; + } + + callee = pop(g); + if (callee.op.kind == OPK_GLOBAL) { + callee_op = callee.op; + } else { + /* Indirect call — force into a register if necessary. */ + callee_op = force_reg(g, callee, fn_type); + } + + memset(&desc, 0, sizeof desc); + desc.fn_type = fn_type; + desc.abi = abi; + desc.callee = callee_op; + desc.args = avs; + desc.nargs = nargs; + desc.flags = CG_CALL_NONE; + /* Return storage: REG of the right class for scalar returns; struct + * returns would set parts/storage differently. */ + memset(&ret_v, 0, sizeof ret_v); + ret_v.type = fn_type->fn.ret; + ret_v.abi = &abi->ret; + if (ret_v.type && ret_v.type->kind != TY_VOID) { + Reg r = T->alloc_reg(T, type_class(ret_v.type), ret_v.type); + ret_v.storage = op_reg(r, ret_v.type); + } + desc.ret = ret_v; + + T->call(T, &desc); + + if (ret_v.type && ret_v.type->kind != TY_VOID) { + SValue sv; + sv.op = ret_v.storage; + sv.type = ret_v.type; + push(g, sv); + } +} + +void cg_tail_call(CG* g, u32 nargs, const Type* fn_type) { + /* Sibling-call form. v1 routes through cg_call with CG_CALL_TAIL. */ + (void)nargs; + (void)fn_type; + compiler_panic(g->c, g->cur_loc, "cg_tail_call: not in v1 slice"); +} + +void cg_ret(CG* g, int has_value) { + CGTarget* T = g->target; + const ABIFuncInfo* abi = g->fn_abi; + if (!has_value) { + T->ret(T, NULL); + return; + } + { + SValue v = pop(g); + const Type* rty = g->fn_ret_type; + Operand ret_op = force_reg(g, v, rty); + CGABIValue av; + memset(&av, 0, sizeof av); + av.type = rty; + av.abi = &abi->ret; + av.storage = ret_op; + T->ret(T, &av); + } +} + +/* ============================================================ + * alloca / variadics / setjmp / atomics — placeholders + * ============================================================ */ + +void cg_alloca(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_alloca: not in v1 slice"); +} +void cg_va_start_(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_va_start: not in v1 slice"); +} +void cg_va_arg_(CG* g, const Type* t) { + (void)t; + compiler_panic(g->c, g->cur_loc, "cg_va_arg: not in v1 slice"); +} +void cg_va_end_(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_va_end: not in v1 slice"); +} +void cg_va_copy_(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_va_copy: not in v1 slice"); +} +void cg_setjmp(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_setjmp: not in v1 slice"); +} +void cg_longjmp(CG* g) { + compiler_panic(g->c, g->cur_loc, "cg_longjmp: not in v1 slice"); +} +void cg_atomic_load(CG* g, MemOrder o) { + (void)o; + compiler_panic(g->c, g->cur_loc, "cg_atomic_load: not in v1 slice"); +} +void cg_atomic_store(CG* g, MemOrder o) { + (void)o; + compiler_panic(g->c, g->cur_loc, "cg_atomic_store: not in v1 slice"); +} +void cg_atomic_rmw(CG* g, AtomicOp a, MemOrder o) { + (void)a; + (void)o; + compiler_panic(g->c, g->cur_loc, "cg_atomic_rmw: not in v1 slice"); +} +void cg_atomic_cas(CG* g, MemOrder s, MemOrder f) { + (void)s; + (void)f; + compiler_panic(g->c, g->cur_loc, "cg_atomic_cas: not in v1 slice"); +} +void cg_fence(CG* g, MemOrder o) { + (void)o; + compiler_panic(g->c, g->cur_loc, "cg_fence: not in v1 slice"); +} + +/* ============================================================ + * Control flow — flat labels + * ============================================================ */ + +CGLabel cg_label_new(CG* g) { return (CGLabel)g->target->label_new(g->target); } + +void cg_label_place(CG* g, CGLabel l) { + g->target->label_place(g->target, (Label)l); +} + +void cg_jump(CG* g, CGLabel l) { g->target->jump(g->target, (Label)l); } + +void cg_branch_true(CG* g, CGLabel l) { + /* Pop i1 and branch if nonzero. v1 synthesizes cmp_branch(CMP_NE, val, 0). */ + SValue v = pop(g); + CGTarget* T = g->target; + const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT); + Operand a = force_reg(g, v, ty); + Operand zero = op_imm(0, ty); + T->cmp_branch(T, CMP_NE, a, zero, (Label)l); +} + +void cg_branch_false(CG* g, CGLabel l) { + SValue v = pop(g); + CGTarget* T = g->target; + const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT); + /* Constant-fold: branch on a known-zero immediate becomes unconditional; + * branch on a known-nonzero immediate becomes a no-op. The aarch64 + * cmp_branch handles immediates too, but folding here keeps the emitted + * code clean and lets `if (1) ...` skip the cmp entirely. */ + if (v.op.kind == OPK_IMM) { + if (v.op.v.imm == 0) { + T->jump(T, (Label)l); + } + return; + } + { + Operand a = force_reg(g, v, ty); + Operand zero = op_imm(0, ty); + T->cmp_branch(T, CMP_EQ, a, zero, (Label)l); + } +} + +/* ============================================================ + * Structured control flow — passthrough to target + * ============================================================ */ + +CGScope cg_scope_begin(CG* g, CGScopeConfig cfg) { + CGScopeDesc d; + memset(&d, 0, sizeof d); + d.kind = (u8)cfg.kind; + d.break_label = (Label)cfg.break_label; + d.continue_label = (Label)cfg.continue_label; + d.result_type = cfg.result_type; + if (cfg.kind == SCOPE_IF) { + /* Pop the condition. */ + SValue v = pop(g); + const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT); + d.cond = force_reg(g, v, ty); + } + return (CGScope)g->target->scope_begin(g->target, &d); +} + +void cg_scope_else(CG* g, CGScope s) { + g->target->scope_else(g->target, (CGScope)s); +} + +void cg_scope_end(CG* g, CGScope s) { + g->target->scope_end(g->target, (CGScope)s); +} + +void cg_break(CG* g, CGScope s) { + g->target->break_to(g->target, (CGScope)s); +} + +void cg_continue(CG* g, CGScope s) { + g->target->continue_to(g->target, (CGScope)s); +} + +/* ============================================================ + * Source location + * ============================================================ */ + +void cg_set_loc(CG* g, SrcLoc loc) { + g->cur_loc = loc; + if (g->target->set_loc) g->target->set_loc(g->target, loc); + if (g->debug) debug_set_pending_loc(g->debug, loc); +} + +/* ============================================================ + * Inline asm — placeholder + * ============================================================ */ + +void cg_inline_asm(CG* g, const char* tmpl, const AsmConstraint* outs, u32 nout, + const AsmConstraint* ins, u32 nin, const Sym* clobbers, + u32 nclob) { + (void)tmpl; + (void)outs; + (void)nout; + (void)ins; + (void)nin; + (void)clobbers; + (void)nclob; + compiler_panic(g->c, g->cur_loc, "cg_inline_asm: not in v1 slice"); +} diff --git a/src/cg/cg.h b/src/cg/cg.h @@ -54,6 +54,15 @@ void cg_unop(CG*, UnOp); void cg_cmp(CG*, CmpOp); void cg_convert(CG*, const Type* dst); /* picks ConvKind from src/dst */ +/* Increment/decrement an lvalue in place. Pops the lvalue from the value + * stack, performs `*lv = *lv +/- 1`, and pushes the result rvalue. With + * `post=1` the pushed value is the OLD value (post-inc/dec); with + * `post=0` it is the NEW value (pre-inc/dec). `op` is BO_IADD or BO_ISUB. + * The integer-1 step is the parser's responsibility for non-integer + * types (pointer arithmetic), but the spine slice deals only with + * integer locals. */ +void cg_inc_dec(CG*, BinOp op, int post); + /* Direct vs indirect: callee on the stack distinguishes itself by * SValue/operand kind. CG obtains ABIFuncInfo from Compiler.abi, materializes * CGABIValue argument/return parts, then calls CGTarget.call with a CGCallDesc. diff --git a/src/decl/decl.c b/src/decl/decl.c @@ -0,0 +1,187 @@ +/* DeclTable — C declaration semantics above ObjBuilder. + * + * Maps DeclId → Decl record. Allocates an ObjSymId for any non-typedef, + * non-auto/register decl with linkage; the parser may also pre-create a + * symbol (forward reference) and pass it in via Decl.obj_sym. + * + * Identifier *lookup* is not handled here — that lives on the parser's + * scope stack so block scopes and shadowing fall out naturally. DeclTable + * is just the C-language layer above ObjBuilder: storage class, linkage, + * static-locals, tentative defs, and global initializers. + * + * v1 surface is intentionally minimal: just enough for the spine corpus + * (functions; ints; static locals) plus the hooks DESIGN.md §5.3.1 + * commits to. Tentative-definition coalescing, COMDAT, and aliases are + * stubs at the API edge; their full semantics arrive with the multi-TU + * corpus. */ + +#include "decl/decl.h" + +#include <string.h> + +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "obj/obj.h" + +struct DeclTable { + Compiler* c; + ObjBuilder* ob; + Decl* slots; /* index 0 reserved as DECL_NONE */ + u32 nslots; + u32 cap; +}; + +#define DECL_INITIAL_CAP 16u + +static void decls_grow(DeclTable* t, u32 want) { + Heap* h = t->c->env->heap; + u32 cap = t->cap; + Decl* nb; + if (cap >= want) return; + while (cap < want) cap = cap ? cap * 2u : DECL_INITIAL_CAP; + nb = (Decl*)h->alloc(h, sizeof(Decl) * cap, _Alignof(Decl)); + if (t->slots) { + memcpy(nb, t->slots, sizeof(Decl) * t->nslots); + h->free(h, t->slots, sizeof(Decl) * t->cap); + } + t->slots = nb; + t->cap = cap; +} + +DeclTable* decl_new(Compiler* c, ObjBuilder* ob) { + Heap* h = c->env->heap; + DeclTable* t = + (DeclTable*)h->alloc(h, sizeof(DeclTable), _Alignof(DeclTable)); + memset(t, 0, sizeof *t); + t->c = c; + t->ob = ob; + decls_grow(t, 1); + memset(&t->slots[0], 0, sizeof(Decl)); + t->nslots = 1; + return t; +} + +void decl_free(DeclTable* t) { + Heap* h; + if (!t) return; + h = t->c->env->heap; + if (t->slots) h->free(h, t->slots, sizeof(Decl) * t->cap); + h->free(h, t, sizeof(*t)); +} + +ObjBuilder* decl_obj(DeclTable* t) { return t ? t->ob : NULL; } + +DeclId decl_declare(DeclTable* t, const Decl* in) { + DeclId id; + Decl* slot; + decls_grow(t, t->nslots + 1); + id = (DeclId)t->nslots++; + slot = &t->slots[id]; + *slot = *in; + slot->id = id; + /* Mint an ObjSymId for any decl that needs one and lacks one. The parser + * may pre-create a symbol for forward references (e.g. a function + * called before its definition) and bind it through `obj_sym` here. */ + if (slot->obj_sym == OBJ_SYM_NONE && slot->name && + slot->storage != DS_TYPEDEF && slot->storage != DS_AUTO && + slot->storage != DS_REGISTER) { + SymBind bind = (slot->linkage == DL_EXTERNAL) ? SB_GLOBAL : SB_LOCAL; + SymKind k = (slot->type && slot->type->kind == TY_FUNC) ? SK_FUNC : SK_OBJ; + slot->obj_sym = obj_symbol_ex(t->ob, slot->name, bind, (SymVis)slot->visibility, + k, OBJ_SEC_NONE, 0, 0, 0); + } + return id; +} + +const Decl* decl_get(const DeclTable* t, DeclId id) { + if (!t || id == DECL_NONE || id >= t->nslots) return NULL; + return &t->slots[id]; +} + +ObjSymId decl_obj_sym(const DeclTable* t, DeclId id) { + const Decl* d = decl_get(t, id); + return d ? d->obj_sym : OBJ_SYM_NONE; +} + +void decl_define_function(DeclTable* t, DeclId id, ObjSecId text_section_id, + u64 value, u64 size) { + /* Backends call obj_symbol_define from func_end already; this hook is for + * callers that want explicit decl-side definition (e.g. asm-defined + * functions where no CGTarget func_end runs). */ + const Decl* d = decl_get(t, id); + if (!d || d->obj_sym == OBJ_SYM_NONE) return; + obj_symbol_define(t->ob, d->obj_sym, text_section_id, value, size); +} + +void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align, + const InitItem* init, u32 ninit) { + const Decl* d = decl_get(t, id); + ObjSecId sec_id; + Sym sec_name; + int has_nonzero; + u32 i; + if (!d || d->obj_sym == OBJ_SYM_NONE) return; + has_nonzero = 0; + for (i = 0; i < ninit; ++i) { + if (init[i].kind != INIT_ZERO) { + has_nonzero = 1; + break; + } + } + if (!has_nonzero) { + sec_name = pool_intern_cstr(t->c->global, ".bss"); + sec_id = obj_section(t->ob, sec_name, SEC_BSS, SF_ALLOC | SF_WRITE, + align ? align : 1u); + obj_reserve_bss(t->ob, sec_id, (u32)size, align ? align : 1u); + obj_symbol_define(t->ob, d->obj_sym, sec_id, 0, size); + return; + } + sec_name = pool_intern_cstr(t->c->global, ".data"); + sec_id = obj_section(t->ob, sec_name, SEC_DATA, SF_ALLOC | SF_WRITE, + align ? align : 1u); + { + u32 base = obj_pos(t->ob, sec_id); + obj_reserve(t->ob, sec_id, size); + obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size); + for (i = 0; i < ninit; ++i) { + const InitItem* it = &init[i]; + switch (it->kind) { + case INIT_BYTES: + obj_patch(t->ob, sec_id, base + it->offset, it->v.bytes.bytes, + it->v.bytes.size); + break; + case INIT_FILL: { + u32 j; + u8 b = it->v.fill.byte; + for (j = 0; j < it->size; ++j) { + obj_patch(t->ob, sec_id, base + it->offset + j, &b, 1); + } + break; + } + case INIT_RELOC: + obj_reloc(t->ob, sec_id, base + it->offset, it->v.reloc.kind, + it->v.reloc.target, it->v.reloc.addend); + break; + case INIT_ZERO: + default: + break; + } + } + } +} + +void decl_define_tentative(DeclTable* t, DeclId id, u64 size, u32 align) { + Decl* slot; + if (id == DECL_NONE || id >= t->nslots) return; + slot = &t->slots[id]; + slot->flags |= DF_TENTATIVE; + decl_define_object(t, id, size, align, NULL, 0); +} + +void decl_define_alias(DeclTable* t, DeclId self, DeclId target) { + (void)t; + (void)self; + (void)target; +} diff --git a/src/decl/decl.h b/src/decl/decl.h @@ -81,6 +81,11 @@ typedef struct InitItem { DeclTable* decl_new(Compiler*, ObjBuilder*); void decl_free(DeclTable*); +/* The ObjBuilder a DeclTable was created against. Useful for the parser + * (which receives a DeclTable, not the builder) when it needs to create + * sections or look up object-level facts. */ +ObjBuilder* decl_obj(DeclTable*); + DeclId decl_declare(DeclTable*, const Decl*); const Decl* decl_get(const DeclTable*, DeclId); ObjSymId decl_obj_sym(const DeclTable*, DeclId); diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -0,0 +1,1192 @@ +/* C11 recursive-descent parser. No AST; the parser drives DeclTable for C + * declaration semantics and CG for executable code in a single pass. + * + * Module shape (DESIGN §5): + * - lex / pp produce a token stream; we keep one token of lookahead. + * - The parser maintains its own scope stack (block/file scope) for + * identifier resolution. DeclTable owns DeclId/ObjSymId allocation. + * - Statements drive CG: cg_func_begin/end, cg_local, cg_set_loc, + * cg_label_*, cg_branch_*, cg_jump, cg_ret. Expressions drive CG's + * value stack: cg_push_*, cg_load, cg_store, cg_binop, cg_cmp. + * - One Tok of lookahead is enough for C11; at decision points we use + * the keyword/punctuator directly. + * + * v1 slice: single-TU; functions returning int; int locals (with comma- + * separated initializers); compound, if/else, while, for, return, + * expression statements; expressions covering the §6.5 spine + * (additive/multiplicative/relational/equality, unary, parens, post/pre + * inc-dec, simple assignment + compound assignment). The grammar is + * organized so each higher-level production gets its own function — the + * full C grammar slots in the same shape, one production at a time. */ + +#include "parse/parse.h" + +#include <stdarg.h> +#include <string.h> + +#include "abi/abi.h" +#include "arch/arch.h" +#include "cg/cg.h" +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "debug/debug.h" +#include "decl/decl.h" +#include "lex/lex.h" +#include "obj/obj.h" +#include "pp/pp.h" +#include "type/type.h" + +/* Type-aware push for locals — exposed by cg.c, not in cg.h. */ +extern void cg_push_local_typed(CG*, FrameSlot, const Type*); + +/* ============================================================ + * Keywords + * ============================================================ + * Lex emits TOK_IDENT; the parser bucketizes idents into keywords by + * comparing the interned Sym against a fixed table populated at parser + * init. The table covers C11 plus a handful of common GCC-style + * extensions the runtime headers use. Adding a new keyword is one entry + * here plus one parser branch; the lexer never changes. */ +typedef enum CKw { + KW_NONE = 0, + KW_AUTO, + KW_BREAK, + KW_CASE, + KW_CHAR, + KW_CONST, + KW_CONTINUE, + KW_DEFAULT, + KW_DO, + KW_DOUBLE, + KW_ELSE, + KW_ENUM, + KW_EXTERN, + KW_FLOAT, + KW_FOR, + KW_GOTO, + KW_IF, + KW_INLINE, + KW_INT, + KW_LONG, + KW_REGISTER, + KW_RESTRICT, + KW_RETURN, + KW_SHORT, + KW_SIGNED, + KW_SIZEOF, + KW_STATIC, + KW_STRUCT, + KW_SWITCH, + KW_TYPEDEF, + KW_UNION, + KW_UNSIGNED, + KW_VOID, + KW_VOLATILE, + KW_WHILE, + KW_BOOL, /* _Bool */ + KW_COMPLEX, /* _Complex */ + KW_IMAGINARY, /* _Imaginary */ + KW_ALIGNAS, /* _Alignas */ + KW_ALIGNOF, /* _Alignof */ + KW_ATOMIC, /* _Atomic */ + KW_GENERIC, /* _Generic */ + KW_NORETURN, /* _Noreturn */ + KW_STATIC_ASSERT, /* _Static_assert */ + KW_THREAD_LOCAL, /* _Thread_local */ + KW_COUNT +} CKw; + +static const char* const kw_names[KW_COUNT] = { + NULL, "auto", "break", "case", "char", + "const", "continue", "default", "do", "double", + "else", "enum", "extern", "float", "for", + "goto", "if", "inline", "int", "long", + "register", "restrict", "return", "short", "signed", + "sizeof", "static", "struct", "switch", "typedef", + "union", "unsigned", "void", "volatile", "while", + "_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof", + "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local", +}; + +/* ============================================================ + * Scope stack + * ============================================================ + * One ScopeEntry per declared identifier; chained in declaration order + * within a Scope. Block scopes are pushed/popped around every compound + * statement, parameter list, and `for`-init. Lookup walks parent chains. */ + +typedef enum SymEntryKind { + SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */ + SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */ + SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */ + SEK_TYPEDEF, /* typedef name */ + SEK_ENUM_CST, /* enumeration constant */ +} SymEntryKind; + +typedef struct SymEntry SymEntry; +struct SymEntry { + Sym name; + u8 kind; /* SymEntryKind */ + u8 pad[3]; + const Type* type; + union { + FrameSlot slot; + ObjSymId sym; + i64 enum_value; + } v; + SymEntry* next; +}; + +typedef struct Scope Scope; +struct Scope { + SymEntry* entries; /* LIFO */ + Scope* parent; +}; + +/* ============================================================ + * Parser context + * ============================================================ */ + +typedef struct Parser { + Compiler* c; + Pp* pp; + DeclTable* decls; + CG* cg; + Debug* debug; + TargetABI* abi; + Pool* pool; + + Tok cur; /* one token of lookahead */ + + Sym kw_sym[KW_COUNT]; + + Scope* scope; /* top of stack; file scope is the root */ + + ObjSecId text_sec; + + /* Loop/switch context for break/continue. CGLabel 0 means none. */ + CGLabel cur_break; + CGLabel cur_continue; +} Parser; + +/* ============================================================ + * Diagnostics + * ============================================================ */ + +static SrcLoc tok_loc(const Tok* t) { return t->loc; } + +static _Noreturn void perr(Parser* p, const char* fmt, ...) { + va_list ap; + SrcLoc loc = tok_loc(&p->cur); + va_start(ap, fmt); + compiler_panicv(p->c, loc, fmt, ap); +} + +/* ============================================================ + * Token helpers + * ============================================================ */ + +static void advance(Parser* p) { p->cur = pp_next(p->pp); } + +static int is_punct(const Tok* t, u32 punct) { + return t->kind == TOK_PUNCT && t->v.punct == punct; +} + +static int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; } + +static int is_kw(const Parser* p, const Tok* t, CKw k) { + return t->kind == TOK_IDENT && t->v.ident == p->kw_sym[k]; +} + +static CKw ident_kw(const Parser* p, Sym name) { + /* Linear scan; KW_COUNT is small. */ + CKw i; + for (i = (CKw)1; i < KW_COUNT; ++i) { + if (p->kw_sym[i] == name) return i; + } + return KW_NONE; +} + +static int accept_punct(Parser* p, u32 punct) { + if (is_punct(&p->cur, punct)) { + advance(p); + return 1; + } + return 0; +} + +static int accept_kw(Parser* p, CKw k) { + if (is_kw(p, &p->cur, k)) { + advance(p); + return 1; + } + return 0; +} + +static void expect_punct(Parser* p, u32 punct, const char* what) { + if (!accept_punct(p, punct)) { + perr(p, "expected %s", what); + } +} + +/* expect_kw is wired up but unused at this slice — `void` consumption + * goes through accept_kw already. Kept commented as a documentation hook + * for the next slice that needs it (e.g. `_Static_assert`). + * + * static void expect_kw(Parser*, CKw, const char* what); */ + +/* ============================================================ + * Scopes + * ============================================================ */ + +static Scope* scope_new(Parser* p, Scope* parent) { + Scope* s = arena_new(p->c->tu, Scope); + if (!s) perr(p, "out of memory in scope_new"); + s->entries = NULL; + s->parent = parent; + return s; +} + +static void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); } + +static void scope_pop(Parser* p) { + if (p->scope) p->scope = p->scope->parent; +} + +static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, + const Type* type) { + SymEntry* e = arena_new(p->c->tu, SymEntry); + if (!e) perr(p, "out of memory in scope_define"); + memset(e, 0, sizeof *e); + e->name = name; + e->kind = (u8)kind; + e->type = type; + e->next = p->scope->entries; + p->scope->entries = e; + return e; +} + +static SymEntry* scope_lookup(Parser* p, Sym name) { + Scope* s; + for (s = p->scope; s; s = s->parent) { + SymEntry* e; + for (e = s->entries; e; e = e->next) { + if (e->name == name) return e; + } + } + return NULL; +} + +/* ============================================================ + * Type helpers + * ============================================================ */ + +static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } + +/* ============================================================ + * Literal parsing + * ============================================================ + * v1 spine sees only decimal integer literals without suffixes; this + * routine accepts the common 0x/0/decimal forms with optional u/l/ll + * suffixes so the §6.5 corpus rows that aren't yet exercised still + * land usefully. Final type selection uses int for now and grows when + * the §6.4.4.1 corpus catches up. */ +static i64 parse_int_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + i64 base = 10; + i64 acc = 0; + if (!s) perr(p, "bad numeric literal"); + if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + base = 16; + i = 2; + } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { + base = 2; + i = 2; + } else if (len >= 1 && s[0] == '0') { + base = 8; + i = 1; + } + for (; i < len; ++i) { + int c = (unsigned char)s[i]; + int dv; + /* Stop at suffix characters (u/U/l/L). */ + if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; + if (c >= '0' && c <= '9') + dv = c - '0'; + else if (c >= 'a' && c <= 'f') + dv = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + dv = c - 'A' + 10; + else + perr(p, "bad digit in numeric literal"); + if (dv >= base) perr(p, "digit out of range for base"); + acc = acc * base + dv; + } + return acc; +} + +/* ============================================================ + * Expressions — precedence climbing + * ============================================================ + * Pratt-style climber: each level consumes its operators and recurses + * into the next-tighter level. Each function leaves the result on the CG + * stack. The grammar follows C11 §6.5 top-down; only the productions + * needed by the spine are wired today, with the remaining ones marked + * with TODO comments at the call sites where they'll slot in. + * + * Level (loose → tight): + * assignment = `=` `+=` `-=` `*=` `/=` `%=` `&=` `|=` `^=` `<<=` `>>=` + * conditional = `? :` (TODO) + * logical_or = `||` (TODO) + * logical_and = `&&` (TODO) + * bit_or = `|` + * bit_xor = `^` + * bit_and = `&` + * equality = `==` `!=` + * relational = `<` `<=` `>` `>=` + * shift = `<<` `>>` + * additive = `+` `-` + * multiplicative = `*` `/` `%` + * cast = `(type) cast` (TODO) + * unary = `+ - ! ~ * & ++ --` `sizeof` (partial) + * postfix = `[] () . -> ++ --` (partial) + * primary = ident | num | `(` expr `)` | strlit | charlit + */ + +static void parse_expr(Parser* p); +static void parse_assign_expr(Parser* p); +static void parse_unary(Parser* p); +static void parse_postfix(Parser* p); + +/* Produce an rvalue on the stack: pop, and if it's an lvalue, load. */ +static void to_rvalue(Parser* p) { + cg_load(p->cg); + /* cg_load is idempotent on rvalues. */ + (void)p; +} + +static void parse_primary(Parser* p) { + Tok t = p->cur; + if (t.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &t); + advance(p); + cg_push_int(p->cg, v, ty_int(p)); + return; + } + if (is_punct(&t, '(')) { + advance(p); + parse_expr(p); + expect_punct(p, ')', "')'"); + return; + } + if (t.kind == TOK_IDENT) { + SymEntry* e; + /* Reject keywords used as expressions. */ + if (ident_kw(p, t.v.ident) != KW_NONE) { + perr(p, "unexpected keyword in expression"); + } + e = scope_lookup(p, t.v.ident); + if (!e) { + size_t nlen = 0; + const char* nm = pool_str(p->pool, t.v.ident, &nlen); + compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen, + nm ? nm : "?"); + } + advance(p); + switch (e->kind) { + case SEK_LOCAL: + cg_push_local_typed(p->cg, e->v.slot, e->type); + return; + case SEK_GLOBAL: + case SEK_FUNC: + cg_push_global(p->cg, e->v.sym, e->type); + return; + case SEK_ENUM_CST: + cg_push_int(p->cg, e->v.enum_value, e->type); + return; + case SEK_TYPEDEF: + default: + perr(p, "identifier is not a value"); + } + } + if (t.kind == TOK_CHR) { + /* Minimal char-literal: take the first decoded byte from the lit table. + * Spine doesn't use char literals, so this is best-effort. */ + const LitInfo* li = pp_lit(p->pp, t.lit); + i64 v = 0; + (void)li; + advance(p); + cg_push_int(p->cg, v, ty_int(p)); + return; + } + perr(p, "expected expression"); +} + +static void parse_postfix(Parser* p) { + parse_primary(p); + for (;;) { + Tok t = p->cur; + if (is_punct(&t, P_INC)) { + advance(p); + cg_inc_dec(p->cg, BO_IADD, /*post=*/1); + continue; + } + if (is_punct(&t, P_DEC)) { + advance(p); + cg_inc_dec(p->cg, BO_ISUB, /*post=*/1); + continue; + } + if (is_punct(&t, '(') || is_punct(&t, '[') || is_punct(&t, '.') || + is_punct(&t, P_ARROW)) { + perr(p, "call/subscript/member access not supported in v1 slice"); + } + break; + } +} + +static void parse_unary(Parser* p) { + Tok t = p->cur; + if (is_punct(&t, '+')) { + advance(p); + parse_unary(p); + to_rvalue(p); + return; + } + if (is_punct(&t, '-')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_NEG); + return; + } + if (is_punct(&t, '!')) { + advance(p); + parse_unary(p); + to_rvalue(p); + /* Logical not via cmp == 0. */ + cg_push_int(p->cg, 0, ty_int(p)); + cg_cmp(p->cg, CMP_EQ); + return; + } + if (is_punct(&t, '~')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_BNOT); + return; + } + if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) { + BinOp bop = is_punct(&t, P_INC) ? BO_IADD : BO_ISUB; + advance(p); + parse_unary(p); + cg_inc_dec(p->cg, bop, /*post=*/0); + return; + } + parse_postfix(p); + /* postfix may have left an lvalue or rvalue. Higher-level callers + * issue to_rvalue when they need the value. */ +} + +/* Binary operator levels: each takes a `next` pointer to the tighter level + * and a list of accepted operators with their codegen mapping. Inlined as + * a single function per level to keep the call graph readable. */ + +static void parse_mul(Parser* p) { + parse_unary(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, '*')) { + bop = BO_IMUL; + } else if (is_punct(&t, '/')) { + bop = BO_SDIV; + } else if (is_punct(&t, '%')) { + bop = BO_SREM; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_unary(p); + to_rvalue(p); + cg_binop(p->cg, bop); + } +} + +static void parse_add(Parser* p) { + parse_mul(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, '+')) { + bop = BO_IADD; + } else if (is_punct(&t, '-')) { + bop = BO_ISUB; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_mul(p); + to_rvalue(p); + cg_binop(p->cg, bop); + } +} + +static void parse_shift(Parser* p) { + parse_add(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, P_SHL)) { + bop = BO_SHL; + } else if (is_punct(&t, P_SHR)) { + bop = BO_SHR_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_add(p); + to_rvalue(p); + cg_binop(p->cg, bop); + } +} + +static void parse_rel(Parser* p) { + parse_shift(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, '<')) { + cop = CMP_LT_S; + } else if (is_punct(&t, '>')) { + cop = CMP_GT_S; + } else if (is_punct(&t, P_LE)) { + cop = CMP_LE_S; + } else if (is_punct(&t, P_GE)) { + cop = CMP_GE_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_shift(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_eq(Parser* p) { + parse_rel(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, P_EQ)) { + cop = CMP_EQ; + } else if (is_punct(&t, P_NE)) { + cop = CMP_NE; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_rel(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_band(Parser* p) { + parse_eq(p); + while (is_punct(&p->cur, '&')) { + advance(p); + to_rvalue(p); + parse_eq(p); + to_rvalue(p); + cg_binop(p->cg, BO_AND); + } +} + +static void parse_bxor(Parser* p) { + parse_band(p); + while (is_punct(&p->cur, '^')) { + advance(p); + to_rvalue(p); + parse_band(p); + to_rvalue(p); + cg_binop(p->cg, BO_XOR); + } +} + +static void parse_bor(Parser* p) { + parse_bxor(p); + while (is_punct(&p->cur, '|')) { + advance(p); + to_rvalue(p); + parse_bxor(p); + to_rvalue(p); + cg_binop(p->cg, BO_OR); + } +} + +/* Logical && / || / ?: are short-circuiting and need labels. The spine + * doesn't need them yet (the relevant corpus rows are the §6.5_1[2,3,4] + * group); they slot in here when those rows graduate. */ + +static void parse_assign_expr(Parser* p) { + parse_bor(p); + /* The LHS is now on the CG stack. If it's an lvalue we may consume it + * for assignment; otherwise we keep the rvalue as the final result. */ + Tok t = p->cur; + BinOp compound; + int is_simple_assign; + if (is_punct(&t, '=')) { + is_simple_assign = 1; + compound = (BinOp)0; + } else if (is_punct(&t, P_ADD_ASSIGN)) { + is_simple_assign = 0; compound = BO_IADD; + } else if (is_punct(&t, P_SUB_ASSIGN)) { + is_simple_assign = 0; compound = BO_ISUB; + } else if (is_punct(&t, P_MUL_ASSIGN)) { + is_simple_assign = 0; compound = BO_IMUL; + } else if (is_punct(&t, P_DIV_ASSIGN)) { + is_simple_assign = 0; compound = BO_SDIV; + } else if (is_punct(&t, P_MOD_ASSIGN)) { + is_simple_assign = 0; compound = BO_SREM; + } else if (is_punct(&t, P_AND_ASSIGN)) { + is_simple_assign = 0; compound = BO_AND; + } else if (is_punct(&t, P_OR_ASSIGN)) { + is_simple_assign = 0; compound = BO_OR; + } else if (is_punct(&t, P_XOR_ASSIGN)) { + is_simple_assign = 0; compound = BO_XOR; + } else if (is_punct(&t, P_SHL_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHL; + } else if (is_punct(&t, P_SHR_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHR_S; + } else { + return; + } + advance(p); + if (is_simple_assign) { + /* LHS lvalue is on stack. Parse RHS, store. The result of the + * assignment is the assigned value; for the spine we leave the stack + * empty after store (statement context), which is correct for + * `x = expr;` and for the for-init `i = 1` since the value is + * discarded. To support assignment-as-expression, we'd need to + * cg_dup the LHS first and re-load after store. */ + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + return; + } + /* Compound: x += y → load x, compute, store. We need to keep the LHS + * lvalue and produce a new rvalue. Stack: [lv]. Sequence: + * dup [lv, lv] + * load [lv, x] + * parse RHS, rvalue [lv, x, y] + * binop [lv, x_op_y] + * store [] */ + cg_dup(p->cg); + cg_load(p->cg); + parse_assign_expr(p); + to_rvalue(p); + cg_binop(p->cg, compound); + cg_store(p->cg); +} + +static void parse_expr(Parser* p) { + parse_assign_expr(p); + while (is_punct(&p->cur, ',')) { + advance(p); + /* Discard left, evaluate right. */ + cg_drop(p->cg); + parse_assign_expr(p); + } +} + +/* ============================================================ + * Declarations (slice: `int` / `void` only, no struct/union/enum/typedef) + * ============================================================ */ + +typedef struct DeclSpecs { + const Type* type; + DeclStorage storage; + u32 flags; /* DeclFlag */ +} DeclSpecs; + +static int parse_decl_specs(Parser* p, DeclSpecs* out) { + /* v1: tracks `int`, `void`, `static`, `extern`, plus a couple of common + * qualifiers that are ignored at this slice. Returns 0 if no specifier + * was consumed (caller treats that as "not a declaration"). */ + int seen = 0; + out->type = NULL; + out->storage = DS_AUTO; + out->flags = DF_NONE; + for (;;) { + Tok t = p->cur; + if (is_kw(p, &t, KW_INT)) { + if (out->type) perr(p, "conflicting type specifiers"); + out->type = type_prim(p->pool, TY_INT); + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_VOID)) { + if (out->type) perr(p, "conflicting type specifiers"); + out->type = type_void(p->pool); + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_STATIC)) { + out->storage = DS_STATIC; + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_EXTERN)) { + out->storage = DS_EXTERN; + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_CONST) || is_kw(p, &t, KW_VOLATILE) || + is_kw(p, &t, KW_RESTRICT) || is_kw(p, &t, KW_INLINE) || + is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) || + is_kw(p, &t, KW_AUTO)) { + /* Recognized but currently no-op at this slice. */ + advance(p); + seen = 1; + } else { + break; + } + } + if (seen && !out->type) { + /* `static x;` without type — default to int per pre-C99, but this is + * a hard error in C99/C11. Still tolerate at the scaffold level. */ + out->type = ty_int(p); + } + return seen; +} + +/* Forward decl for parse_compound_stmt (mutually recursive with statement + * dispatch). */ +static void parse_stmt(Parser* p); +static void parse_compound_stmt(Parser* p); + +/* Allocate a frame slot for a local variable of `type` and bind `name` + * into the current scope. */ +static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { + FrameSlotDesc fsd; + FrameSlot s; + SymEntry* e; + memset(&fsd, 0, sizeof fsd); + fsd.type = type; + fsd.name = name; + fsd.loc = loc; + fsd.size = abi_sizeof(p->abi, type); + fsd.align = abi_alignof(p->abi, type); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + s = cg_local(p->cg, &fsd); + e = scope_define(p, name, SEK_LOCAL, type); + e->v.slot = s; + return s; +} + +/* Parse a single init-declarator after the decl-specs have been consumed. + * Spine grammar: declarator = IDENT ; init = `=` assign_expr. + * Pointer/array/function declarators are TODO — those slot in here as + * additional layers around the IDENT. */ +static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { + SrcLoc loc; + Tok name_tok; + Sym name; + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected declarator name"); + } + name_tok = p->cur; + loc = tok_loc(&name_tok); + name = name_tok.v.ident; + advance(p); + /* Local declaration only at this slice. */ + { + FrameSlot s = make_local(p, name, specs->type, loc); + if (accept_punct(p, '=')) { + cg_set_loc(p->cg, loc); + cg_push_local_typed(p->cg, s, specs->type); + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + /* cg_store leaves the assigned value on the stack (C semantics); + * an init-declarator is statement-context, so drop it. */ + cg_drop(p->cg); + } + } +} + +static void parse_local_decl(Parser* p, const DeclSpecs* specs) { + parse_init_declarator(p, specs); + while (accept_punct(p, ',')) { + parse_init_declarator(p, specs); + } + expect_punct(p, ';', "';' after declaration"); +} + +/* ============================================================ + * Statements + * ============================================================ */ + +static void parse_if_stmt(Parser* p) { + CGLabel L_else = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')'"); + cg_branch_false(p->cg, L_else); + parse_stmt(p); + if (accept_kw(p, KW_ELSE)) { + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_else); + parse_stmt(p); + cg_label_place(p->cg, L_end); + } else { + cg_label_place(p->cg, L_else); + } +} + +static void parse_while_stmt(Parser* p) { + CGLabel L_top = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + expect_punct(p, '(', "'('"); + cg_label_place(p->cg, L_top); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')'"); + cg_branch_false(p->cg, L_end); + p->cur_break = L_end; + p->cur_continue = L_top; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + cg_jump(p->cg, L_top); + cg_label_place(p->cg, L_end); +} + +static void parse_for_stmt(Parser* p) { + CGLabel L_top = cg_label_new(p->cg); + CGLabel L_step = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + + /* `for` introduces a block scope spanning init/cond/step/body so that + * an init declaration `int i=...` is visible only inside the loop. */ + scope_push(p); + expect_punct(p, '(', "'('"); + + /* init: declaration | expr | ; */ + if (!accept_punct(p, ';')) { + DeclSpecs specs; + if (parse_decl_specs(p, &specs)) { + parse_local_decl(p, &specs); /* consumes the trailing ';' */ + } else { + parse_expr(p); + cg_drop(p->cg); + expect_punct(p, ';', "';'"); + } + } + + cg_label_place(p->cg, L_top); + /* cond: optional. Empty cond means "always true". */ + if (!is_punct(&p->cur, ';')) { + parse_expr(p); + to_rvalue(p); + cg_branch_false(p->cg, L_end); + } + expect_punct(p, ';', "';'"); + + /* Need to remember the step expression — but since this is single-pass, + * we can't replay tokens. Standard trick: emit body before step by + * jumping over the step on first entry, then placing the step where + * the back-edge arrives. */ + { + CGLabel L_body = cg_label_new(p->cg); + cg_jump(p->cg, L_body); + cg_label_place(p->cg, L_step); + /* step: optional. */ + if (!is_punct(&p->cur, ')')) { + parse_expr(p); + cg_drop(p->cg); + } + cg_jump(p->cg, L_top); + expect_punct(p, ')', "')'"); + cg_label_place(p->cg, L_body); + + p->cur_break = L_end; + p->cur_continue = L_step; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + + cg_jump(p->cg, L_step); + cg_label_place(p->cg, L_end); + } + scope_pop(p); +} + +static void parse_return_stmt(Parser* p) { + if (accept_punct(p, ';')) { + cg_ret(p->cg, 0); + return; + } + parse_expr(p); + to_rvalue(p); + expect_punct(p, ';', "';' after return value"); + cg_ret(p->cg, 1); +} + +static void parse_break_stmt(Parser* p) { + if (p->cur_break == 0) perr(p, "'break' outside of loop or switch"); + cg_jump(p->cg, p->cur_break); + expect_punct(p, ';', "';' after break"); +} + +static void parse_continue_stmt(Parser* p) { + if (p->cur_continue == 0) perr(p, "'continue' outside of loop"); + cg_jump(p->cg, p->cur_continue); + expect_punct(p, ';', "';' after continue"); +} + +static void parse_compound_stmt(Parser* p) { + expect_punct(p, '{', "'{'"); + scope_push(p); + while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) { + /* Drain stray newlines & pp-hash artifacts. (PP normally consumes + * these, but we tolerate them here as a no-op safety net.) */ + if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { + advance(p); + continue; + } + { + DeclSpecs specs; + Tok save_tok = p->cur; /* nothing to roll back yet — accept reused below */ + (void)save_tok; + if (parse_decl_specs(p, &specs)) { + parse_local_decl(p, &specs); + } else { + parse_stmt(p); + } + } + } + expect_punct(p, '}', "'}'"); + scope_pop(p); +} + +static void parse_stmt(Parser* p) { + cg_set_loc(p->cg, tok_loc(&p->cur)); + if (is_punct(&p->cur, '{')) { + parse_compound_stmt(p); + return; + } + if (is_punct(&p->cur, ';')) { + advance(p); + return; + } + if (is_kw(p, &p->cur, KW_IF)) { + advance(p); + parse_if_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_WHILE)) { + advance(p); + parse_while_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_FOR)) { + advance(p); + parse_for_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_RETURN)) { + advance(p); + parse_return_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_BREAK)) { + advance(p); + parse_break_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_CONTINUE)) { + advance(p); + parse_continue_stmt(p); + return; + } + /* Expression statement. */ + parse_expr(p); + cg_drop(p->cg); + expect_punct(p, ';', "';' after expression"); +} + +/* ============================================================ + * External (top-level) declarations + * ============================================================ */ + +/* For the spine, the only function shape is `int test_main(void) { ... }`. + * We accept `<type> IDENT (` `void` `)` `{` ... `}` and reject anything + * fancier. The full §6.7.6 declarator surface (parameters, varargs, + * pointer/array returns) lands as the corresponding corpus rows do. */ +static void parse_function_definition(Parser* p, const DeclSpecs* specs, + Sym fname, SrcLoc fname_loc) { + const Type** ptypes = NULL; + u16 nparams = 0; + const Type* fn_ty; + const ABIFuncInfo* abi; + Decl decl_in; + DeclId did; + ObjSymId fsym; + CGFuncDesc fd; + + /* Param list: `void` or empty (and `)`); full list is TODO. */ + expect_punct(p, '(', "'('"); + if (accept_kw(p, KW_VOID)) { + /* `(void)`: zero params, not variadic. */ + } else if (!is_punct(&p->cur, ')')) { + perr(p, "only `(void)` parameter list is supported in v1 slice"); + } + expect_punct(p, ')', "')'"); + + fn_ty = type_func(p->pool, specs->type, ptypes, nparams, 0); + abi = abi_func_info(p->abi, fn_ty); + + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = fname; + decl_in.type = fn_ty; + decl_in.loc = fname_loc; + decl_in.storage = (specs->storage == DS_STATIC) ? DS_STATIC : DS_EXTERN; + decl_in.linkage = + (specs->storage == DS_STATIC) ? DL_INTERNAL : DL_EXTERNAL; + decl_in.visibility = SV_DEFAULT; + did = decl_declare(p->decls, &decl_in); + fsym = decl_obj_sym(p->decls, did); + /* Promote the symbol's binding for non-static functions. decl_declare + * minted it with the right binding; assert here for clarity. */ + + /* Bind the function name into file scope so calls resolve. */ + { + SymEntry* e = scope_define(p, fname, SEK_FUNC, fn_ty); + e->v.sym = fsym; + } + + /* Function body: open a parameter scope, then descend into body. The + * spine has no params, so we just open an empty scope. */ + memset(&fd, 0, sizeof fd); + fd.sym = fsym; + fd.text_section_id = p->text_sec; + fd.group_id = OBJ_GROUP_NONE; + fd.fn_type = fn_ty; + fd.abi = abi; + fd.params = NULL; + fd.nparams = 0; + fd.loc = fname_loc; + + scope_push(p); + cg_set_loc(p->cg, fname_loc); + cg_func_begin(p->cg, &fd); + parse_compound_stmt(p); + /* Implicit fall-through return for `int main` — emit a return-0 if the + * function reaches the closing brace without an explicit return. The + * codegen always emits a real epilogue at func_end, so this is just a + * safety belt against undefined behavior on trailing fall-through. + * Spine cases all `return ...;` explicitly, so this is dead code there. */ + if (specs->type && specs->type->kind != TY_VOID) { + cg_push_int(p->cg, 0, specs->type); + cg_ret(p->cg, 1); + } else { + cg_ret(p->cg, 0); + } + cg_func_end(p->cg); + scope_pop(p); +} + +static void parse_external_decl(Parser* p) { + DeclSpecs specs; + Tok name_tok; + Sym name; + SrcLoc loc; + + if (!parse_decl_specs(p, &specs)) { + perr(p, "expected declaration"); + } + + /* Parse the declarator. v1 slice: just IDENT — pointer/array layers + * are TODO. */ + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected declarator"); + } + name_tok = p->cur; + loc = tok_loc(&name_tok); + name = name_tok.v.ident; + advance(p); + + if (is_punct(&p->cur, '(')) { + parse_function_definition(p, &specs, name, loc); + return; + } + + /* Global object declaration: `int g;` / `int g = 7;` / `int g = ..., h;` */ + /* v1 slice does not implement global initializers — defer until §6.7.9 + * cases need them. We just register the decl and reserve BSS. */ + perr(p, "global object declarations not supported in v1 slice"); +} + +static void parse_translation_unit(Parser* p) { + while (p->cur.kind != TOK_EOF) { + if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { + advance(p); + continue; + } + parse_external_decl(p); + } +} + +/* ============================================================ + * Entry point + * ============================================================ */ + +void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { + Parser p; + Sym text_name; + CKw i; + + memset(&p, 0, sizeof p); + p.c = c; + p.pp = pp; + p.decls = decls; + p.cg = cg; + p.debug = debug; + p.abi = c->abi; + p.pool = c->global; + + /* Intern keyword spellings. The lexer doesn't know about keywords; this + * is the canonical bucketization site (DESIGN §5.1 / lex.h §6.4). */ + for (i = (CKw)1; i < KW_COUNT; ++i) { + p.kw_sym[i] = pool_intern_cstr(p.pool, kw_names[i]); + } + + /* File scope. */ + p.scope = scope_new(&p, NULL); + + /* Default text section. -ffunction-sections / explicit attribute(section) + * cases will replace this per-function; the spine uses ".text". */ + text_name = pool_intern_cstr(p.pool, ".text"); + p.text_sec = obj_section(decl_obj(decls), text_name, SEC_TEXT, + SF_ALLOC | SF_EXEC, 4u); + + /* Pull the first token. PP yields preprocessed C tokens; directives + * have already been consumed. */ + p.cur = pp_next(p.pp); + + parse_translation_unit(&p); +}