kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 1268ac83cff81e2e67f57e50c6e18aeb95acf194
parent f9d61faa02ff59f3d347478852d1959247a8e133
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 10:24:00 -0700

c_target: land Phase 2 of the C-source backend

Implements control flow and memory ops on the C-source CGTarget: labels,
jump, cmp/cmp_branch, unop, scope_*, local/local_addr/addr_of, INDIRECT
load/store, and convert (with CV_BITCAST via __builtin_memcpy). Every
reg-LHS assignment now wraps the RHS in `(DECL_T)(uintptr_t)(...)`, or
plain `(DECL_T)(...)` when the register is declared as a float, so CG's
reg-id reuse across types stays well-typed in C.

The toy harness now runs path C only at opt level 0 (the C target forces
opt=0 internally) and skips the err cases unless at least one native
compile path (R/L/X) is enabled.

Phase 2 takes toy path C from 17 to 49 passing cases; the remaining SKIPs
are all attributable to Phase 3/4 surface (aggregates, intrinsics,
atomics, data emission).

Diffstat:
M doc/CBACKEND.md | 46 ++++++++++++++++++++++++++++++++++++++--------
M src/arch/c_target/emit.c | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M src/arch/c_target/internal.h | 29 +++++++++++++++++++++++++++--
M src/arch/c_target/target.c | 112 +++++++++++++++++++++----------------------------------------------------------
M test/toy/run.sh | 13 +++++++++++--
5 files changed, 723 insertions(+), 156 deletions(-)

diff --git a/doc/CBACKEND.md b/doc/CBACKEND.md @@ -436,14 +436,44 @@ Other implementation choices that landed in Phase 1: Acceptance met: `int add(int,int){return a+b;} int main(){return add(2,3);}` round-trips through the C target and exits 5. -### Phase 2 — control flow and memory - -- `cmp`, `cmp_branch`, `label_*`, `jump`, `scope_*`. -- `load`/`store`/`addr_of`/`indirect`, `local`, `frame_slot`, `local_addr`. -- `convert` for the integer/float conversions; `bitcast` via memcpy. - -Acceptance: corpora that exercise loops, conditionals, locals, pointer -chasing. +### Phase 2 — control flow and memory ✅ landed + +Implemented: `label_new`/`label_place`/`jump`/`cmp_branch`, `cmp`, `unop`, +`scope_begin`/`scope_else`/`scope_end`/`break_to`/`continue_to`, `local`, +`local_addr`, `addr_of`, `convert` (including `CV_BITCAST` via +`__builtin_memcpy`), `load`/`store` for `OPK_INDIRECT`. + +Other choices that landed in Phase 2: + +- **Reg ids reused across types.** CG's value stack mints fresh virtual + reg ids but reuses them across statements when the dst reg's previous + value has been consumed. The first-sighting C declaration locks in one + type, so every write to a `vN` emits `vN = (DECL_T)(uintptr_t)(RHS);` + (or `(DECL_T)(RHS)` for FP dst). The `(uintptr_t)` bridge suppresses + `-Wint-conversion` when the RHS happens to be an integer expression and + `DECL_T` is a pointer. +- **Locals always go to frame slots.** `c_local` allocates a slot via + `c_frame_slot` and returns `CG_LOCAL_STORAGE_FRAME`; non-addr-taken + locals could in principle be REGs but the uniform FRAME mapping is + simpler and the C compiler dissolves the distinction. +- **Scope methods are mostly inert.** The public `CfreeCg` API places + break/continue labels via `label_place` itself, so `scope_begin` and + `scope_end` only record kind/labels in a per-target stack. 
`break_to` + and `continue_to` shim onto `jump` for completeness, and `SCOPE_IF` + emits `if (!cond) goto break_label;` for the internal scope path. +- **`OPK_GLOBAL` data access still panics → SKIP.** Phase 2 unlocked many + toy fixtures that lower to global data references; those now surface as + Phase 4 SKIPs ("OPK_GLOBAL data reference not yet supported") rather + than masquerading as Phase-2 method gaps. +- **Toy harness now runs path C at opt level 0 only.** `--emit=c` forces + `opt_level=0` internally, so running it for every level in + `CFREE_TOY_OPT_LEVELS` was duplicating identical work; the toy err + cases no longer run when only path C is enabled (they're cc-failure + tests with no dependency on the emit target). + +Acceptance met: 49 toy cases under path C pass (loops, conditionals, +locals, pointer chasing, recursion); the remaining 78 SKIPs all blame +Phase 3/4 surface (aggregates, intrinsics, atomics, data emission). ### Phase 3 — aggregates, varargs, intrinsics diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -57,6 +57,18 @@ static const char* c_int_type_name_for_width(u32 width, int signed_) { } } +/* Returns the integer width for sign-aware emission. 0 if the type isn't a + * fixed-width integer (float, ptr, void, aggregate). */ +static u32 c_int_width_for_signedness(CTarget* t, CfreeCgTypeId type) { + if (type == CFREE_CG_TYPE_NONE) return 0; + CfreeCgTypeId u = api_unalias_type(t->c, type); + const CgType* ty = cg_type_get(t->c, u); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_INT) return ty->integer.width; + if (ty->kind == CFREE_CG_TYPE_BOOL) return 32; /* bool maps to int32_t */ + return 0; +} + /* Phase 1: void / bool / sized int / pointer. Aggregates and floats panic. 
*/ static const char* c_typename(CTarget* t, CfreeCgTypeId type) { CfreeCgTypeId resolved = api_unalias_type(t->c, type); @@ -202,6 +214,15 @@ void c_emit_operand(CTarget* t, Operand op) { c_slot_name(op.v.frame_slot, buf, sizeof buf); cbuf_puts(&t->body, buf); return; + case OPK_GLOBAL: { + /* Data globals need an `extern T name;` declaration AND, in the + * single-TU case, the actual data definition — both belong to data + * emission (Phase 4). Defer with a graceful panic the harness reports + * as SKIP. Function callees go through c_call directly, not here. */ + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + compiler_panic(t->c, loc, + "C target: OPK_GLOBAL data reference not yet supported"); + } default: { SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; compiler_panic(t->c, loc, "C target: operand kind %d not yet supported", @@ -210,6 +231,157 @@ void c_emit_operand(CTarget* t, Operand op) { } } +/* CG's value stack reuses Reg ids across different operand types (e.g. a + * register first written as void* may later receive a bool result). The C + * declaration is fixed at first sighting, so every write must explicitly + * cast the RHS back to the declared type. For pointer-typed destinations we + * insert a uintptr_t bridge to avoid -Wint-conversion when the RHS happens + * to be an integer expression; float destinations skip the bridge since + * pointer/float mixing doesn't occur in cfree IR. */ +static int c_type_is_float(CTarget* t, CfreeCgTypeId type) { + if (type == CFREE_CG_TYPE_NONE) return 0; + const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type)); + return ty && ty->kind == CFREE_CG_TYPE_FLOAT; +} + +/* Emit " vN = (DECL_T)(uintptr_t)(" or, for float regs, " vN = (DECL_T)(". + * Caller must then emit the RHS expression and call c_emit_reg_assign_close. */ +static void c_emit_reg_assign_open(CTarget* t, Reg r) { + if ((u32)r >= t->reg_cap || !t->reg_declared[r]) { + compiler_panic(t->c, t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: assign to undeclared reg v%u", (unsigned)r); + } + CfreeCgTypeId decl = t->reg_type[r]; + char buf[24]; + c_reg_name(r, buf, sizeof buf); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, " = ("); + c_emit_type(t, &t->body, decl); + cbuf_puts(&t->body, ")"); + if (!c_type_is_float(t, decl)) { + cbuf_puts(&t->body, "(uintptr_t)"); + } + cbuf_puts(&t->body, "("); +} + +static void c_emit_reg_assign_close(CTarget* t) { + cbuf_puts(&t->body, ");\n"); +} + +void c_emit_operand_signed(CTarget* t, Operand op, int signed_) { + u32 w = c_int_width_for_signedness(t, op.type); + if (w == 0) { + /* Not an integer — emit without sign cast. */ + c_emit_operand(t, op); + return; + } + const char* tn = c_int_type_name_for_width(w, signed_); + if (!tn) { + c_emit_operand(t, op); + return; + } + cbuf_puts(&t->body, "(("); + cbuf_puts(&t->body, tn); + cbuf_puts(&t->body, ")"); + c_emit_operand(t, op); + cbuf_puts(&t->body, ")"); +} + +/* Emit a C lvalue expression for an addr operand (OPK_LOCAL / OPK_GLOBAL / + * OPK_INDIRECT) using `access_type` as the access type. The result is the + * full `*(T*)(...)` dereference (or the C variable directly when the access + * type matches the underlying slot/symbol). */ +static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_type) { + char buf[24]; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + switch (addr.kind) { + case OPK_LOCAL: + /* slot_N is already a typed C variable matching its declared type. + * For access_type that differs (rare; e.g. bit-casting a slot), we'd + * need to memcpy; defer until a fixture forces it. */ + c_slot_name(addr.v.frame_slot, buf, sizeof buf); + cbuf_puts(&t->body, buf); + return; + case OPK_GLOBAL: { + /* Phase 2 defers data emission to Phase 4. 
*/ + (void)access_type; + compiler_panic(t->c, loc, + "C target: OPK_GLOBAL data reference not yet supported"); + } + case OPK_INDIRECT: { + Operand base_reg; + base_reg.kind = OPK_REG; + base_reg.cls = RC_INT; + base_reg.type = 0; + base_reg.v.reg = addr.v.ind.base; + /* Ensure the base reg is declared. We can't readily look up its type + * post-hoc, so reuse whatever it was first declared with. */ + if ((u32)addr.v.ind.base >= t->reg_cap || + !t->reg_declared[addr.v.ind.base]) { + compiler_panic(t->c, loc, + "C target: indirect on undeclared base reg v%u", + (unsigned)addr.v.ind.base); + } + cbuf_puts(&t->body, "(*("); + c_emit_type(t, &t->body, access_type); + cbuf_puts(&t->body, "*)((char*)"); + char rbuf[24]; + c_reg_name(addr.v.ind.base, rbuf, sizeof rbuf); + cbuf_puts(&t->body, rbuf); + if (addr.v.ind.ofs != 0) { + cbuf_puts(&t->body, " + "); + cbuf_put_i64(&t->body, (i64)addr.v.ind.ofs); + } + cbuf_puts(&t->body, "))"); + return; + } + default: + compiler_panic(t->c, loc, + "C target: addr-deref on operand kind %d not supported", + (int)addr.kind); + } +} + +/* Emit a C address-of expression for a lvalue operand. Output is a pointer + * value (cast to dst_type). */ +static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) { + char buf[24]; + SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + switch (lv.kind) { + case OPK_LOCAL: + cbuf_puts(&t->body, "(("); + c_emit_type(t, &t->body, dst_type); + cbuf_puts(&t->body, ")&"); + c_slot_name(lv.v.frame_slot, buf, sizeof buf); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ")"); + return; + case OPK_GLOBAL: { + (void)dst_type; + compiler_panic(t->c, loc, + "C target: OPK_GLOBAL data reference not yet supported"); + } + case OPK_INDIRECT: { + cbuf_puts(&t->body, "(("); + c_emit_type(t, &t->body, dst_type); + cbuf_puts(&t->body, ")((char*)"); + c_reg_name(lv.v.ind.base, buf, sizeof buf); + cbuf_puts(&t->body, buf); + if (lv.v.ind.ofs != 0) { + cbuf_puts(&t->body, " + "); + cbuf_put_i64(&t->body, (i64)lv.v.ind.ofs); + } + cbuf_puts(&t->body, "))"); + return; + } + default: + compiler_panic(t->c, loc, + "C target: addr-of on operand kind %d not supported", + (int)lv.kind); + } +} + /* === Symbol name lookup === */ const char* c_sym_name(CTarget* t, ObjSymId sym) { @@ -276,6 +448,9 @@ void c_func_begin(CGTarget* T, const CGFuncDesc* fd) { t->reg_type[i] = 0; } t->nslots = 0; + t->next_label = 0; + t->next_tmp = 0; + t->nscopes = 0; const char* name = c_sym_name(t, fd->sym); @@ -396,15 +571,9 @@ void c_load_imm(CGTarget* T, Operand dst, i64 imm) { compiler_panic(t->c, loc, "C target: load_imm dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - char buf[24]; - c_reg_name(dst.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, " "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = ("); - c_emit_type(t, &t->body, dst.type); - cbuf_puts(&t->body, ")"); + c_emit_reg_assign_open(t, dst.v.reg); cbuf_put_i64(&t->body, imm); - cbuf_puts(&t->body, ";\n"); + c_emit_reg_assign_close(t); } void c_copy(CGTarget* T, Operand dst, Operand src) { @@ -414,13 +583,9 @@ void c_copy(CGTarget* T, Operand dst, Operand src) { compiler_panic(t->c, loc, "C target: copy dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - char buf[24]; - 
c_reg_name(dst.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, " "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = "); + c_emit_reg_assign_open(t, dst.v.reg); c_emit_operand(t, src); - cbuf_puts(&t->body, ";\n"); + c_emit_reg_assign_close(t); } static const char* binop_to_c(BinOp op) { @@ -456,6 +621,31 @@ static const char* binop_to_c(BinOp op) { return NULL; } +/* For BinOp `op`, decide how to sign-cast the operands. Returns 0 for "no + * cast", 1 for "cast both to signed", 2 for "cast both to unsigned", 3 for + * "cast lhs only (signedness `lhs_signed`)" (used for shifts). */ +typedef enum { BSC_NONE, BSC_SIGNED, BSC_UNSIGNED, BSC_SHIFT_LHS } BinSignCast; + +static BinSignCast binop_sign_kind(BinOp op, int* lhs_signed_out) { + *lhs_signed_out = 1; + switch (op) { + case BO_SDIV: + case BO_SREM: + return BSC_SIGNED; + case BO_UDIV: + case BO_UREM: + return BSC_UNSIGNED; + case BO_SHR_S: + *lhs_signed_out = 1; + return BSC_SHIFT_LHS; + case BO_SHR_U: + *lhs_signed_out = 0; + return BSC_SHIFT_LHS; + default: + return BSC_NONE; + } +} + void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -467,19 +657,393 @@ void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) { compiler_panic(t->c, loc, "C target: binop dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - char buf[24]; - c_reg_name(dst.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, " "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = "); - c_emit_operand(t, a); - cbuf_puts(&t->body, " "); + c_emit_reg_assign_open(t, dst.v.reg); + int lhs_signed = 1; + BinSignCast bsc = binop_sign_kind(op, &lhs_signed); + switch (bsc) { + case BSC_NONE: + c_emit_operand(t, a); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, sym); + cbuf_puts(&t->body, " "); + c_emit_operand(t, b); + break; + case BSC_SIGNED: + c_emit_operand_signed(t, a, 1); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, sym); + cbuf_puts(&t->body, " "); + c_emit_operand_signed(t, b, 1); + break; + case BSC_UNSIGNED: + c_emit_operand_signed(t, a, 0); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, sym); + cbuf_puts(&t->body, " "); + c_emit_operand_signed(t, b, 0); + break; + case BSC_SHIFT_LHS: + c_emit_operand_signed(t, a, lhs_signed); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, sym); + cbuf_puts(&t->body, " "); + c_emit_operand(t, b); + break; + } + c_emit_reg_assign_close(t); +} + +/* ===== unop ===== */ + +void c_unop(CGTarget* T, UnOp op, Operand dst, Operand a) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, loc, "C target: unop dst must be REG"); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + const char* sym = NULL; + switch (op) { + case UO_NEG: + case UO_FNEG: + sym = "-"; + break; + case UO_NOT: + sym = "!"; + break; + case UO_BNOT: + sym = "~"; + break; + default: + compiler_panic(t->c, loc, "C target: unknown unop %d", (int)op); + } + c_emit_reg_assign_open(t, dst.v.reg); cbuf_puts(&t->body, sym); + c_emit_operand(t, a); + c_emit_reg_assign_close(t); +} + +/* ===== compare ops ===== */ + +static const char* cmp_to_c(CmpOp op) { + switch (op) { + case CMP_EQ: return "=="; + case CMP_NE: return "!="; + case CMP_LT_S: + case CMP_LT_U: + case CMP_LT_F: return "<"; + case CMP_LE_S: + case CMP_LE_U: + case CMP_LE_F: return "<="; + case CMP_GT_S: + case CMP_GT_U: + case CMP_GT_F: return ">"; + case CMP_GE_S: + case CMP_GE_U: + case CMP_GE_F: return ">="; + } + return NULL; +} + +/* Returns 1 if cmp op needs unsigned operand cast. -1 if signed. 0 if no cast + * (EQ/NE — sign doesn't matter for integer equality at the same width — and + * float compares). 
*/ +static int cmp_signedness(CmpOp op) { + switch (op) { + case CMP_LT_S: + case CMP_LE_S: + case CMP_GT_S: + case CMP_GE_S: + return -1; + case CMP_LT_U: + case CMP_LE_U: + case CMP_GT_U: + case CMP_GE_U: + return 1; + default: + return 0; + } +} + +static void c_emit_cmp_operands(CTarget* t, CmpOp op, Operand a, Operand b) { + int sg = cmp_signedness(op); + if (sg == 0) { + c_emit_operand(t, a); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, cmp_to_c(op)); + cbuf_puts(&t->body, " "); + c_emit_operand(t, b); + } else { + int signed_ = (sg < 0); + c_emit_operand_signed(t, a, signed_); + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, cmp_to_c(op)); + cbuf_puts(&t->body, " "); + c_emit_operand_signed(t, b, signed_); + } +} + +void c_cmp(CGTarget* T, CmpOp op, Operand dst, Operand a, Operand b) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, loc, "C target: cmp dst must be REG"); + } + if (!cmp_to_c(op)) { + compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_emit_reg_assign_open(t, dst.v.reg); + c_emit_cmp_operands(t, op, a, b); + c_emit_reg_assign_close(t); +} + +/* ===== labels, jump, cmp_branch ===== */ + +static void c_label_name(Label l, char* out, size_t cap) { + size_t i = 0; + if (cap == 0) return; + const char* p = "L"; + while (*p && i + 1 < cap) out[i++] = *p++; + char tmp[16]; + size_t n = 0; + u32 v = (u32)l; + if (v == 0) { + tmp[n++] = '0'; + } else { + while (v) { + tmp[n++] = (char)('0' + (v % 10)); + v /= 10; + } + } + while (n && i + 1 < cap) out[i++] = tmp[--n]; + out[i] = '\0'; +} + +Label c_label_new(CGTarget* T) { + CTarget* t = (CTarget*)T; + t->next_label += 1; + return (Label)t->next_label; +} + +void c_label_place(CGTarget* T, Label l) { + CTarget* t = (CTarget*)T; + char buf[24]; + c_label_name(l, buf, sizeof buf); + /* `Lk: ;` — the empty statement keeps it 
valid even at end-of-block. */ cbuf_puts(&t->body, " "); - c_emit_operand(t, b); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ": ;\n"); +} + +void c_jump(CGTarget* T, Label l) { + CTarget* t = (CTarget*)T; + char buf[24]; + c_label_name(l, buf, sizeof buf); + cbuf_puts(&t->body, " goto "); + cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ";\n"); } +void c_cmp_branch(CGTarget* T, CmpOp op, Operand a, Operand b, Label l) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (!cmp_to_c(op)) { + compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); + } + char buf[24]; + c_label_name(l, buf, sizeof buf); + cbuf_puts(&t->body, " if ("); + c_emit_cmp_operands(t, op, a, b); + cbuf_puts(&t->body, ") goto "); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ";\n"); +} + +/* ===== scopes ===== + * CG mints break/continue labels itself and places them around the body, so + * scope_begin/end have nothing to emit. We retain the kind+labels in case + * break_to/continue_to are invoked (legacy code path not used by the public + * api but reachable via opt's lowering — which the C target bypasses). */ + +static void c_grow_scopes(CTarget* t, u32 needed) { + Heap* h = t->c->ctx->heap; + u32 newcap = t->scopes_cap ? 
t->scopes_cap : 8; + while (newcap < needed) newcap *= 2; + CScopeInfo* ns = (CScopeInfo*)h->realloc( + h, t->scopes, t->scopes_cap * sizeof(CScopeInfo), + newcap * sizeof(CScopeInfo), _Alignof(CScopeInfo)); + if (!ns && newcap) { + compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory"); + } + t->scopes = ns; + t->scopes_cap = newcap; +} + +CGScope c_scope_begin(CGTarget* T, const CGScopeDesc* d) { + CTarget* t = (CTarget*)T; + if (t->nscopes + 1u >= t->scopes_cap) c_grow_scopes(t, t->nscopes + 2u); + u32 idx = t->nscopes; + t->scopes[idx].kind = d->kind; + t->scopes[idx].break_label = d->break_label; + t->scopes[idx].continue_label = d->continue_label; + t->nscopes += 1u; + /* SCOPE_IF carries a cond consumed here. The public CfreeCg API always + * emits SCOPE_LOOP, so this branch only fires for internal callers. */ + if (d->kind == SCOPE_IF) { + char buf[24]; + c_label_name(d->break_label, buf, sizeof buf); + cbuf_puts(&t->body, " if (!("); + c_emit_operand(t, d->cond); + cbuf_puts(&t->body, ")) goto "); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ";\n"); + } + return (CGScope)(idx + 1u); +} + +void c_scope_else(CGTarget* T, CGScope s) { + (void)T; + (void)s; + /* Public API doesn't emit SCOPE_IF; if it ever does, the frontend is + * responsible for placing the else label and the break_label itself. */ +} + +void c_scope_end(CGTarget* T, CGScope s) { + CTarget* t = (CTarget*)T; + if (s == 0 || (u32)s > t->nscopes) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: scope_end on invalid handle"); + } + /* LIFO pop. */ + t->nscopes -= 1u; +} + +void c_break_to(CGTarget* T, CGScope s) { + CTarget* t = (CTarget*)T; + if (s == 0 || (u32)s > t->nscopes) { + compiler_panic(t->c, t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: break_to on invalid handle"); + } + c_jump(T, t->scopes[s - 1u].break_label); +} + +void c_continue_to(CGTarget* T, CGScope s) { + CTarget* t = (CTarget*)T; + if (s == 0 || (u32)s > t->nscopes) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: continue_to on invalid handle"); + } + c_jump(T, t->scopes[s - 1u].continue_label); +} + +/* ===== local, local_addr ===== */ + +CGLocalStorage c_local(CGTarget* T, const CGLocalDesc* d) { + /* Map every local to a frame slot. virtual_regs=1 means we never spill, + * and at the C level frame slots and regs both look like plain locals, + * so there's nothing to gain from picking REG storage for non-addressable + * locals. The uniform FRAME mapping also lets local_addr just emit + * `&slot_N` without bookkeeping. */ + FrameSlotDesc fsd; + fsd.type = d->type; + fsd.name = d->name; + fsd.loc = d->loc; + fsd.size = d->size; + fsd.align = d->align; + fsd.kind = FS_LOCAL; + fsd.pad = 0; + fsd.flags = 0; + if (d->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; + FrameSlot slot = c_frame_slot(T, &fsd); + CGLocalStorage st; + st.kind = CG_LOCAL_STORAGE_FRAME; + st.pad[0] = st.pad[1] = st.pad[2] = 0; + st.v.frame_slot = slot; + return st; +} + +void c_local_addr(CGTarget* T, Operand dst, const CGLocalDesc* d, + CGLocalStorage s) { + CTarget* t = (CTarget*)T; + (void)d; + SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, loc, "C target: local_addr dst must be REG"); + } + if (s.kind != CG_LOCAL_STORAGE_FRAME) { + compiler_panic(t->c, loc, "C target: local_addr expects frame storage"); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + char buf[24]; + c_emit_reg_assign_open(t, dst.v.reg); + cbuf_puts(&t->body, "&"); + c_slot_name(s.v.frame_slot, buf, sizeof buf); + cbuf_puts(&t->body, buf); + c_emit_reg_assign_close(t); +} + +/* ===== convert ===== */ + +void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, loc, "C target: convert dst must be REG"); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + char buf[24]; + c_reg_name(dst.v.reg, buf, sizeof buf); + + if (k == CV_BITCAST) { + /* Same-size reinterpretation. Use __builtin_memcpy through a temp so + * neither aliasing nor representation assumptions creep in. The temp + * lives in its own `{ ... }` block, so no name collision tracking. */ + u32 id = ++t->next_tmp; + cbuf_puts(&t->body, " { "); + c_emit_type(t, &t->body, src.type); + cbuf_puts(&t->body, " __bc"); + cbuf_put_u64(&t->body, (u64)id); + cbuf_puts(&t->body, " = "); + c_emit_operand(t, src); + cbuf_puts(&t->body, "; __builtin_memcpy(&"); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ", &__bc"); + cbuf_put_u64(&t->body, (u64)id); + cbuf_puts(&t->body, ", sizeof __bc"); + cbuf_put_u64(&t->body, (u64)id); + cbuf_puts(&t->body, "); }\n"); + return; + } + + /* Integer and float conversions: a C cast does the right thing once the + * source is first cast to the appropriate signedness (for SEXT/ZEXT and + * ITOF_S/U / FTOI_S/U). 
*/ + int src_signed = 1; + switch (k) { + case CV_ZEXT: + case CV_ITOF_U: + case CV_FTOI_U: + src_signed = 0; + break; + default: + src_signed = 1; + break; + } + + c_emit_reg_assign_open(t, dst.v.reg); + cbuf_puts(&t->body, "("); + c_emit_type(t, &t->body, dst.type); + cbuf_puts(&t->body, ")"); + if (k == CV_SEXT || k == CV_ZEXT) { + c_emit_operand_signed(t, src, src_signed); + } else { + /* TRUNC / FTOI / ITOF / FEXT / FTRUNC: rely on C cast semantics. */ + c_emit_operand(t, src); + } + c_emit_reg_assign_close(t); +} + /* === call === */ void c_call(CGTarget* T, const CGCallDesc* d) { @@ -497,7 +1061,6 @@ void c_call(CGTarget* T, const CGCallDesc* d) { CfreeCgTypeId ret_type = fty->func.ret; int has_ret = !cg_type_is_void(t->c, ret_type); - cbuf_puts(&t->body, " "); if (has_ret) { if (d->ret.storage.kind != OPK_REG) { compiler_panic(t->c, loc, @@ -505,13 +1068,10 @@ void c_call(CGTarget* T, const CGCallDesc* d) { } c_ensure_reg(t, d->ret.storage.v.reg, ret_type, (RegClass)d->ret.storage.cls); - char buf[24]; - c_reg_name(d->ret.storage.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = "); + c_emit_reg_assign_open(t, d->ret.storage.v.reg); + } else { + cbuf_puts(&t->body, " "); } - /* Emit a forward declaration so calls to symbols defined later or not at - * all (external) compile against a known prototype. */ c_ensure_forward_decl(t, d->callee.v.global.sym, d->fn_type); cbuf_puts(&t->body, c_sym_name(t, d->callee.v.global.sym)); cbuf_puts(&t->body, "("); @@ -519,60 +1079,51 @@ void c_call(CGTarget* T, const CGCallDesc* d) { if (i > 0) cbuf_puts(&t->body, ", "); c_emit_operand(t, d->args[i].storage); } - cbuf_puts(&t->body, ");\n"); + cbuf_puts(&t->body, ")"); + if (has_ret) { + c_emit_reg_assign_close(t); + } else { + cbuf_puts(&t->body, ";\n"); + } } /* === load / store === */ void c_load(CGTarget* T, Operand dst, Operand addr, MemAccess m) { CTarget* t = (CTarget*)T; - (void)m; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (dst.kind != OPK_REG) { compiler_panic(t->c, loc, "C target: load dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - char buf[24]; - c_reg_name(dst.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, " "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = "); - switch (addr.kind) { - case OPK_LOCAL: - /* slot_N is already a typed C variable; direct read. */ - c_slot_name(addr.v.frame_slot, buf, sizeof buf); - cbuf_puts(&t->body, buf); - break; - default: - compiler_panic(t->c, loc, - "C target: load from operand kind %d not yet supported", - (int)addr.kind); - } - cbuf_puts(&t->body, ";\n"); + CfreeCgTypeId access_ty = m.type ? m.type : dst.type; + c_emit_reg_assign_open(t, dst.v.reg); + c_emit_addr_deref(t, addr, access_ty); + c_emit_reg_assign_close(t); } void c_store(CGTarget* T, Operand addr, Operand src, MemAccess m) { CTarget* t = (CTarget*)T; - (void)m; - SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + CfreeCgTypeId access_ty = m.type ? m.type : src.type; cbuf_puts(&t->body, " "); - switch (addr.kind) { - case OPK_LOCAL: { - char buf[24]; - c_slot_name(addr.v.frame_slot, buf, sizeof buf); - cbuf_puts(&t->body, buf); - break; - } - default: - compiler_panic(t->c, loc, - "C target: store to operand kind %d not yet supported", - (int)addr.kind); - } + c_emit_addr_deref(t, addr, access_ty); cbuf_puts(&t->body, " = "); c_emit_operand(t, src); cbuf_puts(&t->body, ";\n"); } +void c_addr_of(CGTarget* T, Operand dst, Operand lv) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, loc, "C target: addr_of dst must be REG"); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_emit_reg_assign_open(t, dst.v.reg); + c_emit_lvalue_addr(t, lv, dst.type); + c_emit_reg_assign_close(t); +} + void c_ret(CGTarget* T, const CGABIValue* val) { CTarget* t = (CTarget*)T; cbuf_puts(&t->body, " return"); @@ -690,9 +1241,13 @@ void c_destroy(CGTarget* T) { h->free(h, t->reg_type, t->reg_cap * sizeof(CfreeCgTypeId)); if (t->slot_type) h->free(h, t->slot_type, t->slot_cap * sizeof(CfreeCgTypeId)); + if (t->scopes) + h->free(h, t->scopes, t->scopes_cap * sizeof(CScopeInfo)); t->reg_declared = NULL; t->reg_type = NULL; t->slot_type = NULL; + t->scopes = NULL; t->reg_cap = 0; t->slot_cap = 0; + t->scopes_cap = 0; } diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h @@ -79,8 +79,27 @@ typedef struct CTarget { size_t fn_body_start; const CGFuncDesc* cur_fn; + + /* Label minting: ids 1..next_label. 0 is reserved as LABEL_NONE. */ + u32 next_label; + + /* Stack of active scopes. CGScope handles are (slot_index + 1). */ + struct CScopeInfo* scopes; + u32 scopes_cap; + u32 nscopes; + + /* Monotone counter for synthesizing unique temporary names within a + * function (e.g. bitcast scratch). Reset on func_begin. */ + u32 next_tmp; } CTarget; +typedef struct CScopeInfo { + u8 kind; /* ScopeKind */ + u8 pad[3]; + Label break_label; + Label continue_label; +} CScopeInfo; + CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w); /* Helpers shared across emit.c. */ @@ -91,9 +110,15 @@ void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls); void c_reg_name(Reg r, char* out, size_t cap); /* Write the C type for a CG int/float/ptr type to `b`. */ void c_emit_type(CTarget* t, CBuf* b, CfreeCgTypeId type); -/* Write operand expression to body (e.g. "v3", "(int32_t)42"). For Phase 1 - * only OPK_REG and OPK_IMM are supported. 
*/ +/* Write operand expression to body (e.g. "v3", "(int32_t)42"). Supports + * OPK_REG / OPK_IMM / OPK_LOCAL / OPK_GLOBAL. INDIRECT is only valid in + * lvalue positions and is emitted via load/store/addr_of paths. */ void c_emit_operand(CTarget* t, Operand op); +/* Like c_emit_operand but wraps in an explicit signed/unsigned cast of the + * operand's type width. For integer ops where signedness affects semantics + * (UDIV/UREM, SHR_U, unsigned compares). Falls back to c_emit_operand when + * the operand is not integer-typed. */ +void c_emit_operand_signed(CTarget* t, Operand op, int signed_); /* Lookup the C linker name for an ObjSymId. Returns interned string. */ const char* c_sym_name(CTarget* t, ObjSymId sym); diff --git a/src/arch/c_target/target.c b/src/arch/c_target/target.c @@ -21,11 +21,26 @@ void c_ret(CGTarget*, const CGABIValue*); void c_load_imm(CGTarget*, Operand, i64); void c_copy(CGTarget*, Operand, Operand); void c_binop(CGTarget*, BinOp, Operand, Operand, Operand); +void c_unop(CGTarget*, UnOp, Operand, Operand); +void c_cmp(CGTarget*, CmpOp, Operand, Operand, Operand); +void c_convert(CGTarget*, ConvKind, Operand, Operand); void c_call(CGTarget*, const CGCallDesc*); void c_load(CGTarget*, Operand, Operand, MemAccess); void c_store(CGTarget*, Operand, Operand, MemAccess); +void c_addr_of(CGTarget*, Operand, Operand); CGLocalStorage c_param(CGTarget*, const CGParamDesc*); +CGLocalStorage c_local(CGTarget*, const CGLocalDesc*); +void c_local_addr(CGTarget*, Operand, const CGLocalDesc*, CGLocalStorage); FrameSlot c_frame_slot(CGTarget*, const FrameSlotDesc*); +Label c_label_new(CGTarget*); +void c_label_place(CGTarget*, Label); +void c_jump(CGTarget*, Label); +void c_cmp_branch(CGTarget*, CmpOp, Operand, Operand, Label); +CGScope c_scope_begin(CGTarget*, const CGScopeDesc*); +void c_scope_else(CGTarget*, CGScope); +void c_scope_end(CGTarget*, CGScope); +void c_break_to(CGTarget*, CGScope); +void c_continue_to(CGTarget*, CGScope); void 
c_set_loc(CGTarget*, SrcLoc); void c_finalize(CGTarget*); void c_destroy(CGTarget*); @@ -44,17 +59,6 @@ static void c_unimpl_func_begin_known_frame(CGTarget* t, const CGFuncDesc* f, C_UNIMPL("func_begin_known_frame"); } -static CGLocalStorage c_unimpl_local(CGTarget* t, const CGLocalDesc* d) { - (void)d; - C_UNIMPL("local"); -} - -static void c_unimpl_local_addr(CGTarget* t, Operand dst, const CGLocalDesc* d, - CGLocalStorage s) { - (void)dst; (void)d; (void)s; - C_UNIMPL("local_addr"); -} - static void c_unimpl_spill_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) { (void)a; (void)s; (void)m; @@ -111,52 +115,10 @@ static u32 c_call_stack_size_zero(CGTarget* t, const CGCallDesc* d) { return 0; } -static Label c_unimpl_label_new(CGTarget* t) { - C_UNIMPL("label_new"); -} -static void c_unimpl_label_place(CGTarget* t, Label l) { - (void)l; - C_UNIMPL("label_place"); -} -static void c_unimpl_jump(CGTarget* t, Label l) { - (void)l; - C_UNIMPL("jump"); -} -static void c_unimpl_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, - Label l) { - (void)op; (void)a; (void)b; (void)l; - C_UNIMPL("cmp_branch"); -} - -static CGScope c_unimpl_scope_begin(CGTarget* t, const CGScopeDesc* d) { - (void)d; - C_UNIMPL("scope_begin"); -} -static void c_unimpl_scope_else(CGTarget* t, CGScope s) { - (void)s; - C_UNIMPL("scope_else"); -} -static void c_unimpl_scope_end(CGTarget* t, CGScope s) { - (void)s; - C_UNIMPL("scope_end"); -} -static void c_unimpl_break_to(CGTarget* t, CGScope s) { - (void)s; - C_UNIMPL("break_to"); -} -static void c_unimpl_continue_to(CGTarget* t, CGScope s) { - (void)s; - C_UNIMPL("continue_to"); -} - static void c_unimpl_load_const(CGTarget* t, Operand dst, ConstBytes cb) { (void)dst; (void)cb; C_UNIMPL("load_const"); } -static void c_unimpl_addr_of(CGTarget* t, Operand dst, Operand lv) { - (void)dst; (void)lv; - C_UNIMPL("addr_of"); -} static void c_unimpl_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { (void)dst; (void)sym; 
(void)addend; @@ -183,20 +145,6 @@ static void c_unimpl_bitfield_store(CGTarget* t, Operand addr, Operand src, C_UNIMPL("bitfield_store"); } -static void c_unimpl_unop(CGTarget* t, UnOp op, Operand d, Operand a) { - (void)op; (void)d; (void)a; - C_UNIMPL("unop"); -} -static void c_unimpl_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, - Operand b) { - (void)op; (void)d; (void)a; (void)b; - C_UNIMPL("cmp"); -} -static void c_unimpl_convert(CGTarget* t, ConvKind k, Operand d, Operand s) { - (void)k; (void)d; (void)s; - C_UNIMPL("convert"); -} - static void c_unimpl_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* p) { (void)d; (void)p; C_UNIMPL("plan_call"); @@ -310,8 +258,8 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { /* ---- frame slots and locals ---- */ t->frame_slot = c_frame_slot; - t->local = c_unimpl_local; - t->local_addr = c_unimpl_local_addr; + t->local = c_local; + t->local_addr = c_local_addr; t->param = c_param; t->spill_reg = c_unimpl_spill_reg; t->reload_reg = c_unimpl_reload_reg; @@ -329,15 +277,15 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { t->call_stack_size = c_call_stack_size_zero; /* ---- labels and control flow ---- */ - t->label_new = c_unimpl_label_new; - t->label_place = c_unimpl_label_place; - t->jump = c_unimpl_jump; - t->cmp_branch = c_unimpl_cmp_branch; - t->scope_begin = c_unimpl_scope_begin; - t->scope_else = c_unimpl_scope_else; - t->scope_end = c_unimpl_scope_end; - t->break_to = c_unimpl_break_to; - t->continue_to = c_unimpl_continue_to; + t->label_new = c_label_new; + t->label_place = c_label_place; + t->jump = c_jump; + t->cmp_branch = c_cmp_branch; + t->scope_begin = c_scope_begin; + t->scope_else = c_scope_else; + t->scope_end = c_scope_end; + t->break_to = c_break_to; + t->continue_to = c_continue_to; /* ---- data movement ---- */ t->load_imm = c_load_imm; @@ -345,7 +293,7 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { t->copy = 
c_copy; t->load = c_load; t->store = c_store; - t->addr_of = c_unimpl_addr_of; + t->addr_of = c_addr_of; t->tls_addr_of = c_unimpl_tls_addr_of; t->copy_bytes = c_unimpl_copy_bytes; t->set_bytes = c_unimpl_set_bytes; @@ -354,9 +302,9 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { /* ---- arithmetic, compare, convert ---- */ t->binop = c_binop; - t->unop = c_unimpl_unop; - t->cmp = c_unimpl_cmp; - t->convert = c_unimpl_convert; + t->unop = c_unop; + t->cmp = c_cmp; + t->convert = c_convert; /* ---- calls / return ---- */ t->call = c_call; diff --git a/test/toy/run.sh b/test/toy/run.sh @@ -396,13 +396,22 @@ for src in "${cases[@]}"; do if [ $RUN_X -eq 1 ]; then run_case_cross "$name" "$src" "$expected" "$work" "$opt" fi - if [ $RUN_C -eq 1 ]; then + # Path C forces opt_level=0 internally regardless of -O; running it + # at multiple opt levels would duplicate identical work. + if [ $RUN_C -eq 1 ] && [ "$opt" = "0" ]; then run_case_emit_c "$name" "$src" "$expected" "$work" "$opt" fi done done -err_cases=("$TEST_DIR"/err/*.toy) +# err cases exercise compile-failure paths; they aren't relevant to path C +# (which goes through the same cc invocation). Only run them when at least +# one of the native compile paths (R/L/X) is enabled. +if [ $RUN_R -eq 1 ] || [ $RUN_L -eq 1 ] || [ $RUN_X -eq 1 ]; then + err_cases=("$TEST_DIR"/err/*.toy) +else + err_cases=() +fi for src in "${err_cases[@]}"; do name="$(basename "$src" .toy)" if [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]]; then