kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d391d1a13fc19116a5ed2473022ffa5d5344916b
parent 5054c67a493c11ec0c032c04e722962402406411
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 10:22:24 -0700

cg: preserve switch table indices under opt

Keep the original selector across jump-table bounds checks and recompute the table index before address calculation so optimized lowering does not carry a stale duplicated arithmetic value across control flow.

Refresh index operands after allocating the scaled address register, since that allocation can materialize delayed expressions into a fresh virtual register under opt.

Expand the forced jump-table toy case to use a non-zero dense range, catching selector-vs-index addressing regressions.

Diffstat:
Msrc/cg/control.c | 36+++++++++++++++++++++++++-----------
Msrc/cg/value.c | 12++++++++----
Mtest/toy/cases/127_switch_forced_jump_table.toy | 48+++++++++++++++++++++++++-----------------------
3 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/src/cg/control.c b/src/cg/control.c @@ -199,19 +199,28 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); void_ptr_ty = cg_type_ptr_to(c, builtin_id(CFREE_CG_BUILTIN_VOID)); - /* 1. idx = sel - vmin (selector_type wraparound is what we want; - * the unsigned bounds check below catches both negative-underflow - * and positive-overflow cases). */ + /* 1. Keep the original selector, then compute idx = sel - vmin + * (selector_type wraparound is what we want; the unsigned bounds + * check below catches both negative-underflow and positive-overflow + * cases). The selector copy lets us recompute idx after the bounds + * branch instead of carrying a duplicated arithmetic value across + * control flow. */ + cfree_cg_dup(g); /* [sel, sel] */ cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty); - cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */ + cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [sel, idx] */ /* 2. Bounds check: branch to default when idx u> span-1. */ - cfree_cg_dup(g); /* [idx, idx] */ + cfree_cg_dup(g); /* [sel, idx, idx] */ cfree_cg_push_int(g, plan->span - 1u, sel_ty); - cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U); /* [idx, cond] */ - cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [idx] */ + cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U); /* [sel, idx, cond] */ + cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [sel, idx] */ - /* 3. Widen idx to i64 so the subsequent index multiply runs at + /* 3. Recompute idx from the preserved selector for table addressing. */ + cfree_cg_drop(g); /* [sel] */ + cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty); + cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */ + + /* 4. Widen idx to i64 so the subsequent index multiply runs at * pointer width regardless of selector signedness. The bounds * check above already established 0 <= idx < span, so zext is * value-preserving. */ @@ -219,7 +228,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, cfree_cg_zext(g, i64_ty); } - /* 4. Build the dense label[] slot array and emit the rodata table. */ + /* 5. Build the dense label[] slot array and emit the rodata table. */ labels = (Label*)h->alloc(h, (size_t)plan->span * sizeof *labels, _Alignof(Label)); if (!labels) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom"); @@ -248,7 +257,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, decl.as.object.flags = CFREE_CG_OBJ_READONLY; api_remember_sym(g, table_sym, arr_ty, decl); - /* 5. Compute &table[idx] and load the label address. */ + /* 6. Compute &table[idx] and load the label address. */ cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */ cfree_cg_swap(g); /* [table_lv, idx] */ cfree_cg_index(g, 0); /* [&table[idx]] */ @@ -257,7 +266,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, acc.align = (uint32_t)c->target.ptr_align; cfree_cg_load(g, acc); /* [label_addr] */ - /* 6. Indirect branch with the full closed target set (every case + + /* 7. Indirect branch with the full closed target set (every case + * default), so backends doing branch-target hardening (BTI/IBT/CFG) * can stamp landing pads on every reachable label. */ targets = (CfreeCgLabel*)h->alloc(h, (d->ncases + 1u) * sizeof *targets, @@ -836,6 +845,11 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { } else { Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); Operand scaled = api_op_reg(sr, idx_ty); + /* Allocating `scaled` can materialize a delayed index expression into a + * fresh virtual register under opt. Refresh idx_op so the multiply uses + * the materialized value, not the pre-materialization source operand. */ + idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_REG) idx_op = idx.op; T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty)); if (offset > 0) { T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); diff --git a/src/cg/value.c b/src/cg/value.c @@ -689,10 +689,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) { if (sv->kind == SV_CMP) { CfreeCgTypeId ty = api_sv_type(sv); Operand dst; - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && + if (!g->target->virtual_regs && + sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && sv->delayed.cmp.a.cls == RC_INT) { dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty); - } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && + } else if (!g->target->virtual_regs && + sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && sv->delayed.cmp.b.cls == RC_INT) { dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty); } else { @@ -706,10 +708,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) { if (sv->kind == SV_ARITH) { CfreeCgTypeId ty = api_sv_type(sv); Operand dst; - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && + if (!g->target->virtual_regs && + sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && sv->delayed.arith.a.cls == RC_INT) { dst = api_op_reg(sv->delayed.arith.a.v.reg, ty); - } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && + } else if (!g->target->virtual_regs && + api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_REG && sv->delayed.arith.b.cls == RC_INT) { dst = api_op_reg(sv->delayed.arith.b.v.reg, ty); diff --git a/test/toy/cases/127_switch_forced_jump_table.toy b/test/toy/cases/127_switch_forced_jump_table.toy @@ -1,38 +1,40 @@ -// Forced .jump_table over a dense 16-case range with no default. +// Forced .jump_table over a dense non-zero 16-case range with no default. // Stresses the table-emission path: when no default is given, every // in-range value must reach a case, and out-of-range values must fall -// through past the switch (the synthesized default). +// through past the switch (the synthesized default). The non-zero vmin +// catches regressions where table addressing accidentally uses the original +// selector instead of selector - vmin. fn pick(x: i64): i64 { var r: i64 = -1; switch @[.jump_table] x { - 0 { r = 1000; } - 1 { r = 1001; } - 2 { r = 1002; } - 3 { r = 1003; } - 4 { r = 1004; } - 5 { r = 1005; } - 6 { r = 1006; } - 7 { r = 1007; } - 8 { r = 1008; } - 9 { r = 1009; } - 10 { r = 1010; } - 11 { r = 1011; } - 12 { r = 1012; } - 13 { r = 1013; } - 14 { r = 1014; } - 15 { r = 1015; } + 10 { r = 1000; } + 11 { r = 1001; } + 12 { r = 1002; } + 13 { r = 1003; } + 14 { r = 1004; } + 15 { r = 1005; } + 16 { r = 1006; } + 17 { r = 1007; } + 18 { r = 1008; } + 19 { r = 1009; } + 20 { r = 1010; } + 21 { r = 1011; } + 22 { r = 1012; } + 23 { r = 1013; } + 24 { r = 1014; } + 25 { r = 1015; } } return r; } fn __user_main(): i64 { var s: i64 = 0; - s = s + pick(0); // vmin in-range -> 1000 - s = s + pick(15); // vmax in-range -> 1015 - s = s + pick(7); // interior -> 1007 - s = s + pick(-1); // below vmin -> -1 (synthesized default) - s = s + pick(16); // above vmax -> -1 (synthesized default) + s = s + pick(10); // vmin in-range -> 1000 + s = s + pick(25); // vmax in-range -> 1015 + s = s + pick(17); // interior -> 1007 + s = s + pick(9); // below vmin -> -1 (synthesized default) + s = s + pick(26); // above vmax -> -1 (synthesized default) s = s + pick(100); // far out -> -1 (synthesized default) return s; // 1000+1015+1007 - 3 = 3019 }