commit d391d1a13fc19116a5ed2473022ffa5d5344916b
parent 5054c67a493c11ec0c032c04e722962402406411
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 10:22:24 -0700
cg: preserve switch table indices under opt
Keep the original selector across jump-table bounds checks and recompute the table index before address calculation so optimized lowering does not carry a stale duplicated arithmetic value across control flow.
Refresh index operands after allocating the scaled address register, since that allocation can materialize delayed expressions into a fresh virtual register under opt.
Expand the forced jump-table toy case to use a non-zero dense range, catching selector-vs-index addressing regressions.
Diffstat:
3 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -199,19 +199,28 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
i64_ty = builtin_id(CFREE_CG_BUILTIN_I64);
void_ptr_ty = cg_type_ptr_to(c, builtin_id(CFREE_CG_BUILTIN_VOID));
- /* 1. idx = sel - vmin (selector_type wraparound is what we want;
- * the unsigned bounds check below catches both negative-underflow
- * and positive-overflow cases). */
+ /* 1. Keep the original selector, then compute idx = sel - vmin
+ * (selector_type wraparound is what we want; the unsigned bounds
+ * check below catches both negative-underflow and positive-overflow
+ * cases). The selector copy lets us recompute idx after the bounds
+ * branch instead of carrying a duplicated arithmetic value across
+ * control flow. */
+ cfree_cg_dup(g); /* [sel, sel] */
cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty);
- cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */
+ cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [sel, idx] */
/* 2. Bounds check: branch to default when idx u> span-1. */
- cfree_cg_dup(g); /* [idx, idx] */
+ cfree_cg_dup(g); /* [sel, idx, idx] */
cfree_cg_push_int(g, plan->span - 1u, sel_ty);
- cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U); /* [idx, cond] */
- cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [idx] */
+ cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U); /* [sel, idx, cond] */
+ cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [sel, idx] */
- /* 3. Widen idx to i64 so the subsequent index multiply runs at
+ /* 3. Recompute idx from the preserved selector for table addressing. */
+ cfree_cg_drop(g); /* [sel] */
+ cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty);
+ cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */
+
+ /* 4. Widen idx to i64 so the subsequent index multiply runs at
* pointer width regardless of selector signedness. The bounds
* check above already established 0 <= idx < span, so zext is
* value-preserving. */
@@ -219,7 +228,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
cfree_cg_zext(g, i64_ty);
}
- /* 4. Build the dense label[] slot array and emit the rodata table. */
+ /* 5. Build the dense label[] slot array and emit the rodata table. */
labels = (Label*)h->alloc(h, (size_t)plan->span * sizeof *labels,
_Alignof(Label));
if (!labels) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom");
@@ -248,7 +257,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
decl.as.object.flags = CFREE_CG_OBJ_READONLY;
api_remember_sym(g, table_sym, arr_ty, decl);
- /* 5. Compute &table[idx] and load the label address. */
+ /* 6. Compute &table[idx] and load the label address. */
cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */
cfree_cg_swap(g); /* [table_lv, idx] */
cfree_cg_index(g, 0); /* [&table[idx]] */
@@ -257,7 +266,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
acc.align = (uint32_t)c->target.ptr_align;
cfree_cg_load(g, acc); /* [label_addr] */
- /* 6. Indirect branch with the full closed target set (every case +
+ /* 7. Indirect branch with the full closed target set (every case +
* default), so backends doing branch-target hardening (BTI/IBT/CFG)
* can stamp landing pads on every reachable label. */
targets = (CfreeCgLabel*)h->alloc(h, (d->ncases + 1u) * sizeof *targets,
@@ -836,6 +845,11 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) {
} else {
Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty);
Operand scaled = api_op_reg(sr, idx_ty);
+ /* Allocating `scaled` can materialize a delayed index expression into a
+ * fresh virtual register under opt. Refresh idx_op so the multiply uses
+ * the materialized value, not the pre-materialization source operand. */
+ idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+ if (idx.op.kind == OPK_REG) idx_op = idx.op;
T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty));
if (offset > 0) {
T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty));
diff --git a/src/cg/value.c b/src/cg/value.c
@@ -689,10 +689,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) {
if (sv->kind == SV_CMP) {
CfreeCgTypeId ty = api_sv_type(sv);
Operand dst;
- if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG &&
+ if (!g->target->virtual_regs &&
+ sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG &&
sv->delayed.cmp.a.cls == RC_INT) {
dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty);
- } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG &&
+ } else if (!g->target->virtual_regs &&
+ sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG &&
sv->delayed.cmp.b.cls == RC_INT) {
dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty);
} else {
@@ -706,10 +708,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) {
if (sv->kind == SV_ARITH) {
CfreeCgTypeId ty = api_sv_type(sv);
Operand dst;
- if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG &&
+ if (!g->target->virtual_regs &&
+ sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG &&
sv->delayed.arith.a.cls == RC_INT) {
dst = api_op_reg(sv->delayed.arith.a.v.reg, ty);
- } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned &&
+ } else if (!g->target->virtual_regs &&
+ api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned &&
sv->delayed.arith.b.kind == OPK_REG &&
sv->delayed.arith.b.cls == RC_INT) {
dst = api_op_reg(sv->delayed.arith.b.v.reg, ty);
diff --git a/test/toy/cases/127_switch_forced_jump_table.toy b/test/toy/cases/127_switch_forced_jump_table.toy
@@ -1,38 +1,40 @@
-// Forced .jump_table over a dense 16-case range with no default.
+// Forced .jump_table over a dense non-zero 16-case range with no default.
// Stresses the table-emission path: when no default is given, every
// in-range value must reach a case, and out-of-range values must fall
-// through past the switch (the synthesized default).
+// through past the switch (the synthesized default). The non-zero vmin
+// catches regressions where table addressing accidentally uses the original
+// selector instead of selector - vmin.
fn pick(x: i64): i64 {
var r: i64 = -1;
switch @[.jump_table] x {
- 0 { r = 1000; }
- 1 { r = 1001; }
- 2 { r = 1002; }
- 3 { r = 1003; }
- 4 { r = 1004; }
- 5 { r = 1005; }
- 6 { r = 1006; }
- 7 { r = 1007; }
- 8 { r = 1008; }
- 9 { r = 1009; }
- 10 { r = 1010; }
- 11 { r = 1011; }
- 12 { r = 1012; }
- 13 { r = 1013; }
- 14 { r = 1014; }
- 15 { r = 1015; }
+ 10 { r = 1000; }
+ 11 { r = 1001; }
+ 12 { r = 1002; }
+ 13 { r = 1003; }
+ 14 { r = 1004; }
+ 15 { r = 1005; }
+ 16 { r = 1006; }
+ 17 { r = 1007; }
+ 18 { r = 1008; }
+ 19 { r = 1009; }
+ 20 { r = 1010; }
+ 21 { r = 1011; }
+ 22 { r = 1012; }
+ 23 { r = 1013; }
+ 24 { r = 1014; }
+ 25 { r = 1015; }
}
return r;
}
fn __user_main(): i64 {
var s: i64 = 0;
- s = s + pick(0); // vmin in-range -> 1000
- s = s + pick(15); // vmax in-range -> 1015
- s = s + pick(7); // interior -> 1007
- s = s + pick(-1); // below vmin -> -1 (synthesized default)
- s = s + pick(16); // above vmax -> -1 (synthesized default)
+ s = s + pick(10); // vmin in-range -> 1000
+ s = s + pick(25); // vmax in-range -> 1015
+ s = s + pick(17); // interior -> 1007
+ s = s + pick(9); // below vmin -> -1 (synthesized default)
+ s = s + pick(26); // above vmax -> -1 (synthesized default)
s = s + pick(100); // far out -> -1 (synthesized default)
return s; // 1000+1015+1007 - 3 = 3019
}