cg: preserve switch table indices under opt - kit

commit d391d1a13fc19116a5ed2473022ffa5d5344916b
parent 5054c67a493c11ec0c032c04e722962402406411
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 10:22:24 -0700

cg: preserve switch table indices under opt

Keep the original selector across jump-table bounds checks and recompute the table index before address calculation so optimized lowering does not carry a stale duplicated arithmetic value across control flow.

Refresh index operands after allocating the scaled address register, since that allocation can materialize delayed expressions into a fresh virtual register under opt.

Expand the forced jump-table toy case to use a non-zero dense range, catching selector-vs-index addressing regressions.

Diffstat:
M src/cg/control.c  | 36 +++++++++++++++++++++++++-----------
M src/cg/value.c  | 12 ++++++++----
M test/toy/cases/127_switch_forced_jump_table.toy  | 48 +++++++++++++++++++++++++-----------------------

3 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -199,19 +199,28 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
   i64_ty = builtin_id(CFREE_CG_BUILTIN_I64);
   void_ptr_ty = cg_type_ptr_to(c, builtin_id(CFREE_CG_BUILTIN_VOID));
 
-  /* 1. idx = sel - vmin (selector_type wraparound is what we want;
-   *    the unsigned bounds check below catches both negative-underflow
-   *    and positive-overflow cases). */
+  /* 1. Keep the original selector, then compute idx = sel - vmin
+   *    (selector_type wraparound is what we want; the unsigned bounds
+   *    check below catches both negative-underflow and positive-overflow
+   *    cases). The selector copy lets us recompute idx after the bounds
+   *    branch instead of carrying a duplicated arithmetic value across
+   *    control flow. */
+  cfree_cg_dup(g);                              /* [sel, sel] */
   cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty);
-  cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */
+  cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0);  /* [sel, idx] */
 
   /* 2. Bounds check: branch to default when idx u> span-1. */
-  cfree_cg_dup(g);                              /* [idx, idx] */
+  cfree_cg_dup(g);                              /* [sel, idx, idx] */
   cfree_cg_push_int(g, plan->span - 1u, sel_ty);
-  cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U);       /* [idx, cond] */
-  cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [idx] */
+  cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U);       /* [sel, idx, cond] */
+  cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [sel, idx] */
 
-  /* 3. Widen idx to i64 so the subsequent index multiply runs at
+  /* 3. Recompute idx from the preserved selector for table addressing. */
+  cfree_cg_drop(g);                             /* [sel] */
+  cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty);
+  cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0);  /* [idx] */
+
+  /* 4. Widen idx to i64 so the subsequent index multiply runs at
    *    pointer width regardless of selector signedness. The bounds
    *    check above already established 0 <= idx < span, so zext is
    *    value-preserving. */
@@ -219,7 +228,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
     cfree_cg_zext(g, i64_ty);
   }
 
-  /* 4. Build the dense label[] slot array and emit the rodata table. */
+  /* 5. Build the dense label[] slot array and emit the rodata table. */
   labels = (Label*)h->alloc(h, (size_t)plan->span * sizeof *labels,
                             _Alignof(Label));
   if (!labels) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom");
@@ -248,7 +257,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
   decl.as.object.flags = CFREE_CG_OBJ_READONLY;
   api_remember_sym(g, table_sym, arr_ty, decl);
 
-  /* 5. Compute &table[idx] and load the label address. */
+  /* 6. Compute &table[idx] and load the label address. */
   cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */
   cfree_cg_swap(g);                                          /* [table_lv, idx] */
   cfree_cg_index(g, 0);                                      /* [&table[idx]] */
@@ -257,7 +266,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
   acc.align = (uint32_t)c->target.ptr_align;
   cfree_cg_load(g, acc);                                     /* [label_addr] */
 
-  /* 6. Indirect branch with the full closed target set (every case +
+  /* 7. Indirect branch with the full closed target set (every case +
    *    default), so backends doing branch-target hardening (BTI/IBT/CFG)
    *    can stamp landing pads on every reachable label. */
   targets = (CfreeCgLabel*)h->alloc(h, (d->ncases + 1u) * sizeof *targets,
@@ -836,6 +845,11 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) {
   } else {
     Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty);
     Operand scaled = api_op_reg(sr, idx_ty);
+    /* Allocating `scaled` can materialize a delayed index expression into a
+     * fresh virtual register under opt. Refresh idx_op so the multiply uses
+     * the materialized value, not the pre-materialization source operand. */
+    idx_op = api_force_reg_unless_imm(g, &idx, idx_ty);
+    if (idx.op.kind == OPK_REG) idx_op = idx.op;
     T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty));
     if (offset > 0) {
       T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty));
diff --git a/src/cg/value.c b/src/cg/value.c
@@ -689,10 +689,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) {
   if (sv->kind == SV_CMP) {
     CfreeCgTypeId ty = api_sv_type(sv);
     Operand dst;
-    if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG &&
+    if (!g->target->virtual_regs &&
+        sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG &&
         sv->delayed.cmp.a.cls == RC_INT) {
       dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty);
-    } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG &&
+    } else if (!g->target->virtual_regs &&
+               sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG &&
                sv->delayed.cmp.b.cls == RC_INT) {
       dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty);
     } else {
@@ -706,10 +708,12 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) {
   if (sv->kind == SV_ARITH) {
     CfreeCgTypeId ty = api_sv_type(sv);
     Operand dst;
-    if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG &&
+    if (!g->target->virtual_regs &&
+        sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG &&
         sv->delayed.arith.a.cls == RC_INT) {
       dst = api_op_reg(sv->delayed.arith.a.v.reg, ty);
-    } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned &&
+    } else if (!g->target->virtual_regs &&
+               api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned &&
                sv->delayed.arith.b.kind == OPK_REG &&
                sv->delayed.arith.b.cls == RC_INT) {
       dst = api_op_reg(sv->delayed.arith.b.v.reg, ty);
diff --git a/test/toy/cases/127_switch_forced_jump_table.toy b/test/toy/cases/127_switch_forced_jump_table.toy
@@ -1,38 +1,40 @@
-// Forced .jump_table over a dense 16-case range with no default.
+// Forced .jump_table over a dense non-zero 16-case range with no default.
 // Stresses the table-emission path: when no default is given, every
 // in-range value must reach a case, and out-of-range values must fall
-// through past the switch (the synthesized default).
+// through past the switch (the synthesized default). The non-zero vmin
+// catches regressions where table addressing accidentally uses the original
+// selector instead of selector - vmin.
 
 fn pick(x: i64): i64 {
   var r: i64 = -1;
   switch @[.jump_table] x {
-    0  { r = 1000; }
-    1  { r = 1001; }
-    2  { r = 1002; }
-    3  { r = 1003; }
-    4  { r = 1004; }
-    5  { r = 1005; }
-    6  { r = 1006; }
-    7  { r = 1007; }
-    8  { r = 1008; }
-    9  { r = 1009; }
-    10 { r = 1010; }
-    11 { r = 1011; }
-    12 { r = 1012; }
-    13 { r = 1013; }
-    14 { r = 1014; }
-    15 { r = 1015; }
+    10 { r = 1000; }
+    11 { r = 1001; }
+    12 { r = 1002; }
+    13 { r = 1003; }
+    14 { r = 1004; }
+    15 { r = 1005; }
+    16 { r = 1006; }
+    17 { r = 1007; }
+    18 { r = 1008; }
+    19 { r = 1009; }
+    20 { r = 1010; }
+    21 { r = 1011; }
+    22 { r = 1012; }
+    23 { r = 1013; }
+    24 { r = 1014; }
+    25 { r = 1015; }
   }
   return r;
 }
 
 fn __user_main(): i64 {
   var s: i64 = 0;
-  s = s + pick(0);    // vmin in-range  -> 1000
-  s = s + pick(15);   // vmax in-range  -> 1015
-  s = s + pick(7);    // interior       -> 1007
-  s = s + pick(-1);   // below vmin     -> -1   (synthesized default)
-  s = s + pick(16);   // above vmax     -> -1   (synthesized default)
+  s = s + pick(10);   // vmin in-range  -> 1000
+  s = s + pick(25);   // vmax in-range  -> 1015
+  s = s + pick(17);   // interior       -> 1007
+  s = s + pick(9);    // below vmin     -> -1   (synthesized default)
+  s = s + pick(26);   // above vmax     -> -1   (synthesized default)
   s = s + pick(100);  // far out        -> -1   (synthesized default)
   return s;           // 1000+1015+1007 - 3 = 3019
 }

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	src/cg/control.c	\|	36	+++++++++++++++++++++++++-----------
M	src/cg/value.c	\|	12	++++++++----
M	test/toy/cases/127_switch_forced_jump_table.toy	\|	48	+++++++++++++++++++++++++-----------------------