commit ab675f5697a19ecc6d59233c1acfb096b0656588
parent 5194cb9d9957c73acaf9b0aec66baee5efa53e7c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 20:43:03 -0700
Add jump-table lowering for CG switches
Diffstat:
6 files changed, 308 insertions(+), 17 deletions(-)
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -629,7 +629,8 @@ typedef struct CGSwitchDesc {
const CGSwitchCase* cases;
u32 ncases;
u8 hint; /* CfreeCgSwitchHint */
- u8 pad[3];
+ u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */
+ u8 pad[2];
} CGSwitchDesc;
typedef struct CGLocalStaticDataDesc {
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -1,4 +1,5 @@
#include "cg/internal.h"
+#include "core/metrics.h"
CfreeCgLabel cfree_cg_label_new(CfreeCg* g) {
if (!g) return CFREE_CG_LABEL_NONE;
@@ -74,14 +75,12 @@ void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label) {
void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) {
/* Cmp-and-branch chain: one cmp_branch per case, then jump to
- * default (or fall through if LABEL_NONE). Fast to emit and the
- * default policy at -O0. Density-driven jump-table conversion runs
- * later as an opt-level rewrite over IR_SWITCH; the structured
- * shape survives in IR until then.
- *
- * d->hint is currently advisory only here — cg does not rewrite
- * into a jump table at lowering time, so JUMP_TABLE and
- * BRANCH_CHAIN both produce the chain. */
+ * default (or fall through if LABEL_NONE). The fallback shape; the
+ * frontend-facing cfree_cg_switch picks chain vs. jump-table up
+ * front (see cg_plan_switch) and routes here only for the chain
+ * case. Backend overrides (the C target's switch_) and opt's IR
+ * replay both reach this from outside the cg API, so the lowering
+ * stays target-only and uses just cmp_branch + jump. */
for (u32 i = 0; i < d->ncases; ++i) {
Operand imm = api_op_imm((i64)d->cases[i].value, d->selector_type);
t->cmp_branch(t, CMP_EQ, d->selector, imm, d->cases[i].label);
@@ -91,23 +90,207 @@ void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) {
}
}
+/* Density / sizing thresholds for the O1 jump-table heuristic. At O0
+ * we only honor an explicit CFREE_CG_SWITCH_JUMP_TABLE hint — the policy
+ * is single-pass and tied to frontend intent rather than analysis. O1
+ * runs a linear scan over the case list and picks a single global table
+ * when the cases are dense enough; clustering / multi-table layouts are
+ * out of scope here. */
+#define CG_SWITCH_TABLE_MIN_CASES_O1 4u
+#define CG_SWITCH_TABLE_MAX_SPAN_O1 4096u
+#define CG_SWITCH_TABLE_DENSITY_RECIP_O1 4u /* ncases * recip >= span */
+
+typedef enum CGSwitchClass {
+ CG_SWITCH_CLS_CHAIN,
+ CG_SWITCH_CLS_TABLE,
+} CGSwitchClass;
+
+typedef struct CGSwitchPlan {
+ CGSwitchClass cls;
+ i64 vmin;
+ u64 span;
+} CGSwitchPlan;
+
+/* Single pass over the cases array: derive (vmin, vmax, span) in the
+ * selector type's signed interpretation. Frontends store case values as
+ * the u64 bit pattern of an i64 sign-extended from the selector's
+ * width, so api_sign_extend_width recovers the source ordering for
+ * both signed and unsigned selectors that fit in i64. Returns 0 if the
+ * selector type is unusable for our policy (>64 bits) or if span
+ * overflows u64. */
+static int cg_switch_extents(Compiler* c, const CGSwitchDesc* d, i64* out_vmin,
+ u64* out_span) {
+ u32 width;
+ i64 vmin;
+ i64 vmax;
+ u32 i;
+ width = cfree_cg_type_int_width((CfreeCompiler*)c, d->selector_type);
+ if (!width || width > 64u) return 0;
+ vmin = INT64_MAX;
+ vmax = INT64_MIN;
+ for (i = 0; i < d->ncases; ++i) {
+ i64 vi = (width == 64u) ? (i64)d->cases[i].value
+ : api_sign_extend_width(d->cases[i].value, width);
+ if (vi < vmin) vmin = vi;
+ if (vi > vmax) vmax = vi;
+ }
+ if (vmax < vmin) return 0;
+ {
+ u64 delta = (u64)vmax - (u64)vmin;
+ if (delta == UINT64_MAX) return 0;
+ *out_span = delta + 1u;
+ }
+ *out_vmin = vmin;
+ return 1;
+}
+
+static CGSwitchPlan cg_plan_switch(CfreeCg* g, const CGSwitchDesc* d) {
+ CGSwitchPlan plan;
+ plan.cls = CG_SWITCH_CLS_CHAIN;
+ plan.vmin = 0;
+ plan.span = 0;
+ if (d->ncases == 0) return plan;
+ if (d->default_label == LABEL_NONE) return plan;
+ if (d->hint == CFREE_CG_SWITCH_BRANCH_CHAIN) return plan;
+ if (!cg_switch_extents(g->c, d, &plan.vmin, &plan.span)) return plan;
+ if (d->hint == CFREE_CG_SWITCH_JUMP_TABLE) {
+ /* Frontend explicitly opted in. Honor unless the span is wildly
+ * out of bounds — a forced hint shouldn't blow up code size on a
+ * misshapen switch. */
+ if (plan.span > CG_SWITCH_TABLE_MAX_SPAN_O1) return plan;
+ plan.cls = CG_SWITCH_CLS_TABLE;
+ return plan;
+ }
+ /* TARGET_DEFAULT: O0 keeps the chain; O1+ runs the density check. */
+ if (d->opt_level == 0) return plan;
+ if (d->ncases < CG_SWITCH_TABLE_MIN_CASES_O1) return plan;
+ if (plan.span > CG_SWITCH_TABLE_MAX_SPAN_O1) return plan;
+ if (plan.span > (u64)d->ncases * CG_SWITCH_TABLE_DENSITY_RECIP_O1) return plan;
+ plan.cls = CG_SWITCH_CLS_TABLE;
+ return plan;
+}
+
+/* Emit a dense jump-table dispatch using cg-API ops. The selector value
+ * is still on the value stack on entry. Routing through the cg API
+ * means the same primitives work for direct CG (lowered to machine
+ * ops) and for the opt wrapper (recorded as IR_BINOP / IR_CMP_BRANCH /
+ * IR_LOAD / IR_INDIRECT_BRANCH, which pass_emit already lowers
+ * natively without ever materializing IR_SWITCH). */
+static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d,
+ const CGSwitchPlan* plan) {
+ Compiler* c;
+ Heap* h;
+ CfreeCgTypeId sel_ty;
+ u32 sel_w;
+ CfreeCgTypeId i64_ty;
+ CfreeCgTypeId void_ptr_ty;
+ CfreeCgTypeId arr_ty;
+ Label* labels;
+ CfreeCgLabel* targets;
+ ObjSymId table_sym;
+ CfreeCgDecl decl;
+ CfreeCgMemAccess acc;
+ u64 i;
+ u32 width;
+ c = g->c;
+ h = (Heap*)c->ctx->heap;
+ sel_ty = d->selector_type;
+ sel_w = cfree_cg_type_int_width((CfreeCompiler*)c, sel_ty);
+ i64_ty = builtin_id(CFREE_CG_BUILTIN_I64);
+ void_ptr_ty = cg_type_ptr_to(c, builtin_id(CFREE_CG_BUILTIN_VOID));
+
+ /* 1. idx = sel - vmin (selector_type wraparound is what we want;
+ * the unsigned bounds check below catches both negative-underflow
+ * and positive-overflow cases). */
+ cfree_cg_push_int(g, (uint64_t)plan->vmin, sel_ty);
+ cfree_cg_int_binop(g, CFREE_CG_INT_SUB, 0); /* [idx] */
+
+ /* 2. Bounds check: branch to default when idx u> span-1. */
+ cfree_cg_dup(g); /* [idx, idx] */
+ cfree_cg_push_int(g, plan->span - 1u, sel_ty);
+ cfree_cg_int_cmp(g, CFREE_CG_INT_GT_U); /* [idx, cond] */
+ cfree_cg_branch_true(g, (CfreeCgLabel)d->default_label); /* [idx] */
+
+ /* 3. Widen idx to i64 so the subsequent index multiply runs at
+ * pointer width regardless of selector signedness. The bounds
+ * check above already established 0 <= idx < span, so zext is
+ * value-preserving. */
+ if (sel_w < 64u) {
+ cfree_cg_zext(g, i64_ty);
+ }
+
+ /* 4. Build the dense label[] slot array and emit the rodata table. */
+ labels = (Label*)h->alloc(h, (size_t)plan->span * sizeof *labels,
+ _Alignof(Label));
+ if (!labels) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom");
+ for (i = 0; i < plan->span; ++i) labels[i] = d->default_label;
+ width = sel_w;
+ for (i = 0; i < d->ncases; ++i) {
+ i64 vi = (width == 64u) ? (i64)d->cases[i].value
+ : api_sign_extend_width(d->cases[i].value, width);
+ u64 slot = (u64)(vi - plan->vmin);
+ labels[slot] = d->cases[i].label;
+ }
+ table_sym = api_emit_label_table(g, labels, (u32)plan->span);
+ h->free(h, labels, (size_t)plan->span * sizeof *labels);
+ if (table_sym == OBJ_SYM_NONE) {
+ /* api_emit_label_table panics on real failure; this only fires if
+ * a future caller asks for a 0-slot table (which cg_plan_switch
+ * already rules out). */
+ compiler_panic(c, g->cur_loc, "cfree_cg_switch: table emission failed");
+ return;
+ }
+ arr_ty = cfree_cg_type_array((CfreeCompiler*)c, void_ptr_ty, plan->span);
+ memset(&decl, 0, sizeof decl);
+ decl.kind = CFREE_CG_DECL_OBJECT;
+ decl.sym.bind = CFREE_SB_LOCAL;
+ decl.sym.visibility = CFREE_CG_VIS_DEFAULT;
+ decl.as.object.flags = CFREE_CG_OBJ_READONLY;
+ api_remember_sym(g, table_sym, arr_ty, decl);
+
+ /* 5. Compute &table[idx] and load the label address. */
+ cfree_cg_push_symbol_lvalue(g, (CfreeCgSym)table_sym, 0); /* [idx, table_lv] */
+ cfree_cg_swap(g); /* [table_lv, idx] */
+ cfree_cg_index(g, 0); /* [&table[idx]] */
+ memset(&acc, 0, sizeof acc);
+ acc.type = void_ptr_ty;
+ acc.align = (uint32_t)c->target.ptr_align;
+ cfree_cg_load(g, acc); /* [label_addr] */
+
+ /* 6. Indirect branch with the full closed target set (every case +
+ * default), so backends doing branch-target hardening (BTI/IBT/CFG)
+ * can stamp landing pads on every reachable label. */
+ targets = (CfreeCgLabel*)h->alloc(h, (d->ncases + 1u) * sizeof *targets,
+ _Alignof(CfreeCgLabel));
+ if (!targets) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom");
+ for (i = 0; i < d->ncases; ++i) {
+ targets[i] = (CfreeCgLabel)d->cases[i].label;
+ }
+ targets[d->ncases] = (CfreeCgLabel)d->default_label;
+ cfree_cg_computed_goto(g, targets, d->ncases + 1u);
+ h->free(h, targets, (d->ncases + 1u) * sizeof *targets);
+}
+
void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
ApiSValue selector;
CGSwitchDesc desc;
Heap* h;
CGSwitchCase* cases = NULL;
+ CGSwitchPlan plan;
+ int native_switch_override;
if (!g) return;
if (g->sp == 0) return;
api_local_const_control_boundary(g);
- selector = api_pop(g);
memset(&desc, 0, sizeof desc);
desc.selector_type = resolve_type(g->c, sw.selector_type);
- if (!desc.selector_type) desc.selector_type = api_sv_type(&selector);
- desc.selector =
- api_force_reg_unless_imm(g, &selector, desc.selector_type);
+ if (!desc.selector_type) {
+ ApiSValue tmp = g->stack[g->sp - 1u];
+ desc.selector_type = api_sv_type(&tmp);
+ }
desc.default_label = (Label)sw.default_label;
desc.ncases = sw.ncases;
desc.hint = (u8)sw.hint;
+ desc.opt_level = (u8)g->opt_level;
if (sw.ncases) {
h = g->c->ctx->heap;
cases = (CGSwitchCase*)h->alloc(h, sw.ncases * sizeof(CGSwitchCase),
@@ -119,16 +302,34 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
}
desc.cases = cases;
}
- if (g->target->switch_) {
- g->target->switch_(g->target, &desc);
+
+ /* The C-source target overrides switch_ to emit a native `switch`
+ * statement and forces opt_level=0; route everything to it
+ * unchanged. Native + opt targets both flow through the planner. */
+ native_switch_override = (g->target->switch_ && g->opt_level == 0);
+ plan = native_switch_override ? (CGSwitchPlan){CG_SWITCH_CLS_CHAIN, 0, 0}
+ : cg_plan_switch(g, &desc);
+
+ if (plan.cls == CG_SWITCH_CLS_TABLE) {
+ /* Selector stays on the value stack; cg_emit_switch_table consumes
+ * it via cg-API ops so the path also records cleanly under opt. */
+ metrics_count(g->c, "cg.switch.table", 1);
+ cg_emit_switch_table(g, &desc, &plan);
} else {
- cg_lower_switch_default(g->target, &desc);
+ metrics_count(g->c, "cg.switch.chain", 1);
+ selector = api_pop(g);
+ desc.selector = api_force_reg_unless_imm(g, &selector, desc.selector_type);
+ if (g->target->switch_) {
+ g->target->switch_(g->target, &desc);
+ } else {
+ cg_lower_switch_default(g->target, &desc);
+ }
+ api_release(g, &selector);
}
if (cases) {
h = g->c->ctx->heap;
h->free(h, cases, sw.ncases * sizeof(CGSwitchCase));
}
- api_release(g, &selector);
}
void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label,
diff --git a/src/cg/data.c b/src/cg/data.c
@@ -589,3 +589,83 @@ void cfree_cg_data_end(CfreeCg* g) {
g->data_base = 0;
g->data_size = 0;
}
+
+/* Emit a function-local jump-table of `n` label addresses into .rodata
+ * and return the anonymous ObjSymId backing it. Each slot is one pointer
+ * wide. Used by api_cg_switch_lower_table to materialize the dispatch
+ * table inline during cg recording — under direct CG the reloc resolves
+ * immediately when the label is later placed; under opt, the
+ * label-data reloc is queued on the MCEmitter and resolved at replay
+ * time once the wrapped backend's func_begin has set cur_func_sym. The
+ * helper does not register the symbol with CfreeCg's sym table;
+ * callers wire its CfreeCg type via api_remember_sym so subsequent
+ * cfree_cg_push_symbol_lvalue / cfree_cg_index can address it as an
+ * array of pointers. */
+ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n) {
+ Compiler* c;
+ ObjBuilder* ob;
+ MCEmitter* mc;
+ CGTarget* T;
+ u32 ptrsz;
+ u32 ptral;
+ Sym sec_name;
+ ObjSecId sec;
+ u32 base;
+ RelocKind rk;
+ u8 pad[8];
+ char name_buf[40];
+ Sym anon;
+ ObjSymId sym;
+ u32 i;
+ if (!g || !labels || !n) return OBJ_SYM_NONE;
+ c = g->c;
+ ob = g->obj;
+ mc = g->mc;
+ T = g->target;
+ if (!mc) {
+ compiler_panic(c, g->cur_loc,
+ "api_emit_label_table: requires MCEmitter "
+ "(unsupported with --emit=c)");
+ return OBJ_SYM_NONE;
+ }
+ ptrsz = (u32)c->target.ptr_size;
+ ptral = (u32)c->target.ptr_align;
+ if (ptrsz != 4u && ptrsz != 8u) {
+ compiler_panic(c, g->cur_loc,
+ "api_emit_label_table: unsupported ptr_size %u",
+ (unsigned)ptrsz);
+ return OBJ_SYM_NONE;
+ }
+ rk = api_data_reloc_kind(/*pcrel=*/0, ptrsz);
+ if (rk == R_NONE) {
+ compiler_panic(c, g->cur_loc,
+ "api_emit_label_table: unsupported reloc width %u",
+ (unsigned)ptrsz);
+ return OBJ_SYM_NONE;
+ }
+ sec_name = pool_intern_cstr(c->global, ".rodata");
+ sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, ptral);
+ base = obj_align_to(ob, sec, ptral);
+ /* Write `n` placeholder slots first; emit_label_data_reloc later
+ * patches each one inline (via obj_patch) at label-place time. */
+ memset(pad, 0, sizeof pad);
+ for (i = 0; i < n; ++i) {
+ obj_write(ob, sec, pad, ptrsz);
+ }
+ for (i = 0; i < n; ++i) {
+ MCLabel ml = T->cg_label_to_mc_label
+ ? T->cg_label_to_mc_label(T, labels[i])
+ : (MCLabel)labels[i];
+ if (ml == MC_LABEL_NONE) {
+ compiler_panic(c, g->cur_loc,
+ "api_emit_label_table: label has no MCLabel");
+ return OBJ_SYM_NONE;
+ }
+ mc->emit_label_data_reloc(mc, sec, base + i * ptrsz, ml, rk, ptrsz,
+ /*extra_addend=*/0);
+ }
+ snprintf(name_buf, sizeof name_buf, ".Lcfree_jt.%u", g->rodata_counter++);
+ anon = pool_intern_cstr(c->global, name_buf);
+ sym = obj_symbol(ob, anon, SB_LOCAL, SK_OBJ, sec, base, (u64)n * ptrsz);
+ return sym;
+}
diff --git a/src/cg/internal.h b/src/cg/internal.h
@@ -155,6 +155,7 @@ struct CfreeCg {
u32 scope_generation;
u32 rodata_counter;
+ int opt_level;
ObjSecId data_sec;
ObjSymId data_sym;
@@ -318,6 +319,7 @@ void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend,
void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs,
int64_t addend, uint32_t width);
void cfree_cg_data_end(CfreeCg* g);
+ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n);
DebugTypeId api_debug_type(CfreeCg* g, CfreeCgTypeId id);
int api_source_flags_addr_taken(u32 flags);
int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty,
diff --git a/src/cg/session.c b/src/cg/session.c
@@ -74,6 +74,7 @@ CfreeStatus cfree_cg_new(CfreeCompiler* c, CfreeObjBuilder* out,
g->target = target;
g->mc = mc;
g->debug = debug;
+ g->opt_level = opt_level;
*cg_out = g;
return CFREE_OK;
}
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -617,6 +617,12 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
d.default_label = ensure_label(r, aux->default_block);
d.ncases = aux->ncases;
d.hint = aux->hint;
+ /* opt only invokes pass_emit at level >= 1; cfree_cg_switch
+ * already routed dense/forced-table switches through
+ * cg_emit_switch_table, so anything that survives in IR_SWITCH
+ * is chain by construction. Set the field anyway to keep the
+ * desc fully populated. */
+ d.opt_level = 1u;
if (aux->ncases) {
cases = arena_array(r->f->arena, CGSwitchCase, aux->ncases);
for (u32 i = 0; i < aux->ncases; ++i) {