commit 7e162375ecff7191eac6b17cecfed4ce7a56bcc5
parent 5dbd91a562625c2f87035eeb010468eb5ea91d4e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 16:07:47 -0700
opt: place split spills on critical edges
Diffstat:
6 files changed, 372 insertions(+), 87 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -674,7 +674,7 @@ Implement:
- [x] Conservative singleton live-range splitting when whole hard allocation
fails.
- [x] Boundary reload/store insertion for hard split segments through rewrite.
-- [ ] Critical-edge CFG mutation for spill placement. Current placement stays
+- [x] Critical-edge CFG mutation for spill placement. Current placement stays
at unambiguous block/range boundaries.
Exit criteria:
diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h
@@ -88,6 +88,10 @@ void opt_analysis_build_dom_frontier(Func*, OptAnalysis*);
int opt_analysis_dominates(const OptAnalysis*, u32 dom, u32 node);
void opt_rebuild_def_use(Func*);
void opt_verify(Func*, const char* stage);
+void opt_replace_succ_ref(Func*, u32 pred, u32 old_succ, u32 new_succ);
+void opt_emit_order_insert_after(Func*, u32 after, u32 block);
+int opt_edge_is_fallthrough(Func*, u32 pred, u32 succ);
+u32 opt_split_edge(Func*, u32 pred, u32 succ);
int opt_val_in_inst_defs(const Inst*, Val);
void opt_walk_operand(Func*, Inst*, Operand*, int is_def, OptOperandWalkFn,
diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c
@@ -113,6 +113,97 @@ static void prune_unreachable(Func* f, const u8* reachable) {
f->emit_order_n = w;
}
+void opt_replace_succ_ref(Func* f, u32 pred, u32 old_succ, u32 new_succ) {
+ if (!f || pred >= f->nblocks) return;
+ Block* bl = &f->blocks[pred];
+ for (u32 s = 0; s < bl->nsucc; ++s) {
+ if (bl->succ[s] == old_succ) bl->succ[s] = new_succ;
+ }
+ if (!bl->ninsts) return;
+ Inst* term = &bl->insts[bl->ninsts - 1u];
+ if ((IROp)term->op == IR_SWITCH) {
+ IRSwitchAux* aux = (IRSwitchAux*)term->extra.aux;
+ if (!aux) return;
+ for (u32 i = 0; i < aux->ncases; ++i)
+ if (aux->cases[i].block == old_succ) aux->cases[i].block = new_succ;
+ if (aux->default_block == old_succ) aux->default_block = new_succ;
+ } else if ((IROp)term->op == IR_INDIRECT_BRANCH) {
+ IRIndirectAux* aux = (IRIndirectAux*)term->extra.aux;
+ if (!aux) return;
+ for (u32 i = 0; i < aux->ntargets; ++i)
+ if (aux->targets[i] == old_succ) aux->targets[i] = new_succ;
+ }
+}
+
+void opt_emit_order_insert_after(Func* f, u32 after, u32 block) {
+ if (!f) return;
+ for (u32 i = 0; i < f->emit_order_n; ++i)
+ if (f->emit_order[i] == block) return;
+ if (f->emit_order_n == f->emit_order_cap) {
+ u32 ncap = f->emit_order_cap ? f->emit_order_cap * 2u : 8u;
+ u32* order = arena_array(f->arena, u32, ncap);
+ if (f->emit_order)
+ memcpy(order, f->emit_order, sizeof(order[0]) * f->emit_order_n);
+ f->emit_order = order;
+ f->emit_order_cap = ncap;
+ }
+ u32 pos = f->emit_order_n;
+ for (u32 i = 0; i < f->emit_order_n; ++i) {
+ if (f->emit_order[i] == after) {
+ pos = i + 1u;
+ break;
+ }
+ }
+ for (u32 i = f->emit_order_n; i > pos; --i)
+ f->emit_order[i] = f->emit_order[i - 1u];
+ f->emit_order[pos] = block;
+ ++f->emit_order_n;
+}
+
+int opt_edge_is_fallthrough(Func* f, u32 pred, u32 succ) {
+ if (!f || pred >= f->nblocks) return 0;
+ Block* bl = &f->blocks[pred];
+ if (!bl->ninsts) return 0;
+ IROp op = (IROp)bl->insts[bl->ninsts - 1u].op;
+ if (op != IR_CMP_BRANCH && op != IR_CONDBR) return 0;
+ return bl->nsucc >= 2 && bl->succ[1] == succ;
+}
+
+u32 opt_split_edge(Func* f, u32 pred, u32 succ) {
+ if (!f) return 0;
+ u32 edge = ir_block_new(f);
+ Block* eb = &f->blocks[edge];
+ Inst* br = ir_emit(f, edge, IR_BR);
+ (void)br;
+ eb->succ[0] = succ;
+ eb->nsucc = 1;
+ int place_after = opt_edge_is_fallthrough(f, pred, succ);
+ opt_replace_succ_ref(f, pred, succ, edge);
+ if (succ < f->nblocks) {
+ Block* sb = &f->blocks[succ];
+ for (u32 i = 0; i < sb->ninsts; ++i) {
+ Inst* phi = &sb->insts[i];
+ if ((IROp)phi->op != IR_PHI) break;
+ IRPhiAux* aux = (IRPhiAux*)phi->extra.aux;
+ if (!aux) continue;
+ for (u32 p = 0; p < aux->npreds; ++p) {
+ if (aux->pred_blocks[p] == pred) {
+ aux->pred_blocks[p] = edge;
+ aux->pred_vals[p] = phi->def;
+ }
+ }
+ }
+ }
+ if (place_after) {
+ opt_emit_order_insert_after(f, pred, edge);
+ } else {
+ ir_note_emit(f, edge);
+ }
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+ return edge;
+}
+
void opt_build_cfg(Func* f) {
if (!f) return;
opt_analysis_invalidate(
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -1134,6 +1134,26 @@ typedef struct RewriteList {
u32 cap;
} RewriteList;
+typedef enum EdgeMatKind {
+ EDGE_MAT_STORE,
+ EDGE_MAT_RELOAD,
+} EdgeMatKind;
+
+typedef struct EdgeMat {
+ u32 pred;
+ u32 succ;
+ PReg v;
+ Reg hard_reg;
+ u8 kind;
+ u8 pad[3];
+} EdgeMat;
+
+typedef struct EdgeMatPlan {
+ EdgeMat* mats;
+ u32 n;
+ u32 cap;
+} EdgeMatPlan;
+
typedef struct RewriteOut {
Inst* data;
u32 cap;
@@ -1475,6 +1495,154 @@ static void append_split_stores_at(Func* f, RewriteList* out, u32 block,
}
}
+static void edge_mat_push(Func* f, EdgeMatPlan* plan, u32 pred, u32 succ,
+ PReg v, Reg hard_reg, EdgeMatKind kind) {
+ if (!plan || pred >= f->nblocks || succ >= f->nblocks) return;
+ if (hard_reg == (Reg)REG_NONE) return;
+ if (plan->n == plan->cap) {
+ u32 ncap = plan->cap ? plan->cap * 2u : 16u;
+ EdgeMat* nm = arena_array(f->arena, EdgeMat, ncap);
+ if (plan->mats) memcpy(nm, plan->mats, sizeof(plan->mats[0]) * plan->n);
+ plan->mats = nm;
+ plan->cap = ncap;
+ }
+ EdgeMat* m = &plan->mats[plan->n++];
+ memset(m, 0, sizeof *m);
+ m->pred = pred;
+ m->succ = succ;
+ m->v = v;
+ m->hard_reg = hard_reg;
+ m->kind = (u8)kind;
+}
+
+static void prepare_split_edge_materialization(Func* f, EdgeMatPlan* plan) {
+ if (!f || !plan || !f->opt_first_segment_by_preg) return;
+ u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
+ u32 raw = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ block_base[b] = raw;
+ raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u;
+ }
+
+ for (PReg v = 1; v < opt_reg_count(f); ++v) {
+ if (f->preg_info[v].alloc_kind != OPT_ALLOC_SPLIT) continue;
+ for (u32 si = f->opt_first_segment_by_preg[v]; si != OPT_RANGE_NONE;
+ si = f->opt_alloc_segments[si].next) {
+ OptAllocSegment* s = &f->opt_alloc_segments[si];
+ if (s->loc_kind != OPT_LOC_HARD || s->block >= f->nblocks) continue;
+ Block* bl = &f->blocks[s->block];
+ u32 block_start = block_base[s->block];
+ u32 block_end = block_start + (bl->ninsts ? bl->ninsts : 1u);
+ if (s->reload_at_start && s->start == block_start && bl->npreds) {
+ for (u32 p = 0; p < bl->npreds; ++p)
+ edge_mat_push(f, plan, bl->preds[p], s->block, v, s->hard_reg,
+ EDGE_MAT_RELOAD);
+ s->reload_at_start = 0;
+ }
+ if (s->store_at_end && s->end == block_end && bl->nsucc) {
+ for (u32 e = 0; e < bl->nsucc; ++e)
+ edge_mat_push(f, plan, s->block, bl->succ[e], v, s->hard_reg,
+ EDGE_MAT_STORE);
+ s->store_at_end = 0;
+ }
+ }
+ }
+}
+
+static int lower_is_terminator(const Inst* in) {
+ if (!in) return 0;
+ switch ((IROp)in->op) {
+ case IR_BR:
+ case IR_CONDBR:
+ case IR_CMP_BRANCH:
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
+ case IR_RET:
+ case IR_BREAK_TO:
+ case IR_CONTINUE_TO:
+ return 1;
+ case IR_INTRINSIC: {
+ IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+ return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
+ aux->kind == INTRIN_UNREACHABLE);
+ }
+ default:
+ return 0;
+ }
+}
+
+static void block_insert_list(Func* f, u32 block, const RewriteList* list) {
+ if (!f || block >= f->nblocks || !list || !list->n) return;
+ Block* bl = &f->blocks[block];
+ u32 term = bl->ninsts && lower_is_terminator(&bl->insts[bl->ninsts - 1u]);
+ u32 insert_at = bl->ninsts - term;
+ Inst* insts = arena_zarray(f->arena, Inst, bl->ninsts + list->n);
+ if (insert_at) memcpy(insts, bl->insts, sizeof(Inst) * insert_at);
+ memcpy(insts + insert_at, list->data, sizeof(Inst) * list->n);
+ if (bl->ninsts > insert_at) {
+ memcpy(insts + insert_at + list->n, bl->insts + insert_at,
+ sizeof(Inst) * (bl->ninsts - insert_at));
+ }
+ bl->insts = insts;
+ bl->ninsts += list->n;
+ bl->cap = bl->ninsts;
+}
+
+typedef struct EdgePlace {
+ u32 pred;
+ u32 succ;
+ u32 block;
+} EdgePlace;
+
+static u32 edge_place_for(Func* f, EdgePlace** places, u32* nplaces,
+ u32* cap, u32 pred, u32 succ) {
+ for (u32 i = 0; i < *nplaces; ++i)
+ if ((*places)[i].pred == pred && (*places)[i].succ == succ)
+ return (*places)[i].block;
+ if (*nplaces == *cap) {
+ u32 ncap = *cap ? *cap * 2u : 16u;
+ EdgePlace* np = arena_array(f->arena, EdgePlace, ncap);
+ if (*places) memcpy(np, *places, sizeof((*places)[0]) * *nplaces);
+ *places = np;
+ *cap = ncap;
+ }
+ u32 block = opt_split_edge(f, pred, succ);
+ EdgePlace* p = &(*places)[(*nplaces)++];
+ p->pred = pred;
+ p->succ = succ;
+ p->block = block;
+ return block;
+}
+
+static void apply_split_edge_materialization(Func* f, const EdgeMatPlan* plan) {
+ if (!f || !plan || !plan->n) return;
+ EdgePlace* places = NULL;
+ u32 nplaces = 0;
+ u32 place_cap = 0;
+ for (u32 i = 0; i < plan->n; ++i) {
+ const EdgeMat* m = &plan->mats[i];
+ if (m->pred < f->nblocks && m->succ < f->nblocks)
+ (void)edge_place_for(f, &places, &nplaces, &place_cap, m->pred, m->succ);
+ }
+ for (u32 pass = 0; pass < 2; ++pass) {
+ EdgeMatKind kind = pass == 0 ? EDGE_MAT_STORE : EDGE_MAT_RELOAD;
+ for (u32 i = 0; i < plan->n; ++i) {
+ const EdgeMat* m = &plan->mats[i];
+ if ((EdgeMatKind)m->kind != kind) continue;
+ u32 block = edge_place_for(f, &places, &nplaces, &place_cap, m->pred,
+ m->succ);
+ RewriteList list;
+ memset(&list, 0, sizeof list);
+ if (kind == EDGE_MAT_STORE)
+ append_store_preg_hard(f, &list, m->v, m->hard_reg);
+ else
+ append_load_preg_hard(f, &list, m->v, m->hard_reg);
+ block_insert_list(f, block, &list);
+ }
+ }
+ opt_build_cfg(f);
+}
+
static void rewrite_func(Func* f, const OptLiveInfo* live_info) {
u32 words = live_info ? live_info->words : f->opt_live_words;
if (!words) words = bit_words(opt_reg_count(f));
@@ -1689,5 +1857,10 @@ void opt_regalloc(Func* f, int allow_live_range_split) {
OptAllocator alloc;
opt_assign_ranges(f, &ranges, &alloc, allow_live_range_split);
+ EdgeMatPlan edge_mats;
+ memset(&edge_mats, 0, sizeof edge_mats);
+ if (allow_live_range_split)
+ prepare_split_edge_materialization(f, &edge_mats);
rewrite_func(f, &live);
+ apply_split_edge_materialization(f, &edge_mats);
}
diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c
@@ -869,91 +869,6 @@ static void insert_edge_moves(Func* f, u32 b, const EdgeMove* moves, u32 n) {
}
}
-static void replace_succ_ref(Func* f, u32 pred, u32 old_succ, u32 new_succ) {
- Block* bl = &f->blocks[pred];
- for (u32 s = 0; s < bl->nsucc; ++s) {
- if (bl->succ[s] == old_succ) bl->succ[s] = new_succ;
- }
- if (!bl->ninsts) return;
- Inst* term = &bl->insts[bl->ninsts - 1u];
- if ((IROp)term->op == IR_SWITCH) {
- IRSwitchAux* aux = (IRSwitchAux*)term->extra.aux;
- if (!aux) return;
- for (u32 i = 0; i < aux->ncases; ++i)
- if (aux->cases[i].block == old_succ) aux->cases[i].block = new_succ;
- if (aux->default_block == old_succ) aux->default_block = new_succ;
- } else if ((IROp)term->op == IR_INDIRECT_BRANCH) {
- IRIndirectAux* aux = (IRIndirectAux*)term->extra.aux;
- if (!aux) return;
- for (u32 i = 0; i < aux->ntargets; ++i)
- if (aux->targets[i] == old_succ) aux->targets[i] = new_succ;
- }
-}
-
-static void emit_order_insert_after(Func* f, u32 after, u32 block) {
- for (u32 i = 0; i < f->emit_order_n; ++i)
- if (f->emit_order[i] == block) return;
- if (f->emit_order_n == f->emit_order_cap) {
- u32 ncap = f->emit_order_cap ? f->emit_order_cap * 2u : 8u;
- u32* order = arena_array(f->arena, u32, ncap);
- if (f->emit_order)
- memcpy(order, f->emit_order, sizeof(order[0]) * f->emit_order_n);
- f->emit_order = order;
- f->emit_order_cap = ncap;
- }
- u32 pos = f->emit_order_n;
- for (u32 i = 0; i < f->emit_order_n; ++i) {
- if (f->emit_order[i] == after) {
- pos = i + 1u;
- break;
- }
- }
- for (u32 i = f->emit_order_n; i > pos; --i)
- f->emit_order[i] = f->emit_order[i - 1u];
- f->emit_order[pos] = block;
- ++f->emit_order_n;
-}
-
-static int edge_is_fallthrough(Func* f, u32 pred, u32 succ) {
- Block* bl = &f->blocks[pred];
- if (!bl->ninsts) return 0;
- IROp op = (IROp)bl->insts[bl->ninsts - 1u].op;
- if (op != IR_CMP_BRANCH && op != IR_CONDBR) return 0;
- return bl->nsucc >= 2 && bl->succ[1] == succ;
-}
-
-static u32 split_edge(Func* f, u32 pred, u32 succ) {
- u32 edge = ir_block_new(f);
- Block* eb = &f->blocks[edge];
- Inst* br = ir_emit(f, edge, IR_BR);
- (void)br;
- eb->succ[0] = succ;
- eb->nsucc = 1;
- int place_after = edge_is_fallthrough(f, pred, succ);
- replace_succ_ref(f, pred, succ, edge);
- if (succ < f->nblocks) {
- Block* sb = &f->blocks[succ];
- for (u32 i = 0; i < sb->ninsts; ++i) {
- Inst* phi = &sb->insts[i];
- if ((IROp)phi->op != IR_PHI) break;
- IRPhiAux* aux = (IRPhiAux*)phi->extra.aux;
- if (!aux) continue;
- for (u32 p = 0; p < aux->npreds; ++p) {
- if (aux->pred_blocks[p] == pred) {
- aux->pred_blocks[p] = edge;
- aux->pred_vals[p] = phi->def;
- }
- }
- }
- }
- if (place_after) {
- emit_order_insert_after(f, pred, edge);
- } else {
- ir_note_emit(f, edge);
- }
- return edge;
-}
-
static void realign_phi_preds(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
@@ -1015,7 +930,7 @@ void opt_make_conventional_ssa(Func* f) {
if (!n) continue;
u32 target = pred;
if (pred >= f->nblocks) continue;
- if (f->blocks[pred].nsucc != 1) target = split_edge(f, pred, b);
+ if (f->blocks[pred].nsucc != 1) target = opt_split_edge(f, pred, b);
insert_edge_moves(f, target, moves, n);
}
}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -552,6 +552,13 @@ static int count_op(Func* f, IROp op) {
return n;
}
+static int block_contains_op(Func* f, u32 b, IROp op) {
+ if (!f || b >= f->nblocks) return 0;
+ for (u32 i = 0; i < f->blocks[b].ninsts; ++i)
+ if ((IROp)f->blocks[b].insts[i].op == op) return 1;
+ return 0;
+}
+
static Inst* def_inst(Func* f, Val v) {
if (!f || v == VAL_NONE || v >= f->nvals) return NULL;
u32 b = f->val_def_block[v];
@@ -3382,6 +3389,99 @@ static void opt_o2_splits_singleton_when_whole_alloc_fails(void) {
tc_fini(&tc);
}
+static void opt_o2_split_materializes_on_critical_edge(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
+ Func* f = new_func(&tc);
+ opt_machinize(f, &mock.base);
+ u32 entry = f->entry;
+ u32 other = ir_block_new(f);
+ u32 join = ir_block_new(f);
+ ir_note_emit(f, other);
+ ir_note_emit(f, join);
+ Val pinned = add_val(f, tc.i32);
+ Val v = add_val(f, tc.i32);
+ Val tmp = add_val(f, tc.i32);
+ emit_load_imm(f, entry, pinned, tc.i32, 1);
+ emit_load_imm(f, entry, v, tc.i32, 2);
+ emit_binop(f, entry, tmp, pinned, v, tc.i32);
+ emit_test_branch(f, entry, join, other, tc.i32);
+ emit_br_to(f, other, join);
+ emit_ret_val(f, join, v, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ ensure_test_preg_info(f);
+ f->preg_info[pinned].tied_hard_reg = 19;
+ u32 original_blocks = f->nblocks;
+ opt_regalloc(f, 1);
+
+ EXPECT(f->preg_info[v].alloc_kind == OPT_ALLOC_SPLIT,
+ "edge fixture should split v%u", (unsigned)v);
+ EXPECT(f->nblocks > original_blocks,
+ "critical edge materialization should add edge blocks");
+ u32 edge = f->blocks[entry].succ[0];
+ EXPECT(edge != join && edge < f->nblocks && f->blocks[edge].nsucc == 1 &&
+ f->blocks[edge].succ[0] == join,
+ "entry->join critical edge should be split");
+ if (edge != join && edge < f->nblocks) {
+ EXPECT(block_contains_op(f, edge, IR_LOAD),
+ "split edge block should hold the reload");
+ EXPECT(block_contains_op(f, edge, IR_BR),
+ "split edge block should still branch to join");
+ }
+ EXPECT(!block_contains_op(f, join, IR_LOAD),
+ "join block should not receive an ambiguous reload");
+ opt_verify(f, "test-split-critical-edge-materialization");
+ tc_fini(&tc);
+}
+
+static void opt_o1_does_not_split_spill_edges(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
+ Func* f = new_func(&tc);
+ opt_machinize(f, &mock.base);
+ u32 entry = f->entry;
+ u32 other = ir_block_new(f);
+ u32 join = ir_block_new(f);
+ ir_note_emit(f, other);
+ ir_note_emit(f, join);
+ Val pinned = add_val(f, tc.i32);
+ Val v = add_val(f, tc.i32);
+ Val tmp = add_val(f, tc.i32);
+ emit_load_imm(f, entry, pinned, tc.i32, 1);
+ emit_load_imm(f, entry, v, tc.i32, 2);
+ emit_binop(f, entry, tmp, pinned, v, tc.i32);
+ emit_test_branch(f, entry, join, other, tc.i32);
+ emit_br_to(f, other, join);
+ emit_ret_val(f, join, v, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ ensure_test_preg_info(f);
+ f->preg_info[pinned].tied_hard_reg = 19;
+ u32 original_blocks = f->nblocks;
+ opt_regalloc(f, 0);
+
+ EXPECT(f->nblocks == original_blocks,
+ "O1 regalloc should not split CFG spill edges");
+ EXPECT(f->blocks[entry].succ[0] == join,
+ "O1 should leave the critical edge unsplit");
+ tc_fini(&tc);
+}
+
static void opt_range_regalloc_no_conflicts_and_stack_reuse(void) {
TestCtx tc;
tc_init(&tc);
@@ -5195,6 +5295,8 @@ int main(void) {
opt_o2_refuses_overlapping_copy_coalesce();
opt_o2_refuses_incompatible_copy_coalesce();
opt_o2_splits_singleton_when_whole_alloc_fails();
+ opt_o2_split_materializes_on_critical_edge();
+ opt_o1_does_not_split_spill_edges();
opt_range_regalloc_no_conflicts_and_stack_reuse();
opt_stack_spill_assignment_avoids_quadratic_probe();
opt_rewrite_spill_use_def();