kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 7e162375ecff7191eac6b17cecfed4ce7a56bcc5
parent 5dbd91a562625c2f87035eeb010468eb5ea91d4e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 16:07:47 -0700

opt: place split spills on critical edges

Diffstat:
Mdoc/OPT.md | 2+-
Msrc/opt/opt_internal.h | 4++++
Msrc/opt/pass_cfg.c | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/opt/pass_lower.c | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/opt/pass_ssa.c | 87+------------------------------------------------------------------------------
Mtest/opt/opt_test.c | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 372 insertions(+), 87 deletions(-)

diff --git a/doc/OPT.md b/doc/OPT.md @@ -674,7 +674,7 @@ Implement: - [x] Conservative singleton live-range splitting when whole hard allocation fails. - [x] Boundary reload/store insertion for hard split segments through rewrite. -- [ ] Critical-edge CFG mutation for spill placement. Current placement stays +- [x] Critical-edge CFG mutation for spill placement. Current placement stays at unambiguous block/range boundaries. Exit criteria: diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h @@ -88,6 +88,10 @@ void opt_analysis_build_dom_frontier(Func*, OptAnalysis*); int opt_analysis_dominates(const OptAnalysis*, u32 dom, u32 node); void opt_rebuild_def_use(Func*); void opt_verify(Func*, const char* stage); +void opt_replace_succ_ref(Func*, u32 pred, u32 old_succ, u32 new_succ); +void opt_emit_order_insert_after(Func*, u32 after, u32 block); +int opt_edge_is_fallthrough(Func*, u32 pred, u32 succ); +u32 opt_split_edge(Func*, u32 pred, u32 succ); int opt_val_in_inst_defs(const Inst*, Val); void opt_walk_operand(Func*, Inst*, Operand*, int is_def, OptOperandWalkFn, diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c @@ -113,6 +113,97 @@ static void prune_unreachable(Func* f, const u8* reachable) { f->emit_order_n = w; } +void opt_replace_succ_ref(Func* f, u32 pred, u32 old_succ, u32 new_succ) { + if (!f || pred >= f->nblocks) return; + Block* bl = &f->blocks[pred]; + for (u32 s = 0; s < bl->nsucc; ++s) { + if (bl->succ[s] == old_succ) bl->succ[s] = new_succ; + } + if (!bl->ninsts) return; + Inst* term = &bl->insts[bl->ninsts - 1u]; + if ((IROp)term->op == IR_SWITCH) { + IRSwitchAux* aux = (IRSwitchAux*)term->extra.aux; + if (!aux) return; + for (u32 i = 0; i < aux->ncases; ++i) + if (aux->cases[i].block == old_succ) aux->cases[i].block = new_succ; + if (aux->default_block == old_succ) aux->default_block = new_succ; + } else if ((IROp)term->op == IR_INDIRECT_BRANCH) { + IRIndirectAux* aux = (IRIndirectAux*)term->extra.aux; + if (!aux) return; + for (u32 i = 0; i < aux->ntargets; ++i) + if (aux->targets[i] == old_succ) aux->targets[i] = new_succ; + } +} + +void opt_emit_order_insert_after(Func* f, u32 after, u32 block) { + if (!f) return; + for (u32 i = 0; i < f->emit_order_n; ++i) + if (f->emit_order[i] == block) return; + if (f->emit_order_n == f->emit_order_cap) { + u32 ncap = f->emit_order_cap ? f->emit_order_cap * 2u : 8u; + u32* order = arena_array(f->arena, u32, ncap); + if (f->emit_order) + memcpy(order, f->emit_order, sizeof(order[0]) * f->emit_order_n); + f->emit_order = order; + f->emit_order_cap = ncap; + } + u32 pos = f->emit_order_n; + for (u32 i = 0; i < f->emit_order_n; ++i) { + if (f->emit_order[i] == after) { + pos = i + 1u; + break; + } + } + for (u32 i = f->emit_order_n; i > pos; --i) + f->emit_order[i] = f->emit_order[i - 1u]; + f->emit_order[pos] = block; + ++f->emit_order_n; +} + +int opt_edge_is_fallthrough(Func* f, u32 pred, u32 succ) { + if (!f || pred >= f->nblocks) return 0; + Block* bl = &f->blocks[pred]; + if (!bl->ninsts) return 0; + IROp op = (IROp)bl->insts[bl->ninsts - 1u].op; + if (op != IR_CMP_BRANCH && op != IR_CONDBR) return 0; + return bl->nsucc >= 2 && bl->succ[1] == succ; +} + +u32 opt_split_edge(Func* f, u32 pred, u32 succ) { + if (!f) return 0; + u32 edge = ir_block_new(f); + Block* eb = &f->blocks[edge]; + Inst* br = ir_emit(f, edge, IR_BR); + (void)br; + eb->succ[0] = succ; + eb->nsucc = 1; + int place_after = opt_edge_is_fallthrough(f, pred, succ); + opt_replace_succ_ref(f, pred, succ, edge); + if (succ < f->nblocks) { + Block* sb = &f->blocks[succ]; + for (u32 i = 0; i < sb->ninsts; ++i) { + Inst* phi = &sb->insts[i]; + if ((IROp)phi->op != IR_PHI) break; + IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; + if (!aux) continue; + for (u32 p = 0; p < aux->npreds; ++p) { + if (aux->pred_blocks[p] == pred) { + aux->pred_blocks[p] = edge; + aux->pred_vals[p] = phi->def; + } + } + } + } + if (place_after) { + opt_emit_order_insert_after(f, pred, edge); + } else { + ir_note_emit(f, edge); + } + opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | + OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); + return edge; +} + void opt_build_cfg(Func* f) { if (!f) return; opt_analysis_invalidate( diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -1134,6 +1134,26 @@ typedef struct RewriteList { u32 cap; } RewriteList; +typedef enum EdgeMatKind { + EDGE_MAT_STORE, + EDGE_MAT_RELOAD, +} EdgeMatKind; + +typedef struct EdgeMat { + u32 pred; + u32 succ; + PReg v; + Reg hard_reg; + u8 kind; + u8 pad[3]; +} EdgeMat; + +typedef struct EdgeMatPlan { + EdgeMat* mats; + u32 n; + u32 cap; +} EdgeMatPlan; + typedef struct RewriteOut { Inst* data; u32 cap; @@ -1475,6 +1495,154 @@ static void append_split_stores_at(Func* f, RewriteList* out, u32 block, } } +static void edge_mat_push(Func* f, EdgeMatPlan* plan, u32 pred, u32 succ, + PReg v, Reg hard_reg, EdgeMatKind kind) { + if (!plan || pred >= f->nblocks || succ >= f->nblocks) return; + if (hard_reg == (Reg)REG_NONE) return; + if (plan->n == plan->cap) { + u32 ncap = plan->cap ? plan->cap * 2u : 16u; + EdgeMat* nm = arena_array(f->arena, EdgeMat, ncap); + if (plan->mats) memcpy(nm, plan->mats, sizeof(plan->mats[0]) * plan->n); + plan->mats = nm; + plan->cap = ncap; + } + EdgeMat* m = &plan->mats[plan->n++]; + memset(m, 0, sizeof *m); + m->pred = pred; + m->succ = succ; + m->v = v; + m->hard_reg = hard_reg; + m->kind = (u8)kind; +} + +static void prepare_split_edge_materialization(Func* f, EdgeMatPlan* plan) { + if (!f || !plan || !f->opt_first_segment_by_preg) return; + u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); + u32 raw = 0; + for (u32 b = 0; b < f->nblocks; ++b) { + block_base[b] = raw; + raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u; + } + + for (PReg v = 1; v < opt_reg_count(f); ++v) { + if (f->preg_info[v].alloc_kind != OPT_ALLOC_SPLIT) continue; + for (u32 si = f->opt_first_segment_by_preg[v]; si != OPT_RANGE_NONE; + si = f->opt_alloc_segments[si].next) { + OptAllocSegment* s = &f->opt_alloc_segments[si]; + if (s->loc_kind != OPT_LOC_HARD || s->block >= f->nblocks) continue; + Block* bl = &f->blocks[s->block]; + u32 block_start = block_base[s->block]; + u32 block_end = block_start + (bl->ninsts ? bl->ninsts : 1u); + if (s->reload_at_start && s->start == block_start && bl->npreds) { + for (u32 p = 0; p < bl->npreds; ++p) + edge_mat_push(f, plan, bl->preds[p], s->block, v, s->hard_reg, + EDGE_MAT_RELOAD); + s->reload_at_start = 0; + } + if (s->store_at_end && s->end == block_end && bl->nsucc) { + for (u32 e = 0; e < bl->nsucc; ++e) + edge_mat_push(f, plan, s->block, bl->succ[e], v, s->hard_reg, + EDGE_MAT_STORE); + s->store_at_end = 0; + } + } + } +} + +static int lower_is_terminator(const Inst* in) { + if (!in) return 0; + switch ((IROp)in->op) { + case IR_BR: + case IR_CONDBR: + case IR_CMP_BRANCH: + case IR_SWITCH: + case IR_INDIRECT_BRANCH: + case IR_RET: + case IR_BREAK_TO: + case IR_CONTINUE_TO: + return 1; + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP || + aux->kind == INTRIN_UNREACHABLE); + } + default: + return 0; + } +} + +static void block_insert_list(Func* f, u32 block, const RewriteList* list) { + if (!f || block >= f->nblocks || !list || !list->n) return; + Block* bl = &f->blocks[block]; + u32 term = bl->ninsts && lower_is_terminator(&bl->insts[bl->ninsts - 1u]); + u32 insert_at = bl->ninsts - term; + Inst* insts = arena_zarray(f->arena, Inst, bl->ninsts + list->n); + if (insert_at) memcpy(insts, bl->insts, sizeof(Inst) * insert_at); + memcpy(insts + insert_at, list->data, sizeof(Inst) * list->n); + if (bl->ninsts > insert_at) { + memcpy(insts + insert_at + list->n, bl->insts + insert_at, + sizeof(Inst) * (bl->ninsts - insert_at)); + } + bl->insts = insts; + bl->ninsts += list->n; + bl->cap = bl->ninsts; +} + +typedef struct EdgePlace { + u32 pred; + u32 succ; + u32 block; +} EdgePlace; + +static u32 edge_place_for(Func* f, EdgePlace** places, u32* nplaces, + u32* cap, u32 pred, u32 succ) { + for (u32 i = 0; i < *nplaces; ++i) + if ((*places)[i].pred == pred && (*places)[i].succ == succ) + return (*places)[i].block; + if (*nplaces == *cap) { + u32 ncap = *cap ? *cap * 2u : 16u; + EdgePlace* np = arena_array(f->arena, EdgePlace, ncap); + if (*places) memcpy(np, *places, sizeof((*places)[0]) * *nplaces); + *places = np; + *cap = ncap; + } + u32 block = opt_split_edge(f, pred, succ); + EdgePlace* p = &(*places)[(*nplaces)++]; + p->pred = pred; + p->succ = succ; + p->block = block; + return block; +} + +static void apply_split_edge_materialization(Func* f, const EdgeMatPlan* plan) { + if (!f || !plan || !plan->n) return; + EdgePlace* places = NULL; + u32 nplaces = 0; + u32 place_cap = 0; + for (u32 i = 0; i < plan->n; ++i) { + const EdgeMat* m = &plan->mats[i]; + if (m->pred < f->nblocks && m->succ < f->nblocks) + (void)edge_place_for(f, &places, &nplaces, &place_cap, m->pred, m->succ); + } + for (u32 pass = 0; pass < 2; ++pass) { + EdgeMatKind kind = pass == 0 ? EDGE_MAT_STORE : EDGE_MAT_RELOAD; + for (u32 i = 0; i < plan->n; ++i) { + const EdgeMat* m = &plan->mats[i]; + if ((EdgeMatKind)m->kind != kind) continue; + u32 block = edge_place_for(f, &places, &nplaces, &place_cap, m->pred, + m->succ); + RewriteList list; + memset(&list, 0, sizeof list); + if (kind == EDGE_MAT_STORE) + append_store_preg_hard(f, &list, m->v, m->hard_reg); + else + append_load_preg_hard(f, &list, m->v, m->hard_reg); + block_insert_list(f, block, &list); + } + } + opt_build_cfg(f); +} + static void rewrite_func(Func* f, const OptLiveInfo* live_info) { u32 words = live_info ? live_info->words : f->opt_live_words; if (!words) words = bit_words(opt_reg_count(f)); @@ -1689,5 +1857,10 @@ void opt_regalloc(Func* f, int allow_live_range_split) { OptAllocator alloc; opt_assign_ranges(f, &ranges, &alloc, allow_live_range_split); + EdgeMatPlan edge_mats; + memset(&edge_mats, 0, sizeof edge_mats); + if (allow_live_range_split) + prepare_split_edge_materialization(f, &edge_mats); rewrite_func(f, &live); + apply_split_edge_materialization(f, &edge_mats); } diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c @@ -869,91 +869,6 @@ static void insert_edge_moves(Func* f, u32 b, const EdgeMove* moves, u32 n) { } } -static void replace_succ_ref(Func* f, u32 pred, u32 old_succ, u32 new_succ) { - Block* bl = &f->blocks[pred]; - for (u32 s = 0; s < bl->nsucc; ++s) { - if (bl->succ[s] == old_succ) bl->succ[s] = new_succ; - } - if (!bl->ninsts) return; - Inst* term = &bl->insts[bl->ninsts - 1u]; - if ((IROp)term->op == IR_SWITCH) { - IRSwitchAux* aux = (IRSwitchAux*)term->extra.aux; - if (!aux) return; - for (u32 i = 0; i < aux->ncases; ++i) - if (aux->cases[i].block == old_succ) aux->cases[i].block = new_succ; - if (aux->default_block == old_succ) aux->default_block = new_succ; - } else if ((IROp)term->op == IR_INDIRECT_BRANCH) { - IRIndirectAux* aux = (IRIndirectAux*)term->extra.aux; - if (!aux) return; - for (u32 i = 0; i < aux->ntargets; ++i) - if (aux->targets[i] == old_succ) aux->targets[i] = new_succ; - } -} - -static void emit_order_insert_after(Func* f, u32 after, u32 block) { - for (u32 i = 0; i < f->emit_order_n; ++i) - if (f->emit_order[i] == block) return; - if (f->emit_order_n == f->emit_order_cap) { - u32 ncap = f->emit_order_cap ? f->emit_order_cap * 2u : 8u; - u32* order = arena_array(f->arena, u32, ncap); - if (f->emit_order) - memcpy(order, f->emit_order, sizeof(order[0]) * f->emit_order_n); - f->emit_order = order; - f->emit_order_cap = ncap; - } - u32 pos = f->emit_order_n; - for (u32 i = 0; i < f->emit_order_n; ++i) { - if (f->emit_order[i] == after) { - pos = i + 1u; - break; - } - } - for (u32 i = f->emit_order_n; i > pos; --i) - f->emit_order[i] = f->emit_order[i - 1u]; - f->emit_order[pos] = block; - ++f->emit_order_n; -} - -static int edge_is_fallthrough(Func* f, u32 pred, u32 succ) { - Block* bl = &f->blocks[pred]; - if (!bl->ninsts) return 0; - IROp op = (IROp)bl->insts[bl->ninsts - 1u].op; - if (op != IR_CMP_BRANCH && op != IR_CONDBR) return 0; - return bl->nsucc >= 2 && bl->succ[1] == succ; -} - -static u32 split_edge(Func* f, u32 pred, u32 succ) { - u32 edge = ir_block_new(f); - Block* eb = &f->blocks[edge]; - Inst* br = ir_emit(f, edge, IR_BR); - (void)br; - eb->succ[0] = succ; - eb->nsucc = 1; - int place_after = edge_is_fallthrough(f, pred, succ); - replace_succ_ref(f, pred, succ, edge); - if (succ < f->nblocks) { - Block* sb = &f->blocks[succ]; - for (u32 i = 0; i < sb->ninsts; ++i) { - Inst* phi = &sb->insts[i]; - if ((IROp)phi->op != IR_PHI) break; - IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; - if (!aux) continue; - for (u32 p = 0; p < aux->npreds; ++p) { - if (aux->pred_blocks[p] == pred) { - aux->pred_blocks[p] = edge; - aux->pred_vals[p] = phi->def; - } - } - } - } - if (place_after) { - emit_order_insert_after(f, pred, edge); - } else { - ir_note_emit(f, edge); - } - return edge; -} - static void realign_phi_preds(Func* f) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; @@ -1015,7 +930,7 @@ void opt_make_conventional_ssa(Func* f) { if (!n) continue; u32 target = pred; if (pred >= f->nblocks) continue; - if (f->blocks[pred].nsucc != 1) target = split_edge(f, pred, b); + if (f->blocks[pred].nsucc != 1) target = opt_split_edge(f, pred, b); insert_edge_moves(f, target, moves, n); } } diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -552,6 +552,13 @@ static int count_op(Func* f, IROp op) { return n; } +static int block_contains_op(Func* f, u32 b, IROp op) { + if (!f || b >= f->nblocks) return 0; + for (u32 i = 0; i < f->blocks[b].ninsts; ++i) + if ((IROp)f->blocks[b].insts[i].op == op) return 1; + return 0; +} + static Inst* def_inst(Func* f, Val v) { if (!f || v == VAL_NONE || v >= f->nvals) return NULL; u32 b = f->val_def_block[v]; @@ -3382,6 +3389,99 @@ static void opt_o2_splits_singleton_when_whole_alloc_fails(void) { tc_fini(&tc); } +static void opt_o2_split_materializes_on_critical_edge(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {19}; + static const Reg scratch[] = {9, 10}; + mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); + + Func* f = new_func(&tc); + opt_machinize(f, &mock.base); + u32 entry = f->entry; + u32 other = ir_block_new(f); + u32 join = ir_block_new(f); + ir_note_emit(f, other); + ir_note_emit(f, join); + Val pinned = add_val(f, tc.i32); + Val v = add_val(f, tc.i32); + Val tmp = add_val(f, tc.i32); + emit_load_imm(f, entry, pinned, tc.i32, 1); + emit_load_imm(f, entry, v, tc.i32, 2); + emit_binop(f, entry, tmp, pinned, v, tc.i32); + emit_test_branch(f, entry, join, other, tc.i32); + emit_br_to(f, other, join); + emit_ret_val(f, join, v, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + ensure_test_preg_info(f); + f->preg_info[pinned].tied_hard_reg = 19; + u32 original_blocks = f->nblocks; + opt_regalloc(f, 1); + + EXPECT(f->preg_info[v].alloc_kind == OPT_ALLOC_SPLIT, + "edge fixture should split v%u", (unsigned)v); + EXPECT(f->nblocks > original_blocks, + "critical edge materialization should add edge blocks"); + u32 edge = f->blocks[entry].succ[0]; + EXPECT(edge != join && edge < f->nblocks && f->blocks[edge].nsucc == 1 && + f->blocks[edge].succ[0] == join, + "entry->join critical edge should be split"); + if (edge != join && edge < f->nblocks) { + EXPECT(block_contains_op(f, edge, IR_LOAD), + "split edge block should hold the reload"); + EXPECT(block_contains_op(f, edge, IR_BR), + "split edge block should still branch to join"); + } + EXPECT(!block_contains_op(f, join, IR_LOAD), + "join block should not receive an ambiguous reload"); + opt_verify(f, "test-split-critical-edge-materialization"); + tc_fini(&tc); +} + +static void opt_o1_does_not_split_spill_edges(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {19}; + static const Reg scratch[] = {9, 10}; + mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); + + Func* f = new_func(&tc); + opt_machinize(f, &mock.base); + u32 entry = f->entry; + u32 other = ir_block_new(f); + u32 join = ir_block_new(f); + ir_note_emit(f, other); + ir_note_emit(f, join); + Val pinned = add_val(f, tc.i32); + Val v = add_val(f, tc.i32); + Val tmp = add_val(f, tc.i32); + emit_load_imm(f, entry, pinned, tc.i32, 1); + emit_load_imm(f, entry, v, tc.i32, 2); + emit_binop(f, entry, tmp, pinned, v, tc.i32); + emit_test_branch(f, entry, join, other, tc.i32); + emit_br_to(f, other, join); + emit_ret_val(f, join, v, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + ensure_test_preg_info(f); + f->preg_info[pinned].tied_hard_reg = 19; + u32 original_blocks = f->nblocks; + opt_regalloc(f, 0); + + EXPECT(f->nblocks == original_blocks, + "O1 regalloc should not split CFG spill edges"); + EXPECT(f->blocks[entry].succ[0] == join, + "O1 should leave the critical edge unsplit"); + tc_fini(&tc); +} + static void opt_range_regalloc_no_conflicts_and_stack_reuse(void) { TestCtx tc; tc_init(&tc); @@ -5195,6 +5295,8 @@ int main(void) { opt_o2_refuses_overlapping_copy_coalesce(); opt_o2_refuses_incompatible_copy_coalesce(); opt_o2_splits_singleton_when_whole_alloc_fails(); + opt_o2_split_materializes_on_critical_edge(); + opt_o1_does_not_split_spill_edges(); opt_range_regalloc_no_conflicts_and_stack_reuse(); opt_stack_spill_assignment_avoids_quadratic_probe(); opt_rewrite_spill_use_def();