kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit fe755e3867b78c2712a0acbe7c54c79c993f2aec
parent eb3d74591d044df173692292c8ccfaf49a7f41e9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 21:32:33 -0700

Implement O2 jump optimization

Diffstat:
Mdoc/OPT.md | 16++++++++++------
Msrc/opt/opt.c | 4++--
Msrc/opt/pass_jump.c | 142+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtest/opt/opt_test.c | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 284 insertions(+), 10 deletions(-)

diff --git a/doc/OPT.md b/doc/OPT.md @@ -460,8 +460,8 @@ verify(o2-ssa-combine) undo_ssa copy_cleanup verify(o2-undo-copy-cleanup) -build_cfg -verify(o2-undo-ssa) +opt_jump_opt +verify(o2-jump-opt) shared O1 lowering pipeline (machinize ... jump_cleanup ... opt_emit) ``` @@ -669,9 +669,13 @@ Temporary defensive/pessimizing checks to lift: are retargeted when CFG cleanup forwards or removes labeled trampoline blocks. -The next implementation work should close the remaining Phase D gap: full O2 -jump optimization beyond the shared O1 jump cleanup. After that, Phase E -inlining and cleanup become the next major O2 feature. +Full O2 jump optimization is now in the O2 path after SSA destruction. It +extends the shared O1 jump cleanup with switch/default target forwarding, empty +fallthrough-chain forwarding, repeated branch-to-branch forwarding, and +same-target conditional branch collapse while preserving the final layout +cleanup for branch-to-physical-next deletion. + +The next implementation work should move to Phase E inlining and cleanup. --- @@ -823,7 +827,7 @@ Implement in order: 13. [x] `opt_make_conventional_ssa` 14. [x] `opt_ssa_combine` 15. [x] `opt_undo_ssa` -16. [ ] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup +16. [x] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup Do not batch these into one landing. Each pass needs a pass-local corpus case that fails red without the pass or its bug fix. diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1455,8 +1455,8 @@ static void opt_run_o2_pipeline(OptImpl* o) { opt_undo_ssa(o->f); opt_copy_cleanup(o->f); opt_verify(o->f, "o2-undo-copy-cleanup"); - opt_build_cfg(o->f); - opt_verify(o->f, "o2-undo-ssa"); + opt_jump_opt(o->f); + opt_verify(o->f, "o2-jump-opt"); metrics_scope_end(o->c, "opt.o2.ssa"); opt_run_lowering_pipeline(o, "opt.o2.total", 1); } diff --git a/src/opt/pass_jump.c b/src/opt/pass_jump.c @@ -52,7 +52,23 @@ static int single_jump_block(const JumpCleanupCtx* c, u32 block, return 1; } -static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) { +static int empty_fallthrough_block(const JumpCleanupCtx* c, u32 block, + u32* target_out) { + Func* f = c->f; + if (block >= f->nblocks || block == f->entry) return 0; + Block* bl = &f->blocks[block]; + if (bl->mc_label != MC_LABEL_NONE) return 0; + if (bl->ninsts != 0 || bl->nsucc != 0) return 0; + u32 idx = emit_order_index(c, block); + if (idx == BLOCK_NONE || idx + 1u >= f->emit_order_n) return 0; + u32 next = f->emit_order[idx + 1u]; + if (next >= f->nblocks || next == block) return 0; + if (target_out) *target_out = next; + return 1; +} + +static u32 forward_jump_target_ex(JumpCleanupCtx* c, u32 target, + int allow_empty_fallthrough) { u32 cur = target; Func* f = c->f; u32 npath = 0; @@ -67,7 +83,9 @@ static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) { result = c->forwarded_target_by_block[cur]; break; } - if (!single_jump_block(c, cur, &next)) { + if (!single_jump_block(c, cur, &next) && + (!allow_empty_fallthrough || + !empty_fallthrough_block(c, cur, &next))) { result = cur; c->forwarded_target_by_block[cur] = result; break; @@ -85,6 +103,10 @@ static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) { return result; } +static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) { + return forward_jump_target_ex(c, target, 0); +} + static int invert_cmp(CmpOp op, CmpOp* out) { switch (op) { case CMP_EQ: @@ -218,6 +240,93 @@ static void cleanup_layout_fallthrough_branches(const JumpCleanupCtx* c) { } } +static int forward_empty_fallthrough_chain(JumpCleanupCtx* c, u32 target, + u32* target_out) { + u32 cur = target; + Func* f = c->f; + for (u32 step = 0; step < f->nblocks; ++step) { + u32 next_block = BLOCK_NONE; + if (!empty_fallthrough_block(c, cur, &next_block)) return 0; + if (next_block >= f->nblocks) return 0; + if (!empty_fallthrough_block(c, next_block, NULL)) { + if (target_out) *target_out = next_block; + return 1; + } + cur = next_block; + } + return 0; +} + +static int full_cond_fallthrough_forwardable(JumpCleanupCtx* c, u32 pred, + u32 target, u32* target_out) { + u32 next = next_emit_block(c, pred); + if (next != target) return 0; + return forward_empty_fallthrough_chain(c, target, target_out); +} + +static u32 full_forwardable_successors(const Block* bl, const Inst* term) { + switch ((IROp)term->op) { + case IR_BR: + case IR_SWITCH: + return bl->nsucc; + case IR_CONDBR: + case IR_CMP_BRANCH: + return bl->nsucc < 2u ? bl->nsucc : 2u; + default: + return 0; + } +} + +static void full_rewrite_successor(Func* f, u32 b, u32 old_succ, + u32 new_succ) { + opt_replace_succ_ref(f, b, old_succ, new_succ); +} + +static int full_forward_branch_targets(JumpCleanupCtx* c) { + Func* f = c->f; + int changed = 0; + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + if (!bl->ninsts) continue; + Inst* term = &bl->insts[bl->ninsts - 1u]; + u32 nsucc = full_forwardable_successors(bl, term); + for (u32 s = 0; s < nsucc; ++s) { + u32 target = bl->succ[s]; + u32 forwarded = forward_jump_target_ex(c, target, 1); + if (((IROp)term->op == IR_CONDBR || (IROp)term->op == IR_CMP_BRANCH) && + s == 1u) { + if (!full_cond_fallthrough_forwardable(c, b, target, &forwarded)) + continue; + } + if (forwarded >= f->nblocks || forwarded == target) continue; + full_rewrite_successor(f, b, target, forwarded); + changed = 1; + } + } + return changed; +} + +static int full_collapse_same_target_branches(Func* f) { + int changed = 0; + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + if (!bl->ninsts || bl->nsucc != 2) continue; + Inst* term = &bl->insts[bl->ninsts - 1u]; + IROp op = (IROp)term->op; + if (op != IR_CONDBR && op != IR_CMP_BRANCH) continue; + if (bl->succ[0] != bl->succ[1]) continue; + InstId id = term->id; + SrcLoc loc = term->loc; + memset(term, 0, sizeof *term); + term->op = IR_BR; + term->id = id; + term->loc = loc; + bl->nsucc = 1; + changed = 1; + } + return changed; +} + void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) { if (!f) return; opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | @@ -231,3 +340,32 @@ void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) { cleanup_layout_fallthrough_branches(&c); } } + +void opt_jump_opt(Func* f) { + if (!f) return; + int changed = 0; + + opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | + OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); + + for (u32 iter = 0; iter < f->nblocks; ++iter) { + JumpCleanupCtx c = jump_cleanup_ctx(f); + int iter_changed = 0; + iter_changed |= full_forward_branch_targets(&c); + opt_build_cfg(f); + c = jump_cleanup_ctx(f); + cleanup_invert_jump_fallthrough(&c); + c = jump_cleanup_ctx(f); + cleanup_branch_targets(&c); + iter_changed |= full_collapse_same_target_branches(f); + if (!iter_changed) break; + changed = 1; + opt_build_cfg(f); + } + + if (changed) { + opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | + OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); + } + opt_build_cfg(f); +} diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -501,6 +501,29 @@ static void emit_raw_condbr(Func* f, u32 b, Val cond, u32 taken, f->blocks[b].nsucc = 2; } +static void emit_switch2(Func* f, u32 b, Val sel, u64 v0, u32 case0, u64 v1, + u32 case1, u32 default_block, CfreeCgTypeId ty) { + Inst* in = ir_emit(f, b, IR_SWITCH); + IRSwitchAux* aux = arena_znew(f->arena, IRSwitchAux); + in->opnds = arena_array(f->arena, Operand, 1); + in->opnds[0] = op_reg_(sel, ty); + in->nopnds = 1; + aux->selector_type = ty; + aux->ncases = 2; + aux->has_default = 1; + aux->default_block = default_block; + aux->cases = arena_array(f->arena, IRSwitchAuxCase, 2); + aux->cases[0].value = v0; + aux->cases[0].block = case0; + aux->cases[1].value = v1; + aux->cases[1].block = case1; + in->extra.aux = aux; + ir_block_set_nsucc(f, b, 3); + f->blocks[b].succ[0] = case0; + f->blocks[b].succ[1] = case1; + f->blocks[b].succ[2] = default_block; +} + static int bytes_contains(const unsigned char* data, size_t len, const char* needle) { size_t nlen = strlen(needle); @@ -3518,6 +3541,112 @@ static void opt_jump_cleanup_layout_deletes_fallthrough_branch(void) { tc_fini(&tc); } +static void opt_jump_opt_forwards_switch_targets(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 b0 = f->entry; + u32 case_hop = ir_block_new(f); + u32 case_ret = ir_block_new(f); + u32 other_ret = ir_block_new(f); + u32 default_hop = ir_block_new(f); + u32 default_ret = ir_block_new(f); + ir_note_emit(f, case_hop); + ir_note_emit(f, case_ret); + ir_note_emit(f, other_ret); + ir_note_emit(f, default_hop); + ir_note_emit(f, default_ret); + + Val sel = add_val(f, tc.i32); + Val a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32); + Val c = add_val(f, tc.i32); + emit_scalar_input(f, b0, sel, tc.i32); + emit_switch2(f, b0, sel, 1, case_hop, 2, other_ret, default_hop, tc.i32); + emit_br_to(f, case_hop, case_ret); + emit_load_imm(f, case_ret, a, tc.i32, 11); + emit_ret_val(f, case_ret, a, tc.i32); + emit_load_imm(f, other_ret, b, tc.i32, 22); + emit_ret_val(f, other_ret, b, tc.i32); + emit_br_to(f, default_hop, default_ret); + emit_load_imm(f, default_ret, c, tc.i32, 33); + emit_ret_val(f, default_ret, c, tc.i32); + + opt_jump_opt(f); + opt_verify(f, "test-jump-opt-switch"); + + Inst* sw = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u]; + IRSwitchAux* aux = (IRSwitchAux*)sw->extra.aux; + EXPECT(aux->cases[0].block == case_ret, + "jump opt should forward switch case trampoline"); + EXPECT(aux->default_block == default_ret, + "jump opt should forward switch default trampoline"); + EXPECT(f->blocks[case_hop].ninsts == 0 && f->blocks[default_hop].ninsts == 0, + "forwarded switch trampolines should be pruned"); + tc_fini(&tc); +} + +static void opt_jump_opt_forwards_empty_fallthrough_chain(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 b0 = f->entry; + u32 empty0 = ir_block_new(f); + u32 empty1 = ir_block_new(f); + u32 fallthrough = ir_block_new(f); + u32 taken = ir_block_new(f); + ir_note_emit(f, empty0); + ir_note_emit(f, empty1); + ir_note_emit(f, fallthrough); + ir_note_emit(f, taken); + + Val a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32); + emit_test_branch(f, b0, taken, empty0, tc.i32); + emit_load_imm(f, fallthrough, a, tc.i32, 1); + emit_ret_val(f, fallthrough, a, tc.i32); + emit_load_imm(f, taken, b, tc.i32, 2); + emit_ret_val(f, taken, b, tc.i32); + + opt_jump_opt(f); + opt_verify(f, "test-jump-opt-empty-fallthrough"); + + EXPECT(f->blocks[b0].succ[1] == fallthrough, + "jump opt should forward empty physical fallthrough chain"); + EXPECT(f->blocks[empty0].ninsts == 0 && f->blocks[empty0].nsucc == 0, + "first empty fallthrough block should be pruned"); + EXPECT(f->blocks[empty1].ninsts == 0 && f->blocks[empty1].nsucc == 0, + "second empty fallthrough block should be pruned"); + EXPECT(f->emit_order_n >= 2 && f->emit_order[1] == fallthrough, + "forwarded fallthrough should become physical next block"); + tc_fini(&tc); +} + +static void opt_jump_opt_collapses_same_target_cond_branch(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 b0 = f->entry; + u32 ret = ir_block_new(f); + ir_note_emit(f, ret); + + Val cond = add_val(f, tc.i32); + Val v = add_val(f, tc.i32); + emit_scalar_input(f, b0, cond, tc.i32); + emit_cond_branch(f, b0, cond, ret, ret, tc.i32); + emit_load_imm(f, ret, v, tc.i32, 7); + emit_ret_val(f, ret, v, tc.i32); + + opt_jump_opt(f); + opt_verify(f, "test-jump-opt-same-target"); + + Inst* br = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u]; + EXPECT((IROp)br->op == IR_BR && f->blocks[b0].nsucc == 1 && + f->blocks[b0].succ[0] == ret, + "jump opt should collapse same-target conditional branch"); + tc_fini(&tc); +} + static void opt_loop_tree_excludes_side_exit(void) { TestCtx tc; tc_init(&tc); @@ -6025,6 +6154,9 @@ int main(void) { opt_jump_cleanup_inverts_to_remove_jump_block(); opt_jump_cleanup_keeps_conditional_fallthrough_block(); opt_jump_cleanup_layout_deletes_fallthrough_branch(); + opt_jump_opt_forwards_switch_targets(); + opt_jump_opt_forwards_empty_fallthrough_chain(); + opt_jump_opt_collapses_same_target_cond_branch(); opt_loop_tree_excludes_side_exit(); opt_loop_tree_nested_depths(); opt_loop_tree_does_not_mutate_cfg();