commit fe755e3867b78c2712a0acbe7c54c79c993f2aec
parent eb3d74591d044df173692292c8ccfaf49a7f41e9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 21:32:33 -0700
Implement O2 jump optimization
Diffstat:
4 files changed, 284 insertions(+), 10 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -460,8 +460,8 @@ verify(o2-ssa-combine)
undo_ssa
copy_cleanup
verify(o2-undo-copy-cleanup)
-build_cfg
-verify(o2-undo-ssa)
+opt_jump_opt
+verify(o2-jump-opt)
shared O1 lowering pipeline (machinize ... jump_cleanup ... opt_emit)
```
@@ -669,9 +669,13 @@ Temporary defensive/pessimizing checks to lift:
are retargeted when CFG cleanup forwards or removes labeled trampoline
blocks.
-The next implementation work should close the remaining Phase D gap: full O2
-jump optimization beyond the shared O1 jump cleanup. After that, Phase E
-inlining and cleanup become the next major O2 feature.
+Full O2 jump optimization is now in the O2 path after SSA destruction. It
+extends the shared O1 jump cleanup with switch/default target forwarding, empty
+fallthrough-chain forwarding, repeated branch-to-branch forwarding, and
+same-target conditional branch collapse while preserving the final layout
+cleanup for branch-to-physical-next deletion.
+
+The next implementation work should move to Phase E inlining and cleanup.
---
@@ -823,7 +827,7 @@ Implement in order:
13. [x] `opt_make_conventional_ssa`
14. [x] `opt_ssa_combine`
15. [x] `opt_undo_ssa`
-16. [ ] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup
+16. [x] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup
Do not batch these into one landing. Each pass needs a pass-local corpus case
that fails red without the pass or its bug fix.
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1455,8 +1455,8 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_undo_ssa(o->f);
opt_copy_cleanup(o->f);
opt_verify(o->f, "o2-undo-copy-cleanup");
- opt_build_cfg(o->f);
- opt_verify(o->f, "o2-undo-ssa");
+ opt_jump_opt(o->f);
+ opt_verify(o->f, "o2-jump-opt");
metrics_scope_end(o->c, "opt.o2.ssa");
opt_run_lowering_pipeline(o, "opt.o2.total", 1);
}
diff --git a/src/opt/pass_jump.c b/src/opt/pass_jump.c
@@ -52,7 +52,23 @@ static int single_jump_block(const JumpCleanupCtx* c, u32 block,
return 1;
}
-static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) {
+static int empty_fallthrough_block(const JumpCleanupCtx* c, u32 block,
+ u32* target_out) {
+ Func* f = c->f;
+ if (block >= f->nblocks || block == f->entry) return 0;
+ Block* bl = &f->blocks[block];
+ if (bl->mc_label != MC_LABEL_NONE) return 0;
+ if (bl->ninsts != 0 || bl->nsucc != 0) return 0;
+ u32 idx = emit_order_index(c, block);
+ if (idx == BLOCK_NONE || idx + 1u >= f->emit_order_n) return 0;
+ u32 next = f->emit_order[idx + 1u];
+ if (next >= f->nblocks || next == block) return 0;
+ if (target_out) *target_out = next;
+ return 1;
+}
+
+static u32 forward_jump_target_ex(JumpCleanupCtx* c, u32 target,
+ int allow_empty_fallthrough) {
u32 cur = target;
Func* f = c->f;
u32 npath = 0;
@@ -67,7 +83,9 @@ static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) {
result = c->forwarded_target_by_block[cur];
break;
}
- if (!single_jump_block(c, cur, &next)) {
+ if (!single_jump_block(c, cur, &next) &&
+ (!allow_empty_fallthrough ||
+ !empty_fallthrough_block(c, cur, &next))) {
result = cur;
c->forwarded_target_by_block[cur] = result;
break;
@@ -85,6 +103,10 @@ static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) {
return result;
}
+static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) {
+ return forward_jump_target_ex(c, target, 0);
+}
+
static int invert_cmp(CmpOp op, CmpOp* out) {
switch (op) {
case CMP_EQ:
@@ -218,6 +240,93 @@ static void cleanup_layout_fallthrough_branches(const JumpCleanupCtx* c) {
}
}
+static int forward_empty_fallthrough_chain(JumpCleanupCtx* c, u32 target,
+ u32* target_out) {
+ u32 cur = target;
+ Func* f = c->f;
+ for (u32 step = 0; step < f->nblocks; ++step) {
+ u32 next_block = BLOCK_NONE;
+ if (!empty_fallthrough_block(c, cur, &next_block)) return 0;
+ if (next_block >= f->nblocks) return 0;
+ if (!empty_fallthrough_block(c, next_block, NULL)) {
+ if (target_out) *target_out = next_block;
+ return 1;
+ }
+ cur = next_block;
+ }
+ return 0;
+}
+
+static int full_cond_fallthrough_forwardable(JumpCleanupCtx* c, u32 pred,
+ u32 target, u32* target_out) {
+ u32 next = next_emit_block(c, pred);
+ if (next != target) return 0;
+ return forward_empty_fallthrough_chain(c, target, target_out);
+}
+
+static u32 full_forwardable_successors(const Block* bl, const Inst* term) {
+ switch ((IROp)term->op) {
+ case IR_BR:
+ case IR_SWITCH:
+ return bl->nsucc;
+ case IR_CONDBR:
+ case IR_CMP_BRANCH:
+ return bl->nsucc < 2u ? bl->nsucc : 2u;
+ default:
+ return 0;
+ }
+}
+
+static void full_rewrite_successor(Func* f, u32 b, u32 old_succ,
+ u32 new_succ) {
+ opt_replace_succ_ref(f, b, old_succ, new_succ);
+}
+
+static int full_forward_branch_targets(JumpCleanupCtx* c) {
+ Func* f = c->f;
+ int changed = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ if (!bl->ninsts) continue;
+ Inst* term = &bl->insts[bl->ninsts - 1u];
+ u32 nsucc = full_forwardable_successors(bl, term);
+ for (u32 s = 0; s < nsucc; ++s) {
+ u32 target = bl->succ[s];
+ u32 forwarded = forward_jump_target_ex(c, target, 1);
+ if (((IROp)term->op == IR_CONDBR || (IROp)term->op == IR_CMP_BRANCH) &&
+ s == 1u) {
+ if (!full_cond_fallthrough_forwardable(c, b, target, &forwarded))
+ continue;
+ }
+ if (forwarded >= f->nblocks || forwarded == target) continue;
+ full_rewrite_successor(f, b, target, forwarded);
+ changed = 1;
+ }
+ }
+ return changed;
+}
+
+static int full_collapse_same_target_branches(Func* f) {
+ int changed = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ if (!bl->ninsts || bl->nsucc != 2) continue;
+ Inst* term = &bl->insts[bl->ninsts - 1u];
+ IROp op = (IROp)term->op;
+ if (op != IR_CONDBR && op != IR_CMP_BRANCH) continue;
+ if (bl->succ[0] != bl->succ[1]) continue;
+ InstId id = term->id;
+ SrcLoc loc = term->loc;
+ memset(term, 0, sizeof *term);
+ term->op = IR_BR;
+ term->id = id;
+ term->loc = loc;
+ bl->nsucc = 1;
+ changed = 1;
+ }
+ return changed;
+}
+
void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) {
if (!f) return;
opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
@@ -231,3 +340,32 @@ void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) {
cleanup_layout_fallthrough_branches(&c);
}
}
+
+void opt_jump_opt(Func* f) {
+ if (!f) return;
+ int changed = 0;
+
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+
+ for (u32 iter = 0; iter < f->nblocks; ++iter) {
+ JumpCleanupCtx c = jump_cleanup_ctx(f);
+ int iter_changed = 0;
+ iter_changed |= full_forward_branch_targets(&c);
+ opt_build_cfg(f);
+ c = jump_cleanup_ctx(f);
+ cleanup_invert_jump_fallthrough(&c);
+ c = jump_cleanup_ctx(f);
+ cleanup_branch_targets(&c);
+ iter_changed |= full_collapse_same_target_branches(f);
+ if (!iter_changed) break;
+ changed = 1;
+ opt_build_cfg(f);
+ }
+
+ if (changed) {
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+ }
+ opt_build_cfg(f);
+}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -501,6 +501,29 @@ static void emit_raw_condbr(Func* f, u32 b, Val cond, u32 taken,
f->blocks[b].nsucc = 2;
}
+static void emit_switch2(Func* f, u32 b, Val sel, u64 v0, u32 case0, u64 v1,
+ u32 case1, u32 default_block, CfreeCgTypeId ty) {
+ Inst* in = ir_emit(f, b, IR_SWITCH);
+ IRSwitchAux* aux = arena_znew(f->arena, IRSwitchAux);
+ in->opnds = arena_array(f->arena, Operand, 1);
+ in->opnds[0] = op_reg_(sel, ty);
+ in->nopnds = 1;
+ aux->selector_type = ty;
+ aux->ncases = 2;
+ aux->has_default = 1;
+ aux->default_block = default_block;
+ aux->cases = arena_array(f->arena, IRSwitchAuxCase, 2);
+ aux->cases[0].value = v0;
+ aux->cases[0].block = case0;
+ aux->cases[1].value = v1;
+ aux->cases[1].block = case1;
+ in->extra.aux = aux;
+ ir_block_set_nsucc(f, b, 3);
+ f->blocks[b].succ[0] = case0;
+ f->blocks[b].succ[1] = case1;
+ f->blocks[b].succ[2] = default_block;
+}
+
static int bytes_contains(const unsigned char* data, size_t len,
const char* needle) {
size_t nlen = strlen(needle);
@@ -3518,6 +3541,112 @@ static void opt_jump_cleanup_layout_deletes_fallthrough_branch(void) {
tc_fini(&tc);
}
+static void opt_jump_opt_forwards_switch_targets(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 b0 = f->entry;
+ u32 case_hop = ir_block_new(f);
+ u32 case_ret = ir_block_new(f);
+ u32 other_ret = ir_block_new(f);
+ u32 default_hop = ir_block_new(f);
+ u32 default_ret = ir_block_new(f);
+ ir_note_emit(f, case_hop);
+ ir_note_emit(f, case_ret);
+ ir_note_emit(f, other_ret);
+ ir_note_emit(f, default_hop);
+ ir_note_emit(f, default_ret);
+
+ Val sel = add_val(f, tc.i32);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val c = add_val(f, tc.i32);
+ emit_scalar_input(f, b0, sel, tc.i32);
+ emit_switch2(f, b0, sel, 1, case_hop, 2, other_ret, default_hop, tc.i32);
+ emit_br_to(f, case_hop, case_ret);
+ emit_load_imm(f, case_ret, a, tc.i32, 11);
+ emit_ret_val(f, case_ret, a, tc.i32);
+ emit_load_imm(f, other_ret, b, tc.i32, 22);
+ emit_ret_val(f, other_ret, b, tc.i32);
+ emit_br_to(f, default_hop, default_ret);
+ emit_load_imm(f, default_ret, c, tc.i32, 33);
+ emit_ret_val(f, default_ret, c, tc.i32);
+
+ opt_jump_opt(f);
+ opt_verify(f, "test-jump-opt-switch");
+
+ Inst* sw = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u];
+ IRSwitchAux* aux = (IRSwitchAux*)sw->extra.aux;
+ EXPECT(aux->cases[0].block == case_ret,
+ "jump opt should forward switch case trampoline");
+ EXPECT(aux->default_block == default_ret,
+ "jump opt should forward switch default trampoline");
+ EXPECT(f->blocks[case_hop].ninsts == 0 && f->blocks[default_hop].ninsts == 0,
+ "forwarded switch trampolines should be pruned");
+ tc_fini(&tc);
+}
+
+static void opt_jump_opt_forwards_empty_fallthrough_chain(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 b0 = f->entry;
+ u32 empty0 = ir_block_new(f);
+ u32 empty1 = ir_block_new(f);
+ u32 fallthrough = ir_block_new(f);
+ u32 taken = ir_block_new(f);
+ ir_note_emit(f, empty0);
+ ir_note_emit(f, empty1);
+ ir_note_emit(f, fallthrough);
+ ir_note_emit(f, taken);
+
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ emit_test_branch(f, b0, taken, empty0, tc.i32);
+ emit_load_imm(f, fallthrough, a, tc.i32, 1);
+ emit_ret_val(f, fallthrough, a, tc.i32);
+ emit_load_imm(f, taken, b, tc.i32, 2);
+ emit_ret_val(f, taken, b, tc.i32);
+
+ opt_jump_opt(f);
+ opt_verify(f, "test-jump-opt-empty-fallthrough");
+
+ EXPECT(f->blocks[b0].succ[1] == fallthrough,
+ "jump opt should forward empty physical fallthrough chain");
+ EXPECT(f->blocks[empty0].ninsts == 0 && f->blocks[empty0].nsucc == 0,
+ "first empty fallthrough block should be pruned");
+ EXPECT(f->blocks[empty1].ninsts == 0 && f->blocks[empty1].nsucc == 0,
+ "second empty fallthrough block should be pruned");
+ EXPECT(f->emit_order_n >= 2 && f->emit_order[1] == fallthrough,
+ "forwarded fallthrough should become physical next block");
+ tc_fini(&tc);
+}
+
+static void opt_jump_opt_collapses_same_target_cond_branch(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 b0 = f->entry;
+ u32 ret = ir_block_new(f);
+ ir_note_emit(f, ret);
+
+ Val cond = add_val(f, tc.i32);
+ Val v = add_val(f, tc.i32);
+ emit_scalar_input(f, b0, cond, tc.i32);
+ emit_cond_branch(f, b0, cond, ret, ret, tc.i32);
+ emit_load_imm(f, ret, v, tc.i32, 7);
+ emit_ret_val(f, ret, v, tc.i32);
+
+ opt_jump_opt(f);
+ opt_verify(f, "test-jump-opt-same-target");
+
+ Inst* br = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u];
+ EXPECT((IROp)br->op == IR_BR && f->blocks[b0].nsucc == 1 &&
+ f->blocks[b0].succ[0] == ret,
+ "jump opt should collapse same-target conditional branch");
+ tc_fini(&tc);
+}
+
static void opt_loop_tree_excludes_side_exit(void) {
TestCtx tc;
tc_init(&tc);
@@ -6025,6 +6154,9 @@ int main(void) {
opt_jump_cleanup_inverts_to_remove_jump_block();
opt_jump_cleanup_keeps_conditional_fallthrough_block();
opt_jump_cleanup_layout_deletes_fallthrough_branch();
+ opt_jump_opt_forwards_switch_targets();
+ opt_jump_opt_forwards_empty_fallthrough_chain();
+ opt_jump_opt_collapses_same_target_cond_branch();
opt_loop_tree_excludes_side_exit();
opt_loop_tree_nested_depths();
opt_loop_tree_does_not_mutate_cfg();