commit eb3d74591d044df173692292c8ccfaf49a7f41e9
parent 9903260e5965896db10241d23abfcd72b7c446ce
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 21:22:31 -0700
Implement O2 SSA combine
Diffstat:
4 files changed, 152 insertions(+), 6 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -455,6 +455,8 @@ pressure_relief
verify(o2-pressure-relief-ssa)
make_conventional_ssa
verify(o2-conventional-ssa)
+ssa_combine
+verify(o2-ssa-combine)
undo_ssa
copy_cleanup
verify(o2-undo-copy-cleanup)
@@ -667,10 +669,9 @@ Temporary defensive/pessimizing checks to lift:
are retargeted when CFG cleanup forwards or removes labeled trampoline
blocks.
-The next implementation work should close the remaining Phase D gaps:
-`opt_ssa_combine` first, then full O2 jump optimization beyond the shared O1
-jump cleanup. After that, Phase E inlining and cleanup become the next major
-O2 feature.
+The next implementation work should close the remaining Phase D gap: full O2
+jump optimization beyond the shared O1 jump cleanup. After that, Phase E
+inlining and cleanup become the next major O2 feature.
---
@@ -820,7 +821,7 @@ Implement in order:
11. [x] `opt_licm`
12. [x] `opt_pressure_relief`
13. [x] `opt_make_conventional_ssa`
-14. [ ] `opt_ssa_combine`
+14. [x] `opt_ssa_combine`
15. [x] `opt_undo_ssa`
16. [ ] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup
@@ -963,7 +964,7 @@ Remaining Phase D exit criteria:
removed from the generated code.
- [x] LICM and pressure relief have focused pass-local tests and are in the
default O2 schedule.
-- [ ] `opt_ssa_combine` has focused red-green tests before it enters the
+- [x] `opt_ssa_combine` has focused red-green tests before it enters the
default O2 schedule.
- [ ] Full O2 jump optimization has focused red-green tests before it enters
the default O2 schedule.
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1450,6 +1450,8 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_verify(o->f, "o2-pressure-relief-ssa");
opt_make_conventional_ssa(o->f);
opt_verify(o->f, "o2-conventional-ssa");
+ opt_ssa_combine(o->f);
+ opt_verify(o->f, "o2-ssa-combine");
opt_undo_ssa(o->f);
opt_copy_cleanup(o->f);
opt_verify(o->f, "o2-undo-copy-cleanup");
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -2596,6 +2596,75 @@ void opt_pressure_relief(Func* f) {
opt_rebuild_def_use(f);
}
+static int ssa_combine_cmp_def(Func* f, Val v, Inst** out) {
+ Inst* def = NULL;
+ if (!val_def_inst(f, v, &def)) return 0;
+ if ((IROp)def->op != IR_CMP || def->nopnds < 3) return 0;
+ *out = def;
+ return 1;
+}
+
+static int ssa_combine_fold_cmp_branch(Func* f) {
+ int changed = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ if (!bl->ninsts || bl->nsucc < 2) continue;
+ Inst* br = &bl->insts[bl->ninsts - 1u];
+ if ((IROp)br->op != IR_CONDBR || br->nopnds < 1) continue;
+ if (br->opnds[0].kind != OPK_REG) continue;
+ Inst* cmp = NULL;
+ if (!ssa_combine_cmp_def(f, (Val)br->opnds[0].v.reg, &cmp)) continue;
+
+ Operand* opnds = arena_array(f->arena, Operand, 2);
+ opnds[0] = cmp->opnds[1];
+ opnds[1] = cmp->opnds[2];
+ br->op = IR_CMP_BRANCH;
+ br->opnds = opnds;
+ br->nopnds = 2;
+ br->extra.imm = cmp->extra.imm;
+ changed = 1;
+ }
+ return changed;
+}
+
+static int ssa_combine_fold_addr_uses(Func* f) {
+ int changed = 0;
+ opt_rebuild_def_use(f);
+ u32 nvals = f->nvals;
+ for (Val v = 1; v < nvals; ++v) {
+ Inst* def = NULL;
+ if (!addr_def_inst(f, v, &def)) continue;
+ Operand addr = def->opnds[1];
+ if (addr.kind != OPK_LOCAL) continue;
+
+ for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
+ u = f->opt_uses[u].next_for_val) {
+ OptUse* use = &f->opt_uses[u];
+ if (!addr_use_foldable(f, use)) continue;
+ Inst* mem = &f->blocks[use->block].insts[use->inst];
+ Operand folded = addr;
+ folded.type = mem->extra.mem.type ? mem->extra.mem.type : addr.type;
+ *use->operand = folded;
+ changed = 1;
+ }
+ }
+ return changed;
+}
+
+void opt_ssa_combine(Func* f) {
+ if (!f || f->opt_rewritten) return;
+ if (!f->opt_reg_ssa && f->npregs > 1) {
+ opt_rebuild_def_use(f);
+ return;
+ }
+
+ opt_rebuild_def_use(f);
+ int changed = ssa_combine_fold_cmp_branch(f);
+ changed |= ssa_combine_fold_addr_uses(f);
+ if (changed) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ opt_rebuild_def_use(f);
+}
+
void opt_cleanup(Func* f) {
if (!f) return;
opt_ssa_dce(f);
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -490,6 +490,17 @@ static void emit_cond_branch(Func* f, u32 b, Val cond, u32 taken,
f->blocks[b].nsucc = 2;
}
+static void emit_raw_condbr(Func* f, u32 b, Val cond, u32 taken,
+ u32 fallthrough, CfreeCgTypeId ty) {
+ Inst* in = ir_emit(f, b, IR_CONDBR);
+ in->opnds = arena_array(f->arena, Operand, 1);
+ in->opnds[0] = op_reg_(cond, ty);
+ in->nopnds = 1;
+ f->blocks[b].succ[0] = taken;
+ f->blocks[b].succ[1] = fallthrough;
+ f->blocks[b].nsucc = 2;
+}
+
static int bytes_contains(const unsigned char* data, size_t len,
const char* needle) {
size_t nlen = strlen(needle);
@@ -2571,6 +2582,67 @@ static void opt_addr_xform_preserves_volatile_and_globals(void) {
tc_fini(&tc);
}
+static void opt_ssa_combine_fuses_cmp_condbr(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 entry = f->entry;
+ u32 taken = ir_block_new(f);
+ u32 fallthrough = ir_block_new(f);
+ ir_note_emit(f, taken);
+ ir_note_emit(f, fallthrough);
+
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val cmp = add_val(f, tc.i32);
+ emit_scalar_input(f, entry, a, tc.i32);
+ emit_scalar_input(f, entry, b, tc.i32);
+ emit_cmp(f, entry, cmp, a, b, tc.i32, CMP_LT_S);
+ emit_raw_condbr(f, entry, cmp, taken, fallthrough, tc.i32);
+ emit_ret_val(f, taken, a, tc.i32);
+ emit_ret_val(f, fallthrough, b, tc.i32);
+
+ opt_build_cfg(f);
+ opt_ssa_combine(f);
+ opt_verify(f, "test-ssa-combine-cmp-branch");
+
+ Inst* br = &f->blocks[entry].insts[f->blocks[entry].ninsts - 1u];
+ EXPECT((IROp)br->op == IR_CMP_BRANCH && br->extra.imm == CMP_LT_S,
+ "ssa combine should fuse cmp-fed condbr into cmp_branch");
+ EXPECT(br->nopnds == 2 && br->opnds[0].v.reg == (Reg)a &&
+ br->opnds[1].v.reg == (Reg)b,
+ "fused cmp_branch should use the original cmp operands");
+ tc_fini(&tc);
+}
+
+static void opt_ssa_combine_folds_partial_local_addr_uses(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0);
+ Val addr = add_val(f, ptr_ty);
+ Val out = add_val(f, tc.i32);
+ Val src = add_val(f, tc.i32);
+ emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32);
+ emit_load_indirect(f, f->entry, out, addr, tc.i32, 0);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_indirect(f, f->entry, addr, src, tc.i32, MF_VOLATILE);
+ emit_ret_val(f, f->entry, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_ssa_combine(f);
+ opt_verify(f, "test-ssa-combine-addr");
+
+ Inst* load = &f->blocks[f->entry].insts[1];
+ Inst* store = &f->blocks[f->entry].insts[3];
+ EXPECT((IROp)load->op == IR_LOAD && load->opnds[1].kind == OPK_LOCAL,
+ "ssa combine should fold foldable local address load uses");
+ EXPECT((IROp)store->op == IR_STORE && store->opnds[0].kind == OPK_INDIRECT,
+ "ssa combine should preserve non-foldable address uses");
+ tc_fini(&tc);
+}
+
static void opt_simplify_local_rewrites_integer_identities(void) {
TestCtx tc;
tc_init(&tc);
@@ -5922,6 +5994,8 @@ int main(void) {
opt_block_cloning_skips_loop_backedges();
opt_addr_xform_folds_local_addr_into_memory_operand();
opt_addr_xform_preserves_volatile_and_globals();
+ opt_ssa_combine_fuses_cmp_condbr();
+ opt_ssa_combine_folds_partial_local_addr_uses();
opt_simplify_local_rewrites_integer_identities();
opt_simplify_local_preserves_unsafe_cases();
opt_simplify_rewrites_ssa_nested_identities();