kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit eb3d74591d044df173692292c8ccfaf49a7f41e9
parent 9903260e5965896db10241d23abfcd72b7c446ce
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 21:22:31 -0700

Implement O2 SSA combine

Diffstat:
Mdoc/OPT.md | 13+++++++------
Msrc/opt/opt.c | 2++
Msrc/opt/pass_o2.c | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/opt/opt_test.c | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 152 insertions(+), 6 deletions(-)

diff --git a/doc/OPT.md b/doc/OPT.md @@ -455,6 +455,8 @@ pressure_relief verify(o2-pressure-relief-ssa) make_conventional_ssa verify(o2-conventional-ssa) +ssa_combine +verify(o2-ssa-combine) undo_ssa copy_cleanup verify(o2-undo-copy-cleanup) @@ -667,10 +669,9 @@ Temporary defensive/pessimizing checks to lift: are retargeted when CFG cleanup forwards or removes labeled trampoline blocks. -The next implementation work should close the remaining Phase D gaps: -`opt_ssa_combine` first, then full O2 jump optimization beyond the shared O1 -jump cleanup. After that, Phase E inlining and cleanup become the next major -O2 feature. +The next implementation work should close the remaining Phase D gap: full O2 +jump optimization beyond the shared O1 jump cleanup. After that, Phase E +inlining and cleanup become the next major O2 feature. --- @@ -820,7 +821,7 @@ Implement in order: 11. [x] `opt_licm` 12. [x] `opt_pressure_relief` 13. [x] `opt_make_conventional_ssa` -14. [ ] `opt_ssa_combine` +14. [x] `opt_ssa_combine` 15. [x] `opt_undo_ssa` 16. [ ] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup @@ -963,7 +964,7 @@ Remaining Phase D exit criteria: removed from the generated code. - [x] LICM and pressure relief have focused pass-local tests and are in the default O2 schedule. -- [ ] `opt_ssa_combine` has focused red-green tests before it enters the +- [x] `opt_ssa_combine` has focused red-green tests before it enters the default O2 schedule. - [ ] Full O2 jump optimization has focused red-green tests before it enters the default O2 schedule. diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1450,6 +1450,8 @@ static void opt_run_o2_pipeline(OptImpl* o) { opt_verify(o->f, "o2-pressure-relief-ssa"); opt_make_conventional_ssa(o->f); opt_verify(o->f, "o2-conventional-ssa"); + opt_ssa_combine(o->f); + opt_verify(o->f, "o2-ssa-combine"); opt_undo_ssa(o->f); opt_copy_cleanup(o->f); opt_verify(o->f, "o2-undo-copy-cleanup"); diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c @@ -2596,6 +2596,75 @@ void opt_pressure_relief(Func* f) { opt_rebuild_def_use(f); } +static int ssa_combine_cmp_def(Func* f, Val v, Inst** out) { + Inst* def = NULL; + if (!val_def_inst(f, v, &def)) return 0; + if ((IROp)def->op != IR_CMP || def->nopnds < 3) return 0; + *out = def; + return 1; +} + +static int ssa_combine_fold_cmp_branch(Func* f) { + int changed = 0; + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + if (!bl->ninsts || bl->nsucc < 2) continue; + Inst* br = &bl->insts[bl->ninsts - 1u]; + if ((IROp)br->op != IR_CONDBR || br->nopnds < 1) continue; + if (br->opnds[0].kind != OPK_REG) continue; + Inst* cmp = NULL; + if (!ssa_combine_cmp_def(f, (Val)br->opnds[0].v.reg, &cmp)) continue; + + Operand* opnds = arena_array(f->arena, Operand, 2); + opnds[0] = cmp->opnds[1]; + opnds[1] = cmp->opnds[2]; + br->op = IR_CMP_BRANCH; + br->opnds = opnds; + br->nopnds = 2; + br->extra.imm = cmp->extra.imm; + changed = 1; + } + return changed; +} + +static int ssa_combine_fold_addr_uses(Func* f) { + int changed = 0; + opt_rebuild_def_use(f); + u32 nvals = f->nvals; + for (Val v = 1; v < nvals; ++v) { + Inst* def = NULL; + if (!addr_def_inst(f, v, &def)) continue; + Operand addr = def->opnds[1]; + if (addr.kind != OPK_LOCAL) continue; + + for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; + u = f->opt_uses[u].next_for_val) { + OptUse* use = &f->opt_uses[u]; + if (!addr_use_foldable(f, use)) continue; + Inst* mem = &f->blocks[use->block].insts[use->inst]; + Operand folded = addr; + folded.type = mem->extra.mem.type ? mem->extra.mem.type : addr.type; + *use->operand = folded; + changed = 1; + } + } + return changed; +} + +void opt_ssa_combine(Func* f) { + if (!f || f->opt_rewritten) return; + if (!f->opt_reg_ssa && f->npregs > 1) { + opt_rebuild_def_use(f); + return; + } + + opt_rebuild_def_use(f); + int changed = ssa_combine_fold_cmp_branch(f); + changed |= ssa_combine_fold_addr_uses(f); + if (changed) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); + opt_rebuild_def_use(f); +} + void opt_cleanup(Func* f) { if (!f) return; opt_ssa_dce(f); diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -490,6 +490,17 @@ static void emit_cond_branch(Func* f, u32 b, Val cond, u32 taken, f->blocks[b].nsucc = 2; } +static void emit_raw_condbr(Func* f, u32 b, Val cond, u32 taken, + u32 fallthrough, CfreeCgTypeId ty) { + Inst* in = ir_emit(f, b, IR_CONDBR); + in->opnds = arena_array(f->arena, Operand, 1); + in->opnds[0] = op_reg_(cond, ty); + in->nopnds = 1; + f->blocks[b].succ[0] = taken; + f->blocks[b].succ[1] = fallthrough; + f->blocks[b].nsucc = 2; +} + static int bytes_contains(const unsigned char* data, size_t len, const char* needle) { size_t nlen = strlen(needle); @@ -2571,6 +2582,67 @@ static void opt_addr_xform_preserves_volatile_and_globals(void) { tc_fini(&tc); } +static void opt_ssa_combine_fuses_cmp_condbr(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 entry = f->entry; + u32 taken = ir_block_new(f); + u32 fallthrough = ir_block_new(f); + ir_note_emit(f, taken); + ir_note_emit(f, fallthrough); + + Val a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32); + Val cmp = add_val(f, tc.i32); + emit_scalar_input(f, entry, a, tc.i32); + emit_scalar_input(f, entry, b, tc.i32); + emit_cmp(f, entry, cmp, a, b, tc.i32, CMP_LT_S); + emit_raw_condbr(f, entry, cmp, taken, fallthrough, tc.i32); + emit_ret_val(f, taken, a, tc.i32); + emit_ret_val(f, fallthrough, b, tc.i32); + + opt_build_cfg(f); + opt_ssa_combine(f); + opt_verify(f, "test-ssa-combine-cmp-branch"); + + Inst* br = &f->blocks[entry].insts[f->blocks[entry].ninsts - 1u]; + EXPECT((IROp)br->op == IR_CMP_BRANCH && br->extra.imm == CMP_LT_S, + "ssa combine should fuse cmp-fed condbr into cmp_branch"); + EXPECT(br->nopnds == 2 && br->opnds[0].v.reg == (Reg)a && + br->opnds[1].v.reg == (Reg)b, + "fused cmp_branch should use the original cmp operands"); + tc_fini(&tc); +} + +static void opt_ssa_combine_folds_partial_local_addr_uses(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); + Val addr = add_val(f, ptr_ty); + Val out = add_val(f, tc.i32); + Val src = add_val(f, tc.i32); + emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); + emit_load_indirect(f, f->entry, out, addr, tc.i32, 0); + emit_scalar_input(f, f->entry, src, tc.i32); + emit_store_indirect(f, f->entry, addr, src, tc.i32, MF_VOLATILE); + emit_ret_val(f, f->entry, out, tc.i32); + + opt_build_cfg(f); + opt_ssa_combine(f); + opt_verify(f, "test-ssa-combine-addr"); + + Inst* load = &f->blocks[f->entry].insts[1]; + Inst* store = &f->blocks[f->entry].insts[3]; + EXPECT((IROp)load->op == IR_LOAD && load->opnds[1].kind == OPK_LOCAL, + "ssa combine should fold foldable local address load uses"); + EXPECT((IROp)store->op == IR_STORE && store->opnds[0].kind == OPK_INDIRECT, + "ssa combine should preserve non-foldable address uses"); + tc_fini(&tc); +} + static void opt_simplify_local_rewrites_integer_identities(void) { TestCtx tc; tc_init(&tc); @@ -5922,6 +5994,8 @@ int main(void) { opt_block_cloning_skips_loop_backedges(); opt_addr_xform_folds_local_addr_into_memory_operand(); opt_addr_xform_preserves_volatile_and_globals(); + opt_ssa_combine_fuses_cmp_condbr(); + opt_ssa_combine_folds_partial_local_addr_uses(); opt_simplify_local_rewrites_integer_identities(); opt_simplify_local_preserves_unsafe_cases(); opt_simplify_rewrites_ssa_nested_identities();