kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ca216b998dd7f60f3801867c9ae76b9e3ce9dded
parent c72fb5b7d435448fad6f4baf766a7d5670809fc8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 18:00:15 -0700

Implement O2 pressure relief

Diffstat:
Mdoc/OPT.md | 2+-
Msrc/opt/opt.c | 2++
Msrc/opt/pass_o2.c | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/opt/opt_test.c | 128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 246 insertions(+), 1 deletion(-)

diff --git a/doc/OPT.md b/doc/OPT.md @@ -802,7 +802,7 @@ Implement in order: 9. [x] extended memory tracker for joins/path state 10. [x] `opt_dse` 11. [x] `opt_licm` -12. [ ] `opt_pressure_relief` +12. [x] `opt_pressure_relief` 13. [x] `opt_make_conventional_ssa` 14. [ ] `opt_ssa_combine` 15. [x] `opt_undo_ssa` diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1446,6 +1446,8 @@ static void opt_run_o2_pipeline(OptImpl* o) { opt_build_loop_tree(o->f); opt_licm(o->f); opt_verify(o->f, "o2-licm-ssa"); + opt_pressure_relief(o->f); + opt_verify(o->f, "o2-pressure-relief-ssa"); opt_make_conventional_ssa(o->f); opt_verify(o->f, "o2-conventional-ssa"); opt_undo_ssa(o->f); diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c @@ -2411,6 +2411,121 @@ void opt_licm(Func* f) { opt_rebuild_def_use(f); } +static int pressure_candidate(const Inst* in) { + if (!in || in->def == VAL_NONE || in->ndefs) return 0; + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_CONST_I: + return 1; + default: + return 0; + } +} + +static int pressure_barrier(const Inst* in) { + if (!in) return 1; + if (o2_is_terminator(in)) return 1; + switch ((IROp)in->op) { + case IR_LOAD: + case IR_STORE: + case IR_LOAD_CONST: + case IR_AGG_COPY: + case IR_AGG_SET: + case IR_BITFIELD_LOAD: + case IR_BITFIELD_STORE: + case IR_CALL: + case IR_SCOPE_BEGIN: + case IR_SCOPE_ELSE: + case IR_SCOPE_END: + case IR_BREAK_TO: + case IR_CONTINUE_TO: + case IR_ALLOCA: + case IR_VA_START: + case IR_VA_ARG: + case IR_VA_END: + case IR_VA_COPY: + case IR_ATOMIC_LOAD: + case IR_ATOMIC_STORE: + case IR_ATOMIC_RMW: + case IR_ATOMIC_CAS: + case IR_FENCE: + case IR_ASM_BLOCK: + case IR_INTRINSIC: + return 1; + default: + return 0; + } +} + +static int pressure_single_use(Func* f, Val v, OptUse* out) { + u32 n = 0; + OptUse one; + memset(&one, 0, sizeof one); + for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; + u = f->opt_uses[u].next_for_val) { + one = f->opt_uses[u]; + ++n; + if (n > 1u) return 0; + } + if (n != 1u) return 0; + *out = one; + return 1; +} + +static int pressure_can_move_within_block(Func* f, u32 b, u32 from, u32 to) { + if (b >= f->nblocks || from >= to) return 0; + Block* bl = &f->blocks[b]; + if (to > bl->ninsts) return 0; + for (u32 i = from + 1u; i < to; ++i) + if (pressure_barrier(&bl->insts[i])) return 0; + return 1; +} + +static void pressure_move_before_use(Func* f, u32 b, u32 from, u32 use_inst) { + Block* bl = &f->blocks[b]; + Inst moved = bl->insts[from]; + for (u32 i = from + 1u; i < use_inst; ++i) bl->insts[i - 1u] = bl->insts[i]; + bl->insts[use_inst - 1u] = moved; + dse_refresh_def_locations(f); +} + +static int pressure_relief_once(Func* f) { + opt_rebuild_def_use(f); + for (Val v = 1; v < f->nvals; ++v) { + u32 db = f->val_def_block[v]; + u32 di = f->val_def_inst[v]; + if (db >= f->nblocks || di >= f->blocks[db].ninsts) continue; + Inst* def = &f->blocks[db].insts[di]; + if (def->def != v || !pressure_candidate(def)) continue; + + OptUse use; + if (!pressure_single_use(f, v, &use)) continue; + if (use.kind != OPT_USE_OPERAND) continue; + if (use.block != db) continue; + if (use.inst <= di + 1u) continue; + if (f->blocks[use.block].loop_depth > f->blocks[db].loop_depth) continue; + if (!pressure_can_move_within_block(f, db, di, use.inst)) continue; + + pressure_move_before_use(f, db, di, use.inst); + opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); + return 1; + } + return 0; +} + +void opt_pressure_relief(Func* f) { + if (!f || f->opt_rewritten) return; + if (!f->opt_reg_ssa && f->npregs > 1) { + opt_rebuild_def_use(f); + return; + } + + int changed = 0; + while (pressure_relief_once(f)) changed = 1; + if (changed) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); + opt_rebuild_def_use(f); +} + void opt_cleanup(Func* f) { if (!f) return; opt_ssa_dce(f); diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -3679,6 +3679,130 @@ static void opt_licm_preserves_trapping_and_memory_ops(void) { tc_fini(&tc); } +static void opt_pressure_relief_sinks_single_use_load_imm(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + Val x = add_val(f, tc.i32); + Val y = add_val(f, tc.i32); + Val imm = add_val(f, tc.i32); + Val last = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + + emit_scalar_input(f, f->entry, x, tc.i32); + emit_scalar_input(f, f->entry, y, tc.i32); + emit_load_imm(f, f->entry, imm, tc.i32, 42); + for (u32 i = 0; i < 12; ++i) { + last = add_val(f, tc.i32); + emit_binop(f, f->entry, last, x, y, tc.i32); + } + emit_binop(f, f->entry, out, imm, last, tc.i32); + emit_ret_val(f, f->entry, out, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_pressure_relief(f); + opt_verify(f, "test-pressure-sink"); + + EXPECT(f->val_def_block[imm] == f->entry, + "pressure relief should keep same-block load_imm in entry"); + EXPECT(f->val_def_inst[imm] + 1u == f->val_def_inst[out], + "pressure relief should sink load_imm immediately before its use"); + tc_fini(&tc); +} + +static void opt_pressure_relief_does_not_sink_into_loop(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 entry = f->entry; + u32 header = ir_block_new(f); + u32 body = ir_block_new(f); + u32 exit = ir_block_new(f); + ir_note_emit(f, header); + ir_note_emit(f, body); + ir_note_emit(f, exit); + + Val cond = add_val(f, tc.i32); + Val imm = add_val(f, tc.i32); + Val x = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + emit_scalar_input(f, entry, cond, tc.i32); + emit_load_imm(f, entry, imm, tc.i32, 7); + emit_scalar_input(f, entry, x, tc.i32); + emit_br_to(f, entry, header); + emit_cond_branch(f, header, cond, body, exit, tc.i32); + emit_binop(f, body, out, imm, x, tc.i32); + emit_br_to(f, body, header); + emit_ret_val(f, exit, x, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_pressure_relief(f); + opt_verify(f, "test-pressure-no-loop-sink"); + + EXPECT(f->val_def_block[imm] == entry, + "pressure relief must not sink an outer def into a loop block"); + tc_fini(&tc); +} + +static void opt_pressure_relief_preserves_multi_use_constant(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + Val imm = add_val(f, tc.i32); + Val x = add_val(f, tc.i32); + Val y = add_val(f, tc.i32); + Val a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32); + emit_load_imm(f, f->entry, imm, tc.i32, 9); + u32 original_inst = f->val_def_inst[imm]; + emit_scalar_input(f, f->entry, x, tc.i32); + emit_scalar_input(f, f->entry, y, tc.i32); + emit_binop(f, f->entry, a, imm, x, tc.i32); + emit_binop(f, f->entry, b, imm, y, tc.i32); + emit_ret_val(f, f->entry, b, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_pressure_relief(f); + opt_verify(f, "test-pressure-multi-use"); + + EXPECT(f->val_def_inst[imm] == original_inst, + "pressure relief must leave multi-use constants in place"); + EXPECT(count_uses_of(f, imm) == 2, + "multi-use constant should still have both uses"); + tc_fini(&tc); +} + +static void opt_pressure_relief_does_not_cross_memory_ops(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + Val imm = add_val(f, tc.i32); + Val loaded = add_val(f, tc.i32); + Val x = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + emit_load_imm(f, f->entry, imm, tc.i32, 11); + u32 original_inst = f->val_def_inst[imm]; + emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); + emit_scalar_input(f, f->entry, x, tc.i32); + emit_binop(f, f->entry, out, imm, x, tc.i32); + emit_ret_val(f, f->entry, out, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_pressure_relief(f); + opt_verify(f, "test-pressure-memory-barrier"); + + EXPECT(f->val_def_inst[imm] == original_inst, + "pressure relief must not sink constants across memory operations"); + EXPECT(f->val_def_inst[loaded] == original_inst + 1u, + "memory operation should stay after the original constant"); + tc_fini(&tc); +} + static void opt_regalloc_priority(void) { TestCtx tc; tc_init(&tc); @@ -5796,6 +5920,10 @@ int main(void) { opt_loop_tree_does_not_mutate_cfg(); opt_licm_hoists_safe_invariant_to_preheader(); opt_licm_preserves_trapping_and_memory_ops(); + opt_pressure_relief_sinks_single_use_load_imm(); + opt_pressure_relief_does_not_sink_into_loop(); + opt_pressure_relief_preserves_multi_use_constant(); + opt_pressure_relief_does_not_cross_memory_ops(); opt_liveness_branch(); opt_block_liveness_phase1(); opt_live_ranges_phase2();