commit ca216b998dd7f60f3801867c9ae76b9e3ce9dded
parent c72fb5b7d435448fad6f4baf766a7d5670809fc8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 18:00:15 -0700
Implement O2 pressure relief
Diffstat:
4 files changed, 246 insertions(+), 1 deletion(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -802,7 +802,7 @@ Implement in order:
9. [x] extended memory tracker for joins/path state
10. [x] `opt_dse`
11. [x] `opt_licm`
-12. [ ] `opt_pressure_relief`
+12. [x] `opt_pressure_relief`
13. [x] `opt_make_conventional_ssa`
14. [ ] `opt_ssa_combine`
15. [x] `opt_undo_ssa`
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1446,6 +1446,8 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_build_loop_tree(o->f);
opt_licm(o->f);
opt_verify(o->f, "o2-licm-ssa");
+ opt_pressure_relief(o->f);
+ opt_verify(o->f, "o2-pressure-relief-ssa");
opt_make_conventional_ssa(o->f);
opt_verify(o->f, "o2-conventional-ssa");
opt_undo_ssa(o->f);
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -2411,6 +2411,121 @@ void opt_licm(Func* f) {
opt_rebuild_def_use(f);
}
+static int pressure_candidate(const Inst* in) {
+ if (!in || in->def == VAL_NONE || in->ndefs) return 0;
+ switch ((IROp)in->op) {
+ case IR_LOAD_IMM:
+ case IR_CONST_I:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int pressure_barrier(const Inst* in) {
+ if (!in) return 1;
+ if (o2_is_terminator(in)) return 1;
+ switch ((IROp)in->op) {
+ case IR_LOAD:
+ case IR_STORE:
+ case IR_LOAD_CONST:
+ case IR_AGG_COPY:
+ case IR_AGG_SET:
+ case IR_BITFIELD_LOAD:
+ case IR_BITFIELD_STORE:
+ case IR_CALL:
+ case IR_SCOPE_BEGIN:
+ case IR_SCOPE_ELSE:
+ case IR_SCOPE_END:
+ case IR_BREAK_TO:
+ case IR_CONTINUE_TO:
+ case IR_ALLOCA:
+ case IR_VA_START:
+ case IR_VA_ARG:
+ case IR_VA_END:
+ case IR_VA_COPY:
+ case IR_ATOMIC_LOAD:
+ case IR_ATOMIC_STORE:
+ case IR_ATOMIC_RMW:
+ case IR_ATOMIC_CAS:
+ case IR_FENCE:
+ case IR_ASM_BLOCK:
+ case IR_INTRINSIC:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int pressure_single_use(Func* f, Val v, OptUse* out) {
+ u32 n = 0;
+ OptUse one;
+ memset(&one, 0, sizeof one);
+ for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
+ u = f->opt_uses[u].next_for_val) {
+ one = f->opt_uses[u];
+ ++n;
+ if (n > 1u) return 0;
+ }
+ if (n != 1u) return 0;
+ *out = one;
+ return 1;
+}
+
+static int pressure_can_move_within_block(Func* f, u32 b, u32 from, u32 to) {
+ if (b >= f->nblocks || from >= to) return 0;
+ Block* bl = &f->blocks[b];
+ if (to > bl->ninsts) return 0;
+ for (u32 i = from + 1u; i < to; ++i)
+ if (pressure_barrier(&bl->insts[i])) return 0;
+ return 1;
+}
+
+static void pressure_move_before_use(Func* f, u32 b, u32 from, u32 use_inst) {
+ Block* bl = &f->blocks[b];
+ Inst moved = bl->insts[from];
+ for (u32 i = from + 1u; i < use_inst; ++i) bl->insts[i - 1u] = bl->insts[i];
+ bl->insts[use_inst - 1u] = moved;
+ dse_refresh_def_locations(f);
+}
+
+static int pressure_relief_once(Func* f) {
+ opt_rebuild_def_use(f);
+ for (Val v = 1; v < f->nvals; ++v) {
+ u32 db = f->val_def_block[v];
+ u32 di = f->val_def_inst[v];
+ if (db >= f->nblocks || di >= f->blocks[db].ninsts) continue;
+ Inst* def = &f->blocks[db].insts[di];
+ if (def->def != v || !pressure_candidate(def)) continue;
+
+ OptUse use;
+ if (!pressure_single_use(f, v, &use)) continue;
+ if (use.kind != OPT_USE_OPERAND) continue;
+ if (use.block != db) continue;
+ if (use.inst <= di + 1u) continue;
+ if (f->blocks[use.block].loop_depth > f->blocks[db].loop_depth) continue;
+ if (!pressure_can_move_within_block(f, db, di, use.inst)) continue;
+
+ pressure_move_before_use(f, db, di, use.inst);
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ return 1;
+ }
+ return 0;
+}
+
+void opt_pressure_relief(Func* f) {
+ if (!f || f->opt_rewritten) return;
+ if (!f->opt_reg_ssa && f->npregs > 1) {
+ opt_rebuild_def_use(f);
+ return;
+ }
+
+ int changed = 0;
+ while (pressure_relief_once(f)) changed = 1;
+ if (changed) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ opt_rebuild_def_use(f);
+}
+
void opt_cleanup(Func* f) {
if (!f) return;
opt_ssa_dce(f);
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -3679,6 +3679,130 @@ static void opt_licm_preserves_trapping_and_memory_ops(void) {
tc_fini(&tc);
}
+static void opt_pressure_relief_sinks_single_use_load_imm(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val x = add_val(f, tc.i32);
+ Val y = add_val(f, tc.i32);
+ Val imm = add_val(f, tc.i32);
+ Val last = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ emit_scalar_input(f, f->entry, y, tc.i32);
+ emit_load_imm(f, f->entry, imm, tc.i32, 42);
+ for (u32 i = 0; i < 12; ++i) {
+ last = add_val(f, tc.i32);
+ emit_binop(f, f->entry, last, x, y, tc.i32);
+ }
+ emit_binop(f, f->entry, out, imm, last, tc.i32);
+ emit_ret_val(f, f->entry, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ opt_pressure_relief(f);
+ opt_verify(f, "test-pressure-sink");
+
+ EXPECT(f->val_def_block[imm] == f->entry,
+ "pressure relief should keep same-block load_imm in entry");
+ EXPECT(f->val_def_inst[imm] + 1u == f->val_def_inst[out],
+ "pressure relief should sink load_imm immediately before its use");
+ tc_fini(&tc);
+}
+
+static void opt_pressure_relief_does_not_sink_into_loop(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 entry = f->entry;
+ u32 header = ir_block_new(f);
+ u32 body = ir_block_new(f);
+ u32 exit = ir_block_new(f);
+ ir_note_emit(f, header);
+ ir_note_emit(f, body);
+ ir_note_emit(f, exit);
+
+ Val cond = add_val(f, tc.i32);
+ Val imm = add_val(f, tc.i32);
+ Val x = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_scalar_input(f, entry, cond, tc.i32);
+ emit_load_imm(f, entry, imm, tc.i32, 7);
+ emit_scalar_input(f, entry, x, tc.i32);
+ emit_br_to(f, entry, header);
+ emit_cond_branch(f, header, cond, body, exit, tc.i32);
+ emit_binop(f, body, out, imm, x, tc.i32);
+ emit_br_to(f, body, header);
+ emit_ret_val(f, exit, x, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ opt_pressure_relief(f);
+ opt_verify(f, "test-pressure-no-loop-sink");
+
+ EXPECT(f->val_def_block[imm] == entry,
+ "pressure relief must not sink an outer def into a loop block");
+ tc_fini(&tc);
+}
+
+static void opt_pressure_relief_preserves_multi_use_constant(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val imm = add_val(f, tc.i32);
+ Val x = add_val(f, tc.i32);
+ Val y = add_val(f, tc.i32);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, imm, tc.i32, 9);
+ u32 original_inst = f->val_def_inst[imm];
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ emit_scalar_input(f, f->entry, y, tc.i32);
+ emit_binop(f, f->entry, a, imm, x, tc.i32);
+ emit_binop(f, f->entry, b, imm, y, tc.i32);
+ emit_ret_val(f, f->entry, b, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ opt_pressure_relief(f);
+ opt_verify(f, "test-pressure-multi-use");
+
+ EXPECT(f->val_def_inst[imm] == original_inst,
+ "pressure relief must leave multi-use constants in place");
+ EXPECT(count_uses_of(f, imm) == 2,
+ "multi-use constant should still have both uses");
+ tc_fini(&tc);
+}
+
+static void opt_pressure_relief_does_not_cross_memory_ops(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ Val imm = add_val(f, tc.i32);
+ Val loaded = add_val(f, tc.i32);
+ Val x = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, imm, tc.i32, 11);
+ u32 original_inst = f->val_def_inst[imm];
+ emit_load_local(f, f->entry, loaded, fs, tc.i32, 0);
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ emit_binop(f, f->entry, out, imm, x, tc.i32);
+ emit_ret_val(f, f->entry, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ opt_pressure_relief(f);
+ opt_verify(f, "test-pressure-memory-barrier");
+
+ EXPECT(f->val_def_inst[imm] == original_inst,
+ "pressure relief must not sink constants across memory operations");
+ EXPECT(f->val_def_inst[loaded] == original_inst + 1u,
+ "memory operation should stay after the original constant");
+ tc_fini(&tc);
+}
+
static void opt_regalloc_priority(void) {
TestCtx tc;
tc_init(&tc);
@@ -5796,6 +5920,10 @@ int main(void) {
opt_loop_tree_does_not_mutate_cfg();
opt_licm_hoists_safe_invariant_to_preheader();
opt_licm_preserves_trapping_and_memory_ops();
+ opt_pressure_relief_sinks_single_use_load_imm();
+ opt_pressure_relief_does_not_sink_into_loop();
+ opt_pressure_relief_preserves_multi_use_constant();
+ opt_pressure_relief_does_not_cross_memory_ops();
opt_liveness_branch();
opt_block_liveness_phase1();
opt_live_ranges_phase2();