kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f37c2ae1c3c2cb762a390861aaa6994d830f66ef
parent 8ac1385246eaad90b7b36d075d58086f995dc88d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 13:14:39 -0700

Tighten O1 combine and DCE

Diffstat:
M doc/OPT1.md | 18 ++++++++++++++----
M src/opt/pass_lower.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M test/opt/opt_test.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 494 insertions(+), 12 deletions(-)

diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -96,9 +96,19 @@ substitute a behaviorally similar shortcut without updating both documents.
 
 ## Remaining Todos
 
-- Finish the general safe single-use fold portion of `opt_combine`.
-- Expand post-rewrite DCE into true dead-definition elimination on hard
-  registers (requires a post-rewrite liveness pass). Pre-rewrite DCE is
-  already precise via `opt_dead_def_elim`.
+- Finish the general safe single-use fold portion of `opt_combine`:
+  - [ ] Substitute one-use physical copies into users when the rewritten IR
+    form is guaranteed target-legal.
+  - [ ] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and
+    `cmp_branch` operands accepted by `CGTarget`.
+  - [ ] Collapse physical copy chains before post-RA DCE.
+  - [ ] Fold redundant `IR_CONVERT` chains, starting with identical
+    conversion pairs.
+  - [ ] Keep branch-shape cleanup out of `-O1` unless it is purely local and
+    does not require CFG/jump optimization.
+- Continue tightening post-rewrite DCE:
+  - [ ] Model call clobbers and hard-reg call arguments explicitly.
+  - [ ] Add tests for dead caller-saved defs before calls, live call returns,
+    and hard-reg call arguments.
 - Add full CG corpus `-O1` validation runs for x64 and RV64 (currently
   validated only on aa64).
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -235,6 +235,10 @@ static void copy_bits(u64* dst, const u64* src, u32 words) { for (u32 w = 0; w < words; ++w) dst[w] = src[w]; } +static int mem_observable(const MemAccess* m) { + return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0; +} + void opt_machinize(Func* f, CGTarget* target) { f->opt_target = target->c->target; f->opt_has_target = 1; @@ -786,6 +790,8 @@ static void rewrite_func(Func* f) { f->opt_rewritten = 1; } +static int inst_has_side_effect(Func* f, const Inst* in); + static int all_defs_dead(Func* f, Inst* in, u64* live) { (void)f; if (in->def != VAL_NONE && bit_has(live, in->def)) return 0; @@ -794,8 +800,6 @@ static int all_defs_dead(Func* f, Inst* in, u64* live) { return 1; } -static int side_effecting(IROp op); - void opt_dead_def_elim(Func* f) { u32 words = f->opt_live_words; for (u32 b = 0; b < f->nblocks; ++b) { @@ -808,7 +812,7 @@ void opt_dead_def_elim(Func* f) { for (u32 ri = bl->ninsts; ri > 0; --ri) { u32 i = ri - 1u; Inst* in = &bl->insts[i]; - if (!side_effecting((IROp)in->op) && all_defs_dead(f, in, live)) { + if (!inst_has_side_effect(f, in) && all_defs_dead(f, in, live)) { continue; } new_insts[w++] = *in; @@ -881,6 +885,67 @@ void opt_regalloc(Func* f, int allow_live_range_split) { rewrite_func(f); } +static int same_reg_operand(const Operand* a, const Operand* b) { + return a->kind == OPK_REG && b->kind == OPK_REG && a->cls == b->cls && + a->v.reg == b->v.reg; +} + +static int frame_slot_is_spill(Func* f, FrameSlot fs) { + if (fs == FRAME_SLOT_NONE || fs > f->nframe_slots) return 0; + return f->frame_slots[fs - 1u].kind == FS_SPILL; +} + +static int spill_local_slot(Func* f, const Operand* addr, const MemAccess* mem, + FrameSlot* out) { + if (!addr || addr->kind != OPK_LOCAL) return 0; + if (mem_observable(mem)) return 0; + if (mem->alias.kind != ALIAS_LOCAL) return 0; + if (mem->alias.v.local_id != (i32)addr->v.frame_slot) return 0; + if (!frame_slot_is_spill(f, 
addr->v.frame_slot)) return 0; + *out = addr->v.frame_slot; + return 1; +} + +static int same_spill_access(Func* f, const Inst* a, const Inst* b, + FrameSlot* slot_out) { + FrameSlot as = FRAME_SLOT_NONE; + FrameSlot bs = FRAME_SLOT_NONE; + if (!spill_local_slot(f, &a->opnds[0], &a->extra.mem, &as)) return 0; + if (!spill_local_slot(f, &b->opnds[0], &b->extra.mem, &bs)) return 0; + if (as != bs) return 0; + if (a->extra.mem.size != b->extra.mem.size) return 0; + if (a->extra.mem.addr_space != b->extra.mem.addr_space) return 0; + if (slot_out) *slot_out = as; + return 1; +} + +static int load_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { + if ((IROp)in->op != IR_LOAD || in->nopnds < 2) return 0; + return spill_local_slot(f, &in->opnds[1], &in->extra.mem, slot_out); +} + +static int store_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) { + if ((IROp)in->op != IR_STORE || in->nopnds < 2) return 0; + return spill_local_slot(f, &in->opnds[0], &in->extra.mem, slot_out); +} + +static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) { + FrameSlot as = FRAME_SLOT_NONE; + FrameSlot bs = FRAME_SLOT_NONE; + if ((IROp)a->op == IR_LOAD) { + if (!load_spill_slot(f, a, &as)) return 0; + } else if (!store_spill_slot(f, a, &as)) { + return 0; + } + if ((IROp)b->op == IR_LOAD) { + if (!load_spill_slot(f, b, &bs)) return 0; + } else if (!store_spill_slot(f, b, &bs)) { + return 0; + } + return as == bs && a->extra.mem.size == b->extra.mem.size && + a->extra.mem.addr_space == b->extra.mem.addr_space; +} + void opt_combine(Func* f) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; @@ -888,18 +953,50 @@ void opt_combine(Func* f) { for (u32 i = 0; i < bl->ninsts; ++i) { Inst* in = &bl->insts[i]; if ((IROp)in->op == IR_COPY && in->nopnds == 2 && - in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && - in->opnds[0].v.reg == in->opnds[1].v.reg) { + same_reg_operand(&in->opnds[0], &in->opnds[1])) { continue; } + + if (w) { + 
Inst* prev = &bl->insts[w - 1u]; + if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_LOAD && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[1], &in->opnds[0])) { + continue; + } + if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_STORE && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[0], &in->opnds[1])) { + continue; + } + if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_LOAD && + same_spill_slot_and_size(f, prev, in) && + same_reg_operand(&prev->opnds[0], &in->opnds[0])) { + continue; + } + if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_STORE && + same_spill_access(f, prev, in, NULL)) { + bl->insts[w - 1u] = *in; + continue; + } + } + bl->insts[w++] = *in; } bl->ninsts = w; } } -static int side_effecting(IROp op) { - switch (op) { +static int inst_has_side_effect(Func* f, const Inst* in) { + (void)f; + switch ((IROp)in->op) { + case IR_LOAD: + return mem_observable(&in->extra.mem); + case IR_BITFIELD_LOAD: { + IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; + return aux && mem_observable(&aux->access.storage); + } + case IR_ALLOCA: case IR_STORE: case IR_AGG_COPY: case IR_AGG_SET: @@ -915,8 +1012,10 @@ static int side_effecting(IROp op) { case IR_BREAK_TO: case IR_CONTINUE_TO: case IR_VA_START: + case IR_VA_ARG: case IR_VA_END: case IR_VA_COPY: + case IR_ATOMIC_LOAD: case IR_ATOMIC_STORE: case IR_ATOMIC_RMW: case IR_ATOMIC_CAS: @@ -929,14 +1028,215 @@ static int side_effecting(IROp op) { } } +typedef struct HardRegSet { + u32 cls[OPT_REG_CLASSES]; +} HardRegSet; + +static void hard_add(HardRegSet* s, u8 cls, Reg r) { + if (cls >= OPT_REG_CLASSES || r >= 32) return; + s->cls[cls] |= 1u << r; +} + +static int hard_empty(const HardRegSet* s) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + if (s->cls[c]) return 0; + return 1; +} + +static int hard_intersects(const HardRegSet* a, const HardRegSet* b) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + if (a->cls[c] & b->cls[c]) return 1; + return 0; 
+} + +static void hard_live_step(HardRegSet* live, const HardRegSet* use, + const HardRegSet* def) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + live->cls[c] = (live->cls[c] & ~def->cls[c]) | use->cls[c]; +} + +static void hard_use_operand(HardRegSet* s, const Operand* op) { + if (!op) return; + if (op->kind == OPK_REG) { + hard_add(s, op->cls, op->v.reg); + } else if (op->kind == OPK_INDIRECT) { + hard_add(s, RC_INT, op->v.ind.base); + } +} + +static void hard_def_operand(HardRegSet* s, const Operand* op) { + if (op && op->kind == OPK_REG) hard_add(s, op->cls, op->v.reg); +} + +static void hard_use_abivalue(HardRegSet* use, const CGABIValue* v) { + if (!v) return; + hard_use_operand(use, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) hard_use_operand(use, &v->parts[i].op); +} + +static void hard_def_abivalue(HardRegSet* def, const CGABIValue* v) { + if (!v) return; + hard_def_operand(def, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) hard_def_operand(def, &v->parts[i].op); +} + +static void hard_inst_use_def(const Inst* in, HardRegSet* use, + HardRegSet* def) { + memset(use, 0, sizeof *use); + memset(def, 0, sizeof *def); + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_LOAD_CONST: + case IR_TLS_ADDR_OF: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + break; + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + case IR_VA_ARG: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_LOAD: + case IR_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_ATOMIC_LOAD: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_BINOP: + case IR_CMP: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + break; + case IR_STORE: + case IR_AGG_COPY: + case 
IR_AGG_SET: + case IR_BITFIELD_STORE: + case IR_VA_COPY: + if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + hard_use_operand(use, &aux->desc.callee); + for (u32 i = 0; i < aux->desc.nargs; ++i) + hard_use_abivalue(use, &aux->desc.args[i]); + hard_def_abivalue(def, &aux->desc.ret); + break; + } + case IR_CMP_BRANCH: + case IR_CONDBR: + for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]); + break; + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) hard_use_abivalue(use, &aux->val); + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + if (aux) hard_use_operand(use, &aux->desc.cond); + break; + } + case IR_ALLOCA: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_VA_START: + case IR_VA_END: + if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); + break; + case IR_ATOMIC_STORE: + if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + break; + case IR_ATOMIC_RMW: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + break; + case IR_ATOMIC_CAS: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + if (in->nopnds >= 2) hard_def_operand(def, &in->opnds[1]); + if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]); + if (in->nopnds >= 4) hard_use_operand(use, &in->opnds[3]); + if (in->nopnds >= 5) hard_use_operand(use, &in->opnds[4]); + break; + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->nin; ++i) hard_use_operand(use, &aux->in_ops[i]); + for (u32 i = 
0; i < aux->nout; ++i) + hard_def_operand(def, &aux->out_ops[i]); + break; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->narg; ++i) hard_use_operand(use, &aux->args[i]); + for (u32 i = 0; i < aux->ndst; ++i) hard_def_operand(def, &aux->dsts[i]); + break; + } + default: + break; + } +} + +static HardRegSet hard_live_out_from_values(Func* f, const Block* bl) { + HardRegSet live; + memset(&live, 0, sizeof live); + if (!bl->live_out || !f->val_info) return live; + for (Val v = 1; v < f->nvals; ++v) { + if (!bit_has(bl->live_out, v)) continue; + if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue; + hard_add(&live, f->val_info[v].cls, f->val_info[v].hard_reg); + } + return live; +} + void opt_dce(Func* f) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; + if (f->opt_rewritten) { + HardRegSet live = hard_live_out_from_values(f, bl); + Inst* new_insts = arena_array(f->arena, Inst, bl->ninsts); + u32 w = 0; + for (u32 ri = bl->ninsts; ri > 0; --ri) { + u32 i = ri - 1u; + Inst* in = &bl->insts[i]; + HardRegSet use, def; + if ((IROp)in->op == IR_NOP) continue; + hard_inst_use_def(in, &use, &def); + if (!inst_has_side_effect(f, in) && !hard_empty(&def) && + !hard_intersects(&def, &live)) { + continue; + } + if (!inst_has_side_effect(f, in) && hard_empty(&def) && + in->nopnds == 0) { + continue; + } + new_insts[w++] = *in; + hard_live_step(&live, &use, &def); + } + for (u32 i = 0; i < w / 2; ++i) { + Inst tmp = new_insts[i]; + new_insts[i] = new_insts[w - 1u - i]; + new_insts[w - 1u - i] = tmp; + } + bl->insts = new_insts; + bl->ninsts = w; + bl->cap = w; + continue; + } + u32 w = 0; for (u32 i = 0; i < bl->ninsts; ++i) { Inst* in = &bl->insts[i]; if ((IROp)in->op == IR_NOP) continue; - if (!side_effecting((IROp)in->op) && in->def == VAL_NONE && + if (!inst_has_side_effect(f, in) && in->def == VAL_NONE && in->ndefs == 0 && in->nopnds == 0) continue; bl->insts[w++] = *in; diff 
--git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -111,6 +111,29 @@ static Operand op_imm_(i64 v, CfreeCgTypeId ty) { return o; } +static Operand op_local_(FrameSlot fs, CfreeCgTypeId ty) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_LOCAL; + o.cls = RC_INT; + o.type = ty; + o.v.frame_slot = fs; + return o; +} + +static MemAccess mem_local_(FrameSlot fs, CfreeCgTypeId ty, u32 size, + u16 flags) { + MemAccess m; + memset(&m, 0, sizeof m); + m.type = ty; + m.size = size; + m.align = size >= 8 ? 8 : size; + m.flags = flags; + m.alias.kind = ALIAS_LOCAL; + m.alias.v.local_id = (i32)fs; + return m; +} + static Func* new_func(TestCtx* tc) { CGFuncDesc fd; CfreeCgFuncSig sig; @@ -133,6 +156,18 @@ static Val add_val_cls(Func* f, CfreeCgTypeId ty, RegClass cls) { return ir_alloc_val(f, ty, cls); } +static FrameSlot add_frame_slot(Func* f, CfreeCgTypeId ty, FrameSlotKind kind, + u32 size, u16 flags) { + FrameSlotDesc d; + memset(&d, 0, sizeof d); + d.type = ty; + d.size = size; + d.align = size >= 8 ? 
8 : size; + d.kind = kind; + d.flags = flags; + return ir_frame_slot_new(f, &d); +} + static Inst* emit_load_imm(Func* f, u32 b, Val dst, CfreeCgTypeId ty, i64 imm) { Inst* in = ir_emit(f, b, IR_LOAD_IMM); @@ -176,6 +211,21 @@ static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c, return in; } +static Inst* emit_load_local(Func* f, u32 b, Val dst, FrameSlot fs, + CfreeCgTypeId ty, u16 flags) { + Inst* in = ir_emit(f, b, IR_LOAD); + in->opnds = arena_array(f->arena, Operand, 2); + in->opnds[0] = op_reg_(dst, ty); + in->opnds[1] = op_local_(fs, ty); + in->nopnds = 2; + in->def = dst; + in->type = ty; + in->extra.mem = mem_local_(fs, ty, 4, flags); + f->val_def_block[dst] = b; + f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; + return in; +} + static Inst* emit_call_void(Func* f, u32 b) { Inst* in = ir_emit(f, b, IR_CALL); IRCallAux* aux = arena_znew(f->arena, IRCallAux); @@ -1091,6 +1141,125 @@ static void opt_post_rewrite_dce(void) { tc_fini(&tc); } +static void opt_combine_spill_peeps(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + f->opt_rewritten = 1; + FrameSlot fs = add_frame_slot(f, tc.i32, FS_SPILL, 4, 0); + + Inst* st = ir_emit(f, f->entry, IR_STORE); + st->opnds = arena_array(f->arena, Operand, 2); + st->opnds[0] = op_local_(fs, tc.i32); + st->opnds[1] = op_reg_(9, tc.i32); + st->nopnds = 2; + st->extra.mem = mem_local_(fs, tc.i32, 4, 0); + + Inst* ld = ir_emit(f, f->entry, IR_LOAD); + ld->opnds = arena_array(f->arena, Operand, 2); + ld->opnds[0] = op_reg_(9, tc.i32); + ld->opnds[1] = op_local_(fs, tc.i32); + ld->nopnds = 2; + ld->extra.mem = mem_local_(fs, tc.i32, 4, 0); + + opt_combine(f); + EXPECT(f->blocks[f->entry].ninsts == 1, + "store followed by same-reg spill reload should combine to one inst"); + EXPECT((IROp)f->blocks[f->entry].insts[0].op == IR_STORE, + "remaining inst should be the spill store"); + + Func* g = new_func(&tc); + g->opt_rewritten = 1; + fs = add_frame_slot(g, tc.i32, FS_SPILL, 4, 0); + + ld = ir_emit(g, 
g->entry, IR_LOAD); + ld->opnds = arena_array(g->arena, Operand, 2); + ld->opnds[0] = op_reg_(9, tc.i32); + ld->opnds[1] = op_local_(fs, tc.i32); + ld->nopnds = 2; + ld->extra.mem = mem_local_(fs, tc.i32, 4, 0); + + st = ir_emit(g, g->entry, IR_STORE); + st->opnds = arena_array(g->arena, Operand, 2); + st->opnds[0] = op_local_(fs, tc.i32); + st->opnds[1] = op_reg_(9, tc.i32); + st->nopnds = 2; + st->extra.mem = mem_local_(fs, tc.i32, 4, 0); + + opt_combine(g); + EXPECT(g->blocks[g->entry].ninsts == 1, + "spill reload followed by same-reg writeback should combine to one inst"); + EXPECT((IROp)g->blocks[g->entry].insts[0].op == IR_LOAD, + "remaining inst should be the spill reload"); + tc_fini(&tc); +} + +static void opt_dce_physical_dead_defs(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + f->opt_rewritten = 1; + Inst* in = ir_emit(f, f->entry, IR_LOAD_IMM); + in->opnds = arena_array(f->arena, Operand, 1); + in->opnds[0] = op_reg_(19, tc.i32); + in->nopnds = 1; + in->extra.imm = 1; + ir_emit(f, f->entry, IR_RET); + + opt_dce(f); + EXPECT(count_op(f, IR_LOAD_IMM) == 0, + "post-rewrite DCE should remove dead physical register defs"); + + Func* g = new_func(&tc); + g->opt_rewritten = 1; + in = ir_emit(g, g->entry, IR_LOAD_IMM); + in->opnds = arena_array(g->arena, Operand, 1); + in->opnds[0] = op_reg_(19, tc.i32); + in->nopnds = 1; + in->extra.imm = 1; + emit_ret_val(g, g->entry, 19, tc.i32); + + opt_dce(g); + EXPECT(count_op(g, IR_LOAD_IMM) == 1, + "post-rewrite DCE should keep physical defs used by ret"); + tc_fini(&tc); +} + +static void opt_dead_def_keeps_observable_loads(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {19}; + static const Reg scratch[] = {9, 10}; + mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); + + Func* f = new_func(&tc); + opt_machinize(f, &mock.base); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_VOLATILE); + Val dead = add_val(f, 
tc.i32); + emit_load_local(f, f->entry, dead, fs, tc.i32, MF_VOLATILE); + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_live_info(f); + opt_dead_def_elim(f); + EXPECT(count_op(f, IR_LOAD) == 1, + "dead_def_elim should keep volatile loads even when the result dies"); + + Func* g = new_func(&tc); + opt_machinize(g, &mock.base); + fs = add_frame_slot(g, tc.i32, FS_LOCAL, 4, 0); + dead = add_val(g, tc.i32); + emit_load_local(g, g->entry, dead, fs, tc.i32, MF_ATOMIC); + opt_build_cfg(g); + opt_build_loop_tree(g); + opt_live_info(g); + opt_dead_def_elim(g); + EXPECT(count_op(g, IR_LOAD) == 1, + "dead_def_elim should keep atomic loads even when the result dies"); + tc_fini(&tc); +} + static void opt_dead_def_elim_test(void) { TestCtx tc; tc_init(&tc); @@ -1207,6 +1376,9 @@ int main(void) { opt_spill_pressure(); opt_inline_asm_tied_fixed_regs(); opt_post_rewrite_dce(); + opt_combine_spill_peeps(); + opt_dce_physical_dead_defs(); + opt_dead_def_keeps_observable_loads(); opt_dead_def_elim_test(); opt_emit_no_virtual_alloc(); simple_regalloc_reports_exact_used_regs();