commit f37c2ae1c3c2cb762a390861aaa6994d830f66ef
parent 8ac1385246eaad90b7b36d075d58086f995dc88d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 13:14:39 -0700
Tighten O1 combine and DCE
Diffstat:
3 files changed, 494 insertions(+), 12 deletions(-)
diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -96,9 +96,19 @@ substitute a behaviorally similar shortcut without updating both documents.
## Remaining Todos
-- Finish the general safe single-use fold portion of `opt_combine`.
-- Expand post-rewrite DCE into true dead-definition elimination on hard
- registers (requires a post-rewrite liveness pass). Pre-rewrite DCE is
- already precise via `opt_dead_def_elim`.
+- Finish the general safe single-use fold portion of `opt_combine`:
+ - [ ] Substitute one-use physical copies into users when the rewritten IR
+ form is guaranteed target-legal.
+ - [ ] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and
+ `cmp_branch` operands accepted by `CGTarget`.
+ - [ ] Collapse physical copy chains before post-RA DCE.
+ - [ ] Fold redundant `IR_CONVERT` chains, starting with identical
+ conversion pairs.
+ - [ ] Keep branch-shape cleanup out of `-O1` unless it is purely local and
+ does not require CFG/jump optimization.
+- Continue tightening post-rewrite DCE:
+ - [ ] Model call clobbers and hard-reg call arguments explicitly.
+ - [ ] Add tests for dead caller-saved defs before calls, live call returns,
+ and hard-reg call arguments.
- Add full CG corpus `-O1` validation runs for x64 and RV64 (currently
validated only on aa64).
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -235,6 +235,10 @@ static void copy_bits(u64* dst, const u64* src, u32 words) {
for (u32 w = 0; w < words; ++w) dst[w] = src[w];
}
+/* Returns nonzero when the access carries MF_VOLATILE or MF_ATOMIC, i.e. when
+ * the memory operation itself is observable regardless of whether its result
+ * is used. */
+static int mem_observable(const MemAccess* m) {
+  if (m->flags & MF_VOLATILE) return 1;
+  if (m->flags & MF_ATOMIC) return 1;
+  return 0;
+}
+
void opt_machinize(Func* f, CGTarget* target) {
f->opt_target = target->c->target;
f->opt_has_target = 1;
@@ -786,6 +790,8 @@ static void rewrite_func(Func* f) {
f->opt_rewritten = 1;
}
+static int inst_has_side_effect(Func* f, const Inst* in);
+
static int all_defs_dead(Func* f, Inst* in, u64* live) {
(void)f;
if (in->def != VAL_NONE && bit_has(live, in->def)) return 0;
@@ -794,8 +800,6 @@ static int all_defs_dead(Func* f, Inst* in, u64* live) {
return 1;
}
-static int side_effecting(IROp op);
-
void opt_dead_def_elim(Func* f) {
u32 words = f->opt_live_words;
for (u32 b = 0; b < f->nblocks; ++b) {
@@ -808,7 +812,7 @@ void opt_dead_def_elim(Func* f) {
for (u32 ri = bl->ninsts; ri > 0; --ri) {
u32 i = ri - 1u;
Inst* in = &bl->insts[i];
- if (!side_effecting((IROp)in->op) && all_defs_dead(f, in, live)) {
+ if (!inst_has_side_effect(f, in) && all_defs_dead(f, in, live)) {
continue;
}
new_insts[w++] = *in;
@@ -881,6 +885,67 @@ void opt_regalloc(Func* f, int allow_live_range_split) {
rewrite_func(f);
}
+/* True when both operands are register operands that name the same register
+ * in the same register class. */
+static int same_reg_operand(const Operand* a, const Operand* b) {
+  if (a->kind != OPK_REG || b->kind != OPK_REG) return 0;
+  if (a->cls != b->cls) return 0;
+  return a->v.reg == b->v.reg;
+}
+
+/* Reports whether `fs` names an existing frame slot whose kind is FS_SPILL.
+ * The slot is looked up at index `fs - 1`; FRAME_SLOT_NONE and ids larger than
+ * `f->nframe_slots` yield 0. */
+static int frame_slot_is_spill(Func* f, FrameSlot fs) {
+  int in_range = fs != FRAME_SLOT_NONE && fs <= f->nframe_slots;
+  return in_range && f->frame_slots[fs - 1u].kind == FS_SPILL;
+}
+
+/* Recognises a plain access to a spill slot.
+ *
+ * Succeeds only when `addr` is an OPK_LOCAL operand, `mem` is neither
+ * volatile nor atomic, the access's alias info is ALIAS_LOCAL with a local id
+ * equal to the addressed frame slot, and that frame slot is of kind FS_SPILL.
+ * On success the slot id is stored in `*out` and 1 is returned; otherwise
+ * `*out` is left untouched and 0 is returned. */
+static int spill_local_slot(Func* f, const Operand* addr, const MemAccess* mem,
+                            FrameSlot* out) {
+  int addr_is_local = addr && addr->kind == OPK_LOCAL;
+  if (!addr_is_local) return 0;
+
+  /* Observable accesses are never treated as ordinary spill traffic. */
+  int plain_local_access = !mem_observable(mem) &&
+                           mem->alias.kind == ALIAS_LOCAL &&
+                           mem->alias.v.local_id == (i32)addr->v.frame_slot;
+  if (!plain_local_access) return 0;
+
+  if (frame_slot_is_spill(f, addr->v.frame_slot)) {
+    *out = addr->v.frame_slot;
+    return 1;
+  }
+  return 0;
+}
+
+/* Decides whether `a` and `b` address the same spill slot through
+ * `opnds[0]`, with equal access size and address space.
+ *
+ * Both instructions are expected to keep their address in `opnds[0]`; the
+ * caller in opt_combine only passes pairs of IR_STORE. Instructions with no
+ * operands are rejected up front so `opnds[0]` is never read from an empty
+ * operand array. When `slot_out` is non-NULL it receives the shared slot on
+ * success.
+ *
+ * Returns 1 on a match, 0 otherwise. */
+static int same_spill_access(Func* f, const Inst* a, const Inst* b,
+                             FrameSlot* slot_out) {
+  FrameSlot as = FRAME_SLOT_NONE;
+  FrameSlot bs = FRAME_SLOT_NONE;
+  if (a->nopnds == 0 || b->nopnds == 0) return 0;
+  if (!spill_local_slot(f, &a->opnds[0], &a->extra.mem, &as)) return 0;
+  if (!spill_local_slot(f, &b->opnds[0], &b->extra.mem, &bs)) return 0;
+  if (as != bs) return 0;
+  if (a->extra.mem.size != b->extra.mem.size) return 0;
+  if (a->extra.mem.addr_space != b->extra.mem.addr_space) return 0;
+  if (slot_out) *slot_out = as;
+  return 1;
+}
+
+/* If `in` is an IR_LOAD with at least two operands whose address operand
+ * (`opnds[1]`) is a plain spill-slot access, stores the slot in `*slot_out`
+ * and returns 1; otherwise returns 0. */
+static int load_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) {
+  int is_load = (IROp)in->op == IR_LOAD;
+  if (is_load && in->nopnds >= 2)
+    return spill_local_slot(f, &in->opnds[1], &in->extra.mem, slot_out);
+  return 0;
+}
+
+/* If `in` is an IR_STORE with at least two operands whose address operand
+ * (`opnds[0]`) is a plain spill-slot access, stores the slot in `*slot_out`
+ * and returns 1; otherwise returns 0. */
+static int store_spill_slot(Func* f, const Inst* in, FrameSlot* slot_out) {
+  int is_store = (IROp)in->op == IR_STORE;
+  if (is_store && in->nopnds >= 2)
+    return spill_local_slot(f, &in->opnds[0], &in->extra.mem, slot_out);
+  return 0;
+}
+
+/* True when `a` and `b` are each a spill-slot load or store, refer to the
+ * same slot, and agree on access size and address space. An instruction that
+ * is not IR_LOAD is checked as a store, so any other opcode fails the
+ * match. */
+static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) {
+  FrameSlot slot_a = FRAME_SLOT_NONE;
+  FrameSlot slot_b = FRAME_SLOT_NONE;
+
+  int ok_a = (IROp)a->op == IR_LOAD ? load_spill_slot(f, a, &slot_a)
+                                    : store_spill_slot(f, a, &slot_a);
+  if (!ok_a) return 0;
+
+  int ok_b = (IROp)b->op == IR_LOAD ? load_spill_slot(f, b, &slot_b)
+                                    : store_spill_slot(f, b, &slot_b);
+  if (!ok_b) return 0;
+
+  if (slot_a != slot_b) return 0;
+  if (a->extra.mem.size != b->extra.mem.size) return 0;
+  return a->extra.mem.addr_space == b->extra.mem.addr_space;
+}
+
void opt_combine(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
@@ -888,18 +953,50 @@ void opt_combine(Func* f) {
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
if ((IROp)in->op == IR_COPY && in->nopnds == 2 &&
- in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG &&
- in->opnds[0].v.reg == in->opnds[1].v.reg) {
+ same_reg_operand(&in->opnds[0], &in->opnds[1])) {
continue;
}
+
+ if (w) {
+ Inst* prev = &bl->insts[w - 1u];
+ if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_LOAD &&
+ same_spill_slot_and_size(f, prev, in) &&
+ same_reg_operand(&prev->opnds[1], &in->opnds[0])) {
+ continue;
+ }
+ if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_STORE &&
+ same_spill_slot_and_size(f, prev, in) &&
+ same_reg_operand(&prev->opnds[0], &in->opnds[1])) {
+ continue;
+ }
+ if ((IROp)prev->op == IR_LOAD && (IROp)in->op == IR_LOAD &&
+ same_spill_slot_and_size(f, prev, in) &&
+ same_reg_operand(&prev->opnds[0], &in->opnds[0])) {
+ continue;
+ }
+ if ((IROp)prev->op == IR_STORE && (IROp)in->op == IR_STORE &&
+ same_spill_access(f, prev, in, NULL)) {
+ bl->insts[w - 1u] = *in;
+ continue;
+ }
+ }
+
bl->insts[w++] = *in;
}
bl->ninsts = w;
}
}
-static int side_effecting(IROp op) {
- switch (op) {
+static int inst_has_side_effect(Func* f, const Inst* in) {
+ (void)f;
+ switch ((IROp)in->op) {
+ case IR_LOAD:
+ return mem_observable(&in->extra.mem);
+ case IR_BITFIELD_LOAD: {
+ IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux;
+ return aux && mem_observable(&aux->access.storage);
+ }
+ case IR_ALLOCA:
case IR_STORE:
case IR_AGG_COPY:
case IR_AGG_SET:
@@ -915,8 +1012,10 @@ static int side_effecting(IROp op) {
case IR_BREAK_TO:
case IR_CONTINUE_TO:
case IR_VA_START:
+ case IR_VA_ARG:
case IR_VA_END:
case IR_VA_COPY:
+ case IR_ATOMIC_LOAD:
case IR_ATOMIC_STORE:
case IR_ATOMIC_RMW:
case IR_ATOMIC_CAS:
@@ -929,14 +1028,215 @@ static int side_effecting(IROp op) {
}
}
+typedef struct HardRegSet { /* set of hard registers, grouped by class */
+ u32 cls[OPT_REG_CLASSES]; /* per-class mask: bit r set => register r present */
+} HardRegSet;
+
+/* Inserts register `r` of class `cls` into `s`. Classes outside
+ * OPT_REG_CLASSES and register numbers of 32 or more cannot be represented in
+ * the per-class 32-bit mask and are ignored. */
+static void hard_add(HardRegSet* s, u8 cls, Reg r) {
+  int representable = cls < OPT_REG_CLASSES && r < 32;
+  if (representable) s->cls[cls] |= 1u << r;
+}
+
+/* Returns 1 when no register of any class is present in `s`, else 0. */
+static int hard_empty(const HardRegSet* s) {
+  u32 any = 0;
+  for (u32 c = 0; c < OPT_REG_CLASSES; ++c) any |= s->cls[c];
+  return any == 0;
+}
+
+/* Returns 1 when `a` and `b` share at least one register in some class,
+ * else 0. */
+static int hard_intersects(const HardRegSet* a, const HardRegSet* b) {
+  u32 common = 0;
+  for (u32 c = 0; c < OPT_REG_CLASSES; ++c) common |= a->cls[c] & b->cls[c];
+  return common != 0;
+}
+
+/* Backward liveness transfer for one instruction:
+ * live = (live - def) + use, applied independently to every register
+ * class. */
+static void hard_live_step(HardRegSet* live, const HardRegSet* use,
+                           const HardRegSet* def) {
+  for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+    u32 survivors = live->cls[c] & ~def->cls[c];
+    live->cls[c] = survivors | use->cls[c];
+  }
+}
+
+/* Records the hard register(s) read through `op` in `s`. A register operand
+ * adds its own (class, register) pair; an indirect operand adds its base
+ * register in RC_INT. Any other operand kind, or a NULL `op`, adds
+ * nothing. */
+static void hard_use_operand(HardRegSet* s, const Operand* op) {
+  if (!op) return;
+  switch (op->kind) {
+    case OPK_REG:
+      hard_add(s, op->cls, op->v.reg);
+      break;
+    case OPK_INDIRECT:
+      hard_add(s, RC_INT, op->v.ind.base);
+      break;
+    default:
+      break;
+  }
+}
+
+/* Records the hard register written through `op` in `s`. Only register
+ * operands count as definitions; NULL and every other operand kind are
+ * ignored. */
+static void hard_def_operand(HardRegSet* s, const Operand* op) {
+  if (!op || op->kind != OPK_REG) return;
+  hard_add(s, op->cls, op->v.reg);
+}
+
+/* Marks as used every hard register referenced by an ABI value: first its
+ * `storage` operand, then the operand of each of its `nparts` parts. A NULL
+ * value contributes nothing. */
+static void hard_use_abivalue(HardRegSet* use, const CGABIValue* v) {
+  if (v) {
+    hard_use_operand(use, &v->storage);
+    for (u32 part = 0; part < v->nparts; ++part) {
+      hard_use_operand(use, &v->parts[part].op);
+    }
+  }
+}
+
+/* Marks as defined every hard register named by an ABI value: first its
+ * `storage` operand, then the operand of each of its `nparts` parts. A NULL
+ * value contributes nothing. */
+static void hard_def_abivalue(HardRegSet* def, const CGABIValue* v) {
+  if (v) {
+    hard_def_operand(def, &v->storage);
+    for (u32 part = 0; part < v->nparts; ++part) {
+      hard_def_operand(def, &v->parts[part].op);
+    }
+  }
+}
+
+/* Computes the hard registers read (`use`) and written (`def`) by `in`.
+ *
+ * Both sets are cleared first. Register operands contribute their own
+ * (class, register) pair; an indirect operand in a use position contributes
+ * its base register (see hard_use_operand). Operand positions are only
+ * consulted when `nopnds` says they exist, and aux-carrying opcodes with a
+ * NULL aux pointer contribute nothing.
+ *
+ * Opcodes without an explicit case are handled conservatively: every operand
+ * is treated as a use and nothing as a definition. That keeps registers read
+ * by an unmodelled opcode live, so post-rewrite DCE cannot delete the
+ * instruction that produced them. */
+static void hard_inst_use_def(const Inst* in, HardRegSet* use,
+                              HardRegSet* def) {
+  memset(use, 0, sizeof *use);
+  memset(def, 0, sizeof *def);
+  switch ((IROp)in->op) {
+    /* opnds[0] is defined; nothing is read from registers. */
+    case IR_LOAD_IMM:
+    case IR_LOAD_CONST:
+    case IR_TLS_ADDR_OF:
+      if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]);
+      break;
+    /* opnds[0] is defined, opnds[1] is read. */
+    case IR_COPY:
+    case IR_CONVERT:
+    case IR_UNOP:
+    case IR_VA_ARG:
+    case IR_LOAD:
+    case IR_ADDR_OF:
+    case IR_BITFIELD_LOAD:
+    case IR_ATOMIC_LOAD:
+    case IR_ALLOCA:
+      if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]);
+      if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]);
+      break;
+    /* opnds[0] is defined, opnds[1] and opnds[2] are read. */
+    case IR_BINOP:
+    case IR_CMP:
+    case IR_ATOMIC_RMW:
+      if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]);
+      if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]);
+      if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]);
+      break;
+    /* No definition; opnds[0] and opnds[1] are read. */
+    case IR_STORE:
+    case IR_AGG_COPY:
+    case IR_AGG_SET:
+    case IR_BITFIELD_STORE:
+    case IR_VA_COPY:
+    case IR_ATOMIC_STORE:
+      if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]);
+      if (in->nopnds >= 2) hard_use_operand(use, &in->opnds[1]);
+      break;
+    /* Callee and explicit argument values are read; the return value is
+     * defined. Call clobbers are not modelled here. */
+    case IR_CALL: {
+      IRCallAux* aux = (IRCallAux*)in->extra.aux;
+      if (!aux) break;
+      hard_use_operand(use, &aux->desc.callee);
+      for (u32 i = 0; i < aux->desc.nargs; ++i)
+        hard_use_abivalue(use, &aux->desc.args[i]);
+      hard_def_abivalue(def, &aux->desc.ret);
+      break;
+    }
+    /* Branches only read their operands. */
+    case IR_CMP_BRANCH:
+    case IR_CONDBR:
+      for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]);
+      break;
+    case IR_RET: {
+      IRRetAux* aux = (IRRetAux*)in->extra.aux;
+      if (aux && aux->present) hard_use_abivalue(use, &aux->val);
+      break;
+    }
+    case IR_SCOPE_BEGIN: {
+      IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
+      if (aux) hard_use_operand(use, &aux->desc.cond);
+      break;
+    }
+    /* Only opnds[0] is read. */
+    case IR_VA_START:
+    case IR_VA_END:
+      if (in->nopnds >= 1) hard_use_operand(use, &in->opnds[0]);
+      break;
+    /* opnds[0] and opnds[1] are defined, opnds[2..4] are read. */
+    case IR_ATOMIC_CAS:
+      if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]);
+      if (in->nopnds >= 2) hard_def_operand(def, &in->opnds[1]);
+      if (in->nopnds >= 3) hard_use_operand(use, &in->opnds[2]);
+      if (in->nopnds >= 4) hard_use_operand(use, &in->opnds[3]);
+      if (in->nopnds >= 5) hard_use_operand(use, &in->opnds[4]);
+      break;
+    case IR_ASM_BLOCK: {
+      IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
+      if (!aux) break;
+      for (u32 i = 0; i < aux->nin; ++i) hard_use_operand(use, &aux->in_ops[i]);
+      for (u32 i = 0; i < aux->nout; ++i)
+        hard_def_operand(def, &aux->out_ops[i]);
+      break;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+      if (!aux) break;
+      for (u32 i = 0; i < aux->narg; ++i) hard_use_operand(use, &aux->args[i]);
+      for (u32 i = 0; i < aux->ndst; ++i) hard_def_operand(def, &aux->dsts[i]);
+      break;
+    }
+    default:
+      /* Unmodelled opcode: assume it may read every operand it carries. */
+      for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]);
+      break;
+  }
+}
+
+/* Builds the set of hard registers live at the end of `bl` from value-level
+ * liveness: every value from 1 up to `f->nvals` that is in `bl->live_out` and
+ * has alloc kind OPT_ALLOC_HARD contributes its (class, hard_reg). Returns an
+ * empty set when the block has no live-out bitmap or the function has no
+ * value info. */
+static HardRegSet hard_live_out_from_values(Func* f, const Block* bl) {
+  HardRegSet live;
+  memset(&live, 0, sizeof live);
+  if (bl->live_out && f->val_info) {
+    for (Val v = 1; v < f->nvals; ++v) {
+      if (bit_has(bl->live_out, v) &&
+          f->val_info[v].alloc_kind == OPT_ALLOC_HARD) {
+        hard_add(&live, f->val_info[v].cls, f->val_info[v].hard_reg);
+      }
+    }
+  }
+  return live;
+}
+
void opt_dce(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
+ if (f->opt_rewritten) {
+ HardRegSet live = hard_live_out_from_values(f, bl);
+ Inst* new_insts = arena_array(f->arena, Inst, bl->ninsts);
+ u32 w = 0;
+ for (u32 ri = bl->ninsts; ri > 0; --ri) {
+ u32 i = ri - 1u;
+ Inst* in = &bl->insts[i];
+ HardRegSet use, def;
+ if ((IROp)in->op == IR_NOP) continue;
+ hard_inst_use_def(in, &use, &def);
+ if (!inst_has_side_effect(f, in) && !hard_empty(&def) &&
+ !hard_intersects(&def, &live)) {
+ continue;
+ }
+ if (!inst_has_side_effect(f, in) && hard_empty(&def) &&
+ in->nopnds == 0) {
+ continue;
+ }
+ new_insts[w++] = *in;
+ hard_live_step(&live, &use, &def);
+ }
+ for (u32 i = 0; i < w / 2; ++i) {
+ Inst tmp = new_insts[i];
+ new_insts[i] = new_insts[w - 1u - i];
+ new_insts[w - 1u - i] = tmp;
+ }
+ bl->insts = new_insts;
+ bl->ninsts = w;
+ bl->cap = w;
+ continue;
+ }
+
u32 w = 0;
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
if ((IROp)in->op == IR_NOP) continue;
- if (!side_effecting((IROp)in->op) && in->def == VAL_NONE &&
+ if (!inst_has_side_effect(f, in) && in->def == VAL_NONE &&
in->ndefs == 0 && in->nopnds == 0)
continue;
bl->insts[w++] = *in;
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -111,6 +111,29 @@ static Operand op_imm_(i64 v, CfreeCgTypeId ty) {
return o;
}
+/* Builds an OPK_LOCAL operand of class RC_INT and type `ty` that refers to
+ * frame slot `fs`; every other field is zeroed. */
+static Operand op_local_(FrameSlot fs, CfreeCgTypeId ty) {
+  Operand local;
+  memset(&local, 0, sizeof local);
+  local.v.frame_slot = fs;
+  local.type = ty;
+  local.cls = RC_INT;
+  local.kind = OPK_LOCAL;
+  return local;
+}
+
+/* Builds a MemAccess describing a `size`-byte access of type `ty` to frame
+ * slot `fs`, tagged ALIAS_LOCAL with the slot as local id and carrying
+ * `flags`. Alignment is `size`, capped at 8. All other fields are zeroed. */
+static MemAccess mem_local_(FrameSlot fs, CfreeCgTypeId ty, u32 size,
+                            u16 flags) {
+  MemAccess access;
+  memset(&access, 0, sizeof access);
+  access.alias.kind = ALIAS_LOCAL;
+  access.alias.v.local_id = (i32)fs;
+  access.type = ty;
+  access.flags = flags;
+  access.size = size;
+  if (size >= 8) {
+    access.align = 8;
+  } else {
+    access.align = size;
+  }
+  return access;
+}
+
static Func* new_func(TestCtx* tc) {
CGFuncDesc fd;
CfreeCgFuncSig sig;
@@ -133,6 +156,18 @@ static Val add_val_cls(Func* f, CfreeCgTypeId ty, RegClass cls) {
return ir_alloc_val(f, ty, cls);
}
+/* Registers a new frame slot on `f` with the given type, kind, size and
+ * flags, and returns the id produced by ir_frame_slot_new. Alignment is
+ * `size`, capped at 8. */
+static FrameSlot add_frame_slot(Func* f, CfreeCgTypeId ty, FrameSlotKind kind,
+                                u32 size, u16 flags) {
+  FrameSlotDesc desc;
+  memset(&desc, 0, sizeof desc);
+  desc.kind = kind;
+  desc.flags = flags;
+  desc.type = ty;
+  desc.size = size;
+  if (size >= 8) {
+    desc.align = 8;
+  } else {
+    desc.align = size;
+  }
+  return ir_frame_slot_new(f, &desc);
+}
+
static Inst* emit_load_imm(Func* f, u32 b, Val dst, CfreeCgTypeId ty,
i64 imm) {
Inst* in = ir_emit(f, b, IR_LOAD_IMM);
@@ -176,6 +211,21 @@ static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c,
return in;
}
+/* Appends an IR_LOAD to block `b` that loads frame slot `fs` into value
+ * `dst`, and records the new instruction as the definition site of `dst`.
+ * The memory access is always described as 4 bytes wide, whatever `ty` is;
+ * `flags` is forwarded to the MemAccess. Returns the emitted instruction. */
+static Inst* emit_load_local(Func* f, u32 b, Val dst, FrameSlot fs,
+                             CfreeCgTypeId ty, u16 flags) {
+  Inst* load = ir_emit(f, b, IR_LOAD);
+
+  load->opnds = arena_array(f->arena, Operand, 2);
+  load->nopnds = 2;
+  load->opnds[0] = op_reg_(dst, ty);
+  load->opnds[1] = op_local_(fs, ty);
+
+  load->type = ty;
+  load->def = dst;
+  load->extra.mem = mem_local_(fs, ty, 4, flags);
+
+  /* The load was just appended, so it is the block's last instruction. */
+  f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+  f->val_def_block[dst] = b;
+  return load;
+}
+
static Inst* emit_call_void(Func* f, u32 b) {
Inst* in = ir_emit(f, b, IR_CALL);
IRCallAux* aux = arena_znew(f->arena, IRCallAux);
@@ -1091,6 +1141,125 @@ static void opt_post_rewrite_dce(void) {
tc_fini(&tc);
}
+/* Exercises two adjacent-instruction spill peepholes of opt_combine on
+ * functions flagged as already rewritten:
+ *   - a store of register operand 9 to a spill slot followed by a load of
+ *     that slot into the same register must leave only the store;
+ *   - a load of a spill slot into register operand 9 followed by a store of
+ *     that register back to the slot must leave only the load. */
+static void opt_combine_spill_peeps(void) {
+  TestCtx tc;
+  tc_init(&tc);
+
+  { /* store, then reload into the same register */
+    Func* fn = new_func(&tc);
+    fn->opt_rewritten = 1;
+    FrameSlot slot = add_frame_slot(fn, tc.i32, FS_SPILL, 4, 0);
+
+    Inst* spill = ir_emit(fn, fn->entry, IR_STORE);
+    spill->opnds = arena_array(fn->arena, Operand, 2);
+    spill->opnds[0] = op_local_(slot, tc.i32);
+    spill->opnds[1] = op_reg_(9, tc.i32);
+    spill->nopnds = 2;
+    spill->extra.mem = mem_local_(slot, tc.i32, 4, 0);
+
+    Inst* reload = ir_emit(fn, fn->entry, IR_LOAD);
+    reload->opnds = arena_array(fn->arena, Operand, 2);
+    reload->opnds[0] = op_reg_(9, tc.i32);
+    reload->opnds[1] = op_local_(slot, tc.i32);
+    reload->nopnds = 2;
+    reload->extra.mem = mem_local_(slot, tc.i32, 4, 0);
+
+    opt_combine(fn);
+    EXPECT(fn->blocks[fn->entry].ninsts == 1,
+           "store followed by same-reg spill reload should combine to one inst");
+    EXPECT((IROp)fn->blocks[fn->entry].insts[0].op == IR_STORE,
+           "remaining inst should be the spill store");
+  }
+
+  { /* reload, then write the same register back */
+    Func* fn = new_func(&tc);
+    fn->opt_rewritten = 1;
+    FrameSlot slot = add_frame_slot(fn, tc.i32, FS_SPILL, 4, 0);
+
+    Inst* reload = ir_emit(fn, fn->entry, IR_LOAD);
+    reload->opnds = arena_array(fn->arena, Operand, 2);
+    reload->opnds[0] = op_reg_(9, tc.i32);
+    reload->opnds[1] = op_local_(slot, tc.i32);
+    reload->nopnds = 2;
+    reload->extra.mem = mem_local_(slot, tc.i32, 4, 0);
+
+    Inst* writeback = ir_emit(fn, fn->entry, IR_STORE);
+    writeback->opnds = arena_array(fn->arena, Operand, 2);
+    writeback->opnds[0] = op_local_(slot, tc.i32);
+    writeback->opnds[1] = op_reg_(9, tc.i32);
+    writeback->nopnds = 2;
+    writeback->extra.mem = mem_local_(slot, tc.i32, 4, 0);
+
+    opt_combine(fn);
+    EXPECT(fn->blocks[fn->entry].ninsts == 1,
+           "spill reload followed by same-reg writeback should combine to one inst");
+    EXPECT((IROp)fn->blocks[fn->entry].insts[0].op == IR_LOAD,
+           "remaining inst should be the spill reload");
+  }
+
+  tc_fini(&tc);
+}
+
+/* Checks post-rewrite opt_dce on physical-register definitions:
+ *   - an IR_LOAD_IMM into register operand 19 followed by a bare IR_RET is
+ *     removed;
+ *   - the same IR_LOAD_IMM followed by a return built with
+ *     emit_ret_val(..., 19, ...) is kept. */
+static void opt_dce_physical_dead_defs(void) {
+  TestCtx tc;
+  tc_init(&tc);
+
+  { /* definition that nothing reads */
+    Func* fn = new_func(&tc);
+    fn->opt_rewritten = 1;
+    Inst* imm = ir_emit(fn, fn->entry, IR_LOAD_IMM);
+    imm->opnds = arena_array(fn->arena, Operand, 1);
+    imm->opnds[0] = op_reg_(19, tc.i32);
+    imm->nopnds = 1;
+    imm->extra.imm = 1;
+    ir_emit(fn, fn->entry, IR_RET);
+
+    opt_dce(fn);
+    EXPECT(count_op(fn, IR_LOAD_IMM) == 0,
+           "post-rewrite DCE should remove dead physical register defs");
+  }
+
+  { /* definition consumed by the return */
+    Func* fn = new_func(&tc);
+    fn->opt_rewritten = 1;
+    Inst* imm = ir_emit(fn, fn->entry, IR_LOAD_IMM);
+    imm->opnds = arena_array(fn->arena, Operand, 1);
+    imm->opnds[0] = op_reg_(19, tc.i32);
+    imm->nopnds = 1;
+    imm->extra.imm = 1;
+    emit_ret_val(fn, fn->entry, 19, tc.i32);
+
+    opt_dce(fn);
+    EXPECT(count_op(fn, IR_LOAD_IMM) == 1,
+           "post-rewrite DCE should keep physical defs used by ret");
+  }
+
+  tc_fini(&tc);
+}
+
+/* Checks that opt_dead_def_elim keeps loads whose memory access is
+ * observable even though the loaded value is never used: once for a load
+ * flagged MF_VOLATILE and once for a load flagged MF_ATOMIC. Each case runs
+ * machinize, CFG, loop-tree and liveness construction before the pass. */
+static void opt_dead_def_keeps_observable_loads(void) {
+  TestCtx tc;
+  tc_init(&tc);
+  MockCGTarget mock;
+  mock_init(&mock, tc.c);
+  static const Reg pool[] = {19};
+  static const Reg scratch[] = {9, 10};
+  mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
+  { /* volatile load from a volatile local slot */
+    Func* fn = new_func(&tc);
+    opt_machinize(fn, &mock.base);
+    FrameSlot slot = add_frame_slot(fn, tc.i32, FS_LOCAL, 4, FSF_VOLATILE);
+    Val unused = add_val(fn, tc.i32);
+    emit_load_local(fn, fn->entry, unused, slot, tc.i32, MF_VOLATILE);
+    opt_build_cfg(fn);
+    opt_build_loop_tree(fn);
+    opt_live_info(fn);
+    opt_dead_def_elim(fn);
+    EXPECT(count_op(fn, IR_LOAD) == 1,
+           "dead_def_elim should keep volatile loads even when the result dies");
+  }
+
+  { /* atomic load from an ordinary local slot */
+    Func* fn = new_func(&tc);
+    opt_machinize(fn, &mock.base);
+    FrameSlot slot = add_frame_slot(fn, tc.i32, FS_LOCAL, 4, 0);
+    Val unused = add_val(fn, tc.i32);
+    emit_load_local(fn, fn->entry, unused, slot, tc.i32, MF_ATOMIC);
+    opt_build_cfg(fn);
+    opt_build_loop_tree(fn);
+    opt_live_info(fn);
+    opt_dead_def_elim(fn);
+    EXPECT(count_op(fn, IR_LOAD) == 1,
+           "dead_def_elim should keep atomic loads even when the result dies");
+  }
+
+  tc_fini(&tc);
+}
+
static void opt_dead_def_elim_test(void) {
TestCtx tc;
tc_init(&tc);
@@ -1207,6 +1376,9 @@ int main(void) {
opt_spill_pressure();
opt_inline_asm_tied_fixed_regs();
opt_post_rewrite_dce();
+ opt_combine_spill_peeps();
+ opt_dce_physical_dead_defs();
+ opt_dead_def_keeps_observable_loads();
opt_dead_def_elim_test();
opt_emit_no_virtual_alloc();
simple_regalloc_reports_exact_used_regs();