commit 0849cc998838c260588811ad254b1aa8a5a8823c
parent f37c2ae1c3c2cb762a390861aaa6994d830f66ef
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 14:15:51 -0700
opt: fold safe single-use post-RA operands
Diffstat:
3 files changed, 568 insertions(+), 5 deletions(-)
diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -97,14 +97,14 @@ substitute a behaviorally similar shortcut without updating both documents.
## Remaining Todos
- Finish the general safe single-use fold portion of `opt_combine`:
- - [ ] Substitute one-use physical copies into users when the rewritten IR
+ - [x] Substitute one-use physical copies into users when the rewritten IR
form is guaranteed target-legal.
- - [ ] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and
+ - [x] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and
`cmp_branch` operands accepted by `CGTarget`.
- - [ ] Collapse physical copy chains before post-RA DCE.
- - [ ] Fold redundant `IR_CONVERT` chains, starting with identical
+ - [x] Collapse physical copy chains before post-RA DCE.
+ - [x] Fold redundant `IR_CONVERT` chains, starting with identical
conversion pairs.
- - [ ] Keep branch-shape cleanup out of `-O1` unless it is purely local and
+ - [x] Keep branch-shape cleanup out of `-O1` unless it is purely local and
does not require CFG/jump optimization.
- Continue tightening post-rewrite DCE:
- [ ] Model call clobbers and hard-reg call arguments explicitly.
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -946,9 +946,325 @@ static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) {
a->extra.mem.addr_space == b->extra.mem.addr_space;
}
+/*
+ * Reports whether `a` and `b` denote the same physical register.
+ *
+ * Both operands must be non-NULL OPK_REG operands that agree on register
+ * class and register number; anything else yields 0.
+ */
+static int same_phys_reg(const Operand* a, const Operand* b) {
+  if (!a || !b) return 0;
+  if (a->kind != OPK_REG || b->kind != OPK_REG) return 0;
+  if (a->cls != b->cls) return 0;
+  return a->v.reg == b->v.reg;
+}
+
+/*
+ * Reports whether operand `op` reads physical register `r`.
+ *
+ * An OPK_REG operand matches on class and register number. An OPK_INDIRECT
+ * operand matches only through its base register, and only when `r` is an
+ * RC_INT register. NULL arguments, a non-register `r`, and every other
+ * operand kind yield 0.
+ */
+static int operand_uses_phys_reg(const Operand* op, const Operand* r) {
+  int hit = 0;
+  if (op && r && r->kind == OPK_REG) {
+    if (op->kind == OPK_REG) {
+      hit = op->cls == r->cls && op->v.reg == r->v.reg;
+    } else if (op->kind == OPK_INDIRECT) {
+      hit = r->cls == RC_INT && op->v.ind.base == r->v.reg;
+    }
+  }
+  return hit;
+}
+
+/* Use count (0 or 1) that operand `op` contributes for physical register `r`. */
+static int count_operand_phys_uses(const Operand* op, const Operand* r) {
+  if (operand_uses_phys_reg(op, r)) return 1;
+  return 0;
+}
+
+/*
+ * Counts the operands of ABI value `v` that read physical register `r`.
+ *
+ * The storage operand and each of the `nparts` part operands are counted
+ * independently. A NULL `v` counts as zero uses.
+ */
+static int abi_uses_phys_reg(const CGABIValue* v, const Operand* r) {
+  if (!v) return 0;
+  int total = count_operand_phys_uses(&v->storage, r);
+  u32 part = 0;
+  while (part < v->nparts) {
+    total += count_operand_phys_uses(&v->parts[part].op, r);
+    ++part;
+  }
+  return total;
+}
+
+/*
+ * Counts reads of `r` among operands [first, end) of `in`. Indices the
+ * instruction does not have (>= nopnds) are skipped.
+ */
+static int count_opnd_span_uses(const Inst* in, u32 first, u32 end,
+                                const Operand* r) {
+  int total = 0;
+  for (u32 k = first; k < end && k < in->nopnds; ++k)
+    total += count_operand_phys_uses(&in->opnds[k], r);
+  return total;
+}
+
+/*
+ * Counts how many operands of `in` read physical register `r`.
+ *
+ * Which operand positions count as reads depends on the opcode; positions
+ * that are not listed for an opcode are ignored. Calls, returns, scope
+ * begins, asm blocks and intrinsics are inspected through their aux record
+ * (a missing aux record counts as zero). Opcodes not listed here report zero.
+ */
+static int inst_uses_phys_reg(const Inst* in, const Operand* r) {
+  switch ((IROp)in->op) {
+    /* Operand 1 only. */
+    case IR_COPY:
+    case IR_CONVERT:
+    case IR_UNOP:
+    case IR_VA_ARG:
+    case IR_LOAD:
+    case IR_ADDR_OF:
+    case IR_BITFIELD_LOAD:
+    case IR_ATOMIC_LOAD:
+    case IR_ALLOCA:
+      return count_opnd_span_uses(in, 1, 2, r);
+    /* Operands 1 and 2. */
+    case IR_BINOP:
+    case IR_CMP:
+    case IR_ATOMIC_RMW:
+      return count_opnd_span_uses(in, 1, 3, r);
+    /* Operands 0 and 1. */
+    case IR_STORE:
+    case IR_AGG_COPY:
+    case IR_AGG_SET:
+    case IR_BITFIELD_STORE:
+    case IR_VA_COPY:
+    case IR_ATOMIC_STORE:
+      return count_opnd_span_uses(in, 0, 2, r);
+    /* Operand 0 only. */
+    case IR_VA_START:
+    case IR_VA_END:
+      return count_opnd_span_uses(in, 0, 1, r);
+    /* Operands 2, 3 and 4. */
+    case IR_ATOMIC_CAS:
+      return count_opnd_span_uses(in, 2, 5, r);
+    /* Every operand. */
+    case IR_CMP_BRANCH:
+    case IR_CONDBR:
+      return count_opnd_span_uses(in, 0, in->nopnds, r);
+    case IR_CALL: {
+      IRCallAux* call = (IRCallAux*)in->extra.aux;
+      if (!call) return 0;
+      int total = count_operand_phys_uses(&call->desc.callee, r);
+      for (u32 a = 0; a < call->desc.nargs; ++a)
+        total += abi_uses_phys_reg(&call->desc.args[a], r);
+      return total;
+    }
+    case IR_RET: {
+      IRRetAux* ret = (IRRetAux*)in->extra.aux;
+      return (ret && ret->present) ? abi_uses_phys_reg(&ret->val, r) : 0;
+    }
+    case IR_SCOPE_BEGIN: {
+      IRScopeAux* scope = (IRScopeAux*)in->extra.aux;
+      return scope ? count_operand_phys_uses(&scope->desc.cond, r) : 0;
+    }
+    case IR_ASM_BLOCK: {
+      IRAsmAux* as = (IRAsmAux*)in->extra.aux;
+      int total = 0;
+      if (!as) return 0;
+      for (u32 k = 0; k < as->nin; ++k)
+        total += count_operand_phys_uses(&as->in_ops[k], r);
+      return total;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* intr = (IRIntrinAux*)in->extra.aux;
+      int total = 0;
+      if (!intr) return 0;
+      for (u32 k = 0; k < intr->narg; ++k)
+        total += count_operand_phys_uses(&intr->args[k], r);
+      return total;
+    }
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Counts the operands of ABI value `v` that are exactly physical register
+ * `r`: the storage operand plus each of the `nparts` part operands.
+ * A NULL `v` counts as zero.
+ */
+static int abi_defines_phys_reg(const CGABIValue* v, const Operand* r) {
+  if (!v) return 0;
+  int hits = same_phys_reg(&v->storage, r) ? 1 : 0;
+  for (u32 p = 0; p < v->nparts; ++p)
+    hits += same_phys_reg(&v->parts[p].op, r) ? 1 : 0;
+  return hits;
+}
+
+/*
+ * Reports (1/0) whether `in` writes physical register `r`.
+ *
+ * Most value-producing opcodes write operand 0. A call writes the registers
+ * of its return ABI value, an atomic CAS writes operands 0 and 1, and asm
+ * blocks and intrinsics write their output operand lists. A NULL or
+ * non-register `r`, a missing aux record, and any opcode not listed here
+ * yield 0.
+ */
+static int inst_defines_phys_reg(const Inst* in, const Operand* r) {
+  if (!r) return 0;
+  if (r->kind != OPK_REG) return 0;
+  switch ((IROp)in->op) {
+    case IR_CALL: {
+      IRCallAux* call = (IRCallAux*)in->extra.aux;
+      if (!call) return 0;
+      return abi_defines_phys_reg(&call->desc.ret, r) != 0;
+    }
+    case IR_ATOMIC_CAS: {
+      /* Operands 0 and 1 are both results, when present. */
+      for (u32 k = 0; k < 2u && k < in->nopnds; ++k)
+        if (same_phys_reg(&in->opnds[k], r)) return 1;
+      return 0;
+    }
+    case IR_ASM_BLOCK: {
+      IRAsmAux* as = (IRAsmAux*)in->extra.aux;
+      int hit = 0;
+      if (!as) return 0;
+      for (u32 k = 0; !hit && k < as->nout; ++k)
+        hit = same_phys_reg(&as->out_ops[k], r);
+      return hit;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* intr = (IRIntrinAux*)in->extra.aux;
+      int hit = 0;
+      if (!intr) return 0;
+      for (u32 k = 0; !hit && k < intr->ndst; ++k)
+        hit = same_phys_reg(&intr->dsts[k], r);
+      return hit;
+    }
+    /* Plain single-result instructions: the result is operand 0. */
+    case IR_LOAD_IMM:
+    case IR_LOAD_CONST:
+    case IR_COPY:
+    case IR_LOAD:
+    case IR_ADDR_OF:
+    case IR_TLS_ADDR_OF:
+    case IR_BITFIELD_LOAD:
+    case IR_BINOP:
+    case IR_UNOP:
+    case IR_CMP:
+    case IR_CONVERT:
+    case IR_ALLOCA:
+    case IR_VA_ARG:
+    case IR_ATOMIC_LOAD:
+    case IR_ATOMIC_RMW:
+      if (in->nopnds < 1) return 0;
+      return same_phys_reg(&in->opnds[0], r);
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Reports whether some value in the live-out set of `bl` is allocated to
+ * physical register `r`.
+ *
+ * Only values whose alloc_kind is OPT_ALLOC_HARD are considered, and they
+ * must match `r` on both class and register number. Returns 0 when the
+ * function has no val_info table, the block has no live_out set, or `r` is
+ * NULL or not a register, so missing liveness data reads as "not live-out".
+ */
+static int block_live_out_has_phys_reg(Func* f, const Block* bl,
+                                       const Operand* r) {
+  if (!f->val_info || !bl->live_out) return 0;
+  if (!r || r->kind != OPK_REG) return 0;
+  for (Val v = 1; v < f->nvals; ++v) {
+    if (bit_has(bl->live_out, v) &&
+        f->val_info[v].alloc_kind == OPT_ALLOC_HARD &&
+        f->val_info[v].cls == r->cls &&
+        f->val_info[v].hard_reg == r->v.reg) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Reports whether operand index `idx` of `in` is a position where a copied
+ * register may be replaced by the copy's source. Opcodes that are not listed
+ * accept no position.
+ */
+static int copy_fold_slot(const Inst* in, u32 idx) {
+  switch ((IROp)in->op) {
+    case IR_CONDBR:
+      return idx == 0;
+    case IR_CMP_BRANCH:
+      return idx <= 1;
+    case IR_COPY:
+    case IR_CONVERT:
+    case IR_UNOP:
+    case IR_STORE:
+    case IR_ALLOCA:
+      return idx == 1;
+    case IR_BINOP:
+    case IR_CMP:
+      return idx >= 1 && idx <= 2;
+    case IR_ATOMIC_RMW:
+      return idx == 2;
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Reports whether operand index `idx` of `in` is a position where a register
+ * may be replaced by an immediate: operands 1 and 2 of IR_BINOP and IR_CMP,
+ * operands 0 and 1 of IR_CMP_BRANCH, and nothing for any other opcode.
+ */
+static int imm_fold_slot(const Inst* in, u32 idx) {
+  IROp op = (IROp)in->op;
+  if (op == IR_CMP_BRANCH) return idx <= 1;
+  if (op == IR_BINOP || op == IR_CMP) return idx >= 1 && idx <= 2;
+  return 0;
+}
+
+/*
+ * Reports whether `a` and `b` are the same conversion: both are IR_CONVERT
+ * with at least two operands, carry the same extra.imm selector, and agree on
+ * the operand types of both operand 0 and operand 1. The registers involved
+ * are not compared.
+ */
+static int identical_convert_pair(const Inst* a, const Inst* b) {
+  int both_converts = (IROp)a->op == IR_CONVERT && (IROp)b->op == IR_CONVERT;
+  if (!both_converts) return 0;
+  if (a->nopnds < 2 || b->nopnds < 2) return 0;
+  if (a->extra.imm != b->extra.imm) return 0;
+  if (a->opnds[0].type != b->opnds[0].type) return 0;
+  return a->opnds[1].type == b->opnds[1].type;
+}
+/*
+ * Looks for the one instruction after `def_i` in `bl` that reads register
+ * `def` through an operand slot the requested fold is allowed to rewrite.
+ *
+ *   f, bl      function and block; only instructions of `bl` are scanned.
+ *   def_i      index in `bl` of the producing instruction.
+ *   def        register written by the producer.
+ *   src        register the producer read; consulted only when `check_src`
+ *              is nonzero and `src` is non-NULL.
+ *   check_src  nonzero to reject a read of `def` that comes after an
+ *              instruction redefining `src`.
+ *   imm_fold   nonzero to accept slots per imm_fold_slot() instead of
+ *              copy_fold_slot().
+ *   conv_fold  nonzero to accept only operand 1 of a convert identical to
+ *              the producer; takes precedence over `imm_fold`.
+ *   use_i_out, op_i_out
+ *              receive the user's instruction index and operand index;
+ *              written only on success.
+ *
+ * Returns 1 on success and 0 otherwise. The scan stops at the first call or
+ * at the first instruction that redefines `def`. It fails when `def` is read
+ * more than once (or never) before that point, when the read is not a direct
+ * register operand in an accepted slot, or when the scan ran to the end of
+ * the block and `def` is reported live-out.
+ */
+static int find_single_direct_use(Func* f, Block* bl, u32 def_i,
+ const Operand* def, const Operand* src,
+ int check_src, int imm_fold, int conv_fold,
+ u32* use_i_out, u32* op_i_out) {
+ int total_uses = 0;
+ int source_clobbered = 0;
+ int killed = 0;
+ int found = 0;
+ u32 found_i = 0;
+ u32 found_op = 0;
+
+ for (u32 i = def_i + 1u; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ int uses = inst_uses_phys_reg(in, def);
+ if (uses) {
+ if (check_src && source_clobbered) return 0; /* src changed before this read */
+ total_uses += uses;
+ if (total_uses > 1) return 0; /* more than one read: not single-use */
+ for (u32 oi = 0; oi < in->nopnds; ++oi) {
+ int ok = conv_fold
+ ? (oi == 1 &&
+ identical_convert_pair(&bl->insts[def_i], in))
+ : (imm_fold ? imm_fold_slot(in, oi)
+ : copy_fold_slot(in, oi));
+ if (!ok) continue;
+ if (!same_phys_reg(&in->opnds[oi], def)) continue; /* slot must hold def itself */
+ found_i = i;
+ found_op = oi;
+ found = 1;
+ }
+ }
+ /* Kill checks run after the use check, so an instruction may both read and end `def`. */
+ if ((IROp)in->op == IR_CALL) { /* every call ends the scan */
+ if (check_src) source_clobbered = 1;
+ killed = 1;
+ break;
+ }
+ if (check_src && src && inst_defines_phys_reg(in, src))
+ source_clobbered = 1;
+ if (inst_defines_phys_reg(in, def)) {
+ killed = 1;
+ break;
+ }
+ }
+ /* A read counted above that was not in an accepted slot leaves `found` at 0. */
+ if (total_uses != 1) return 0;
+ if (!found) return 0;
+ if (!killed && block_live_out_has_phys_reg(f, bl, def)) return 0; /* def still live past the block */
+ *use_i_out = found_i;
+ *op_i_out = found_op;
+ return 1;
+}
+
+/*
+ * Folds single-use producers in `bl` into their one user, in place.
+ *
+ * Each instruction is tried as one of three producers, in this order:
+ *   - IR_COPY between two distinct registers: the user reads the copy source.
+ *   - IR_LOAD_IMM into a register: the user's operand becomes an OPK_IMM
+ *     carrying the producer's extra.imm.
+ *   - IR_CONVERT from register to register: a following identical convert
+ *     reads the first convert's source.
+ * Only the user is rewritten; the producer stays in the block.
+ *
+ * The copy and immediate folds replace the user's whole operand, type
+ * included. They are therefore skipped when the user reads the register with
+ * a different operand type than the producer used, so a fold can never
+ * silently retype (and possibly re-size) the user's access.
+ */
+static void opt_combine_fold_block(Func* f, Block* bl) {
+  for (u32 i = 0; i < bl->ninsts; ++i) {
+    Inst* in = &bl->insts[i];
+    u32 use_i = 0;
+    u32 op_i = 0;
+    if ((IROp)in->op == IR_COPY && in->nopnds >= 2 &&
+        in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG &&
+        !same_phys_reg(&in->opnds[0], &in->opnds[1]) &&
+        find_single_direct_use(f, bl, i, &in->opnds[0], &in->opnds[1], 1, 0,
+                               0, &use_i, &op_i)) {
+      Operand* use = &bl->insts[use_i].opnds[op_i];
+      /* Copy dst, copy src and the user must all agree on the type. */
+      if (use->type == in->opnds[0].type && use->type == in->opnds[1].type)
+        *use = in->opnds[1];
+      continue;
+    }
+
+    if ((IROp)in->op == IR_LOAD_IMM && in->nopnds >= 1 &&
+        in->opnds[0].kind == OPK_REG &&
+        find_single_direct_use(f, bl, i, &in->opnds[0], NULL, 0, 1, 0,
+                               &use_i, &op_i)) {
+      Operand* use = &bl->insts[use_i].opnds[op_i];
+      /* The immediate inherits the producer's type; require the same read. */
+      if (use->type == in->opnds[0].type) {
+        Operand imm = in->opnds[0];
+        imm.kind = OPK_IMM;
+        imm.v.imm = in->extra.imm;
+        *use = imm;
+      }
+      continue;
+    }
+
+    if ((IROp)in->op == IR_CONVERT && in->nopnds >= 2 &&
+        in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG &&
+        find_single_direct_use(f, bl, i, &in->opnds[0], &in->opnds[1], 1, 0,
+                               1, &use_i, &op_i)) {
+      /* identical_convert_pair() already matched the source operand types. */
+      bl->insts[use_i].opnds[op_i] = in->opnds[1];
+    }
+  }
+}
+
void opt_combine(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
+ opt_combine_fold_block(f, bl);
u32 w = 0;
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -211,6 +211,23 @@ static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c,
return in;
}
+/*
+ * Appends an IR_CONVERT to block `b` of `f` and returns it.
+ *
+ * Operand 0 is register `dst` and operand 1 is register `src`, both typed
+ * `ty`; `k` is stored in extra.imm. When `dst` is below f->nvals, the
+ * def-site tables record block `b` and the index of its last instruction.
+ */
+static Inst* emit_convert(Func* f, u32 b, Val dst, Val src, CfreeCgTypeId ty,
+                          ConvKind k) {
+  Inst* cv = ir_emit(f, b, IR_CONVERT);
+  Operand* ops = arena_array(f->arena, Operand, 2);
+  ops[0] = op_reg_(dst, ty);
+  ops[1] = op_reg_(src, ty);
+  cv->opnds = ops;
+  cv->nopnds = 2;
+  cv->def = dst;
+  cv->type = ty;
+  cv->extra.imm = k;
+  if (dst >= f->nvals) return cv;
+  f->val_def_block[dst] = b;
+  f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+  return cv;
+}
+
static Inst* emit_load_local(Func* f, u32 b, Val dst, FrameSlot fs,
CfreeCgTypeId ty, u16 flags) {
Inst* in = ir_emit(f, b, IR_LOAD);
@@ -1194,6 +1211,233 @@ static void opt_combine_spill_peeps(void) {
tc_fini(&tc);
}
+static void opt_combine_single_use_copy_and_imm(void) { /*
+ * Positive cases for the single-use folds in opt_combine():
+ *   f: a load_imm and a register copy each feed one operand of a binop;
+ *   g: a load_imm feeds operand 2 of a cmp;
+ *   h: a load_imm feeds operand 0 of a cmp_branch.
+ * For f, opt_dce() is also expected to delete both folded producers.
+ */
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ f->opt_rewritten = 1;
+ /* r9 = 7 */
+ Inst* li = ir_emit(f, f->entry, IR_LOAD_IMM);
+ li->opnds = arena_array(f->arena, Operand, 1);
+ li->opnds[0] = op_reg_(9, tc.i32);
+ li->nopnds = 1;
+ li->extra.imm = 7;
+ /* r10 = r11 */
+ Inst* cp = ir_emit(f, f->entry, IR_COPY);
+ cp->opnds = arena_array(f->arena, Operand, 2);
+ cp->opnds[0] = op_reg_(10, tc.i32);
+ cp->opnds[1] = op_reg_(11, tc.i32);
+ cp->nopnds = 2;
+ /* r19 = r9 + r10: the only reader of both producers */
+ Inst* add = ir_emit(f, f->entry, IR_BINOP);
+ add->opnds = arena_array(f->arena, Operand, 3);
+ add->opnds[0] = op_reg_(19, tc.i32);
+ add->opnds[1] = op_reg_(9, tc.i32);
+ add->opnds[2] = op_reg_(10, tc.i32);
+ add->nopnds = 3;
+ add->extra.imm = BO_IADD;
+ emit_ret_val(f, f->entry, 19, tc.i32);
+ /* The binop is re-read at index 2, i.e. it is expected to stay third in the block. */
+ opt_combine(f);
+ add = &f->blocks[f->entry].insts[2];
+ EXPECT(add->opnds[1].kind == OPK_IMM && add->opnds[1].v.imm == 7,
+ "one-use load_imm should fold into binop immediate operand");
+ EXPECT(add->opnds[2].kind == OPK_REG && add->opnds[2].v.reg == 11,
+ "one-use physical copy should fold into binop register operand");
+ /* With their reader rewritten, both producers should now be removable. */
+ opt_dce(f);
+ EXPECT(count_op(f, IR_LOAD_IMM) == 0,
+ "folded load_imm should be removed by post-rewrite DCE");
+ EXPECT(count_op(f, IR_COPY) == 0,
+ "folded physical copy should be removed by post-rewrite DCE");
+ /* Case g: r9 = 0 feeding operand 2 of a cmp. */
+ Func* g = new_func(&tc);
+ g->opt_rewritten = 1;
+ li = ir_emit(g, g->entry, IR_LOAD_IMM);
+ li->opnds = arena_array(g->arena, Operand, 1);
+ li->opnds[0] = op_reg_(9, tc.i32);
+ li->nopnds = 1;
+ li->extra.imm = 0;
+
+ Inst* cmp = ir_emit(g, g->entry, IR_CMP);
+ cmp->opnds = arena_array(g->arena, Operand, 3);
+ cmp->opnds[0] = op_reg_(19, tc.i32);
+ cmp->opnds[1] = op_reg_(10, tc.i32);
+ cmp->opnds[2] = op_reg_(9, tc.i32);
+ cmp->nopnds = 3;
+ cmp->extra.imm = CMP_EQ;
+ emit_ret_val(g, g->entry, 19, tc.i32);
+
+ opt_combine(g);
+ cmp = &g->blocks[g->entry].insts[1];
+ EXPECT(cmp->opnds[2].kind == OPK_IMM && cmp->opnds[2].v.imm == 0,
+ "one-use load_imm should fold into cmp immediate operand");
+ /* Case h: r9 = 1 feeding operand 0 of a cmp_branch with two successors. */
+ Func* h = new_func(&tc);
+ h->opt_rewritten = 1;
+ u32 taken = ir_block_new(h);
+ u32 fallthrough = ir_block_new(h);
+ li = ir_emit(h, h->entry, IR_LOAD_IMM);
+ li->opnds = arena_array(h->arena, Operand, 1);
+ li->opnds[0] = op_reg_(9, tc.i32);
+ li->nopnds = 1;
+ li->extra.imm = 1;
+
+ Inst* br = ir_emit(h, h->entry, IR_CMP_BRANCH);
+ br->opnds = arena_array(h->arena, Operand, 2);
+ br->opnds[0] = op_reg_(9, tc.i32);
+ br->opnds[1] = op_reg_(10, tc.i32);
+ br->nopnds = 2;
+ br->extra.imm = CMP_NE;
+ h->blocks[h->entry].succ[0] = taken;
+ h->blocks[h->entry].succ[1] = fallthrough;
+ h->blocks[h->entry].nsucc = 2;
+
+ opt_combine(h);
+ br = &h->blocks[h->entry].insts[1];
+ EXPECT(br->opnds[0].kind == OPK_IMM && br->opnds[0].v.imm == 1,
+ "one-use load_imm should fold into cmp_branch immediate operand");
+ tc_fini(&tc);
+}
+/*
+ * Negative cases: opt_combine() must leave the reader's operand alone when
+ *   f:      the copy source is overwritten between the copy and its reader;
+ *   call_f: a call sits between the copy and its reader;
+ *   g:      the load_imm result is read by two instructions.
+ */
+static void opt_combine_keeps_unsafe_and_multiuse_defs(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ f->opt_rewritten = 1;
+ /* r10 = r11 */
+ Inst* cp = ir_emit(f, f->entry, IR_COPY);
+ cp->opnds = arena_array(f->arena, Operand, 2);
+ cp->opnds[0] = op_reg_(10, tc.i32);
+ cp->opnds[1] = op_reg_(11, tc.i32);
+ cp->nopnds = 2;
+ /* r11 = 4: overwrites the copy source before the copy is read */
+ Inst* clobber = ir_emit(f, f->entry, IR_LOAD_IMM);
+ clobber->opnds = arena_array(f->arena, Operand, 1);
+ clobber->opnds[0] = op_reg_(11, tc.i32);
+ clobber->nopnds = 1;
+ clobber->extra.imm = 4;
+ /* r19 = r10 + r12 */
+ Inst* add = ir_emit(f, f->entry, IR_BINOP);
+ add->opnds = arena_array(f->arena, Operand, 3);
+ add->opnds[0] = op_reg_(19, tc.i32);
+ add->opnds[1] = op_reg_(10, tc.i32);
+ add->opnds[2] = op_reg_(12, tc.i32);
+ add->nopnds = 3;
+ add->extra.imm = BO_IADD;
+ emit_ret_val(f, f->entry, 19, tc.i32);
+
+ opt_combine(f);
+ add = &f->blocks[f->entry].insts[2];
+ EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 10,
+ "copy fold should not cross a clobber of the source register");
+ /* Case call_f: same copy and binop, separated by a call. */
+ Func* call_f = new_func(&tc);
+ call_f->opt_rewritten = 1;
+ cp = ir_emit(call_f, call_f->entry, IR_COPY);
+ cp->opnds = arena_array(call_f->arena, Operand, 2);
+ cp->opnds[0] = op_reg_(10, tc.i32);
+ cp->opnds[1] = op_reg_(11, tc.i32);
+ cp->nopnds = 2;
+ emit_call_void(call_f, call_f->entry);
+
+ add = ir_emit(call_f, call_f->entry, IR_BINOP);
+ add->opnds = arena_array(call_f->arena, Operand, 3);
+ add->opnds[0] = op_reg_(19, tc.i32);
+ add->opnds[1] = op_reg_(10, tc.i32);
+ add->opnds[2] = op_reg_(12, tc.i32);
+ add->nopnds = 3;
+ add->extra.imm = BO_IADD;
+ emit_ret_val(call_f, call_f->entry, 19, tc.i32);
+
+ opt_combine(call_f);
+ add = &call_f->blocks[call_f->entry].insts[2];
+ EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 10,
+ "copy fold should not cross a call clobber barrier");
+ /* Case g: r9 = 7 is read by both a binop and a cmp. */
+ Func* g = new_func(&tc);
+ g->opt_rewritten = 1;
+ Inst* li = ir_emit(g, g->entry, IR_LOAD_IMM);
+ li->opnds = arena_array(g->arena, Operand, 1);
+ li->opnds[0] = op_reg_(9, tc.i32);
+ li->nopnds = 1;
+ li->extra.imm = 7;
+
+ add = ir_emit(g, g->entry, IR_BINOP);
+ add->opnds = arena_array(g->arena, Operand, 3);
+ add->opnds[0] = op_reg_(19, tc.i32);
+ add->opnds[1] = op_reg_(9, tc.i32);
+ add->opnds[2] = op_reg_(12, tc.i32);
+ add->nopnds = 3;
+ add->extra.imm = BO_IADD;
+
+ Inst* cmp = ir_emit(g, g->entry, IR_CMP);
+ cmp->opnds = arena_array(g->arena, Operand, 3);
+ cmp->opnds[0] = op_reg_(20, tc.i32);
+ cmp->opnds[1] = op_reg_(9, tc.i32);
+ cmp->opnds[2] = op_reg_(13, tc.i32);
+ cmp->nopnds = 3;
+ cmp->extra.imm = CMP_EQ;
+ emit_ret_val(g, g->entry, 19, tc.i32);
+
+ opt_combine(g);
+ add = &g->blocks[g->entry].insts[1];
+ EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 9,
+ "multi-use load_imm should not fold into the first user");
+ tc_fini(&tc);
+}
+/*
+ * Chain cases for opt_combine():
+ *   f: two chained register copies feeding a binop collapse to the first
+ *      copy's source, and opt_dce() then removes both copies;
+ *   g: two back-to-back CV_BITCAST converts of the same type fold so the
+ *      second reads the first one's source, and opt_dce() leaves exactly
+ *      one convert.
+ */
+static void opt_combine_copy_chains_and_convert_pairs(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ f->opt_rewritten = 1;
+ /* r10 = r11 */
+ Inst* cp = ir_emit(f, f->entry, IR_COPY);
+ cp->opnds = arena_array(f->arena, Operand, 2);
+ cp->opnds[0] = op_reg_(10, tc.i32);
+ cp->opnds[1] = op_reg_(11, tc.i32);
+ cp->nopnds = 2;
+ /* r12 = r10 */
+ cp = ir_emit(f, f->entry, IR_COPY);
+ cp->opnds = arena_array(f->arena, Operand, 2);
+ cp->opnds[0] = op_reg_(12, tc.i32);
+ cp->opnds[1] = op_reg_(10, tc.i32);
+ cp->nopnds = 2;
+ /* r19 = r12 + r13 */
+ Inst* add = ir_emit(f, f->entry, IR_BINOP);
+ add->opnds = arena_array(f->arena, Operand, 3);
+ add->opnds[0] = op_reg_(19, tc.i32);
+ add->opnds[1] = op_reg_(12, tc.i32);
+ add->opnds[2] = op_reg_(13, tc.i32);
+ add->nopnds = 3;
+ add->extra.imm = BO_IADD;
+ emit_ret_val(f, f->entry, 19, tc.i32);
+
+ opt_combine(f);
+ add = &f->blocks[f->entry].insts[2];
+ EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 11,
+ "single-use physical copy chains should collapse before DCE");
+ opt_dce(f);
+ EXPECT(count_op(f, IR_COPY) == 0,
+ "collapsed copy chain should be removed by post-rewrite DCE");
+ /* Case g: r10 = conv r11; r12 = conv r10; ret r12. */
+ Func* g = new_func(&tc);
+ g->opt_rewritten = 1;
+ emit_convert(g, g->entry, 10, 11, tc.i32, CV_BITCAST);
+ emit_convert(g, g->entry, 12, 10, tc.i32, CV_BITCAST);
+ emit_ret_val(g, g->entry, 12, tc.i32);
+
+ opt_combine(g);
+ Inst* cv = &g->blocks[g->entry].insts[1];
+ EXPECT(cv->opnds[1].kind == OPK_REG && cv->opnds[1].v.reg == 11,
+ "identical one-use convert pairs should fold to the original source");
+ opt_dce(g);
+ EXPECT(count_op(g, IR_CONVERT) == 1,
+ "folded convert producer should be removed by post-rewrite DCE");
+ tc_fini(&tc);
+}
+
static void opt_dce_physical_dead_defs(void) {
TestCtx tc;
tc_init(&tc);
@@ -1377,6 +1621,9 @@ int main(void) {
opt_inline_asm_tied_fixed_regs();
opt_post_rewrite_dce();
opt_combine_spill_peeps();
+ opt_combine_single_use_copy_and_imm();
+ opt_combine_keeps_unsafe_and_multiuse_defs();
+ opt_combine_copy_chains_and_convert_pairs();
opt_dce_physical_dead_defs();
opt_dead_def_keeps_observable_loads();
opt_dead_def_elim_test();