kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 0849cc998838c260588811ad254b1aa8a5a8823c
parent f37c2ae1c3c2cb762a390861aaa6994d830f66ef
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 14:15:51 -0700

opt: fold safe single-use post-RA operands

Diffstat:
M doc/OPT1.md | 10 +++++-----
M src/opt/pass_lower.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/opt/opt_test.c | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 568 insertions(+), 5 deletions(-)

diff --git a/doc/OPT1.md b/doc/OPT1.md @@ -97,14 +97,14 @@ substitute a behaviorally similar shortcut without updating both documents. ## Remaining Todos - Finish the general safe single-use fold portion of `opt_combine`: - - [ ] Substitute one-use physical copies into users when the rewritten IR + - [x] Substitute one-use physical copies into users when the rewritten IR form is guaranteed target-legal. - - [ ] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and + - [x] Fold one-use integer `load_imm` defs into `binop`, `cmp`, and `cmp_branch` operands accepted by `CGTarget`. - - [ ] Collapse physical copy chains before post-RA DCE. - - [ ] Fold redundant `IR_CONVERT` chains, starting with identical + - [x] Collapse physical copy chains before post-RA DCE. + - [x] Fold redundant `IR_CONVERT` chains, starting with identical conversion pairs. - - [ ] Keep branch-shape cleanup out of `-O1` unless it is purely local and + - [x] Keep branch-shape cleanup out of `-O1` unless it is purely local and does not require CFG/jump optimization. - Continue tightening post-rewrite DCE: - [ ] Model call clobbers and hard-reg call arguments explicitly. diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -946,9 +946,325 @@ static int same_spill_slot_and_size(Func* f, const Inst* a, const Inst* b) { a->extra.mem.addr_space == b->extra.mem.addr_space; } +static int same_phys_reg(const Operand* a, const Operand* b) { + return a && b && a->kind == OPK_REG && b->kind == OPK_REG && + a->cls == b->cls && a->v.reg == b->v.reg; +} + +static int operand_uses_phys_reg(const Operand* op, const Operand* r) { + if (!op || !r || r->kind != OPK_REG) return 0; + if (op->kind == OPK_REG) + return op->cls == r->cls && op->v.reg == r->v.reg; + if (op->kind == OPK_INDIRECT) + return r->cls == RC_INT && op->v.ind.base == r->v.reg; + return 0; +} + +static int count_operand_phys_uses(const Operand* op, const Operand* r) { + return operand_uses_phys_reg(op, r) ? 
1 : 0; +} + +static int abi_uses_phys_reg(const CGABIValue* v, const Operand* r) { + int n = 0; + if (!v) return 0; + n += count_operand_phys_uses(&v->storage, r); + for (u32 i = 0; i < v->nparts; ++i) + n += count_operand_phys_uses(&v->parts[i].op, r); + return n; +} + +static int inst_uses_phys_reg(const Inst* in, const Operand* r) { + int n = 0; + switch ((IROp)in->op) { + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + case IR_VA_ARG: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_LOAD: + case IR_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_ATOMIC_LOAD: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_BINOP: + case IR_CMP: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + break; + case IR_STORE: + case IR_AGG_COPY: + case IR_AGG_SET: + case IR_BITFIELD_STORE: + case IR_VA_COPY: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + n += count_operand_phys_uses(&aux->desc.callee, r); + for (u32 i = 0; i < aux->desc.nargs; ++i) + n += abi_uses_phys_reg(&aux->desc.args[i], r); + break; + } + case IR_CMP_BRANCH: + case IR_CONDBR: + for (u32 i = 0; i < in->nopnds; ++i) + n += count_operand_phys_uses(&in->opnds[i], r); + break; + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) n += abi_uses_phys_reg(&aux->val, r); + break; + } + case IR_SCOPE_BEGIN: { + IRScopeAux* aux = (IRScopeAux*)in->extra.aux; + if (aux) n += count_operand_phys_uses(&aux->desc.cond, r); + break; + } + case IR_ALLOCA: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_VA_START: + case IR_VA_END: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + 
break; + case IR_ATOMIC_STORE: + if (in->nopnds >= 1) n += count_operand_phys_uses(&in->opnds[0], r); + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + break; + case IR_ATOMIC_RMW: + if (in->nopnds >= 2) n += count_operand_phys_uses(&in->opnds[1], r); + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + break; + case IR_ATOMIC_CAS: + if (in->nopnds >= 3) n += count_operand_phys_uses(&in->opnds[2], r); + if (in->nopnds >= 4) n += count_operand_phys_uses(&in->opnds[3], r); + if (in->nopnds >= 5) n += count_operand_phys_uses(&in->opnds[4], r); + break; + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->nin; ++i) + n += count_operand_phys_uses(&aux->in_ops[i], r); + break; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) break; + for (u32 i = 0; i < aux->narg; ++i) + n += count_operand_phys_uses(&aux->args[i], r); + break; + } + default: + break; + } + return n; +} + +static int abi_defines_phys_reg(const CGABIValue* v, const Operand* r) { + int n = 0; + if (!v) return 0; + if (same_phys_reg(&v->storage, r)) ++n; + for (u32 i = 0; i < v->nparts; ++i) + if (same_phys_reg(&v->parts[i].op, r)) ++n; + return n; +} + +static int inst_defines_phys_reg(const Inst* in, const Operand* r) { + if (!r || r->kind != OPK_REG) return 0; + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_LOAD_CONST: + case IR_COPY: + case IR_LOAD: + case IR_ADDR_OF: + case IR_TLS_ADDR_OF: + case IR_BITFIELD_LOAD: + case IR_BINOP: + case IR_UNOP: + case IR_CMP: + case IR_CONVERT: + case IR_ALLOCA: + case IR_VA_ARG: + case IR_ATOMIC_LOAD: + case IR_ATOMIC_RMW: + return in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r); + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + return aux && abi_defines_phys_reg(&aux->desc.ret, r); + } + case IR_ATOMIC_CAS: + return (in->nopnds >= 1 && same_phys_reg(&in->opnds[0], r)) || + (in->nopnds >= 2 && 
same_phys_reg(&in->opnds[1], r)); + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) return 0; + for (u32 i = 0; i < aux->nout; ++i) + if (same_phys_reg(&aux->out_ops[i], r)) return 1; + return 0; + } + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + if (!aux) return 0; + for (u32 i = 0; i < aux->ndst; ++i) + if (same_phys_reg(&aux->dsts[i], r)) return 1; + return 0; + } + default: + return 0; + } +} + +static int block_live_out_has_phys_reg(Func* f, const Block* bl, + const Operand* r) { + if (!f->val_info || !bl->live_out || !r || r->kind != OPK_REG) return 0; + for (Val v = 1; v < f->nvals; ++v) { + if (!bit_has(bl->live_out, v)) continue; + if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue; + if (f->val_info[v].cls == r->cls && f->val_info[v].hard_reg == r->v.reg) + return 1; + } + return 0; +} + +static int copy_fold_slot(const Inst* in, u32 idx) { + switch ((IROp)in->op) { + case IR_COPY: + case IR_CONVERT: + case IR_UNOP: + return idx == 1; + case IR_BINOP: + case IR_CMP: + return idx == 1 || idx == 2; + case IR_CMP_BRANCH: + return idx == 0 || idx == 1; + case IR_CONDBR: + return idx == 0; + case IR_STORE: + return idx == 1; + case IR_ALLOCA: + return idx == 1; + case IR_ATOMIC_RMW: + return idx == 2; + default: + return 0; + } +} + +static int imm_fold_slot(const Inst* in, u32 idx) { + switch ((IROp)in->op) { + case IR_BINOP: + case IR_CMP: + return idx == 1 || idx == 2; + case IR_CMP_BRANCH: + return idx == 0 || idx == 1; + default: + return 0; + } +} + +static int identical_convert_pair(const Inst* a, const Inst* b) { + if ((IROp)a->op != IR_CONVERT || (IROp)b->op != IR_CONVERT) return 0; + if (a->nopnds < 2 || b->nopnds < 2) return 0; + if (a->extra.imm != b->extra.imm) return 0; + return a->opnds[1].type == b->opnds[1].type && + a->opnds[0].type == b->opnds[0].type; +} + +static int find_single_direct_use(Func* f, Block* bl, u32 def_i, + const Operand* def, const Operand* src, + int 
check_src, int imm_fold, int conv_fold, + u32* use_i_out, u32* op_i_out) { + int total_uses = 0; + int source_clobbered = 0; + int killed = 0; + int found = 0; + u32 found_i = 0; + u32 found_op = 0; + + for (u32 i = def_i + 1u; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + int uses = inst_uses_phys_reg(in, def); + if (uses) { + if (check_src && source_clobbered) return 0; + total_uses += uses; + if (total_uses > 1) return 0; + for (u32 oi = 0; oi < in->nopnds; ++oi) { + int ok = conv_fold + ? (oi == 1 && + identical_convert_pair(&bl->insts[def_i], in)) + : (imm_fold ? imm_fold_slot(in, oi) + : copy_fold_slot(in, oi)); + if (!ok) continue; + if (!same_phys_reg(&in->opnds[oi], def)) continue; + found_i = i; + found_op = oi; + found = 1; + } + } + + if ((IROp)in->op == IR_CALL) { + if (check_src) source_clobbered = 1; + killed = 1; + break; + } + if (check_src && src && inst_defines_phys_reg(in, src)) + source_clobbered = 1; + if (inst_defines_phys_reg(in, def)) { + killed = 1; + break; + } + } + + if (total_uses != 1) return 0; + if (!found) return 0; + if (!killed && block_live_out_has_phys_reg(f, bl, def)) return 0; + *use_i_out = found_i; + *op_i_out = found_op; + return 1; +} + +static void opt_combine_fold_block(Func* f, Block* bl) { + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + u32 use_i = 0; + u32 op_i = 0; + if ((IROp)in->op == IR_COPY && in->nopnds >= 2 && + in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && + !same_phys_reg(&in->opnds[0], &in->opnds[1]) && + find_single_direct_use(f, bl, i, &in->opnds[0], &in->opnds[1], 1, 0, + 0, &use_i, &op_i)) { + bl->insts[use_i].opnds[op_i] = in->opnds[1]; + continue; + } + + if ((IROp)in->op == IR_LOAD_IMM && in->nopnds >= 1 && + in->opnds[0].kind == OPK_REG && + find_single_direct_use(f, bl, i, &in->opnds[0], NULL, 0, 1, 0, + &use_i, &op_i)) { + Operand imm = in->opnds[0]; + imm.kind = OPK_IMM; + imm.v.imm = in->extra.imm; + bl->insts[use_i].opnds[op_i] = imm; + continue; 
+ } + + if ((IROp)in->op == IR_CONVERT && in->nopnds >= 2 && + in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && + find_single_direct_use(f, bl, i, &in->opnds[0], &in->opnds[1], 1, 0, + 1, &use_i, &op_i)) { + bl->insts[use_i].opnds[op_i] = in->opnds[1]; + } + } +} + void opt_combine(Func* f) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; + opt_combine_fold_block(f, bl); u32 w = 0; for (u32 i = 0; i < bl->ninsts; ++i) { Inst* in = &bl->insts[i]; diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -211,6 +211,23 @@ static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c, return in; } +static Inst* emit_convert(Func* f, u32 b, Val dst, Val src, CfreeCgTypeId ty, + ConvKind k) { + Inst* in = ir_emit(f, b, IR_CONVERT); + in->opnds = arena_array(f->arena, Operand, 2); + in->opnds[0] = op_reg_(dst, ty); + in->opnds[1] = op_reg_(src, ty); + in->nopnds = 2; + in->def = dst; + in->type = ty; + in->extra.imm = k; + if (dst < f->nvals) { + f->val_def_block[dst] = b; + f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; + } + return in; +} + static Inst* emit_load_local(Func* f, u32 b, Val dst, FrameSlot fs, CfreeCgTypeId ty, u16 flags) { Inst* in = ir_emit(f, b, IR_LOAD); @@ -1194,6 +1211,233 @@ static void opt_combine_spill_peeps(void) { tc_fini(&tc); } +static void opt_combine_single_use_copy_and_imm(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + f->opt_rewritten = 1; + + Inst* li = ir_emit(f, f->entry, IR_LOAD_IMM); + li->opnds = arena_array(f->arena, Operand, 1); + li->opnds[0] = op_reg_(9, tc.i32); + li->nopnds = 1; + li->extra.imm = 7; + + Inst* cp = ir_emit(f, f->entry, IR_COPY); + cp->opnds = arena_array(f->arena, Operand, 2); + cp->opnds[0] = op_reg_(10, tc.i32); + cp->opnds[1] = op_reg_(11, tc.i32); + cp->nopnds = 2; + + Inst* add = ir_emit(f, f->entry, IR_BINOP); + add->opnds = arena_array(f->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(9, tc.i32); + add->opnds[2] 
= op_reg_(10, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + emit_ret_val(f, f->entry, 19, tc.i32); + + opt_combine(f); + add = &f->blocks[f->entry].insts[2]; + EXPECT(add->opnds[1].kind == OPK_IMM && add->opnds[1].v.imm == 7, + "one-use load_imm should fold into binop immediate operand"); + EXPECT(add->opnds[2].kind == OPK_REG && add->opnds[2].v.reg == 11, + "one-use physical copy should fold into binop register operand"); + + opt_dce(f); + EXPECT(count_op(f, IR_LOAD_IMM) == 0, + "folded load_imm should be removed by post-rewrite DCE"); + EXPECT(count_op(f, IR_COPY) == 0, + "folded physical copy should be removed by post-rewrite DCE"); + + Func* g = new_func(&tc); + g->opt_rewritten = 1; + li = ir_emit(g, g->entry, IR_LOAD_IMM); + li->opnds = arena_array(g->arena, Operand, 1); + li->opnds[0] = op_reg_(9, tc.i32); + li->nopnds = 1; + li->extra.imm = 0; + + Inst* cmp = ir_emit(g, g->entry, IR_CMP); + cmp->opnds = arena_array(g->arena, Operand, 3); + cmp->opnds[0] = op_reg_(19, tc.i32); + cmp->opnds[1] = op_reg_(10, tc.i32); + cmp->opnds[2] = op_reg_(9, tc.i32); + cmp->nopnds = 3; + cmp->extra.imm = CMP_EQ; + emit_ret_val(g, g->entry, 19, tc.i32); + + opt_combine(g); + cmp = &g->blocks[g->entry].insts[1]; + EXPECT(cmp->opnds[2].kind == OPK_IMM && cmp->opnds[2].v.imm == 0, + "one-use load_imm should fold into cmp immediate operand"); + + Func* h = new_func(&tc); + h->opt_rewritten = 1; + u32 taken = ir_block_new(h); + u32 fallthrough = ir_block_new(h); + li = ir_emit(h, h->entry, IR_LOAD_IMM); + li->opnds = arena_array(h->arena, Operand, 1); + li->opnds[0] = op_reg_(9, tc.i32); + li->nopnds = 1; + li->extra.imm = 1; + + Inst* br = ir_emit(h, h->entry, IR_CMP_BRANCH); + br->opnds = arena_array(h->arena, Operand, 2); + br->opnds[0] = op_reg_(9, tc.i32); + br->opnds[1] = op_reg_(10, tc.i32); + br->nopnds = 2; + br->extra.imm = CMP_NE; + h->blocks[h->entry].succ[0] = taken; + h->blocks[h->entry].succ[1] = fallthrough; + h->blocks[h->entry].nsucc = 2; + + 
opt_combine(h); + br = &h->blocks[h->entry].insts[1]; + EXPECT(br->opnds[0].kind == OPK_IMM && br->opnds[0].v.imm == 1, + "one-use load_imm should fold into cmp_branch immediate operand"); + tc_fini(&tc); +} + +static void opt_combine_keeps_unsafe_and_multiuse_defs(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + f->opt_rewritten = 1; + + Inst* cp = ir_emit(f, f->entry, IR_COPY); + cp->opnds = arena_array(f->arena, Operand, 2); + cp->opnds[0] = op_reg_(10, tc.i32); + cp->opnds[1] = op_reg_(11, tc.i32); + cp->nopnds = 2; + + Inst* clobber = ir_emit(f, f->entry, IR_LOAD_IMM); + clobber->opnds = arena_array(f->arena, Operand, 1); + clobber->opnds[0] = op_reg_(11, tc.i32); + clobber->nopnds = 1; + clobber->extra.imm = 4; + + Inst* add = ir_emit(f, f->entry, IR_BINOP); + add->opnds = arena_array(f->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(10, tc.i32); + add->opnds[2] = op_reg_(12, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + emit_ret_val(f, f->entry, 19, tc.i32); + + opt_combine(f); + add = &f->blocks[f->entry].insts[2]; + EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 10, + "copy fold should not cross a clobber of the source register"); + + Func* call_f = new_func(&tc); + call_f->opt_rewritten = 1; + cp = ir_emit(call_f, call_f->entry, IR_COPY); + cp->opnds = arena_array(call_f->arena, Operand, 2); + cp->opnds[0] = op_reg_(10, tc.i32); + cp->opnds[1] = op_reg_(11, tc.i32); + cp->nopnds = 2; + emit_call_void(call_f, call_f->entry); + + add = ir_emit(call_f, call_f->entry, IR_BINOP); + add->opnds = arena_array(call_f->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(10, tc.i32); + add->opnds[2] = op_reg_(12, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + emit_ret_val(call_f, call_f->entry, 19, tc.i32); + + opt_combine(call_f); + add = &call_f->blocks[call_f->entry].insts[2]; + EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg 
== 10, + "copy fold should not cross a call clobber barrier"); + + Func* g = new_func(&tc); + g->opt_rewritten = 1; + Inst* li = ir_emit(g, g->entry, IR_LOAD_IMM); + li->opnds = arena_array(g->arena, Operand, 1); + li->opnds[0] = op_reg_(9, tc.i32); + li->nopnds = 1; + li->extra.imm = 7; + + add = ir_emit(g, g->entry, IR_BINOP); + add->opnds = arena_array(g->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(9, tc.i32); + add->opnds[2] = op_reg_(12, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + + Inst* cmp = ir_emit(g, g->entry, IR_CMP); + cmp->opnds = arena_array(g->arena, Operand, 3); + cmp->opnds[0] = op_reg_(20, tc.i32); + cmp->opnds[1] = op_reg_(9, tc.i32); + cmp->opnds[2] = op_reg_(13, tc.i32); + cmp->nopnds = 3; + cmp->extra.imm = CMP_EQ; + emit_ret_val(g, g->entry, 19, tc.i32); + + opt_combine(g); + add = &g->blocks[g->entry].insts[1]; + EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 9, + "multi-use load_imm should not fold into the first user"); + tc_fini(&tc); +} + +static void opt_combine_copy_chains_and_convert_pairs(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + f->opt_rewritten = 1; + + Inst* cp = ir_emit(f, f->entry, IR_COPY); + cp->opnds = arena_array(f->arena, Operand, 2); + cp->opnds[0] = op_reg_(10, tc.i32); + cp->opnds[1] = op_reg_(11, tc.i32); + cp->nopnds = 2; + + cp = ir_emit(f, f->entry, IR_COPY); + cp->opnds = arena_array(f->arena, Operand, 2); + cp->opnds[0] = op_reg_(12, tc.i32); + cp->opnds[1] = op_reg_(10, tc.i32); + cp->nopnds = 2; + + Inst* add = ir_emit(f, f->entry, IR_BINOP); + add->opnds = arena_array(f->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(12, tc.i32); + add->opnds[2] = op_reg_(13, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + emit_ret_val(f, f->entry, 19, tc.i32); + + opt_combine(f); + add = &f->blocks[f->entry].insts[2]; + EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 11, 
+ "single-use physical copy chains should collapse before DCE"); + opt_dce(f); + EXPECT(count_op(f, IR_COPY) == 0, + "collapsed copy chain should be removed by post-rewrite DCE"); + + Func* g = new_func(&tc); + g->opt_rewritten = 1; + emit_convert(g, g->entry, 10, 11, tc.i32, CV_BITCAST); + emit_convert(g, g->entry, 12, 10, tc.i32, CV_BITCAST); + emit_ret_val(g, g->entry, 12, tc.i32); + + opt_combine(g); + Inst* cv = &g->blocks[g->entry].insts[1]; + EXPECT(cv->opnds[1].kind == OPK_REG && cv->opnds[1].v.reg == 11, + "identical one-use convert pairs should fold to the original source"); + opt_dce(g); + EXPECT(count_op(g, IR_CONVERT) == 1, + "folded convert producer should be removed by post-rewrite DCE"); + tc_fini(&tc); +} + static void opt_dce_physical_dead_defs(void) { TestCtx tc; tc_init(&tc); @@ -1377,6 +1621,9 @@ int main(void) { opt_inline_asm_tied_fixed_regs(); opt_post_rewrite_dce(); opt_combine_spill_peeps(); + opt_combine_single_use_copy_and_imm(); + opt_combine_keeps_unsafe_and_multiuse_defs(); + opt_combine_copy_chains_and_convert_pairs(); opt_dce_physical_dead_defs(); opt_dead_def_keeps_observable_loads(); opt_dead_def_elim_test();