commit f3ab1d915b4053125a8d32af099878dd6f0accc7
parent b946ca4de6884fcfbe3285ff5e6154ebaf0aadc2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 28 May 2026 08:12:29 -0700
opt: elide identity moves and the terminal IR_RET branch
aa_move: skip same-class same-reg moves. Catches no-op IR_CONVERT
(BITCAST, ZEXT/SEXT with src_bits>=dst_bits, FEXT/FTRUNC across-class)
when the allocator put dst and src in the same hard reg — surfaced by
the ABI return-reg coalescing, e.g. `convert opnds=[v0,v0]` after a
pointer-returning call was emitting `mov x0,x0`.
emit_block / emit_ret: skip the trailing branch-to-epilogue when the
IR_RET is the very last inst of the last emit_order block. func_end
places the epilogue label at the next position, so the branch would
target PC+4 (the instruction immediately following it) and is redundant.
The actual `ret` instruction lives in func_end's restore-frame sequence
and is unaffected.
Diffstat:
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -1412,6 +1412,18 @@ static void aa_load_label_addr(NativeTarget* t, NativeLoc dst, MCLabel target) {
}
static void aa_move(NativeTarget* t, NativeLoc dst, NativeLoc src) {
+ /* Identity move elision: a same-class same-reg move leaves the low bits
+ * untouched at any width. NOTE(review): mov wN,wN also zeroes bits 63:32,
+ * so confirm no caller relies on that. Catches no-op IR_CONVERT (BITCAST,
+ * ZEXT/SEXT with src_bits>=dst_bits, FEXT/FTRUNC across-class) when the
+ * allocator put dst and src in the same hard reg — common after the #2.5
+ * return-reg coalescing, e.g. `convert opnds=[v0,v0]` after a
+ * pointer-returning call was emitting `mov x0,x0`. Cross-class (fp<->gpr)
+ * bitcasts are not elided here even when the reg numbers match — the
+ * register files are disjoint. */
+ if (dst.kind == NATIVE_LOC_REG && src.kind == NATIVE_LOC_REG &&
+ loc_is_fp(dst) == loc_is_fp(src) && dst.v.reg == src.v.reg)
+ return;
if (loc_is_fp(dst) && loc_is_fp(src)) {
aa_emit32(t->mc, aa_fmov_fp(type_size32(t, dst.type) == 8u, loc_reg(dst),
loc_reg(src)));
diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c
@@ -30,6 +30,11 @@ typedef struct NativeEmitCtx {
u32 local_static_base;
u32 local_static_size;
u8 local_static_active;
+ /* Set by emit_block for the IR_RET that is the last inst of the last block
+ * in emit_order. emit_ret consults it to skip the trailing
+ * branch-to-epilogue: func_end places the epilogue label at the very next
+ * position, so the branch would only jump to the following instruction. */
+ u8 emitting_terminal_ret;
} NativeEmitCtx;
static _Noreturn void emit_panic(NativeEmitCtx* e, SrcLoc loc,
@@ -709,7 +714,12 @@ static void emit_ret(NativeEmitCtx* e, Inst* in, const CGFuncDesc* fd) {
e->target->plan_ret(e->target, fd, values, values ? 1u : 0u, &rets, &nrets);
for (u32 i = 0; i < nrets; ++i)
write_loc(e, rets[i].dst, rets[i].src, rets[i].mem, in->loc);
- e->target->ret(e->target);
+ /* Skip the trailing branch-to-epilogue when this IR_RET is the very last
+ * inst emitted: func_end will place the epilogue label at mc->pos right
+ * after this, so the branch would only jump to the following instruction.
+ * The actual `ret` instruction lives in func_end's restore-frame sequence
+ * and is unaffected. */
+ if (!e->emitting_terminal_ret) e->target->ret(e->target);
}
static void emit_inst(NativeEmitCtx* e, u32 block, u32 order_index, Inst* in,
@@ -1257,8 +1267,13 @@ static void emit_block(NativeEmitCtx* e, u32 block, u32 order_index,
ensure_label(e, block, (SrcLoc){0, 0, 0}));
}
Block* bl = &e->f->blocks[block];
- for (u32 i = 0; i < bl->ninsts; ++i)
+ int is_last_block = order_index + 1u == e->f->emit_order_n;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ e->emitting_terminal_ret = is_last_block && i + 1u == bl->ninsts &&
+ (IROp)bl->insts[i].op == IR_RET;
emit_inst(e, block, order_index, &bl->insts[i], fd);
+ }
+ e->emitting_terminal_ret = 0;
if (bl->nsucc == 1u &&
(bl->ninsts == 0 ||
!native_emit_terminates(&bl->insts[bl->ninsts - 1u]))) {