kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f3ab1d915b4053125a8d32af099878dd6f0accc7
parent b946ca4de6884fcfbe3285ff5e6154ebaf0aadc2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 28 May 2026 08:12:29 -0700

opt: elide identity moves and the terminal IR_RET branch

aa_move: skip same-class same-reg moves. Catches no-op IR_CONVERT
(BITCAST, ZEXT/SEXT with src_bits>=dst_bits, FEXT/FTRUNC across-class)
when the allocator put dst and src in the same hard reg — surfaced by
the ABI return-reg coalescing, e.g. `convert opnds=[v0,v0]` after a
pointer-returning call was emitting `mov x0,x0`.

emit_block / emit_ret: skip the trailing branch-to-epilogue when the
IR_RET is the very last inst of the last emit_order block. func_end
places the epilogue label at the next position, so the branch would
jump to PC+4. The actual `ret` instruction lives in func_end's
restore-frame sequence and is unaffected.

Diffstat:
M src/arch/aa64/native.c | 12 ++++++++++++
M src/opt/pass_native_emit.c | 19 +++++++++++++++++--
2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -1412,6 +1412,18 @@ static void aa_load_label_addr(NativeTarget* t, NativeLoc dst, MCLabel target) { } static void aa_move(NativeTarget* t, NativeLoc dst, NativeLoc src) { + /* Identity move elision: same-class same-reg is a no-op on aarch64 + * regardless of width (mov xN,xN and mov wN,wN both leave the low bits + * untouched). Catches no-op IR_CONVERT (BITCAST, ZEXT/SEXT with + * src_bits>=dst_bits, FEXT/FTRUNC across-class) when the allocator put + * dst and src in the same hard reg — common post #2.5 return-reg + * coalescing, e.g. `convert opnds=[v0,v0]` after a pointer-returning call + * was emitting `mov x0,x0`. Cross-class (fp<->gpr) bitcasts are not + * elided here even when the reg numbers match — the register files are + * disjoint. */ + if (dst.kind == NATIVE_LOC_REG && src.kind == NATIVE_LOC_REG && + loc_is_fp(dst) == loc_is_fp(src) && dst.v.reg == src.v.reg) + return; if (loc_is_fp(dst) && loc_is_fp(src)) { aa_emit32(t->mc, aa_fmov_fp(type_size32(t, dst.type) == 8u, loc_reg(dst), loc_reg(src))); diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c @@ -30,6 +30,11 @@ typedef struct NativeEmitCtx { u32 local_static_base; u32 local_static_size; u8 local_static_active; + /* Set by emit_block for the IR_RET that is the last inst of the last block + * in emit_order. emit_ret consults it to skip the trailing + * branch-to-epilogue: func_end places the epilogue label at the very next + * position, so the branch would just jump to the next 4 bytes. */ + u8 emitting_terminal_ret; } NativeEmitCtx; static _Noreturn void emit_panic(NativeEmitCtx* e, SrcLoc loc, @@ -709,7 +714,12 @@ static void emit_ret(NativeEmitCtx* e, Inst* in, const CGFuncDesc* fd) { e->target->plan_ret(e->target, fd, values, values ? 
1u : 0u, &rets, &nrets); for (u32 i = 0; i < nrets; ++i) write_loc(e, rets[i].dst, rets[i].src, rets[i].mem, in->loc); - e->target->ret(e->target); + /* Skip the trailing branch-to-epilogue when this IR_RET is the very last + * inst emitted: func_end will place the epilogue label at mc->pos right + * after this, so the branch would jump to the next 4 bytes. The actual + * `ret` instruction lives in func_end's restore-frame sequence and is + * unaffected. */ + if (!e->emitting_terminal_ret) e->target->ret(e->target); } static void emit_inst(NativeEmitCtx* e, u32 block, u32 order_index, Inst* in, @@ -1257,8 +1267,13 @@ static void emit_block(NativeEmitCtx* e, u32 block, u32 order_index, ensure_label(e, block, (SrcLoc){0, 0, 0})); } Block* bl = &e->f->blocks[block]; - for (u32 i = 0; i < bl->ninsts; ++i) + int is_last_block = order_index + 1u == e->f->emit_order_n; + for (u32 i = 0; i < bl->ninsts; ++i) { + e->emitting_terminal_ret = is_last_block && i + 1u == bl->ninsts && + (IROp)bl->insts[i].op == IR_RET; emit_inst(e, block, order_index, &bl->insts[i], fd); + } + e->emitting_terminal_ret = 0; if (bl->nsucc == 1u && (bl->ninsts == 0 || !native_emit_terminates(&bl->insts[bl->ninsts - 1u]))) {