commit debe1032c9ec9285e04e7194e61cfc3411489f84
parent f3ab1d915b4053125a8d32af099878dd6f0accc7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 28 May 2026 08:34:49 -0700
opt: hint call-arg sources to ABI dest regs + propagate through copies
Extends apply_abi_aliasing_hints so that for each IR_CALL arg whose
storage is a single OPK_REG, the source PReg is hinted to the matching
ABI arg register (mirroring the existing IR_CALL result / IR_RET value
hints to the ABI return reg). To make this fire on the common
`load -> copy -> call arg` shape the frontend emits, a second pass
propagates preferred_hard_reg backward across IR_COPY so the actual
producer of the value also prefers the ABI dest; the intervening copy
then becomes an identity move that combine elides.
aux->desc.abi is now populated by lower_call so the hint pass doesn't
re-derive ABIFuncInfo per call.
Tail calls are skipped: the tail-call shuffle in the backend already
resolves arg permutations (e.g. caller(x,y,z) -> tail target(z,x,y)),
and hinting the param PRegs across that shuffle creates cycles the
entry-bind moves can't break. Variadic / sret are also skipped
because the per-class arg counter logic doesn't account for them.
Impact on binary-trees at -O1 (depth=20 hot path): ItemCheck recursive
call setups collapse `ldr x8,[x19]; mov x0,x8; bl ItemCheck` to
`ldr x0,[x19]; bl ItemCheck` (-2 insns/call * ~33M calls);
DeleteTree saves 3 insns/call; NewTreeNode saves 1 insn/call.
Total `mov x0,xN`/`mov x1,xN` count in the object: 30 -> 17.
Diffstat:
2 files changed, 111 insertions(+), 1 deletion(-)
diff --git a/src/opt/cg_ir_lower.c b/src/opt/cg_ir_lower.c
@@ -586,6 +586,12 @@ static void lower_call(CgIrLower* l, Inst* out, const CgIrInst* in) {
aux->desc.flags = src->desc.flags;
aux->desc.tail_policy = src->desc.tail_policy;
aux->desc.inline_policy = src->desc.inline_policy;
+ /* Cache the function ABI on the desc so downstream passes (e.g. the
+ * regalloc hint pass that steers call-arg sources toward their ABI dest
+ * register) don't have to re-derive it per call. abi_cg_func_info is the
+ * canonical lookup. */
+ if (l->f->c && l->f->c->abi)
+ aux->desc.abi = abi_cg_func_info(l->f->c->abi, src->desc.fn_type);
if (src->desc.nargs) {
aux->desc.args = arena_zarray(l->f->arena, OptCGABIValue, src->desc.nargs);
for (u32 i = 0; i < src->desc.nargs; ++i)
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -364,9 +364,109 @@ static void set_preg_pref_for_abivalue(Func* f, const CGABIValue* v) {
set_preg_pref_to_ret_reg(f, &v->parts[i].op);
}
+/* Soft hint: make `hint` the preferred hard reg of the PReg behind `op`.
+ * Call-argument counterpart of set_preg_pref_to_ret_reg, except that the
+ * caller supplies the reg (the ABI arg reg of the i-th call argument). The
+ * apply_param_incoming_register_hazards forbid on that reg is cleared too:
+ * a call-arg source is defined in the body, after every entry-bind ran. */
+static void set_preg_pref_to_arg_reg(Func* f, const Operand* op, Reg hint) {
+  if (!op || op->kind != OPK_REG || hint == REG_NONE || hint >= 32)
+    return;
+  PReg preg = (PReg)op->v.reg;
+  if (preg == PREG_NONE || preg == 0 || preg >= opt_reg_count(f)) return;
+  if ((u8)f->preg_info[preg].cls >= OPT_REG_CLASSES) return;
+  if (f->preg_info[preg].tied_hard_reg >= 0 ||
+      f->preg_info[preg].preferred_hard_reg >= 0)
+    return; /* a hard tie or an earlier hint always wins */
+  f->preg_info[preg].preferred_hard_reg = (i8)hint;
+  f->preg_info[preg].forbidden_hard_regs &= ~(1u << hint);
+}
+
+/* Hints the source PReg of every IR_CALL argument toward the ABI register
+ * that argument travels in. Int and FP slots are numbered by two running
+ * counters that mirror the per-class slot assignment in
+ * collect_param_incoming_regs. Only direct, single-part args in one of the
+ * first 8 slots of their class get a hint; variadic and sret callees are
+ * skipped because their counter logic still lives only in plan_call. */
+static void set_preg_pref_for_call_args(Func* f, const CGCallDesc* desc) {
+  if (!f || !desc) return;
+  /* Tail calls route their args through the backend's tail-call shuffle,
+   * which resolves permutations such as incoming x0/x1 -> outgoing x1/x0.
+   * Hints across that shuffle create cycles that bind_param / the
+   * entry-bind moves cannot break (toy 24, 27, 28, ... miscompile). */
+  if (desc->flags & CG_CALL_TAIL) return;
+  /* Prefer the ABI info cached on the desc; derive it only when absent. */
+  const ABIFuncInfo* info = desc->abi;
+  if (!info && f->c && f->c->abi)
+    info = abi_cg_func_info(f->c->abi, desc->fn_type);
+  if (!info || info->variadic || info->has_sret) return;
+  u32 int_slots = 0, fp_slots = 0;
+  for (u32 idx = 0; idx < desc->nargs && idx < info->nparams; ++idx) {
+    const ABIArgInfo* param = &info->params[idx];
+    /* Indirect args take one int slot; like ignored args they get no hint. */
+    if (param->kind == ABI_ARG_INDIRECT) ++int_slots;
+    if (param->kind != ABI_ARG_DIRECT || param->nparts == 0) continue;
+    /* Part 0 picks both the register class and the counter consulted. */
+    int is_fp = param->parts[0].cls == ABI_CLASS_FP;
+    u8 rc = is_fp ? RC_FP : RC_INT;
+    u32 slot = is_fp ? fp_slots : int_slots;
+    Reg target = REG_NONE;
+    /* Multi-part args and slots past the 8th are counted but not hinted. */
+    if (slot < 8u && param->nparts == 1 &&
+        phys_arg_reg_for_index(f, rc, slot, &target))
+      set_preg_pref_to_arg_reg(f, &desc->args[idx].storage, target);
+    /* Each part then consumes one slot of its own class. */
+    for (u16 part = 0; part < param->nparts; ++part) {
+      if (param->parts[part].cls == ABI_CLASS_FP)
+        fp_slots += 1u;
+      else
+        int_slots += 1u;
+    }
+  }
+}
+
+/* Propagate preferred_hard_reg backward across IR_COPY, one hop per copy.
+ * When the frontend emits `copy def=v_arg opnds=[v_arg, v_value]` to carry
+ * a value into a call's arg slot, set_preg_pref_for_call_args hints v_arg
+ * to the ABI dest (e.g. x0) but the producer v_value (e.g. a load result)
+ * stays unhinted, lands in a generic caller-save (e.g. x8), and the copy
+ * emits `mov x0, x8`. Sharing the hint lets both land at x0 so the copy
+ * becomes an identity move that combine elides. Single forward walk: for
+ * each IR_COPY whose def has a hint in [0, 32) and whose OPK_REG source of
+ * the same class has no tie and no hint, copy the hint to the source and
+ * clear the source's forbid bit for it. Copies are not revisited, so a
+ * def hinted later in the walk does not reach sources of earlier copies. */
+static void propagate_hint_through_copies(Func* f) {
+  if (!f || !f->preg_info) return;
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    for (u32 i = 0; i < bl->ninsts; ++i) {
+      Inst* in = &bl->insts[i];
+      if ((IROp)in->op != IR_COPY) continue;
+      if (in->nopnds < 2 || in->opnds[0].kind != OPK_REG ||
+          in->opnds[1].kind != OPK_REG)
+        continue;
+      PReg dst = (PReg)in->opnds[0].v.reg;
+      PReg src = (PReg)in->opnds[1].v.reg;
+      if (dst == PREG_NONE || dst == 0 || dst >= opt_reg_count(f)) continue;
+      if (src == PREG_NONE || src == 0 || src >= opt_reg_count(f)) continue;
+      i8 dst_pref = f->preg_info[dst].preferred_hard_reg;
+      if (dst_pref < 0 || dst_pref >= 32) continue; /* keeps shift defined */
+      if (f->preg_info[src].tied_hard_reg >= 0) continue;
+      if (f->preg_info[src].preferred_hard_reg >= 0) continue;
+      if (f->preg_info[dst].cls != f->preg_info[src].cls) continue;
+      f->preg_info[src].forbidden_hard_regs &= ~(1u << (Reg)dst_pref);
+      f->preg_info[src].preferred_hard_reg = dst_pref;
+    }
+  }
+}
+
/* Set a soft "prefer the ABI return reg" hint on:
* - IR_CALL result PRegs (so emit_call's `mov result, x0` is elided)
* - IR_RET value PRegs (so emit_ret's `mov x0, value` is elided)
+ * - IR_CALL arg source PRegs, hinted to the matching ABI arg reg instead
+ *   (so emit_call's `mov x0, src` is elided); hints are propagated backward
+ *   through IR_COPY so the value's producer also prefers that reg.
*
* The hint is a tie-breaker only — see hard_reg_alloc_score. The allocator's
* existing conflict checks already exclude regs with real interference (e.g.
@@ -379,13 +479,17 @@ static void apply_abi_aliasing_hints(Func* f) {
Inst* in = &bl->insts[i];
if ((IROp)in->op == IR_CALL) {
IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (aux) set_preg_pref_for_abivalue(f, &aux->desc.ret);
+ if (aux) {
+ set_preg_pref_for_abivalue(f, &aux->desc.ret);
+ set_preg_pref_for_call_args(f, &aux->desc);
+ }
} else if ((IROp)in->op == IR_RET) {
IRRetAux* aux = (IRRetAux*)in->extra.aux;
if (aux && aux->present) set_preg_pref_for_abivalue(f, &aux->val);
}
}
}
+ propagate_hint_through_copies(f);
}
/* ---------------------------------------------------------------------------