commit 12e73593a9c7eb07271ec79cb696b4c57edccb2e
parent e53522e9bf0377e3b3629e3706e00ac60d0a1510
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 24 May 2026 06:37:40 -0700
opt: don't reuse indirect-call target scratch as parallel-move temp
replay_planned_call moves an indirect call's target out of the way when
an argument would overwrite its register, parking it in a scratch reg
(x16/IP0 on aarch64) via replay_scratch_reg. It then resolved the
argument parallel-copy with replay_parallel_moves, whose cycle-breaking
temporary also came from replay_scratch_reg with avoid=REG_NONE — and so
picked the *same* x16, clobbering the saved call target. The emitted
sequence ended with `mov x16, <target>; ...; mov x16, <swap tmp>; blr x16`,
branching to garbage (Bus error).
Thread the reserved callee register into replay_parallel_moves as the
`avoid` argument so cycle-breaking temporaries steer clear of it (it now
picks x17, the second IP scratch). Ret moves pass REG_NONE since the
callee scratch is dead once the call is emitted.
Surfaced while building cfree at -O1 with cfree itself (lex_next was miscompiled).
Minimal repro — `long g(fn f,long a){ return f(a,(long)f); }` — forces a
callee->scratch save plus an x0<->x1 arg swap; pre-fix it crashed at -O1,
post-fix returns correctly at -O0/-O1/-O2.
test-opt, test-smoke-{x64,rv64}, test-parse, test-toy, test-driver,
test-isa pass; bootstrap stays bitwise-reproducible (stage2 == stage3).
Diffstat:
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -261,8 +261,11 @@ static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) {
}
}
+/* `avoid` names a physical register the caller has reserved across these
+ * moves (e.g. the scratch holding an indirect call's target). Cycle-breaking
+ * temporaries must steer clear of it, or they would clobber the live value. */
static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
- u32 n) {
+ u32 n, Reg avoid) {
CGTarget* w = r->tgt;
u32 remaining = 0;
for (u32 i = 0; i < n; ++i) {
@@ -286,7 +289,7 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
for (u32 i = 0; i < n; ++i) {
if (moves[i].done || moves[i].src.kind == OPK_REG) continue;
- Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, REG_NONE);
+ Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, avoid);
if (sr == (Reg)REG_NONE) continue;
Operand tmp =
phys_reg_operand(sr, (RegClass)moves[i].dst.cls, moves[i].dst.type);
@@ -306,7 +309,7 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
while (first < n && moves[first].done) ++first;
if (first == n) break;
Operand save = moves[first].src;
- Reg sr = replay_scratch_reg(r, (RegClass)save.cls, REG_NONE);
+ Reg sr = replay_scratch_reg(r, (RegClass)save.cls, avoid);
if (sr == (Reg)REG_NONE) {
SrcLoc loc = {0, 0, 0};
compiler_panic(r->c, loc,
@@ -434,7 +437,7 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
}
}
- replay_parallel_moves(r, arg_moves, nargs);
+ replay_parallel_moves(r, arg_moves, nargs, callee_scratch);
r->tgt->emit_call_plan(r->tgt, &plan);
if (plan.flags & CG_CALL_TAIL) return;
@@ -460,7 +463,7 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
ret_moves[nrets].is_ret = 1;
++nrets;
}
- replay_parallel_moves(r, ret_moves, nrets);
+ replay_parallel_moves(r, ret_moves, nrets, REG_NONE);
}
static Label ensure_label(ReplayCtx* r, u32 b) {