kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 12e73593a9c7eb07271ec79cb696b4c57edccb2e
parent e53522e9bf0377e3b3629e3706e00ac60d0a1510
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 24 May 2026 06:37:40 -0700

opt: don't reuse indirect-call target scratch as parallel-move temp

replay_planned_call moves an indirect call's target out of the way when
an argument would overwrite its register, parking it in a scratch reg
(x16/IP0 on aarch64) via replay_scratch_reg. It then resolved the
argument parallel-copy with replay_parallel_moves, whose cycle-breaking
temporary also came from replay_scratch_reg with avoid=REG_NONE — and so
picked the *same* x16, clobbering the saved call target. The emitted
sequence ended `mov x16, <target>; ...; mov x16, <swap tmp>; blr x16`,
branching to garbage (Bus error).

Thread the reserved callee register into replay_parallel_moves as the
`avoid` argument so cycle-breaking temporaries steer clear of it (it now
picks x17, the second IP scratch). Ret moves pass REG_NONE since the
callee scratch is dead once the call is emitted.

Surfaced building cfree at -O1 with cfree (lex_next miscompiled).
Minimal repro — `long g(fn f,long a){ return f(a,(long)f); }` — forces a
callee->scratch save plus an x0<->x1 arg swap; pre-fix it crashed at -O1,
post-fix returns correctly at -O0/-O1/-O2.

test-opt, test-smoke-{x64,rv64}, test-parse, test-toy, test-driver,
test-isa pass; bootstrap stays bitwise-reproducible (stage2 == stage3).

Diffstat:
M src/opt/pass_emit.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -261,8 +261,11 @@ static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) {
   }
 }
 
+/* `avoid` names a physical register the caller has reserved across these
+ * moves (e.g. the scratch holding an indirect call's target). Cycle-breaking
+ * temporaries must steer clear of it, or they would clobber the live value. */
 static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
-                                  u32 n) {
+                                  u32 n, Reg avoid) {
   CGTarget* w = r->tgt;
   u32 remaining = 0;
   for (u32 i = 0; i < n; ++i) {
@@ -286,7 +289,7 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
 
     for (u32 i = 0; i < n; ++i) {
       if (moves[i].done || moves[i].src.kind == OPK_REG) continue;
-      Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, REG_NONE);
+      Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, avoid);
       if (sr == (Reg)REG_NONE) continue;
       Operand tmp = phys_reg_operand(sr, (RegClass)moves[i].dst.cls,
                                      moves[i].dst.type);
@@ -306,7 +309,7 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
     while (first < n && moves[first].done) ++first;
     if (first == n) break;
     Operand save = moves[first].src;
-    Reg sr = replay_scratch_reg(r, (RegClass)save.cls, REG_NONE);
+    Reg sr = replay_scratch_reg(r, (RegClass)save.cls, avoid);
     if (sr == (Reg)REG_NONE) {
       SrcLoc loc = {0, 0, 0};
       compiler_panic(r->c, loc,
@@ -434,7 +437,7 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
     }
   }
 
-  replay_parallel_moves(r, arg_moves, nargs);
+  replay_parallel_moves(r, arg_moves, nargs, callee_scratch);
   r->tgt->emit_call_plan(r->tgt, &plan);
   if (plan.flags & CG_CALL_TAIL) return;
 
@@ -460,7 +463,7 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
     ret_moves[nrets].is_ret = 1;
     ++nrets;
   }
-  replay_parallel_moves(r, ret_moves, nrets);
+  replay_parallel_moves(r, ret_moves, nrets, REG_NONE);
 }
 
 static Label ensure_label(ReplayCtx* r, u32 b) {