commit d1a5d01b64822a3d2c2657e7755eb8bf53887a85
parent dfc5f2358d70e838ab8574941d0d61c4d6bbbb6f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 12:32:52 -0700
cg: save the full value width when breaking an arg-shuffle cycle
The parallel-copy scheduler breaks a register cycle by parking one member's
register in the class scratch, then pointing the cycle's readers at the scratch.
Previously it sized that save from moves[k].dst's own type, but the value parked
in that register belongs to whoever *reads* it, and the reader's source type can
be wider (e.g. a 4-byte int rotating into the slot a reader treats as an 8-byte
pointer). Sizing the save from the first pending reader's source type instead
preserves every bit, fixing a truncated pointer when a 4-arg call permutes its
argument registers in a full cycle (variadic_04_pointer and similar
indirect-call shuffles).
Diffstat:
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/cg/native_argmove.c b/src/cg/native_argmove.c
@@ -57,19 +57,36 @@ void native_arg_shuffle(const NativeArgShuffle* s, NativeArgMove* moves, u32 n)
u32 k = 0;
NativeAllocClass bc, sc;
NativeLoc scratchloc;
+ Reg broken_reg;
+ CfreeCgTypeId val_type;
while (k < n &&
(done[k] || nam_src_reg(&moves[k], &sc) == REG_NONE))
++k;
bc = (NativeAllocClass)moves[k].dst.cls;
+ broken_reg = moves[k].dst.v.reg;
+ /* The value parked in broken_reg is whatever the cycle's readers consume,
+ * and its width is their *source* type — not moves[k].dst's own incoming
+ * type, which can be narrower (e.g. a 4-byte int rotating into the slot a
+ * reader treats as an 8-byte pointer). Saving at the destination's width
+ * would truncate the parked value before the reader loads it back, so use
+ * a reader's source type to preserve every bit. */
+ val_type = moves[k].dst.type;
+ for (j = 0; j < n; ++j) {
+ Reg sr = nam_src_reg(&moves[j], &sc);
+ if (!done[j] && sr == broken_reg && sc == bc) {
+ val_type = moves[j].src.type;
+ break;
+ }
+ }
memset(&scratchloc, 0, sizeof scratchloc);
scratchloc.kind = NATIVE_LOC_REG;
scratchloc.cls = (u8)bc;
- scratchloc.type = moves[k].dst.type;
+ scratchloc.type = val_type;
scratchloc.v.reg = s->scratch[bc];
s->reg_move(s->t, scratchloc, moves[k].dst);
for (j = 0; j < n; ++j) {
Reg sr = nam_src_reg(&moves[j], &sc);
- if (!done[j] && sr != REG_NONE && sr == moves[k].dst.v.reg && sc == bc) {
+ if (!done[j] && sr != REG_NONE && sr == broken_reg && sc == bc) {
moves[j].src = scratchloc;
moves[j].src_offset = 0;
}