commit 09bf3f299ea580495f0b4ce4a88dcacfe7a10f31
parent d11c5c90c2371c2d260a161f8f7d61fc30824615
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 21:12:01 -0700
Fix tail sret forwarding
Diffstat:
5 files changed, 78 insertions(+), 13 deletions(-)
diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md
@@ -5,14 +5,6 @@ fixed, remove it instead of checking it off or keeping a closed entry.
Add new deferred fixes below as they are discovered.
-## x86_64-windows: tail-call + sret miscompiles at O1 (access violation)
-
-`test/toy/cases/{36_musttail_sret,37_tail_sret}.toy` built `-target
-x86_64-windows -O1` crash at runtime with 0xC0000005 (STATUS_ACCESS_VIOLATION);
-they pass at O0, on aarch64-freebsd, and `130_record_sret_return` passes at O1 —
-so it is specifically tail-call + struct-return (sret) lowering on the Win64 ABI
-at O1. Reproduced reliably in the Windows VM. Surfaced by `test-toy-windows-vm`.
-
## O1: `&&label` whose address is taken but never `goto`'d → undefined `.Lcfblk.N`
At `-O1`, taking a label's address with the GNU labels-as-values extension
diff --git a/mk/test.mk b/mk/test.mk
@@ -788,7 +788,7 @@ test-macho: lib $(TEST_RT_DEP) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_R
OPT_TEST_BIN = build/test/cg_ir_lower_test
TINY_INLINE_TEST_BIN = build/test/tiny_inline_test
-test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline test-opt-zero-arg test-opt-static-prune-aa64 test-opt-aa64-tail test-opt-prologue-tier test-opt-whole-program-inline test-opt-lto-phase1
+test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline test-opt-zero-arg test-opt-static-prune-aa64 test-opt-aa64-tail test-opt-x64-win-tail-sret test-opt-prologue-tier test-opt-whole-program-inline test-opt-lto-phase1
$(OPT_TEST_BIN)
@@ -813,6 +813,10 @@ test-opt-static-prune-aa64: bin
test-opt-aa64-tail: bin
@KIT=$(abspath $(BIN)) bash test/opt/aa64_tail_call.sh
+.PHONY: test-opt-x64-win-tail-sret
+test-opt-x64-win-tail-sret: bin
+ @KIT=$(abspath $(BIN)) bash test/opt/x64_win_tail_sret.sh
+
# Structural disasm check: the -O1 known-frame prologue cost-model tiers
# (aa64 reference + ported x64 slim/red-zone and rv64 leaf shapes).
.PHONY: test-opt-prologue-tier
diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c
@@ -2296,7 +2296,7 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc,
}
}
rv_emit_reg_arg_moves(t, moves, nmoves);
- if (abi && abi->has_sret && desc->nresults) {
+ if (abi && abi->has_sret) {
/* sret pointer goes in a0; arg loads have completed. A tail call forwards
* the caller's own incoming sret pointer (spilled at entry) so the
* sibling writes the result into the caller's caller's destination;
@@ -2305,7 +2305,7 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc,
if (tail)
rv_load_part(t, a0, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0,
a->variant->ptr_bytes);
- else
+ else if (desc->nresults)
rv_addr_of_loc(t, a0, desc->results[0]);
}
}
diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c
@@ -2544,7 +2544,7 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
(plan->callee.kind == NATIVE_LOC_REG && plan->callee.v.reg == X64_R11)
? X64_TMP_INT
: X64_TMP_INT2);
- if (abi && abi->has_sret && desc->nresults) {
+ if (abi && abi->has_sret) {
/* sret pointer in the first int-arg reg. A tail call forwards the
* caller's own incoming sret pointer (spilled at entry); otherwise pass
* the address of this call's result slot. */
@@ -2552,7 +2552,7 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc,
if (tail)
x64_load_part(t, sret, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0,
8);
- else
+ else if (desc->nresults)
x64_addr_of_loc(t, sret, desc->results[0]);
}
/* Variadic call: AL = number of vector regs used. */
diff --git a/test/opt/x64_win_tail_sret.sh b/test/opt/x64_win_tail_sret.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Structural x86_64-windows tail+sret check.
+#
+# A realized tail call returning an indirect aggregate has no explicit result
+# local at O1, but Win64 still requires the hidden sret destination in rcx.
+# The tail site must forward the caller's saved sret pointer before jumping.
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+KIT="${KIT:-$ROOT/build/kit}"
+WORK="$ROOT/build/test/opt/x64_win_tail_sret"
+mkdir -p "$WORK"
+
+fail() {
+ printf 'x64-win-tail-sret check FAILED: %s\n' "$1" >&2
+ if [ -n "${2:-}" ] && [ -f "$2" ]; then
+ sed 's/^/ | /' "$2" >&2
+ fi
+ exit 1
+}
+
+slice_asm_func() {
+ local src="$1" func="$2" out="$3"
+ awk -v name="$func" '
+ $0 == name ":" { in_fn = 1; print; next }
+ /^[A-Za-z_.$][A-Za-z0-9_.$]*:/ { if (in_fn) in_fn = 0 }
+ in_fn { print }
+ ' "$src" > "$out"
+}
+
+check_case() {
+ local name="$1" src="$2"
+ local asm="$WORK/$name.s"
+ local body="$WORK/$name.forward.s"
+
+ "$KIT" cc -S -O1 -target x86_64-windows "$src" -o "$asm" \
+ > "$WORK/$name.cc.out" 2> "$WORK/$name.cc.err"
+ slice_asm_func "$asm" forward "$body"
+
+ if grep -Eq '\bcallq?[[:space:]]+make\b' "$body"; then
+ fail "$name used a normal call to make" "$body"
+ fi
+ if ! grep -Eq '^[[:space:]]*jmp[[:space:]]+make\b' "$body"; then
+ fail "$name did not emit a sibling jump to make" "$body"
+ fi
+ if ! awk '
+ /^[[:space:]]*movq[[:space:]]+-[0-9]+[(]%rbp[)],[[:space:]]*%rcx[[:space:]]*$/ {
+ reloaded = 1
+ next
+ }
+ reloaded && /^[[:space:]]*jmp[[:space:]]+make[[:space:]]*$/ {
+ found = 1
+ exit 0
+ }
+ /^[[:space:]]*jmp[[:space:]]+make[[:space:]]*$/ {
+ found = 1
+ exit 1
+ }
+ END { if (!found) exit 1 }
+ ' "$body"; then
+ fail "$name did not reload saved sret pointer into rcx before tail jump" \
+ "$body"
+ fi
+}
+
+check_case 36_musttail_sret "$ROOT/test/toy/cases/36_musttail_sret.toy"
+check_case 37_tail_sret "$ROOT/test/toy/cases/37_tail_sret.toy"
+
+printf 'x64-win-tail-sret: ok\n'