kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 09bf3f299ea580495f0b4ce4a88dcacfe7a10f31
parent d11c5c90c2371c2d260a161f8f7d61fc30824615
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  5 Jun 2026 21:12:01 -0700

Fix tail sret forwarding

Diffstat:
Mdoc/plan/TODO.md | 8--------
Mmk/test.mk | 6+++++-
Msrc/arch/riscv/native.c | 4++--
Msrc/arch/x64/native.c | 4++--
Atest/opt/x64_win_tail_sret.sh | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -5,14 +5,6 @@ fixed, remove it instead of checking it off or keeping a closed entry. Add new deferred fixes below as they are discovered. -## x86_64-windows: tail-call + sret miscompiles at O1 (access violation) - -`test/toy/cases/{36_musttail_sret,37_tail_sret}.toy` built `-target -x86_64-windows -O1` crash at runtime with 0xC0000005 (STATUS_ACCESS_VIOLATION); -they pass at O0, on aarch64-freebsd, and `130_record_sret_return` passes at O1 — -so it is specifically tail-call + struct-return (sret) lowering on the Win64 ABI -at O1. Reproduced reliably in the Windows VM. Surfaced by `test-toy-windows-vm`. - ## O1: `&&label` whose address is taken but never `goto`'d → undefined `.Lcfblk.N` At `-O1`, taking a label's address with the GNU labels-as-values extension diff --git a/mk/test.mk b/mk/test.mk @@ -788,7 +788,7 @@ test-macho: lib $(TEST_RT_DEP) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_R OPT_TEST_BIN = build/test/cg_ir_lower_test TINY_INLINE_TEST_BIN = build/test/tiny_inline_test -test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline test-opt-zero-arg test-opt-static-prune-aa64 test-opt-aa64-tail test-opt-prologue-tier test-opt-whole-program-inline test-opt-lto-phase1 +test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline test-opt-zero-arg test-opt-static-prune-aa64 test-opt-aa64-tail test-opt-x64-win-tail-sret test-opt-prologue-tier test-opt-whole-program-inline test-opt-lto-phase1 $(OPT_TEST_BIN) @@ -813,6 +813,10 @@ test-opt-static-prune-aa64: bin test-opt-aa64-tail: bin @KIT=$(abspath $(BIN)) bash test/opt/aa64_tail_call.sh +.PHONY: test-opt-x64-win-tail-sret +test-opt-x64-win-tail-sret: bin + @KIT=$(abspath $(BIN)) bash test/opt/x64_win_tail_sret.sh + # Structural disasm check: the -O1 known-frame prologue cost-model tiers # (aa64 reference + ported x64 slim/red-zone and rv64 leaf shapes). .PHONY: test-opt-prologue-tier diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c @@ -2296,7 +2296,7 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc, } } rv_emit_reg_arg_moves(t, moves, nmoves); - if (abi && abi->has_sret && desc->nresults) { + if (abi && abi->has_sret) { /* sret pointer goes in a0; arg loads have completed. A tail call forwards * the caller's own incoming sret pointer (spilled at entry) so the * sibling writes the result into the caller's caller's destination; @@ -2305,7 +2305,7 @@ static void rv_plan_call(NativeTarget* t, const NativeCallDesc* desc, if (tail) rv_load_part(t, a0, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0, a->variant->ptr_bytes); - else + else if (desc->nresults) rv_addr_of_loc(t, a0, desc->results[0]); } } diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c @@ -2544,7 +2544,7 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc, (plan->callee.kind == NATIVE_LOC_REG && plan->callee.v.reg == X64_R11) ? X64_TMP_INT : X64_TMP_INT2); - if (abi && abi->has_sret && desc->nresults) { + if (abi && abi->has_sret) { /* sret pointer in the first int-arg reg. A tail call forwards the * caller's own incoming sret pointer (spilled at entry); otherwise pass * the address of this call's result slot. */ @@ -2552,7 +2552,7 @@ static void x64_plan_call(NativeTarget* t, const NativeCallDesc* desc, if (tail) x64_load_part(t, sret, native_loc_stack(i64t, a->sret_ptr_slot, 0), 0, 8); - else + else if (desc->nresults) x64_addr_of_loc(t, sret, desc->results[0]); } /* Variadic call: AL = number of vector regs used. */ diff --git a/test/opt/x64_win_tail_sret.sh b/test/opt/x64_win_tail_sret.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Structural x86_64-windows tail+sret check. +# +# A realized tail call returning an indirect aggregate has no explicit result +# local at O1, but Win64 still requires the hidden sret destination in rcx. +# The tail site must forward the caller's saved sret pointer before jumping. +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +KIT="${KIT:-$ROOT/build/kit}" +WORK="$ROOT/build/test/opt/x64_win_tail_sret" +mkdir -p "$WORK" + +fail() { + printf 'x64-win-tail-sret check FAILED: %s\n' "$1" >&2 + if [ -n "${2:-}" ] && [ -f "$2" ]; then + sed 's/^/ | /' "$2" >&2 + fi + exit 1 +} + +slice_asm_func() { + local src="$1" func="$2" out="$3" + awk -v name="$func" ' + $0 == name ":" { in_fn = 1; print; next } + /^[A-Za-z_.$][A-Za-z0-9_.$]*:/ { if (in_fn) in_fn = 0 } + in_fn { print } + ' "$src" > "$out" +} + +check_case() { + local name="$1" src="$2" + local asm="$WORK/$name.s" + local body="$WORK/$name.forward.s" + + "$KIT" cc -S -O1 -target x86_64-windows "$src" -o "$asm" \ + > "$WORK/$name.cc.out" 2> "$WORK/$name.cc.err" + slice_asm_func "$asm" forward "$body" + + if grep -Eq '\bcallq?[[:space:]]+make\b' "$body"; then + fail "$name used a normal call to make" "$body" + fi + if ! grep -Eq '^[[:space:]]*jmp[[:space:]]+make\b' "$body"; then + fail "$name did not emit a sibling jump to make" "$body" + fi + if ! awk ' + /^[[:space:]]*movq[[:space:]]+-[0-9]+[(]%rbp[)],[[:space:]]*%rcx[[:space:]]*$/ { + reloaded = 1 + next + } + reloaded && /^[[:space:]]*jmp[[:space:]]+make[[:space:]]*$/ { + found = 1 + exit 0 + } + /^[[:space:]]*jmp[[:space:]]+make[[:space:]]*$/ { + found = 1 + exit 1 + } + END { if (!found) exit 1 } + ' "$body"; then + fail "$name did not reload saved sret pointer into rcx before tail jump" \ + "$body" + fi +} + +check_case 36_musttail_sret "$ROOT/test/toy/cases/36_musttail_sret.toy" +check_case 37_tail_sret "$ROOT/test/toy/cases/37_tail_sret.toy" + +printf 'x64-win-tail-sret: ok\n'