commit 040f0c2838b6b40acd1c3f38b981cc7efedadd2d
parent dc1fabbf4cea6f8570680c2365efc94d2cbf4a09
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 07:54:39 -0700
test-toy: run the Toy corpus hosted in the FreeBSD/Windows VMs
Add test/toy/vm.sh, the hosted-VM counterpart to run.sh's freestanding-Linux
X lane: compile each Toy case against a real OS sysroot (FreeBSD base.txz extract
or the llvm-mingw UCRT sysroot) and execute it on the genuine OS in a VM,
asserting the same .expected exit-code oracle. Per (os,arch) it stages every
applicable case x opt x link-mode into one dir and drains it in a single VM
session via a new `run-batch <arch> <dir>` subcommand added to both
scripts/freebsd_vm.sh (tar-pipe over ssh) and scripts/windows_vm.sh (scp +
run-remote.ps1). Opt-in targets: test-toy-{freebsd,windows}-vm, test-toy-vm.
Verified full corpus O0+O1: fbsd-aarch64 652/0/1 (static+dynamic),
fbsd-riscv64 648/0/5, win-aarch64 324/2/1, win-x64 322/2/2, fbsd-amd64 648/0/2
(after the cpu fix below).
Windows specifics: Defender blocks some kit exes as PUA and PowerShell's
$LASTEXITCODE keeps a neighbor's code on a blocked launch, so run-batch adds a
temp-dir Defender exclusion and run-remote.ps1 captures via
Start-Process -PassThru .ExitCode. Exit codes are masked to 8 bits since the
oracle is a POSIX exit status (Windows does not truncate).
freebsd_vm.sh amd64: default qemu64 lacks POPCNT (kit emits popcnt for
@popcount) -> SIGILL; boot with -cpu qemu64,+popcnt,+sse4.1,+sse4.2 (-cpu max
breaks the FreeBSD/EDK2 TCG boot).
The lanes surfaced two genuine kit bugs, filed in doc/plan/TODO.md:
aarch64-windows link ADRP-out-of-range (118_decl_extra_attrs) and x86_64-windows
tail-call+sret O1 access violation (36_musttail_sret/37_tail_sret).
run.sh and the corpus engine are untouched; default test-toy is unaffected.
Diffstat:
6 files changed, 540 insertions(+), 4 deletions(-)
diff --git a/doc/TESTING.md b/doc/TESTING.md
@@ -298,6 +298,22 @@ Paths the interpreter/C/Wasm targets don't yet implement emit a greppable
signal stays "real regressions". `test/toy/err/` holds compile-failure cases
checked against an expected diagnostic substring.
+`test/toy/vm.sh <freebsd|windows>` adds two **hosted-VM lanes** on the same
+corpus + oracle, the hosted counterparts to the freestanding-Linux X lane: it
+links each case against a real OS sysroot (FreeBSD `base.txz` extract via
+`scripts/freebsd_sysroot.sh`, or the llvm-mingw UCRT sysroot) and runs the
+binary on the genuine OS in a VM, so the full hosted path — ABI, CRT startup,
+the platform loader, syscalls/Win32 — is exercised. Per (os, arch) it stages
+every applicable case × opt × link-mode into one dir and drains it in a single
+VM session via the VM scripts' `run-batch` subcommand (`<id> <rc>` per binary,
+joined back to the oracle). FreeBSD covers amd64/aarch64/riscv64 (static +
+dynamic); Windows covers x64 (Prism emulation) + aarch64 on one ARM64 VM. These
+are opt-in (`make test-toy-freebsd-vm` / `test-toy-windows-vm` / `test-toy-vm`),
+not in the default set, since they need provisioned VMs + cross sysroots and
+amd64/riscv64 FreeBSD run under slow TCG. Inapplicable cases SKIP (a committed
+`<name>.{freebsd,windows}.skip` sidecar, the shared `.link.skip`, or arch-only
+`*_aa64`/`*_x64`/`*_rv64` suffixes); genuine codegen gaps are left RED.
+
Because the Toy corpus is broad and oracle-carrying, it is reused for free
coverage elsewhere: `test/asm/roundtrip_toy.sh` runs it through the L2 exec
round-trip (`cc -S | as | run` and `| ld | exec`, exit must equal the oracle),
diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md
@@ -1,8 +1,23 @@
# kit — deferred fixes & code-smell backlog
-No open items.
-
This file is an open-backlog catalog, not a completion ledger. When an item is
fixed, remove it instead of checking it off or keeping a closed entry.
Add new deferred fixes below as they are discovered.
+
+## aarch64-windows: `118_decl_extra_attrs` fails to link (ADRP out of range)
+
+`kit cc -target aarch64-windows` on `test/toy/cases/118_decl_extra_attrs.toy`
+aborts with `fatal: link: ADR_PREL_PG_HI21 out of range (need ±4GiB)` at both O0
+and O1. The same case links cleanly on x86_64-windows and aarch64-freebsd, so it
+is aarch64-windows-specific. The case combines TLS-model attrs + ifunc/dllimport
++ section/merge attrs; the resulting aarch64-windows section/layout overflows an
+ADRP page-relative relocation. Surfaced by `test-toy-windows-vm` (test/toy/vm.sh).
+
+## x86_64-windows: tail-call + sret miscompiles at O1 (access violation)
+
+`test/toy/cases/{36_musttail_sret,37_tail_sret}.toy` built `-target
+x86_64-windows -O1` crash at runtime with 0xC0000005 (STATUS_ACCESS_VIOLATION);
+they pass at O0, on aarch64-freebsd, and `130_record_sret_return` passes at O1 —
+so it is specifically tail-call + struct-return (sret) lowering on the Win64 ABI
+at O1. Reproduced reliably in the Windows VM. Surfaced by `test-toy-windows-vm`.
diff --git a/mk/test.mk b/mk/test.mk
@@ -170,7 +170,8 @@ DEFAULT_TEST_TARGETS = \
bootstrap \
test-bootstrap-toy
-.PHONY: test $(TEST_TARGETS) windows-ucrt-sysroots
+.PHONY: test $(TEST_TARGETS) windows-ucrt-sysroots \
+ test-toy-vm test-toy-freebsd-vm test-toy-windows-vm
test: $(DEFAULT_TEST_TARGETS)
@@ -441,6 +442,21 @@ test-native-direct-target: $(NATIVE_DIRECT_TARGET_TEST_BIN)
test-toy: bin
@KIT=$(abspath $(BIN)) test/toy/run.sh
+# Opt-in: run the Toy corpus as real hosted programs inside the FreeBSD and
+# Windows VMs (test/toy/vm.sh), asserting each case's .expected exit code on the
+# genuine OS — the hosted counterpart to run.sh's freestanding-Linux X lane.
+# Not in the default set (needs provisioned VMs + cross sysroots; amd64/riscv64
+# FreeBSD run under slow TCG). The FreeBSD script lazily builds its base.txz
+# sysroots and self-skips arches whose VM/sysroot is absent; the Windows target
+# carries the same rt + UCRT-sysroot deps as test-coff-windows-vm.
+test-toy-freebsd-vm: bin
+ @KIT=$(abspath $(BIN)) bash test/toy/vm.sh freebsd
+
+test-toy-windows-vm: bin rt-x86_64-pc-windows rt-aarch64-windows windows-ucrt-sysroots
+ @KIT=$(abspath $(BIN)) bash test/toy/vm.sh windows
+
+test-toy-vm: test-toy-freebsd-vm test-toy-windows-vm
+
# test-bootstrap-toy: run the Toy corpus through the bootstrapped (self-built)
# stage3 kit instead of the host-built binary, so the self-hosted compiler is
# exercised on real codegen (not just self-reproduction like `make bootstrap`).
diff --git a/scripts/freebsd_vm.sh b/scripts/freebsd_vm.sh
@@ -39,6 +39,8 @@ commands:
run <arch> [qemu...] run VM in foreground with host-only SSH forwarding
wait-ssh <arch> wait for SSH and print uname
ssh <arch> [cmd...] SSH into a running VM
+ run-batch <arch> <dir> ship a staging dir into the VM, run its run-remote.sh,
+ and relay its "<id> <rc>" output (used by test-toy)
arches:
amd64 | x64 | aarch64 | arm64 | rv64 | riscv64
@@ -322,7 +324,14 @@ append_machine_args() {
# primary. Otherwise it picks the (headless) VGA as primary console and
# all userland/cloud-init output plus the login getty go to a head we
# never display, making a slow TCG boot indistinguishable from a hang.
- QEMU_ARGS=("${QEMU_ARGS[@]}" -machine q35 -vga none)
+ #
+ # QEMU's default x86_64 TCG model is "qemu64" (~x86-64-v1, no
+ # POPCNT/SSE4.2/BMI). kit targets a modern x86_64 baseline and emits e.g.
+ # `popcnt` for @popcount, which #UDs (SIGILL -> exit 132) on qemu64. Add the
+ # missing baseline-ish features explicitly: "-cpu max" enables enough that
+ # the FreeBSD/EDK2 boot fails under TCG, so extend qemu64 minimally instead.
+ QEMU_ARGS=("${QEMU_ARGS[@]}" -machine q35 -vga none \
+ -cpu "qemu64,+popcnt,+sse4.1,+sse4.2")
append_firmware_args "$arch"
;;
aarch64)
@@ -620,6 +629,26 @@ ssh_arch() {
exec ssh "${args[@]}" "$SSH_USER@127.0.0.1" "$@"
}
+# run_batch ARCH STAGEDIR
+# Drain a whole directory of staged executables in a single SSH session: tar
+# the stagedir into a guest tempdir and run its run-remote.sh entry script,
+# which executes each binary and prints one "<id> <rc>" line. We only forward
+# that stdout (and the remote rc); the caller joins the lines with its local
+# manifest. One VM round-trip per (arch) keeps the test-toy VM lane viable
+# even at hundreds of binaries per arch. The VM must already be reachable
+# (the caller boots it and waits for SSH).
+run_batch_arch() {
+ local arch="$1" stagedir="$2" args
+ arch="$(canon_arch "$arch")"
+ [ -d "$stagedir" ] || die "run-batch: no such staging dir: $stagedir"
+ [ -f "$stagedir/run-remote.sh" ] || die "run-batch: missing $stagedir/run-remote.sh"
+ command -v tar >/dev/null 2>&1 || die "run-batch: tar not found on host"
+ # shellcheck disable=SC2207
+ args=($(ssh_args "$arch"))
+ tar -C "$stagedir" -cf - . | ssh "${args[@]}" "$SSH_USER@127.0.0.1" \
+ 'd=$(mktemp -d /tmp/kit-toy.XXXXXX) && tar -C "$d" -xf - && sh "$d/run-remote.sh"; rc=$?; rm -rf "$d"; exit $rc'
+}
+
doctor() {
printf 'host: %s/%s\n' "$(uname -s 2>/dev/null)" "$(uname -m 2>/dev/null)"
printf 'vm root: %s\n' "$VM_ROOT"
@@ -652,6 +681,7 @@ case "$cmd" in
run) [ $# -ge 2 ] || { usage; exit 2; }; arch="$2"; shift 2; run_arch "$arch" "$@" ;;
wait-ssh) [ $# -eq 2 ] || { usage; exit 2; }; wait_ssh "$2" ;;
ssh) [ $# -ge 2 ] || { usage; exit 2; }; arch="$2"; shift 2; ssh_arch "$arch" "$@" ;;
+ run-batch) [ $# -eq 3 ] || { usage; exit 2; }; run_batch_arch "$2" "$3" ;;
-h|--help|help|"") usage ;;
*) usage; exit 2 ;;
esac
diff --git a/scripts/windows_vm.sh b/scripts/windows_vm.sh
@@ -90,6 +90,8 @@ provisioning (single Windows 11 ARM64 VM, serves both arches):
execution (used by the COFF/PE smoke tests):
smoke <arch> run a small probe in the VM
run <arch> exe [args] upload exe to the VM, run it, then remove it
+ run-batch <arch> <dir> upload a staging dir's *.exe + run-remote.ps1, run it,
+ and relay its "<id> <rc>" output (used by test-toy)
arches: x64 | x86_64 | amd64 | aarch64 | arm64 | aa64
@@ -823,6 +825,46 @@ run_exe() {
return "$rc"
}
+# run-batch ARCH STAGEDIR
+# Drain a whole staging dir in one VM session: upload its *.exe plus the
+# run-remote.ps1 entry script (authored by the caller), run the script, and
+# relay its "<id> <rc>" stdout (one line per binary). The caller joins those
+# lines with its host-side manifest. One upload+run per arch keeps the
+# test-toy VM lane viable across hundreds of binaries. Mirrors run_exe's
+# ssh/scp plumbing. The VM must already be reachable.
+run_batch() {
+ local arch="$1" stage="$2" destdir dest_fwd run_ps rc
+ [ -d "$stage" ] || die "run-batch: no such staging dir: $stage"
+ [ -f "$stage/run-remote.ps1" ] || die "run-batch: missing $stage/run-remote.ps1"
+ command -v ssh >/dev/null 2>&1 || die "ssh not found"
+ command -v scp >/dev/null 2>&1 || die "scp not found"
+ ssh_setup "$arch"
+ destdir="$(remote_mkdir)"
+ dest_fwd="${destdir//\\//}"
+ # Some kit-produced test exes trip a Windows Defender PUA heuristic, which
+ # blocks the launch (and would otherwise corrupt exit-code capture). Exclude
+ # the temp tree the binaries are uploaded into BEFORE the upload, so neither
+ # the on-write nor the on-execute scan quarantines them. Best-effort: requires
+ # an admin session + Defender, but works even with Tamper Protection on
+ # (path exclusions are still honored, unlike disabling real-time monitoring).
+ remote_ps "try { Add-MpPreference -ExclusionPath \$env:TEMP -ErrorAction Stop } catch {}" >/dev/null 2>&1 || true
+ # Upload only the executables + runner; the host-side manifest/logs stay home.
+ if ! scp "${SSH_ARGS[@]}" "$stage"/*.exe "$stage/run-remote.ps1" \
+ "$SSH_DEST:$dest_fwd/" >/dev/null 2>&1; then
+ remote_cleanup "$destdir"
+ die "scp upload failed -> $dest_fwd"
+ fi
+ # Invoke the runner from the upload dir (it Set-Location's to $PSScriptRoot
+ # and prints "<id> <rc>" lines to stdout, which remote_ps relays verbatim).
+ run_ps="\$ErrorActionPreference='Continue'; & (Join-Path '$(ps_sq "$destdir")' 'run-remote.ps1')"
+ set +e
+ remote_ps "$run_ps"
+ rc=$?
+ set -e
+ remote_cleanup "$destdir"
+ return "$rc"
+}
+
smoke_arch() {
local arch="$1" ps
ssh_setup "$arch"
@@ -924,6 +966,7 @@ case "$cmd" in
stop) powerdown ;;
smoke) [ $# -eq 2 ] || { usage; exit 2; }; smoke_arch "$2" ;;
run) [ $# -ge 3 ] || { usage; exit 2; }; arch="$2"; exe="$3"; shift 3; run_exe "$arch" "$exe" "$@" ;;
+ run-batch) [ $# -eq 3 ] || { usage; exit 2; }; run_batch "$2" "$3" ;;
-h|--help|help|"") usage ;;
*) usage; exit 2 ;;
esac
diff --git a/test/toy/vm.sh b/test/toy/vm.sh
@@ -0,0 +1,416 @@
+#!/usr/bin/env bash
+# test/toy/vm.sh — run the .toy corpus as real *hosted* programs inside the
+# FreeBSD and Windows VMs, asserting each case's `.expected` exit code.
+#
+# This is the VM counterpart to test/toy/run.sh's X lane: X cross-compiles a
+# freestanding ELF and runs it under qemu-user/podman with a kit-built _start
+# stub; here we link a full hosted binary against a real OS sysroot
+# (FreeBSD base.txz extract, or the llvm-mingw UCRT sysroot) and execute it on
+# the genuine OS in a VM. So this exercises the whole hosted path — ABI, CRT
+# startup, the platform loader, syscalls/Win32 — that the Linux lanes cannot.
+#
+# usage: test/toy/vm.sh <os> [name_filter]
+# os freebsd | windows
+# name_filter substring of the case basename to restrict the run (TDD)
+#
+# env:
+# KIT kit driver (default build/kit)
+# KIT_TOY_FREEBSD_ARCHES default "amd64 aarch64 riscv64"
+# KIT_TOY_WINDOWS_ARCHES default "x64 aarch64"
+# KIT_OPT_LEVELS default "0 1"
+# KIT_FREEBSD_LINK static | dynamic | both (default both)
+# KIT_TEST_FILTER same as the positional name_filter
+# KIT_TOY_VM_KEEP_UP if 1, do not shut a VM we started (debugging)
+#
+# Execution model: per (os, arch) we compile every applicable case at every
+# (opt, link-mode) into ONE staging dir, ship that dir into the VM, and run all
+# the binaries in a single SSH session (scripts/<os>_vm.sh run-batch), which
+# emits "<id> <rc>" per binary. We then join those exit codes with a host-side
+# manifest and report pass/fail. One VM round-trip per arch keeps the corpus
+# (hundreds of binaries per arch at O0+O1) viable.
+#
+# Like test/toy/run.sh, skips (missing sysroot/VM, arch-inapplicable cases) are
+# non-fatal: only a real FAIL gates the exit. Cases that legitimately do not
+# lower yet for an OS/arch are left RED on purpose — opt them out only with a
+# committed <name>.<os>.skip sidecar when the case is genuinely inapplicable.
+
+set -u
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+KIT="${KIT:-$ROOT/build/kit}"
+TEST_DIR="$ROOT/test/toy"
+BUILD_DIR="$ROOT/build/test/toy-vm"
+
+# shellcheck source=../lib/kit_sh_report.sh
+. "$ROOT/test/lib/kit_sh_report.sh"
+kit_report_init
+
+OS="${1:-}"
+FILTER="${2:-${KIT_TEST_FILTER:-}}"
+case "$OS" in
+ freebsd|windows) ;;
+ *) echo "usage: $0 <freebsd|windows> [name_filter]" >&2; exit 2 ;;
+esac
+
+OPT_LEVELS="${KIT_OPT_LEVELS:-0 1}"
+FREEBSD_ARCHES="${KIT_TOY_FREEBSD_ARCHES:-amd64 aarch64 riscv64}"
+WINDOWS_ARCHES="${KIT_TOY_WINDOWS_ARCHES:-x64 aarch64}"
+LINK="${KIT_FREEBSD_LINK:-both}"
+
+if [ ! -x "$KIT" ]; then
+ echo "$OS: kit binary not found at $KIT (run 'make bin')" >&2
+ exit 2
+fi
+
+shopt -s nullglob
+
+# ---- target / sysroot resolution -------------------------------------------
+
+# triple OS ARCH -> kit -target triple
+triple_for() {
+ case "$1/$2" in
+ freebsd/amd64) echo x86_64-freebsd ;;
+ freebsd/aarch64) echo aarch64-freebsd ;;
+ freebsd/riscv64) echo riscv64-freebsd ;;
+ windows/x64) echo x86_64-windows ;;
+ windows/aarch64) echo aarch64-windows ;;
+ *) echo "" ;;
+ esac
+}
+
+# sysroot OS ARCH -> sysroot dir (may be empty / nonexistent)
+sysroot_for() {
+ case "$1" in
+ freebsd) "$ROOT/scripts/freebsd_sysroot.sh" path "$2" 2>/dev/null ;;
+ windows) "$ROOT/scripts/llvm_mingw_sysroot.sh" path "$2" 2>/dev/null ;;
+ esac
+}
+
+# The basename suffix this (os,arch) "owns" — arch-only cases named *_x64 /
+# *_aa64 / *_rv64 use intrinsics that only lower on that arch, so they apply to
+# exactly one arch (cf. test/toy/run.sh:cross_one).
+arch_suffix_for() {
+ case "$1/$2" in
+ freebsd/amd64|windows/x64) echo _x64 ;;
+ */aarch64) echo _aa64 ;;
+ freebsd/riscv64) echo _rv64 ;;
+ *) echo "" ;;
+ esac
+}
+
+# case_skip_reason OS ARCH NAME SRC -> echoes a skip reason, or "" if applicable.
+case_skip_reason() {
+ local os="$1" arch="$2" name="$3" src="$4" own
+ if [ -e "${src%.toy}.link.skip" ]; then
+ head -n1 "${src%.toy}.link.skip"; return; fi
+ if [ -e "${src%.toy}.$os.skip" ]; then
+ head -n1 "${src%.toy}.$os.skip"; return; fi
+ # asmnop is an aa64-only construct (no x64/rv64 lowering before toy asm
+ # selectors), matching the X lane's blanket skip.
+ if [ "$arch" != aarch64 ] && grep -q 'asmnop' "$src" 2>/dev/null; then
+ echo "asmnop is aa64-only"; return; fi
+ own="$(arch_suffix_for "$os" "$arch")"
+ case "$name" in
+ *_aa64) [ "$own" = _aa64 ] || { echo "aa64-only case"; return; } ;;
+ *_x64) [ "$own" = _x64 ] || { echo "x64-only case"; return; } ;;
+ *_rv64) [ "$own" = _rv64 ] || { echo "rv64-only case"; return; } ;;
+ esac
+ echo ""
+}
+
+# link modes for a given OS (FreeBSD honors KIT_FREEBSD_LINK; Windows is the
+# single dynamic UCRT console mode).
+modes_for() {
+ if [ "$1" = windows ]; then echo ucrt; return; fi
+ case "$LINK" in
+ static) echo static ;;
+ dynamic) echo dynamic ;;
+ both|*) echo static dynamic ;;
+ esac
+}
+
+# cc_extra_flags OS MODE -> extra kit cc flags for this OS/link mode
+cc_extra_flags() {
+ case "$1/$2" in
+ freebsd/static) echo -static ;;
+ freebsd/dynamic) echo ;;
+ windows/ucrt) echo -mconsole ;;
+ esac
+}
+
+# ---- staging ---------------------------------------------------------------
+# stage_arch OS ARCH TRIPLE SYSROOT STAGEDIR
+# Compile every applicable case×opt×mode into STAGEDIR (binaries named by a
+# running id), write the host-side manifest ($STAGEDIR/manifest: "id expected
+# label") and the guest entry script (run-remote.sh / run-remote.ps1 listing
+# the ids). Compile failures are reported as FAIL immediately. Returns the
+# number of staged binaries via the global STAGED_N.
+STAGED_N=0
+stage_arch() {
+ local os="$1" arch="$2" triple="$3" sysroot="$4" stage="$5"
+ local ext="" id=0 ids="" name base reason opt mode label out cc_err
+ [ "$os" = windows ] && ext=".exe"
+ rm -rf "$stage"; mkdir -p "$stage"
+ : > "$stage/manifest"
+
+ for src in "$TEST_DIR"/cases/*.toy; do
+ base="$(basename "${src%.toy}")"
+ [ -n "$FILTER" ] && case "$base" in *"$FILTER"*) ;; *) continue ;; esac
+ reason="$(case_skip_reason "$os" "$arch" "$base" "$src")"
+ if [ -n "$reason" ]; then
+ kit_skip "$base/$os-$arch" "$reason"
+ continue
+ fi
+ local expected=0
+ [ -f "${src%.toy}.expected" ] && expected="$(cat "${src%.toy}.expected")"
+ for opt in $OPT_LEVELS; do
+ for mode in $(modes_for "$os"); do
+ label="$base/$os-$arch-$mode-O$opt"
+ out="$stage/$id$ext"
+ cc_err="$stage/$id.cc.err"
+ # shellcheck disable=SC2046
+ if ! "$KIT" cc "-O$opt" -target "$triple" --sysroot "$sysroot" \
+ $(cc_extra_flags "$os" "$mode") "$src" -o "$out" \
+ > "$stage/$id.cc.out" 2> "$cc_err"; then
+ kit_fail "$label" "kit cc -target $triple failed"
+ sed 's/^/ | /' "$cc_err"
+ continue
+ fi
+ if [ -s "$cc_err" ]; then
+ kit_fail "$label" "kit cc -target $triple wrote stderr"
+ sed 's/^/ | /' "$cc_err"
+ continue
+ fi
+ printf '%s %s %s\n' "$id" "$expected" "$label" >> "$stage/manifest"
+ ids="$ids $id"
+ id=$((id + 1))
+ done
+ done
+ done
+
+ STAGED_N=$id
+ [ "$id" -eq 0 ] && return 0
+ if [ "$os" = windows ]; then
+ write_remote_ps1 "$stage" "$ids"
+ else
+ write_remote_sh "$stage" "$ids"
+ fi
+}
+
+# The guest entry scripts. They run each staged binary with output suppressed
+# and print "<id> <rc>"; a crash reports its own rc and the loop continues.
+write_remote_sh() {
+ local stage="$1" ids="$2"
+ {
+ echo '#!/bin/sh'
+ echo 'cd "$(dirname "$0")" || exit 99'
+ printf 'for id in%s; do\n' "$ids"
+ echo ' chmod +x "./$id" 2>/dev/null'
+ echo ' "./$id" >/dev/null 2>&1'
+ echo ' echo "$id $?"'
+ echo 'done'
+ echo 'exit 0'
+ } > "$stage/run-remote.sh"
+}
+
+# Capture each exit code via Start-Process -PassThru .ExitCode rather than
+# `& exe; $LASTEXITCODE`: a launch that Windows blocks (e.g. a Defender PUA
+# false-positive on a kit-produced exe) does NOT update $LASTEXITCODE, so the
+# bare-`&` form silently reports the PREVIOUS binary's code. Start-Process throws
+# on a blocked launch instead, which we turn into a distinct LAUNCHFAIL token so
+# report_results flags it rather than mis-scoring a neighbor's exit code.
+write_remote_ps1() {
+ local stage="$1" ids="$2" id
+ {
+ echo '$ErrorActionPreference = "Continue"'
+ echo 'Set-Location -LiteralPath $PSScriptRoot'
+ echo '$o = Join-Path $env:TEMP "kit_toy_o.txt"'
+ echo '$e = Join-Path $env:TEMP "kit_toy_e.txt"'
+ for id in $ids; do
+ printf 'try { $p = Start-Process -FilePath ".\\%s.exe" -Wait -PassThru -WindowStyle Hidden -RedirectStandardOutput $o -RedirectStandardError $e; "%s $($p.ExitCode)" } catch { "%s LAUNCHFAIL" }\n' "$id" "$id" "$id"
+ done
+ echo 'exit 0'
+ } > "$stage/run-remote.ps1"
+}
+
+# ---- result join -----------------------------------------------------------
+# report_results MANIFEST BATCHOUT : join the VM's "<id> <rc>" lines with the
+# host manifest and emit pass/fail per case. expected is masked to 8 bits to
+# match POSIX exit semantics (Windows codes are small, so identical).
+report_results() {
+ local manifest="$1" batchout="$2" joined="$2.joined"
+ # The Windows VM returns CRLF lines; strip CR so the rc field stays numeric.
+ tr -d '\r' < "$batchout" > "$batchout.clean"
+ awk 'NR==FNR{rc[$1]=$2;next}{print $1, $2, $3, (($1) in rc ? rc[$1] : "?")}' \
+ "$batchout.clean" "$manifest" > "$joined"
+ local id expected label rc exp got
+ while read -r id expected label rc; do
+ exp=$((expected & 255))
+ # "?" = the VM emitted no line for this id; "LAUNCHFAIL" = the binary could
+ # not be started (e.g. a Defender block). Either way it did not run — FAIL,
+ # never silently scored against a stale/neighboring exit code.
+ case "$rc" in
+ ''|*[!0-9-]*)
+ kit_fail "$label" "binary did not run in VM (rc=$rc)"
+ continue ;;
+ esac
+ # The .toy oracle is a POSIX 8-bit exit status: cases may return values >255
+ # and rely on the kernel truncating to the low byte. FreeBSD/Linux already
+ # truncate, but Windows preserves the full 32-bit exit code, so mask both
+ # sides to compare on the same 8-bit oracle every other lane uses.
+ got=$(( rc & 255 ))
+ if [ "$got" -eq "$exp" ] 2>/dev/null; then
+ kit_pass "$label"
+ else
+ kit_fail "$label" "expected rc $exp, got $rc"
+ fi
+ done < "$joined"
+}
+
+# ---- FreeBSD lane ----------------------------------------------------------
+fbsd_vm() { "$ROOT/scripts/freebsd_vm.sh" "$@"; }
+
+fbsd_qemu_for() {
+ case "$1" in amd64) echo qemu-system-x86_64 ;; *) echo "qemu-system-$1" ;; esac
+}
+
+run_freebsd_arch() {
+ local arch="$1" triple sysroot stage qemu started=0 batch
+ triple="$(triple_for freebsd "$arch")"
+ if [ -z "$triple" ]; then kit_skip_na "toy/freebsd-$arch" "unknown arch"; return; fi
+ sysroot="$(sysroot_for freebsd "$arch")"
+ if [ -z "$sysroot" ] || [ ! -d "$sysroot/usr/include" ]; then
+ kit_skip "toy/freebsd-$arch" "missing sysroot (scripts/freebsd_sysroot.sh $arch)"; return; fi
+ case "$LINK" in
+ static|both) [ -f "$sysroot/usr/lib/libc.a" ] || {
+ kit_skip "toy/freebsd-$arch" "missing $sysroot/usr/lib/libc.a"; return; } ;;
+ esac
+ case "$LINK" in
+ dynamic|both) [ -f "$sysroot/lib/libc.so.7" ] || {
+ kit_skip "toy/freebsd-$arch" "missing $sysroot/lib/libc.so.7"; return; } ;;
+ esac
+
+ stage="$BUILD_DIR/freebsd/$arch"
+ stage_arch freebsd "$arch" "$triple" "$sysroot" "$stage"
+ [ "$STAGED_N" -eq 0 ] && return
+
+ qemu="$(fbsd_qemu_for "$arch")"
+ if ! command -v "$qemu" >/dev/null 2>&1; then
+ kit_skip "toy/freebsd-$arch" "$qemu missing (staged $STAGED_N binaries, not run)"; return; fi
+ if [ ! -f "$ROOT/build/freebsd-vm/images/freebsd-$arch.provisioned" ] &&
+ ! fbsd_vm ssh "$arch" true >/dev/null 2>&1; then
+ kit_skip "toy/freebsd-$arch" "VM not provisioned (scripts/freebsd_vm.sh prepare $arch)"; return; fi
+
+ if fbsd_vm ssh "$arch" true >/dev/null 2>&1; then
+ : # reuse a guest that is already up
+ else
+ fbsd_vm run "$arch" > "$stage/vm.log" 2>&1 &
+ echo "$!" > "$stage/vm.pid"; started=1
+ fi
+ if ! fbsd_vm wait-ssh "$arch" > "$stage/wait.log" 2>&1; then
+ kit_fail "toy/freebsd-$arch" "VM did not become reachable"
+ sed 's/^/ | /' "$stage/wait.log" | head -20
+ [ "$started" = 1 ] && fbsd_stop "$arch" "$stage"
+ return
+ fi
+
+ batch="$stage/batch.out"
+ if ! fbsd_vm run-batch "$arch" "$stage" > "$batch" 2> "$stage/batch.err"; then
+ kit_fail "toy/freebsd-$arch" "run-batch failed"
+ sed 's/^/ | /' "$stage/batch.err" | head -20
+ [ "$started" = 1 ] && fbsd_stop "$arch" "$stage"
+ return
+ fi
+ report_results "$stage/manifest" "$batch"
+ [ "$started" = 1 ] && fbsd_stop "$arch" "$stage"
+}
+
+fbsd_stop() {
+ local arch="$1" stage="$2" pid
+ [ "${KIT_TOY_VM_KEEP_UP:-0}" = 1 ] && return 0
+ fbsd_vm ssh "$arch" 'sync; shutdown -p now' >/dev/null 2>&1 || true
+ [ -f "$stage/vm.pid" ] || return 0
+ pid="$(cat "$stage/vm.pid")"
+ for _ in $(seq 1 30); do kill -0 "$pid" 2>/dev/null || return 0; sleep 1; done
+ kill "$pid" 2>/dev/null || true; sleep 1
+ kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true
+ wait "$pid" 2>/dev/null || true
+}
+
+run_freebsd() {
+ local arch
+ printf 'toy-vm freebsd: arches="%s" opts="%s" link=%s\n' \
+ "$FREEBSD_ARCHES" "$OPT_LEVELS" "$LINK"
+ for arch in $FREEBSD_ARCHES; do run_freebsd_arch "$arch"; done
+}
+
+# ---- Windows lane ----------------------------------------------------------
+win_vm() { "$ROOT/scripts/windows_vm.sh" "$@"; }
+
+run_windows() {
+ local arch triple sysroot stage started=0 any=0 batch
+ printf 'toy-vm windows: arches="%s" opts="%s"\n' "$WINDOWS_ARCHES" "$OPT_LEVELS"
+
+ # Stage every arch first (host-only work); only boot the VM if something was
+ # actually staged. One ARM64 VM serves both arches (x64 via Prism emulation).
+ for arch in $WINDOWS_ARCHES; do
+ triple="$(triple_for windows "$arch")"
+ if [ -z "$triple" ]; then kit_skip_na "toy/windows-$arch" "unknown arch"; continue; fi
+ sysroot="$(sysroot_for windows "$arch")"
+ if [ -z "$sysroot" ] || [ ! -r "$sysroot/include/windows.h" ] ||
+ [ ! -r "$sysroot/lib/libucrt.a" ]; then
+ kit_skip "toy/windows-$arch" "missing UCRT sysroot (scripts/llvm_mingw_sysroot.sh prepare $arch)"; continue; fi
+ stage="$BUILD_DIR/windows/$arch"
+ stage_arch windows "$arch" "$triple" "$sysroot" "$stage"
+ [ "$STAGED_N" -gt 0 ] && any=1
+ done
+ [ "$any" -eq 0 ] && return
+
+ if ! command -v "${KIT_WINDOWS_QEMU:-qemu-system-aarch64}" >/dev/null 2>&1; then
+ kit_skip "toy/windows" "qemu-system-aarch64 missing (staged, not run)"; return; fi
+
+ if win_vm ssh aarch64 ver >/dev/null 2>&1; then
+ : # reuse a running VM
+ else
+ if ! win_vm boot > "$BUILD_DIR/windows/boot.log" 2>&1; then
+ kit_skip "toy/windows" "VM unavailable ($(tail -n1 "$BUILD_DIR/windows/boot.log" 2>/dev/null))"; return; fi
+ started=1
+ fi
+ if ! win_vm wait-ssh 900 > "$BUILD_DIR/windows/wait.log" 2>&1; then
+ kit_fail "toy/windows" "VM did not become reachable"
+ sed 's/^/ | /' "$BUILD_DIR/windows/wait.log" | head -20
+ [ "$started" = 1 ] && win_stop
+ return
+ fi
+
+ for arch in $WINDOWS_ARCHES; do
+ stage="$BUILD_DIR/windows/$arch"
+ [ -f "$stage/manifest" ] && [ -s "$stage/manifest" ] || continue
+ batch="$stage/batch.out"
+ if ! win_vm run-batch "$arch" "$stage" > "$batch" 2> "$stage/batch.err"; then
+ kit_fail "toy/windows-$arch" "run-batch failed"
+ sed 's/^/ | /' "$stage/batch.err" | head -20
+ continue
+ fi
+ report_results "$stage/manifest" "$batch"
+ done
+ [ "$started" = 1 ] && win_stop
+}
+
+win_stop() {
+ [ "${KIT_TOY_VM_KEEP_UP:-0}" = 1 ] && return 0
+ win_vm stop >/dev/null 2>&1 || true
+}
+
+# ---- drive -----------------------------------------------------------------
+mkdir -p "$BUILD_DIR/freebsd" "$BUILD_DIR/windows"
+case "$OS" in
+ freebsd) run_freebsd ;;
+ windows) run_windows ;;
+esac
+
+# Skips (missing sysroot/VM, inapplicable cases) are non-fatal, matching
+# test/toy/run.sh — only a real FAIL gates the exit.
+KIT_SKIP_IS_FAILURE=0
+kit_summary "toy-vm-$OS"
+kit_exit