commit efe821c70ccf8844a0591a7ce0c2e0ef06d0c81c
parent 68782ef83e884f5df4fdc516ad044a14cc6f7d10
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 30 May 2026 13:16:22 -0700
test+asm: host-assembler execution lane over the Toy corpus (test-hostas-toy)
Prove cfree's `cc -S` is *standard* assembly a third-party assembler accepts
and that means the same thing, judged by EXECUTION (exit code), not by matching
bytes -- cfree and clang produce different code, so a byte/text match would be
meaningless.
Per Toy case (native target, -O0+-O1), emit ONE `cc -S` and feed it to two
assemblers, each linked with `cfree ld` and run, exit asserted against the case
oracle, so the assembler is the only variable:
cfree-as: cfree cc -S | cfree as | cfree ld | ./a.out (baseline)
clang-as: cfree cc -S | clang -c | cfree ld | ./b.out (the real test)
The cfree-as lane mirrors roundtrip_toy.sh's /ld lane (312 pass / 0 fail). The
clang-as lane is the point: a standard assembler can't paper over a private-
dialect quirk the way cfree's own `as` can (cf. test/asm/diff_llvm.sh, but by
execution).
It immediately surfaced a real bug roundtrip_toy.sh structurally can't catch:
on the native Mach-O target `cc -S` emits a broken hybrid -- Mach-O
section/symbol conventions (_main, .section __TEXT,__eh_frame) mixed with
ELF-only .type/.size directives and an @progbits type token inside the Mach-O
.section. cfree's own `as` accepts it (so roundtrip_toy.sh is green and blind to
it); clang/llvm-mc reject it ("unknown directive", "unexpected token in
'.section'"). So the clang-as lane is XFAIL (312 expected failures) and does NOT
gate by default -- the target exits 0 on cfree-as green alone. Run with
CFREE_HOSTAS_ENFORCE_CLANG=1 to make clang-as gate while fixing the printer; an
unexpected pass under the default reports XPASS as a reminder to flip
enforcement on once the Mach-O fix (in src/api/asm_emit.c) lands.
Opt-in (make test-hostas-toy; in TEST_TARGETS, not the default set); skips
cleanly when clang is absent. SKIP=141_threadlocal_mutate (the same TLS-
symbolization gap the round-trip lane quarantines). Doc in
doc/ASM_ROUNDTRIP_TESTING.md.
Diffstat:
3 files changed, 216 insertions(+), 0 deletions(-)
diff --git a/doc/ASM_ROUNDTRIP_TESTING.md b/doc/ASM_ROUNDTRIP_TESTING.md
@@ -180,6 +180,43 @@ Currently 269 agree, 34 reloc-equivalent, 0 differ over the corpus at -O0/-O1.
Opt-in; skips cleanly when `llvm-mc` is absent. The host carries the
aarch64/x86_64/riscv64 llvm tools.
+### Host-assembler execution lane (`test-hostas-toy`)
+
+The round-trip and llvm lanes either use cfree's *own* assembler or compare
+*bytes*. This lane instead proves `cc -S` is **standard assembly a third-party
+assembler accepts and that means the same thing**, judged by **execution**, not
+bytes (cfree and clang produce different code, so a byte/text match would be
+meaningless). For each Toy case (native target, `-O0`+`-O1`),
+`test/asm/hostas_toy.sh` emits **one** `cc -S` and feeds it to two assemblers,
+each linked with `cfree ld` and run, exit asserted against the case oracle —
+so the *assembler* is the only variable:
+
+- **`cfree-as` lane** (baseline): `cfree cc -S | cfree as | cfree ld | ./a.out`.
+ Mirrors `roundtrip_toy.sh`'s `/ld` lane; **312 pass / 0 fail**.
+- **`clang-as` lane** (the real test): `cfree cc -S | clang -c | cfree ld |
+ ./b.out`. A standard assembler can't paper over a private-dialect quirk the
+ way cfree's own `as` can.
+
+This immediately surfaced a real bug the round-trip lane structurally cannot
+catch: on the native **Mach-O** target `cc -S` emits a broken hybrid — Mach-O
+section/symbol conventions (`_main`, `.section __TEXT,__eh_frame`) mixed with
+**ELF-only** `.type`/`.size` directives and an `@progbits` type token inside the
+Mach-O `.section`. `cfree as` accepts it (so `roundtrip_toy.sh` is green and
+blind to it); clang/llvm-mc reject it (`unknown directive`, `unexpected token in
+'.section'`). So the `clang-as` lane is currently **XFAIL** (312 expected
+failures) and does **not** gate by default; the target exits 0 on
+`cfree-as` green alone. Run with `CFREE_HOSTAS_ENFORCE_CLANG=1` to make
+`clang-as` gate (use while fixing the printer); an unexpected pass under the
+default reports **XPASS** as a reminder to flip enforcement on once the fix
+lands. The fix belongs in the Mach-O path of `src/api/asm_emit.c` (guard the
+ELF-only `.type`/`.size`; drop the `@progbits` token under Mach-O); tracked as a
+follow-up. The same `141_threadlocal_mutate` TLS-symbolization skip applies.
+
+Opt-in (`make test-hostas-toy`); skips cleanly when `clang` is absent. The ELF
+cross-targets (`aarch64/x86_64/riscv64-linux-gnu`) already assemble cleanly with
+clang/llvm-mc, so this lane can be extended to podman/qemu cross-execution (à la
+`test/toy/run.sh` path X) once the native Mach-O printer is fixed.
+
## Background — what cfree can do today (verified)
- **`cc -S` exists** and is *disassembly-to-text plus module scaffolding*:
diff --git a/test/asm/hostas_toy.sh b/test/asm/hostas_toy.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+# test/asm/hostas_toy.sh — prove cfree's `cc -S` is STANDARD assembly by feeding
+# the SAME `cc -S` output to two assemblers, linking + running both, and asserting
+# the toy exit-code oracle for each.
+#
+# Per toy case (native target; both O0 and O1):
+# cfree cc -S -> s.s (one native .s, shared by both)
+# A /cfree-as: cfree as s.s | cfree ld | ./a.out exit == oracle
+# B /clang-as: clang -c s.s | cfree ld | ./b.out exit == oracle
+#
+# The only difference between the two lanes is the ASSEMBLER, so this is a
+# controlled test of whether cfree's emitted asm is something a third-party
+# toolchain accepts and that means the same thing. Lane A is the baseline (cfree
+# both writes and reads the text, so a private-dialect quirk can hide). Lane B is
+# the real test: a standard assembler (clang) can't paper over such a quirk —
+# same idea as test/asm/diff_llvm.sh for the C corpus, but checked by EXECUTION
+# (exit code), not by matching bytes (cfree and clang produce different code, so
+# a byte/text match would be meaningless).
+#
+# KNOWN-RED today: on the native Mach-O target cfree's `cc -S` emits ELF-only
+# `.type`/`.size` directives and an `@progbits` type token inside Mach-O
+# `.section`s, which clang/llvm-mc reject while cfree's own `as` accepts them. So
+# the clang-as lane is EXPECTED to fail until that printer gap is fixed; those
+# failures report as XFAIL and do NOT fail this target by default. Set
+# CFREE_HOSTAS_ENFORCE_CLANG=1 to make the clang-as lane gate (use while fixing);
+# an unexpected clang-as pass under the default reports XPASS as a reminder to
+# flip enforcement on once the fix lands.
+
+set -u  # unset variables are errors; there is no -e, so command failures are checked explicitly
+# Inputs: $1 = optional case-name substring filter; env: CFREE_TEST_OPTS, CFREE_HOSTAS_ENFORCE_CLANG, CLANG.
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"   # repo root, two levels above this script
+CFREE="$ROOT/build/cfree"                     # binary under test
+CASES="$ROOT/test/toy/cases"                  # *.toy sources and optional *.expected oracles
+WORK="$ROOT/build/test/asm/hostas_toy"        # scratch root for per-case outputs
+OPTS="${CFREE_TEST_OPTS:-O0 O1}"              # optimization levels, written without the leading dash
+FILTER="${1:-}"                               # empty means "run every case"
+ENFORCE_CLANG="${CFREE_HOSTAS_ENFORCE_CLANG:-0}"  # "1" makes the clang-as lane gate
+
+# Cases blocked on a separate, known cc -S symbolizer gap (the round-trip lane
+# quarantines the same set): 141 emits an unsymbolized `adrp x,0x0` for a
+# thread-local access (TLS symbolization, tracked separately).
+SKIP="141_threadlocal_mutate"
+# Host assembler: resolve $CLANG (default "clang") through PATH so an override given as a bare command name (CLANG=clang-17) works like a full path. Empty when nothing executable is found, which the -z/-x preflight turns into a clean skip.
+CLANG="$(command -v "${CLANG:-clang}" 2>/dev/null || true)"
+
+color_red() { printf '\033[31m%s\033[0m' "$1"; }
+color_grn() { printf '\033[32m%s\033[0m' "$1"; }
+color_yel() { printf '\033[33m%s\033[0m' "$1"; }
+
+[ -x "$CFREE" ] || {  # cfree is mandatory: a missing binary is a hard failure
+  printf 'hostas-toy: %s cfree missing — run "make bin"\n' "$(color_red FATAL)" >&2
+  exit 1
+}
+if ! { [ -n "$CLANG" ] && [ -x "$CLANG" ]; }; then  # clang is optional: report SKIP and exit successfully
+  printf 'hostas-toy: %s no clang (host assembler); skipping\n' "$(color_yel SKIP)"
+  exit 0
+fi
+mkdir -p "$WORK"  # scratch root; per-case subdirectories are created in the main loop
+
+is_skip() { case " $SKIP " in *" $1 "*) return 0;; *) return 1;; esac; }
+
+# err_reason FILE — print one human-readable reason taken from an assembler's stderr capture, without a trailing newline.
+# Preference order: the text after the first `error:`; else the first line naming an unknown directive / section type; else the first line with everything up to its last ": " removed. clang prints a harmless `-Wmissing-sysroot` warning first (we never link with clang, so the SDK is irrelevant), which is why a blind `head -1` is only the last resort.
+# Prints nothing when FILE is empty or unreadable.
+err_reason() {
+  local errfile="$1" reason
+  reason=$(grep -m1 -E 'error:' "$errfile" 2>/dev/null | sed 's|.*error: *||')
+  [ -n "$reason" ] || reason=$(grep -m1 -E 'unknown directive|unknown section type' "$errfile" 2>/dev/null | sed 's|^[[:space:]]*||')
+  [ -n "$reason" ] || reason=$(head -1 "$errfile" 2>/dev/null | sed 's|.*: ||')
+  printf '%s' "$reason"
+}
+
+a_pass=0 a_fail=0                        # cfree-as lane tallies (cc -S failures are charged here too)
+b_pass=0 b_xfail=0 b_xpass=0 b_efail=0   # clang-as lane: pass / expected fail / unexpected pass / enforced fail
+skip=0                                   # quarantined cases, counted once per case
+a_failnames=()                           # one summary entry per cc -S or cfree-as failure
+
+printf 'hostas-toy: cfree=%s\n' "$CFREE"  # banner: binaries and settings used by this run
+printf 'hostas-toy: clang=%s opts="%s" enforce_clang=%s\n' "$CLANG" "$OPTS" "$ENFORCE_CLANG"
+
+shopt -s nullglob  # with no matching *.toy the loop runs zero times instead of seeing the literal pattern
+for src in "$CASES"/*.toy; do
+ name="$(basename "$src" .toy)"
+ [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue  # optional substring filter from $1
+ if is_skip "$name"; then
+ skip=$((skip+1)); printf ' %s %s — known cc -S symbolizer gap\n' "$(color_yel SKIP)" "$name"; continue
+ fi
+ exp=0; [ -f "$CASES/$name.expected" ] && exp=$(head -n1 "$CASES/$name.expected")  # oracle: first line of <name>.expected, else 0
+ exp=$((exp & 255))  # keep the low 8 bits so it is comparable with $? below
+ for opt in $OPTS; do  # unquoted: word-splits OPTS into one level per iteration
+ w="$WORK/$name/$opt"; rm -rf "$w"; mkdir -p "$w"  # fresh scratch dir per case and opt level
+
+ # One native cc -S, shared by both lanes. A failure here is charged to the cfree-as tally and skips both lanes for this opt level.
+ if ! "$CFREE" cc -S "-$opt" "$src" -o "$w/s.s" 2>"$w/ccs.err"; then
+ a_fail=$((a_fail+1)); a_failnames+=("$name[-$opt] cc-S: $(head -1 "$w/ccs.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s] cc -S failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/ccs.err"|sed 's|.*: ||')"
+ continue
+ fi
+
+ # Lane A: cfree as -> cfree ld -> run. Any stderr output from ld or from the program counts as a failure, even when the command itself succeeds.
+ if ! "$CFREE" as "$w/s.s" -o "$w/a.o" 2>"$w/a.as.err"; then
+ a_fail=$((a_fail+1)); a_failnames+=("$name[-$opt]/cfree-as: $(head -1 "$w/a.as.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s]/cfree-as: as: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/a.as.err"|sed 's|.*: ||')"
+ elif ! "$CFREE" ld "$w/a.o" -o "$w/a.out" 2>"$w/a.ld.err" || [ -s "$w/a.ld.err" ]; then
+ a_fail=$((a_fail+1)); a_failnames+=("$name[-$opt]/cfree-as ld: $(head -1 "$w/a.ld.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s]/cfree-as: ld: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/a.ld.err"|sed 's|.*: ||')"
+ else
+ chmod +x "$w/a.out" 2>/dev/null || true
+ "$w/a.out" >"$w/a.out.txt" 2>"$w/a.run.err"; arc=$?  # capture the exit status before any other command overwrites $?
+ if [ -s "$w/a.run.err" ]; then
+ a_fail=$((a_fail+1)); a_failnames+=("$name[-$opt]/cfree-as run stderr")
+ printf ' %s %s[-%s]/cfree-as: run stderr: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/a.run.err")"
+ elif [ "$arc" -eq "$exp" ]; then
+ a_pass=$((a_pass+1))
+ else
+ a_fail=$((a_fail+1)); a_failnames+=("$name[-$opt]/cfree-as exit $arc != $exp")
+ printf ' %s %s[-%s]/cfree-as: exit %d != %d\n' "$(color_red FAIL)" "$name" "$opt" "$arc" "$exp"
+ fi
+ fi
+
+ # Lane B: clang -c (third-party assembler) -> cfree ld -> run. The outcome is recorded in b_ok/b_reason and classified afterwards.
+ # cfree ld on both lanes isolates the assembler as the only variable.
+ b_ok=0; b_reason=""
+ if ! "$CLANG" -c "$w/s.s" -o "$w/b.o" 2>"$w/b.as.err"; then
+ b_reason="clang as: $(err_reason "$w/b.as.err")"
+ elif ! "$CFREE" ld "$w/b.o" -o "$w/b.out" 2>"$w/b.ld.err" || [ -s "$w/b.ld.err" ]; then
+ b_reason="cfree ld: $(head -1 "$w/b.ld.err"|sed 's|.*: ||')"
+ else
+ chmod +x "$w/b.out" 2>/dev/null || true
+ "$w/b.out" >"$w/b.out.txt" 2>"$w/b.run.err"; brc=$?  # capture the exit status before any other command overwrites $?
+ if [ -s "$w/b.run.err" ]; then b_reason="run stderr: $(head -1 "$w/b.run.err")"
+ elif [ "$brc" -eq "$exp" ]; then b_ok=1
+ else b_reason="exit $brc != $exp"; fi
+ fi
+ if [ "$b_ok" -eq 1 ]; then  # success: PASS when enforcing, XPASS (unexpected pass) otherwise
+ if [ "$ENFORCE_CLANG" = "1" ]; then
+ b_pass=$((b_pass+1)); printf ' %s %s[-%s]/clang-as\n' "$(color_grn PASS)" "$name" "$opt"
+ else
+ b_xpass=$((b_xpass+1)); printf ' %s %s[-%s]/clang-as — now passes; set CFREE_HOSTAS_ENFORCE_CLANG=1\n' "$(color_grn XPASS)" "$name" "$opt"
+ fi
+ else  # any lane-B failure (assemble, link, stderr, or exit mismatch): FAIL when enforcing, XFAIL otherwise
+ if [ "$ENFORCE_CLANG" = "1" ]; then
+ b_efail=$((b_efail+1)); printf ' %s %s[-%s]/clang-as: %s\n' "$(color_red FAIL)" "$name" "$opt" "$b_reason"
+ else
+ b_xfail=$((b_xfail+1)); printf ' %s %s[-%s]/clang-as: %s\n' "$(color_yel XFAIL)" "$name" "$opt" "$b_reason"
+ fi
+ fi
+ done
+done
+shopt -u nullglob
+
+echo
+if [ "${#a_failnames[@]}" -gt 0 ]; then printf 'cfree-as failures:\n'; printf ' %s\n' "${a_failnames[@]}"; fi
+printf 'hostas-toy: cfree-as %d pass, %d fail | clang-as %d pass, %d xfail, %d xpass, %d efail | %d skip\n' \
+ "$a_pass" "$a_fail" "$b_pass" "$b_xfail" "$b_xpass" "$b_efail" "$skip"
+# Advisory notes, printed only when the clang-as lane is not enforced.
+if [ "$ENFORCE_CLANG" != "1" ]; then
+  if [ "$b_xfail" -gt 0 ]; then printf 'hostas-toy: clang-as is XFAIL (expected red) — native Mach-O cc -S emits ELF-only .type/.size/@progbits clang rejects; fix tracked separately. Run with CFREE_HOSTAS_ENFORCE_CLANG=1 to gate it.\n'; fi
+  if [ "$b_xpass" -gt 0 ]; then printf 'hostas-toy: clang-as had %d XPASS — the asm gap looks fixed; set CFREE_HOSTAS_ENFORCE_CLANG=1 to enforce.\n' "$b_xpass"; fi
+fi
+
+# Exit status: the cfree-as lane always gates; the clang-as lane gates only when ENFORCE_CLANG is "1".
+rc=0
+if [ "$a_fail" -gt 0 ]; then rc=1; fi
+if [ "$ENFORCE_CLANG" = "1" ] && [ "$b_efail" -gt 0 ]; then rc=1; fi
+exit "$rc"
diff --git a/test/test.mk b/test/test.mk
@@ -42,6 +42,7 @@ TEST_TARGETS = \
test-asm-roundtrip-exec \
test-asm-symmetry \
test-asm-roundtrip-toy \
+ test-hostas-toy \
test-diff-llvm \
test-bounce \
test-cbackend \
@@ -733,6 +734,18 @@ test-diff-llvm: bin
test-asm-roundtrip-toy: bin
@bash test/asm/roundtrip_toy.sh
+# test-hostas-toy: feed one native `cc -S` to BOTH cfree's own `as` and clang (a
+# third-party host assembler), link + run each, and assert the toy exit-code
+# oracle. Only the assembler differs between the two lanes, so the clang lane is
+# the real test: a standard assembler can't paper over a private-dialect quirk
+# the way cfree's own `as` can (cf. test/asm/diff_llvm.sh, but by execution).
+# The clang lane is currently XFAIL (native Mach-O cc -S emits ELF-only
+# .type/.size and an @progbits token inside Mach-O .sections, which clang
+# rejects) and does not gate by default; run with CFREE_HOSTAS_ENFORCE_CLANG=1
+# to gate it while fixing. Opt-in (listed in TEST_TARGETS, not in the default test set); skips cleanly (exit 0) if clang is absent.
+test-hostas-toy: bin
+ @bash test/asm/hostas_toy.sh
+
test-wasm: test-wasm-front test-wasm-target test-wasm-toy
test-wasm-front: bin $(WASM_TOOL) $(LINK_EXE_RUNNER) $(JIT_RUNNER)