commit 7c7a63e8d3974ae74e72eecf861f92915a74b352
parent aed06e272cc04f04a687906e11e9c1b8165d28df
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 07:12:53 -0700
tcc 3 stage harness
Diffstat:
4 files changed, 109 insertions(+), 39 deletions(-)
diff --git a/Makefile b/Makefile
@@ -21,8 +21,13 @@
# make tcc-gcc same flatten output, built with stock gcc
# (sanity check; ARCH in {aarch64, amd64})
# make tcc-tcc second-stage tcc: tcc-boot2 compiles
-# tcc.flat.c into a self-built tcc; the
-# tcc-cc / tcc-libc suites use this
+# tcc.flat.c into a self-built tcc
+# (ARCH in {aarch64, amd64, riscv64})
+# make tcc-tcc-tcc third-stage tcc: tcc-tcc compiles
+# tcc.flat.c. README endpoint —
+# `(define tcc (tcc1 tcc.c))`. The
+# tcc-cc / tcc-libc suites run twice,
+# stage 2 then stage 3
# (ARCH in {aarch64, amd64, riscv64})
# make test every suite, every arch
# make test SUITE=m1pp m1pp suite, every arch
@@ -92,7 +97,8 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
# --- Targets --------------------------------------------------------------
.PHONY: all m1pp hex2pp scheme1 cc test image tools tables \
- tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc tcc-tcc
+ tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc \
+ tcc-tcc tcc-tcc-tcc
all: m1pp hex2pp
@@ -430,7 +436,17 @@ TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o
# fixtures through tcc-tcc, not tcc-boot2 — so a regression in
# cc.scm's emitted code surfaces as a tcc-tcc misbehavior on a
# fixture, and the test set spans tcc compiling itself.
-TCC_TCC_BIN := build/$(ARCH)/tcc-tcc/tcc-tcc
+TCC_TCC_BIN := build/$(ARCH)/tcc-tcc/tcc-tcc
+
+# tcc-tcc-tcc: third-stage tcc. tcc-tcc compiles tcc.flat.c through
+# the same boot-build-tcc-tcc.sh recipe (parameterized on the input
+# compiler) into a fresh binary. This is the README's
+# `(define tcc (tcc1 tcc.c))` — the first tcc whose machine code was
+# emitted by an actual tcc rather than cc.scm. The tcc-cc / tcc-libc
+# suites run twice: once through tcc-tcc (stage 2), once through
+# tcc-tcc-tcc (stage 3). Bootstrap fixed-point check: if the two
+# stages diverge on any fixture, the codegen is non-idempotent.
+TCC_TCC_TCC_BIN := build/$(ARCH)/tcc-tcc-tcc/tcc-tcc-tcc
$(TCC_CC_START): tcc-cc/$(ARCH)/start.S $(TCC_ASM_DEPS)
mkdir -p $(@D)
@@ -523,6 +539,24 @@ $(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
$(call PODMAN,$(ARCH)) \
sh scripts/boot-build-tcc-tcc.sh $@
+# --- tcc-tcc-tcc: third-stage tcc ----------------------------------------
+#
+# Same recipe as tcc-tcc, but the input compiler is tcc-tcc rather
+# than tcc-boot2. boot-build-tcc-tcc.sh takes the compiler as its
+# optional second arg; lib-arm64.o / va_list.o are rebuilt by that
+# same compiler into $(@D), so each stage owns its own helpers.
+tcc-tcc-tcc: $(TCC_TCC_TCC_BIN)
+
+$(TCC_TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
+ $(TCC_FLAT) tcc-libc/va_list_shim.h \
+ $(TCC_TCC_BIN) \
+ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \
+ $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \
+ build/$(ARCH)/.image
+ mkdir -p $(@D)
+ $(call PODMAN,$(ARCH)) \
+ sh scripts/boot-build-tcc-tcc.sh $@ $(TCC_TCC_BIN)
+
# --- Native tools (opt-in dev-loop helpers) -------------------------------
NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \
@@ -609,10 +643,11 @@ TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \
P1/entry-libc.P1pp P1/elf-end.P1pp
TEST_TCC_CC_DEPS := build/$(ARCH)/.image \
- $(TCC_TCC_BIN) $(TCC_CC_START) $(TCC_CC_MEM) $(TCC_CC_VA_LIST)
+ $(TCC_TCC_BIN) $(TCC_TCC_TCC_BIN) \
+ $(TCC_CC_START) $(TCC_CC_MEM) $(TCC_CC_VA_LIST)
TEST_TCC_LIBC_DEPS := build/$(ARCH)/.image \
- $(TCC_TCC_BIN) \
+ $(TCC_TCC_BIN) $(TCC_TCC_TCC_BIN) \
$(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \
$(TCC_CC_VA_LIST)
@@ -653,13 +688,19 @@ else ifeq ($(SUITE),tcc-cc)
echo "tcc-cc supports ARCH in {$(TCC_HARNESS_ARCHES)} only (got '$(ARCH)')" >&2; exit 2; \
fi
@$(MAKE) --no-print-directory ARCH=$(ARCH) $(TEST_TCC_CC_DEPS)
- sh scripts/run-tests.sh --suite=tcc-cc --arch=$(ARCH) $(NAMES)
+ @s2=0; s3=0; \
+ sh scripts/run-tests.sh --suite=tcc-cc --arch=$(ARCH) --stage=2 $(NAMES) || s2=$$?; \
+ sh scripts/run-tests.sh --suite=tcc-cc --arch=$(ARCH) --stage=3 $(NAMES) || s3=$$?; \
+ [ $$s2 -eq 0 ] && [ $$s3 -eq 0 ]
else ifeq ($(SUITE),tcc-libc)
@if [ -z "$(filter $(ARCH),$(TCC_HARNESS_ARCHES))" ]; then \
echo "tcc-libc supports ARCH in {$(TCC_HARNESS_ARCHES)} only (got '$(ARCH)')" >&2; exit 2; \
fi
@$(MAKE) --no-print-directory ARCH=$(ARCH) $(TEST_TCC_LIBC_DEPS)
- sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) $(NAMES)
+ @s2=0; s3=0; \
+ sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) --stage=2 $(NAMES) || s2=$$?; \
+ sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) --stage=3 $(NAMES) || s3=$$?; \
+ [ $$s2 -eq 0 ] && [ $$s3 -eq 0 ]
else
@echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2
endif
diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh
@@ -1,8 +1,15 @@
#!/bin/sh
-## boot-build-tcc-tcc.sh — second-stage tcc.
+## boot-build-tcc-tcc.sh — next-stage tcc, parameterized by the
+## compiler that does the building.
##
-## tcc-boot2 (the cc.scm-built tcc) compiles tcc.flat.c into a fresh
-## tcc binary, linked against the same libc.o / mem.o / sys_stubs.o /
+## Drives one stage of the README's tcc compilation chain:
+## tcc0 = cc.scm compiles tcc.flat.c -> tcc-boot2 (compile 1)
+## tcc1 = tcc-boot2 compiles tcc.flat.c -> tcc-tcc (compile 2)
+## tcc2 = tcc-tcc compiles tcc.flat.c -> tcc-tcc-tcc (compile 3)
+##
+## The compiler binary is supplied as the optional second positional
+## arg (default: build/$ARCH/tcc-boot2/tcc-boot2 — i.e. compile 2).
+## Output is linked against the same libc.o / mem.o / sys_stubs.o /
## start.o the tcc-libc suite uses, plus per-target libtcc1 helpers:
## - aarch64 / riscv64: lib-arm64.c (soft-float TFmode helpers —
## __addtf3 / __extenddftf2 / …; libgcc-equivalent — same source
@@ -16,20 +23,19 @@
## lib/Makefile). Long double on amd64 is x87 80-bit and tcc
## emits native FPU instructions, so no soft-float helper is
## needed.
-## The result — tcc-tcc — is the
-## "twice-compiled" tcc: stage 1 was cc.scm compiling tcc; stage 2 is
-## tcc compiling tcc. Both are bit-distinct from each other but
-## functionally equivalent; the tcc-cc / tcc-libc suites use tcc-tcc
-## as their reference compiler.
+## Helpers are rebuilt by $CC into the same directory as $OUT, so
+## each stage owns its own lib-arm64.o / va_list.o.
##
## Env: ARCH in {aarch64, amd64, riscv64}
-## Usage: boot-build-tcc-tcc.sh <out>
+## Usage: boot-build-tcc-tcc.sh <out> [<cc>]
set -eu
: "${ARCH:?ARCH must be set}"
-[ "$#" -eq 1 ] || { echo "usage: ARCH=<arch> $0 <out>" >&2; exit 2; }
+[ "$#" -ge 1 ] && [ "$#" -le 2 ] \
+ || { echo "usage: ARCH=<arch> $0 <out> [<cc>]" >&2; exit 2; }
OUT=$1
+CC=${2:-build/$ARCH/tcc-boot2/tcc-boot2}
case "$ARCH" in
aarch64) TCC_TARGET=ARM64; LIB_TARGET_DEFINES="-D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1" ;;
@@ -38,7 +44,6 @@ case "$ARCH" in
*) echo "boot-build-tcc-tcc.sh: unsupported ARCH '$ARCH'" >&2; exit 2 ;;
esac
-TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2
TCC_INC=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include
TCC_FLAT=build/tcc/$TCC_TARGET/tcc.flat.c
LIBC_O=build/$ARCH/tcc-libc/libc.o
@@ -61,7 +66,7 @@ if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "riscv64" ]; then
# in lib/Makefile); the file gates the arm64-specific cache-flush
# wrapper on !__riscv.
# shellcheck disable=SC2086 # LIB_TARGET_DEFINES is intentionally word-split.
- "$TCC_BOOT2" -nostdlib -I "$TCC_INC" \
+ "$CC" -nostdlib -I "$TCC_INC" \
-D HAVE_CONFIG_H=1 $LIB_TARGET_DEFINES \
-c -o "$WORK/lib-arm64.o" "$TCC_LIB_DIR/lib-arm64.c"
LIB_OBJS=$WORK/lib-arm64.o
@@ -70,14 +75,14 @@ elif [ "$ARCH" = "amd64" ]; then
# lowers va_start / va_arg to direct calls into these intrinsics
# (see tcc/include/stdarg.h, lib/va_list.c). Without them the
# tcc-tcc link fails with undefined symbols.
- "$TCC_BOOT2" -nostdlib -I "$TCC_INC" \
+ "$CC" -nostdlib -I "$TCC_INC" \
-D TCC_TARGET_X86_64=1 \
-c -o "$WORK/va_list.o" "$TCC_LIB_DIR/va_list.c"
LIB_OBJS=$WORK/va_list.o
fi
-# Compile + link tcc-tcc in one tcc-boot2 invocation.
+# Compile + link the next-stage tcc in one $CC invocation.
# shellcheck disable=SC2086 # $LIB_OBJS is intentionally word-split (may be empty).
-"$TCC_BOOT2" -nostdlib -I "$TCC_INC" -include "$SHIM" \
+"$CC" -nostdlib -I "$TCC_INC" -include "$SHIM" \
"$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" $LIB_OBJS \
"$TCC_FLAT" -o "$OUT"
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -595,10 +595,13 @@ run_cc_ext_suite() {
## --- tcc-cc suite -------------------------------------------------------
##
-## Runs the plain tests/cc fixtures through tcc-tcc — the second-stage
-## tcc, built by tcc-boot2 (which itself was built by cc.scm). The
-## Makefile target tcc-tcc supplies the binary; start.o / mem.o come
-## from the tcc-cc tree (cross-asm and tcc-boot2-built respectively).
+## Runs the plain tests/cc fixtures through a self-built tcc. STAGE
+## selects the compiler — STAGE=2 uses tcc-tcc (twice-compiled, built
+## by tcc-boot2 which was itself built by cc.scm), STAGE=3 uses
+## tcc-tcc-tcc (thrice-compiled, built by tcc-tcc — the README
+## endpoint, the first tcc whose machine code an actual tcc emitted).
+## start.o / mem.o come from the tcc-cc tree (cross-asm and
+## tcc-boot2-built respectively); they don't change between stages.
run_tcc_cc_suite() {
case "$ARCH" in
aarch64) tcc_target=ARM64; tcc_banner='AArch64' ;;
@@ -611,7 +614,15 @@ run_tcc_cc_suite() {
;;
esac
- tcc=build/$ARCH/tcc-tcc/tcc-tcc
+ case "${STAGE:-2}" in
+ 2) tcc=build/$ARCH/tcc-tcc/tcc-tcc; stage_tag=stage2 ;;
+ 3) tcc=build/$ARCH/tcc-tcc-tcc/tcc-tcc-tcc; stage_tag=stage3 ;;
+ *)
+ echo " FAIL [$ARCH] tcc-cc"
+ echo " unknown STAGE='$STAGE' (expected 2 or 3)" >&2
+ return
+ ;;
+ esac
start=build/$ARCH/tcc-cc/start.o
mem=build/$ARCH/tcc-cc/mem.o
tcc_include=build/tcc/$tcc_target/tcc-0.9.26-1147-gee75a10c/include
@@ -663,9 +674,9 @@ run_tcc_cc_suite() {
expexit=0
fi
- elf=build/$ARCH/tests/tcc-cc/$name
- workdir=build/$ARCH/.work/tests/tcc-cc/$name
- label="[$ARCH] tcc-cc/$name"
+ elf=build/$ARCH/tests/tcc-cc/$stage_tag/$name
+ workdir=build/$ARCH/.work/tests/tcc-cc/$stage_tag/$name
+ label="[$ARCH] tcc-cc[$stage_tag]/$name"
mkdir -p "$(dirname "$elf")" "$workdir"
tcc_log=$workdir/tcc.log
@@ -690,10 +701,11 @@ run_tcc_cc_suite() {
## --- tcc-libc suite -----------------------------------------------------
##
-## End-to-end "tcc as a real compiler" check, run through tcc-tcc — the
-## twice-compiled tcc (cc.scm built tcc-boot2; tcc-boot2 built tcc-tcc).
-## tcc-boot2 already compiled mes-libc into libc.o; for each tests/cc-libc
-## fixture, tcc-tcc compiles + links the fixture against
+## End-to-end "tcc as a real compiler" check, run through a self-built
+## tcc. STAGE selects the compiler — STAGE=2 uses tcc-tcc (twice-
+## compiled), STAGE=3 uses tcc-tcc-tcc (thrice-compiled, README
+## endpoint). tcc-boot2 already compiled mes-libc into libc.o; for each
+## tests/cc-libc fixture, the selected tcc compiles + links it against
## start.o per-arch entry stub: __libc_init then main then exit
## sys_stubs.o per-arch raw-syscall sys_* implementations
## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp)
@@ -711,7 +723,15 @@ run_tcc_libc_suite() {
;;
esac
- tcc=build/$ARCH/tcc-tcc/tcc-tcc
+ case "${STAGE:-2}" in
+ 2) tcc=build/$ARCH/tcc-tcc/tcc-tcc; stage_tag=stage2 ;;
+ 3) tcc=build/$ARCH/tcc-tcc-tcc/tcc-tcc-tcc; stage_tag=stage3 ;;
+ *)
+ echo " FAIL [$ARCH] tcc-libc"
+ echo " unknown STAGE='$STAGE' (expected 2 or 3)" >&2
+ return
+ ;;
+ esac
start=build/$ARCH/tcc-libc/start.o
sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o
mem=build/$ARCH/tcc-libc/mem.o
@@ -757,9 +777,9 @@ run_tcc_libc_suite() {
expexit=0
fi
- elf=build/$ARCH/tests/tcc-libc/$name
- workdir=build/$ARCH/.work/tests/tcc-libc/$name
- label="[$ARCH] tcc-libc/$name"
+ elf=build/$ARCH/tests/tcc-libc/$stage_tag/$name
+ workdir=build/$ARCH/.work/tests/tcc-libc/$stage_tag/$name
+ label="[$ARCH] tcc-libc[$stage_tag]/$name"
mkdir -p "$(dirname "$elf")" "$workdir"
tcc_log=$workdir/tcc.log
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
@@ -47,6 +47,7 @@ set -eu
SUITE=
ARCH=
NAMES=
+STAGE=
while [ "$#" -gt 0 ]; do
case "$1" in
@@ -54,6 +55,8 @@ while [ "$#" -gt 0 ]; do
--suite=*) SUITE=${1#--suite=} ;;
--arch) shift; ARCH=$1 ;;
--arch=*) ARCH=${1#--arch=} ;;
+ --stage) shift; STAGE=$1 ;;
+ --stage=*) STAGE=${1#--stage=} ;;
--) shift; while [ "$#" -gt 0 ]; do NAMES="$NAMES $1"; shift; done; break ;;
-*) echo "$0: unknown flag '$1'" >&2; exit 2 ;;
*) NAMES="$NAMES $1" ;;
@@ -86,6 +89,7 @@ run_in_container() {
-e "ARCH=$arch" \
-e "CC_TRACE_EMIT=${CC_TRACE_EMIT:-0}" \
-e "CC_DEBUG=${CC_DEBUG:-0}" \
+ -e "STAGE=${STAGE:-}" \
-v "$REPO":/work -w /work \
"boot2-busybox:$arch" "$@"
}