boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 7c7a63e8d3974ae74e72eecf861f92915a74b352
parent aed06e272cc04f04a687906e11e9c1b8165d28df
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  4 May 2026 07:12:53 -0700

tcc 3 stage harness

Diffstat:
M Makefile | 57 +++++++++++++++++++++++++++++++++++++++++++++++++--------
M scripts/boot-build-tcc-tcc.sh | 35 ++++++++++++++++++++---------------
M scripts/boot-run-tests.sh | 52 ++++++++++++++++++++++++++++++++++++----------------
M scripts/run-tests.sh | 4 ++++
4 files changed, 109 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile @@ -21,8 +21,13 @@ # make tcc-gcc same flatten output, built with stock gcc # (sanity check; ARCH in {aarch64, amd64}) # make tcc-tcc second-stage tcc: tcc-boot2 compiles -# tcc.flat.c into a self-built tcc; the -# tcc-cc / tcc-libc suites use this +# tcc.flat.c into a self-built tcc +# (ARCH in {aarch64, amd64, riscv64}) +# make tcc-tcc-tcc third-stage tcc: tcc-tcc compiles +# tcc.flat.c. README endpoint — +# `(define tcc (tcc1 tcc.c))`. The +# tcc-cc / tcc-libc suites run twice, +# stage 2 then stage 3 # (ARCH in {aarch64, amd64, riscv64}) # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch @@ -92,7 +97,8 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- .PHONY: all m1pp hex2pp scheme1 cc test image tools tables \ - tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc tcc-tcc + tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc \ + tcc-tcc tcc-tcc-tcc all: m1pp hex2pp @@ -430,7 +436,17 @@ TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o # fixtures through tcc-tcc, not tcc-boot2 — so a regression in # cc.scm's emitted code surfaces as a tcc-tcc misbehavior on a # fixture, and the test set spans tcc compiling itself. -TCC_TCC_BIN := build/$(ARCH)/tcc-tcc/tcc-tcc +TCC_TCC_BIN := build/$(ARCH)/tcc-tcc/tcc-tcc + +# tcc-tcc-tcc: third-stage tcc. tcc-tcc compiles tcc.flat.c through +# the same boot-build-tcc-tcc.sh recipe (parameterized on the input +# compiler) into a fresh binary. This is the README's +# `(define tcc (tcc1 tcc.c))` — the first tcc whose machine code was +# emitted by an actual tcc rather than cc.scm. The tcc-cc / tcc-libc +# suites run twice: once through tcc-tcc (stage 2), once through +# tcc-tcc-tcc (stage 3). Bootstrap fixed-point check: if the two +# stages diverge on any fixture, the codegen is non-idempotent. 
+TCC_TCC_TCC_BIN := build/$(ARCH)/tcc-tcc-tcc/tcc-tcc-tcc $(TCC_CC_START): tcc-cc/$(ARCH)/start.S $(TCC_ASM_DEPS) mkdir -p $(@D) @@ -523,6 +539,24 @@ $(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ $(call PODMAN,$(ARCH)) \ sh scripts/boot-build-tcc-tcc.sh $@ +# --- tcc-tcc-tcc: third-stage tcc ---------------------------------------- +# +# Same recipe as tcc-tcc, but the input compiler is tcc-tcc rather +# than tcc-boot2. boot-build-tcc-tcc.sh accepts the compiler as its +# second arg; lib-arm64.o / va_list.o are rebuilt by the new compiler +# into $(@D), so each stage owns its own helpers. +tcc-tcc-tcc: $(TCC_TCC_TCC_BIN) + +$(TCC_TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ + $(TCC_FLAT) tcc-libc/va_list_shim.h \ + $(TCC_TCC_BIN) \ + $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \ + $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \ + build/$(ARCH)/.image + mkdir -p $(@D) + $(call PODMAN,$(ARCH)) \ + sh scripts/boot-build-tcc-tcc.sh $@ $(TCC_TCC_BIN) + # --- Native tools (opt-in dev-loop helpers) ------------------------------- NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \ @@ -609,10 +643,11 @@ TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \ P1/entry-libc.P1pp P1/elf-end.P1pp TEST_TCC_CC_DEPS := build/$(ARCH)/.image \ - $(TCC_TCC_BIN) $(TCC_CC_START) $(TCC_CC_MEM) $(TCC_CC_VA_LIST) + $(TCC_TCC_BIN) $(TCC_TCC_TCC_BIN) \ + $(TCC_CC_START) $(TCC_CC_MEM) $(TCC_CC_VA_LIST) TEST_TCC_LIBC_DEPS := build/$(ARCH)/.image \ - $(TCC_TCC_BIN) \ + $(TCC_TCC_BIN) $(TCC_TCC_TCC_BIN) \ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \ $(TCC_CC_VA_LIST) @@ -653,13 +688,19 @@ else ifeq ($(SUITE),tcc-cc) echo "tcc-cc supports ARCH in {$(TCC_HARNESS_ARCHES)} only (got '$(ARCH)')" >&2; exit 2; \ fi @$(MAKE) --no-print-directory ARCH=$(ARCH) $(TEST_TCC_CC_DEPS) - sh scripts/run-tests.sh --suite=tcc-cc --arch=$(ARCH) $(NAMES) + @s2=0; s3=0; \ + sh scripts/run-tests.sh --suite=tcc-cc --arch=$(ARCH) --stage=2 $(NAMES) || s2=$$?; \ + sh scripts/run-tests.sh --suite=tcc-cc 
--arch=$(ARCH) --stage=3 $(NAMES) || s3=$$?; \ + [ $$s2 -eq 0 ] && [ $$s3 -eq 0 ] else ifeq ($(SUITE),tcc-libc) @if [ -z "$(filter $(ARCH),$(TCC_HARNESS_ARCHES))" ]; then \ echo "tcc-libc supports ARCH in {$(TCC_HARNESS_ARCHES)} only (got '$(ARCH)')" >&2; exit 2; \ fi @$(MAKE) --no-print-directory ARCH=$(ARCH) $(TEST_TCC_LIBC_DEPS) - sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) $(NAMES) + @s2=0; s3=0; \ + sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) --stage=2 $(NAMES) || s2=$$?; \ + sh scripts/run-tests.sh --suite=tcc-libc --arch=$(ARCH) --stage=3 $(NAMES) || s3=$$?; \ + [ $$s2 -eq 0 ] && [ $$s3 -eq 0 ] else @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2 endif diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh @@ -1,8 +1,15 @@ #!/bin/sh -## boot-build-tcc-tcc.sh — second-stage tcc. +## boot-build-tcc-tcc.sh — next-stage tcc, parametrized by the +## compiler that does the building. ## -## tcc-boot2 (the cc.scm-built tcc) compiles tcc.flat.c into a fresh -## tcc binary, linked against the same libc.o / mem.o / sys_stubs.o / +## Drives one stage of the README's tcc compilation chain: +## tcc0 = cc.scm compiles tcc.flat.c -> tcc-boot2 (compile 1) +## tcc1 = tcc-boot2 compiles tcc.flat.c -> tcc-tcc (compile 2) +## tcc2 = tcc-tcc compiles tcc.flat.c -> tcc-tcc-tcc (compile 3) +## +## The compiler binary is supplied as the optional second positional +## arg (default: build/$ARCH/tcc-boot2/tcc-boot2 — i.e. compile 2). +## Output is linked against the same libc.o / mem.o / sys_stubs.o / ## start.o the tcc-libc suite uses, plus per-target libtcc1 helpers: ## - aarch64 / riscv64: lib-arm64.c (soft-float TFmode helpers — ## __addtf3 / __extenddftf2 / …; libgcc-equivalent — same source @@ -16,20 +23,19 @@ ## lib/Makefile). Long double on amd64 is x87 80-bit and tcc ## emits native FPU instructions, so no soft-float helper is ## needed. 
-## The result — tcc-tcc — is the -## "twice-compiled" tcc: stage 1 was cc.scm compiling tcc; stage 2 is -## tcc compiling tcc. Both are bit-distinct from each other but -## functionally equivalent; the tcc-cc / tcc-libc suites use tcc-tcc -## as their reference compiler. +## Helpers are rebuilt by $CC into the same dirname as $OUT so each +## stage owns its own lib-arm64.o / va_list.o. ## ## Env: ARCH in {aarch64, amd64, riscv64} -## Usage: boot-build-tcc-tcc.sh <out> +## Usage: boot-build-tcc-tcc.sh <out> [<cc>] set -eu : "${ARCH:?ARCH must be set}" -[ "$#" -eq 1 ] || { echo "usage: ARCH=<arch> $0 <out>" >&2; exit 2; } +[ "$#" -ge 1 ] && [ "$#" -le 2 ] \ + || { echo "usage: ARCH=<arch> $0 <out> [<cc>]" >&2; exit 2; } OUT=$1 +CC=${2:-build/$ARCH/tcc-boot2/tcc-boot2} case "$ARCH" in aarch64) TCC_TARGET=ARM64; LIB_TARGET_DEFINES="-D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1" ;; @@ -38,7 +44,6 @@ case "$ARCH" in *) echo "boot-build-tcc-tcc.sh: unsupported ARCH '$ARCH'" >&2; exit 2 ;; esac -TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2 TCC_INC=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include TCC_FLAT=build/tcc/$TCC_TARGET/tcc.flat.c LIBC_O=build/$ARCH/tcc-libc/libc.o @@ -61,7 +66,7 @@ if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "riscv64" ]; then # in lib/Makefile); the file gates the arm64-specific cache-flush # wrapper on !__riscv. # shellcheck disable=SC2086 # LIB_TARGET_DEFINES is intentionally word-split. - "$TCC_BOOT2" -nostdlib -I "$TCC_INC" \ + "$CC" -nostdlib -I "$TCC_INC" \ -D HAVE_CONFIG_H=1 $LIB_TARGET_DEFINES \ -c -o "$WORK/lib-arm64.o" "$TCC_LIB_DIR/lib-arm64.c" LIB_OBJS=$WORK/lib-arm64.o @@ -70,14 +75,14 @@ elif [ "$ARCH" = "amd64" ]; then # lowers va_start / va_arg to direct calls into these intrinsics # (see tcc/include/stdarg.h, lib/va_list.c). Without them the # tcc-tcc link fails with undefined symbols. 
- "$TCC_BOOT2" -nostdlib -I "$TCC_INC" \ + "$CC" -nostdlib -I "$TCC_INC" \ -D TCC_TARGET_X86_64=1 \ -c -o "$WORK/va_list.o" "$TCC_LIB_DIR/va_list.c" LIB_OBJS=$WORK/va_list.o fi -# Compile + link tcc-tcc in one tcc-boot2 invocation. +# Compile + link the next-stage tcc in one $CC invocation. # shellcheck disable=SC2086 # $LIB_OBJS is intentionally word-split (may be empty). -"$TCC_BOOT2" -nostdlib -I "$TCC_INC" -include "$SHIM" \ +"$CC" -nostdlib -I "$TCC_INC" -include "$SHIM" \ "$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" $LIB_OBJS \ "$TCC_FLAT" -o "$OUT" diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -595,10 +595,13 @@ run_cc_ext_suite() { ## --- tcc-cc suite ------------------------------------------------------- ## -## Runs the plain tests/cc fixtures through tcc-tcc — the second-stage -## tcc, built by tcc-boot2 (which itself was built by cc.scm). The -## Makefile target tcc-tcc supplies the binary; start.o / mem.o come -## from the tcc-cc tree (cross-asm and tcc-boot2-built respectively). +## Runs the plain tests/cc fixtures through a self-built tcc. STAGE +## selects the compiler — STAGE=2 uses tcc-tcc (twice-compiled, built +## by tcc-boot2 which was itself built by cc.scm), STAGE=3 uses +## tcc-tcc-tcc (thrice-compiled, built by tcc-tcc — the README +## endpoint, the first tcc whose machine code an actual tcc emitted). +## start.o / mem.o come from the tcc-cc tree (cross-asm and +## tcc-boot2-built respectively); they don't change between stages. 
run_tcc_cc_suite() { case "$ARCH" in aarch64) tcc_target=ARM64; tcc_banner='AArch64' ;; @@ -611,7 +614,15 @@ run_tcc_cc_suite() { ;; esac - tcc=build/$ARCH/tcc-tcc/tcc-tcc + case "${STAGE:-2}" in + 2) tcc=build/$ARCH/tcc-tcc/tcc-tcc; stage_tag=stage2 ;; + 3) tcc=build/$ARCH/tcc-tcc-tcc/tcc-tcc-tcc; stage_tag=stage3 ;; + *) + echo " FAIL [$ARCH] tcc-cc" + echo " unknown STAGE='$STAGE' (expected 2 or 3)" >&2 + return + ;; + esac start=build/$ARCH/tcc-cc/start.o mem=build/$ARCH/tcc-cc/mem.o tcc_include=build/tcc/$tcc_target/tcc-0.9.26-1147-gee75a10c/include @@ -663,9 +674,9 @@ run_tcc_cc_suite() { expexit=0 fi - elf=build/$ARCH/tests/tcc-cc/$name - workdir=build/$ARCH/.work/tests/tcc-cc/$name - label="[$ARCH] tcc-cc/$name" + elf=build/$ARCH/tests/tcc-cc/$stage_tag/$name + workdir=build/$ARCH/.work/tests/tcc-cc/$stage_tag/$name + label="[$ARCH] tcc-cc[$stage_tag]/$name" mkdir -p "$(dirname "$elf")" "$workdir" tcc_log=$workdir/tcc.log @@ -690,10 +701,11 @@ run_tcc_cc_suite() { ## --- tcc-libc suite ----------------------------------------------------- ## -## End-to-end "tcc as a real compiler" check, run through tcc-tcc — the -## twice-compiled tcc (cc.scm built tcc-boot2; tcc-boot2 built tcc-tcc). -## tcc-boot2 already compiled mes-libc into libc.o; for each tests/cc-libc -## fixture, tcc-tcc compiles + links the fixture against +## End-to-end "tcc as a real compiler" check, run through a self-built +## tcc. STAGE selects the compiler — STAGE=2 uses tcc-tcc (twice- +## compiled), STAGE=3 uses tcc-tcc-tcc (thrice-compiled, README +## endpoint). 
tcc-boot2 already compiled mes-libc into libc.o; for each +## tests/cc-libc fixture, the selected tcc compiles + links it against ## start.o per-arch entry stub: __libc_init then main then exit ## sys_stubs.o per-arch raw-syscall sys_* implementations ## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp) @@ -711,7 +723,15 @@ run_tcc_libc_suite() { ;; esac - tcc=build/$ARCH/tcc-tcc/tcc-tcc + case "${STAGE:-2}" in + 2) tcc=build/$ARCH/tcc-tcc/tcc-tcc; stage_tag=stage2 ;; + 3) tcc=build/$ARCH/tcc-tcc-tcc/tcc-tcc-tcc; stage_tag=stage3 ;; + *) + echo " FAIL [$ARCH] tcc-libc" + echo " unknown STAGE='$STAGE' (expected 2 or 3)" >&2 + return + ;; + esac start=build/$ARCH/tcc-libc/start.o sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o mem=build/$ARCH/tcc-libc/mem.o @@ -757,9 +777,9 @@ run_tcc_libc_suite() { expexit=0 fi - elf=build/$ARCH/tests/tcc-libc/$name - workdir=build/$ARCH/.work/tests/tcc-libc/$name - label="[$ARCH] tcc-libc/$name" + elf=build/$ARCH/tests/tcc-libc/$stage_tag/$name + workdir=build/$ARCH/.work/tests/tcc-libc/$stage_tag/$name + label="[$ARCH] tcc-libc[$stage_tag]/$name" mkdir -p "$(dirname "$elf")" "$workdir" tcc_log=$workdir/tcc.log diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -47,6 +47,7 @@ set -eu SUITE= ARCH= NAMES= +STAGE= while [ "$#" -gt 0 ]; do case "$1" in @@ -54,6 +55,8 @@ while [ "$#" -gt 0 ]; do --suite=*) SUITE=${1#--suite=} ;; --arch) shift; ARCH=$1 ;; --arch=*) ARCH=${1#--arch=} ;; + --stage) shift; STAGE=$1 ;; + --stage=*) STAGE=${1#--stage=} ;; --) shift; while [ "$#" -gt 0 ]; do NAMES="$NAMES $1"; shift; done; break ;; -*) echo "$0: unknown flag '$1'" >&2; exit 2 ;; *) NAMES="$NAMES $1" ;; @@ -86,6 +89,7 @@ run_in_container() { -e "ARCH=$arch" \ -e "CC_TRACE_EMIT=${CC_TRACE_EMIT:-0}" \ -e "CC_DEBUG=${CC_DEBUG:-0}" \ + -e "STAGE=${STAGE:-}" \ -v "$REPO":/work -w /work \ "boot2-busybox:$arch" "$@" }