commit 32430e753f8f18cf95533d8cfc6057d2b3a213a1
parent 930ba4a2e8118f8b48f5d222312a01d0e011b05a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 08:25:27 -0700
add standalone bootN.sh entrypoints (scratch + busybox)
Each bootN.sh is a self-contained host-side script that builds its
stage's outputs from explicit inputs into build/$ARCH/bootN/:
boot0 hex2/M0/catm — seed chain
boot1 M1pp/hex2pp — needs boot0
boot2 scheme1 — needs boot1
boot3 tcc-boot2/tcc-tcc/tcc-tcc-tcc — needs boot{0,1,2} + flatten preconditions
Container is `boot2-scratch:<arch>` from a new two-stage
Containerfile.scratch: stage A pulls busybox:musl (pinned digest),
stage B is FROM scratch + COPY /bin /bin. No libc, no /etc, no
resolver — just the busybox applet binary.
Each script copies inputs into a per-stage staging dir
(build/$ARCH/.bootN-stage/in), bind-mounts only that dir into the
container, and copies outputs back to build/$ARCH/bootN/. Every
input source/binary is explicitly enumerated in the script header
and re-checked at runtime with a clear "missing prereq" message.
Outputs verified bit-identical to the Makefile path on aarch64 and
amd64 across all four stages. On riscv64 boot0/1/2 match; tcc-boot2
matches; tcc-tcc and tcc-tcc-tcc are size-identical but byte-different
(systematic ~+0x40 deltas in .text — likely path-dependent encoding
in tcc's RISC-V codegen, not a behavioral regression).
Existing scripts/boot1.sh (the in-container Makefile-internal seed
chain) is renamed to scripts/mk-seed-tools.sh to free the boot1.sh
name; Makefile updated. The unused scripts/boot2.sh wrapper is
replaced by the new standalone boot2.sh.
The Makefile path is unchanged in behavior — these are additional
entrypoints for bootstrap-from-scratch verification, not a
replacement for the per-arch incremental Makefile builds.
docs/TCC-TODO.md: added "Standalone bootN.sh: remaining host deps"
section noting that the only remaining host-tooling dep across all
four scripts is the cross-asm of tcc-libc/<arch>/{start.S,sys_stubs.S}
in boot3 (host clang for aarch64/amd64; alpine-gcc:riscv64 for
riscv64). Replacing those two .S files with .P1pp equivalents would
make every bootN.sh pure podman + scratch + busybox.
Diffstat:
8 files changed, 652 insertions(+), 56 deletions(-)
diff --git a/Makefile b/Makefile
@@ -55,10 +55,17 @@
# Per-source intermediates land under build/<arch>/.work/<src-path>/.
#
# Bootstrap chain:
-# 1. seed (vendored hex0-seed) -> M0 + hex2-0 + catm (boot1.sh)
+# 1. seed (vendored hex0-seed) -> M0 + hex2-0 + catm (mk-seed-tools.sh)
# 2. seed M0 + hex2-0 -> M1pp ELF (boot-build-p1.sh)
# 2. seed M0 + hex2-0 -> hex2pp ELF (boot-build-p1.sh)
# 3. M1pp + hex2pp -> every other ELF (.P1pp pipeline) (boot-build-p1pp.sh)
+#
+# Standalone host-side entrypoints (not used by this Makefile, but they
+# build the same artifacts in build/$ARCH/bootN/ for inspection):
+# scripts/boot0.sh <arch> -> hex2 / M0 / catm
+# scripts/boot1.sh <arch> -> M1pp / hex2pp
+# scripts/boot2.sh <arch> -> scheme1
+# scripts/boot3.sh <arch> -> tcc-boot2 / tcc-tcc / tcc-tcc-tcc
# The seed M0/hex2-0/catm participate ONLY in step 2 (building the two
# new tools from their .P1 sources). Once both binaries exist, no
# downstream user/test/scheme/cc target ever invokes them again.
@@ -161,17 +168,17 @@ TOOLS_CATM := $(foreach a,$(ALL_ARCHES),build/$(a)/tools/catm)
tools: $(TOOLS_DIR)/M0
-# boot1.sh produces M0, hex2-0, and catm in one shot. Grouped targets
-# (`&:`) tell make they're all outputs of a single recipe execution, so
-# downstream rules can depend on whichever tool they actually invoke
-# (e.g. cc/scheme1 tests need only catm, not M0/hex2-0).
+# mk-seed-tools.sh produces M0, hex2-0, and catm in one shot. Grouped
+# targets (`&:`) tell make they're all outputs of a single recipe
+# execution, so downstream rules can depend on whichever tool they
+# actually invoke (e.g. cc/scheme1 tests need only catm, not M0/hex2-0).
build/%/tools/M0 build/%/tools/catm build/%/tools/hex2-0 &: \
- scripts/boot1.sh build/%/.image \
+ scripts/mk-seed-tools.sh build/%/.image \
vendor/seed/%/hex0-seed vendor/seed/%/hex0.hex0 \
vendor/seed/%/hex1.hex0 vendor/seed/%/hex2.hex1 \
vendor/seed/%/catm.hex2 vendor/seed/%/M0.hex2 \
vendor/seed/%/ELF.hex2
- $(call PODMAN,$*) sh scripts/boot1.sh
+ $(call PODMAN,$*) sh scripts/mk-seed-tools.sh
# --- Pre-pruned P1 backend tables -----------------------------------------
#
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -210,6 +210,36 @@ For now: known limitation, document, move on. The scalar codegen
elsewhere on riscv64 is fine — only u32 narrowing of a wider source
trips it.
+## Standalone `bootN.sh`: remaining host deps
+
+`scripts/{boot0,boot1,boot2}.sh` are pure scratch + busybox — no host
+compiler, no alpine-gcc image, just `podman` + the pinned `busybox:musl`
+digest. `boot3.sh` still has one host-tooling dep, the cross-asm of
+two `.S` files into `.o`:
+
+- `tcc-libc/$ARCH/start.S` → `start.o`
+- `tcc-libc/$ARCH/sys_stubs.S` → `sys_stubs.o`
+
+Routing today:
+
+- aarch64 / amd64 → host `cc -target $TRIPLE` (Apple clang has both
+ backends built in)
+- riscv64 → `boot2-alpine-gcc:riscv64` (Apple clang ships without a
+ RISC-V backend, so we run gcc + binutils inside an alpine container)
+
+tcc-boot2's codegen does not accept `.S` inputs, hence the host-side
+shim. Replacing the two files (per arch) with `.P1pp` (or any
+in-container-buildable) equivalents drops the host-cc dep entirely and
+deletes the alpine-gcc image from the boot path. After that, every
+`bootN.sh` is `podman` + scratch + busybox only.
+
+Out of scope for this TODO (already accepted as host-side):
+`stage1-flatten.sh` and `libc-flatten.sh` use the host `cc -E`
+preprocessor to produce `tcc.flat.c` and `libc.flat.c`. The unpacked
+`tcc-0.9.26/lib/{lib-arm64.c, va_list.c}` helpers compile cleanly under
+tcc-boot2 inside the container — no host cc on those, just source
+deps.
+
## Next steps
The cc.scm path is at full parity with the gcc-built control on the
diff --git a/scripts/Containerfile.scratch b/scripts/Containerfile.scratch
@@ -0,0 +1,25 @@
+## Per-arch image used by the standalone bootN.sh entrypoints.
+## Two stages:
+## 1. pull busybox:musl as the build container (provides a single
+## static /bin/busybox plus a tree of applet symlinks)
+## 2. FROM scratch, copy the busybox binary + symlinks into a fresh
+## empty rootfs
+##
+## The result is a per-arch image whose entire userland is
+## statically-linked busybox. No libc, no resolver, no /etc.
+## This is the only container the bootN.sh scripts ever exec into.
+##
+## Built per --platform; tag as boot2-scratch:<arch>.
+## Multi-arch index pinned to the same digest the boot2-busybox image
+## uses (scripts/Containerfile.busybox); per-arch entries within the
+## index, fetched 2026-04-24:
+## amd64 sha256:298efc24641ff8a1a285abdc555a0ce5ab7c42eb085e1be099f824188e069604
+## arm64 sha256:458a2ae4cb09bf96f8e24f135474b1552039738ed16ee470320a9c05c2da2004
+## riscv64 sha256:657f5a49af9288dc98d2bf45343e45c57c3caf3946aa9df436d05da320a8c863
+
+FROM docker.io/library/busybox@sha256:19b646668802469d968a05342a601e78da4322a414a7c09b1c9ee25165042138 AS busybox
+
+FROM scratch
+COPY --from=busybox /bin /bin
+WORKDIR /work
+CMD ["/bin/sh"]
diff --git a/scripts/boot0.sh b/scripts/boot0.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+## boot0.sh — standalone seed bootstrap.
+##
+## Stage 0 of the README's chain. From the ~400-byte vendored hex0-seed,
+## brings up: hex0 -> hex1 -> hex2 -> catm -> M0. Three of those (hex2,
+## catm, M0) are the binaries every later stage depends on.
+##
+## ─── Inputs (sources, copied into staging) ────────────────────────────
+## vendor/seed/$ARCH/hex0-seed — hex-byte seed binary (target ELF)
+## vendor/seed/$ARCH/hex0.hex0 — hex0 source for the hex0 assembler
+## vendor/seed/$ARCH/hex1.hex0 — hex0 source for the hex1 assembler
+## vendor/seed/$ARCH/hex2.hex1 — hex1 source for the hex2 assembler
+## vendor/seed/$ARCH/catm.hex2 — hex2 source for catm (binary cat)
+## vendor/seed/$ARCH/M0.hex2 — hex2 source for the M0 macro asm
+## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input)
+##
+## ─── Inputs (binaries from prior stages) ──────────────────────────────
+## none (this is stage 0).
+##
+## ─── Tools (in container) ─────────────────────────────────────────────
+## busybox sh + chmod + cp (scratch + busybox image only).
+##
+## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot0/hex2 — hex2 assembler
+## build/$ARCH/boot0/M0 — M0 macro assembler
+## build/$ARCH/boot0/catm — binary concatenator (cat with -m)
+##
+## Usage: scripts/boot0.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
+
+set -eu
+
+usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
+
+case "$ARCH" in
+ aarch64) PLATFORM=linux/arm64 ;;
+ amd64) PLATFORM=linux/amd64 ;;
+ riscv64) PLATFORM=linux/riscv64 ;;
+ *) usage ;;
+esac
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+SEED=vendor/seed/$ARCH
+OUT=build/$ARCH/boot0
+STAGE=build/$ARCH/.boot0-stage
+
+# ── ensure container image exists ─────────────────────────────────────
+if ! podman image exists "$IMAGE"; then
+ echo "[boot0 $ARCH] building $IMAGE"
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+# ── reset staging, copy inputs explicitly ─────────────────────────────
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
+
+for f in hex0-seed hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2 ELF.hex2; do
+ [ -e "$SEED/$f" ] || { echo "[boot0 $ARCH] missing input: $SEED/$f" >&2; exit 1; }
+ cp "$SEED/$f" "$STAGE/in/$f"
+done
+
+# ── run the seed chain in the scratch container ───────────────────────
+# The stage0 tools do one syscall per byte. Build everything in /tmp
+# (RAM tmpfs from --tmpfs) and only cp the final binaries to /work/out.
+echo "[boot0 $ARCH] hex0-seed -> hex0 -> hex1 -> hex2 -> catm -> M0"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=512M \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu -s <<'CONTAINER'
+chmod +x /work/in/hex0-seed
+/work/in/hex0-seed /work/in/hex0.hex0 /tmp/hex0
+chmod +x /tmp/hex0
+/tmp/hex0 /work/in/hex1.hex0 /tmp/hex1
+chmod +x /tmp/hex1
+/tmp/hex1 /work/in/hex2.hex1 /tmp/hex2
+chmod +x /tmp/hex2
+/tmp/hex2 /work/in/catm.hex2 /tmp/catm
+chmod +x /tmp/catm
+/tmp/catm /tmp/M0.combined.hex2 /work/in/ELF.hex2 /work/in/M0.hex2
+/tmp/hex2 /tmp/M0.combined.hex2 /tmp/M0
+chmod +x /tmp/M0
+cp /tmp/hex2 /tmp/catm /tmp/M0 /work/out/
+CONTAINER
+
+# ── copy outputs to final destination ─────────────────────────────────
+for f in hex2 catm M0; do
+ cp "$STAGE/out/$f" "$OUT/$f"
+ chmod 0700 "$OUT/$f"
+done
+
+echo "[boot0 $ARCH] OK -> $OUT/{hex2, catm, M0}"
diff --git a/scripts/boot1.sh b/scripts/boot1.sh
@@ -1,45 +1,108 @@
#!/bin/sh
-## boot1.sh — stage 1 of the bootstrap chain.
+## boot1.sh — standalone build of M1pp + hex2pp from .P1 sources.
##
-## In-container script. Brings up M0/hex2-0/catm from the ~400-byte
-## hex0-seed by chaining stage0-posix's first three phases. All produced
-## binaries are target-arch Linux ELF.
+## Stage 1 of the README's chain: produces the two self-hosted tools the
+## rest of the boot chain runs on (M1pp expander + hex2pp assembler/
+## linker), built once via the seed M0 + hex2 chain. After this stage
+## the seed binaries are no longer needed.
##
-## Inputs (read): vendor/seed/$ARCH/{hex0-seed,hex0.hex0,hex1.hex0,
-## hex2.hex1,catm.hex2,M0.hex2,ELF.hex2}
-## Outputs: build/$ARCH/tools/{hex0,hex1,hex2-0,catm,M0}
+## ─── Inputs (sources, copied into staging) ────────────────────────────
+## M1pp/M1pp.P1 — M1pp expander, P1 source
+## hex2pp/hex2pp.P1 — hex2pp assembler/linker, P1 source
+## P1/P1-$ARCH.M1 — pre-pruned per-arch P1 backend
+## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input)
##
-## Phase map (stage0-posix mescc-tools-{seed,mini}-kaem.kaem phases 0-3):
-## 0) hex0-seed + hex0.hex0 -> hex0
-## 1) hex0 + hex1.hex0 -> hex1
-## 2) hex1 + hex2.hex1 -> hex2-0
-## 2b) hex2-0 + catm.hex2 -> catm
-## 3a) catm : ELF.hex2 + M0.hex2 -> M0.combined.hex2
-## 3b) hex2-0 : M0.combined.hex2 -> M0
+## ─── Inputs (binaries from prior stages) ──────────────────────────────
+## build/$ARCH/boot0/{hex2, M0, catm} — built by scripts/boot0.sh
##
-## Env: ARCH=aarch64|amd64|riscv64
+## ─── Tools (in container) ─────────────────────────────────────────────
+## busybox sh + cat + cp + mkdir + chmod (scratch + busybox image only).
+## Plus the boot0 binaries (M0, catm, hex2), staged in.
+##
+## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot1/M1pp — M1pp expander ELF
+## build/$ARCH/boot1/hex2pp — hex2pp assembler/linker ELF
+##
+## Usage: scripts/boot1.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
set -eu
-: "${ARCH:?ARCH must be set}"
+usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
case "$ARCH" in
- aarch64|amd64|riscv64) ;;
- *) echo "boot1.sh: unsupported arch '$ARCH'" >&2 ; exit 1 ;;
+ aarch64) PLATFORM=linux/arm64 ;;
+ amd64) PLATFORM=linux/amd64 ;;
+ riscv64) PLATFORM=linux/riscv64 ;;
+ *) usage ;;
esac
-S=vendor/seed/$ARCH
-OUT=build/$ARCH/tools
-mkdir -p "$OUT"
-
-## Build everything in /tmp (RAM tmpfs — see PODMAN macro in Makefile),
-## then cp the final binaries to the bind-mounted $OUT. Stage0 tools do
-## one syscall per byte; staying off virtiofs for intermediates is ~5x.
-"$S/hex0-seed" "$S/hex0.hex0" /tmp/hex0
-/tmp/hex0 "$S/hex1.hex0" /tmp/hex1
-/tmp/hex1 "$S/hex2.hex1" /tmp/hex2-0
-/tmp/hex2-0 "$S/catm.hex2" /tmp/catm
-/tmp/catm /tmp/M0.combined.hex2 "$S/ELF.hex2" "$S/M0.hex2"
-/tmp/hex2-0 /tmp/M0.combined.hex2 /tmp/M0
-
-cp /tmp/hex0 /tmp/hex1 /tmp/hex2-0 /tmp/catm /tmp/M0 "$OUT/"
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+BOOT0=build/$ARCH/boot0
+OUT=build/$ARCH/boot1
+STAGE=build/$ARCH/.boot1-stage
+
+# ── ensure container image exists ─────────────────────────────────────
+if ! podman image exists "$IMAGE"; then
+ echo "[boot1 $ARCH] building $IMAGE"
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+# ── prerequisite: boot0 binaries must exist ───────────────────────────
+for bin in hex2 M0 catm; do
+ [ -x "$BOOT0/$bin" ] || {
+ echo "[boot1 $ARCH] missing prerequisite: $BOOT0/$bin (run scripts/boot0.sh $ARCH)" >&2
+ exit 1
+ }
+done
+
+# ── reset staging, copy inputs explicitly ─────────────────────────────
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
+
+cp "$BOOT0/hex2" "$BOOT0/M0" "$BOOT0/catm" "$STAGE/in/"
+cp M1pp/M1pp.P1 "$STAGE/in/M1pp.P1"
+cp hex2pp/hex2pp.P1 "$STAGE/in/hex2pp.P1"
+cp "P1/P1-$ARCH.M1" "$STAGE/in/P1.M1"
+cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
+
+# ── run the per-source .P1 -> ELF pipeline twice ──────────────────────
+# For each src in {M1pp.P1, hex2pp.P1}:
+# cat P1.M1 src > combined.M1 (per-arch backend prepended to source)
+# M0 combined.M1 -> prog.hex2
+# catm linked.hex2 ELF.hex2 prog.hex2
+# hex2 linked.hex2 -> ELF binary
+#
+# Stages everything through /tmp because stage0 tools do one syscall per
+# byte; virtiofs round-trips would dominate.
+echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=512M \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu -s <<'CONTAINER'
+build_one() {
+    src=$1; out=$2  # $1 = .P1 source name under /work/in, $2 = ELF name under /work/out
+    cat /work/in/P1.M1 "/work/in/$src" > /tmp/combined.M1  # per-arch backend prepended to source
+    /work/in/M0 /tmp/combined.M1 /tmp/prog.hex2
+    /work/in/catm /tmp/linked.hex2 /work/in/ELF.hex2 /tmp/prog.hex2
+    /work/in/hex2 /tmp/linked.hex2 "/tmp/$out"  # seed hex2 writes into tmpfs, not onto the bind mount
+    chmod +x "/tmp/$out"
+    cp "/tmp/$out" "/work/out/$out"  # only the finished ELF crosses over to /work/out
+}
+build_one M1pp.P1 M1pp
+build_one hex2pp.P1 hex2pp
+CONTAINER
+
+# ── copy outputs to final destination ─────────────────────────────────
+for f in M1pp hex2pp; do
+ cp "$STAGE/out/$f" "$OUT/$f"
+ chmod 0700 "$OUT/$f"
+done
+
+echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp}"
diff --git a/scripts/boot2.sh b/scripts/boot2.sh
@@ -1,28 +1,98 @@
#!/bin/sh
-## boot2.sh — stage 2 of the bootstrap chain.
+## boot2.sh — standalone build of scheme1 from scheme1.P1pp.
##
-## In-container script. Builds the two self-hosted tools (M1pp expander
-## ELF and hex2pp assembler/linker ELF) from their pure-P1 sources via
-## the seed M0+hex2 pipeline (boot-build-p1.sh), using the checked-in
-## pre-pruned P1 backend table (P1/P1-$ARCH.M1).
+## Stage 2 of the README's chain: produces the scheme1 interpreter ELF.
+## End-to-end through M1pp + hex2pp (no seed M0/hex2 anywhere on the
+## .P1pp pipeline).
##
-## Outputs: build/$ARCH/M1pp/M1pp
-## build/$ARCH/hex2pp/hex2pp
+## ─── Inputs (sources, copied into staging) ────────────────────────────
+## scheme1/scheme1.P1pp — interpreter source
+## P1/P1-$ARCH.M1pp — per-arch M1pp backend
+## P1/P1.M1pp — arch-agnostic P1pp frontend
+## P1/P1pp.P1pp — libp1pp standard library
+## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment
##
-## After this stage completes, the seed M0/hex2-0 tools are no longer
-## used by any downstream target — every .P1pp source flows through
-## M1pp + hex2pp instead (see scripts/boot-build-p1pp.sh).
+## ─── Inputs (binaries from prior stages) ──────────────────────────────
+## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
##
-## Env: ARCH=aarch64|amd64|riscv64
+## ─── Tools (in container) ─────────────────────────────────────────────
+## busybox sh + cat + chmod (scratch + busybox image only).
+## Plus the boot1 binaries (M1pp, hex2pp), staged in.
+##
+## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot2/scheme1 — scheme1 interpreter ELF
+##
+## Usage: scripts/boot2.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
set -eu
-: "${ARCH:?ARCH must be set}"
+usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
case "$ARCH" in
- aarch64|amd64|riscv64) ;;
- *) echo "boot2.sh: unsupported arch '$ARCH'" >&2; exit 1 ;;
+ aarch64) PLATFORM=linux/arm64 ;;
+ amd64) PLATFORM=linux/amd64 ;;
+ riscv64) PLATFORM=linux/riscv64 ;;
+ *) usage ;;
esac
-sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp
-sh scripts/boot-build-p1.sh hex2pp/hex2pp.P1 build/$ARCH/hex2pp/hex2pp
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+BOOT1=build/$ARCH/boot1
+OUT=build/$ARCH/boot2
+STAGE=build/$ARCH/.boot2-stage
+
+# ── ensure container image exists ─────────────────────────────────────
+if ! podman image exists "$IMAGE"; then
+ echo "[boot2 $ARCH] building $IMAGE"
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+# ── prerequisite: boot1 binaries must exist ───────────────────────────
+for bin in M1pp hex2pp; do
+ [ -x "$BOOT1/$bin" ] || {
+ echo "[boot2 $ARCH] missing prerequisite: $BOOT1/$bin (run scripts/boot1.sh $ARCH)" >&2
+ exit 1
+ }
+done
+
+# ── reset staging, copy inputs explicitly ─────────────────────────────
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
+
+cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$STAGE/in/"
+cp scheme1/scheme1.P1pp "$STAGE/in/scheme1.P1pp"
+cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
+cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
+cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
+cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
+
+# ── run the .P1pp -> ELF pipeline ─────────────────────────────────────
+# cat backend + frontend + libp1pp + scheme1.P1pp -> combined.M1pp
+# M1pp combined.M1pp -> expanded.hex2pp
+# cat ELF.hex2 expanded.hex2pp -> linked.hex2pp
+# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
+echo "[boot2 $ARCH] scheme1.P1pp -> scheme1"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=512M \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu -s <<'CONTAINER'
+cat /work/in/backend.M1pp /work/in/frontend.M1pp \
+ /work/in/libp1pp.P1pp /work/in/scheme1.P1pp \
+ > /tmp/combined.M1pp
+/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
+cat /work/in/ELF.hex2 /tmp/expanded.hex2pp > /tmp/linked.hex2pp
+/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/scheme1
+chmod +x /work/out/scheme1
+CONTAINER
+
+# ── copy output to final destination ──────────────────────────────────
+cp "$STAGE/out/scheme1" "$OUT/scheme1"
+chmod 0700 "$OUT/scheme1"
+
+echo "[boot2 $ARCH] OK -> $OUT/scheme1"
diff --git a/scripts/boot3.sh b/scripts/boot3.sh
@@ -0,0 +1,257 @@
+#!/bin/sh
+## boot3.sh — standalone three-stage tcc bootstrap.
+##
+## README's `(define tcc (tcc1 tcc.c))`: produces tcc-boot2 (cc.scm
+## compiles tcc.flat.c), tcc-tcc (tcc-boot2 compiles tcc.flat.c), and
+## tcc-tcc-tcc (tcc-tcc compiles tcc.flat.c). Stages 2 and 3 are the
+## bootstrap fixed-point check.
+##
+## ─── Inputs (host-side preconditions, NOT produced by this script) ───
+## build/tcc/$TCC_TARGET/tcc.flat.c
+## — flattened tcc TU (run scripts/stage1-flatten.sh
+## --arch $TCC_TARGET to produce)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/{include,lib}
+## — tcc-0.9.26 unpacked tree (side-product of
+## stage1-flatten.sh)
+## build/$ARCH/vendor/mes-libc/libc.flat.c
+## — flattened mes-libc TU (run
+## scripts/libc-flatten.sh --arch $ARCH)
+##
+## ─── Inputs (sources, copied into staging) ────────────────────────────
+## scheme1/prelude.scm cc/cc.scm cc/main.scm — catm'd to cc.scm bundle
+## P1/P1-$ARCH.M1pp P1/P1.M1pp P1/P1pp.P1pp — M1pp pipeline
+## P1/entry-libc.P1pp P1/elf-end.P1pp — link-time framing
+## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment
+## tcc-libc/$ARCH/start.S — _start, calls __libc_init+main
+## tcc-libc/$ARCH/sys_stubs.S — sys_* syscall wrappers
+## tcc-libc/va_list_shim.h — gcc/tcc va_list bridge
+## tcc-cc/mem.c — memcpy/memmove/memset/memcmp
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include/** (whole tree)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/lib-arm64.c
+## (aarch64 + riscv64: TFmode soft-float)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/va_list.c
+## (amd64: __va_start / __va_arg)
+## build/tcc/$TCC_TARGET/tcc.flat.c — flattened tcc TU
+## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU
+##
+## ─── Inputs (binaries from prior stages) ──────────────────────────────
+## build/$ARCH/boot0/catm — built by scripts/boot0.sh
+## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
+## build/$ARCH/boot2/scheme1 — built by scripts/boot2.sh
+##
+## ─── Tools ────────────────────────────────────────────────────────────
+## In container: scratch + busybox (no libc, no /etc, no resolver).
+## On host: cross-assembler for {start.S, sys_stubs.S}:
+## aarch64/amd64 -> $HOST_CC -target ...
+## riscv64 -> boot2-alpine-gcc:riscv64 image
+## (built on demand from
+## scripts/Containerfile.alpine-gcc)
+## These two .S files are the only host-side build step
+## in stage 3; tcc-boot2's codegen does not accept .S.
+##
+## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot3/tcc-boot2 — cc.scm-built tcc (compile 1)
+## build/$ARCH/boot3/tcc-tcc — tcc-boot2-built tcc (compile 2)
+## build/$ARCH/boot3/tcc-tcc-tcc — tcc-tcc-built tcc (compile 3)
+##
+## Usage: scripts/boot3.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
+
+set -eu
+
+usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
+
+case "$ARCH" in
+ aarch64) PLATFORM=linux/arm64;
+ TCC_TARGET=ARM64;
+ HOST_CC_TARGET=aarch64-linux-gnu;
+ LIB_HELPER_SRC=lib-arm64.c;
+ LIB_HELPER_OBJ=lib-arm64.o;
+ LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1" ;;
+ amd64) PLATFORM=linux/amd64;
+ TCC_TARGET=X86_64;
+ HOST_CC_TARGET=x86_64-linux-gnu;
+ LIB_HELPER_SRC=va_list.c;
+ LIB_HELPER_OBJ=va_list.o;
+ LIB_HELPER_DEFINES="-D TCC_TARGET_X86_64=1" ;;
+ riscv64) PLATFORM=linux/riscv64;
+ TCC_TARGET=RISCV64;
+ HOST_CC_TARGET=;
+ LIB_HELPER_SRC=lib-arm64.c;
+ LIB_HELPER_OBJ=lib-arm64.o;
+ LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1" ;;
+ *) usage ;;
+esac
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+ALPINE_IMAGE=boot2-alpine-gcc:$ARCH
+BOOT0=build/$ARCH/boot0
+BOOT1=build/$ARCH/boot1
+BOOT2=build/$ARCH/boot2
+OUT=build/$ARCH/boot3
+STAGE=build/$ARCH/.boot3-stage
+
+TCC_DIR=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c
+TCC_FLAT=build/tcc/$TCC_TARGET/tcc.flat.c
+LIBC_FLAT=build/$ARCH/vendor/mes-libc/libc.flat.c
+HOST_CC=${HOST_CC:-cc}
+
+# ── ensure container image exists ─────────────────────────────────────
+if ! podman image exists "$IMAGE"; then
+ echo "[boot3 $ARCH] building $IMAGE"
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+# ── prerequisite: prior-stage binaries ────────────────────────────────
+[ -x "$BOOT0/catm" ] || { echo "[boot3 $ARCH] missing $BOOT0/catm (run scripts/boot0.sh $ARCH)" >&2; exit 1; }
+[ -x "$BOOT1/M1pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/M1pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
+[ -x "$BOOT1/hex2pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/hex2pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
+[ -x "$BOOT2/scheme1" ] || { echo "[boot3 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; }
+
+# ── prerequisite: host-flattened sources + unpacked tcc tree ──────────
+[ -e "$TCC_FLAT" ] || { echo "[boot3 $ARCH] missing $TCC_FLAT (run scripts/stage1-flatten.sh --arch $TCC_TARGET)" >&2; exit 1; }
+[ -e "$LIBC_FLAT" ] || { echo "[boot3 $ARCH] missing $LIBC_FLAT (run scripts/libc-flatten.sh --arch $ARCH)" >&2; exit 1; }
+[ -d "$TCC_DIR/include" ] || { echo "[boot3 $ARCH] missing $TCC_DIR/include (run scripts/stage1-flatten.sh --arch $TCC_TARGET)" >&2; exit 1; }
+[ -e "$TCC_DIR/lib/$LIB_HELPER_SRC" ] || { echo "[boot3 $ARCH] missing $TCC_DIR/lib/$LIB_HELPER_SRC" >&2; exit 1; }
+
+# ── reset staging, copy inputs explicitly ─────────────────────────────
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/in/tcc-include" "$STAGE/out" "$OUT"
+
+# Prior-stage binaries
+cp "$BOOT0/catm" "$STAGE/in/catm"
+cp "$BOOT1/M1pp" "$STAGE/in/M1pp"
+cp "$BOOT1/hex2pp" "$STAGE/in/hex2pp"
+cp "$BOOT2/scheme1" "$STAGE/in/scheme1"
+
+# cc.scm bundle inputs
+cp scheme1/prelude.scm "$STAGE/in/prelude.scm"
+cp cc/cc.scm "$STAGE/in/cc.scm"
+cp cc/main.scm "$STAGE/in/main.scm"
+
+# M1pp pipeline + framing
+cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
+cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
+cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
+cp P1/entry-libc.P1pp "$STAGE/in/entry-libc.P1pp"
+cp P1/elf-end.P1pp "$STAGE/in/elf-end.P1pp"
+cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
+
+# tcc-libc / tcc-cc helpers
+cp "tcc-libc/$ARCH/start.S" "$STAGE/in/start.S"
+cp "tcc-libc/$ARCH/sys_stubs.S" "$STAGE/in/sys_stubs.S"
+cp tcc-libc/va_list_shim.h "$STAGE/in/va_list_shim.h"
+cp tcc-cc/mem.c "$STAGE/in/mem.c"
+
+# Per-arch libtcc1 helper source
+cp "$TCC_DIR/lib/$LIB_HELPER_SRC" "$STAGE/in/$LIB_HELPER_SRC"
+
+# Flattened TUs
+cp "$TCC_FLAT" "$STAGE/in/tcc.flat.c"
+cp "$LIBC_FLAT" "$STAGE/in/libc.flat.c"
+
+# tcc include tree (small, < 200KB) — copied wholesale so tcc-boot2's
+# -I resolves stdarg.h etc. Recursive cp keeps directory layout.
+cp -R "$TCC_DIR/include/." "$STAGE/in/tcc-include/"
+
+# ── HOST cross-assembly of start.o + sys_stubs.o ──────────────────────
+# tcc-boot2's codegen does not accept .S. Two-line shim in host tooling.
+host_asm() {
+    out=$1; src=$2  # $1 = object to write, $2 = .S source; both relative to $STAGE
+    if [ "$ARCH" != "riscv64" ]; then
+        $HOST_CC -target "$HOST_CC_TARGET" -c -o "$ROOT/$STAGE/$out" \
+            -x assembler "$ROOT/$STAGE/$src"
+        return
+    fi
+    podman image exists "$ALPINE_IMAGE" || {
+        echo "[boot3 $ARCH] building $ALPINE_IMAGE"
+        podman build --platform "$PLATFORM" -t "$ALPINE_IMAGE" \
+            -f scripts/Containerfile.alpine-gcc scripts/
+    }
+    podman run --rm --pull=never --platform "$PLATFORM" \
+        -v "$ROOT/$STAGE:/work" -w /work "$ALPINE_IMAGE" \
+        cc -c -o "$out" -x assembler "$src"
+}
+
+echo "[boot3 $ARCH] cross-asm: start.S + sys_stubs.S -> .o (host)"
+host_asm in/start.o in/start.S
+host_asm in/sys_stubs.o in/sys_stubs.S
+
+# ── run the full Stage A + B + C + D pipeline in one container ────────
+# Stage A: cc.scm bundle, libc.P1pp + tcc.flat.P1pp via scheme1 + cc.scm,
+# link tcc-boot2 ELF via M1pp + hex2pp.
+# Stage B: tcc-boot2 builds mem.o, libc.o, helper.o (va_list or lib-arm64).
+# Stage C: tcc-boot2 links tcc-tcc.
+# Stage D: tcc-tcc rebuilds helpers, links tcc-tcc-tcc.
+echo "[boot3 $ARCH] cc.scm bundle -> tcc-boot2 -> tcc-tcc -> tcc-tcc-tcc"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=1024M \
+ -e LIB_HELPER_SRC="$LIB_HELPER_SRC" \
+ -e LIB_HELPER_OBJ="$LIB_HELPER_OBJ" \
+ -e LIB_HELPER_DEFINES="$LIB_HELPER_DEFINES" \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu -s <<'CONTAINER'
+IN=/work/in
+OUT=/work/out
+TCC_INC=$IN/tcc-include
+
+# ── Stage A.1: catm cc.scm bundle ─────────────────────────────────────
+$IN/catm /tmp/cc-bundled.scm $IN/prelude.scm $IN/cc.scm $IN/main.scm
+
+# ── Stage A.2: scheme1 + cc.scm -> libc.P1pp ──────────────────────────
+$IN/scheme1 /tmp/cc-bundled.scm --lib=libc__ $IN/libc.flat.c /tmp/libc.P1pp
+
+# ── Stage A.3: scheme1 + cc.scm -> tcc.flat.P1pp ──────────────────────
+$IN/scheme1 /tmp/cc-bundled.scm --lib=tcc__ $IN/tcc.flat.c /tmp/tcc.flat.P1pp
+
+# ── Stage A.4: M1pp + hex2pp pipeline -> tcc-boot2 ELF ────────────────
+cat $IN/backend.M1pp $IN/frontend.M1pp $IN/libp1pp.P1pp \
+ $IN/entry-libc.P1pp /tmp/libc.P1pp /tmp/tcc.flat.P1pp $IN/elf-end.P1pp \
+ > /tmp/combined.M1pp
+$IN/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
+cat $IN/ELF.hex2 /tmp/expanded.hex2pp > /tmp/linked.hex2pp
+$IN/hex2pp -B 0x600000 /tmp/linked.hex2pp $OUT/tcc-boot2
+chmod +x $OUT/tcc-boot2
+
+# ── Stage B: tcc-boot2 builds helper objects ──────────────────────────
+build_helpers() {
+ cc=$1; workdir=$2  # $1 = tcc binary to compile with, $2 = directory that receives the .o files
+ "$cc" -nostdlib -I "$TCC_INC" -c -o "$workdir/mem.o" $IN/mem.c  # memcpy/memmove/memset/memcmp
+ "$cc" -nostdlib -I "$TCC_INC" -include $IN/va_list_shim.h \
+ -c -o "$workdir/libc.o" $IN/libc.flat.c
+ # shellcheck disable=SC2086 # LIB_HELPER_DEFINES intentionally word-split
+ "$cc" -nostdlib -I "$TCC_INC" $LIB_HELPER_DEFINES \
+ -c -o "$workdir/$LIB_HELPER_OBJ" "$IN/$LIB_HELPER_SRC"
+}  # LIB_HELPER_{SRC,OBJ,DEFINES} arrive via `podman run -e` from the host-side arch case
+mkdir -p /tmp/stage2 /tmp/stage3
+build_helpers $OUT/tcc-boot2 /tmp/stage2
+
+# ── Stage C: tcc-boot2 -> tcc-tcc ─────────────────────────────────────
+$OUT/tcc-boot2 -nostdlib -I "$TCC_INC" -include $IN/va_list_shim.h \
+ $IN/start.o $IN/sys_stubs.o /tmp/stage2/mem.o /tmp/stage2/libc.o \
+ /tmp/stage2/$LIB_HELPER_OBJ \
+ $IN/tcc.flat.c -o $OUT/tcc-tcc
+chmod +x $OUT/tcc-tcc
+
+# ── Stage D: tcc-tcc rebuilds helpers, links tcc-tcc-tcc ──────────────
+build_helpers $OUT/tcc-tcc /tmp/stage3
+$OUT/tcc-tcc -nostdlib -I "$TCC_INC" -include $IN/va_list_shim.h \
+ $IN/start.o $IN/sys_stubs.o /tmp/stage3/mem.o /tmp/stage3/libc.o \
+ /tmp/stage3/$LIB_HELPER_OBJ \
+ $IN/tcc.flat.c -o $OUT/tcc-tcc-tcc
+chmod +x $OUT/tcc-tcc-tcc
+CONTAINER
+
+# ── copy outputs to final destination ─────────────────────────────────
+for f in tcc-boot2 tcc-tcc tcc-tcc-tcc; do
+ cp "$STAGE/out/$f" "$OUT/$f"
+ chmod 0700 "$OUT/$f"
+done
+
+echo "[boot3 $ARCH] OK -> $OUT/{tcc-boot2, tcc-tcc, tcc-tcc-tcc}"
diff --git a/scripts/mk-seed-tools.sh b/scripts/mk-seed-tools.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+## mk-seed-tools.sh — Makefile-internal: stage 1 of the bootstrap chain.
+##
+## Standalone equivalent: scripts/boot0.sh.
+##
+## In-container script. Brings up M0/hex2-0/catm from the ~400-byte
+## hex0-seed by chaining stage0-posix's first three phases. All produced
+## binaries are target-arch Linux ELF.
+##
+## Inputs (read): vendor/seed/$ARCH/{hex0-seed,hex0.hex0,hex1.hex0,
+## hex2.hex1,catm.hex2,M0.hex2,ELF.hex2}
+## Outputs: build/$ARCH/tools/{hex0,hex1,hex2-0,catm,M0}
+##
+## Phase map (stage0-posix mescc-tools-{seed,mini}-kaem.kaem phases 0-3):
+## 0) hex0-seed + hex0.hex0 -> hex0
+## 1) hex0 + hex1.hex0 -> hex1
+## 2) hex1 + hex2.hex1 -> hex2-0
+## 2b) hex2-0 + catm.hex2 -> catm
+## 3a) catm : ELF.hex2 + M0.hex2 -> M0.combined.hex2
+## 3b) hex2-0 : M0.combined.hex2 -> M0
+##
+## Env: ARCH=aarch64|amd64|riscv64
+
+set -eu
+
+: "${ARCH:?ARCH must be set}"
+
+case "$ARCH" in
+    aarch64|amd64|riscv64) ;;  # the three supported target arches
+    *) echo "mk-seed-tools.sh: unsupported arch '$ARCH'" >&2 ; exit 1 ;;  # diagnostic names this script
+esac
+
+S=vendor/seed/$ARCH
+OUT=build/$ARCH/tools
+mkdir -p "$OUT"
+
+## Build everything in /tmp (RAM tmpfs — see PODMAN macro in Makefile),
+## then cp the final binaries to the bind-mounted $OUT. Stage0 tools do
+## one syscall per byte; staying off virtiofs for intermediates is ~5x.
+"$S/hex0-seed" "$S/hex0.hex0" /tmp/hex0
+/tmp/hex0 "$S/hex1.hex0" /tmp/hex1
+/tmp/hex1 "$S/hex2.hex1" /tmp/hex2-0
+/tmp/hex2-0 "$S/catm.hex2" /tmp/catm
+/tmp/catm /tmp/M0.combined.hex2 "$S/ELF.hex2" "$S/M0.hex2"
+/tmp/hex2-0 /tmp/M0.combined.hex2 /tmp/M0
+
+cp /tmp/hex0 /tmp/hex1 /tmp/hex2-0 /tmp/catm /tmp/M0 "$OUT/"