commit 0aa867eecc1187ba9b9ef6194511152c877eafc0
parent 1be0f6600f387c486fff3eba3a2d76393e96bbdb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 19:38:51 -0700
boot{3,4,5}: split boot3 into tcc0 (boot3) + tcc rebuilds (boot4); musl is now boot5
boot3 now produces only tcc0 (cc.scm-built bootstrap). The tcc0 -> tcc1
-> tcc2 -> tcc3 self-host chain, helper/archive builds, mes-libc hello,
and the tcc2 == tcc3 fixed-point check move to a new boot4.sh. The
former boot4 (musl) is renamed to boot5; boot{4,5}-calibrate.sh and the
musl shim references followed. boot.sh now runs boot0..boot5 and clears
build/$ARCH first so each run starts from a clean per-arch tree.
Verified end-to-end on aarch64 via boot.sh.
Diffstat:
12 files changed, 778 insertions(+), 651 deletions(-)
diff --git a/Makefile b/Makefile
@@ -65,7 +65,9 @@
# scripts/boot0.sh <arch> -> hex2 / M0 / catm
# scripts/boot1.sh <arch> -> m1pp / hex2pp
# scripts/boot2.sh <arch> -> scheme1
-# scripts/boot3.sh <arch> -> tcc-boot2 / tcc-tcc / tcc-tcc-tcc
+# scripts/boot3.sh <arch> -> tcc0 (cc.scm-built bootstrap)
+# scripts/boot4.sh <arch> -> tcc1 / tcc2 / tcc3 + libtcc1.a + libc.a + hello
+# scripts/boot5.sh <arch> -> static musl libc + hello
# The seed M0/hex2-0/catm participate ONLY in step 2 (building the two
# new tools from their .P1 sources). Once both binaries exist, no
# downstream user/test/scheme/cc target ever invokes them again.
diff --git a/README.md b/README.md
@@ -29,15 +29,15 @@
(hex2pp (catm ELF.hex2 (m1pp (catm P1A.M1pp P1.M1pp P1pp.P1pp src)))))
;; Rebuild catm from P1pp; after this stage the seed boot0 catm is
-;; no longer needed and boot3 runs with only boot1 + boot2 binaries.
+;; no longer needed and boot3+ run with only boot1 + boot2 binaries.
(define catm (ppexe catm.P1pp))
(define scheme (ppexe scheme1.P1pp))
-;; ── boot3.sh ── C ────────────────────────────────────────────────────
+;; ── boot3.sh / boot4.sh ── C ─────────────────────────────────────────
(defn scc (C-src) (ppexe (scheme cc.scm C-src)))
-(define tcc0 (scc tcc.c)) ;; compiler: scheme cc.scm
-(define tcc1 (tcc0 tcc.c)) ;; compiler: scheme-compiled tcc
-(define tcc (tcc1 tcc.c)) ;; compiler: tcc-compiled tcc
+(define tcc0 (scc tcc.c)) ;; boot3: compiler is scheme cc.scm
+(define tcc1 (tcc0 tcc.c)) ;; boot4: compiler is scheme-compiled tcc
+(define tcc (tcc1 tcc.c)) ;; boot4: compiler is tcc-compiled tcc
```
* P1: [docs/P1.md](docs/P1.md.html)
diff --git a/docs/MUSL.md b/docs/MUSL.md
@@ -1,7 +1,7 @@
-# boot4 musl spec
+# boot5 musl spec
-`scripts/boot4.sh <arch>` builds a static musl 1.2.5 libc with the
-verified boot3 tcc for the same architecture, then links and runs a
+`scripts/boot5.sh <arch>` builds a static musl 1.2.5 libc with the
+verified boot4 tcc for the same architecture, then links and runs a
static hello-world smoke binary. Supported architectures are `amd64`,
`aarch64`, and `riscv64`; aarch64 is verified end-to-end every run, and
the same recipe has previously been validated against amd64 and riscv64.
@@ -20,23 +20,24 @@ minimal shell.
```sh
scripts/boot3.sh <amd64|aarch64|riscv64>
-scripts/boot4-calibrate.sh <amd64|aarch64|riscv64> # once per arch
scripts/boot4.sh <amd64|aarch64|riscv64>
+scripts/boot5-calibrate.sh <amd64|aarch64|riscv64> # once per arch
+scripts/boot5.sh <amd64|aarch64|riscv64>
```
## Inputs
| Path | Purpose |
|------|---------|
-| `build/$ARCH/boot3/tcc3` | fixed-point self-host tcc from boot3 |
-| `build/$ARCH/boot3/libtcc1.a` | tcc runtime archive produced by boot3 |
+| `build/$ARCH/boot4/tcc3` | fixed-point self-host tcc from boot4 |
+| `build/$ARCH/boot4/libtcc1.a` | tcc runtime archive produced by boot4 |
| `vendor/upstream/musl-1.2.5.tar.gz` | pristine upstream musl source |
| `vendor/upstream/musl-1.2.5-overrides/` | post-patch files vendored as a tree (replaces the old patch + `patch` binary) |
| `vendor/upstream/musl-1.2.5-deletes.txt` | upstream files removed by the same patch set, one path per line |
| `vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH/{alltypes,syscall}.h` | per-arch headers pre-generated at vendor time (replaces musl's mkalltypes.sed + `__NR_`→`SYS_` rewrite, so the container needs no awk) |
-| `vendor/upstream/musl-1.2.5-skip-$ARCH.txt` | per-arch calibration list — sources tcc 0.9.26 cannot compile, produced by `scripts/boot4-calibrate.sh` |
-| `scripts/boot4-musl-shim-$ARCH.h` | per-arch `__builtin_va_list` bridge |
-| `scripts/boot-hello.c` | smoke-test source (shared with boot3) |
+| `vendor/upstream/musl-1.2.5-skip-$ARCH.txt` | per-arch calibration list — sources tcc 0.9.26 cannot compile, produced by `scripts/boot5-calibrate.sh` |
+| `build/tcc/stdarg-bridge.h` | shared `__builtin_va_list` bridge (one file, three arches gated by `#ifdef`; produced by `scripts/stage1-flatten.sh`) |
+| `scripts/boot-hello.c` | smoke-test source (shared with boot4) |
Architecture mapping:
@@ -48,27 +49,27 @@ Architecture mapping:
## Outputs
-`scripts/boot4.sh` writes final artifacts to `build/$ARCH/boot4/`:
+`scripts/boot5.sh` writes final artifacts to `build/$ARCH/boot5/`:
| File | Purpose |
|------|---------|
| `libc.a` | static musl libc archive |
| `crt1.o`, `crti.o`, `crtn.o` | static startup and init/fini CRT objects |
-| `hello` | static smoke-test ELF linked by boot4 |
+| `hello` | static smoke-test ELF linked by boot5 |
-Staging lives under `build/$ARCH/.boot4-stage/`, organized as:
+Staging lives under `build/$ARCH/.boot5-stage/`, organized as:
| Subdir | Role |
|--------|------|
| `in/` | exactly the files the container reads (bind-mounted as `/work/in`) |
| `_host/` | host-only scratch (source enumeration outputs); not visible to the container |
-| `out/` | container writes here; host then copies to `build/$ARCH/boot4/` |
+| `out/` | container writes here; host then copies to `build/$ARCH/boot5/` |
-The entire `.boot4-stage` tree is disposable; every `boot4.sh` run rebuilds it.
+The entire `.boot5-stage` tree is disposable; every `boot5.sh` run rebuilds it.
## Pipeline
-1. **Stage inputs (host)**. Copy boot3 `tcc3` and `libtcc1.a` to `in/`.
+1. **Stage inputs (host)**. Copy boot4 `tcc3` and `libtcc1.a` to `in/`.
Extract the musl tarball into `in/musl-1.2.5/`. Overlay the vendored
`musl-1.2.5-overrides/` tree on top of it. Remove every path listed
in `musl-1.2.5-deletes.txt`. The result is the post-patch tree that
@@ -98,16 +99,16 @@ The entire `.boot4-stage` tree is disposable; every `boot4.sh` run rebuilds it.
container `cp -R`s the prepared tree into tmpfs (its bind-mounted
`/work/in` is logically read-only) and executes `run.sh` straight
through.
-6. **Verify (host)**. Copy outputs into `build/$ARCH/boot4/`. The
+6. **Verify (host)**. Copy outputs into `build/$ARCH/boot5/`. The
smoke-test `hello` was already executed inside the container as
the last line of `run.sh`.
`musl`'s own `configure` script is **not run** — it only produces
-`config.mak`, which we don't read. boot4 supplies its own hardcoded
+`config.mak`, which we don't read. boot5 supplies its own hardcoded
`CFLAGS_BASE`.
Assembler inputs must not receive the va-list shim. tcc 0.9.26 applies
-`-include` to `.s`/`.S` as well as `.c`, so boot4 keeps separate
+`-include` to `.s`/`.S` as well as `.c`, so boot5 keeps separate
`CFLAGS_C` and `CFLAGS_ASM`.
## Compatibility Surface
@@ -118,13 +119,13 @@ surfaces tcc 0.9.26 cannot compile:
| Area | Rule |
|------|------|
| syscalls | replace GCC register-asm-variable wrappers with per-arch asm trampolines |
-| atomics / thread pointer | replace inline asm operands with extern asm helpers on aarch64 (true atomic via raw `.long` LL/SC) and riscv64 (single-threaded C-inline a_cas — sufficient for the boot4 hello smoke binary; tcc-asm has no LR/SC mnemonics) |
+| atomics / thread pointer | replace inline asm operands with extern asm helpers on aarch64 (true atomic via raw `.long` LL/SC) and riscv64 (single-threaded C-inline a_cas — sufficient for the boot5 hello smoke binary; tcc-asm has no LR/SC mnemonics) |
| crt entry trampoline | aarch64 + riscv64: replace upstream `crt_arch.h` with a minimal `_start` that passes `sp` to `_start_c` and tail-jumps. Drops `.option`, `lla gp`, and the `tail` pseudo (none parseable by tcc-asm). |
| weak aliases | implement `weak_alias` via assembler `.weak`/`.set` directives |
| C99 array parameters | remove `[static N]` qualifiers tcc does not parse |
| `_Complex` | stub `complex.h` and remove complex sources |
| arch asm overrides | delete unsupported fenv, signal, setjmp, thread, string, math overrides as needed |
-| varargs | pre-include `scripts/boot4-musl-shim-$ARCH.h` for C translation units |
+| varargs | pre-include `build/tcc/stdarg-bridge.h` (the post-patch tcc `<stdarg.h>`) for C translation units |
Required tcc fixes live under `scripts/simple-patches/tcc-0.9.26/`.
The musl build depends on the aarch64 literal-address load/store fixes
@@ -132,7 +133,7 @@ and the LP64 `L`-suffix constant fix.
## Calibration
-`scripts/boot4-calibrate.sh <arch>` produces
+`scripts/boot5-calibrate.sh <arch>` produces
`vendor/upstream/musl-1.2.5-skip-$arch.txt`, the list of musl sources
tcc 0.9.26 cannot compile for that arch. It runs the legacy skip-on-fail
loop in the container once and captures the failures.
@@ -142,7 +143,7 @@ Re-run calibration whenever any of these change:
- the musl overrides or deletes;
- the vendored tcc or musl source tarballs.
-The calibration list lets `boot4.sh` emit a flat `run.sh` whose compile
+The calibration list lets `boot5.sh` emit a flat `run.sh` whose compile
loop has no `if $TCC … ; then ok else skip fi` branch — every emitted
command is expected to succeed.
@@ -154,19 +155,19 @@ command is expected to succeed.
| `amd64` | yes | 12 |
| `riscv64` | yes | 3 |
-Skipped sources are outside the boot4 hello closure. They fall into two
+Skipped sources are outside the boot5 hello closure. They fall into two
categories:
- long-double constant-folding files that tcc 0.9.26 cannot compile;
- thread exit / low-level asm files needing inline-asm operand support.
-Anything that references a skipped function may fail to link. The boot4
+Anything that references a skipped function may fail to link. The boot5
contract is a static libc sufficient to link and run the included hello
smoke program, not full musl conformance.
## Smoke Output
-Successful boot4 ends by running:
+Successful boot5 ends by running:
```text
hello from tcc-built libc; argc=4
@@ -174,5 +175,5 @@ strdup: works, strlen: 5
```
(The same `hello` source, `scripts/boot-hello.c`, is also linked and
-run by boot3 against the mes-libc closure — proving both libc closures
+run by boot4 against the mes-libc closure — proving both libc closures
are exec-correct under their respective build systems.)
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -212,15 +212,15 @@ trips it.
### tcc0 → tcc1 is not a fixed point on riscv64 (cc.scm behavioral bug)
-`boot3.sh` produces four staged compilers:
+`boot3.sh` + `boot4.sh` produce four staged compilers:
-- `tcc0` = tcc-source compiled by cc.scm
-- `tcc1` = tcc-source compiled by tcc0
-- `tcc2` = tcc-source compiled by tcc1
-- `tcc3` = tcc-source compiled by tcc2
+- `tcc0` = tcc-source compiled by cc.scm (boot3 output)
+- `tcc1` = tcc-source compiled by tcc0 (boot4)
+- `tcc2` = tcc-source compiled by tcc1 (boot4)
+- `tcc3` = tcc-source compiled by tcc2 (boot4)
The fixed-point check is **`tcc2 == tcc3`** (asserted at the end of
-`boot3.sh`, verified on aarch64, amd64, riscv64). On riscv64 the
+`boot4.sh`, verified on aarch64, amd64, riscv64). On riscv64 the
weaker `tcc1 == tcc2` does *not* hold: `tcc0(tcc.flat.c)` produces
a 616100-byte `.o` while `tcc1(tcc.flat.c)` and `tcc2(tcc.flat.c)`
produce a byte-identical 615892-byte `.o` — 208 bytes larger from
@@ -267,9 +267,9 @@ path.
#### Repro / starting point
```sh
-# In the riscv64 container with boot3 outputs present:
-$TCC0 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc0.o tcc.flat.c
-$TCC1 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc1.o tcc.flat.c
+# In the riscv64 container with boot3+boot4 outputs present:
+$TCC0 -nostdlib -c -o /tmp/flat-tcc0.o tcc.flat.c
+$TCC1 -nostdlib -c -o /tmp/flat-tcc1.o tcc.flat.c
# wc -c /tmp/flat-tcc0.o /tmp/flat-tcc1.o → 616100 vs 615892
# objdump -d both, normalize addresses, diff to find divergent functions
```
@@ -286,20 +286,23 @@ canonical compiler.
`scripts/{boot0,boot1,boot2}.sh` are pure scratch + busybox — no host
compiler, no alpine-gcc image, just `podman` + the pinned `busybox:musl`
-digest. `boot3.sh` still has one host-tooling dep on **aarch64 only**:
+digest. `boot3.sh` is also pure scratch + busybox (it's just
+scheme1 + M1pp + hex2pp on `.flat.c` inputs flattened by host `cc -E`).
+The tcc rebuild stages (now `boot4.sh`) previously had one host-tooling dep on **aarch64 only**:
cross-asm of `tcc-libc/aarch64/{start,sys_stubs}.S` to `.o` via
`$HOST_CC -target aarch64-linux-gnu`. tcc 0.9.26's aarch64 backend has
no assembler (no `arm64-asm.c`) and no inline-asm support, so .S inputs
-must be pre-compiled host-side.
+historically needed pre-compilation host-side; the patched arm64-asm.c
+now removes that requirement (see `docs/TCC-ARM64-ASM.md`).
amd64 and riscv64 backends both ship `CONFIG_TCC_ASM` and assemble .S
-in-container via tcc-boot2 itself (stages C+D in `boot3.sh`). The
+in-container via tcc-boot2 itself (stages C+D in `boot4.sh`). The
riscv64 .S files are macroed behind `#ifdef __TINYC__` because tcc's
riscv64 asm parser uses 3-operand load/store syntax (`ld rd, base, off`,
`sd base, src, off` — base first for stores) instead of GAS's
`ld rd, off(base)` / `sd src, off(base)`; the GAS path stays usable
for the Makefile's alpine-gcc fallback. The `boot2-alpine-gcc:riscv64`
-image is no longer used by `boot3.sh`.
+image is no longer used by `boot3.sh` / `boot4.sh`.
Replacing the aarch64 .S pair with `.P1pp` (or any in-container-buildable)
equivalents drops the host-cc dep entirely. After that, every
diff --git a/scripts/boot.sh b/scripts/boot.sh
@@ -4,8 +4,11 @@ set -ex
ARCH=$1
+rm -rf "build/${ARCH:?usage: scripts/boot.sh <arch>}"  # abort rather than wipe all of build/ when <arch> is missing
+
./scripts/boot0.sh $ARCH
./scripts/boot1.sh $ARCH
./scripts/boot2.sh $ARCH
./scripts/boot3.sh $ARCH
./scripts/boot4.sh $ARCH
+./scripts/boot5.sh $ARCH
diff --git a/scripts/boot3.sh b/scripts/boot3.sh
@@ -1,26 +1,16 @@
#!/bin/sh
-## boot3.sh — standalone four-stage tcc bootstrap.
+## boot3.sh — bootstrap tcc0 from cc.scm (Stage A of the four-stage tcc
+## chain; the tcc0 → tcc1 → tcc2 → tcc3 rebuild lives in boot4.sh).
##
-## README's `(define tcc (tcc1 tcc.c))`: cc.scm compiles tcc.flat.c
-## into tcc0; tcc0 compiles tcc.flat.c into tcc1; tcc1 does the same
-## to produce tcc2; tcc2 does the same to produce tcc3.
+## README's `(define tcc0 (scc tcc.c))`: cc.scm compiles tcc.flat.c into
+## tcc0. boot3 stops there. boot4 picks up tcc0 and self-hosts the rest
+## of the chain (tcc0 → tcc1 → tcc2 → tcc3, with tcc2 == tcc3 as the
+## fixed-point check).
##
-## tcc0 = tcc-source compiled by cc.scm
-## tcc1 = tcc-source compiled by tcc0
-## tcc2 = tcc-source compiled by tcc1
-## tcc3 = tcc-source compiled by tcc2
-##
-## The bootstrap fixed-point check is `tcc2 == tcc3`: once tcc is
-## compiling itself with no help from cc.scm, the chain reaches a
-## byte-identical fixed point. tcc0 ≠ tcc1 in *behavior* (not just
-## in code size) because cc.scm's emitted machine code introduces
-## subtle codegen-decision differences — e.g. on riscv64 cc.scm
-## misses several immediate-folding peepholes that tcc applies, so
-## tcc0(tcc.flat.c) emits ~200 more bytes of `.text` than
-## tcc1(tcc.flat.c) does. tcc1 is faithful tcc behavior (its source
-## is tcc.flat.c, run through the cc.scm-built tcc0 translator
-## semantically intact); tcc2 is the first binary whose machine code
-## was emitted by faithful tcc.
+## tcc0 = tcc-source compiled by cc.scm ← produced here
+## tcc1 = tcc-source compiled by tcc0 ← boot4
+## tcc2 = tcc-source compiled by tcc1 ← boot4
+## tcc3 = tcc-source compiled by tcc2 ← boot4
##
## ─── Inputs (host-side, auto-built if missing) ────────────────────────
## build/tcc/$TCC_TARGET/tcc.flat.c
@@ -38,24 +28,6 @@
## P1/P1-$ARCH.M1pp P1/P1.M1pp P1/P1pp.P1pp — M1pp pipeline
## P1/entry-libc.P1pp P1/elf-end.P1pp — link-time framing
## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment
-## tcc-libc/$ARCH/start.S — _start, calls __libc_init+main
-## tcc-libc/$ARCH/sys_stubs.S — sys_* syscall wrappers
-## (Throughout this script: tcc0/tcc1/tcc2/tcc3 are the four stages
-## above; tcc0 is the cc.scm-built bootstrap, tcc2/tcc3 form the
-## self-host fixed-point check.)
-## tcc-cc/mem.c — memcpy/memmove/memset/memcmp
-## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include/** (whole tree)
-## vendor/mes-libc/include/** — mes-libc headers for hello
-## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/libtcc1.c
-## (amd64: generic compiler helper runtime)
-## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/lib-arm64.c
-## (aarch64 + riscv64: TFmode soft-float)
-## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/va_list.c
-## (amd64: __va_start / __va_arg)
-## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/alloca86_64*.S
-## (amd64: alloca helpers)
-## build/tcc/$TCC_TARGET/tcc.flat.c — flattened tcc TU
-## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
@@ -63,25 +35,12 @@
##
## ─── Tools ────────────────────────────────────────────────────────────
## In container: scratch + busybox (no libc, no /etc, no resolver).
-## On host: none — every arch has CONFIG_TCC_ASM and assembles
-## .S inputs (start.S, sys_stubs.S) directly inside the
-## container in stages B/D/E. The aarch64 assembler is
-## the phase-1 arm64-asm.c that flatten patches into
-## tcc-0.9.26 (see docs/TCC-ARM64-ASM.md).
+## On host: none beyond the cc -E flatten of missing .flat.c inputs —
+## Stage A is pure scheme1 + M1pp + hex2pp; no asm step is required.
##
## ─── Outputs ──────────────────────────────────────────────────────────
-## build/$ARCH/boot3/tcc3 — final fixed-point self-host tcc
-## build/$ARCH/boot3/crt1.o
-## — tcc2-built startup object, kept outside
-## libc.a because it must lead link lines
-## build/$ARCH/boot3/libc.a
-## — tcc2-built archive of sys_stubs.o + mem.o
-## + libc.o
-## build/$ARCH/boot3/libtcc1.a
-## — tcc2-built tcc compiler helper archive
-## build/$ARCH/boot3/hello — mes-libc-linked smoke binary, run here
-## tcc2 and tcc3 are byte-identical (asserted at the end of this
-## script) — that equality is the fixed-point check.
+## build/$ARCH/boot3/tcc0 — cc.scm-built bootstrap tcc, consumed by
+## scripts/boot4.sh
##
## Usage: scripts/boot3.sh <arch>
## <arch> ∈ {aarch64, amd64, riscv64}
@@ -93,30 +52,9 @@ usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
ARCH=$1
case "$ARCH" in
- aarch64) PLATFORM=linux/arm64;
- TCC_TARGET=ARM64;
- LIB_HELPER_SRC=lib-arm64.c;
- LIB_HELPER_OBJ=lib-arm64.o;
- LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1";
- LIBTCC1_C_SRCS="lib-arm64.c";
- LIBTCC1_C_DEFS="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1";
- LIBTCC1_ASM_SRCS="" ;;
- amd64) PLATFORM=linux/amd64;
- TCC_TARGET=X86_64;
- LIB_HELPER_SRC=va_list.c;
- LIB_HELPER_OBJ=va_list.o;
- LIB_HELPER_DEFINES="-D TCC_TARGET_X86_64=1";
- LIBTCC1_C_SRCS="libtcc1.c va_list.c";
- LIBTCC1_C_DEFS="-D TCC_TARGET_X86_64=1";
- LIBTCC1_ASM_SRCS="alloca86_64.S alloca86_64-bt.S" ;;
- riscv64) PLATFORM=linux/riscv64;
- TCC_TARGET=RISCV64;
- LIB_HELPER_SRC=lib-arm64.c;
- LIB_HELPER_OBJ=lib-arm64.o;
- LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1";
- LIBTCC1_C_SRCS="lib-arm64.c";
- LIBTCC1_C_DEFS="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1";
- LIBTCC1_ASM_SRCS="" ;;
+ aarch64) PLATFORM=linux/arm64; TCC_TARGET=ARM64 ;;
+ amd64) PLATFORM=linux/amd64; TCC_TARGET=X86_64 ;;
+ riscv64) PLATFORM=linux/riscv64; TCC_TARGET=RISCV64 ;;
*) usage ;;
esac
@@ -132,7 +70,6 @@ STAGE=build/$ARCH/.boot3-stage
TCC_DIR=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c
TCC_FLAT=build/tcc/$TCC_TARGET/tcc.flat.c
LIBC_FLAT=build/$ARCH/vendor/mes-libc/libc.flat.c
-HOST_CC=${HOST_CC:-cc}
# ── ensure container image exists ─────────────────────────────────────
if ! podman image exists "$IMAGE"; then
@@ -151,7 +88,7 @@ fi
# tcc.flat.c + the unpacked $TCC_DIR/{include,lib} tree are produced
# together by stage1-flatten.sh; libc.flat.c by libc-flatten.sh. Both
# run on the host (cc -E), no container — auto-invoke if missing.
-if [ ! -e "$TCC_FLAT" ] || [ ! -d "$TCC_DIR/include" ] || [ ! -e "$TCC_DIR/lib/$LIB_HELPER_SRC" ] || [ ! -e build/tcc/stdarg-bridge.h ]; then
+if [ ! -e "$TCC_FLAT" ] || [ ! -d "$TCC_DIR/include" ] || [ ! -e build/tcc/stdarg-bridge.h ]; then
echo "[boot3 $ARCH] flatten tcc.flat.c (host)"
scripts/stage1-flatten.sh --arch "$TCC_TARGET"
fi
@@ -159,15 +96,11 @@ if [ ! -e "$LIBC_FLAT" ]; then
echo "[boot3 $ARCH] flatten libc.flat.c (host)"
scripts/libc-flatten.sh --arch "$ARCH"
fi
-for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do
- [ -e "$TCC_DIR/lib/$f" ] || { echo "[boot3 $ARCH] missing $TCC_DIR/lib/$f" >&2; exit 1; }
-done
# ── reset staging, copy inputs explicitly ─────────────────────────────
rm -rf "$STAGE"
-mkdir -p "$STAGE/in" "$STAGE/in/tcc-lib" "$STAGE/out" "$OUT"
-rm -f "$OUT/tcc0" "$OUT/tcc1" "$OUT/tcc2" \
- "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o"
+mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
+rm -f "$OUT/tcc0"
# Prior-stage binaries
cp "$BOOT1/M1pp" "$STAGE/in/M1pp"
@@ -188,85 +121,16 @@ cp P1/entry-libc.P1pp "$STAGE/in/entry-libc.P1pp"
cp P1/elf-end.P1pp "$STAGE/in/elf-end.P1pp"
cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
-# tcc-libc / tcc-cc helpers
-cp "tcc-libc/$ARCH/start.S" "$STAGE/in/start.S"
-cp "tcc-libc/$ARCH/sys_stubs.S" "$STAGE/in/sys_stubs.S"
-cp tcc-cc/mem.c "$STAGE/in/mem.c"
-
-# Per-arch libtcc1 helper sources. LIB_HELPER_SRC is always also in
-# LIBTCC1_C_SRCS (lib-arm64.c on aarch64/riscv64, va_list.c on amd64),
-# so a single staging path under tcc-lib/ covers both consumers — the
-# emit_helpers step (uses LIB_HELPER_SRC) and the emit_archive step
-# (iterates LIBTCC1_C_SRCS + LIBTCC1_ASM_SRCS).
-for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do
- cp "$TCC_DIR/lib/$f" "$STAGE/in/tcc-lib/$f"
-done
-
# Flattened TUs. The patched tcc <stdarg.h> bridge is already prepended
# (under #ifndef CCSCM) into both .flat.c files by the flatten scripts,
-# so the in-container compiles need no -I /work/in/tcc-include or
-# -include …/stdarg.h. hello.c uses forward declarations (no system
-# headers), so mes-include is no longer staged either.
+# so the in-container compiles need no -I/-include flags.
cp "$TCC_FLAT" "$STAGE/in/tcc.flat.c"
cp "$LIBC_FLAT" "$STAGE/in/libc.flat.c"
-cp scripts/boot-hello.c "$STAGE/in/hello.c"
-
-# Every arch's tcc-boot2 has CONFIG_TCC_ASM and assembles .S inputs
-# itself inside the container — no host cross-asm step.
-
# ── emit flat container build script ──────────────────────────────────
-# Generates a straight-line shell program: cc.scm bundle → tcc0 → tcc1
-# → tcc2 → tcc3 with all per-stage repetition unrolled and per-arch
-# values (LIB_HELPER_SRC/OBJ, LIBTCC1_C_SRCS, LIBTCC1_ASM_SRCS, etc.)
-# resolved on the host. Container shell sees only sequential exec —
+# Generates a straight-line shell program: cc.scm bundle → tcc0 ELF via
+# scheme1 + M1pp + hex2pp. Container shell sees only sequential exec —
# no functions, no for-loops, no parameter expansion.
-#
-# Stage A: cc.scm bundle, libc.P1pp + tcc.flat.P1pp via scheme1+cc.scm,
-# link tcc0 ELF via M1pp + hex2pp.
-# Stage B: tcc0 builds mem.o, libc.o, helper.o.
-# Stage C: tcc0 compiles+links tcc1.
-# Stage D: tcc1 rebuilds helpers, archives, compiles+links tcc2.
-# Stage E: tcc2 rebuilds helpers, archives, compiles+links tcc3 (host
-# asserts tcc2 == tcc3 after container exits).
-
-# Helper: emit the build_asm + build_helpers + archive_runtime closure
-# for one stage. $1 = compiler (path inside container), $2 = workdir.
-emit_helpers () {
- cc=$1
- workdir=$2
- echo "$cc -nostdlib -c -o $workdir/start.o /work/in/start.S"
- echo "$cc -nostdlib -c -o $workdir/sys_stubs.o /work/in/sys_stubs.S"
- echo "$cc -nostdlib -c -o $workdir/mem.o /work/in/mem.c"
- echo "$cc -nostdlib -c -o $workdir/libc.o /work/in/libc.flat.c"
- echo "$cc -nostdlib $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC"
-}
-emit_archive () {
- cc=$1
- workdir=$2
- libtcc1_objs=""
- echo "cp $workdir/start.o $workdir/crt1.o"
- echo "$cc -ar rcs $workdir/libc.a $workdir/sys_stubs.o $workdir/mem.o $workdir/libc.o"
- echo "mkdir -p $workdir/libtcc1-obj"
- for src in $LIBTCC1_C_SRCS; do
- obj=$workdir/libtcc1-obj/${src%.c}.o
- echo "$cc -nostdlib $LIBTCC1_C_DEFS -c -o $obj /work/in/tcc-lib/$src"
- libtcc1_objs="$libtcc1_objs $obj"
- done
- for src in $LIBTCC1_ASM_SRCS; do
- obj=$workdir/libtcc1-obj/${src%.S}.o
- echo "$cc -nostdlib -c -o $obj /work/in/tcc-lib/$src"
- libtcc1_objs="$libtcc1_objs $obj"
- done
- echo "$cc -ar rcs $workdir/libtcc1.a$libtcc1_objs"
-}
-emit_link_tcc () {
- cc=$1
- workdir=$2
- out=$3
- echo "$cc -nostdlib $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out"
-}
-
RUN_SCRIPT=$STAGE/in/run.sh
{
echo '#!/bin/sh'
@@ -280,62 +144,20 @@ RUN_SCRIPT=$STAGE/in/run.sh
echo '/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp'
echo '/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp'
echo '/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/tcc0'
- echo
- echo '# Stage B: tcc0 builds helper objects (stage1)'
- echo 'mkdir -p /tmp/stage1 /tmp/stage2 /tmp/stage3'
- emit_helpers /work/out/tcc0 /tmp/stage1
- echo
- echo '# Stage C: tcc0 -> tcc1 (link with raw .o files; no archive yet)'
- echo "/work/out/tcc0 -nostdlib /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1"
- echo 'chmod +x /work/out/tcc1'
- echo
- echo '# Stage D: tcc1 rebuilds helpers + archive, links tcc2'
- emit_helpers /work/out/tcc1 /tmp/stage2
- emit_archive /work/out/tcc1 /tmp/stage2
- emit_link_tcc /work/out/tcc1 /tmp/stage2 /work/out/tcc2
- echo 'chmod +x /work/out/tcc2'
- echo
- echo '# Stage E: tcc2 rebuilds helpers + archive, links tcc3.'
- echo '# (Host asserts tcc2 == tcc3 after the container exits.)'
- emit_helpers /work/out/tcc2 /tmp/stage3
- emit_archive /work/out/tcc2 /tmp/stage3
- emit_link_tcc /work/out/tcc2 /tmp/stage3 /work/out/tcc3
- echo 'chmod +x /work/out/tcc3'
- echo
- echo '# Publish the tcc2-built mes-libc link closure + smoke-test hello.'
- echo '# (tcc2 and tcc3 are byte-identical by the fixed-point check, so'
- echo '# rebuilding with tcc3 would only repeat the cycle.)'
- echo 'cp /tmp/stage3/crt1.o /tmp/stage3/libc.a /tmp/stage3/libtcc1.a /work/out/'
- echo '/work/out/tcc2 -nostdlib /work/out/crt1.o /work/in/hello.c /work/out/libc.a /work/out/libtcc1.a /work/out/libc.a -o /work/out/hello'
- echo 'chmod +x /work/out/hello'
- echo 'echo "--- run ---"'
- echo '/work/out/hello a b c'
} > "$RUN_SCRIPT"
chmod +x "$RUN_SCRIPT"
echo "[boot3 $ARCH] generated run.sh: $(wc -l <"$RUN_SCRIPT") lines"
# ── run flat build script in scratch+busybox container ────────────────
-echo "[boot3 $ARCH] cc.scm -> tcc0 -> tcc1 -> tcc2 -> tcc3"
+echo "[boot3 $ARCH] cc.scm -> tcc0"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=1024M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu /work/in/run.sh
-# ── fixed-point check (host-side; container has no cmp) ──────────────
-if ! cmp -s "$STAGE/out/tcc2" "$STAGE/out/tcc3"; then
- s2=$(wc -c <"$STAGE/out/tcc2")
- s3=$(wc -c <"$STAGE/out/tcc3")
- echo "[boot3 $ARCH] FIXED-POINT FAIL: tcc2 ($s2) != tcc3 ($s3)" >&2
- exit 1
-fi
-
-# ── copy outputs to final destination ─────────────────────────────────
-rm -f "$OUT/tcc0" "$OUT/tcc1" "$OUT/tcc2" \
- "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o"
-for f in tcc3 crt1.o libc.a libtcc1.a hello; do
- cp "$STAGE/out/$f" "$OUT/$f"
-done
-chmod 0700 "$OUT/tcc3" "$OUT/hello"
+# ── copy output to final destination ──────────────────────────────────
+cp "$STAGE/out/tcc0" "$OUT/tcc0"
+chmod 0700 "$OUT/tcc0"
-echo "[boot3 $ARCH] sizes: libtcc1.a=$(wc -c <"$OUT/libtcc1.a") libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")"
-echo "[boot3 $ARCH] OK -> $OUT/{tcc3, crt1.o, libc.a, libtcc1.a, hello} (fixed point: tcc2 == tcc3)"
+echo "[boot3 $ARCH] sizes: tcc0=$(wc -c <"$OUT/tcc0")"
+echo "[boot3 $ARCH] OK -> $OUT/tcc0"
diff --git a/scripts/boot4-calibrate.sh b/scripts/boot4-calibrate.sh
@@ -1,164 +0,0 @@
-#!/bin/sh
-## boot4-calibrate.sh — produce vendor/upstream/musl-1.2.5-skip-$ARCH.txt
-##
-## NOT on the boot.sh path. Generates the per-arch calibration list
-## boot4.sh uses to drop skip-on-fail logic from the container. Run
-## this once per architecture when the patch set, calibration arch, or
-## tcc version changes; commit the resulting file alongside the rest of
-## the vendored musl artifacts.
-##
-## What it does:
-## 1. Stage the same prerequisites boot4.sh stages (boot3/tcc3,
-## libtcc1.a, vendored overrides + deletes, pre-generated headers,
-## shim).
-## 2. Run a skip-on-fail compile loop in the container over every
-## musl source. Whatever tcc 0.9.26 cannot compile gets recorded.
-## 3. Copy the resulting skip list out to
-## vendor/upstream/musl-1.2.5-skip-$ARCH.txt.
-##
-## Boot4.sh then enumerates sources on the host and subtracts this
-## list, emitting a flat sequential build script with no in-container
-## branch on $TCC's exit code.
-##
-## Usage: scripts/boot4-calibrate.sh <amd64|aarch64|riscv64>
-
-set -eu
-
-usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; }
-[ "$#" -eq 1 ] || usage
-ARCH=$1
-
-case "$ARCH" in
- amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;;
- aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;;
- riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;;
- *) usage ;;
-esac
-
-ROOT=$(cd "$(dirname "$0")/.." && pwd)
-cd "$ROOT"
-
-IMAGE=boot2-scratch:$ARCH
-BOOT3=build/$ARCH/boot3
-STAGE=build/$ARCH/.boot4-calibrate
-MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz
-MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides
-MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt
-MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH
-BRIDGE_FILE=build/tcc/stdarg-bridge.h
-SKIP_OUT=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
-
-[ -x "$BOOT3/tcc3" ] || { echo "missing $BOOT3/tcc3 (run scripts/boot3.sh $ARCH)" >&2; exit 1; }
-[ -e "$BOOT3/libtcc1.a" ] || { echo "missing $BOOT3/libtcc1.a" >&2; exit 1; }
-[ -e "$MUSL_TARBALL" ] || { echo "missing $MUSL_TARBALL" >&2; exit 1; }
-[ -d "$MUSL_OVERRIDES" ] || { echo "missing $MUSL_OVERRIDES" >&2; exit 1; }
-[ -e "$MUSL_DELETES" ] || { echo "missing $MUSL_DELETES" >&2; exit 1; }
-[ -d "$MUSL_GENERATED" ] || { echo "missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; }
-[ -e "$BRIDGE_FILE" ] || { echo "missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; }
-
-if ! podman image exists "$IMAGE"; then
- podman build --platform "$PLATFORM" -t "$IMAGE" \
- -f scripts/Containerfile.scratch scripts/
-fi
-
-rm -rf "$STAGE"
-mkdir -p "$STAGE/in" "$STAGE/out"
-
-cp "$BOOT3/tcc3" "$STAGE/in/tcc"
-cp "$BOOT3/libtcc1.a" "$STAGE/in/libtcc1.a"
-tar xzf "$MUSL_TARBALL" -C "$STAGE/in/"
-MUSL_DIR=$STAGE/in/musl-1.2.5
-cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/"
-while read -r p; do
- [ -n "$p" ] && rm -rf "$MUSL_DIR/$p"
-done < "$MUSL_DELETES"
-cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h"
-cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h"
-cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h"
-
-echo "[calibrate $ARCH] running skip-on-fail compile loop in container"
-podman run --rm -i --pull=never --platform "$PLATFORM" \
- --tmpfs /tmp:size=1024M \
- -e MUSL_ARCH="$MUSL_ARCH" \
- -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
- sh -eu -s <<'CONTAINER'
-IN=/work/in
-OUT=/work/out
-TCC=$IN/tcc
-
-cd /tmp
-cp -R "$IN/musl-1.2.5" .
-cd musl-1.2.5
-
-mkdir -p obj/include/bits obj/src/internal
-cp $IN/musl-alltypes.h obj/include/bits/alltypes.h
-cp $IN/musl-syscall.h obj/include/bits/syscall.h
-echo '#define VERSION "1.2.5-tcc-boot4"' > obj/src/internal/version.h
-
-CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing
- -D_XOPEN_SOURCE=700
- -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/src/internal
- -I./src/include -I./src/internal -Iobj/include -I./include
- -O2 -fomit-frame-pointer
- -Werror=implicit-function-declaration -Werror=implicit-int
- -Werror=pointer-sign -Werror=pointer-arith"
-CFLAGS_C="$CFLAGS_BASE -include $IN/tcc-stdarg-bridge.h"
-CFLAGS_ASM="$CFLAGS_BASE"
-
-SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent
- src/env src/errno src/exit src/fcntl src/fenv src/internal
- src/ipc src/legacy src/linux src/locale src/malloc
- src/malloc/mallocng src/math src/misc src/mman src/mq
- src/multibyte src/network src/passwd src/prng src/process
- src/regex src/sched src/search src/select src/setjmp src/signal
- src/stat src/stdio src/stdlib src/string src/temp src/termios
- src/thread src/time src/unistd"
-
-BASE_SRCS=""; ARCH_SRCS=""
-for d in $SRC_TOP; do
- [ -d "$d" ] || continue
- for f in $d/*.c; do [ -f "$f" ] && BASE_SRCS="$BASE_SRCS $f"; done
- for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do
- [ -f "$f" ] && ARCH_SRCS="$ARCH_SRCS $f"
- done
-done
-REPLACED=""
-for a in $ARCH_SRCS; do
- p=${a%.*}
- head=${p%%/${MUSL_ARCH}/*}
- tail=${p#*/${MUSL_ARCH}/}
- REPLACED="$REPLACED $head/$tail"
-done
-KEEP=""
-for b in $BASE_SRCS; do
- stem=${b%.c}; skip=0
- for r in $REPLACED; do [ "$stem" = "$r" ] && { skip=1; break; }; done
- [ $skip -eq 0 ] && KEEP="$KEEP $b"
-done
-KEEP="$KEEP $ARCH_SRCS"
-
-mkdir -p obj/lib
-n=0; n_ok=0; n_skip=0
-: >$OUT/skipped.txt
-for src in $KEEP; do
- obj="obj/${src%.*}.o"
- mkdir -p "$(dirname $obj)"
- case "$src" in
- *.c) flags="$CFLAGS_C" ;;
- *.s | *.S) flags="$CFLAGS_ASM" ;;
- *) flags="$CFLAGS_C" ;;
- esac
- if $TCC $flags -c "$src" -o "$obj" >/tmp/compile.log 2>&1; then
- n_ok=$((n_ok+1))
- else
- n_skip=$((n_skip+1))
- echo "$src" >>$OUT/skipped.txt
- fi
- n=$((n+1))
- [ $((n % 200)) -eq 0 ] && echo " $n done (ok=$n_ok skip=$n_skip)"
-done
-echo " compiled=$n_ok skipped=$n_skip total=$n"
-CONTAINER
-
-sort -u "$STAGE/out/skipped.txt" > "$SKIP_OUT"
-echo "[calibrate $ARCH] wrote $SKIP_OUT ($(wc -l <"$SKIP_OUT") entries)"
diff --git a/scripts/boot4.sh b/scripts/boot4.sh
@@ -1,54 +1,110 @@
#!/bin/sh
-## boot4.sh — build musl-1.2.5 with boot3 artifacts and link hello.
+## boot4.sh — self-host tcc rebuild stages on top of boot3's tcc0.
##
-## Builds on top of boot3's verified-fixed-point tcc (tcc2 == tcc3) and
-## demonstrates that the same compiler can produce a working static libc
-## from upstream musl source — patched only as far as needed to work
-## around tcc's missing GCC extensions (register-asm-variable syscalls,
-## attribute(alias) weak refs, _Complex, x86_64 SSE/x87 inline asm).
+## boot3 produced tcc0 (cc.scm-built bootstrap). boot4 runs the rest of
+## the four-stage chain: tcc0 → tcc1 → tcc2 → tcc3. The bootstrap
+## fixed-point check is `tcc2 == tcc3`: once tcc is compiling itself
+## with no help from cc.scm, the chain reaches a byte-identical fixed
+## point. tcc0 ≠ tcc1 in *behavior* (not just in code size) because
+## cc.scm's emitted machine code introduces subtle codegen-decision
+## differences — e.g. on riscv64 cc.scm misses several immediate-folding
+## peepholes that tcc applies, so tcc0(tcc.flat.c) emits ~200 more bytes
+## of `.text` than tcc1(tcc.flat.c) does. tcc1 is faithful tcc behavior
+## (its source is tcc.flat.c, run through the cc.scm-built tcc0
+## translator semantically intact); tcc2 is the first binary whose
+## machine code was emitted by faithful tcc.
##
-## ─── Inputs ──────────────────────────────────────────────────────────
-## build/$ARCH/boot3/tcc3
-## — boot3's verified self-host tcc
-## build/$ARCH/boot3/libtcc1.a
-## — boot3's tcc runtime archive
-## vendor/upstream/musl-1.2.5.tar.gz
-## — pristine musl source
-## vendor/upstream/musl-1.2.5-overrides/
-## — tree of files that replace upstream
-## ones (tcc-compat patches; the post-
-## patch state vendored directly so the
-## build needs no `patch` binary). See
-## docs/MUSL.md.
-## vendor/upstream/musl-1.2.5-deletes.txt
-## — list of upstream files removed by the
-## same patch set (one path per line,
-## relative to musl-1.2.5/).
-## build/tcc/stdarg-bridge.h
-## — per-arch __builtin_va_list bridge,
-## generated by scripts/stage1-flatten.sh
-## (shared with boot3; one file, three
-## arches gated by #ifdef inside)
+## tcc0 = tcc-source compiled by cc.scm ← boot3
+## tcc1 = tcc-source compiled by tcc0 ← produced here
+## tcc2 = tcc-source compiled by tcc1 ← produced here
+## tcc3 = tcc-source compiled by tcc2 ← produced here
##
-## ─── Outputs ─────────────────────────────────────────────────────────
+## ─── Inputs (host-side, auto-built if missing) ────────────────────────
+## build/tcc/$TCC_TARGET/tcc.flat.c
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/{include,lib}
+## — flattened tcc TU + unpacked tree; built
+## via scripts/stage1-flatten.sh --arch
+## $TCC_TARGET (host cc -E, no container)
+## build/$ARCH/vendor/mes-libc/libc.flat.c
+## — flattened mes-libc TU; built via
+## scripts/libc-flatten.sh --arch $ARCH
+## (host cc -E, no container)
+##
+## ─── Inputs (sources, copied into staging) ────────────────────────────
+## tcc-libc/$ARCH/start.S — _start, calls __libc_init+main
+## tcc-libc/$ARCH/sys_stubs.S — sys_* syscall wrappers
+## tcc-cc/mem.c — memcpy/memmove/memset/memcmp
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/libtcc1.c
+## (amd64: generic compiler helper runtime)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/lib-arm64.c
+## (aarch64 + riscv64: TFmode soft-float)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/va_list.c
+## (amd64: __va_start / __va_arg)
+## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/lib/alloca86_64*.S
+## (amd64: alloca helpers)
+## build/tcc/$TCC_TARGET/tcc.flat.c — flattened tcc TU
+## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU
+## scripts/boot-hello.c — smoke binary
+##
+## ─── Inputs (binaries from prior stages) ──────────────────────────────
+## build/$ARCH/boot3/tcc0 — built by scripts/boot3.sh
+##
+## ─── Tools ────────────────────────────────────────────────────────────
+## In container: scratch + busybox (no libc, no /etc, no resolver).
+## On host: none — every arch has CONFIG_TCC_ASM and assembles
+## .S inputs (start.S, sys_stubs.S) directly inside the
+## container in stages B/D/E. The aarch64 assembler is
+## the phase-1 arm64-asm.c that flatten patches into
+## tcc-0.9.26 (see docs/TCC-ARM64-ASM.md).
+##
+## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot4/tcc3 — final fixed-point self-host tcc
+## build/$ARCH/boot4/crt1.o
+## — tcc2-built startup object, kept outside
+## libc.a because it must lead link lines
## build/$ARCH/boot4/libc.a
-## build/$ARCH/boot4/{crt1.o, crti.o, crtn.o}
-## build/$ARCH/boot4/hello — static, runs in the container
+## — tcc2-built archive of sys_stubs.o + mem.o
+## + libc.o
+## build/$ARCH/boot4/libtcc1.a
+## — tcc2-built tcc compiler helper archive
+## build/$ARCH/boot4/hello — mes-libc-linked smoke binary, run here
+## tcc2 and tcc3 are byte-identical (asserted at the end of this
+## script) — that equality is the fixed-point check.
##
## Usage: scripts/boot4.sh <arch>
-## <arch> ∈ {amd64, aarch64, riscv64}
-## All three architectures are verified end-to-end.
+## <arch> ∈ {aarch64, amd64, riscv64}
set -eu
-usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; }
+usage() { echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2; }
[ "$#" -eq 1 ] || usage
ARCH=$1
case "$ARCH" in
- amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;;
- aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;;
- riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;;
+ aarch64) PLATFORM=linux/arm64;
+ TCC_TARGET=ARM64;
+ LIB_HELPER_SRC=lib-arm64.c;
+ LIB_HELPER_OBJ=lib-arm64.o;
+ LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1";
+ LIBTCC1_C_SRCS="lib-arm64.c";
+ LIBTCC1_C_DEFS="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1";
+ LIBTCC1_ASM_SRCS="" ;;
+ amd64) PLATFORM=linux/amd64;
+ TCC_TARGET=X86_64;
+ LIB_HELPER_SRC=va_list.c;
+ LIB_HELPER_OBJ=va_list.o;
+ LIB_HELPER_DEFINES="-D TCC_TARGET_X86_64=1";
+ LIBTCC1_C_SRCS="libtcc1.c va_list.c";
+ LIBTCC1_C_DEFS="-D TCC_TARGET_X86_64=1";
+ LIBTCC1_ASM_SRCS="alloca86_64.S alloca86_64-bt.S" ;;
+ riscv64) PLATFORM=linux/riscv64;
+ TCC_TARGET=RISCV64;
+ LIB_HELPER_SRC=lib-arm64.c;
+ LIB_HELPER_OBJ=lib-arm64.o;
+ LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1";
+ LIBTCC1_C_SRCS="lib-arm64.c";
+ LIBTCC1_C_DEFS="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1";
+ LIBTCC1_ASM_SRCS="" ;;
*) usage ;;
esac
@@ -59,236 +115,182 @@ IMAGE=boot2-scratch:$ARCH
BOOT3=build/$ARCH/boot3
OUT=build/$ARCH/boot4
STAGE=build/$ARCH/.boot4-stage
-MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz
-MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides
-MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt
-MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH
-MUSL_SKIP=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
-BRIDGE_FILE=build/tcc/stdarg-bridge.h
-# ── prerequisites ─────────────────────────────────────────────────────
-[ -x "$BOOT3/tcc3" ] || { echo "[boot4 $ARCH] missing $BOOT3/tcc3 (run scripts/boot3.sh $ARCH)" >&2; exit 1; }
-[ -e "$BOOT3/libtcc1.a" ] || { echo "[boot4 $ARCH] missing $BOOT3/libtcc1.a (run scripts/boot3.sh $ARCH)" >&2; exit 1; }
-[ -e "$MUSL_TARBALL" ] || { echo "[boot4 $ARCH] missing $MUSL_TARBALL" >&2; exit 1; }
-[ -d "$MUSL_OVERRIDES" ] || { echo "[boot4 $ARCH] missing $MUSL_OVERRIDES" >&2; exit 1; }
-[ -e "$MUSL_DELETES" ] || { echo "[boot4 $ARCH] missing $MUSL_DELETES" >&2; exit 1; }
-[ -d "$MUSL_GENERATED" ] || { echo "[boot4 $ARCH] missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; }
-[ -e "$MUSL_SKIP" ] || { echo "[boot4 $ARCH] missing $MUSL_SKIP (run scripts/boot4-calibrate.sh $ARCH)" >&2; exit 1; }
-[ -e "$BRIDGE_FILE" ] || { echo "[boot4 $ARCH] missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; }
+TCC_DIR=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c
+TCC_FLAT=build/tcc/$TCC_TARGET/tcc.flat.c
+LIBC_FLAT=build/$ARCH/vendor/mes-libc/libc.flat.c
+# ── ensure container image exists ─────────────────────────────────────
if ! podman image exists "$IMAGE"; then
echo "[boot4 $ARCH] building $IMAGE"
podman build --platform "$PLATFORM" -t "$IMAGE" \
-f scripts/Containerfile.scratch scripts/
fi
-# ── stage inputs ──────────────────────────────────────────────────────
-# $STAGE/in/ — exactly what the container reads (bind-mounted /work/in)
-# $STAGE/_host/ — host-side scratch (enumeration outputs, intermediates);
-# not visible to the container
-# $STAGE/out/ — container writes here
-rm -rf "$STAGE"
-mkdir -p "$STAGE/in" "$STAGE/_host" "$STAGE/out" "$OUT"
-rm -f "$OUT/libtcc1.a"
-
-cp "$BOOT3/tcc3" "$STAGE/in/tcc"
-cp "$BOOT3/libtcc1.a" "$STAGE/in/libtcc1.a"
-# (No tcc-include/ stage — boot4 compiles musl, which provides its own
-# headers via -I./include / -Iarch/$MUSL_ARCH / -Iarch/generic / -Iobj/include.
-# tcc itself is invoked with -nostdinc so it never reads CONFIG_TCCDIR.)
-
-# Extract musl on the host, then apply overrides + deletes on the host
-# too — gives us a fully-prepared tree at $STAGE/in/musl-1.2.5/ that we
-# can enumerate to drive the (kaem-friendly) flat container script.
-# The container then just `cp -R`s the staged tree into tmpfs (its
-# bind-mounted /work/in is logically read-only).
-tar xzf "$MUSL_TARBALL" -C "$STAGE/in/"
-MUSL_DIR=$STAGE/in/musl-1.2.5
-cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/"
-while read -r p; do
- [ -n "$p" ] && rm -rf "$MUSL_DIR/$p"
-done < "$MUSL_DELETES"
+# ── prerequisite: prior-stage binaries ────────────────────────────────
+[ -x "$BOOT3/tcc0" ] || { echo "[boot4 $ARCH] missing $BOOT3/tcc0 (run scripts/boot3.sh $ARCH)" >&2; exit 1; }
-cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h"
-# Pre-generated alltypes.h + syscall.h for $MUSL_ARCH; replace the
-# in-container awk that ran mkalltypes.sed and the SYS_ rewrite. Source
-# of truth is scripts/musl-vendor.sh (regenerates these files).
-cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h"
-cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h"
-# version.h is pre-staged so the container body has no `>` redirection.
-echo '#define VERSION "1.2.5-tcc-boot4"' > "$STAGE/in/musl-version.h"
-
-cp scripts/boot-hello.c "$STAGE/in/hello.c"
-
-# ── enumerate musl sources on the host (kaem-friendly: no for/while/
-# case/${%}/${#}/$((..)) inside the container) ───────────────────────
-# Mirrors musl's Makefile rule: a per-arch override (under
-# $d/$MUSL_ARCH/) replaces the same-stem base file (under $d/). We
-# subtract the calibration skip list so the container script never
-# needs an `if $TCC ...; then ok else skip fi` branch.
-SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent
- src/env src/errno src/exit src/fcntl src/fenv src/internal
- src/ipc src/legacy src/linux src/locale src/malloc
- src/malloc/mallocng src/math src/misc src/mman src/mq
- src/multibyte src/network src/passwd src/prng src/process
- src/regex src/sched src/search src/select src/setjmp src/signal
- src/stat src/stdio src/stdlib src/string src/temp src/termios
- src/thread src/time src/unistd"
-
-(
- cd "$MUSL_DIR"
- for d in $SRC_TOP; do
- [ -d "$d" ] || continue
- for f in $d/*.c; do [ -f "$f" ] && echo "$f"; done
- done
-) > "$STAGE/_host/base.txt"
+# ── prerequisite: host-flattened sources + unpacked tcc tree ──────────
+# Normally these were produced by boot3 (auto-invoked by stage1-flatten
+# / libc-flatten there). Re-check here so boot4 runs standalone if a
+# user has tcc0 but blew away build/tcc/.
+if [ ! -e "$TCC_FLAT" ] || [ ! -d "$TCC_DIR/include" ] || [ ! -e "$TCC_DIR/lib/$LIB_HELPER_SRC" ] || [ ! -e build/tcc/stdarg-bridge.h ]; then
+ echo "[boot4 $ARCH] flatten tcc.flat.c (host)"
+ scripts/stage1-flatten.sh --arch "$TCC_TARGET"
+fi
+if [ ! -e "$LIBC_FLAT" ]; then
+ echo "[boot4 $ARCH] flatten libc.flat.c (host)"
+ scripts/libc-flatten.sh --arch "$ARCH"
+fi
+for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do
+ [ -e "$TCC_DIR/lib/$f" ] || { echo "[boot4 $ARCH] missing $TCC_DIR/lib/$f" >&2; exit 1; }
+done
-(
- cd "$MUSL_DIR"
- for d in $SRC_TOP; do
- [ -d "$d/$MUSL_ARCH" ] || continue
- for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do
- [ -f "$f" ] && echo "$f"
- done
- done
-) > "$STAGE/_host/arch.txt"
+# ── reset staging, copy inputs explicitly ─────────────────────────────
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/in/tcc-lib" "$STAGE/out" "$OUT"
+rm -f "$OUT/tcc1" "$OUT/tcc2" \
+ "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o"
-# REPLACED: bases that have arch-specific overrides (drop them from
-# BASE). KEEP = (BASE - REPLACED) ∪ ARCH, then minus calibration skips.
-awk -v ARCH="$MUSL_ARCH" '
- {
- sub(/\.[^.]*$/, "") # strip extension
- slot = "/" ARCH "/"
- i = index($0, slot)
- head = substr($0, 1, i - 1)
- tail = substr($0, i + length(slot))
- print head "/" tail
- }
-' "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/replaced.txt"
+# Prior-stage binary
+cp "$BOOT3/tcc0" "$STAGE/in/tcc0"
-# Filter base by removing stems that appear in replaced.
-awk -v REPF="$STAGE/_host/replaced.txt" '
- BEGIN { while ((getline l < REPF) > 0) rep[l] = 1 }
- {
- stem = $0
- sub(/\.c$/, "", stem)
- if (!(stem in rep)) print
- }
-' "$STAGE/_host/base.txt" > "$STAGE/_host/keep_base.txt"
+# tcc-libc / tcc-cc helpers
+cp "tcc-libc/$ARCH/start.S" "$STAGE/in/start.S"
+cp "tcc-libc/$ARCH/sys_stubs.S" "$STAGE/in/sys_stubs.S"
+cp tcc-cc/mem.c "$STAGE/in/mem.c"
-cat "$STAGE/_host/keep_base.txt" "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/keep.txt"
+# Per-arch libtcc1 helper sources. LIB_HELPER_SRC is always also in
+# LIBTCC1_C_SRCS (lib-arm64.c on aarch64/riscv64, va_list.c on amd64),
+# so a single staging path under tcc-lib/ covers both consumers — the
+# emit_helpers step (uses LIB_HELPER_SRC) and the emit_archive step
+# (iterates LIBTCC1_C_SRCS + LIBTCC1_ASM_SRCS).
+for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do
+ cp "$TCC_DIR/lib/$f" "$STAGE/in/tcc-lib/$f"
+done
-# Subtract the calibration skip list. Lines without a / are bogus; the
-# skip file is one path per line, comments allowed via leading '#'.
-awk -v SKIPF="$MUSL_SKIP" '
- BEGIN { while ((getline l < SKIPF) > 0) if (l !~ /^#/ && l != "") skip[l] = 1 }
- { if (!($0 in skip)) print }
-' "$STAGE/_host/keep.txt" > "$STAGE/_host/build-srcs.txt"
+# Flattened TUs. The patched tcc <stdarg.h> bridge is already prepended
+# (under #ifndef CCSCM) into both .flat.c files by the flatten scripts,
+# so the in-container compiles need no -I/-include flags. hello.c uses
+# forward declarations (no system headers).
+cp "$TCC_FLAT" "$STAGE/in/tcc.flat.c"
+cp "$LIBC_FLAT" "$STAGE/in/libc.flat.c"
-# Per-source-dir mkdir list (unique, for `mkdir -p obj/...`).
-awk '
- {
- sub(/\.[^.]*$/, "")
- if (match($0, /\/[^\/]*$/)) print "obj/" substr($0, 1, RSTART - 1)
- }
-' "$STAGE/_host/build-srcs.txt" | sort -u > "$STAGE/_host/build-objdirs.txt"
+cp scripts/boot-hello.c "$STAGE/in/hello.c"
-n_src=$(wc -l < "$STAGE/_host/build-srcs.txt")
-n_skip=$(wc -l < "$MUSL_SKIP")
-echo "[boot4 $ARCH] keep=$n_src skip=$n_skip (calibrated)"
+# Every arch's tcc0 (and each later stage) has CONFIG_TCC_ASM and assembles
+# .S inputs itself inside the container — no host cross-asm step.
# ── emit flat container build script ──────────────────────────────────
-# Generates a straight-line shell program: mkdir, cp, then one tcc
-# invocation per source, then ar, then link+run hello. No control flow
-# beyond sequential exec, no shell redirection, no `cd`; suitable for a
-# kaem-class shell. All paths are absolute.
-CWORK=/tmp/musl-1.2.5
-CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing -D_XOPEN_SOURCE=700 -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/src/internal -I$CWORK/src/include -I$CWORK/src/internal -I$CWORK/obj/include -I$CWORK/include -O2 -fomit-frame-pointer -Werror=implicit-function-declaration -Werror=implicit-int -Werror=pointer-sign -Werror=pointer-arith"
-CFLAGS_C="$CFLAGS_BASE -include /work/in/tcc-stdarg-bridge.h"
-CFLAGS_ASM="$CFLAGS_BASE"
-CRTFLAGS_C="$CFLAGS_C -fno-stack-protector -DCRT"
-CRTFLAGS_ASM="$CFLAGS_ASM -fno-stack-protector -DCRT"
+# Generates a straight-line shell program: tcc0 → tcc1 → tcc2 → tcc3
+# with all per-stage repetition unrolled and per-arch values
+# (LIB_HELPER_SRC/OBJ, LIBTCC1_C_SRCS, LIBTCC1_ASM_SRCS, etc.) resolved
+# on the host. Container shell sees only sequential exec — no
+# functions, no for-loops, no parameter expansion.
+#
+# Stage B: tcc0 builds start.o, sys_stubs.o, mem.o, libc.o, helper.o.
+# Stage C: tcc0 compiles+links tcc1.
+# Stage D: tcc1 rebuilds helpers, archives, compiles+links tcc2.
+# Stage E: tcc2 rebuilds helpers, archives, compiles+links tcc3 (host
+# asserts tcc2 == tcc3 after container exits).
-# Resolve the per-arch CRT branch on the host (eliminates the in-
-# container if/then/else).
-if [ -f "$MUSL_DIR/crt/$MUSL_ARCH/crti.s" ]; then
- CRT_LINES_TXT=$(printf '%s\n' \
- "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crti.s -o $CWORK/obj/crt/crti.o" \
- "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crtn.s -o $CWORK/obj/crt/crtn.o")
-else
- CRT_LINES_TXT=$(printf '%s\n' \
- "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crti.c -o $CWORK/obj/crt/crti.o" \
- "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crtn.c -o $CWORK/obj/crt/crtn.o")
-fi
+# Helper: emit one stage's compile commands (start.o, sys_stubs.o, mem.o,
+# libc.o, helper object). $1 = compiler (path inside container), $2 = workdir.
+emit_helpers () {
+    cc=$1       # compiler path as seen inside the container (plain global; POSIX sh has no local)
+    workdir=$2  # stage directory that receives the object files
+    echo "$cc -nostdlib -c -o $workdir/start.o /work/in/start.S"
+    echo "$cc -nostdlib -c -o $workdir/sys_stubs.o /work/in/sys_stubs.S"
+    echo "$cc -nostdlib -c -o $workdir/mem.o /work/in/mem.c"
+    echo "$cc -nostdlib -c -o $workdir/libc.o /work/in/libc.flat.c"
+    echo "$cc -nostdlib $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC"  # linked directly only by the stage C command; later stages link libtcc1.a instead
+}
+emit_archive () {
+    cc=$1            # compiler path inside the container; also invoked as "$cc -ar" to archive
+    workdir=$2       # stage directory whose start.o/sys_stubs.o/mem.o/libc.o the emit_helpers commands produce
+    libtcc1_objs=""  # accumulates " <obj>" for every libtcc1 member (each entry keeps its leading space)
+    echo "cp $workdir/start.o $workdir/crt1.o"
+    echo "$cc -ar rcs $workdir/libc.a $workdir/sys_stubs.o $workdir/mem.o $workdir/libc.o"
+    echo "mkdir -p $workdir/libtcc1-obj"
+    for src in $LIBTCC1_C_SRCS; do  # unquoted on purpose: space-separated per-arch list
+        obj=$workdir/libtcc1-obj/${src%.c}.o
+        echo "$cc -nostdlib $LIBTCC1_C_DEFS -c -o $obj /work/in/tcc-lib/$src"
+        libtcc1_objs="$libtcc1_objs $obj"
+    done
+    for src in $LIBTCC1_ASM_SRCS; do  # empty on aarch64/riscv64, so nothing is emitted there
+        obj=$workdir/libtcc1-obj/${src%.S}.o
+        echo "$cc -nostdlib -c -o $obj /work/in/tcc-lib/$src"
+        libtcc1_objs="$libtcc1_objs $obj"
+    done
+    echo "$cc -ar rcs $workdir/libtcc1.a$libtcc1_objs"  # no separator needed: $libtcc1_objs starts with a space
+}
+emit_link_tcc () {
+    cc=$1       # compiler (container path) that compiles tcc.flat.c and performs the link
+    workdir=$2  # stage directory holding crt1.o, libc.a and libtcc1.a
+    out=$3      # container path of the tcc binary to produce
+    echo "$cc -nostdlib $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out"  # NOTE(review): libc.a appears before and after libtcc1.a — presumably for single-pass archive resolution; confirm
+}
RUN_SCRIPT=$STAGE/in/run.sh
{
echo '#!/bin/sh'
echo 'set -eu'
echo
- echo '# stage A: working tree in tmpfs'
- echo "cp -R /work/in/musl-1.2.5 $CWORK"
- echo
- echo '# stage B: pre-generated headers + version stamp'
- echo "mkdir -p $CWORK/obj/include/bits $CWORK/obj/src/internal $CWORK/obj/lib $CWORK/obj/crt $CWORK/lib"
- echo "cp /work/in/musl-alltypes.h $CWORK/obj/include/bits/alltypes.h"
- echo "cp /work/in/musl-syscall.h $CWORK/obj/include/bits/syscall.h"
- echo "cp /work/in/musl-version.h $CWORK/obj/src/internal/version.h"
- echo
- echo '# per-source obj directories'
- while read -r d; do echo "mkdir -p $CWORK/$d"; done < "$STAGE/_host/build-objdirs.txt"
- echo
- echo "# stage C: compile sources ($n_src after calibration)"
- awk -v CC=/work/in/tcc -v CF="$CFLAGS_C" -v AF="$CFLAGS_ASM" -v PFX="$CWORK/" '
- {
- src = $0
- obj = "obj/" src
- sub(/\.[^.]*$/, ".o", obj)
- if (src ~ /\.c$/) flags = CF
- else if (src ~ /\.[sS]$/) flags = AF
- else flags = CF
- print CC " " flags " -c " PFX src " -o " PFX obj
- }
- ' "$STAGE/_host/build-srcs.txt"
+ echo '# Stage B: tcc0 builds helper objects (stage1)'
+ echo 'mkdir -p /tmp/stage1 /tmp/stage2 /tmp/stage3'
+ emit_helpers /work/in/tcc0 /tmp/stage1
echo
- echo '# stage D: CRT objects'
- echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/Scrt1.c -o $CWORK/obj/crt/Scrt1.o"
- echo "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crt1.c -o $CWORK/obj/crt/crt1.o"
- echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/rcrt1.c -o $CWORK/obj/crt/rcrt1.o"
- printf '%s\n' "$CRT_LINES_TXT"
- echo "cp $CWORK/obj/crt/Scrt1.o $CWORK/obj/crt/crt1.o $CWORK/obj/crt/rcrt1.o $CWORK/obj/crt/crti.o $CWORK/obj/crt/crtn.o $CWORK/lib/"
+ echo '# Stage C: tcc0 -> tcc1 (link with raw .o files; no archive yet)'
+ echo "/work/in/tcc0 -nostdlib /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1"
+ echo 'chmod +x /work/out/tcc1'
echo
- echo '# stage E: archive libc.a'
- printf '/work/in/tcc -ar rcs %s/lib/libc.a' "$CWORK"
- awk -v PFX="$CWORK/" '{ obj = "obj/" $0; sub(/\.[^.]*$/, ".o", obj); printf " %s%s", PFX, obj }' "$STAGE/_host/build-srcs.txt"
+ echo '# Stage D: tcc1 rebuilds helpers + archive, links tcc2'
+ emit_helpers /work/out/tcc1 /tmp/stage2
+ emit_archive /work/out/tcc1 /tmp/stage2
+ emit_link_tcc /work/out/tcc1 /tmp/stage2 /work/out/tcc2
+ echo 'chmod +x /work/out/tcc2'
echo
+ echo '# Stage E: tcc2 rebuilds helpers + archive, links tcc3.'
+ echo '# (Host asserts tcc2 == tcc3 after the container exits.)'
+ emit_helpers /work/out/tcc2 /tmp/stage3
+ emit_archive /work/out/tcc2 /tmp/stage3
+ emit_link_tcc /work/out/tcc2 /tmp/stage3 /work/out/tcc3
+ echo 'chmod +x /work/out/tcc3'
echo
- echo '# publish artifacts to /work/out'
- echo "cp $CWORK/lib/libc.a $CWORK/lib/crt1.o $CWORK/lib/crti.o $CWORK/lib/crtn.o /work/out/"
- echo
- echo '# stage F: link + run hello'
- echo "/work/in/tcc -static -nostdinc -nostdlib -include /work/in/tcc-stdarg-bridge.h -I$CWORK/include -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/include $CWORK/lib/crt1.o /work/in/hello.c -L$CWORK/lib -lc -L/work/in -ltcc1 -L$CWORK/lib -lc -o /work/out/hello"
+ echo '# Publish the tcc2-built mes-libc link closure + smoke-test hello.'
+ echo '# (tcc2 and tcc3 are byte-identical by the fixed-point check, so'
+ echo '# rebuilding with tcc3 would only repeat the cycle.)'
+ echo 'cp /tmp/stage3/crt1.o /tmp/stage3/libc.a /tmp/stage3/libtcc1.a /work/out/'
+ echo '/work/out/tcc2 -nostdlib /work/out/crt1.o /work/in/hello.c /work/out/libc.a /work/out/libtcc1.a /work/out/libc.a -o /work/out/hello'
+ echo 'chmod +x /work/out/hello'
echo 'echo "--- run ---"'
echo '/work/out/hello a b c'
} > "$RUN_SCRIPT"
chmod +x "$RUN_SCRIPT"
-echo "[boot4 $ARCH] generated run.sh: $(wc -l <"$RUN_SCRIPT") lines, $(wc -c <"$RUN_SCRIPT") bytes"
+echo "[boot4 $ARCH] generated run.sh: $(wc -l <"$RUN_SCRIPT") lines"
-# ── run pipeline in scratch+busybox container ─────────────────────────
-# The container body is a single line: source the host-generated build
-# script. All control flow (loops, conditionals, parameter expansion)
-# was resolved on the host; the container shell sees only sequential
-# `tcc -c …` / `cp` / `mkdir` lines.
-echo "[boot4 $ARCH] boot3/libtcc1.a + musl libc.a + crt -> hello"
+# ── run flat build script in scratch+busybox container ────────────────
+echo "[boot4 $ARCH] tcc0 -> tcc1 -> tcc2 -> tcc3"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=1024M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu /work/in/run.sh
-# ── copy outputs to final destination ────────────────────────────────
-for f in libc.a crt1.o crti.o crtn.o hello; do
+# ── fixed-point check (host-side; container has no cmp) ──────────────
+if ! cmp -s "$STAGE/out/tcc2" "$STAGE/out/tcc3"; then
+ s2=$(wc -c <"$STAGE/out/tcc2")
+ s3=$(wc -c <"$STAGE/out/tcc3")
+ echo "[boot4 $ARCH] FIXED-POINT FAIL: tcc2 ($s2) != tcc3 ($s3)" >&2
+ exit 1
+fi
+
+# ── copy outputs to final destination ─────────────────────────────────
+rm -f "$OUT/tcc1" "$OUT/tcc2" \
+ "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o"
+for f in tcc3 crt1.o libc.a libtcc1.a hello; do
cp "$STAGE/out/$f" "$OUT/$f"
done
+chmod 0700 "$OUT/tcc3" "$OUT/hello"
-echo "[boot4 $ARCH] sizes: libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")"
-echo "[boot4 $ARCH] OK -> $OUT/{libc.a, crt1.o, crti.o, crtn.o, hello}"
+echo "[boot4 $ARCH] sizes: libtcc1.a=$(wc -c <"$OUT/libtcc1.a") libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")"
+echo "[boot4 $ARCH] OK -> $OUT/{tcc3, crt1.o, libc.a, libtcc1.a, hello} (fixed point: tcc2 == tcc3)"
diff --git a/scripts/boot5-calibrate.sh b/scripts/boot5-calibrate.sh
@@ -0,0 +1,164 @@
+#!/bin/sh
+## boot5-calibrate.sh — produce vendor/upstream/musl-1.2.5-skip-$ARCH.txt
+##
+## NOT on the boot.sh path. Generates the per-arch calibration list
+## boot5.sh uses to drop skip-on-fail logic from the container. Run
+## this once per architecture when the patch set, calibration arch, or
+## tcc version changes; commit the resulting file alongside the rest of
+## the vendored musl artifacts.
+##
+## What it does:
+## 1. Stage the same prerequisites boot5.sh stages (boot4/tcc3,
+## libtcc1.a, vendored overrides + deletes, pre-generated headers,
+## stdarg bridge).
+## 2. Run a skip-on-fail compile loop in the container over every
+## musl source. Whatever tcc 0.9.26 cannot compile gets recorded.
+## 3. Copy the resulting skip list out to
+## vendor/upstream/musl-1.2.5-skip-$ARCH.txt.
+##
+## boot5.sh then enumerates sources on the host and subtracts this
+## list, emitting a flat sequential build script with no in-container
+## branch on $TCC's exit code.
+##
+## Usage: scripts/boot5-calibrate.sh <amd64|aarch64|riscv64>
+
+set -eu
+
+usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
+
+case "$ARCH" in
+ amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;;
+ aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;;
+ riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;;
+ *) usage ;;
+esac
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+BOOT4=build/$ARCH/boot4
+STAGE=build/$ARCH/.boot5-calibrate
+MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz
+MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides
+MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt
+MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH
+BRIDGE_FILE=build/tcc/stdarg-bridge.h
+SKIP_OUT=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
+
+[ -x "$BOOT4/tcc3" ] || { echo "missing $BOOT4/tcc3 (run scripts/boot4.sh $ARCH)" >&2; exit 1; }
+[ -e "$BOOT4/libtcc1.a" ] || { echo "missing $BOOT4/libtcc1.a" >&2; exit 1; }
+[ -e "$MUSL_TARBALL" ] || { echo "missing $MUSL_TARBALL" >&2; exit 1; }
+[ -d "$MUSL_OVERRIDES" ] || { echo "missing $MUSL_OVERRIDES" >&2; exit 1; }
+[ -e "$MUSL_DELETES" ] || { echo "missing $MUSL_DELETES" >&2; exit 1; }
+[ -d "$MUSL_GENERATED" ] || { echo "missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; }
+[ -e "$BRIDGE_FILE" ] || { echo "missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; }
+
+if ! podman image exists "$IMAGE"; then
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/out"
+
+cp "$BOOT4/tcc3" "$STAGE/in/tcc"
+cp "$BOOT4/libtcc1.a" "$STAGE/in/libtcc1.a"
+tar xzf "$MUSL_TARBALL" -C "$STAGE/in/"
+MUSL_DIR=$STAGE/in/musl-1.2.5
+cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/"
+while read -r p || [ -n "$p" ]; do  # read fails at EOF even when it filled $p; keep a final line that lacks a trailing newline
+    [ -n "$p" ] && rm -rf "$MUSL_DIR/$p"
+done < "$MUSL_DELETES"
+cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h"
+cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h"
+cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h"
+
+echo "[calibrate $ARCH] running skip-on-fail compile loop in container"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=1024M \
+ -e MUSL_ARCH="$MUSL_ARCH" \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu -s <<'CONTAINER'
+IN=/work/in
+OUT=/work/out
+TCC=$IN/tcc
+
+cd /tmp
+cp -R "$IN/musl-1.2.5" .
+cd musl-1.2.5
+
+mkdir -p obj/include/bits obj/src/internal
+cp $IN/musl-alltypes.h obj/include/bits/alltypes.h
+cp $IN/musl-syscall.h obj/include/bits/syscall.h
+echo '#define VERSION "1.2.5-tcc-boot5"' > obj/src/internal/version.h
+
+CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing
+ -D_XOPEN_SOURCE=700
+ -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/src/internal
+ -I./src/include -I./src/internal -Iobj/include -I./include
+ -O2 -fomit-frame-pointer
+ -Werror=implicit-function-declaration -Werror=implicit-int
+ -Werror=pointer-sign -Werror=pointer-arith"
+CFLAGS_C="$CFLAGS_BASE -include $IN/tcc-stdarg-bridge.h"
+CFLAGS_ASM="$CFLAGS_BASE"
+
+SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent
+ src/env src/errno src/exit src/fcntl src/fenv src/internal
+ src/ipc src/legacy src/linux src/locale src/malloc
+ src/malloc/mallocng src/math src/misc src/mman src/mq
+ src/multibyte src/network src/passwd src/prng src/process
+ src/regex src/sched src/search src/select src/setjmp src/signal
+ src/stat src/stdio src/stdlib src/string src/temp src/termios
+ src/thread src/time src/unistd"
+
+BASE_SRCS=""; ARCH_SRCS=""
+for d in $SRC_TOP; do
+ [ -d "$d" ] || continue
+ for f in $d/*.c; do [ -f "$f" ] && BASE_SRCS="$BASE_SRCS $f"; done
+ for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do
+ [ -f "$f" ] && ARCH_SRCS="$ARCH_SRCS $f"
+ done
+done
+REPLACED=""  # base-file stems (no extension) that have a per-arch replacement
+for a in $ARCH_SRCS; do
+  p=${a%.*}                     # drop extension:     d/$MUSL_ARCH/name.S -> d/$MUSL_ARCH/name
+  head=${p%%/${MUSL_ARCH}/*}    # part before the arch directory -> d
+  tail=${p#*/${MUSL_ARCH}/}     # part after the arch directory  -> name
+  REPLACED="$REPLACED $head/$tail"
+done
+KEEP=""  # base sources with no per-arch replacement; arch sources are appended below
+for b in $BASE_SRCS; do
+  stem=${b%.c}; skip=0
+  for r in $REPLACED; do [ "$stem" = "$r" ] && { skip=1; break; }; done  # linear scan over the replaced stems
+  [ $skip -eq 0 ] && KEEP="$KEEP $b"
+done
+KEEP="$KEEP $ARCH_SRCS"  # final compile list = (base - replaced) + arch-specific
+
+mkdir -p obj/lib
+n=0; n_ok=0; n_skip=0
+: >$OUT/skipped.txt
+for src in $KEEP; do
+ obj="obj/${src%.*}.o"
+ mkdir -p "$(dirname $obj)"
+ case "$src" in
+ *.c) flags="$CFLAGS_C" ;;
+ *.s | *.S) flags="$CFLAGS_ASM" ;;
+ *) flags="$CFLAGS_C" ;;
+ esac
+ if $TCC $flags -c "$src" -o "$obj" >/tmp/compile.log 2>&1; then
+ n_ok=$((n_ok+1))
+ else
+ n_skip=$((n_skip+1))
+ echo "$src" >>$OUT/skipped.txt
+ fi
+ n=$((n+1))
+ [ $((n % 200)) -eq 0 ] && echo " $n done (ok=$n_ok skip=$n_skip)"
+done
+echo " compiled=$n_ok skipped=$n_skip total=$n"
+CONTAINER
+
+sort -u "$STAGE/out/skipped.txt" > "$SKIP_OUT"
+echo "[calibrate $ARCH] wrote $SKIP_OUT ($(wc -l <"$SKIP_OUT") entries)"
diff --git a/scripts/boot5.sh b/scripts/boot5.sh
@@ -0,0 +1,294 @@
+#!/bin/sh
+## boot5.sh — build musl-1.2.5 with boot4 artifacts and link hello.
+##
+## Builds on top of boot4's verified-fixed-point tcc (tcc2 == tcc3) and
+## demonstrates that the same compiler can produce a working static libc
+## from upstream musl source — patched only as far as needed to work
+## around tcc's missing GCC extensions (register-asm-variable syscalls,
+## attribute(alias) weak refs, _Complex, x86_64 SSE/x87 inline asm).
+##
+## ─── Inputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot4/tcc3
+## — boot4's verified self-host tcc
+## build/$ARCH/boot4/libtcc1.a
+## — boot4's tcc runtime archive
+## vendor/upstream/musl-1.2.5.tar.gz
+## — pristine musl source
+## vendor/upstream/musl-1.2.5-overrides/
+## — tree of files that replace upstream
+## ones (tcc-compat patches; the post-
+## patch state vendored directly so the
+## build needs no `patch` binary). See
+## docs/MUSL.md.
+## vendor/upstream/musl-1.2.5-deletes.txt
+## — list of upstream files removed by the
+## same patch set (one path per line,
+## relative to musl-1.2.5/).
+## build/tcc/stdarg-bridge.h
+## — per-arch __builtin_va_list bridge,
+## generated by scripts/stage1-flatten.sh
+## (shared with boot3/boot4; one file,
+## three arches gated by #ifdef inside)
+##
+## ─── Outputs ─────────────────────────────────────────────────────────
+## build/$ARCH/boot5/libc.a
+## build/$ARCH/boot5/{crt1.o, crti.o, crtn.o}
+## build/$ARCH/boot5/hello — static, runs in the container
+##
+## Usage: scripts/boot5.sh <arch>
+## <arch> ∈ {amd64, aarch64, riscv64}
+## All three architectures are verified end-to-end.
+
+set -eu
+
+usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; }
+[ "$#" -eq 1 ] || usage
+ARCH=$1
+
+case "$ARCH" in
+ amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;;
+ aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;;
+ riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;;
+ *) usage ;;
+esac
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+IMAGE=boot2-scratch:$ARCH
+BOOT4=build/$ARCH/boot4
+OUT=build/$ARCH/boot5
+STAGE=build/$ARCH/.boot5-stage
+MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz
+MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides
+MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt
+MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH
+MUSL_SKIP=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
+BRIDGE_FILE=build/tcc/stdarg-bridge.h
+
+# ── prerequisites ─────────────────────────────────────────────────────
+[ -x "$BOOT4/tcc3" ] || { echo "[boot5 $ARCH] missing $BOOT4/tcc3 (run scripts/boot4.sh $ARCH)" >&2; exit 1; }
+[ -e "$BOOT4/libtcc1.a" ] || { echo "[boot5 $ARCH] missing $BOOT4/libtcc1.a (run scripts/boot4.sh $ARCH)" >&2; exit 1; }
+[ -e "$MUSL_TARBALL" ] || { echo "[boot5 $ARCH] missing $MUSL_TARBALL" >&2; exit 1; }
+[ -d "$MUSL_OVERRIDES" ] || { echo "[boot5 $ARCH] missing $MUSL_OVERRIDES" >&2; exit 1; }
+[ -e "$MUSL_DELETES" ] || { echo "[boot5 $ARCH] missing $MUSL_DELETES" >&2; exit 1; }
+[ -d "$MUSL_GENERATED" ] || { echo "[boot5 $ARCH] missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; }
+[ -e "$MUSL_SKIP" ] || { echo "[boot5 $ARCH] missing $MUSL_SKIP (run scripts/boot5-calibrate.sh $ARCH)" >&2; exit 1; }
+[ -e "$BRIDGE_FILE" ] || { echo "[boot5 $ARCH] missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; }
+
+if ! podman image exists "$IMAGE"; then
+ echo "[boot5 $ARCH] building $IMAGE"
+ podman build --platform "$PLATFORM" -t "$IMAGE" \
+ -f scripts/Containerfile.scratch scripts/
+fi
+
+# ── stage inputs ──────────────────────────────────────────────────────
+# $STAGE/in/ — exactly what the container reads (bind-mounted /work/in)
+# $STAGE/_host/ — host-side scratch (enumeration outputs, intermediates);
+# not visible to the container
+# $STAGE/out/ — container writes here
+rm -rf "$STAGE"
+mkdir -p "$STAGE/in" "$STAGE/_host" "$STAGE/out" "$OUT"
+rm -f "$OUT/libtcc1.a"
+
+cp "$BOOT4/tcc3" "$STAGE/in/tcc"
+cp "$BOOT4/libtcc1.a" "$STAGE/in/libtcc1.a"
+# (No tcc-include/ stage — boot5 compiles musl, which provides its own
+# headers via -I./include / -Iarch/$MUSL_ARCH / -Iarch/generic / -Iobj/include.
+# tcc itself is invoked with -nostdinc so it never reads CONFIG_TCCDIR.)
+
+# Extract musl on the host, then apply overrides + deletes on the host
+# too — gives us a fully-prepared tree at $STAGE/in/musl-1.2.5/ that we
+# can enumerate to drive the (kaem-friendly) flat container script.
+# The container then just `cp -R`s the staged tree into tmpfs (its
+# bind-mounted /work/in is logically read-only).
+tar xzf "$MUSL_TARBALL" -C "$STAGE/in/"
+MUSL_DIR=$STAGE/in/musl-1.2.5
+cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/"
+while read -r p; do
+ [ -n "$p" ] && rm -rf "$MUSL_DIR/$p"
+done < "$MUSL_DELETES"
+
+cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h"
+# Pre-generated alltypes.h + syscall.h for $MUSL_ARCH; replace the
+# in-container awk that ran mkalltypes.sed and the SYS_ rewrite. Source
+# of truth is scripts/musl-vendor.sh (regenerates these files).
+cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h"
+cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h"
+# version.h is pre-staged so the container body has no `>` redirection.
+echo '#define VERSION "1.2.5-tcc-boot5"' > "$STAGE/in/musl-version.h"
+
+cp scripts/boot-hello.c "$STAGE/in/hello.c"
+
+# ── enumerate musl sources on the host (kaem-friendly: no for/while/
+# case/${%}/${#}/$((..)) inside the container) ───────────────────────
+# Mirrors musl's Makefile rule: a per-arch override (under
+# $d/$MUSL_ARCH/) replaces the same-stem base file (under $d/). We
+# subtract the calibration skip list so the container script never
+# needs an `if $TCC ...; then ok else skip fi` branch.
+SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent
+ src/env src/errno src/exit src/fcntl src/fenv src/internal
+ src/ipc src/legacy src/linux src/locale src/malloc
+ src/malloc/mallocng src/math src/misc src/mman src/mq
+ src/multibyte src/network src/passwd src/prng src/process
+ src/regex src/sched src/search src/select src/setjmp src/signal
+ src/stat src/stdio src/stdlib src/string src/temp src/termios
+ src/thread src/time src/unistd"
+
+(
+  cd "$MUSL_DIR"
+  for d in $SRC_TOP; do
+    [ -d "$d" ] || continue
+    for f in $d/*.c; do [ ! -f "$f" ] || echo "$f"; done  # unmatched glob: skip, status stays 0
+  done
+) > "$STAGE/_host/base.txt"  # generic (non-arch) .c sources, relative to the musl tree
+
+(
+  cd "$MUSL_DIR"
+  for d in $SRC_TOP; do
+    [ -d "$d/$MUSL_ARCH" ] || continue
+    for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do
+      [ ! -f "$f" ] || echo "$f"  # `||` form: a literal unmatched pattern must not leave status 1
+    done
+  done
+) > "$STAGE/_host/arch.txt"  # per-arch .c/.s/.S sources under $d/$MUSL_ARCH/
+
+# REPLACED = bases with a per-arch override; KEEP = (BASE - REPLACED) ∪ ARCH
+# minus calibration skips. LC_ALL=C keeps KEEP's order locale-independent.
+awk -v ARCH="$MUSL_ARCH" '
+  {
+    sub(/\.[^.]*$/, "")              # strip extension
+    slot = "/" ARCH "/"
+    i = index($0, slot)
+    head = substr($0, 1, i - 1)
+    tail = substr($0, i + length(slot))
+    print head "/" tail
+  }
+' "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/replaced.txt"
+
+# Filter base by removing stems that appear in replaced.
+awk -v REPF="$STAGE/_host/replaced.txt" '
+  BEGIN { while ((getline l < REPF) > 0) rep[l] = 1 }
+  {
+    stem = $0
+    sub(/\.c$/, "", stem)
+    if (!(stem in rep)) print
+  }
+' "$STAGE/_host/base.txt" > "$STAGE/_host/keep_base.txt"
+
+cat "$STAGE/_host/keep_base.txt" "$STAGE/_host/arch.txt" | LC_ALL=C sort -u > "$STAGE/_host/keep.txt"
+
+# Subtract the calibration skip list: one source path per line, matched
+# exactly; blank lines and lines starting with '#' are ignored.
+awk -v SKIPF="$MUSL_SKIP" '
+  BEGIN { while ((getline l < SKIPF) > 0) if (l !~ /^#/ && l != "") skip[l] = 1 }
+  { if (!($0 in skip)) print }
+' "$STAGE/_host/keep.txt" > "$STAGE/_host/build-srcs.txt"
+
+# Per-source-dir mkdir list (unique, for `mkdir -p obj/...`).
+awk '
+  {
+    sub(/\.[^.]*$/, "")
+    if (match($0, /\/[^\/]*$/)) print "obj/" substr($0, 1, RSTART - 1)
+  }
+' "$STAGE/_host/build-srcs.txt" | LC_ALL=C sort -u > "$STAGE/_host/build-objdirs.txt"
+
+n_src=$(awk 'END { print NR }' "$STAGE/_host/build-srcs.txt")  # awk, not wc: no padded output on BSD hosts
+n_skip=$(awk '!/^#/ && $0 != "" { n++ } END { print n + 0 }' "$MUSL_SKIP")  # real entries only: no comments/blanks
+echo "[boot5 $ARCH] keep=$n_src skip=$n_skip (calibrated)"
+
+# ── emit flat container build script ──────────────────────────────────
+# Generates a straight-line shell program: mkdir, cp, then one tcc
+# invocation per source, then ar, then link+run hello. No control flow
+# beyond sequential exec, no shell redirection, no `cd`; suitable for a
+# kaem-class shell. All paths are absolute.
+CWORK=/tmp/musl-1.2.5
+CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing -D_XOPEN_SOURCE=700 -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/src/internal -I$CWORK/src/include -I$CWORK/src/internal -I$CWORK/obj/include -I$CWORK/include -O2 -fomit-frame-pointer -Werror=implicit-function-declaration -Werror=implicit-int -Werror=pointer-sign -Werror=pointer-arith"
+CFLAGS_C="$CFLAGS_BASE -include /work/in/tcc-stdarg-bridge.h"
+CFLAGS_ASM="$CFLAGS_BASE"
+CRTFLAGS_C="$CFLAGS_C -fno-stack-protector -DCRT"
+CRTFLAGS_ASM="$CFLAGS_ASM -fno-stack-protector -DCRT"
+
+# Resolve the per-arch CRT branch on the host (eliminates the in-
+# container if/then/else).
+if [ -f "$MUSL_DIR/crt/$MUSL_ARCH/crti.s" ]; then
+ CRT_LINES_TXT=$(printf '%s\n' \
+ "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crti.s -o $CWORK/obj/crt/crti.o" \
+ "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crtn.s -o $CWORK/obj/crt/crtn.o")
+else
+ CRT_LINES_TXT=$(printf '%s\n' \
+ "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crti.c -o $CWORK/obj/crt/crti.o" \
+ "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crtn.c -o $CWORK/obj/crt/crtn.o")
+fi
+
+RUN_SCRIPT=$STAGE/in/run.sh
+{
+ echo '#!/bin/sh'
+ echo 'set -eu'
+ echo
+ echo '# stage A: working tree in tmpfs'
+ echo "cp -R /work/in/musl-1.2.5 $CWORK"
+ echo
+ echo '# stage B: pre-generated headers + version stamp'
+ echo "mkdir -p $CWORK/obj/include/bits $CWORK/obj/src/internal $CWORK/obj/lib $CWORK/obj/crt $CWORK/lib"
+ echo "cp /work/in/musl-alltypes.h $CWORK/obj/include/bits/alltypes.h"
+ echo "cp /work/in/musl-syscall.h $CWORK/obj/include/bits/syscall.h"
+ echo "cp /work/in/musl-version.h $CWORK/obj/src/internal/version.h"
+ echo
+ echo '# per-source obj directories'
+ while read -r d; do echo "mkdir -p $CWORK/$d"; done < "$STAGE/_host/build-objdirs.txt"
+ echo
+ echo "# stage C: compile sources ($n_src after calibration)"
+ awk -v CC=/work/in/tcc -v CF="$CFLAGS_C" -v AF="$CFLAGS_ASM" -v PFX="$CWORK/" '
+ {
+ src = $0
+ obj = "obj/" src
+ sub(/\.[^.]*$/, ".o", obj)
+ if (src ~ /\.c$/) flags = CF
+ else if (src ~ /\.[sS]$/) flags = AF
+ else flags = CF
+ print CC " " flags " -c " PFX src " -o " PFX obj
+ }
+ ' "$STAGE/_host/build-srcs.txt"
+ echo
+ echo '# stage D: CRT objects'
+ echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/Scrt1.c -o $CWORK/obj/crt/Scrt1.o"
+ echo "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crt1.c -o $CWORK/obj/crt/crt1.o"
+ echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/rcrt1.c -o $CWORK/obj/crt/rcrt1.o"
+ printf '%s\n' "$CRT_LINES_TXT"
+ echo "cp $CWORK/obj/crt/Scrt1.o $CWORK/obj/crt/crt1.o $CWORK/obj/crt/rcrt1.o $CWORK/obj/crt/crti.o $CWORK/obj/crt/crtn.o $CWORK/lib/"
+ echo
+ echo '# stage E: archive libc.a'
+ printf '/work/in/tcc -ar rcs %s/lib/libc.a' "$CWORK"
+ awk -v PFX="$CWORK/" '{ obj = "obj/" $0; sub(/\.[^.]*$/, ".o", obj); printf " %s%s", PFX, obj }' "$STAGE/_host/build-srcs.txt"
+ echo
+ echo
+ echo '# publish artifacts to /work/out'
+ echo "cp $CWORK/lib/libc.a $CWORK/lib/crt1.o $CWORK/lib/crti.o $CWORK/lib/crtn.o /work/out/"
+ echo
+ echo '# stage F: link + run hello'
+ echo "/work/in/tcc -static -nostdinc -nostdlib -include /work/in/tcc-stdarg-bridge.h -I$CWORK/include -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/include $CWORK/lib/crt1.o /work/in/hello.c -L$CWORK/lib -lc -L/work/in -ltcc1 -L$CWORK/lib -lc -o /work/out/hello"
+ echo 'echo "--- run ---"'
+ echo '/work/out/hello a b c'
+} > "$RUN_SCRIPT"
+chmod +x "$RUN_SCRIPT"
+echo "[boot5 $ARCH] generated run.sh: $(wc -l <"$RUN_SCRIPT") lines, $(wc -c <"$RUN_SCRIPT") bytes"
+
+# ── run pipeline in scratch+busybox container ─────────────────────────
+# The container command is a single line: run the host-generated build
+# script under `sh -eu`. All control flow (loops, conditionals, parameter
+# expansion) was resolved on the host; the container shell sees only
+# sequential `tcc …` / `cp` / `mkdir` / `echo` lines and the hello run.
+echo "[boot5 $ARCH] boot4/libtcc1.a + musl libc.a + crt -> hello"
+podman run --rm -i --pull=never --platform "$PLATFORM" \
+ --tmpfs /tmp:size=1024M \
+ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
+ sh -eu /work/in/run.sh
+
+# ── copy outputs to final destination ────────────────────────────────
+for f in libc.a crt1.o crti.o crtn.o hello; do
+ cp "$STAGE/out/$f" "$OUT/$f"
+done
+
+echo "[boot5 $ARCH] sizes: libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")"
+echo "[boot5 $ARCH] OK -> $OUT/{libc.a, crt1.o, crti.o, crtn.o, hello}"
diff --git a/scripts/musl-vendor.sh b/scripts/musl-vendor.sh
@@ -5,7 +5,7 @@
## NOT on the boot.sh path. This is a vendoring helper run on a dev host
## any time vendor/upstream/musl-1.2.5-tcc.patch changes; it requires
## the host's `patch` binary. The output it produces (the overrides
-## directory tree + the deletes list) is what boot4.sh consumes — boot4
+## directory tree + the deletes list) is what boot5.sh consumes — boot5
## itself never invokes `patch`.
##
## What the script does:
@@ -73,7 +73,7 @@ while read -r rel; do
fi
done
-# (4) per-arch sweep: aarch64 + riscv64 files boot4 can't compile yet.
+# (4) per-arch sweep: aarch64 + riscv64 files boot5 can't compile yet.
# Globs are expanded against the upstream tree, so every entry is a
# concrete file path (no glob in the deletes list itself).
for f in "$SRC"/src/math/aarch64/*.c; do
@@ -114,7 +114,7 @@ sort -u "$DELETES" -o "$DELETES"
# (6) pre-generate per-arch alltypes.h + syscall.h.
# These are deterministic given the upstream tree + chosen arch (musl's
# Makefile runs the same two transformations at build time). Vendoring
-# them lets the boot4 container drop awk entirely — it just `cp`s the
+# them lets the boot5 container drop awk entirely — it just `cp`s the
# right file in. mkalltypes runs on the post-overrides tree (overrides
# don't touch alltypes.h.in, but applying them keeps the procedure
# coherent). Apply overrides + deletes to a fresh post-patch copy and
@@ -154,7 +154,7 @@ for MARCH in aarch64 x86_64 riscv64; do
# int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321);
# };
#
- # On all three boot4 arches sizeof(time_t)==sizeof(long)==8, so
+ # On all three boot5 arches sizeof(time_t)==sizeof(long)==8, so
# both bitfields are 0 width and the layout is identical to the
# simple two-field form. The leading bitfield does not warn (a
# sibling follows); the trailing one does, 387 times per build.
diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh
@@ -212,7 +212,7 @@ apply_our_patch asm-hash-bol-only "$SRC/tccpp.c"
# tail comments (`mov %rax,%rsi # size, a second parm…`) that the
# x86_64 assembler now rejects with "end of line expected". They are
# inert documentation; strip them. The file is only compiled when
-# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot3.sh), so this
+# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot4.sh), so this
# rewrite is a no-op on aarch64/riscv64 builds.
awk '{ sub(/\t#.*$/, ""); print }' "$SRC/lib/alloca86_64-bt.S" \
> "$SRC/lib/alloca86_64-bt.S.tmp"
@@ -273,8 +273,8 @@ FLAT=$WORK/tcc.flat.c
"$SRC/tcc.c" > "$FLAT.body"
# Publish the post-patch tcc <stdarg.h> as a shared bridge file.
-# libc-flatten.sh prepends the same bridge to libc.flat.c, so the boot3
-# container compile no longer needs `-I /work/in/tcc-include
+# libc-flatten.sh prepends the same bridge to libc.flat.c, so the
+# boot3/boot4 container compiles no longer need `-I /work/in/tcc-include
# -include /work/in/tcc-include/stdarg.h`. The patched stdarg.h is
# byte-identical across X86_64 / ARM64 / RISCV64 (per-arch logic lives
# inside its #ifdefs), so a cross-arch shared path is fine — whichever