commit 3a02008437033476d122af2f3ec92dcd6c61235e parent 15391ae704f949821f6f8e7f7088a106e12b18d0 Author: Ryan Sepassi <rsepassi@gmail.com> Date: Wed, 6 May 2026 15:10:24 -0700 reorg PR1: move scripts/ into boot|bootprep|tcc|tests|tools, flatten vendor/upstream Pure relocation + path rewrites — no behavioral change. Per REORG.md: - scripts/boot{0..6,.sh,-run-scheme1.sh}, lib-*.sh, Containerfile.{empty,scratch} -> boot/ (+ boot/containers/) - scripts/{prep-src,prep-musl,musl-vendor,boot5-calibrate, libc-flatten,stage1-flatten,prune-p1-table}.sh, mkalltypes.awk -> bootprep/ - vendor/boot2-include -> bootprep/include - tcc-{cc,gcc,libc}/ -> tcc/{cc,gcc,libc}/; scripts/{build-tcc-gcc,run-gcc-libc-flat-tcc}.sh -> tcc/scripts/ - scripts/{lint,count-lines,disasm-elf,diag-livebootstrap-qemu}.sh, m1-symbols.py -> tools/ - scripts/Containerfile.{busybox,alpine-gcc} -> tests/containers/; boot-run-scheme1.sh -> tests/ (only tests/run-suite.sh invokes it) - vendor/upstream/musl-1.2.5-* -> vendor/musl/ (overrides, deletes, skip-*, generated, patches/tcc.patch); musl-1.2.5.tar.gz -> vendor/musl/1.2.5.tar.gz; tcc-0.9.26.tar.gz -> vendor/tcc/ - scripts/simple-patches/tcc-0.9.26{,-lb} -> vendor/tcc/patches{,-lb} - build/<arch>/tcc-{cc,gcc,libc} -> build/<arch>/tcc/{cc,gcc,libc} Deleted: confirmed orphans (boot-build-tcc-tcc.sh, mk-seed-tools.sh) and the unwired live-bootstrap-mirror experiment (stage2-alpine.sh, stage3-rebuild.sh, boot-undef.sh) plus the matching docs/TCC.md sections and docs/LIBC.md instruction line. PR2 follow-up will move boot{4,5,6}-gen-runscm.sh + the static fixtures (boot3-run.scm, boot-hello.c) out of scripts/ and split the runtime/prep boundary properly. Diffstat:
229 files changed, 3518 insertions(+), 4490 deletions(-)
diff --git a/Makefile b/Makefile @@ -1,6 +1,6 @@ # boot2 — Make-driven bootstrap pipeline. # -# The bootN.sh scripts under scripts/ are the canonical builders. This +# The bootN.sh scripts under boot/ are the canonical builders. This # Makefile is dependency tracking: every target is a real output path, # and the rule body invokes the right bootN.sh (or prep-src.sh / # prep-musl.sh) script with the appropriate ARCH and DRIVER. @@ -41,7 +41,7 @@ ifeq ($(filter $(DRIVER),$(ALL_DRIVERS)),) $(error DRIVER '$(DRIVER)' not supported — use one of $(ALL_DRIVERS)) endif -# Per-arch metadata mirrored from scripts/lib-arch.sh. +# Per-arch metadata mirrored from boot/lib-arch.sh. PLATFORM_aarch64 := linux/arm64 PLATFORM_amd64 := linux/amd64 PLATFORM_riscv64 := linux/riscv64 @@ -95,19 +95,19 @@ clean: # canonical-tree build. PREP_SRC_COMMON_SRCS := \ - scripts/prep-src.sh scripts/lib-arch.sh \ - scripts/stage1-flatten.sh scripts/libc-flatten.sh \ + bootprep/prep-src.sh boot/lib-arch.sh \ + bootprep/stage1-flatten.sh bootprep/libc-flatten.sh \ M1pp/M1pp.P1 hex2pp/hex2pp.P1 \ P1/P1.M1pp P1/P1pp.P1pp \ P1/entry-libc.P1pp P1/entry-plain.P1pp P1/elf-end.P1pp \ catm/catm.P1pp \ scheme1/scheme1.P1pp scheme1/prelude.scm \ cc/cc.scm cc/main.scm \ - tcc-cc/mem.c \ + tcc/cc/mem.c \ scripts/boot-hello.c \ seed-kernel/kernel.c \ - vendor/upstream/musl-1.2.5.tar.gz \ - vendor/upstream/musl-1.2.5-deletes.txt + vendor/musl/1.2.5.tar.gz \ + vendor/musl/deletes.txt prep_src_arch_srcs = \ vendor/seed/$1/hex0-seed \ @@ -115,17 +115,17 @@ prep_src_arch_srcs = \ vendor/seed/$1/hex0.hex0 vendor/seed/$1/hex1.hex0 vendor/seed/$1/hex2.hex1 \ vendor/seed/$1/catm.hex2 vendor/seed/$1/M0.hex2 \ P1/P1-$1.M1 P1/P1-$1.M1pp \ - tcc-libc/$1/start.S tcc-libc/$1/sys_stubs.S \ + tcc/libc/$1/start.S tcc/libc/$1/sys_stubs.S \ $(wildcard seed-kernel/arch/$1/*) \ $(wildcard seed-kernel/user/*) \ - vendor/upstream/musl-1.2.5-generated/$(MUSL_ARCH_$1)/alltypes.h \ - 
vendor/upstream/musl-1.2.5-generated/$(MUSL_ARCH_$1)/syscall.h \ - $(wildcard vendor/upstream/musl-1.2.5-skip-$1.txt) \ - $(shell find vendor/upstream/musl-1.2.5-overrides -type f 2>/dev/null) \ + vendor/musl/generated/$(MUSL_ARCH_$1)/alltypes.h \ + vendor/musl/generated/$(MUSL_ARCH_$1)/syscall.h \ + $(wildcard vendor/musl/skip-$1.txt) \ + $(shell find vendor/musl/overrides -type f 2>/dev/null) \ $(shell find vendor/mes-libc -type f \( -name '*.c' -o -name '*.h' \) 2>/dev/null) \ $(wildcard vendor/mes-libc/patches/*.before) \ $(wildcard vendor/mes-libc/patches/*.after) \ - $(wildcard vendor/upstream/tcc-0.9.26.tar.gz) + $(wildcard vendor/tcc/0.9.26.tar.gz) # DRIVER=seed bootN stages run scheme1 under QEMU using the podman-built # boot6 kernel. Add that as a make dep so a clean-tree seed build pulls @@ -135,17 +135,17 @@ seed_kernel_dep = $(if $(filter seed,$2),build/$1/podman/boot6/$(KERNEL_NAME_$1) # Per-arch prep-src + prep-musl rules. Driver-independent. define PREP_RULES build/$1/src/.stamp: $$(PREP_SRC_COMMON_SRCS) $$(call prep_src_arch_srcs,$1) - scripts/prep-src.sh $1 + bootprep/prep-src.sh $1 @touch $$@ # Filtered musl tree. Depends on the canonical tree from prep-src and # (when present) the committed per-arch skip list. If the skip list is -# missing, prep-musl.sh runs scripts/boot5-calibrate.sh, which drives +# missing, prep-musl.sh runs bootprep/boot5-calibrate.sh, which drives # its own boot4 build outside this make graph — keep make's deps simple. 
build/$1/src/musl/.stamp: build/$1/src/.stamp \ - scripts/prep-musl.sh scripts/lib-arch.sh \ - $$(wildcard vendor/upstream/musl-1.2.5-skip-$1.txt) - scripts/prep-musl.sh $1 + bootprep/prep-musl.sh boot/lib-arch.sh \ + $$(wildcard vendor/musl/skip-$1.txt) + bootprep/prep-musl.sh $1 @mkdir -p $$(@D) && touch $$@ endef @@ -164,9 +164,9 @@ define BOOT_CHAIN_RULES # boot0: hex0-seed -> hex2 / M0 / catm build/$1/$2/boot0/.stamp: \ build/$1/src/.stamp \ - scripts/boot0.sh scripts/lib-arch.sh scripts/lib-pipeline.sh \ + boot/boot0.sh boot/lib-arch.sh boot/lib-pipeline.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot0.sh $1 + DRIVER=$2 boot/boot0.sh $1 @touch $$@ build/$1/$2/boot0/hex2 build/$1/$2/boot0/M0 build/$1/$2/boot0/catm: \ @@ -175,9 +175,9 @@ build/$1/$2/boot0/hex2 build/$1/$2/boot0/M0 build/$1/$2/boot0/catm: \ # boot1: M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp build/$1/$2/boot1/.stamp: \ build/$1/$2/boot0/.stamp \ - scripts/boot1.sh scripts/lib-arch.sh scripts/lib-pipeline.sh \ + boot/boot1.sh boot/lib-arch.sh boot/lib-pipeline.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot1.sh $1 + DRIVER=$2 boot/boot1.sh $1 @touch $$@ build/$1/$2/boot1/M1pp build/$1/$2/boot1/hex2pp: build/$1/$2/boot1/.stamp ; @@ -185,9 +185,9 @@ build/$1/$2/boot1/M1pp build/$1/$2/boot1/hex2pp: build/$1/$2/boot1/.stamp ; # boot2: catm.P1pp + scheme1.P1pp -> catm + scheme1 build/$1/$2/boot2/.stamp: \ build/$1/$2/boot1/.stamp build/$1/$2/boot0/.stamp \ - scripts/boot2.sh scripts/lib-arch.sh scripts/lib-pipeline.sh \ + boot/boot2.sh boot/lib-arch.sh boot/lib-pipeline.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot2.sh $1 + DRIVER=$2 boot/boot2.sh $1 @touch $$@ build/$1/$2/boot2/catm build/$1/$2/boot2/scheme1: build/$1/$2/boot2/.stamp ; @@ -195,10 +195,10 @@ build/$1/$2/boot2/catm build/$1/$2/boot2/scheme1: build/$1/$2/boot2/.stamp ; # boot3: cc.scm-built bootstrap tcc (tcc0) build/$1/$2/boot3/.stamp: \ build/$1/$2/boot2/.stamp build/$1/$2/boot1/.stamp \ - 
scripts/boot3.sh scripts/lib-arch.sh scripts/lib-runscm.sh \ + boot/boot3.sh boot/lib-arch.sh boot/lib-runscm.sh \ scripts/boot3-run.scm \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot3.sh $1 + DRIVER=$2 boot/boot3.sh $1 @touch $$@ build/$1/$2/boot3/tcc0 build/$1/$2/boot3/libc.P1pp \ @@ -207,10 +207,10 @@ build/$1/$2/boot3/tcc.flat.P1pp: build/$1/$2/boot3/.stamp ; # boot4: tcc0 -> tcc1 -> tcc2 -> tcc3 self-host chain (+ libc.a, libtcc1.a, hello) build/$1/$2/boot4/.stamp: \ build/$1/$2/boot3/.stamp build/$1/$2/boot2/.stamp \ - scripts/boot4.sh scripts/boot4-gen-runscm.sh \ - scripts/lib-arch.sh scripts/lib-runscm.sh \ + boot/boot4.sh scripts/boot4-gen-runscm.sh \ + boot/lib-arch.sh boot/lib-runscm.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot4.sh $1 + DRIVER=$2 boot/boot4.sh $1 @touch $$@ build/$1/$2/boot4/tcc1 build/$1/$2/boot4/tcc2 \ @@ -222,10 +222,10 @@ build/$1/$2/boot4/libtcc1.a: build/$1/$2/boot4/.stamp ; build/$1/$2/boot5/.stamp: \ build/$1/$2/boot4/.stamp build/$1/$2/boot2/.stamp \ build/$1/src/musl/.stamp \ - scripts/boot5.sh scripts/boot5-gen-runscm.sh \ - scripts/lib-arch.sh scripts/lib-runscm.sh \ + boot/boot5.sh scripts/boot5-gen-runscm.sh \ + boot/lib-arch.sh boot/lib-runscm.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot5.sh $1 + DRIVER=$2 boot/boot5.sh $1 @touch $$@ build/$1/$2/boot5/libc.a build/$1/$2/boot5/crt1.o \ @@ -235,10 +235,10 @@ build/$1/$2/boot5/hello: build/$1/$2/boot5/.stamp ; # boot6: seed-kernel ELF/Image, built with boot4's tcc3 build/$1/$2/boot6/.stamp: \ build/$1/$2/boot4/.stamp build/$1/$2/boot2/.stamp \ - scripts/boot6.sh scripts/boot6-gen-runscm.sh \ - scripts/lib-arch.sh scripts/lib-runscm.sh \ + boot/boot6.sh scripts/boot6-gen-runscm.sh \ + boot/lib-arch.sh boot/lib-runscm.sh \ $$(call seed_kernel_dep,$1,$2) - DRIVER=$2 scripts/boot6.sh $1 + DRIVER=$2 boot/boot6.sh $1 @touch $$@ build/$1/$2/boot6/$$(KERNEL_NAME_$1): build/$1/$2/boot6/.stamp ; @@ -284,7 +284,7 @@ CLOC_FILES := \ cc/cc.scm 
cloc: - @sh scripts/count-lines.sh $(CLOC_FILES) + @sh tools/count-lines.sh $(CLOC_FILES) # ── Test infrastructure (suites + their build deps) ────────────────────── # tests/Makefile owns the test-build rules and the `test` / `image` / diff --git a/README.md b/README.md @@ -57,11 +57,7 @@ ## Architectures × drivers -| | DRIVER=podman | DRIVER=seed | -|---------|------------------------|--------------------------------------------| -| aarch64 | boot0 → boot6 complete | boot0 → boot6 complete | -| amd64 | boot0 → boot6 complete | boot0 → boot2 complete; boot3+ unvalidated | -| riscv64 | boot0 → boot6 complete | boot0 → boot6 complete | +`DRIVER={podman,seed} x {aarch64,amd64,riscv64}` `DRIVER` selects the runtime that executes each bootN stage: @@ -82,9 +78,9 @@ they coexist. End-to-end via the driver script: ```sh -./scripts/boot.sh aarch64 # default DRIVER=podman -DRIVER=seed ./scripts/boot.sh aarch64 # re-run on the tcc-built kernel -./scripts/boot.sh --help # env vars (DRIVER, BOOT*_TIMEOUT, …) +./boot/boot.sh aarch64 # default DRIVER=podman +DRIVER=seed ./boot/boot.sh aarch64 # re-run on the tcc-built kernel +./boot/boot.sh --help # env vars (DRIVER, BOOT*_TIMEOUT, …) ``` Or via path-based Make targets — outputs are the targets, so deps walk diff --git a/boot/boot.sh b/boot/boot.sh @@ -0,0 +1,90 @@ +#!/bin/sh +## boot.sh — drive boot0 → boot6 end-to-end under one driver. +## +## Usage: boot/boot.sh <arch> +## DRIVER=seed boot/boot.sh <amd64|aarch64|riscv64> +## DRIVER=podman boot/boot.sh <amd64|aarch64|riscv64> +## +## DRIVER (default podman) is exported and consumed by each bootN.sh. +## Outputs land at build/$ARCH/$DRIVER/bootN/, so the two driver trees +## coexist on disk. 
DRIVER=seed runs the build pipeline on top of the +## podman-built boot6 kernel at build/$ARCH/podman/boot6/{Image,kernel.elf}; +## first-time setup therefore requires one prior podman pass: +## ./boot/boot.sh <arch> # default DRIVER=podman +## DRIVER=seed ./boot/boot.sh <arch> # re-run on tcc-built kernel +## Subsequent DRIVER=seed runs reuse the Image directly — no stashing. + +set -eu + +case "${1:-}" in + -h|--help) + cat <<'EOF' +boot.sh — drive boot0 → boot6 end-to-end under one driver. + +Usage: + boot/boot.sh <aarch64|amd64|riscv64> + +Environment variables (all optional): + DRIVER podman (default) | seed. + podman: containerised builds. + seed: builds run inside the tcc-built + seed kernel under qemu (requires + one prior DRIVER=podman pass to + mint build/$ARCH/podman/boot6/). + BOOT3_TIMEOUT (default 1800) boot3 (scheme1) wall-clock seconds. + BOOT4_TIMEOUT (default 5400) boot4 (tcc1/2/3) wall-clock seconds. + BOOT5_TIMEOUT (default 7200) boot5 (musl) wall-clock seconds. + BOOT6_TIMEOUT (default 1200) boot6 (kernel) wall-clock seconds. + QEMU_MEM (default 3072M) guest RAM passed to the seed-driver qemu. + TCC_BOOTSTRAP_RELAX_FIXEDPOINT set to 1 in boot4 to accept tcc2 != tcc3. + After a codegen-altering tcc patch the + two-stage rule needs a third bounce to + converge; the next boot4 run, started + from this run's tcc3, will reach + tcc2 == tcc3 with no extra knob. +EOF + exit 0 + ;; +esac + +. boot/lib-arch.sh +bootlib_init boot "${1:-}" + +if [ "$DRIVER" = seed ]; then + KERNEL=build/$ARCH/podman/boot6/$KERNEL_NAME + if [ ! -f "$KERNEL" ]; then + echo "[$BOOT_TAG] missing $KERNEL" >&2 + echo "[$BOOT_TAG] run './boot/boot.sh $ARCH' first (default DRIVER=podman) to produce it" >&2 + exit 1 + fi +fi + +# Wipe only this driver's tree so the other driver's outputs survive +# (the seed driver consumes build/$ARCH/podman/boot6/$KERNEL_NAME). 
+rm -rf build/$ARCH/$DRIVER + +# Per-stage timing comes from each child's own bootlib EXIT trap +# (`[bootN/$DRIVER/$ARCH] done in Xs (cum Ys)`); this orchestrator only +# adds its own total at the end (also via the lib trap). + +# A0a: build the canonical generated source tree at build/$ARCH/src/. +# Boot stages read source from there exclusively (no flatten/unpack/ +# patch inside boot{N}.sh). +./bootprep/prep-src.sh $ARCH + +./boot/boot0.sh $ARCH +./boot/boot1.sh $ARCH +./boot/boot2.sh $ARCH +./boot/boot3.sh $ARCH +./boot/boot4.sh $ARCH + +# A0b: apply the per-arch musl skip filter (needs tcc3 from boot4 if +# the calibration list is missing; the committed list is the common +# case and runs without compiler). +./bootprep/prep-musl.sh $ARCH + +./boot/boot5.sh $ARCH + +# boot6 builds the seed-kernel ELF/Image with boot4's tcc3 (no `ld -T`, +# no objcopy). +./boot/boot6.sh $ARCH diff --git a/boot/boot0.sh b/boot/boot0.sh @@ -0,0 +1,48 @@ +#!/bin/sh +## boot0.sh — seed bootstrap: hex0-seed → hex0 → hex1 → hex2 → catm → M0. +## +## Stage 0 of the README's chain. From the ~400-byte vendored hex0-seed, +## brings up the three binaries every later stage depends on (hex2, catm, +## M0). +## +## ─── Inputs (sources, from canonical tree) ─────────────────────────── +## build/$ARCH/src/bin/hex0-seed +## build/$ARCH/src/src/vendor-seed/{hex0.hex0, hex1.hex0, hex2.hex1, +## catm.hex2, M0.hex2, ELF.hex2} +## +## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/$DRIVER/boot0/{hex2, catm, M0} +## +## Usage: boot/boot0.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). + +set -eu + +. boot/lib-arch.sh +bootlib_init boot0 "${1:-}" +driver_init scratch +require_src + +. 
boot/lib-pipeline.sh +pipeline_init "$STAGE" "$OUT" "$DRIVER" + +# ─── inputs (from canonical src tree) ───────────────────────────────── +pipeline_input hex0-seed "build/$ARCH/src/bin/hex0-seed" +for f in hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2 ELF.hex2; do + pipeline_input_from_src "vendor-seed/$f" +done + +# ─── pipeline ───────────────────────────────────────────────────────── +echo "[$BOOT_TAG] hex0-seed -> hex0 -> hex1 -> hex2 -> catm -> M0" + +stage hex0-seed hex0.hex0 hex0 -- hex0.hex0 -- hex0 +stage hex0 hex1.hex0 hex1 -- hex1.hex0 -- hex1 +stage hex1 hex2.hex1 hex2 -- hex2.hex1 -- hex2 +stage hex2 catm.hex2 catm -- catm.hex2 -- catm +stage catm M0.combined.hex2 ELF.hex2 M0.hex2 -- ELF.hex2 M0.hex2 -- M0.combined.hex2 +stage hex2 M0.combined.hex2 M0 -- M0.combined.hex2 -- M0 + +pipeline_export hex2 catm M0 +pipeline_run + +echo "[$BOOT_TAG] OK -> $OUT/{hex2, catm, M0}" diff --git a/boot/boot1.sh b/boot/boot1.sh @@ -0,0 +1,65 @@ +#!/bin/sh +## boot1.sh — build the self-hosted M1pp + hex2pp pair. +## +## Stage 1 of the README's chain: produces M1pp and hex2pp from their +## .P1 sources via the seed M0 + hex2 chain. catm is rebuilt later in +## boot2 from catm.P1pp. +## +## ─── Inputs (sources, from canonical tree) ─────────────────────────── +## build/$ARCH/src/src/M1pp/M1pp.P1 +## build/$ARCH/src/src/hex2pp/hex2pp.P1 +## build/$ARCH/src/src/P1/P1-$ARCH.M1 +## build/$ARCH/src/src/vendor-seed/ELF.hex2 +## +## ─── Inputs (binaries from prior stages) ────────────────────────────── +## build/$ARCH/$DRIVER/boot0/{hex2, M0, catm} +## +## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} +## +## Usage: boot/boot1.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). + +set -eu + +. boot/lib-arch.sh +bootlib_init boot1 "${1:-}" +driver_init scratch +require_src + +BOOT0=build/$ARCH/$DRIVER/boot0 +require_prev "$BOOT0" hex2 M0 catm + +. 
boot/lib-pipeline.sh +pipeline_init "$STAGE" "$OUT" "$DRIVER" + +# ─── inputs ─────────────────────────────────────────────────────────── +pipeline_input hex2 "$BOOT0/hex2" +pipeline_input M0 "$BOOT0/M0" +pipeline_input catm "$BOOT0/catm" +pipeline_input_from_src "P1/P1-$ARCH.M1" P1.M1 +pipeline_input_from_src vendor-seed/ELF.hex2 +pipeline_input_from_src M1pp/M1pp.P1 +pipeline_input_from_src hex2pp/hex2pp.P1 + +# ─── pipeline ───────────────────────────────────────────────────────── +# .P1 -> ELF, applied to each of M1pp.P1 and hex2pp.P1: +# catm P1.M1 + <src> -> combined.M1 +# M0 combined.M1 -> prog.hex2 +# catm ELF.hex2 + prog.hex2 -> linked.hex2 +# hex2 linked.hex2 -> ELF binary +build_p1() { # $1 = source .P1, $2 = output binary name + stage catm combined.M1 P1.M1 "$1" -- P1.M1 "$1" -- combined.M1 + stage M0 combined.M1 prog.hex2 -- combined.M1 -- prog.hex2 + stage catm linked.hex2 ELF.hex2 prog.hex2 -- ELF.hex2 prog.hex2 -- linked.hex2 + stage hex2 linked.hex2 "$2" -- linked.hex2 -- "$2" +} + +echo "[$BOOT_TAG] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp" +build_p1 M1pp.P1 M1pp +build_p1 hex2pp.P1 hex2pp + +pipeline_export M1pp hex2pp +pipeline_run + +echo "[$BOOT_TAG] OK -> $OUT/{M1pp, hex2pp}" diff --git a/boot/boot2.sh b/boot/boot2.sh @@ -0,0 +1,73 @@ +#!/bin/sh +## boot2.sh — rebuild catm via M1pp+hex2pp, then build scheme1. +## +## Stage 2 of the README's chain. First rebuilds catm from catm.P1pp via +## the freshly-built M1pp+hex2pp pipeline (replacing the seed boot0 catm +## so later stages have zero boot0 dependencies); then builds the +## scheme1 interpreter from scheme1.P1pp using the new catm. 
+## +## ─── Inputs (sources, from canonical tree) ─────────────────────────── +## build/$ARCH/src/src/catm/catm.P1pp +## build/$ARCH/src/src/scheme1/scheme1.P1pp +## build/$ARCH/src/src/P1/{P1-$ARCH.M1pp, P1.M1pp, P1pp.P1pp} +## build/$ARCH/src/src/vendor-seed/ELF.hex2 +## +## ─── Inputs (binaries from prior stages) ────────────────────────────── +## build/$ARCH/$DRIVER/boot0/catm (only to bootstrap catm.P1pp build) +## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} +## +## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/$DRIVER/boot2/{catm, scheme1} +## +## Usage: boot/boot2.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). + +set -eu + +. boot/lib-arch.sh +bootlib_init boot2 "${1:-}" +driver_init scratch +require_src + +BOOT0=build/$ARCH/$DRIVER/boot0 +BOOT1=build/$ARCH/$DRIVER/boot1 +require_prev "$BOOT0" catm +require_prev "$BOOT1" M1pp hex2pp + +. boot/lib-pipeline.sh +pipeline_init "$STAGE" "$OUT" "$DRIVER" + +# ─── inputs ─────────────────────────────────────────────────────────── +pipeline_input catm0 "$BOOT0/catm" # bootstrap; replaced by output 'catm' +pipeline_input M1pp "$BOOT1/M1pp" +pipeline_input hex2pp "$BOOT1/hex2pp" +pipeline_input_from_src "P1/P1-$ARCH.M1pp" backend.M1pp +pipeline_input_from_src P1/P1.M1pp frontend.M1pp +pipeline_input_from_src P1/P1pp.P1pp libp1pp.P1pp +pipeline_input_from_src vendor-seed/ELF.hex2 +pipeline_input_from_src catm/catm.P1pp +pipeline_input_from_src scheme1/scheme1.P1pp + +# ─── pipeline ───────────────────────────────────────────────────────── +# .P1pp -> ELF, applied to each of catm.P1pp and scheme1.P1pp: +# catm backend + frontend + libp1pp + <src> -> combined.M1pp +# M1pp combined.M1pp -> expanded.hex2pp +# catm ELF.hex2 + expanded.hex2pp -> linked.hex2pp +# hex2pp -B 0x600000 linked.hex2pp -> ELF binary +build_p1pp() { # $1 = catm-bin name (catm0 or catm), $2 = src .P1pp, $3 = out + _catm=$1; _src=$2; _out=$3 + stage "$_catm" combined.M1pp 
backend.M1pp frontend.M1pp libp1pp.P1pp "$_src" \ + -- backend.M1pp frontend.M1pp libp1pp.P1pp "$_src" -- combined.M1pp + stage M1pp combined.M1pp expanded.hex2pp -- combined.M1pp -- expanded.hex2pp + stage "$_catm" linked.hex2pp ELF.hex2 expanded.hex2pp -- ELF.hex2 expanded.hex2pp -- linked.hex2pp + stage hex2pp -B 0x600000 linked.hex2pp "$_out" -- linked.hex2pp -- "$_out" +} + +echo "[$BOOT_TAG] catm.P1pp -> catm; scheme1.P1pp -> scheme1" +build_p1pp catm0 catm.P1pp catm # bootstrap with boot0 catm +build_p1pp catm scheme1.P1pp scheme1 # uses just-built catm + +pipeline_export catm scheme1 +pipeline_run + +echo "[$BOOT_TAG] OK -> $OUT/{catm, scheme1}" diff --git a/boot/boot3.sh b/boot/boot3.sh @@ -0,0 +1,88 @@ +#!/bin/sh +## boot3.sh — bootstrap tcc0 from cc.scm. +## +## Stage A of the four-stage tcc chain: cc.scm compiles tcc.flat.c into +## tcc0. boot4 picks up tcc0 and self-hosts the rest of the chain +## (tcc0 → tcc1 → tcc2 → tcc3, with tcc2 == tcc3 as the fixed-point +## check). +## +## tcc0 = tcc-source compiled by cc.scm ← produced here +## tcc1 = tcc-source compiled by tcc0 ← boot4 +## tcc2 = tcc-source compiled by tcc1 ← boot4 +## tcc3 = tcc-source compiled by tcc2 ← boot4 +## +## ─── Inputs (sources, from canonical tree) ─────────────────────────── +## build/$ARCH/src/src/scheme1/prelude.scm scheme bundle +## build/$ARCH/src/src/cc/{cc.scm, main.scm} scheme bundle +## build/$ARCH/src/src/P1/{P1-$ARCH.M1pp, P1.M1pp, P1pp.P1pp} M1pp pipeline +## build/$ARCH/src/src/P1/{entry-libc.P1pp, elf-end.P1pp} link framing +## build/$ARCH/src/src/vendor-seed/ELF.hex2 ELF header +## build/$ARCH/src/src/tcc/tcc.flat.c flattened tcc TU +## build/$ARCH/src/src/libc/libc.flat.c flattened mes-libc TU +## +## ─── Inputs (binaries from prior stages) ────────────────────────────── +## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} +## build/$ARCH/$DRIVER/boot2/{catm, scheme1} +## +## ─── Tools ──────────────────────────────────────────────────────────── +## scheme1 evaluates 
scripts/boot3-run.scm against the flat staging +## root. Same run.scm drives both DRIVER=podman (cwd=/work) and +## DRIVER=seed (cwd=/). Stage A is pure scheme1 + M1pp + hex2pp; no +## asm step. +## +## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/$DRIVER/boot3/tcc0 — cc.scm-built bootstrap tcc +## build/$ARCH/$DRIVER/boot3/libc.P1pp — cc.scm-built mes-libc (lib mode); +## consumed by the cc-libc test suite +## build/$ARCH/$DRIVER/boot3/tcc.flat.P1pp — cc.scm-built tcc TU (lib mode); +## debug/inspection artifact +## +## Usage: boot/boot3.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). + +set -eu + +. boot/lib-arch.sh +bootlib_init boot3 "${1:-}" +driver_init empty +require_src + +BOOT1=build/$ARCH/$DRIVER/boot1 +BOOT2=build/$ARCH/$DRIVER/boot2 +SRC=build/$ARCH/src + +require_prev "$BOOT1" M1pp hex2pp +require_prev "$BOOT2" catm scheme1 + +# ── stage inputs and run scheme1 + boot3-run.scm under $DRIVER ──────── +. boot/lib-runscm.sh +runscm_init "$STAGE" "$OUT" +runscm_scheme1 "$BOOT2/scheme1" +runscm_prelude "$SRC/src/scheme1/prelude.scm" +runscm_runscm scripts/boot3-run.scm + +runscm_input catm "$BOOT2/catm" +runscm_input M1pp "$BOOT1/M1pp" +runscm_input hex2pp "$BOOT1/hex2pp" +# scheme1 binary itself is staged by runscm_run (so a `(run "scheme1" …)` +# inside boot3-run.scm finds it at cwd-relative ./scheme1). 
+ +runscm_input_from_src scheme1/prelude.scm +runscm_input_from_src cc/cc.scm +runscm_input_from_src cc/main.scm + +runscm_input_from_src "P1/P1-$ARCH.M1pp" backend.M1pp +runscm_input_from_src P1/P1.M1pp frontend.M1pp +runscm_input_from_src P1/P1pp.P1pp libp1pp.P1pp +runscm_input_from_src P1/entry-libc.P1pp +runscm_input_from_src P1/elf-end.P1pp +runscm_input_from_src vendor-seed/ELF.hex2 + +runscm_input_from_src tcc/tcc.flat.c +runscm_input_from_src libc/libc.flat.c + +runscm_export tcc0 libc.P1pp tcc.flat.P1pp +runscm_run "${BOOT3_TIMEOUT:-1800}" + +echo "[$BOOT_TAG] sizes: tcc0=$(wc -c <"$OUT/tcc0") libc.P1pp=$(wc -c <"$OUT/libc.P1pp")" +echo "[$BOOT_TAG] OK -> $OUT/tcc0" diff --git a/boot/boot4.sh b/boot/boot4.sh @@ -0,0 +1,133 @@ +#!/bin/sh +## boot4.sh — self-host tcc rebuild stages on top of boot3's tcc0. +## +## boot3 produced tcc0 (cc.scm-built bootstrap). boot4 runs the rest of +## the four-stage chain: tcc0 → tcc1 → tcc2 → tcc3. The bootstrap +## fixed-point check is `tcc2 == tcc3`: once tcc is compiling itself +## with no help from cc.scm, the chain reaches a byte-identical fixed +## point. (See docs/PLAN.md for the cc.scm vs tcc codegen-divergence +## reasoning behind needing four stages rather than two.) 
+## +## tcc0 = tcc-source compiled by cc.scm ← boot3 +## tcc1 = tcc-source compiled by tcc0 ← produced here +## tcc2 = tcc-source compiled by tcc1 ← produced here +## tcc3 = tcc-source compiled by tcc2 ← produced here +## +## ─── Inputs (sources, from canonical tree) ─────────────────────────── +## build/$ARCH/src/src/tcc/libc/$ARCH/{start.S, sys_stubs.S} +## build/$ARCH/src/src/tcc/cc/mem.c +## build/$ARCH/src/src/tcc/tcc-0.9.26-1147-gee75a10c/lib/<arch-specific> +## build/$ARCH/src/src/tcc/tcc.flat.c +## build/$ARCH/src/src/libc/libc.flat.c +## build/$ARCH/src/src/test-fixtures/boot-hello.c +## +## ─── Inputs (binaries from prior stages) ────────────────────────────── +## build/$ARCH/$DRIVER/boot3/tcc0 +## build/$ARCH/$DRIVER/boot2/{catm, scheme1} +## +## ─── Tools ──────────────────────────────────────────────────────────── +## scheme1 evaluates a host-generated run.scm (from boot4-gen-runscm.sh) +## against the flat staging root. Every arch has CONFIG_TCC_ASM and +## assembles .S inputs (start.S, sys_stubs.S) directly inside the +## container; no host asm step. The aarch64 assembler is the phase-1 +## arm64-asm.c that flatten patches into tcc-0.9.26 (see +## docs/TCC-ARM64-ASM.md). +## +## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/$DRIVER/boot4/{tcc1, tcc2, tcc3} +## tcc2 and tcc3 are byte-identical (asserted +## below) — that equality is the fixed-point. +## build/$ARCH/$DRIVER/boot4/crt1.o +## tcc2-built startup object, kept outside +## libc.a because it must lead link lines. 
+## build/$ARCH/$DRIVER/boot4/libc.a +## tcc2-built archive of sys_stubs.o + mem.o +## + libc.o +## build/$ARCH/$DRIVER/boot4/libtcc1.a +## tcc2-built tcc compiler helper archive +## build/$ARCH/$DRIVER/boot4/hello — mes-libc-linked smoke binary +## +## ─── Env knobs ──────────────────────────────────────────────────────── +## TCC_BOOTSTRAP_RELAX_FIXEDPOINT=1 +## After a codegen-altering tcc patch, the two-stage rule needs a +## third bounce to converge. Set this to accept tcc3 even when +## tcc2 != tcc3; the next boot4 run, started from this run's +## tcc3, will reach tcc2 == tcc3 with no extra knob. +## +## Usage: boot/boot4.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). + +set -eu + +. boot/lib-arch.sh +bootlib_init boot4 "${1:-}" +driver_init empty +require_src + +case "$ARCH" in + aarch64) LIBTCC1_C_SRCS="lib-arm64.c"; LIBTCC1_ASM_SRCS="" ;; + amd64) LIBTCC1_C_SRCS="libtcc1.c va_list.c"; LIBTCC1_ASM_SRCS="alloca86_64.S alloca86_64-bt.S" ;; + riscv64) LIBTCC1_C_SRCS="lib-arm64.c"; LIBTCC1_ASM_SRCS="" ;; +esac + +BOOT2=build/$ARCH/$DRIVER/boot2 +BOOT3=build/$ARCH/$DRIVER/boot3 +SRC=build/$ARCH/src + +TCC_PKG=tcc-0.9.26-1147-gee75a10c +TCC_LIB_REL=tcc/$TCC_PKG/lib + +# ── prerequisites ───────────────────────────────────────────────────── +require_prev "$BOOT3" tcc0 +require_prev "$BOOT2" catm scheme1 +for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do + require_file "$SRC/src/$TCC_LIB_REL/$f" +done + +# ── stage inputs and run scheme1 + boot4 run.scm under $DRIVER ──────── +. 
boot/lib-runscm.sh +runscm_init "$STAGE" "$OUT" +runscm_gen scripts/boot4-gen-runscm.sh "$ARCH" + +runscm_scheme1 "$BOOT2/scheme1" +runscm_prelude "$SRC/src/scheme1/prelude.scm" + +runscm_input tcc0 "$BOOT3/tcc0" +runscm_input catm "$BOOT2/catm" + +runscm_input_from_src "tcc/libc/$ARCH/start.S" +runscm_input_from_src "tcc/libc/$ARCH/sys_stubs.S" +runscm_input_from_src tcc/cc/mem.c +for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do + runscm_input_from_src "$TCC_LIB_REL/$f" +done + +runscm_input_from_src tcc/tcc.flat.c +runscm_input_from_src libc/libc.flat.c +runscm_input_from_src test-fixtures/boot-hello.c hello.c + +runscm_export tcc1 tcc2 tcc3 s3-crt1.o s3-libc.a s3-libtcc1.a hello +runscm_run "${BOOT4_TIMEOUT:-5400}" + +# ── fixed-point check (host-side) ───────────────────────────────────── +if ! cmp -s "$OUT/tcc2" "$OUT/tcc3"; then + s2=$(wc -c <"$OUT/tcc2") + s3=$(wc -c <"$OUT/tcc3") + if [ "${TCC_BOOTSTRAP_RELAX_FIXEDPOINT:-0}" = 1 ]; then + echo "[$BOOT_TAG] WARN: tcc2 ($s2) != tcc3 ($s3); TCC_BOOTSTRAP_RELAX_FIXEDPOINT=1, accepting tcc3" >&2 + else + echo "[$BOOT_TAG] FIXED-POINT FAIL: tcc2 ($s2) != tcc3 ($s3)" >&2 + exit 1 + fi +fi + +# ── normalize output names (drop s3- prefix) ────────────────────────── +# tcc1 / tcc2 are kept on disk: the test path (tcc-cc / tcc-libc suites) +# uses them as stage-2 / stage-3 self-built tcc binaries. +mv "$OUT/s3-crt1.o" "$OUT/crt1.o" +mv "$OUT/s3-libc.a" "$OUT/libc.a" +mv "$OUT/s3-libtcc1.a" "$OUT/libtcc1.a" +chmod 0700 "$OUT/tcc1" "$OUT/tcc2" "$OUT/tcc3" "$OUT/hello" + +echo "[$BOOT_TAG] sizes: libtcc1.a=$(wc -c <"$OUT/libtcc1.a") libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")" +echo "[$BOOT_TAG] OK -> $OUT/{tcc3, crt1.o, libc.a, libtcc1.a, hello} (fixed point: tcc2 == tcc3)" diff --git a/boot/boot5.sh b/boot/boot5.sh @@ -0,0 +1,170 @@ +#!/bin/sh +## boot5.sh — build musl-1.2.5 with boot4 artifacts and link hello. 
##
## Builds on top of boot4's verified-fixed-point tcc (tcc2 == tcc3) and
## demonstrates that the same compiler can produce a working static libc
## from upstream musl source — patched only as far as needed to work
## around tcc's missing GCC extensions (register-asm-variable syscalls,
## attribute(alias) weak refs, _Complex, x86_64 SSE/x87 inline asm).
##
## ─── Inputs ──────────────────────────────────────────────────────────
##   build/$ARCH/$DRIVER/boot4/tcc3       — boot4's verified self-host tcc
##   build/$ARCH/$DRIVER/boot4/libtcc1.a  — boot4's tcc runtime archive
##   build/$ARCH/$DRIVER/boot2/{catm, scheme1}
##   build/$ARCH/src/src/musl/            — canonical musl tree (overrides
##                                          merged, deletes applied,
##                                          alltypes.h/syscall.h generated,
##                                          per-arch skip filter applied)
##   build/$ARCH/src/src/tcc/stdarg-bridge.h
##   build/$ARCH/src/src/test-fixtures/boot-hello.c
##
## ─── Tools ────────────────────────────────────────────────────────────
##   scheme1 evaluates a host-generated run.scm (from boot5-gen-runscm.sh)
##   against the flat staging root.
##
## ─── Outputs ─────────────────────────────────────────────────────────
##   build/$ARCH/$DRIVER/boot5/libc.a
##   build/$ARCH/$DRIVER/boot5/{crt1.o, crti.o, crtn.o}
##   build/$ARCH/$DRIVER/boot5/hello      — static, runs in the container
##
## Usage: boot/boot5.sh <arch>
##   <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman).

set -eu

. boot/lib-arch.sh
bootlib_init boot5 "${1:-}"
driver_init empty
require_src

BOOT2=build/$ARCH/$DRIVER/boot2
BOOT4=build/$ARCH/$DRIVER/boot4
SRC=build/$ARCH/src
MUSL_DIR=$SRC/src/musl

# ── prerequisites ─────────────────────────────────────────────────────
# Every input listed in the header is checked up front so a missing one
# fails with a hint instead of a bare `cp` error halfway through staging.
require_prev "$BOOT4" tcc3
require_prev "$BOOT2" catm scheme1
require_file "$BOOT4/libtcc1.a" "run boot/boot4.sh $ARCH"
require_file "$MUSL_DIR" "run bootprep/prep-src.sh $ARCH and bootprep/prep-musl.sh $ARCH"
require_file "$MUSL_DIR/skip.txt" "run bootprep/prep-musl.sh $ARCH"
require_file "$SRC/src/tcc/stdarg-bridge.h" "run bootprep/prep-src.sh $ARCH"
require_file "$SRC/src/test-fixtures/boot-hello.c" "run bootprep/prep-src.sh $ARCH"

# ── prepare staging dirs ──────────────────────────────────────────────
# $STAGE/in/     — read-only inputs  (becomes /work/in  or in/  in tmpfs)
# $STAGE/out/    — writable outputs  (becomes /work/out or out/ in tmpfs)
# $STAGE/_host/  — host-side scratch (enumeration outputs); never
#                  visible to the container/kernel.
. boot/lib-runscm.sh
runscm_init "$STAGE" "$OUT"
mkdir -p "$STAGE/_host"

# ── enumerate musl sources from the canonical tree ────────────────────
# Mirrors musl's Makefile rule: a per-arch override (under
# $d/$MUSL_ARCH/) replaces the same-stem base file (under $d/). The
# canonical tree already had the per-arch skip filter applied by
# prep-musl.sh, so no skip subtraction is needed here.
SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent
         src/env src/errno src/exit src/fcntl src/fenv src/internal
         src/ipc src/legacy src/linux src/locale src/malloc
         src/malloc/mallocng src/math src/misc src/mman src/mq
         src/multibyte src/network src/passwd src/prng src/process
         src/regex src/sched src/search src/select src/setjmp src/signal
         src/stat src/stdio src/stdlib src/string src/temp src/termios
         src/thread src/time src/unistd"

# NOTE: the inner tests use `if`, not `[ -f … ] && echo`. With `&&`, an
# unmatched glob in the final iteration leaves the loop (and hence the
# subshell) with status 1, which `set -e` in the parent treats as fatal.
(
    cd "$MUSL_DIR"
    for d in $SRC_TOP; do
        [ -d "$d" ] || continue
        for f in "$d"/*.c; do
            if [ -f "$f" ]; then echo "$f"; fi
        done
    done
) > "$STAGE/_host/base.txt"

(
    cd "$MUSL_DIR"
    for d in $SRC_TOP; do
        [ -d "$d/$MUSL_ARCH" ] || continue
        for f in "$d/$MUSL_ARCH"/*.c "$d/$MUSL_ARCH"/*.s "$d/$MUSL_ARCH"/*.S; do
            if [ -f "$f" ]; then echo "$f"; fi
        done
    done
) > "$STAGE/_host/arch.txt"

# REPLACED: bases that have arch-specific overrides (drop them from
# BASE). KEEP = (BASE - REPLACED) ∪ ARCH.
# Each arch path "<dir>/<ARCH>/<stem>.<ext>" is mapped to "<dir>/<stem>".
awk -v ARCH="$MUSL_ARCH" '
    {
        sub(/\.[^.]*$/, "")                  # strip extension
        slot = "/" ARCH "/"
        i = index($0, slot)
        head = substr($0, 1, i - 1)
        tail = substr($0, i + length(slot))
        print head "/" tail
    }
' "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/replaced.txt"

# Filter base by removing stems that appear in replaced.
awk -v REPF="$STAGE/_host/replaced.txt" '
    BEGIN { while ((getline l < REPF) > 0) rep[l] = 1 }
    {
        stem = $0
        sub(/\.c$/, "", stem)
        if (!(stem in rep)) print
    }
' "$STAGE/_host/base.txt" > "$STAGE/_host/keep_base.txt"

cat "$STAGE/_host/keep_base.txt" "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/build-srcs.txt"

# Log-only counters. skip.txt entries = lines that are neither blank nor
# comments. ERE (-E) is used because `\|` inside a BRE is not portable.
# `|| true`: grep -c exits 1 when the count is 0.
n_src=$(wc -l < "$STAGE/_host/build-srcs.txt")
n_skip=$(grep -Ecv '^[[:space:]]*(#|$)' "$MUSL_DIR/skip.txt" || true)
echo "[$BOOT_TAG] keep=$n_src skip=$n_skip (calibrated)"

# Record CRT mode (asm vs c) so the gen-runscm step picks the right
# crti/crtn source set without re-checking $MUSL_DIR.
if [ -f "$MUSL_DIR/crt/$MUSL_ARCH/crti.s" ]; then
    echo asm > "$STAGE/_host/crt-mode"
else
    echo c > "$STAGE/_host/crt-mode"
fi

# Pre-create per-source obj/ directories under $STAGE/out/obj/musl/ so
# scheme1's (run "in/tcc" -c …) doesn't need to mkdir at runtime (tcc
# errors out if the parent dir is missing, and scheme1 has no mkdir
# primitive).
awk '
    {
        sub(/\.[^.]*$/, "")
        if (match($0, /\/[^\/]*$/)) print substr($0, 1, RSTART - 1)
    }
' "$STAGE/_host/build-srcs.txt" | sort -u > "$STAGE/_host/build-objdirs.txt"
COBJ=$STAGE/out/obj/musl
mkdir -p "$COBJ/crt"
while read -r d; do mkdir -p "$COBJ/$d"; done < "$STAGE/_host/build-objdirs.txt"

# ── generate run.scm and stage chain binaries ─────────────────────────
runscm_gen scripts/boot5-gen-runscm.sh "$MUSL_ARCH" "$STAGE/_host"

runscm_scheme1 "$BOOT2/scheme1"
runscm_prelude "$SRC/src/scheme1/prelude.scm"

# Chain binaries staged at flat in/ root (cwd-relative names in run.scm).
runscm_input tcc       "$BOOT4/tcc3"
runscm_input libtcc1.a "$BOOT4/libtcc1.a"
runscm_input catm      "$BOOT2/catm"
runscm_input_from_src tcc/stdarg-bridge.h        tcc-stdarg-bridge.h
runscm_input_from_src test-fixtures/boot-hello.c hello.c

# Stage the canonical musl tree under in/musl/. Both drivers pick it
# up automatically (podman bind-mounts $STAGE/in; seed packs
# `find in -type f` into the cpio).
runscm_input_tree_from_src musl musl

runscm_export libc.a crt1.o crti.o crtn.o hello

# boot5 has ~1300 spawns + heavy tcc work; bump qemu memory + timeout for
# the seed driver. Podman ignores QEMU_MEM and uses host memory directly.
QEMU_MEM=${QEMU_MEM:-3072M} runscm_run "${BOOT5_TIMEOUT:-7200}"

echo "[$BOOT_TAG] sizes: libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")"
echo "[$BOOT_TAG] OK -> $OUT/{libc.a, crt1.o, crti.o, crtn.o, hello}"
##
## Drives tcc3 to compile + link the seed kernel directly: no `ld -T
## kernel.lds`, no objcopy. aarch64 emits the flat Image QEMU expects;
## amd64/riscv64 emit the ELF consumed by QEMU's -kernel path.
##
## ─── Inputs (sources, from canonical tree) ───────────────────────────
##   build/$ARCH/src/src/kernel/arch/$ARCH/{kernel.S, mmu.c, arch.h}
##   build/$ARCH/src/src/kernel/kernel.c
##   build/$ARCH/src/src/tcc/cc/mem.c
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
##   build/$ARCH/$DRIVER/boot4/tcc3
##   build/$ARCH/$DRIVER/boot2/scheme1
##
## ─── Tools ────────────────────────────────────────────────────────────
##   scheme1 evaluates a host-generated run.scm (from boot6-gen-runscm.sh)
##   against the flat staging root.
##
## ─── Outputs ─────────────────────────────────────────────────────────
##   build/$ARCH/$DRIVER/boot6/$KERNEL_NAME
##     aarch64: Image — flat boot Image, byte-format identical to the gcc
##       Makefile's `objcopy -O binary` output. QEMU's `-kernel`
##       detects `ARM\x64` magic at file offset 0x38 and follows
##       the arm64 boot protocol, putting DTB phys in x0 before
##       jumping to _start.
##     amd64/riscv64: kernel.elf — ELF consumed via QEMU's -kernel path.
##
## Usage: boot/boot6.sh <arch>
##   <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman).

set -eu

. boot/lib-arch.sh
bootlib_init boot6 "${1:-}"
driver_init empty
require_src

BOOT2=build/$ARCH/$DRIVER/boot2
BOOT4=build/$ARCH/$DRIVER/boot4
SRC=build/$ARCH/src

# Kernel sources, relative to $SRC/src/. One list drives both the
# prerequisite check and the staging below so the two cannot drift apart.
# Each file is staged under in/ by its basename.
KERNEL_SRCS="kernel/arch/$ARCH/kernel.S
             kernel/kernel.c
             kernel/arch/$ARCH/arch.h
             kernel/arch/$ARCH/mmu.c
             tcc/cc/mem.c"

# ── prerequisites ─────────────────────────────────────────────────────
require_prev "$BOOT4" tcc3
require_prev "$BOOT2" scheme1
for f in $KERNEL_SRCS; do
    require_file "$SRC/src/$f" "run bootprep/prep-src.sh $ARCH"
done

# ── stage inputs and run scheme1 + run.scm under $DRIVER ──────────────
. boot/lib-runscm.sh
runscm_init "$STAGE" "$OUT"
runscm_gen scripts/boot6-gen-runscm.sh "$ARCH"

runscm_scheme1 "$BOOT2/scheme1"
runscm_prelude "$SRC/src/scheme1/prelude.scm"

runscm_input tcc3 "$BOOT4/tcc3"
for f in $KERNEL_SRCS; do
    runscm_input_from_src "$f"
done

runscm_export "$KERNEL_NAME"
runscm_run "${BOOT6_TIMEOUT:-1200}"

echo "[$BOOT_TAG] OK -> $OUT/$KERNEL_NAME ($(wc -c <"$OUT/$KERNEL_NAME") bytes)"
No libc, no resolver, no /etc. +## This is the only container the bootN.sh scripts ever exec into. +## +## Built per --platform; tag as boot2-scratch:<arch>. +## Multi-arch index pinned to the same digest the boot2-busybox image +## uses (tests/containers/Containerfile.busybox); per-arch entries within the +## index, fetched 2026-04-24: +## amd64 sha256:298efc24641ff8a1a285abdc555a0ce5ab7c42eb085e1be099f824188e069604 +## arm64 sha256:458a2ae4cb09bf96f8e24f135474b1552039738ed16ee470320a9c05c2da2004 +## riscv64 sha256:657f5a49af9288dc98d2bf45343e45c57c3caf3946aa9df436d05da320a8c863 + +FROM docker.io/library/busybox@sha256:19b646668802469d968a05342a601e78da4322a414a7c09b1c9ee25165042138 AS busybox + +FROM scratch +COPY --from=busybox /bin /bin +WORKDIR /work +CMD ["/bin/sh"] diff --git a/boot/lib-arch.sh b/boot/lib-arch.sh @@ -0,0 +1,177 @@ +# lib-arch.sh — single source for arch + driver setup shared by +# boot/boot.sh, boot/boot{0..6}.sh, lib-pipeline.sh, lib-runscm.sh. +# +# Public entry points (call in this order from a bootN.sh): +# +# bootlib_init <stage> <arch> # validate <arch>, cd to repo root, +# # set ARCH/PLATFORM/KERNEL_NAME/ +# # MUSL_ARCH/DRIVER/BOOT_STAGE/BOOT_TAG. +# driver_init [<image-kind>] # set OUT/STAGE; podman: build IMAGE +# # if missing (image-kind ∈ scratch| +# # empty; default scratch); seed: +# # verify boot6 kernel exists. +# require_src # die if build/$ARCH/src/ missing. +# require_prev <dir> <name>... # die if any <dir>/<name> is +# # missing or non-executable. +# require_file <path> [<hint>] # die if <path> missing; print a +# # uniform diagnostic with hint. 
#
# After bootlib_init, the following shell vars are set/exported:
#   ARCH         input architecture token (aarch64|amd64|riscv64)
#   ROOT         repo root (cwd is set to ROOT)
#   DRIVER       podman|seed (defaults to podman)
#   PLATFORM     linux/<arm64|amd64|riscv64> for podman --platform
#   KERNEL_NAME  Image (aarch64) | kernel.elf (amd64,riscv64)
#   MUSL_ARCH    aarch64 | x86_64 | riscv64
#   BOOT_TAG     "<stage>/<driver>/<arch>" for log prefixes
#   BOOT_STAGE   stage name as passed in (boot0|boot1|...)
#   BOOT_T0      start time (`date +%s`), read by the EXIT trap
#
# After driver_init (boot stages only — prep-* skip it):
#   OUT          build/$ARCH/$DRIVER/$BOOT_STAGE         (stage output dir)
#   STAGE        build/$ARCH/$DRIVER/.$BOOT_STAGE-stage  (scratch staging dir)
#   podman:      IMAGE
#   seed:        KERNEL_IMAGE, EXTRACT, SEED_ARCH

# bootlib_init <stage> <arch>
#   Validate <arch> and $DRIVER, cd to the repo root (the parent of the
#   calling script's directory), derive the per-arch metadata, export it,
#   and install the _bootlib_finish EXIT trap. Exits 2 on bad arguments.
bootlib_init() {
    _stage=$1; _arch=${2:-}
    [ -n "$_stage" ] || { echo "lib-arch: bootlib_init: stage required" >&2; exit 2; }
    case "$_arch" in
        aarch64|amd64|riscv64) ;;
        *) echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2 ;;
    esac
    ARCH=$_arch
    # $0 is the sourcing script (this file is sourced, never executed).
    ROOT=$(cd "$(dirname "$0")/.." && pwd)
    cd "$ROOT"
    DRIVER=${DRIVER:-podman}
    case "$DRIVER" in
        podman|seed) ;;
        *) echo "[$_stage/$DRIVER/$ARCH] unknown DRIVER=$DRIVER (expected podman|seed)" >&2; exit 2 ;;
    esac
    BOOT_STAGE=$_stage
    BOOT_TAG="$_stage/$DRIVER/$ARCH"
    BOOT_T0=$(date +%s)
    case "$ARCH" in
        aarch64) PLATFORM=linux/arm64;   KERNEL_NAME=Image;      MUSL_ARCH=aarch64 ;;
        amd64)   PLATFORM=linux/amd64;   KERNEL_NAME=kernel.elf; MUSL_ARCH=x86_64  ;;
        riscv64) PLATFORM=linux/riscv64; KERNEL_NAME=kernel.elf; MUSL_ARCH=riscv64 ;;
    esac
    export ARCH ROOT DRIVER PLATFORM KERNEL_NAME MUSL_ARCH BOOT_TAG BOOT_STAGE BOOT_T0
    trap _bootlib_finish EXIT
}

# _bootlib_finish — EXIT trap installed by bootlib_init. Prints
# `[$BOOT_TAG] done in Xs (cum Ys)` (or `failed after Xs` on error).
# On success, records the elapsed time so later stages can sum the
# chain. Cumulative = sum of all per-stage .timing files relevant to
# the current $ARCH/$DRIVER.
_bootlib_finish() {
    _exit=$?                       # must be first: status that triggered the trap
    [ -n "${BOOT_T0:-}" ] || return 0
    _elapsed=$(( $(date +%s) - BOOT_T0 ))
    if [ "$_exit" != 0 ]; then
        echo "[$BOOT_TAG] failed after ${_elapsed}s (exit=$_exit)" >&2
        return 0
    fi
    # Record this stage's time. Boot stages have OUT (set by driver_init);
    # the orchestrator (BOOT_STAGE=boot) doesn't write — its time would
    # double-count. Other stages without OUT (prep-src, prep-musl) write
    # to a per-arch sidecar dir.
    if [ "$BOOT_STAGE" != boot ]; then
        if [ -n "${OUT:-}" ] && [ -d "$OUT" ]; then
            echo "$_elapsed" > "$OUT/.timing"
        elif [ -d "build/$ARCH" ]; then
            mkdir -p "build/$ARCH/.timings"
            echo "$_elapsed" > "build/$ARCH/.timings/$BOOT_STAGE"
        fi
    fi
    # Cumulative: sum boot-stage timings for this driver + driver-
    # independent prep timings. Glob may not match — guard each path.
    _cum=0
    for _f in \
        "build/$ARCH/$DRIVER"/*/.timing \
        "build/$ARCH/.timings"/*
    do
        [ -f "$_f" ] || continue
        _v=$(cat "$_f" 2>/dev/null) || continue
        # Skip anything that is not a plain non-negative integer.
        case "$_v" in *[!0-9]*|'') continue ;; esac
        _cum=$((_cum + _v))
    done
    echo "[$BOOT_TAG] done in ${_elapsed}s (cum ${_cum}s)"
}

# driver_init [<image-kind>]
#   Set and export OUT/STAGE, then prepare the transport:
#     podman — build boot2-<image-kind>:$ARCH if it does not exist yet
#              (<image-kind> ∈ scratch|empty, default scratch); export IMAGE.
#     seed   — locate the podman-built boot6 kernel and the block-image
#              extractor; export KERNEL_IMAGE, EXTRACT, SEED_ARCH.
#   Exits 2 on a bad image kind, 1 when a seed prerequisite is missing.
driver_init() {
    _image_kind=${1:-scratch}
    case "$_image_kind" in
        scratch|empty) ;;
        *) echo "[$BOOT_TAG] driver_init: image-kind must be scratch|empty (got $_image_kind)" >&2; exit 2 ;;
    esac
    OUT=build/$ARCH/$DRIVER/$BOOT_STAGE
    STAGE=build/$ARCH/$DRIVER/.$BOOT_STAGE-stage
    export OUT STAGE
    case "$DRIVER" in
        podman)
            IMAGE=boot2-$_image_kind:$ARCH
            if ! podman image exists "$IMAGE"; then
                echo "[$BOOT_TAG] building $IMAGE"
                # The empty image is built with --no-cache: per the note in
                # boot/containers/Containerfile.empty, podman's layer cache
                # key for that Containerfile is identical across platforms,
                # so a cached build would alias this tag onto another
                # arch's image.
                _no_cache=
                [ "$_image_kind" = empty ] && _no_cache=--no-cache
                # $_no_cache is intentionally unquoted: empty → no argument.
                podman build $_no_cache --platform "$PLATFORM" -t "$IMAGE" \
                    -f boot/containers/Containerfile.$_image_kind boot/containers/
            fi
            export IMAGE
            ;;
        seed)
            # DRIVER=seed always consumes the podman-built boot6 kernel —
            # tcc3 is platform-agnostic but we settled on a single canonical
            # build location to reduce surface area. See docs/PLAN.md A3.
            KERNEL_IMAGE=$ROOT/build/$ARCH/podman/boot6/$KERNEL_NAME
            EXTRACT=$ROOT/seed-kernel/scripts/extract-blk.sh
            [ -f "$KERNEL_IMAGE" ] || {
                echo "[$BOOT_TAG] missing $KERNEL_IMAGE — run ./boot/boot.sh $ARCH (default DRIVER=podman) first" >&2
                exit 1
            }
            # The seed transports invoke $EXTRACT directly after the qemu
            # run; check it now so a missing/non-executable extractor fails
            # here rather than after a (possibly hours-long) boot.
            [ -x "$EXTRACT" ] || {
                echo "[$BOOT_TAG] missing or non-executable $EXTRACT" >&2
                exit 1
            }
            export KERNEL_IMAGE EXTRACT
            export SEED_ARCH=$ARCH
            ;;
    esac
}

# require_prev <dir> <name>...
#   Die (exit 1) unless every <dir>/<name> is an executable regular file.
#   When <dir>'s basename looks like a boot stage, the diagnostic names
#   the script that produces it.
require_prev() {
    _dir=$1; shift
    for _n in "$@"; do
        # -f as well as -x: a directory also passes -x.
        { [ -f "$_dir/$_n" ] && [ -x "$_dir/$_n" ]; } || {
            _stage_name=$(basename "$_dir")
            case "$_stage_name" in
                boot*) _hint="run boot/$_stage_name.sh $ARCH" ;;
                *)     _hint="rebuild $_dir" ;;
            esac
            echo "[$BOOT_TAG] missing prerequisite: $_dir/$_n ($_hint)" >&2
            exit 1
        }
    done
}

# require_src — assert build/$ARCH/src/ exists (the canonical generated
# source tree built by bootprep/prep-src.sh). Every bootN.sh needs it.
require_src() {
    [ -d "build/$ARCH/src" ] || {
        echo "[$BOOT_TAG] missing build/$ARCH/src — run bootprep/prep-src.sh $ARCH" >&2
        exit 1
    }
}

# require_file <path> [<hint>] — assert <path> exists (file or directory);
# print a uniform "[$BOOT_TAG] missing <path> — <hint>" diagnostic and
# exit 1 on failure.
require_file() {
    _path=$1; _hint=${2:-}
    [ -e "$_path" ] || {
        if [ -n "$_hint" ]; then
            echo "[$BOOT_TAG] missing $_path — $_hint" >&2
        else
            echo "[$BOOT_TAG] missing $_path" >&2
        fi
        exit 1
    }
}
# This library exposes that wiring as a small DSL so the same wiring can
# run under different transports:
#
#   podman — accumulate stages into one /work/run.sh, run once in a
#            container against $IMAGE / $PLATFORM (env-set by caller).
#   seed   — run each stage as one qemu boot of seed-kernel via
#            tier1-gate.sh's pattern (cpio /init + in/<inputs> on
#            virtio-blk hd0, output dumped to virtio-blk hd1 as SEEDFS,
#            extract). The qemu flavour follows $SEED_ARCH
#            (aarch64|riscv64|amd64; aarch64 when unset).
#
# Both drivers respect the `in/`+`out/` convention: inputs read from
# `in/<name>`, outputs written to `out/<name>`. The stage primitive
# rewrites argv tokens that match input/output names with the
# appropriate prefix; bare flag/literal tokens pass through untouched.
#
# DSL (source as `. boot/lib-pipeline.sh`):
#
#   pipeline_init   <staging-dir> <out-dir> <driver>
#   pipeline_input  <name> <host-path>             # repeatable
#   pipeline_input_from_src <subpath> [<name>]     # from build/$ARCH/src/src/
#   stage           <bin> <argv...> -- <inputs...> -- <outputs...>
#   pipeline_export <name>...                      # one or more
#   pipeline_run
#
# `stage` semantics: invoke `<bin>` with argv=[<bin>, <argv1>, ...]; the
# stage reads the listed input names and produces the listed output
# names. <bin> is also a name in the flat namespace — typically a
# pipeline_input, but may be the output of an earlier stage.
#
# Required env for podman driver: PLATFORM, IMAGE.
# Required env for seed driver:   KERNEL_IMAGE, EXTRACT.

# Pipeline state (POSIX sh has no locals; everything is a global).
P_DRIVER=            # podman|seed
P_STAGE_DIR=         # staging dir; wiped by pipeline_init
P_OUT_DIR=           # where pipeline_run copies exported names
P_SCRIPT=            # podman only: accumulated run.sh
P_IDX=0              # 1-based index of the most recent stage
P_EXPORTS=           # space-separated names queued by pipeline_export
P_INPUT_NAMES=       # names registered via pipeline_input
P_PRODUCED_NAMES=    # names produced by earlier stages

# pipeline_init <staging-dir> <out-dir> <driver>
#   Reset all pipeline state and create a fresh <staging-dir>/{in,out}
#   plus <out-dir>. Arguments, driver and required env are validated
#   BEFORE the staging dir is wiped, so a bad call never runs `rm -rf`
#   (or creates `/in`, `/out` from an empty path). Exits 2 on bad
#   arguments.
pipeline_init() {
    P_STAGE_DIR=${1:-}; P_OUT_DIR=${2:-}; P_DRIVER=${3:-}
    if [ -z "$P_STAGE_DIR" ] || [ -z "$P_OUT_DIR" ]; then
        echo "lib-pipeline: pipeline_init: <staging-dir> and <out-dir> are required" >&2
        exit 2
    fi
    case "$P_DRIVER" in
        podman) ;;
        seed)
            : "${KERNEL_IMAGE:?lib-pipeline:seed: KERNEL_IMAGE not set}"
            : "${EXTRACT:?lib-pipeline:seed: EXTRACT not set}"
            ;;
        *)
            echo "lib-pipeline: unknown driver '$P_DRIVER'" >&2; exit 2 ;;
    esac

    rm -rf "$P_STAGE_DIR"
    mkdir -p "$P_STAGE_DIR/in" "$P_STAGE_DIR/out" "$P_OUT_DIR"
    P_IDX=0
    P_EXPORTS=
    P_INPUT_NAMES=
    P_PRODUCED_NAMES=
    case "$P_DRIVER" in
        podman)
            P_SCRIPT=$P_STAGE_DIR/run.sh
            {
                echo '#!/bin/sh'
                echo 'set -eu'
                # Stage everything in /tmp (RAM tmpfs) — the seed-stage tools
                # do one syscall per byte, virtiofs round-trips would dominate.
                # Mirror the in/ + out/ split so argv references resolve.
                echo 'mkdir -p /tmp/in /tmp/out'
                echo 'cp /work/in/* /tmp/in/'
                echo 'cd /tmp'
            } > "$P_SCRIPT"
            ;;
        seed)
            # Per-stage cpio images are assembled from this flat pool.
            mkdir -p "$P_STAGE_DIR/work"
            ;;
    esac
}

# pipeline_input <name> <host-path>
#   Copy <host-path> into the staging area as in/<name> (and, for the
#   seed driver, into the work/ pool) and remember <name> as an input.
pipeline_input() {
    name=$1; src=$2
    cp "$src" "$P_STAGE_DIR/in/$name"
    if [ "$P_DRIVER" = "seed" ]; then
        cp "$src" "$P_STAGE_DIR/work/$name"
    fi
    P_INPUT_NAMES="$P_INPUT_NAMES $name"
}

# pipeline_input_from_src <subpath> [<name>]
#   Pull a file from the canonical generated source tree at
#   build/$ARCH/src/src/<subpath>. Stages it under in/<name>;
#   <name> defaults to basename(subpath). For the rare `bin/` case
#   (the seed hex0-seed binary), call pipeline_input directly with
#   build/$ARCH/src/bin/<file>.
pipeline_input_from_src() {
    _subpath=$1; _name=${2:-}
    [ -n "$_name" ] || _name=$(basename "$_subpath")
    pipeline_input "$_name" "build/$ARCH/src/src/$_subpath"
}

# _p_lookup <token>
#   Rewrite one argv token for the stage currently being assembled: if it
#   is one of that stage's declared inputs ($P_IN) print `in/<token>`;
#   if it is one of its declared outputs ($P_OUT) print `out/<token>`;
#   otherwise print it unchanged. Inputs win when a name is in both lists.
_p_lookup() {
    tok=$1
    for n in $P_IN;  do [ "$tok" = "$n" ] && { echo "in/$tok";  return; }; done
    for n in $P_OUT; do [ "$tok" = "$n" ] && { echo "out/$tok"; return; }; done
    echo "$tok"
}
# _p_bin_path <bin>
#   Resolve where a stage's binary lives: out/<bin> if an earlier stage
#   produced it, else in/<bin> if it is a pipeline_input, else <bin>
#   unchanged. A name that is both an input and a produced output uses
#   the produced one.
_p_bin_path() {
    b=$1
    for n in $P_PRODUCED_NAMES; do [ "$b" = "$n" ] && { echo "out/$b"; return; }; done
    for n in $P_INPUT_NAMES;    do [ "$b" = "$n" ] && { echo "in/$b";  return; }; done
    echo "$b"
}

# stage <bin> <argv...> -- <inputs...> -- <outputs...>
#
# The explicit input/output lists look redundant — most names already
# appear in <argv...> — but they are not. argv positions are tool-
# specific: a token like `M0.combined.hex2` is an output of one stage
# (catm produces it) and an input of the next (hex2 reads it). The
# framework cannot tell which from the token alone, so each stage
# declares both lists. Don't try to "simplify" by inferring from argv.
#
# Tokens are stored space-joined, so names/arguments containing
# whitespace are not supported.
stage() {
    bin=$1; shift
    P_HEAD_RAW=""; P_IN=""; P_OUT=""; _s=head
    # Split the remaining words into argv / inputs / outputs at each `--`.
    while [ $# -gt 0 ]; do
        if [ "$1" = "--" ]; then
            case "$_s" in
                head) _s=in ;;
                in)   _s=out ;;
                *) echo "lib-pipeline: too many --" >&2; exit 2 ;;
            esac
            shift; continue
        fi
        case "$_s" in
            head) P_HEAD_RAW="$P_HEAD_RAW $1" ;;
            in)   P_IN="$P_IN $1" ;;
            out)  P_OUT="$P_OUT $1" ;;
        esac
        shift
    done
    [ "$_s" = "out" ] || { echo "lib-pipeline: stage needs '<bin> argv... -- inputs... -- outputs...'" >&2; exit 2; }

    # Rewrite head tokens with in/ or out/ prefixes.
    P_HEAD=""
    for tok in $P_HEAD_RAW; do
        P_HEAD="$P_HEAD $(_p_lookup "$tok")"
    done
    P_BIN_PATH=$(_p_bin_path "$bin")

    P_IDX=$((P_IDX + 1))
    case "$P_DRIVER" in
        podman) _stage_podman ;;
        seed)   _stage_seed ;;
    esac

    # Track produced names so later stages can locate the binary if a
    # subsequent `stage` uses one of these as its bin.
    for o in $P_OUT; do P_PRODUCED_NAMES="$P_PRODUCED_NAMES $o"; done
}

# _stage_podman — append the current stage to the accumulated run.sh.
# Nothing executes here; pipeline_run runs the script once.
_stage_podman() {
    {
        echo "# stage $P_IDX: $bin$P_HEAD"
        echo "chmod +x ./$P_BIN_PATH"
        echo "./$P_BIN_PATH$P_HEAD"
        # Mirror this stage's outputs back into in/ so a later stage that
        # declares one of them as an input finds it under in/<name>.
        # (The seed driver does this naturally via its per-stage cpio.)
        for o in $P_OUT; do
            echo "cp -f out/$o in/$o"
        done
    } >> "$P_SCRIPT"
}

# _stage_seed — run the current stage as one qemu boot of the seed kernel.
#   hd0 (read-only): cpio with <bin> as `init` plus in/<input> for every
#                    declared input, zero-padded to a 512-byte multiple.
#   hd1:             256M scratch image; $EXTRACT recovers the outputs
#                    from it into s<NN>/dump/.
#   Declared outputs are copied back into the work/ pool for later stages.
#   A watchdog kills qemu after ${SEED_STAGE_TIMEOUT:-240} seconds.
#   Exits 2 on an unsupported SEED_ARCH, 3 on extract failure or a
#   missing output.
_stage_seed() {
    cpio_dir=$P_STAGE_DIR/s$(printf '%02d' "$P_IDX")
    rm -rf "$cpio_dir"; mkdir -p "$cpio_dir/cpio/in"
    cp "$P_STAGE_DIR/work/$bin" "$cpio_dir/cpio/init"
    chmod +x "$cpio_dir/cpio/init"
    # NAMES is the newline-separated member list fed to cpio.
    NAMES="init"
    for inp in $P_IN; do
        cp "$P_STAGE_DIR/work/$inp" "$cpio_dir/cpio/in/$inp"
        NAMES="$NAMES
in/$inp"
    done
    ( cd "$cpio_dir/cpio" && printf '%s\n' "$NAMES" | cpio -o -H newc 2>/dev/null ) > "$cpio_dir/initramfs.cpio"
    sz=$(wc -c < "$cpio_dir/initramfs.cpio")
    pad=$(( (512 - sz % 512) % 512 ))
    if [ "$pad" -gt 0 ]; then
        head -c "$pad" /dev/zero >> "$cpio_dir/initramfs.cpio"
    fi
    mv "$cpio_dir/initramfs.cpio" "$cpio_dir/in.img"
    truncate -s 256M "$cpio_dir/out.img"

    APPEND="$bin$P_HEAD"
    TRANSCRIPT=$cpio_dir/transcript.txt
    echo "[lib-pipeline:seed] stage $P_IDX:$P_HEAD (bin=$bin)" >&2

    # Arch-specific head of the qemu command line, held in "$@". The
    # kernel/drive/append tail below is identical for every arch.
    seed_arch=${SEED_ARCH:-aarch64}
    case "$seed_arch" in
        aarch64)
            set -- qemu-system-aarch64 \
                -machine virt,gic-version=3,accel=hvf -cpu host -m 2048M \
                -nographic -no-reboot \
                -global virtio-mmio.force-legacy=false
            ;;
        riscv64)
            set -- qemu-system-riscv64 \
                -machine virt -m 2048M \
                -nographic -no-reboot \
                -global virtio-mmio.force-legacy=false
            ;;
        amd64)
            set -- qemu-system-x86_64 \
                -machine microvm,acpi=off,pic=off,pit=off,rtc=off,isa-serial=on,auto-kernel-cmdline=off \
                -cpu max -m 2048M \
                -nodefaults -display none -serial stdio -no-reboot \
                -global virtio-mmio.force-legacy=false \
                -device isa-debug-exit,iobase=0x501,iosize=2
            ;;
        *) echo "[lib-pipeline:seed] unsupported SEED_ARCH=$seed_arch" >&2; exit 2 ;;
    esac
    "$@" \
        -kernel "$KERNEL_IMAGE" \
        -drive file="$cpio_dir/in.img",if=none,format=raw,id=hd0,readonly=on \
        -device virtio-blk-device,drive=hd0 \
        -drive file="$cpio_dir/out.img",if=none,format=raw,id=hd1 \
        -device virtio-blk-device,drive=hd1 \
        -append "$APPEND" \
        > "$TRANSCRIPT" 2>&1 &
    QPID=$!
    # Watchdog: hard-kill qemu if the stage hangs.
    ( sleep "${SEED_STAGE_TIMEOUT:-240}"; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 &
    WATCHER=$!
    # `disown` is not POSIX; where it exists it keeps the shell from
    # printing a job-status line when the watcher is killed below.
    disown $WATCHER 2>/dev/null || true
    wait $QPID 2>/dev/null || true
    kill $WATCHER 2>/dev/null || true

    mkdir -p "$cpio_dir/dump"
    if ! "$EXTRACT" "$cpio_dir/dump" "$cpio_dir/out.img" >/dev/null 2>&1; then
        echo "[lib-pipeline:seed] FAIL stage $P_IDX (bin=$bin): extract-blk failed" >&2
        tail -40 "$TRANSCRIPT" >&2
        exit 3
    fi

    for o in $P_OUT; do
        if [ ! -f "$cpio_dir/dump/$o" ]; then
            echo "[lib-pipeline:seed] FAIL stage $P_IDX: missing output '$o'" >&2
            ls "$cpio_dir/dump" >&2 || true
            exit 3
        fi
        cp "$cpio_dir/dump/$o" "$P_STAGE_DIR/work/$o"
    done
}

# pipeline_export <name>... — queue names for pipeline_run to copy into
# the out-dir.
pipeline_export() {
    for _n in "$@"; do P_EXPORTS="$P_EXPORTS $_n"; done
}

# pipeline_run — execute the pipeline (podman: run the accumulated script
# now; seed: stages already ran one by one) and copy every exported name
# into $P_OUT_DIR with mode 0700.
pipeline_run() {
    case "$P_DRIVER" in
        podman) _run_podman ;;
        seed)   : ;;
    esac
    for n in $P_EXPORTS; do
        case "$P_DRIVER" in
            podman) cp "$P_STAGE_DIR/out/$n"  "$P_OUT_DIR/$n" ;;
            seed)   cp "$P_STAGE_DIR/work/$n" "$P_OUT_DIR/$n" ;;
        esac
        chmod 0700 "$P_OUT_DIR/$n"
    done
}

# _run_podman — finish run.sh (append one `cp` that moves the exports
# from the tmpfs out/ to the bind-mounted /work/out/) and run it once in
# $IMAGE for $PLATFORM.
_run_podman() {
    : "${PLATFORM:?lib-pipeline:podman: PLATFORM not set}"
    : "${IMAGE:?lib-pipeline:podman: IMAGE not set}"
    if [ -n "$P_EXPORTS" ]; then
        cmd="cp"
        for n in $P_EXPORTS; do cmd="$cmd out/$n"; done
        cmd="$cmd /work/out/"
        echo "$cmd" >> "$P_SCRIPT"
    fi
    chmod +x "$P_SCRIPT"
    # Bind mounts need absolute host paths.
    SDIR=$(cd "$P_STAGE_DIR" && pwd)
    podman run --rm -i --pull=never --platform "$PLATFORM" \
        --tmpfs /tmp:size=512M \
        -v "$SDIR/run.sh:/work/run.sh:ro" \
        -v "$SDIR/in:/work/in:ro" \
        -v "$SDIR/out:/work/out:rw" \
        -w /work "$IMAGE" \
        sh -eu /work/run.sh
}
# Required env per driver:
#   podman: IMAGE, PLATFORM
#   seed:   KERNEL_IMAGE, EXTRACT, optional QEMU_MEM (default 2048M)
#   both:   DRIVER=podman|seed

# Harness state (POSIX sh has no locals; everything is a global).
S_STAGE_DIR=     # staging dir; wiped by runscm_init
S_OUT_DIR=       # where runscm_run copies exported names
S_SCHEME1=       # host path of the scheme1 binary
S_PRELUDE=       # host path of scheme1/prelude.scm
S_RUNSCM=        # host path of the driver script (run.scm)
S_EXPORTS=       # space-separated names queued by runscm_export

# runscm_init <staging-dir> <out-dir>
#   Reset harness state and create a fresh <staging-dir>/{in,out} plus
#   <out-dir>. Both arguments must be non-empty: the staging dir is
#   removed with `rm -rf`, and an empty path would otherwise make the
#   mkdir below target `/in` and `/out`. Exits 2 on bad arguments.
runscm_init() {
    S_STAGE_DIR=${1:-}; S_OUT_DIR=${2:-}
    if [ -z "$S_STAGE_DIR" ] || [ -z "$S_OUT_DIR" ]; then
        echo "runscm: runscm_init: <staging-dir> and <out-dir> are required" >&2
        exit 2
    fi
    rm -rf "$S_STAGE_DIR"
    mkdir -p "$S_STAGE_DIR/in" "$S_STAGE_DIR/out" "$S_OUT_DIR"
    S_SCHEME1=; S_PRELUDE=; S_RUNSCM=
    S_EXPORTS=
}

# Setters: record host paths; nothing is copied until runscm_run.
runscm_scheme1() { S_SCHEME1=$1; }
runscm_prelude() { S_PRELUDE=$1; }
runscm_runscm()  { S_RUNSCM=$1; }

# runscm_gen <gen-script> <args...>
#   Run a host-side generator that emits run.scm to $S_STAGE_DIR/run.scm
#   (the output path is passed as the generator's LAST argument), log its
#   size, and register it as the driver script. Used by boot4/5/6 which
#   build their run.scm dynamically.
runscm_gen() {
    _gen=$1; shift
    _runscm=$S_STAGE_DIR/run.scm
    "$_gen" "$@" "$_runscm"
    echo "[$BOOT_TAG] generated run.scm: $(wc -l <"$_runscm") lines, $(wc -c <"$_runscm") bytes"
    S_RUNSCM=$_runscm
}

# runscm_input <name> <host-path>
#   Stage <host-path> as in/<name>. <name> may contain `/`; parent
#   directories under in/ are created as needed.
runscm_input() {
    name=$1; src=$2
    case "$name" in
        */*) mkdir -p "$S_STAGE_DIR/in/$(dirname "$name")" ;;
    esac
    cp "$src" "$S_STAGE_DIR/in/$name"
}

# runscm_input_tree <prefix> <src-root>
#   Stage every regular file under <src-root> into in/<prefix>/...,
#   preserving the relative directory tree. Exits 2 if <src-root> is not
#   a directory. File names containing newlines are not supported.
runscm_input_tree() {
    prefix=$1; src_root=$2
    [ -d "$src_root" ] || { echo "runscm: input_tree: $src_root not a dir" >&2; exit 2; }
    # `IFS= read -r`: take each path verbatim. Without IFS= the shell
    # strips leading/trailing blanks, so such a file would be looked up
    # under the wrong name.
    ( cd "$src_root" && find . -type f ) | sed 's|^\./||' | sort | while IFS= read -r rel; do
        [ -n "$rel" ] || continue
        mkdir -p "$S_STAGE_DIR/in/$prefix/$(dirname "$rel")"
        cp "$src_root/$rel" "$S_STAGE_DIR/in/$prefix/$rel"
    done
}

# runscm_input_from_src <subpath> [<name>]
#   Pull a file from the canonical generated source tree at
#   build/$ARCH/src/src/<subpath>. Stages it under in/<name>;
#   <name> defaults to basename(subpath). For the rare `bin/` case,
#   call runscm_input directly with build/$ARCH/src/bin/<file>.
runscm_input_from_src() {
    _subpath=$1; _name=${2:-}
    [ -n "$_name" ] || _name=$(basename "$_subpath")
    runscm_input "$_name" "build/$ARCH/src/src/$_subpath"
}

# runscm_input_tree_from_src <prefix> <subpath>
#   Same as runscm_input_tree, but the source root is
#   build/$ARCH/src/src/<subpath>.
runscm_input_tree_from_src() {
    _prefix=$1; _subpath=$2
    runscm_input_tree "$_prefix" "build/$ARCH/src/src/$_subpath"
}

# runscm_export <name>... — queue output names for runscm_run to copy
# from out/ into the out-dir.
runscm_export() {
    for _n in "$@"; do S_EXPORTS="$S_EXPORTS $_n"; done
}

# runscm_run [timeout-s]
#   Stage scheme1 and combined.scm (= prelude + run.scm), run them under
#   $DRIVER (podman when unset), then copy every exported name from
#   out/ to $S_OUT_DIR with mode 0700. The timeout (default 600) is
#   handed to both transports; only the seed transport visibly uses it.
#   Exits 2 when scheme1/prelude/run.scm are unset or DRIVER is unknown,
#   5 when an exported output is missing.
runscm_run() {
    timeout=${1:-600}
    [ -n "$S_SCHEME1" ] || { echo "runscm: scheme1 not set" >&2; exit 2; }
    [ -n "$S_PRELUDE" ] || { echo "runscm: prelude not set" >&2; exit 2; }
    [ -n "$S_RUNSCM"  ] || { echo "runscm: run.scm not set" >&2; exit 2; }
    cp "$S_SCHEME1" "$S_STAGE_DIR/in/scheme1"
    chmod +x "$S_STAGE_DIR/in/scheme1"
    cat "$S_PRELUDE" "$S_RUNSCM" > "$S_STAGE_DIR/in/combined.scm"
    # Top-level reference copy of run.scm for human inspection.
    # runscm_gen already writes there; skip the self-copy.
    case "$S_RUNSCM" in
        "$S_STAGE_DIR/run.scm") : ;;
        *) cp "$S_RUNSCM" "$S_STAGE_DIR/run.scm" ;;
    esac

    case "${DRIVER:-podman}" in
        podman) _runscm_run_podman "$timeout" ;;
        seed)   _runscm_run_seed   "$timeout" ;;
        *) echo "runscm: unknown DRIVER=$DRIVER (expected podman|seed)" >&2; exit 2 ;;
    esac

    for n in $S_EXPORTS; do
        if [ ! -f "$S_STAGE_DIR/out/$n" ]; then
            echo "[runscm/$DRIVER] FAIL: missing output '$n'" >&2
            ls "$S_STAGE_DIR/out" >&2 || true
            exit 5
        fi
        cp "$S_STAGE_DIR/out/$n" "$S_OUT_DIR/$n"
        chmod 0700 "$S_OUT_DIR/$n"
    done
}
# _runscm_run_podman — podman transport.
#   Bind-mounts $S_STAGE_DIR/in read-only and $S_STAGE_DIR/out read-write
#   under /work and runs `in/scheme1 in/combined.scm` with cwd=/work, so
#   outputs land in $S_STAGE_DIR/out/ directly. Requires IMAGE, PLATFORM.
_runscm_run_podman() {
    : "${IMAGE:?lib-runscm: IMAGE not set}"
    : "${PLATFORM:?lib-runscm: PLATFORM not set}"
    # Bind mounts need absolute host paths.
    in_abs=$(cd "$S_STAGE_DIR/in" && pwd)
    out_abs=$(cd "$S_STAGE_DIR/out" && pwd)
    echo "[runscm/podman] scheme1 combined.scm under $IMAGE" >&2
    podman run --rm -i --pull=never \
        --platform "$PLATFORM" \
        -v "$in_abs:/work/in:ro" -v "$out_abs:/work/out:rw" \
        -w /work \
        "$IMAGE" in/scheme1 in/combined.scm
}

# _runscm_run_seed <timeout-s> — seed-kernel transport.
#   hd0 (read-only): cpio holding scheme1 as `init` at the root plus the
#                    whole in/ subtree, zero-padded to a 512-byte multiple.
#   hd1:             256M scratch image; after the run $EXTRACT unpacks
#                    it into $S_STAGE_DIR/out/.
#   The kernel is booted with the command line `init in/combined.scm`; a
#   watchdog kills qemu after <timeout-s> seconds.
#   Exits 2 on an unsupported SEED_ARCH, 3 when extraction fails, 4 when
#   the transcript's last `user exit_group` line is not exit_group(0).
#   Requires KERNEL_IMAGE, EXTRACT; honours QEMU_MEM (default 2048M).
_runscm_run_seed() {
    timeout=$1
    : "${KERNEL_IMAGE:?lib-runscm: KERNEL_IMAGE not set}"
    : "${EXTRACT:?lib-runscm: EXTRACT not set}"
    mem=${QEMU_MEM:-2048M}

    # ── build the input disk ──────────────────────────────────────────
    cp "$S_STAGE_DIR/in/scheme1" "$S_STAGE_DIR/init"
    chmod +x "$S_STAGE_DIR/init"
    (
        cd "$S_STAGE_DIR" &&
        { echo init; find in -type f; } | sort -u | cpio -o -H newc 2>/dev/null
    ) > "$S_STAGE_DIR/initramfs.cpio"
    sz=$(wc -c < "$S_STAGE_DIR/initramfs.cpio")
    pad=$(( (512 - sz % 512) % 512 ))
    [ "$pad" -le 0 ] || head -c "$pad" /dev/zero >> "$S_STAGE_DIR/initramfs.cpio"
    mv "$S_STAGE_DIR/initramfs.cpio" "$S_STAGE_DIR/in.img"
    truncate -s 256M "$S_STAGE_DIR/out.img"

    TRANSCRIPT=$S_STAGE_DIR/transcript.txt
    echo "[runscm/seed] booting scheme1 + run.scm (timeout ${timeout}s)" >&2

    # ── launch qemu ───────────────────────────────────────────────────
    # "$@" receives the arch-specific head of the command line; the
    # kernel/drive/append tail is shared by all three arches.
    seed_arch=${SEED_ARCH:-aarch64}
    case "$seed_arch" in
        aarch64)
            set -- qemu-system-aarch64 \
                -machine virt,gic-version=3,accel=hvf -cpu host -m "$mem" \
                -nographic -no-reboot \
                -global virtio-mmio.force-legacy=false
            ;;
        riscv64)
            # No hvf accel on Apple Silicon for riscv64 — TCG only.
            set -- qemu-system-riscv64 \
                -machine virt -m "$mem" \
                -nographic -no-reboot \
                -global virtio-mmio.force-legacy=false
            ;;
        amd64)
            # microvm + isa-debug-exit mirrors seed-kernel/run.sh: the
            # kernel writes to port 0x501 on user exit_group(0) so QEMU
            # exits cleanly (no `-no-reboot` triple-fault gymnastics).
            set -- qemu-system-x86_64 \
                -machine microvm,acpi=off,pic=off,pit=off,rtc=off,isa-serial=on,auto-kernel-cmdline=off \
                -cpu max -m "$mem" \
                -nodefaults -display none -serial stdio -no-reboot \
                -global virtio-mmio.force-legacy=false \
                -device isa-debug-exit,iobase=0x501,iosize=2
            ;;
        *)
            echo "[runscm/seed] unsupported SEED_ARCH=$seed_arch" >&2
            exit 2
            ;;
    esac
    "$@" \
        -kernel "$KERNEL_IMAGE" \
        -drive file="$S_STAGE_DIR/in.img",if=none,format=raw,id=hd0,readonly=on \
        -device virtio-blk-device,drive=hd0 \
        -drive file="$S_STAGE_DIR/out.img",if=none,format=raw,id=hd1 \
        -device virtio-blk-device,drive=hd1 \
        -append "init in/combined.scm" \
        > "$TRANSCRIPT" 2>&1 &
    QPID=$!

    # ── watchdog ──────────────────────────────────────────────────────
    ( sleep "$timeout"; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 &
    WATCHER=$!
    # `disown` removes the watcher from the shell's job table so that
    # killing it on the happy path doesn't trigger bash's
    # "Terminated: 15 PID ( sleep … )" job-status message — that
    # message looks like a real failure but is just a noisy SIGTERM
    # notification fired when qemu exited normally before the watcher's
    # sleep elapsed.
    disown $WATCHER 2>/dev/null || true
    wait $QPID 2>/dev/null || true
    kill $WATCHER 2>/dev/null || true

    # ── recover outputs and check the driver's exit ───────────────────
    "$EXTRACT" "$S_STAGE_DIR/out" "$S_STAGE_DIR/out.img" >/dev/null 2>&1 || {
        echo "[runscm/seed] FAIL: extract-blk failed (kernel didn't reach exit?)" >&2
        tail -40 "$TRANSCRIPT" >&2
        exit 3
    }
    EXIT_LINE=$(grep -E "user exit_group" "$TRANSCRIPT" | tail -1 || true)
    case "$EXIT_LINE" in
        *"exit_group(0)"*)
            : ;;
        *)
            echo "[runscm/seed] FAIL: driver did not exit 0: $EXIT_LINE" >&2
            tail -40 "$TRANSCRIPT" >&2
            exit 4
            ;;
    esac
}
&& pwd) +cd "$ROOT" + +IMAGE=boot2-scratch:$ARCH +BOOT4=build/$ARCH/boot4 +STAGE=build/$ARCH/.boot5-calibrate +MUSL_TARBALL=vendor/musl/1.2.5.tar.gz +MUSL_OVERRIDES=vendor/musl/overrides +MUSL_DELETES=vendor/musl/deletes.txt +MUSL_GENERATED=vendor/musl/generated/$MUSL_ARCH +BRIDGE_FILE=build/$ARCH/vendor/tcc/stdarg-bridge.h +SKIP_OUT=vendor/musl/skip-$ARCH.txt + +[ -x "$BOOT4/tcc3" ] || { echo "missing $BOOT4/tcc3 (run boot/boot4.sh $ARCH)" >&2; exit 1; } +[ -e "$BOOT4/libtcc1.a" ] || { echo "missing $BOOT4/libtcc1.a" >&2; exit 1; } +[ -e "$MUSL_TARBALL" ] || { echo "missing $MUSL_TARBALL" >&2; exit 1; } +[ -d "$MUSL_OVERRIDES" ] || { echo "missing $MUSL_OVERRIDES" >&2; exit 1; } +[ -e "$MUSL_DELETES" ] || { echo "missing $MUSL_DELETES" >&2; exit 1; } +[ -d "$MUSL_GENERATED" ] || { echo "missing $MUSL_GENERATED (run bootprep/musl-vendor.sh)" >&2; exit 1; } +[ -e "$BRIDGE_FILE" ] || { echo "missing $BRIDGE_FILE (run bootprep/stage1-flatten.sh)" >&2; exit 1; } + +if ! podman image exists "$IMAGE"; then + podman build --platform "$PLATFORM" -t "$IMAGE" \ + -f boot/containers/Containerfile.scratch boot/containers/ +fi + +rm -rf "$STAGE" +mkdir -p "$STAGE/in" "$STAGE/out" + +cp "$BOOT4/tcc3" "$STAGE/in/tcc" +cp "$BOOT4/libtcc1.a" "$STAGE/in/libtcc1.a" +tar xzf "$MUSL_TARBALL" -C "$STAGE/in/" +MUSL_DIR=$STAGE/in/musl-1.2.5 +cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/" +while read -r p; do + [ -n "$p" ] && rm -rf "$MUSL_DIR/$p" +done < "$MUSL_DELETES" +cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h" +cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h" +cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h" + +echo "[calibrate $ARCH] running skip-on-fail compile loop in container" +podman run --rm -i --pull=never --platform "$PLATFORM" \ + --tmpfs /tmp:size=1024M \ + -e MUSL_ARCH="$MUSL_ARCH" \ + -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ + sh -eu -s <<'CONTAINER' +IN=/work/in +OUT=/work/out +TCC=$IN/tcc + +cd /tmp +cp -R "$IN/musl-1.2.5" . 
+cd musl-1.2.5 + +mkdir -p obj/include/bits obj/src/internal +cp $IN/musl-alltypes.h obj/include/bits/alltypes.h +cp $IN/musl-syscall.h obj/include/bits/syscall.h +echo '#define VERSION "1.2.5-tcc-boot5"' > obj/src/internal/version.h + +CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing + -D_XOPEN_SOURCE=700 + -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/src/internal + -I./src/include -I./src/internal -Iobj/include -I./include + -O2 -fomit-frame-pointer + -Werror=implicit-function-declaration -Werror=implicit-int + -Werror=pointer-sign -Werror=pointer-arith" +CFLAGS_C="$CFLAGS_BASE -include $IN/tcc-stdarg-bridge.h" +CFLAGS_ASM="$CFLAGS_BASE" + +SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent + src/env src/errno src/exit src/fcntl src/fenv src/internal + src/ipc src/legacy src/linux src/locale src/malloc + src/malloc/mallocng src/math src/misc src/mman src/mq + src/multibyte src/network src/passwd src/prng src/process + src/regex src/sched src/search src/select src/setjmp src/signal + src/stat src/stdio src/stdlib src/string src/temp src/termios + src/thread src/time src/unistd" + +BASE_SRCS=""; ARCH_SRCS="" +for d in $SRC_TOP; do + [ -d "$d" ] || continue + for f in $d/*.c; do [ -f "$f" ] && BASE_SRCS="$BASE_SRCS $f"; done + for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do + [ -f "$f" ] && ARCH_SRCS="$ARCH_SRCS $f" + done +done +REPLACED="" +for a in $ARCH_SRCS; do + p=${a%.*} + head=${p%%/${MUSL_ARCH}/*} + tail=${p#*/${MUSL_ARCH}/} + REPLACED="$REPLACED $head/$tail" +done +KEEP="" +for b in $BASE_SRCS; do + stem=${b%.c}; skip=0 + for r in $REPLACED; do [ "$stem" = "$r" ] && { skip=1; break; }; done + [ $skip -eq 0 ] && KEEP="$KEEP $b" +done +KEEP="$KEEP $ARCH_SRCS" + +mkdir -p obj/lib +n=0; n_ok=0; n_skip=0 +: >$OUT/skipped.txt +for src in $KEEP; do + obj="obj/${src%.*}.o" + mkdir -p "$(dirname $obj)" + case "$src" in + *.c) flags="$CFLAGS_C" ;; + *.s | *.S) flags="$CFLAGS_ASM" ;; + *) flags="$CFLAGS_C" ;; + esac + if $TCC 
$flags -c "$src" -o "$obj" >/tmp/compile.log 2>&1; then + n_ok=$((n_ok+1)) + else + n_skip=$((n_skip+1)) + echo "$src" >>$OUT/skipped.txt + fi + n=$((n+1)) + [ $((n % 200)) -eq 0 ] && echo " $n done (ok=$n_ok skip=$n_skip)" +done +echo " compiled=$n_ok skipped=$n_skip total=$n" +CONTAINER + +sort -u "$STAGE/out/skipped.txt" > "$SKIP_OUT" +echo "[calibrate $ARCH] wrote $SKIP_OUT ($(wc -l <"$SKIP_OUT") entries)" diff --git a/bootprep/include/stdarg.h b/bootprep/include/stdarg.h @@ -0,0 +1,39 @@ +/* boot2 stdarg.h — shadows mes/include/stdarg.h for both flatten + * paths (bootprep/{stage1,libc}-flatten.sh both have -I on the + * containing dir ahead of mes's include tree). Routes va_* through + * __builtin_va_*, so tcc.flat.c and libc.flat.c compile cleanly + * under both our cc.scm (which recognizes __builtin_va_list and + * __builtin_va_start/arg/end) and stock gcc/clang (where they're + * native). + * + * Mes's stdarg.h has a similar __builtin-routed branch but only + * activates under __riscv. We can't set -D __riscv at flatten time + * without also flipping setjmp.h and tcc-internal arch logic, so we + * shadow the whole header instead. + */ +#ifndef __MES_STDARG_H +#define __MES_STDARG_H 1 + +typedef __builtin_va_list va_list; + +#define va_start(v, l) __builtin_va_start((v), (l)) +#define va_end(v) __builtin_va_end((v)) +#define va_arg(v, t) __builtin_va_arg((v), t) +#define va_arg8(ap, type) va_arg((ap), type) +#define va_copy(d, s) __builtin_va_copy((d), (s)) + +/* mes/include/stdarg.h forward-declares the v* family here (instead + * of in <stdio.h>); tcc.c calls vsnprintf without ever including + * <stdio.h>, so dropping mes's stdarg.h in favor of this shim must + * still leak these prototypes. FILE and size_t come from a prior + * include in mes-libc TUs; tcc.c works because it includes + * <sys/types.h> for size_t and uses (FILE*) implicitly. 
*/ +int vexec (char const *file_name, va_list ap); +int vfprintf (FILE *stream, char const *template, va_list ap); +int vfscanf (FILE *stream, char const *template, va_list ap); +int vprintf (char const *format, va_list ap); +int vsprintf (char *str, char const *format, va_list ap); +int vsnprintf(char *str, size_t size, char const *format, va_list ap); +int vsscanf (char const *s, char const *template, va_list ap); + +#endif /* __MES_STDARG_H */ diff --git a/bootprep/libc-flatten.sh b/bootprep/libc-flatten.sh @@ -0,0 +1,205 @@ +#!/bin/sh +## libc-flatten.sh — flatten the vendored mes-libc + boot2-syscall.c +## into a single libc.flat.c using the host preprocessor. Mirrors +## stage1-flatten.sh; runs on the host, no container — hence the +## non-`boot-` name (the convention in boot/ is that boot-*.sh +## runs inside the minimal container). +## +## Steps: +## 1. stage vendor/mes-libc → build/<arch>/vendor/mes-libc/libc-stage/ +## 2. apply simple-patches (literal-block replacement, idempotent) +## 3. HOST_CC -E -nostdinc -I staging/include … staging/unified-libc.c +## → build/<arch>/vendor/mes-libc/libc.flat.c +## +## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule +## that reuses scripts/boot-build-cc.sh inside the per-arch container. +## +## ARCH selects the boot2 target (aarch64/amd64/riscv64). MES_ARCH is +## the mes header tree we hand the host preprocessor; mes ships +## x86_64/riscv64 only, so aarch64 builds borrow riscv64's headers (the +## resulting libc.flat.c references no SYS_* / kernel-stat fields, so +## the choice only affects type widths, all 64-bit Linux-identical). 
+## +## Usage: bootprep/libc-flatten.sh [--arch <aarch64|amd64|riscv64>] + +set -eu + +ARCH=aarch64 +while [ $# -gt 0 ]; do + case "$1" in + --arch) ARCH=$2; shift 2 ;; + -h|--help) awk '/^##/ { sub(/^## ?/, ""); print }' "$0"; exit 0 ;; + *) echo "unknown arg: $1" >&2; exit 2 ;; + esac +done + +case "$ARCH" in + aarch64) MES_ARCH=riscv64 ;; + amd64) MES_ARCH=x86_64 ;; + riscv64) MES_ARCH=riscv64 ;; + *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;; +esac + +ROOT=$(cd "$(dirname "$0")/.." && pwd) +VENDOR=$ROOT/vendor/mes-libc +WORK=$ROOT/build/$ARCH/vendor/mes-libc +STAGE=$WORK/libc-stage +FLAT=$WORK/libc.flat.c + +[ -d "$VENDOR" ] || { echo "missing $VENDOR" >&2; exit 1; } +[ -d "$VENDOR/include" ] || { echo "missing $VENDOR/include" >&2; exit 1; } +[ -d "$VENDOR/include/linux/$MES_ARCH" ] \ + || { echo "missing $VENDOR/include/linux/$MES_ARCH" >&2; exit 1; } + +# --- (1) stage -------------------------------------------------------- +mkdir -p "$WORK" +rm -rf "$STAGE" +mkdir -p "$STAGE" +# cp -R copies symlinks as files; staging is our writable scratch. +cp -R "$VENDOR/." "$STAGE/" + +# mes's sys/stat.h, signal.h, dirent.h reach for <arch/kernel-stat.h> +# and similar; the per-arch tree under include/linux/<MES_ARCH>/ is what +# they want. Copy the per-arch tree into include/arch so the unprefixed +# `arch/...` includes resolve. (cp -R, not ln -sfn — keeps the dep set +# down to coreutils we already use.) +cp -R "$STAGE/include/linux/$MES_ARCH" "$STAGE/include/arch" + + +# --- (2) patches ------------------------------------------------------ +# Same literal-block replacer as stage1-flatten.sh apply_simple_patch. 
+# apply_simple_patch TARGET BEFORE AFTER
+#   Literal (non-regex) block replacement: rewrites TARGET so that the
+#   first occurrence of the text of file BEFORE is replaced by the text
+#   of file AFTER. The result is written to TARGET.new and then moved
+#   over TARGET.
+#   - Idempotent: if the (non-empty) AFTER text already occurs in
+#     TARGET, the content is emitted unchanged and the call succeeds.
+#   - All three files are re-assembled line by line with "\n" appended,
+#     so the rewritten TARGET always ends with a newline.
+#   - Exits the script with status 1 if any of the three files is not
+#     readable. If BEFORE does not occur in TARGET, awk prints "patch
+#     did not match" and exits 1; under the script's `set -e` the run
+#     stops before the mv, leaving TARGET untouched.
+#   - The `aft != ""` guard keeps an empty AFTER file (a pure deletion)
+#     from being treated as "already applied": index(s, "") is
+#     awk-implementation dependent.
+#   NOTE(review): stage1-flatten.sh reportedly carries the same
+#   replacer — TODO confirm and mirror the guard there.
+apply_simple_patch() {
+    target=$1; before=$2; after=$3
+    [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; }
+    [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; }
+    [ -r "$after" ] || { echo "patch after missing: $after" >&2; exit 1; }
+    awk -v BFILE="$before" -v AFILE="$after" '
+        BEGIN {
+            while ((getline line < BFILE) > 0) bef = bef line "\n";
+            close(BFILE);
+            while ((getline line < AFILE) > 0) aft = aft line "\n";
+            close(AFILE);
+        }
+        { src = src $0 "\n" }
+        END {
+            if (aft != "" && index(src, aft) > 0) {
+                printf "%s", src;
+                exit 0;
+            }
+            i = index(src, bef);
+            if (i == 0) { print "patch did not match" > "/dev/stderr"; exit 1 }
+            printf "%s%s%s",
+                substr(src, 1, i - 1),
+                aft,
+                substr(src, i + length(bef));
+        }
+    ' "$target" > "$target.new"
+    mv "$target.new" "$target"
+}
+
+PATCHES=$STAGE/patches
+apply_simple_patch \
+    "$STAGE/linux/malloc.c" \
+    "$PATCHES/malloc-max-align.before" \
+    "$PATCHES/malloc-max-align.after"
+apply_simple_patch \
+    "$STAGE/linux/malloc.c" \
+    "$PATCHES/malloc-brk-check.before" \
+    "$PATCHES/malloc-brk-check.after"
+apply_simple_patch \
+    "$STAGE/string/strstr.c" \
+    "$PATCHES/strstr-drop-mman.before" \
+    "$PATCHES/strstr-drop-mman.after"
+apply_simple_patch \
+    "$STAGE/include/mes/lib-mini.h" \
+    "$PATCHES/libmini-write-proto.before" \
+    "$PATCHES/libmini-write-proto.after"
+apply_simple_patch \
+    "$STAGE/include/mes/lib-mini.h" \
+    "$PATCHES/libmini-write-proto2.before" \
+    "$PATCHES/libmini-write-proto2.after"
+apply_simple_patch \
+    "$STAGE/include/mes/lib.h" \
+    "$PATCHES/lib-mes-debug-proto.before" \
+    "$PATCHES/lib-mes-debug-proto.after"
+apply_simple_patch \
+    "$STAGE/mes/ntoab.c" \
+    "$PATCHES/ntoab-inline-defined.before" \
+    "$PATCHES/ntoab-inline-defined.after"
+# stdio/{printf,sprintf,snprintf}.c carry a mes-mescc-specific
+# `ap += (__FOO_VARARGS + ...)` block guarded by `__GNUC__ && __x86_64__`.
+# That arithmetic is meaningful only inside mes's compiler; under stock +# gcc preprocessing for amd64 it expands to a reference to an undefined +# `__FOO_VARARGS` and breaks cc.scm. Strip the block — the va_start that +# follows handles varargs correctly under any standard C compiler. +apply_simple_patch \ + "$STAGE/stdio/printf.c" \ + "$PATCHES/printf-mes-varargs.before" \ + "$PATCHES/printf-mes-varargs.after" +apply_simple_patch \ + "$STAGE/stdio/sprintf.c" \ + "$PATCHES/sprintf-mes-varargs.before" \ + "$PATCHES/sprintf-mes-varargs.after" +apply_simple_patch \ + "$STAGE/stdio/snprintf.c" \ + "$PATCHES/snprintf-mes-varargs.before" \ + "$PATCHES/snprintf-mes-varargs.after" +# stdio/vfprintf.c and stdio/vsnprintf.c read every integer / char +# variadic via `va_arg(ap, long)`. On amd64 SysV an `int` arg occupies +# an 8-byte reg-save slot whose upper 32 bits are unspecified — tcc's +# codegen (and most other compilers') doesn't sign-extend ints into +# the slot. Reading as `long` then leaks the garbage upper bits. Track +# the `l` length modifier and dispatch the va_arg type accordingly. +apply_simple_patch \ + "$STAGE/stdio/vfprintf.c" \ + "$PATCHES/printf-int-promo.before" \ + "$PATCHES/printf-int-promo.after" +apply_simple_patch \ + "$STAGE/stdio/vsnprintf.c" \ + "$PATCHES/vsnprintf-int-promo.before" \ + "$PATCHES/vsnprintf-int-promo.after" +# --- (3) flatten via host preprocessor -------------------------------- +HOST_CC=${HOST_CC:-cc} + +# Bridge file: post-patch tcc <stdarg.h>. Written by stage1-flatten.sh, +# which boot3.sh / Makefile run first. Required so we can prepend the +# per-arch va_list typedef + __builtin_va_* → tcc __va_* mapping into +# libc.flat.c, eliminating the need for `-I /work/in/tcc-include +# -include /work/in/tcc-include/stdarg.h` on every in-container compile. 
+BRIDGE=$ROOT/build/$ARCH/vendor/tcc/stdarg-bridge.h
+[ -e "$BRIDGE" ] || { echo "missing $BRIDGE — run bootprep/stage1-flatten.sh first" >&2; exit 1; }
+
+# -I order matters: bootprep/include first so our stdarg.h shim
+# (routes va_* through __builtin_va_*; see comment in that file) wins
+# over mes's. Then $STAGE/include for everything else — <signal.h>,
+# <stdio.h>, etc. hit the canonical mes/include versions; arch/<…>
+# resolves through include/arch, the cp -R copy of include/linux/$MES_ARCH.
+# Putting the per-arch directory ahead of include/ makes <signal.h>
+# resolve to the partial arch-specific snippet (no stack_t typedef etc)
+# and the build breaks.
+"$HOST_CC" -E -P \
+    -nostdinc \
+    -I "$ROOT/bootprep/include" \
+    -I "$STAGE/include" \
+    -I "$STAGE" \
+    -D HAVE_CONFIG_H=0 \
+    -D __linux__=1 \
+    -D __${MES_ARCH}__=1 \
+    -D __riscv_xlen=64 \
+    -D inline= \
+    "$STAGE/unified-libc.c" > "$FLAT.body"
+
+# Prepend the bridge, guarded by !CCSCM (cc.scm predefines CCSCM and
+# handles __builtin_va_* natively, so it must skip this block). Under
+# tcc, the per-arch #ifdefs inside the bridge resolve and provide the
+# va_list typedef + __builtin_va_* → tcc native __va_* macros.
+{
+    echo '#ifndef CCSCM'
+    cat "$BRIDGE"
+    echo '#endif'
+    cat "$FLAT.body"
+} > "$FLAT"
+rm -f "$FLAT.body"
+
+BYTES=$(wc -c < "$FLAT")
+echo "produced $FLAT ($BYTES bytes)"
diff --git a/scripts/mkalltypes.awk b/bootprep/mkalltypes.awk
diff --git a/bootprep/musl-vendor.sh b/bootprep/musl-vendor.sh
@@ -0,0 +1,172 @@
+#!/bin/sh
+## musl-vendor.sh — regenerate vendor/musl/overrides/ and
+## vendor/musl/deletes.txt from the upstream tarball + tcc-compat patch.
+##
+## NOT on the boot.sh path. This is a vendoring helper run on a dev host
+## any time vendor/musl/patches/tcc.patch changes; it requires
+## the host's `patch` binary. The output it produces (the overrides
+## directory tree + the deletes list) is what boot5.sh consumes — boot5
+## itself never invokes `patch`.
+##
+## What the script does:
+##   1. Extract a pristine copy of vendor/musl/1.2.5.tar.gz to a scratch dir.
+##   2. Apply vendor/musl/patches/tcc.patch.
+##   3. For every file the patch touched:
+##        - if non-empty in the patched tree → vendor it into
+##          vendor/musl/overrides/<path>, mirroring the musl-1.2.5/
+##          subtree layout.
+##        - if 0-byte (the patch's deleted-file marker) → record the
+##          path in vendor/musl/deletes.txt.
+##   4. Append the per-arch sweep deletes (aarch64+riscv64 overrides
+##      arm64-asm.c phase 1+2 / riscv64-asm.c can't yet handle).
+##   5. Sort + dedupe the deletes list.
+##
+## Consumers (e.g. bootprep/prep-src.sh) then mirror this state without `patch`:
+##   cp -R overrides/. musl-1.2.5/
+##   while read p; do rm -rf "musl-1.2.5/$p"; done < deletes.txt
+
+set -eu
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+cd "$ROOT"
+
+TARBALL=vendor/musl/1.2.5.tar.gz
+PATCH_FILE=vendor/musl/patches/tcc.patch
+OVERRIDES=vendor/musl/overrides
+DELETES=vendor/musl/deletes.txt
+GENERATED=vendor/musl/generated
+MKALLTYPES_AWK=bootprep/mkalltypes.awk
+
+[ -e "$TARBALL" ] || { echo "missing $TARBALL" >&2; exit 1; }
+[ -e "$PATCH_FILE" ] || { echo "missing $PATCH_FILE" >&2; exit 1; }
+command -v patch >/dev/null || { echo "host patch not found" >&2; exit 1; }
+
+WORK=$(mktemp -d)
+trap 'rm -rf "$WORK"' EXIT
+
+# (1) extract
+tar xzf "$TARBALL" -C "$WORK"
+SRC=$WORK/musl-1.2.5
+[ -d "$SRC" ] || { echo "tarball did not produce musl-1.2.5/" >&2; exit 1; }
+
+# (2) apply patch (cwd = parent of musl-1.2.5/; -p1 strips the leading path component, e.g. patched_musl/)
+( cd "$WORK" && patch -p1 < "$ROOT/$PATCH_FILE" ) >"$WORK/patch.log" 2>&1 \
+    || { tail -40 "$WORK/patch.log" >&2; exit 1; }
+
+# (3) mirror touched files into overrides/, record empty ones as deletes
+rm -rf "$OVERRIDES"
+mkdir -p "$OVERRIDES"
+: > "$DELETES"
+
+# Each diff in the patch starts with `diff -urN ... patched_musl/musl-1.2.5/<rel>`.
+# Strip the "patched_musl/musl-1.2.5/" prefix to get a path under SRC.
+awk '/^diff -urN/ { + sub(/^patched_musl\/musl-1.2.5\//, "", $4); print $4 + }' "$PATCH_FILE" | +while read -r rel; do + f=$SRC/$rel + if [ -s "$f" ]; then + mkdir -p "$OVERRIDES/$(dirname "$rel")" + cp "$f" "$OVERRIDES/$rel" + else + echo "$rel" >> "$DELETES" + fi +done + +# (4) per-arch sweep: aarch64 + riscv64 files boot5 can't compile yet. +# Globs are expanded against the upstream tree, so every entry is a +# concrete file path (no glob in the deletes list itself). +for f in "$SRC"/src/math/aarch64/*.c; do + [ -e "$f" ] && echo "${f#$SRC/}" >> "$DELETES" +done +for f in "$SRC"/src/math/riscv64/*.c; do + [ -e "$f" ] && echo "${f#$SRC/}" >> "$DELETES" +done +{ + echo src/string/aarch64/memset.S + echo src/string/aarch64/memcpy.S + echo src/fenv/aarch64/fenv.s + echo src/thread/aarch64/clone.s + echo src/thread/aarch64/syscall_cp.s + echo src/thread/aarch64/__unmapself.s + echo src/setjmp/aarch64/setjmp.s + echo src/setjmp/aarch64/longjmp.s + echo src/signal/aarch64/sigsetjmp.s + echo src/fenv/riscv64/fenv.S + echo src/fenv/riscv64/fenv-sf.c + echo src/setjmp/riscv64/setjmp.S + echo src/setjmp/riscv64/longjmp.S + echo src/signal/riscv64/sigsetjmp.s + echo src/signal/riscv64/restore.s + echo src/thread/riscv64/clone.s + echo src/thread/riscv64/syscall_cp.s + echo src/thread/riscv64/__unmapself.s + echo src/process/riscv64/vfork.s + # src/complex/ is gutted by the patch (every .c file becomes 0-byte + # in the patched tree, so it's already in DELETES). The dir entry + # cleans up the empty directory itself. + echo src/complex +} >> "$DELETES" + +# (5) sort + dedupe +sort -u "$DELETES" -o "$DELETES" + +# (6) pre-generate per-arch alltypes.h + syscall.h. +# These are deterministic given the upstream tree + chosen arch (musl's +# Makefile runs the same two transformations at build time). Vendoring +# them lets the boot5 container drop awk entirely — it just `cp`s the +# right file in. 
mkalltypes runs on the post-overrides tree (overrides +# don't touch alltypes.h.in, but applying them keeps the procedure +# coherent). Apply overrides + deletes to a fresh post-patch copy and +# generate from there. +POST=$WORK/post +mkdir -p "$POST" +cp -R "$SRC" "$POST/" +cp -R "$ROOT/$OVERRIDES/." "$POST/musl-1.2.5/" +while read -r p; do + [ -n "$p" ] && rm -rf "$POST/musl-1.2.5/$p" +done < "$ROOT/$DELETES" + +rm -rf "$ROOT/$GENERATED" +for MARCH in aarch64 x86_64 riscv64; do + out=$ROOT/$GENERATED/$MARCH + mkdir -p "$out" + # Post-process: simplify struct timespec to drop the trailing + # zero-width bitfield (see comment above the awk pipeline below). + awk -f "$ROOT/$MKALLTYPES_AWK" \ + "$POST/musl-1.2.5/arch/$MARCH/bits/alltypes.h.in" \ + "$POST/musl-1.2.5/include/alltypes.h.in" \ + | awk ' + /^struct timespec \{.*tv_nsec/ { + print "struct timespec { time_t tv_sec; long tv_nsec; };" + next + } + { print } + ' > "$out/alltypes.h" + # Why the post-process: tcc 0.9.26 emits "will touch memory past + # end of the struct (internal limitation)" for any struct ending + # in a zero-width bitfield. The musl alltypes.h definition wraps + # tv_nsec in two bitfield-padding tricks for 32-bit-time_t arches: + # + # struct timespec { time_t tv_sec; + # int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER==4321); + # long tv_nsec; + # int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321); + # }; + # + # On all three boot5 arches sizeof(time_t)==sizeof(long)==8, so + # both bitfields are 0 width and the layout is identical to the + # simple two-field form. The leading bitfield does not warn (a + # sibling follows); the trailing one does, 387 times per build. 
+ cp "$POST/musl-1.2.5/arch/$MARCH/bits/syscall.h.in" "$out/syscall.h" + awk 'sub(/__NR_/, "SYS_") { print }' \ + < "$POST/musl-1.2.5/arch/$MARCH/bits/syscall.h.in" \ + >> "$out/syscall.h" +done + +n_files=$(find "$OVERRIDES" -type f | wc -l) +n_dels=$(wc -l < "$DELETES") +n_gen=$(find "$GENERATED" -type f | wc -l) +echo "musl-vendor: overrides=$n_files deletes=$n_dels generated=$n_gen" +echo " $OVERRIDES" +echo " $DELETES" diff --git a/bootprep/prep-musl.sh b/bootprep/prep-musl.sh @@ -0,0 +1,73 @@ +#!/bin/sh +## prep-musl.sh — A0b: apply the per-arch musl skip filter on top of +## build/<arch>/src/src/musl/. +## +## prep-src.sh (A0a) leaves the musl tree at build/<arch>/src/src/musl/ +## with overrides merged, deletes applied, and pre-generated alltypes.h +## / syscall.h dropped in. boot5-calibrate.sh's per-arch skip list (the +## set of musl translation units tcc 0.9.26 cannot compile) needs a +## working tcc3, so it can't be folded into A0a. +## +## A0b is a single transform: read the skip list (committed or freshly +## calibrated), copy it into the canonical tree as skip.txt, and remove +## every listed path from src/musl/. After A0b the tree is the exact +## set of files boot5 will compile — no skip enumeration at boot time. +## +## Skip-list source policy: +## - if vendor/musl/skip-<arch>.txt exists, use it +## verbatim (the common case — calibrations are committed). +## - else run bootprep/boot5-calibrate.sh <arch>, which itself depends +## on boot4/tcc3. The script writes the committed file for us. +## +## Usage: bootprep/prep-musl.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} + +set -eu + +. boot/lib-arch.sh +bootlib_init prep-musl "${1:-}" + +DST=$ROOT/build/$ARCH/src +DST_MUSL=$DST/src/musl +SKIP_COMMITTED=vendor/musl/skip-$ARCH.txt + +TAG="[$BOOT_TAG]" + +[ -d "$DST_MUSL" ] || { + echo "$TAG missing $DST_MUSL — run bootprep/prep-src.sh $ARCH first" >&2 + exit 1 +} + +# ── (1) materialize the skip list ───────────────────────────────────── +if [ ! 
-e "$SKIP_COMMITTED" ]; then + echo "$TAG no committed skip list at $SKIP_COMMITTED — calibrating" + bootprep/boot5-calibrate.sh "$ARCH" + [ -e "$SKIP_COMMITTED" ] || { + echo "$TAG calibration did not produce $SKIP_COMMITTED" >&2 + exit 1 + } +fi +cp "$SKIP_COMMITTED" "$DST_MUSL/skip.txt" + +# ── (2) apply filter — drop every listed path from src/musl/ ────────── +n_skip=0 +n_missing=0 +while read -r rel; do + [ -n "$rel" ] || continue + case "$rel" in + \#*) continue ;; + esac + if [ -e "$DST_MUSL/$rel" ]; then + rm -rf "$DST_MUSL/$rel" + n_skip=$((n_skip + 1)) + else + n_missing=$((n_missing + 1)) + fi +done < "$DST_MUSL/skip.txt" + +if [ "$n_missing" -gt 0 ]; then + echo "$TAG WARN: $n_missing skip-list entries were not present in $DST_MUSL" >&2 +fi + +n_remaining=$(find "$DST_MUSL" -type f | wc -l | tr -d ' ') +echo "$TAG OK filtered=$n_skip remaining=$n_remaining files in $DST_MUSL" diff --git a/bootprep/prep-src.sh b/bootprep/prep-src.sh @@ -0,0 +1,196 @@ +#!/bin/sh +## prep-src.sh — A0a: build the canonical generated source tree. +## +## All host-side source preparation happens once, up front, into a +## single canonical tree at build/<arch>/src/. This tree is the audit +## basis and the only thing boot stages should read for source. Boot +## stages do no flattening, no unpacking, no patching, no calibration. 
+## +## Layout produced (see docs/PLAN.md §A0): +## build/<arch>/src/ +## bin/ binary inputs not built by a stage +## hex0-seed vendored seed only +## src/ everything textual +## vendor-seed/ ELF.hex2 + *.hex0|*.hex1|*.hex2 +## M1pp/ M1pp.P1 +## hex2pp/ hex2pp.P1 +## P1/ P1*.{M1,M1pp,P1pp}, entry-*.P1pp, +## elf-end.P1pp +## catm/ catm.P1pp +## scheme1/ scheme1.P1pp, prelude.scm +## cc/ cc.scm, main.scm +## tcc/ tcc.flat.c, stdarg-bridge.h, plus +## tcc-0.9.26-1147-gee75a10c/{include,lib} +## tcc/libc/$ARCH/ start.S, sys_stubs.S +## tcc/cc/ mem.c (memcpy/memmove/memset/memcmp) +## libc/ libc.flat.c (mes-libc flattened) +## musl/ filtered musl-1.2.5 tree (overrides +## merged, deletes applied, generated +## alltypes.h/syscall.h dropped in). +## prep-musl.sh applies the per-arch +## skip filter on top. +## kernel/ seed-kernel sources for this arch +## test-fixtures/ boot-hello.c smoke binary +## +## A0 is split: prep-src.sh runs before boot0 and produces everything +## that doesn't need a working compiler. prep-musl.sh runs after boot4 +## (or copies the committed skip list) and applies the calibration +## filter on top of src/musl/. +## +## Usage: bootprep/prep-src.sh <arch> +## <arch> ∈ {aarch64, amd64, riscv64} + +set -eu + +. 
boot/lib-arch.sh +bootlib_init prep-src "${1:-}" + +DST=$ROOT/build/$ARCH/src +DST_BIN=$DST/bin +DST_SRC=$DST/src + +TAG="[$BOOT_TAG]" + +# ── (0) reset destination ───────────────────────────────────────────── +rm -rf "$DST" +mkdir -p "$DST_BIN" "$DST_SRC" + +# ── (1) vendored seed (pre-built binary + textual sources) ──────────── +SEED=vendor/seed/$ARCH +[ -d "$SEED" ] || { echo "$TAG missing $SEED" >&2; exit 1; } + +cp "$SEED/hex0-seed" "$DST_BIN/hex0-seed" + +mkdir -p "$DST_SRC/vendor-seed" +for f in ELF.hex2 hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2; do + [ -e "$SEED/$f" ] || { echo "$TAG missing $SEED/$f" >&2; exit 1; } + cp "$SEED/$f" "$DST_SRC/vendor-seed/$f" +done + +# ── (2) repo-tree textual sources ───────────────────────────────────── +mkdir -p "$DST_SRC/M1pp" +cp M1pp/M1pp.P1 "$DST_SRC/M1pp/M1pp.P1" + +mkdir -p "$DST_SRC/hex2pp" +cp hex2pp/hex2pp.P1 "$DST_SRC/hex2pp/hex2pp.P1" + +mkdir -p "$DST_SRC/P1" +cp "P1/P1.M1pp" "$DST_SRC/P1/P1.M1pp" +cp "P1/P1-$ARCH.M1" "$DST_SRC/P1/P1-$ARCH.M1" +cp "P1/P1-$ARCH.M1pp" "$DST_SRC/P1/P1-$ARCH.M1pp" +cp "P1/P1pp.P1pp" "$DST_SRC/P1/P1pp.P1pp" +cp "P1/entry-libc.P1pp" "$DST_SRC/P1/entry-libc.P1pp" +cp "P1/entry-plain.P1pp" "$DST_SRC/P1/entry-plain.P1pp" +cp "P1/elf-end.P1pp" "$DST_SRC/P1/elf-end.P1pp" + +mkdir -p "$DST_SRC/catm" +cp catm/catm.P1pp "$DST_SRC/catm/catm.P1pp" + +mkdir -p "$DST_SRC/scheme1" +cp scheme1/scheme1.P1pp "$DST_SRC/scheme1/scheme1.P1pp" +cp scheme1/prelude.scm "$DST_SRC/scheme1/prelude.scm" + +mkdir -p "$DST_SRC/cc" +cp cc/cc.scm "$DST_SRC/cc/cc.scm" +cp cc/main.scm "$DST_SRC/cc/main.scm" + +# tcc-libc: per-arch _start + sys_* wrappers consumed by boot4. +mkdir -p "$DST_SRC/tcc/libc/$ARCH" +cp "tcc/libc/$ARCH/start.S" "$DST_SRC/tcc/libc/$ARCH/start.S" +cp "tcc/libc/$ARCH/sys_stubs.S" "$DST_SRC/tcc/libc/$ARCH/sys_stubs.S" + +# tcc-cc: tiny mem helpers consumed by boot4 + boot6. +mkdir -p "$DST_SRC/tcc/cc" +cp tcc/cc/mem.c "$DST_SRC/tcc/cc/mem.c" + +# Smoke binary linked by boot4 + boot5. 
+mkdir -p "$DST_SRC/test-fixtures" +cp scripts/boot-hello.c "$DST_SRC/test-fixtures/boot-hello.c" + +# ── (3) seed-kernel sources for this arch ───────────────────────────── +mkdir -p "$DST_SRC/kernel/arch/$ARCH" "$DST_SRC/kernel/user" +cp seed-kernel/kernel.c "$DST_SRC/kernel/kernel.c" +for f in seed-kernel/arch/$ARCH/*; do + [ -f "$f" ] || continue + cp "$f" "$DST_SRC/kernel/arch/$ARCH/$(basename "$f")" +done +for f in seed-kernel/user/*; do + [ -f "$f" ] || continue + cp "$f" "$DST_SRC/kernel/user/$(basename "$f")" +done + +# ── (4) tcc flatten ─────────────────────────────────────────────────── +# stage1-flatten.sh writes to build/<arch>/vendor/tcc/. Run it (it's +# idempotent) and mirror the relevant artifacts into src/tcc/. +echo "$TAG flatten tcc.flat.c (host)" +bootprep/stage1-flatten.sh --arch "$ARCH" + +TCC_VENDOR=$ROOT/build/$ARCH/vendor/tcc +TCC_PKG=tcc-0.9.26-1147-gee75a10c +[ -e "$TCC_VENDOR/tcc.flat.c" ] || { echo "$TAG flatten produced no tcc.flat.c" >&2; exit 1; } +[ -e "$TCC_VENDOR/stdarg-bridge.h" ] || { echo "$TAG flatten produced no stdarg-bridge.h" >&2; exit 1; } +[ -d "$TCC_VENDOR/$TCC_PKG/include" ] || { echo "$TAG flatten produced no $TCC_PKG/include" >&2; exit 1; } +[ -d "$TCC_VENDOR/$TCC_PKG/lib" ] || { echo "$TAG flatten produced no $TCC_PKG/lib" >&2; exit 1; } + +mkdir -p "$DST_SRC/tcc" +cp "$TCC_VENDOR/tcc.flat.c" "$DST_SRC/tcc/tcc.flat.c" +cp "$TCC_VENDOR/stdarg-bridge.h" "$DST_SRC/tcc/stdarg-bridge.h" +mkdir -p "$DST_SRC/tcc/$TCC_PKG" +cp -R "$TCC_VENDOR/$TCC_PKG/include" "$DST_SRC/tcc/$TCC_PKG/include" +cp -R "$TCC_VENDOR/$TCC_PKG/lib" "$DST_SRC/tcc/$TCC_PKG/lib" + +# ── (5) mes-libc flatten ────────────────────────────────────────────── +echo "$TAG flatten libc.flat.c (host)" +bootprep/libc-flatten.sh --arch "$ARCH" + +LIBC_VENDOR=$ROOT/build/$ARCH/vendor/mes-libc +[ -e "$LIBC_VENDOR/libc.flat.c" ] || { echo "$TAG flatten produced no libc.flat.c" >&2; exit 1; } + +mkdir -p "$DST_SRC/libc" +cp "$LIBC_VENDOR/libc.flat.c" 
"$DST_SRC/libc/libc.flat.c" + +# ── (6) musl unpack + overrides + deletes + generated headers ───────── +MUSL_TARBALL=vendor/musl/1.2.5.tar.gz +MUSL_OVERRIDES=vendor/musl/overrides +MUSL_DELETES=vendor/musl/deletes.txt +MUSL_GENERATED=vendor/musl/generated/$MUSL_ARCH + +[ -e "$MUSL_TARBALL" ] || { echo "$TAG missing $MUSL_TARBALL" >&2; exit 1; } +[ -d "$MUSL_OVERRIDES" ] || { echo "$TAG missing $MUSL_OVERRIDES" >&2; exit 1; } +[ -e "$MUSL_DELETES" ] || { echo "$TAG missing $MUSL_DELETES" >&2; exit 1; } +[ -d "$MUSL_GENERATED" ] || { echo "$TAG missing $MUSL_GENERATED (run bootprep/musl-vendor.sh)" >&2; exit 1; } + +echo "$TAG unpack musl-1.2.5 + apply overrides/deletes" +MUSL_TMP=$(mktemp -d) +trap 'rm -rf "$MUSL_TMP"' EXIT +tar xzf "$MUSL_TARBALL" -C "$MUSL_TMP" +[ -d "$MUSL_TMP/musl-1.2.5" ] || { echo "$TAG musl tarball did not unpack to musl-1.2.5/" >&2; exit 1; } + +cp -R "$MUSL_OVERRIDES/." "$MUSL_TMP/musl-1.2.5/" +while read -r p; do + [ -n "$p" ] && rm -rf "$MUSL_TMP/musl-1.2.5/$p" +done < "$MUSL_DELETES" + +# Drop pre-generated arch headers + version.h into the same obj/ layout +# boot5 expects. +mkdir -p "$MUSL_TMP/musl-1.2.5/obj/include/bits" \ + "$MUSL_TMP/musl-1.2.5/obj/src/internal" +cp "$MUSL_GENERATED/alltypes.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/alltypes.h" +cp "$MUSL_GENERATED/syscall.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/syscall.h" +echo '#define VERSION "1.2.5-tcc-boot5"' > "$MUSL_TMP/musl-1.2.5/obj/src/internal/version.h" + +mkdir -p "$DST_SRC/musl" +# Move into place — the canonical tree owns this from now on. +( cd "$MUSL_TMP/musl-1.2.5" && tar cf - . ) | ( cd "$DST_SRC/musl" && tar xf - ) + +# Seed src/musl/skip.txt with the committed skip list when one exists, +# so the canonical tree carries metadata even before prep-musl.sh +# applies the filter. prep-musl.sh refreshes/regenerates this. 
+SKIP_COMMITTED=vendor/musl/skip-$ARCH.txt +if [ -e "$SKIP_COMMITTED" ]; then + cp "$SKIP_COMMITTED" "$DST_SRC/musl/skip.txt" +fi + +# ── summary ─────────────────────────────────────────────────────────── +n_files=$(find "$DST" -type f | wc -l | tr -d ' ') +echo "$TAG OK -> $DST ($n_files files)" diff --git a/scripts/prune-p1-table.sh b/bootprep/prune-p1-table.sh diff --git a/bootprep/stage1-flatten.sh b/bootprep/stage1-flatten.sh @@ -0,0 +1,374 @@ +#!/bin/sh +## bootprep/stage1-flatten.sh — flatten upstream tcc-0.9.26 into a single +## C bytestream (tcc.flat.c) using only the host preprocessor. +## +## This is the first of three stages building tcc-0.9.26 without +## M2-Planet, MesCC, or Mes Scheme. See docs/TCC.md. +## +## Stages: +## 1. unpack tcc-0.9.26-1147-gee75a10c.tar.gz +## 2. apply live-bootstrap simple-patches (tcctools.c file-open reorder) +## 3. host cc -E -nostdinc with mes-bundled headers + tcc-mes defines +## 4. emit build/<arch>/vendor/tcc/tcc.flat.c +## 5. (--verify) compile tcc.flat.c with host cc to confirm well-formedness +## +## Stage 1 deliberately stays on the host: it is just text manipulation +## (preprocess + concat) and the resulting tcc.flat.c is a portable +## artifact downstream stages consume. No container needed. 
+##
+## Usage:
+##   bootprep/stage1-flatten.sh [--arch <X86_64|I386|RISCV64|ARM64|AARCH64>] [--verify]
+
+set -eu
+
+# --- arg parse --------------------------------------------------------
+# Defaults: target X86_64, no host-compile verification. -h/--help prints
+# the `##`-prefixed header comment of this script with the prefix removed.
+ARCH=X86_64
+VERIFY=0
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --arch)
+            # Under `set -u` a trailing `--arch` with no value would abort on
+            # the unset $2 with a bare shell error; report a usage error.
+            [ $# -ge 2 ] || { echo "--arch requires a value" >&2; exit 2; }
+            ARCH=$2; shift 2 ;;
+        --verify) VERIFY=1; shift ;;
+        -h|--help) awk '/^##/ { sub(/^## ?/, ""); print }' "$0"; exit 0 ;;
+        *) echo "unknown arg: $1" >&2; exit 2 ;;
+    esac
+done
+
+# Map the accepted --arch spellings onto the per-arch knobs used below:
+#   BOOT_ARCH          directory name under build/
+#   MES_ARCH           subdirectory of vendor/mes-libc/include/linux/
+#   HAVE_LL            value passed as -D HAVE_LONG_LONG=
+#   TCC_TARGET_DEFINE  suffix of the -D TCC_TARGET_*=1 define
+#   CPP_ARCH           middle of the -D __<arch>__=1 define
+# NOTE(review): aarch64 selects MES_ARCH=riscv64 — presumably because the
+# vendored mes-libc headers have no aarch64 linux directory; confirm this
+# is intentional.
+case "$ARCH" in
+    X86_64|x86_64|amd64) BOOT_ARCH=amd64; MES_ARCH=x86_64; HAVE_LL=1; TCC_TARGET_DEFINE=X86_64; CPP_ARCH=x86_64 ;;
+    I386|i386) BOOT_ARCH=i386; MES_ARCH=x86; HAVE_LL=0; TCC_TARGET_DEFINE=I386; CPP_ARCH=x86 ;;
+    RISCV64|riscv64) BOOT_ARCH=riscv64; MES_ARCH=riscv64; HAVE_LL=1; TCC_TARGET_DEFINE=RISCV64; CPP_ARCH=riscv64 ;;
+    ARM64|arm64|AARCH64|aarch64)
+        BOOT_ARCH=aarch64; MES_ARCH=riscv64; HAVE_LL=1; TCC_TARGET_DEFINE=ARM64; CPP_ARCH=aarch64 ;;
+    *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;;
+esac
+
+# --- paths ------------------------------------------------------------
+# Everything used by this script is in-tree under $ROOT. No reach into
+# sibling repos.
+#
+#   vendor/tcc/0.9.26.tar.gz   — pristine upstream tarball
+#   vendor/tcc/patches-lb/     — live-bootstrap's tcc
+#                                simple-patches, copied in
+#                                for in-tree builds
+#   vendor/tcc/patches/        — our own tcc patches
+#   vendor/mes-libc/include/   — vendored mes-libc headers
+#                                (byte-identical to upstream
+#                                mes/include)
+#   bootprep/include/          — our own header shim, wins
+#                                -I priority for stdarg.h
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc
+DISTFILES=$ROOT/vendor/tcc
+LB_PATCHES=$ROOT/vendor/tcc/patches-lb
+OUR_PATCHES=$ROOT/vendor/tcc/patches
+MES_INCLUDE=$ROOT/vendor/mes-libc/include
+MES_INCLUDE_LINUX=$MES_INCLUDE/linux/$MES_ARCH
+
+TCC_TAR=$DISTFILES/0.9.26.tar.gz
+TCC_PKG=tcc-0.9.26-1147-gee75a10c
+
+# Fail early, naming the missing input, before touching $WORK.
+[ -r "$TCC_TAR" ] || { echo "missing $TCC_TAR" >&2; exit 1; }
+[ -d "$LB_PATCHES" ] || { echo "missing $LB_PATCHES" >&2; exit 1; }
+[ -d "$OUR_PATCHES" ] || { echo "missing $OUR_PATCHES" >&2; exit 1; }
+[ -d "$MES_INCLUDE" ] || { echo "missing $MES_INCLUDE" >&2; exit 1; }
+[ -d "$MES_INCLUDE_LINUX" ] || { echo "missing $MES_INCLUDE_LINUX" >&2; exit 1; }
+
+# --- (1) unpack -------------------------------------------------------
+# Always start from a pristine tree: the patches below edit files in place.
+mkdir -p "$WORK"
+rm -rf "$WORK/$TCC_PKG"
+tar -xzf "$TCC_TAR" -C "$WORK"
+
+SRC=$WORK/$TCC_PKG
+[ -d "$SRC" ] || { echo "tcc tarball did not unpack to $TCC_PKG/" >&2; exit 1; }
+
+# --- (2) simple-patches ----------------------------------------------
+# Both patches edit tcctools.c. The pair (remove-fileopen, addback-fileopen)
+# moves a fopen() block earlier in the function. We implement live-bootstrap's
+# simple-patch as an awk literal-block replacer; no binary dep.
+# apply_simple_patch <target> <before> <after>
+#
+# Replace the first literal occurrence of the contents of <before> inside
+# <target> with the contents of <after>, rewriting <target> in place via
+# <target>.new. Exits 1 when any of the three files is unreadable or when
+# the before-block does not occur in <target>; the failure message names
+# the before-file and the target so the offending patch can be identified.
+apply_simple_patch() {
+    target=$1; before=$2; after=$3
+    [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; }
+    [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; }
+    [ -r "$after" ] || { echo "patch after missing: $after" >&2; exit 1; }
+    if ! awk -v BFILE="$before" -v AFILE="$after" -v TFILE="$target" '
+        BEGIN {
+            # Slurp both blocks; every line read gets its newline re-added.
+            while ((getline line < BFILE) > 0) bef = bef line "\n";
+            close(BFILE);
+            while ((getline line < AFILE) > 0) aft = aft line "\n";
+            close(AFILE);
+        }
+        { src = src $0 "\n" }
+        END {
+            i = index(src, bef);
+            if (i == 0) {
+                print ("patch did not match: " BFILE " -> " TFILE) > "/dev/stderr";
+                exit 1
+            }
+            printf "%s%s%s",
+                substr(src, 1, i - 1),
+                aft,
+                substr(src, i + length(bef));
+        }
+    ' "$target" > "$target.new"; then
+        # Do not leave a truncated sibling next to the unpatched target.
+        rm -f "$target.new"
+        exit 1
+    fi
+    mv "$target.new" "$target"
+}
+
+apply_simple_patch \
+    "$SRC/tcctools.c" \
+    "$LB_PATCHES/remove-fileopen.before" \
+    "$LB_PATCHES/remove-fileopen.after"
+
+apply_simple_patch \
+    "$SRC/tcctools.c" \
+    "$LB_PATCHES/addback-fileopen.before" \
+    "$LB_PATCHES/addback-fileopen.after"
+
+# Bootstrap stub patches — eliminate libc symbols not provided by mes-mini-libc
+# (mprotect, getcwd, getenv, gettimeofday, ldexp, time, localtime, sscanf) by
+# gating call sites on the existing BOOTSTRAP CPP define.
+#
+# apply_our_patch <name> <target>: apply $OUR_PATCHES/<name>.{before,after}
+# to <target> through apply_simple_patch.
+apply_our_patch() {
+    name=$1; target=$2
+    apply_simple_patch \
+        "$target" \
+        "$OUR_PATCHES/$name.before" \
+        "$OUR_PATCHES/$name.after"
+}
+
+apply_our_patch tcc-is-native-stub "$SRC/tcc.h"
+apply_our_patch tccrun-include "$SRC/libtcc.c"
+apply_our_patch tinyc-define "$SRC/libtcc.c"
+apply_our_patch longjmp-stub "$SRC/libtcc.c"
+apply_our_patch set-environment-stub "$SRC/tcc.c"
+apply_our_patch getclock-ms-stub "$SRC/tcc.c"
+apply_our_patch getcwd-stub "$SRC/tccgen.c"
+apply_our_patch strip-file-prefix "$SRC/tccgen.c"
+apply_our_patch ldexp-stub "$SRC/tccpp.c"
+apply_our_patch date-time-stub "$SRC/tccpp.c"
+apply_our_patch lex-char-unsigned "$SRC/tccpp.c"
+
+# LP64 constants: upstream's parser treats one `L` suffix as 64-bit
+# only on x86_64. ARM64/RISCV64 are LP64 too; without this, `-4096UL`
+# is zero-extended from 32 bits and musl's __syscall_ret rejects valid
+# high mmap addresses as errors.
+apply_our_patch lp64-long-constant "$SRC/tccpp.c"
+apply_our_patch elfinterp-stub "$SRC/tccelf.c"
+
+# Auto-define `__bss_start` alongside tcc's existing `_end` symbol so a
+# freestanding image (kernel.S) can zero its .bss with start/end anchors
+# without an ld script. Mirrors the live-bootstrap convention.
+apply_our_patch bss-start-symbol "$SRC/tccelf.c"
+
+# x86_64 static-link PLT32 collapse: under BOOTSTRAP we force
+# static_link=1 with no .dynamic / no PT_INTERP, so the runtime linker
+# never fills the PLT's GOT slots. Upstream tcc 0.9.26 only collapses
+# PLT32→PC32 for hidden-visibility or LOCAL symbols, leaving global
+# defined symbols going through unfilled PLT entries. The patch widens
+# the condition to any symbol defined in this binary (st_shndx !=
+# SHN_UNDEF), which matches the aarch64 path's behavior. Harmless on
+# other arches: the block is gated `#ifdef TCC_TARGET_X86_64`.
+apply_our_patch x86_64-static-plt32 "$SRC/tccelf.c"
+
+# AT.2: native PT_NOTE for PVH boot. Stock tcc tags `.note.*` sections
+# as SHT_PROGBITS and never emits a PT_NOTE phdr, so QEMU's PVH
+# `-kernel` path on amd64 (which scans PT_NOTE for the Xen 18 entry)
+# rejects the kernel. Three patches: (1) retype implicitly-created
+# `.note*` sections to SHT_NOTE; (2) allocate a PT_NOTE phdr covering
+# every SHT_NOTE+SHF_ALLOC section; (3) accept SHT_NOTE in the .o
+# loader so kernel-asm.o's .note.Xen merges into the link output (else
+# the subsequent .rela.note.Xen merge derefs sm_table[].s == NULL).
+# The phnum bump in (2) is gated on actual presence so aarch64/riscv64
+# (no .note sections) keep their existing phdr count and stay
+# byte-identical to pre-patch output.
+apply_our_patch note-section-sht-note "$SRC/tccelf.c"
+apply_our_patch pt-note-phdr "$SRC/tccelf.c"
+apply_our_patch load-obj-accept-sht-note "$SRC/tccelf.c"
+
+# x86_64 va_list runtime: tcc's lib/va_list.c declares `extern void
+# abort(void)` and calls it in an unreachable default branch of the
+# arg-type switch. Under -nostdlib that abort() symbol is unresolved
+# and the link fails. Replace with an inline spin — same effect, no
+# libc dependency. Unconditional patch: lib/va_list.c is only
+# compiled on amd64, but the .before block is gated by the file's
+# `#if defined TCC_TARGET_X86_64` so other arches see the patch
+# inert.
+apply_our_patch va_list-no-abort "$SRC/lib/va_list.c"
+
+# Const-expr short-circuit: gen_opic/gen_opif must respect nocode_wanted
+# so 1 || (1/0), 0 && (1/0), 1 ? 2 : 1/0 etc. don't abort with "division
+# by zero in constant" in their unevaluated arms (C11 §6.6¶3).
+apply_our_patch const-divzero-shortcircuit-int "$SRC/tccgen.c"
+apply_our_patch const-divzero-shortcircuit-float "$SRC/tccgen.c"
+
+# AArch64 vararg fixes — only relevant when targeting ARM64; harmless
+# to apply unconditionally since neither file is read on other arches.
+apply_our_patch aarch64-stdarg-array "$SRC/include/stdarg.h"
+apply_our_patch arm64-va-pointer-operand "$SRC/arm64-gen.c"
+apply_our_patch arm64-va-arg-pointer "$SRC/arm64-gen.c"
+
+# AArch64 codegen: store/load through a literal integer address
+# (VT_CONST | VT_LVAL without VT_SYM). Stock arm64-gen.c only handles
+# the |VT_SYM case; bare integer addresses fall through to the
+# `printf + assert(0)` tail. Hits in musl when tcc folds weak-hidden
+# refs in __libc_start_main/mallocng. Patch is gated by the
+# surrounding store/load functions which exist only under
+# TCC_TARGET_ARM64.
+apply_our_patch arm64-store-const-lvalue "$SRC/arm64-gen.c"
+apply_our_patch arm64-load-const-lvalue "$SRC/arm64-gen.c"
+
+# Stock arm64-gen.c truncates SValue::c.i to uint32_t at the top of
+# both load() and store(). Fine for struct-field offsets, fatal for
+# pointer-sized constant addresses (e.g., the seed-kernel writing to
+# the device alias VA 0x109000000 for the PL011 UART). Drop the
+# truncation; signed 9-bit ldur/stur offsets fit regardless.
+apply_our_patch arm64-svcul-no-truncate "$SRC/arm64-gen.c"
+apply_our_patch arm64-svcul-no-truncate-store "$SRC/arm64-gen.c"
+
+# AArch64 assembler — phase 1. Drops in arm64-asm.c + arm64-tok.h and
+# wires their includes into tcc.h, libtcc.c, and tcctok.h. Patches are
+# gated by TCC_TARGET_ARM64 in the surrounding source so they no-op on
+# other arches even when applied. See docs/TCC-ARM64-ASM.md.
+cp "$OUR_PATCHES/files/arm64-asm.c" "$SRC/arm64-asm.c"
+cp "$OUR_PATCHES/files/arm64-tok.h" "$SRC/arm64-tok.h"
+apply_our_patch arm64-asm-include-tcc-h "$SRC/tcc.h"
+apply_our_patch arm64-asm-include-libtcc-c "$SRC/libtcc.c"
+apply_our_patch arm64-tok-include-tcctok-h "$SRC/tcctok.h"
+
+# arm64-asm.c emits gen_expr64 for `.quad sym - sym2`; declare it for
+# arm64 too (was x86_64-only).
+apply_our_patch tcc-h-gen-expr64-arm64 "$SRC/tcc.h"
+
+# Route .quad through asm_data on arm64 so symbol-difference expressions
+# emit a relocation (R_AARCH64_PREL64) instead of failing to parse.
+apply_our_patch tccasm-arm64-quad "$SRC/tccasm.c"
+apply_our_patch tccasm-arm64-quad-asm-data "$SRC/tccasm.c"
+
+# Enable the relocations the assembler now emits: PREL64 (data symbol
+# difference), CONDBR19 + TSTBR14 (forward conditional branch / tbz).
+apply_our_patch arm64-link-asm-relocs "$SRC/arm64-link.c"
+apply_our_patch arm64-link-prel64-condbr "$SRC/arm64-link.c"
+
+# tcc's lexer in ASM_FILE mode swallows mid-line '#' as a line comment,
+# which kills the ARM/AArch64 '#imm' immediate prefix. Restrict the
+# '#'-as-line-comment behavior to start-of-line so '#' tokenizes as
+# itself in operand position. gas's own '#' line-comment rule is BOL
+# only, so this matches stock gas semantics. Other arches' assemblers
+# don't use '#' as an immediate prefix, so they're unaffected.
+apply_our_patch asm-hash-bol-only "$SRC/tccpp.c"
+
+# Side effect of the patch above: alloca86_64-bt.S has two tab-prefixed
+# tail comments (`mov %rax,%rsi # size, a second parm…`) that the
+# x86_64 assembler now rejects with "end of line expected". They are
+# inert documentation; strip them. The file is only compiled when
+# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot4.sh), so this
+# rewrite is a no-op on aarch64/riscv64 builds.
+awk '{ sub(/\t#.*$/, ""); print }' "$SRC/lib/alloca86_64-bt.S" \
+    > "$SRC/lib/alloca86_64-bt.S.tmp"
+mv "$SRC/lib/alloca86_64-bt.S.tmp" "$SRC/lib/alloca86_64-bt.S"
+
+# riscv64 int->llong cast: stock tcc 0.9.26 leaves unsigned int values
+# in their native register width, but RV64 32-bit ops sign-extend bits
+# 63:32, so widening an `unsigned int` to `unsigned long` reads garbage
+# upper bits. Make gen_cvt_sxtw do the right thing for both signs, and
+# always invoke it on riscv64. Hits be64() in the seed kernel's DTB
+# parser; without the fix the kernel sees mem_start sign-extended to
+# 0xffffffff80000000 and the boot panics during MMU bring-up. Patch is
+# gated by the call-site / function name so it no-ops on other arches.
+apply_our_patch riscv64-cvt-int-zext "$SRC/tccgen.c"
+apply_our_patch riscv64-gen-cvt-sxtw "$SRC/riscv64-gen.c"
+apply_our_patch riscv64-load-ptr-zext "$SRC/riscv64-gen.c"
+
+# riscv64 ELF default load address — stock tcc lands binaries at
+# 0x10000, below the seed kernel's USER_VA_LO=0x200000. Move the
+# default to 0x600000 so tcc-emitted ELFs slot into the user pool
+# without per-link `-Wl,-Ttext=` overrides. Patch is gated by the
+# stock literal in the before-block, so it no-ops elsewhere.
+apply_our_patch riscv64-elf-start-addr "$SRC/riscv64-link.c"
+
+# riscv64 stdarg.h order fix — the upstream `#elif __riscv` branch
+# uses `__builtin_va_list` before it's typedef'd. Stock tcc treats
+# `__builtin_va_list` as a built-in keyword and forgives the forward
+# reference; tcc-boot2's frontend does not. Swap the two typedefs so
+# the base `char *__builtin_va_list` is in scope before va_list claims
+# it. Affects only the riscv branch — the patch is gated by the
+# `#elif __riscv` line in the before-block, so it's a no-op when that
+# branch is absent (other tcc trees).
+apply_our_patch riscv-stdarg-fix "$SRC/include/stdarg.h"
+
+# gcc/clang __builtin_va_* spelling bridge — append aliases at the end
+# of tcc's <stdarg.h> so the same flat.c (which uses the gcc spelling
+# because that's what cc.scm recognizes) also compiles back through
+# tcc on amd64/aarch64. Gated `#ifndef __riscv` inside .after — the
+# __riscv branch already maps these names natively. See the .after
+# block for the full rationale.
+apply_our_patch stdarg-builtin-aliases "$SRC/include/stdarg.h"
+
+# Empty config.h shims — pass1.kaem creates these via `catm <out>` (line 27-28).
+: > "$SRC/config.h"
+mkdir -p "$WORK/mes-overlay/mes"
+: > "$WORK/mes-overlay/mes/config.h"
+
+# --- (3) flatten via host preprocessor --------------------------------
+# HOST_CC may be overridden from the environment; defaults to `cc`.
+# The preprocessed body goes to $FLAT.body first; stage (4) below
+# assembles the final $FLAT from it.
+HOST_CC=${HOST_CC:-cc}
+FLAT=$WORK/tcc.flat.c
+
+"$HOST_CC" -E -P \
+    -nostdinc \
+    -I "$SRC" \
+    -I "$WORK/mes-overlay" \
+    -I "$ROOT/bootprep/include" \
+    -I "$MES_INCLUDE_LINUX" \
+    -I "$MES_INCLUDE" \
+    -D __linux__=1 \
+    -D __${CPP_ARCH}__=1 \
+    -D BOOTSTRAP=1 \
+    -D HAVE_LONG_LONG=$HAVE_LL \
+    -D inline= \
+    -D "CONFIG_TCCDIR=\"/lib/tcc\"" \
+    -D "CONFIG_SYSROOT=\"/\"" \
+    -D "CONFIG_TCC_CRTPREFIX=\"/lib\"" \
+    -D "CONFIG_TCC_ELFINTERP=\"/mes/loader\"" \
+    -D "CONFIG_TCC_SYSINCLUDEPATHS=\"/include/mes\"" \
+    -D "TCC_LIBGCC=\"/lib/libc.a\"" \
+    -D CONFIG_TCC_LIBTCC1_MES=0 \
+    -D CONFIG_TCCBOOT=1 \
+    -D CONFIG_TCC_STATIC=1 \
+    -D CONFIG_USE_LIBGCC=1 \
+    -D "TCC_VERSION=\"0.9.26\"" \
+    -D ONE_SOURCE=1 \
+    -D TCC_TARGET_${TCC_TARGET_DEFINE}=1 \
+    "$SRC/tcc.c" > "$FLAT.body"
+
+# --- (4) emit tcc.flat.c + stdarg bridge ------------------------------
+# Publish the post-patch tcc <stdarg.h> as a per-arch bridge file
+# alongside tcc.flat.c. libc-flatten.sh prepends the same bridge to
+# libc.flat.c, so the boot3/boot4 container compiles no longer need
+# `-I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h`.
+# The patched stdarg.h is byte-identical across X86_64 / ARM64 / RISCV64
+# (per-arch logic lives inside its #ifdefs); we still write a per-arch
+# copy so every artifact under build/<arch>/ comes from a single
+# `boot.sh <arch>` invocation, with nothing shared across arches.
+BRIDGE=$WORK/stdarg-bridge.h
+cp "$SRC/include/stdarg.h" "$BRIDGE"
+
+# Prepend the bridge into tcc.flat.c, guarded by !CCSCM so cc.scm
+# (which has __builtin_va_list / __builtin_va_* as native frontend
+# keywords and predefines CCSCM) skips the whole block. Under tcc,
+# the per-arch #ifdef branches inside the bridge resolve and define
+# the va_list typedef + __builtin_va_* → tcc native __va_* macros
+# that flat.c needs.
+{
+    echo '#ifndef CCSCM'
+    cat "$BRIDGE"
+    echo '#endif'
+    cat "$FLAT.body"
+} > "$FLAT"
+rm -f "$FLAT.body"
+
+# BSD/macOS `wc` pads its count with leading blanks; strip them so the
+# report line reads the same on every host.
+BYTES=$(wc -c < "$FLAT" | tr -d ' ')
+echo "produced $FLAT ($BYTES bytes)"
+
+# --- (5) optional verify ----------------------------------------------
+# With --verify, compile the flattened file with the host compiler
+# (warnings suppressed); compiler stderr is captured in host-cc.log.
+if [ "$VERIFY" -eq 1 ]; then
+    HOST_OBJ=$WORK/tcc.flat.o
+    if "$HOST_CC" -c -w -o "$HOST_OBJ" "$FLAT" 2>"$WORK/host-cc.log"; then
+        echo "host cc: tcc.flat.c compiles cleanly to $HOST_OBJ"
+    else
+        echo "host cc: tcc.flat.c FAILED to compile; see $WORK/host-cc.log" >&2
+        exit 1
+    fi
+fi
diff --git a/docs/DEBUG.md b/docs/DEBUG.md @@ -56,14 +56,14 @@ resolves to the *function* containing it, not the trace's own ```sh # Pass the ELF; the tool reads <ELF>.workdir to find expanded.hex2pp. -scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args 0x6019fc 0x6018fc +tools/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args 0x6019fc 0x6018fc # 0x6019fc main+0x24 # 0x6018fc g+0x2c # Pipe the trace log through it. 
./build/aarch64/tests/cc/007-call-with-args 2>&1 \ | grep -oE '@[0-9a-f]+' | tr -d @ \ - | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args + | tools/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args ``` Other input modes: `--hex2 <expanded.hex2pp|prog.hex2>` (skip the @@ -85,7 +85,7 @@ real `<funcname>:` headers and `<PT_LOAD#0+0xNNN>` xrefs rewritten to `<label+offset>`: ```sh -scripts/disasm-elf.sh build/aarch64/tests/cc/007-call-with-args +tools/disasm-elf.sh build/aarch64/tests/cc/007-call-with-args # 0000000000601a34 <main>: # 601a34: ... # 601a40: ldr w17, ... <libp1pp__trace+0x0> @@ -157,9 +157,9 @@ tail -1 trace.log # 4. (Optional) Resolve all addresses to functions for context. grep -oE '@[0-9a-f]+' trace.log | tr -d @ \ - | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/myprog + | tools/m1-symbols.py lookup --elf build/aarch64/tests/cc/myprog # 5. Inspect the disassembly around the crash site. -scripts/disasm-elf.sh build/aarch64/tests/cc/myprog \ +tools/disasm-elf.sh build/aarch64/tests/cc/myprog \ | grep -B 2 -A 20 '<parse_decl>:' ``` diff --git a/docs/LIBC.md b/docs/LIBC.md @@ -129,7 +129,7 @@ Headers: `vendor/mes-libc/include/` is a verbatim copy of ### Patches `vendor/mes-libc/patches/*.{before,after}` are literal-block pairs -applied by `scripts/libc-flatten.sh` (same `apply_simple_patch` +applied by `bootprep/libc-flatten.sh` (same `apply_simple_patch` shape `stage1-flatten.sh` uses for tcc): | patch | target | reason | @@ -171,7 +171,7 @@ The only file we author. Provides: ### Build -`scripts/libc-flatten.sh --arch <a>` (host): +`bootprep/libc-flatten.sh --arch <a>` (host): 1. Stage `vendor/mes-libc/` to `build/$ARCH/vendor/mes-libc/libc-stage/` so patching is non-destructive. @@ -413,10 +413,6 @@ That's tracked in [TCC.md](TCC.md), not here. 
## Notes for the engineer -- Refresh LIBC.txt with `scripts/boot-undef.sh > docs/LIBC.txt` after - fixing a cc-libc bug that opens a previously-dead path. The - unresolved-symbol set may shift as cc.scm-libc bugs get fixed and - formerly-static-DCE'd code paths come live. - If a mes file pulls in a header path we don't have, the right move is almost always to copy the matching `mes/include/` header verbatim — don't write a substitute. diff --git a/docs/LIBP1PP.md b/docs/LIBP1PP.md @@ -354,7 +354,7 @@ for every build chain in this tree. cc.scm + libp1pp, cc-libc (libp1pp + libc), tcc-cc, and tcc-gcc all resolve bare `extern memcpy` against libp1pp here; the vendored mes-libc is flattened with its own copies omitted so the symbols are not duplicated at hex2++ time, and the -gcc-built tcc-gcc binary links `tcc-cc/mem.c` for the same reason. +gcc-built tcc-gcc binary links `tcc/cc/mem.c` for the same reason. `memcpy` does not support overlapping ranges where `dst > src && dst < src + n`; use `memmove` for overlap. diff --git a/docs/MUSL.md b/docs/MUSL.md @@ -1,6 +1,6 @@ # boot5 musl spec -`scripts/boot5.sh <arch>` builds a static musl 1.2.5 libc with the +`boot/boot5.sh <arch>` builds a static musl 1.2.5 libc with the verified boot4 tcc for the same architecture, then links and runs a static hello-world smoke binary. Supported architectures are `amd64`, `aarch64`, and `riscv64`; aarch64 is verified end-to-end every run, and @@ -19,10 +19,10 @@ minimal shell. 
## Usage ```sh -scripts/boot3.sh <amd64|aarch64|riscv64> -scripts/boot4.sh <amd64|aarch64|riscv64> -scripts/boot5-calibrate.sh <amd64|aarch64|riscv64> # once per arch -scripts/boot5.sh <amd64|aarch64|riscv64> +boot/boot3.sh <amd64|aarch64|riscv64> +boot/boot4.sh <amd64|aarch64|riscv64> +bootprep/boot5-calibrate.sh <amd64|aarch64|riscv64> # once per arch +boot/boot5.sh <amd64|aarch64|riscv64> ``` ## Inputs @@ -31,12 +31,12 @@ scripts/boot5.sh <amd64|aarch64|riscv64> |------|---------| | `build/$ARCH/boot4/tcc3` | fixed-point self-host tcc from boot4 | | `build/$ARCH/boot4/libtcc1.a` | tcc runtime archive produced by boot4 | -| `vendor/upstream/musl-1.2.5.tar.gz` | pristine upstream musl source | -| `vendor/upstream/musl-1.2.5-overrides/` | post-patch files vendored as a tree (replaces the old patch + `patch` binary) | -| `vendor/upstream/musl-1.2.5-deletes.txt` | upstream files removed by the same patch set, one path per line | -| `vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH/{alltypes,syscall}.h` | per-arch headers pre-generated at vendor time (replaces musl's mkalltypes.sed + `__NR_`→`SYS_` rewrite, so the container needs no awk) | -| `vendor/upstream/musl-1.2.5-skip-$ARCH.txt` | per-arch calibration list — sources tcc 0.9.26 cannot compile, produced by `scripts/boot5-calibrate.sh` | -| `build/$ARCH/vendor/tcc/stdarg-bridge.h` | per-arch `__builtin_va_list` bridge (byte-identical across arches, three arches gated by `#ifdef`; produced by `scripts/stage1-flatten.sh`) | +| `vendor/musl/1.2.5.tar.gz` | pristine upstream musl source | +| `vendor/musl/overrides/` | post-patch files vendored as a tree (replaces the old patch + `patch` binary) | +| `vendor/musl/deletes.txt` | upstream files removed by the same patch set, one path per line | +| `vendor/musl/generated/$MUSL_ARCH/{alltypes,syscall}.h` | per-arch headers pre-generated at vendor time (replaces musl's mkalltypes.sed + `__NR_`→`SYS_` rewrite, so the container needs no awk) | +| `vendor/musl/skip-$ARCH.txt` | 
per-arch calibration list — sources tcc 0.9.26 cannot compile, produced by `bootprep/boot5-calibrate.sh` | +| `build/$ARCH/vendor/tcc/stdarg-bridge.h` | per-arch `__builtin_va_list` bridge (byte-identical across arches, three arches gated by `#ifdef`; produced by `bootprep/stage1-flatten.sh`) | | `scripts/boot-hello.c` | smoke-test source (shared with boot4) | Architecture mapping: @@ -49,7 +49,7 @@ Architecture mapping: ## Outputs -`scripts/boot5.sh` writes final artifacts to `build/$ARCH/boot5/`: +`boot/boot5.sh` writes final artifacts to `build/$ARCH/boot5/`: | File | Purpose | |------|---------| @@ -77,8 +77,8 @@ The entire `.boot5-stage` tree is disposable; every `boot5.sh` run rebuilds it. a `patch` binary anywhere. 2. **Stage pre-generated headers (host)**. Copy `vendor/musl/generated/$MUSL_ARCH/alltypes.h` and `syscall.h` into - `in/`. These were produced by `scripts/musl-vendor.sh` (a host-only - helper that runs `awk -f scripts/mkalltypes.awk` and the SYS_ rewrite + `in/`. These were produced by `bootprep/musl-vendor.sh` (a host-only + helper that runs `awk -f bootprep/mkalltypes.awk` and the SYS_ rewrite once per arch when the patch set changes). 3. **Enumerate musl sources (host)**. Walk the prepared tree under `in/musl-1.2.5/`, mirror musl's per-arch override rule (per-arch file @@ -127,19 +127,19 @@ surfaces tcc 0.9.26 cannot compile: | arch asm overrides | delete unsupported fenv, signal, setjmp, thread, string, math overrides as needed | | varargs | pre-include `build/$ARCH/vendor/tcc/stdarg-bridge.h` (the post-patch tcc `<stdarg.h>`) for C translation units | -Required tcc fixes live under `scripts/simple-patches/tcc-0.9.26/`. +Required tcc fixes live under `vendor/tcc/patches/`. The musl build depends on the aarch64 literal-address load/store fixes and the LP64 `L`-suffix constant fix. 
## Calibration -`scripts/boot5-calibrate.sh <arch>` produces -`vendor/upstream/musl-1.2.5-skip-$arch.txt`, the list of musl sources +`bootprep/boot5-calibrate.sh <arch>` produces +`vendor/musl/skip-$arch.txt`, the list of musl sources tcc 0.9.26 cannot compile for that arch. It runs the legacy skip-on-fail loop in the container once and captures the failures. Re-run calibration whenever any of these change: -- the tcc patches under `scripts/simple-patches/tcc-0.9.26/`; +- the tcc patches under `vendor/tcc/patches/`; - the musl overrides or deletes; - the vendored tcc or musl source tarballs. diff --git a/docs/SCHEME1.md b/docs/SCHEME1.md @@ -5,7 +5,7 @@ subset of R7RS-small. The interpreter reads s-expressions from `argv[1]`, evaluates them top-to-bottom in a single global env, and exits. -`scripts/boot-run-scheme1.sh` invokes `scheme1` with `prelude.scm` +`tests/boot-run-scheme1.sh` invokes `scheme1` with `prelude.scm` catted in front of the user file. The prelude (`scheme1/prelude.scm`) defines the R7RS surface that is expressible over the runtime primitives — equivalence aliases, list/char/string helpers, and the @@ -230,7 +230,7 @@ convention) when failure needs to be observable. ## Prelude surface `scheme1/prelude.scm` is bundled in front of every user program by -`scripts/boot-run-scheme1.sh`. It adds: +`tests/boot-run-scheme1.sh`. It adds: - **R7RS aliases**: `eqv?` ≡ `eq?`, `number?` ≡ `integer?`, `bytevector?` ≡ `string?`. diff --git a/docs/TCC.md b/docs/TCC.md @@ -1,27 +1,17 @@ # Building tcc-0.9.26 from this repo -Working doc. Describes a three-stage host pipeline that builds -tcc-0.9.26 from the upstream tarball, **without** depending on -M2-Planet, Mes Scheme, or MesCC at any point. Three scripts drive -the chain: - -- [scripts/stage1-flatten.sh](../scripts/stage1-flatten.sh) — runs on - the host (macOS or Linux). Flattens upstream `tcc.c` into a single - `tcc.flat.c` bytestream using only the host C preprocessor. 
-- [scripts/stage2-alpine.sh](../scripts/stage2-alpine.sh) — runs in an - `alpine:latest` container. gcc compiles `tcc.flat.c` to a working - `tcc-host`, which then builds mes libc and directly compiles+links - the patched real `tcc.c` against mes libc into `tcc-boot0-mes`. - **This is the slot our scheme1 cc replaces.** -- [scripts/stage3-rebuild.sh](../scripts/stage3-rebuild.sh) — runs in a - `busybox:musl` container. Uses `tcc-boot0-mes` to drive the - live-bootstrap-style chain `boot0 → boot1 → boot2`, ending in a - final tcc-0.9.26 built entirely from real (unflattened) sources by - a previous-stage tcc. - -This is the upstream half of the [CC.md](CC.md) story. Once our -scheme1-hosted compiler can ingest `tcc.flat.c`, it slots in where -gcc is in stage 2, so the alpine container goes away. +Describes the host-side flatten step that produces `tcc.flat.c` — +the single-file C bytestream that the boot pipeline (`boot3.sh` → +`boot4.sh`) feeds to cc.scm to bootstrap tcc-0.9.26. + +[bootprep/stage1-flatten.sh](../bootprep/stage1-flatten.sh) runs on +the host (macOS or Linux). It flattens upstream `tcc.c` into a single +`tcc.flat.c` bytestream using only the host C preprocessor — no +M2-Planet, Mes Scheme, or MesCC anywhere. + +This is the upstream half of the [CC.md](CC.md) story: once cc.scm +ingests `tcc.flat.c`, the rest of the pipeline (`boot3` → `boot4`) +is in-tree. 
## Inputs @@ -45,32 +35,16 @@ tcc-0.9.26-1147-gee75a10c.tar.gz live-bootstrap source │ • apply 2 simple-patches │ • host cc -E -nostdinc with mes headers + tcc-mes defines ▼ -build/amd64/vendor/tcc/tcc.flat.c 608 KB single-file C - │ - │ stage2-alpine.sh (alpine:latest) - │ • apk add gcc musl-dev - │ • gcc -static tcc.flat.c errno-shim.c -> tcc-host - │ • set up mes-0.27.1 include tree - │ • tcc-host compiles mes libc per-file -> libc.a + libtcc1.a - │ • tcc-host -static compiles+links real tcc.c -> tcc-boot0-mes - │ (mirrors live-bootstrap's tcc-boot0 invocation) - ▼ -build/amd64/vendor/tcc/tcc-boot0-mes ~750 KB tcc-0.9.26 ELF - │ - │ stage3-rebuild.sh (busybox:musl) - │ • tcc-boot0-mes rebuilds libc, then compiles real tcc.c -> tcc-boot1 - │ • tcc-boot1 rebuilds libc, then compiles real tcc.c -> tcc-boot2 - ▼ -build/amd64/vendor/tcc/tcc-boot2 final tcc-0.9.26 +build/$ARCH/vendor/tcc/tcc.flat.c ~600 KB single-file C ``` -Two containers, three scripts, one host-side step. Stage 1's -`tcc.flat.c` is a portable artifact; stage 2's `tcc-boot0-mes` plus -mes libc bits cross into stage 3 via `build/amd64/vendor/tcc/stage3-input/`. +`tcc.flat.c` is a portable artifact; downstream the boot pipeline +(`boot3.sh` → `boot4.sh`) feeds it to cc.scm to produce a working +tcc-0.9.26. ## Stage 1 — flatten tcc.c into tcc.flat.c -`scripts/stage1-flatten.sh --arch X86_64` +`bootprep/stage1-flatten.sh --arch X86_64` Mirrors the live-bootstrap `tcc-mes` invocation ([steps/tcc-0.9.26/pass1.kaem:60–87](../../live-bootstrap/steps/tcc-0.9.26/pass1.kaem)) @@ -115,149 +89,6 @@ regardless of where stage 1 ran. macOS this produces a Mach-O .o; the verify is purely a "does the source compile" check. Failure here means the flatten step is wrong. -## Stage 2 — Alpine container builds tcc-boot0-mes - -`scripts/stage2-alpine.sh --arch X86_64` - -This is the **stand-in slot for our future scheme CC**. 
Today, native -gcc plays the role; tomorrow our scheme1-hosted C compiler does the -same job — take `tcc.flat.c`, produce a working tcc, then use that -tcc to build mes libc and compile+link a static `tcc-boot0-mes` -against it. - -### Why Alpine (musl, not glibc) - -mes headers declare `errno` as a plain global; glibc declares it as a -TLS symbol. Linking `tcc.flat.c` against glibc's libc.a fails with a -non-TLS / TLS clash on the errno definition, and patching tcc.flat.c -to use a TLS form would mean injecting a transformation into stage 1 -that exists only to satisfy stage 2's host libc choice. - -musl exposes errno only through `__errno_location()`, so a one-line -`int errno;` shim is the sole storage and links cleanly. Alpine -ships musl natively, so we use it. - -The output binary itself (`tcc-boot0-mes`) is statically linked -against **mes libc**, not musl — the alpine-side gcc only links -`tcc-host`, the transient compiler used inside the container. - -### errno shim - -`tcc.flat.c` came out of stage 1's mes-headers preprocess, so it -expects `extern int errno;` (Mes's plain-global form). musl provides -errno only via `__errno_location()`, so the gcc link of tcc-host -needs a definition. A one-line `errno-shim.c` (`int errno;`) is it. -Purely a stage-2 concern — `tcc-boot0-mes` itself links against mes -libc, where errno is the plain global mes expects. - -### Sub-steps (inside the container) - -1. **`apk add gcc musl-dev`**. -2. **`gcc -static -no-pie -o tcc-host tcc.flat.c errno-shim.c`**. - This is the only step that exercises the host toolchain. It - doubles as a validation of stage 1: if `tcc.flat.c` is malformed, - it fails here. ~700 KB output, musl-linked. -3. **Set up mes include tree**: - ``` - /tmp/mes-inc/ <- copy of mes/include/ - /tmp/mes-inc/arch -> linux/x86_64 <- symlink we create - ``` - (Issue §1 workaround — without the symlink, tcc 0.9.26 SEGVs on - `#include <arch/syscall.h>` rather than reporting cleanly.) -4. 
**Compile each mes libc .c file individually** with tcc-host. - (Issue §2 workaround — concatenated TU SEGVs at ~22+ files.) -5. **`tcc-host -ar`** the .o set into `/lib/libc.a` (~299 KB), build - `crt1.o`, build `libtcc1.a`, install at the baked-in paths - (`CONFIG_TCC_CRTPREFIX="/lib"`, `CONFIG_TCCDIR="/lib/tcc"`, - `CONFIG_TCC_SYSINCLUDEPATHS="/include/mes"`). -6. **Compile+link real tcc.c into tcc-boot0-mes**. tcc-host operates - on the patched tree from `$WORK/$TCC_PKG/` (stage 1 left it there - in place), with the same flag set live-bootstrap's `pass1.kaem` - passes to its own `tcc-mes -static -o tcc-boot0 ... tcc.c` call: - ``` - tcc-host -g -static -o tcc-boot0-mes \ - -D BOOTSTRAP=1 -D HAVE_FLOAT=1 -D HAVE_BITFIELD=1 \ - -D HAVE_LONG_LONG=1 -D HAVE_SETJMP=1 \ - -D TCC_TARGET_X86_64=1 ...CONFIG_TCC_* defines... \ - -D ONE_SOURCE=1 \ - -I . -I /include/mes -L . -L /lib \ - tcc.c - ``` - No intermediate `tcc-self.o`. An earlier iteration produced one as - a "tcc-host self-compile" check, then linked it against mes libc - in a separate step — but the standalone `-static` link exposed a - tcc 0.9.26 codegen bug, and the separate pass adds nothing the - live-bootstrap-style direct invocation doesn't already validate. - ~750 KB output. -7. **Stage out** mes libc + libtcc1 + crt1.o + headers into - `build/amd64/vendor/tcc/stage3-input/`, so stage 3 can mount - them without re-running stage 2. -8. **Smoke test**: `tcc-boot0-mes -version`. **Expected to SEGV under - QEMU on macOS arm64** (Issue §3); native x86_64 needed to verify - cleanly. - -## Stage 3 — busybox container drives boot1 / boot2 - -`scripts/stage3-rebuild.sh --arch X86_64` - -Mirrors live-bootstrap's pass1.kaem chain, but starting from -`tcc-boot0-mes` (= live-bootstrap's `tcc-boot0` slot) since we -skipped MesCC / `tcc-mes` entirely. Each pass uses the previous -stage's tcc to rebuild mes libc and recompile the **real**, -unflattened tcc-0.9.26 sources from the patched tree stage 1 -produced. 
- -### Why busybox - -This stage is "pure recompile" work — no host-toolchain dependency. -busybox provides everything we need (sh, tar, awk, basic file -utilities) and tcc-boot0-mes is its own preprocessor + assembler + -linker + ar. The container is ~5 MB; nothing else gets installed. - -### Sub-steps (inside the container, per pass) - -For each `cc ∈ {tcc-boot0-mes, tcc-boot1}`: - -1. **Rebuild libc** from mes sources using `cc`: - - `cc -c crt1.c -> /lib/crt1.o` - - `cc -c libtcc1.c`, `cc -ar -> /lib/tcc/libtcc1.a` - - per-file compile of mes libc, `cc -ar -> /lib/libc.a` -2. **Compile real tcc.c** using `cc`: - ``` - cc -g -static -o tcc-boot{1,2} \ - -D BOOTSTRAP=1 -D HAVE_FLOAT=1 -D HAVE_BITFIELD=1 \ - -D HAVE_LONG_LONG=1 -D HAVE_SETJMP=1 \ - -D TCC_TARGET_X86_64=1 ...CONFIG_TCC_* defines... \ - -D ONE_SOURCE=1 \ - -I . -I /include/mes -L . -L /lib \ - tcc.c - ``` -3. **Verify**: `tcc-boot{1,2} -version`. - -The build-time defines here are a strict superset of stage 1's: -`HAVE_FLOAT`, `HAVE_BITFIELD`, `HAVE_SETJMP` are added because the -real builds enable code paths the flatten deliberately omits. - -### Status today - -Stage 3 cannot complete on macOS arm64 hosts — Issue §3 (tcc-boot0-mes -SEGVs at startup under QEMU x86_64) blocks the very first -`tcc-boot0-mes -version`. The script is correct; it will run -end-to-end on native x86_64 hardware, or once a tcc 0.9.28rc backport -patches the prologue/_DYNAMIC issues. The failure mode is clean: the -script aborts at the first `-version` smoke and reports the blocker. - -## Relation to live-bootstrap - -``` -live-bootstrap path: mescc → tcc-mes → tcc-boot0 → tcc-boot1 → tcc-boot2 → tcc → tcc-0.9.27 → ... -our path: gcc → tcc-host → tcc-boot0-mes → tcc-boot1 → tcc-boot2 - └──── stage 2 (alpine) ────┘ └──── stage 3 (busybox) ────┘ -``` - -`tcc-boot0-mes` is the slot-equivalent of live-bootstrap's `tcc-boot0`. 
-Stages 3 stops at `tcc-boot2` ("final 0.9.26" per the project goal); -the further hop to tcc-0.9.27 lives outside this doc. ## What this unlocks for the scheme1 cc @@ -267,79 +98,47 @@ The interface for the slot scheme CC fills: - **Output**: a working ELF capable of compiling the same `tests/cc` fixtures the regular `cc` suite covers. -`make tcc-boot2 ARCH=aarch64` now runs that path end-to-end: +`make tcc-boot2 ARCH=aarch64` runs that path end-to-end: `cc.scm + tcc.flat.c → tcc-boot2`, linking against a `cc.scm`-built `libc.flat.c` instead of mes libc. The `tcc-cc` acceptance suite (`make test SUITE=tcc-cc`) shows full parity with the gcc-built -control on aarch64 and amd64. Alpine + gcc + `tcc-host` (stage 2 of -the original plan) is no longer in our boot2 path; the busybox + -scheme1-cc chain covers everything from stage 1's `tcc.flat.c` to a -runnable tcc. +control on aarch64 and amd64. ## Reproducibility ``` -scripts/stage1-flatten.sh --arch X86_64 -scripts/stage2-alpine.sh --arch X86_64 -scripts/stage3-rebuild.sh --arch X86_64 # blocked on Issue §3 today +bootprep/stage1-flatten.sh --arch X86_64 ``` -Artifacts in `build/amd64/vendor/tcc/`: +Artifacts in `build/$ARCH/vendor/tcc/`: -| File | Stage | Size | Built by | What it is | -|-------------------|-------|---------|-----------------------|-------------------------------------------| -| `tcc.flat.c` | 1 | 608 KB | host cc | flattened single-source tcc-0.9.26 | -| `tcc-host` | 2 | ~700 KB | alpine gcc + musl | transient stage-2 compiler | -| `tcc-boot0-mes` | 2 | ~750 KB | tcc-host + mes libc | static tcc-0.9.26, mes-libc-linked | -| `stage3-input/` | 2 | — | stage 2 | staged mes libc + libtcc1 + headers | -| `tcc-boot1` | 3 | ~750 KB | tcc-boot0-mes | first rebuild from real tcc.c | -| `tcc-boot2` | 3 | ~750 KB | tcc-boot1 | final 0.9.26 | +| File | Size | Built by | What it is | +|---------------|--------|----------|------------------------------------| +| `tcc.flat.c` | ~600KB | host cc | flattened 
single-source tcc-0.9.26 | `build/` is in `.gitignore`; nothing tracked outside the scripts themselves. ## Issues / bugs -Found while bringing this pipeline up. All three are real, none are -ours; (1) and (2) are bugs in tcc 0.9.26 that the scripts work -around; (3) is open and needs either native x86_64 testing or a -tcc backport. - -### 1. tcc 0.9.26 SEGV on missing include - -When tcc-host can't resolve an `#include "..."` or `#include <...>`, -it segfaults instead of reporting the error. Discovered when -`mes/include/dirent.h` does `#include <arch/syscall.h>`; mes/include -has no `arch/` directory by default. - -**Workaround**: in `stage2-alpine.sh`, create -`/tmp/mes-inc/arch -> linux/x86_64` before invoking tcc-host. The -live-bootstrap build presumably sets up the same symlink somewhere we -haven't traced — possibly via a configure step, or via the -`include_next` chain in `SYSTEM_LIBC` mode. - -A proper fix would patch tcc 0.9.26's preprocessor to error cleanly, -but the symlink workaround is enough for our purposes. - -### 2. tcc 0.9.26 SEGV on large concatenated TU +### tcc 0.9.26 SEGV on large concatenated TU When ~22+ mes libc files are catm'd into one TU and the chain hits a file with non-trivial inline asm (the trigger we found was -`linux/x86_64-mes-gcc/_exit.c`), tcc-host crashes mid-compile. Below -that threshold all combinations work. Each individual file compiles -fine. +`linux/x86_64-mes-gcc/_exit.c`), tcc-0.9.26 crashes mid-compile. +Below that threshold all combinations work. Each individual file +compiles fine. The interaction is some accumulator state inside tcc — symbol table, hash chain, or similar — that overflows or hits a corrupted state when the TU grows large enough. **Workaround**: compile each `.c` separately, then `ar` together. -Both `stage2-alpine.sh` and `stage3-rebuild.sh` do this for all 258 -mes libc .c files. Bonus: avoids ~250 redundant header re-parses, -faster overall. 
+The boot pipeline does this for the mes libc / musl per-file +sweeps. Bonus: avoids redundant header re-parses, faster overall. **Confirmed in canonical live-bootstrap.** The -[`scripts/diag-livebootstrap-qemu.sh`](../scripts/diag-livebootstrap-qemu.sh) +[`tools/diag-livebootstrap-qemu.sh`](../tools/diag-livebootstrap-qemu.sh) diagnostic runs upstream live-bootstrap's amd64 pass1 chain inside the same busybox + linux/amd64 QEMU we use, and its mescc-built `tcc-mes` SEGVs at exactly this step (`tcc-mes -c unified-libc.c` @@ -347,51 +146,6 @@ with `assert fail: 0` then SIGSEGV). The per-file workaround is load-bearing for any tcc-0.9.26-on-QEMU build, not specific to our path. -### 3. tcc-boot0-mes segfaults at startup under QEMU x86_64 - -`./tcc-boot0-mes -version` segfaults under `podman run --platform -linux/amd64` on macOS arm64. - -**This is specific to our build path, not a generic QEMU issue.** The -diagnostic in -[`scripts/diag-livebootstrap-qemu.sh`](../scripts/diag-livebootstrap-qemu.sh) -runs upstream live-bootstrap's amd64 pass1 chain inside the same -busybox + linux/amd64 QEMU. Its mescc-built `tcc-mes` (also -mes-libc-linked, also tcc-0.9.26) runs `tcc-mes -version` cleanly: - -``` -+> tcc-mes -version -tcc version 0.9.26 (x86_64 Linux) -``` - -So QEMU correctly executes mescc-built tcc-0.9.26 ELFs at startup. Our -`tcc-boot0-mes`, which differs in being produced by `tcc-host` -(itself a gcc-compile of `tcc.flat.c`) rather than by mescc, hits a -codegen path that mescc avoids. Most likely candidates: function -prologue around `_start` (mes's `crt1.c` declares `_start` as a -regular C function with inline asm; tcc-host's prologue may not match -what mes's runtime expects), or a `_DYNAMIC` symbol gap that tcc -0.9.28rc's patch series fixes. 
- -A related historical observation: when an earlier iteration of stage 2 -tried linking a tcc-host-output ELF against musl's `crt1.o` directly, -that binary also segfaulted at startup because tcc 0.9.26 doesn't -emit a `_DYNAMIC` symbol musl's `_start` expects. That path is -retired, but the same underlying tcc codegen gap is a plausible -cousin of the SEGV here — tcc 0.9.28rc's patch series likely fixes -both. - -**Path to fix**: backport tcc 0.9.28rc's prologue / `_DYNAMIC` fixes -onto `tcc.flat.c` (or onto the patched real tcc tree stage 2 builds -from), then rerun stage 2. Optionally compare disassembly of our -`tcc-boot0-mes` against live-bootstrap's `tcc-boot0` (output of the -same diagnostic chain at a later step) to localize the exact -divergence. - -Once tcc-boot0-mes runs, stage 3 is unblocked: the `tcc-boot1` / -`tcc-boot2` rebuilds mirror what live-bootstrap's pass1.kaem already -does, and the script is in place. - ## Known limitations (riscv64) aarch64 and amd64 are at full self-host parity (cc.scm path matches @@ -525,9 +279,9 @@ canonical compiler. ## AT-series patches (post-bootstrap uniformity) These patches go beyond the bootstrap-stub patches in -`scripts/simple-patches/tcc-0.9.26/` and exist to remove per-arch +`vendor/tcc/patches/` and exist to remove per-arch workarounds in seed-kernel and the build pipeline. They live in the -same patch directory and are listed in `scripts/stage1-flatten.sh`'s +same patch directory and are listed in `bootprep/stage1-flatten.sh`'s `apply_our_patch` block. ### AT.2 — native PT_NOTE for PVH boot diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm @@ -1,5 +1,5 @@ ; scheme1 prelude. catm'd in front of the user .scm before invoking the -; scheme1 binary (see scripts/boot-run-scheme1.sh). Defines the R7RS +; scheme1 binary (see tests/boot-run-scheme1.sh). 
Defines the R7RS ; surface that's expressible over scheme1's existing primitives -- ; equivalence aliases, list helpers, characters as fixnum bytes, ; strings as NUL-terminated bytevectors -- plus the shell.scm process- diff --git a/scripts/Containerfile.alpine-gcc b/scripts/Containerfile.alpine-gcc @@ -1,18 +0,0 @@ -## Alpine with gcc + musl-dev + binutils baked in, used by the -## `make tcc-gcc` sanity-check target (Makefile). -## -## tcc-gcc compiles the same tcc.flat.c + libc.flat.c our cc.scm path -## consumes, but with stock gcc + libgcc soft-float helpers + a tiny -## hand-rolled _start / sys_* shim (tcc-gcc/<arch>/). It's a known-good -## reference: if our cc.scm-built tcc-boot2 misbehaves and tcc-gcc -## doesn't, the bug is in our codegen, not the source. -## -## Built per --platform; tag as boot2-alpine-gcc:<arch>. We don't pin -## a digest here because alpine:3.20 is only used by this opt-in -## harness — re-pin if the harness becomes load-bearing. - -FROM docker.io/library/alpine:3.20 - -RUN apk add --no-cache gcc musl-dev binutils - -CMD ["/bin/sh"] diff --git a/scripts/Containerfile.empty b/scripts/Containerfile.empty @@ -1,14 +0,0 @@ -## Per-arch image used by boot3/4/5.sh — fully empty rootfs (FROM scratch -## with no copy stages). Unlike scripts/Containerfile.scratch, this image -## does not bundle busybox: boot3/4/5 invoke scheme1 directly via argv -## and run.scm spawns only staged binaries (catm, scheme1, M1pp, hex2pp, -## tcc), so no in-container shell, /bin/sh, or applet tree is needed. -## -## Built per --platform with --no-cache; tagged as boot2-empty:<arch>. -## --no-cache is required because podman's layer cache key for an empty -## FROM-scratch + WORKDIR Containerfile is identical across platforms, -## so a second --platform build silently aliases its tag onto the first -## arch's image SHA — leaving e.g. boot2-empty:riscv64 pointing at an -## arm64 image. 
-FROM scratch -WORKDIR /work diff --git a/scripts/Containerfile.scratch b/scripts/Containerfile.scratch @@ -1,25 +0,0 @@ -## Per-arch image used by the standalone bootN.sh entrypoints. -## Two stages: -## 1. pull busybox:musl as the build container (provides a single -## static /bin/busybox plus a tree of applet symlinks) -## 2. FROM scratch, copy the busybox binary + symlinks into a fresh -## empty rootfs -## -## The result is a per-arch image whose entire userland is -## statically-linked busybox. No libc, no resolver, no /etc. -## This is the only container the bootN.sh scripts ever exec into. -## -## Built per --platform; tag as boot2-scratch:<arch>. -## Multi-arch index pinned to the same digest the boot2-busybox image -## uses (scripts/Containerfile.busybox); per-arch entries within the -## index, fetched 2026-04-24: -## amd64 sha256:298efc24641ff8a1a285abdc555a0ce5ab7c42eb085e1be099f824188e069604 -## arm64 sha256:458a2ae4cb09bf96f8e24f135474b1552039738ed16ee470320a9c05c2da2004 -## riscv64 sha256:657f5a49af9288dc98d2bf45343e45c57c3caf3946aa9df436d05da320a8c863 - -FROM docker.io/library/busybox@sha256:19b646668802469d968a05342a601e78da4322a414a7c09b1c9ee25165042138 AS busybox - -FROM scratch -COPY --from=busybox /bin /bin -WORKDIR /work -CMD ["/bin/sh"] diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh @@ -1,88 +0,0 @@ -#!/bin/sh -## boot-build-tcc-tcc.sh — next-stage tcc, parametrized by the -## compiler that does the building. -## -## Drives one stage of the README's tcc compilation chain: -## tcc0 = cc.scm compiles tcc.flat.c -> tcc-boot2 (compile 1) -## tcc1 = tcc-boot2 compiles tcc.flat.c -> tcc-tcc (compile 2) -## tcc2 = tcc-tcc compiles tcc.flat.c -> tcc-tcc-tcc (compile 3) -## -## The compiler binary is supplied as the optional second positional -## arg (default: build/$ARCH/tcc-boot2/tcc-boot2 — i.e. compile 2). 
-## Output is linked against the same libc.o / mem.o / sys_stubs.o / -## start.o the tcc-libc suite uses, plus per-target libtcc1 helpers: -## - aarch64 / riscv64: lib-arm64.c (soft-float TFmode helpers — -## __addtf3 / __extenddftf2 / …; libgcc-equivalent — same source -## on both arches, gated internally by __riscv to skip the -## __arm64_clear_cache wrapper; upstream tcc names the .o -## lib-arm64.o for both via RISCV64_O = lib-arm64.o in -## lib/Makefile). -## - amd64: va_list.c (defines __va_start / __va_arg, the -## intrinsics tcc's x86_64 codegen calls for variadic functions -## — see tcc/include/stdarg.h on x86_64 and OBJ-x86_64 in -## lib/Makefile). Long double on amd64 is x87 80-bit and tcc -## emits native FPU instructions, so no soft-float helper is -## needed. -## Helpers are rebuilt by $CC into the same dirname as $OUT so each -## stage owns its own lib-arm64.o / va_list.o. -## -## Env: ARCH in {aarch64, amd64, riscv64} -## Usage: boot-build-tcc-tcc.sh <out> [<cc>] - -set -eu -: "${ARCH:?ARCH must be set}" -[ "$#" -ge 1 ] && [ "$#" -le 2 ] \ - || { echo "usage: ARCH=<arch> $0 <out> [<cc>]" >&2; exit 2; } - -OUT=$1 -CC=${2:-build/$ARCH/tcc-boot2/tcc-boot2} - -case "$ARCH" in - aarch64) LIB_TARGET_DEFINES="-D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1" ;; - amd64) LIB_TARGET_DEFINES= ;; - riscv64) LIB_TARGET_DEFINES="-D TCC_TARGET_RISCV64=1" ;; - *) echo "boot-build-tcc-tcc.sh: unsupported ARCH '$ARCH'" >&2; exit 2 ;; -esac - -TCC_VENDOR=build/$ARCH/vendor/tcc -TCC_INC=$TCC_VENDOR/tcc-0.9.26-1147-gee75a10c/include -TCC_FLAT=$TCC_VENDOR/tcc.flat.c -LIBC_O=build/$ARCH/tcc-libc/libc.o -MEM_O=build/$ARCH/tcc-libc/mem.o -SYS_O=build/$ARCH/tcc-libc/sys_stubs.o -START_O=build/$ARCH/tcc-libc/start.o -WORK=$(dirname "$OUT") - -mkdir -p "$WORK" - -TCC_LIB_DIR=$TCC_VENDOR/tcc-0.9.26-1147-gee75a10c/lib - -LIB_OBJS= -if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "riscv64" ]; then - # lib-arm64.o: TFmode soft-float helpers (__addtf3 / __extenddftf2 / - # …). 
tcc.flat.c references these for long double arithmetic; - # without them the final link fails with undefined symbols. Upstream - # tcc reuses lib-arm64.c for riscv64 too (RISCV64_O = lib-arm64.o - # in lib/Makefile); the file gates the arm64-specific cache-flush - # wrapper on !__riscv. - # shellcheck disable=SC2086 # LIB_TARGET_DEFINES is intentionally word-split. - "$CC" -nostdlib -I "$TCC_INC" \ - -D HAVE_CONFIG_H=1 $LIB_TARGET_DEFINES \ - -c -o "$WORK/lib-arm64.o" "$TCC_LIB_DIR/lib-arm64.c" - LIB_OBJS=$WORK/lib-arm64.o -elif [ "$ARCH" = "amd64" ]; then - # va_list.o: defines __va_start / __va_arg. tcc's x86_64 codegen - # lowers va_start / va_arg to direct calls into these intrinsics - # (see tcc/include/stdarg.h, lib/va_list.c). Without them the - # tcc-tcc link fails with undefined symbols. - "$CC" -nostdlib -I "$TCC_INC" \ - -D TCC_TARGET_X86_64=1 \ - -c -o "$WORK/va_list.o" "$TCC_LIB_DIR/va_list.c" - LIB_OBJS=$WORK/va_list.o -fi - -# Compile + link the next-stage tcc in one $CC invocation. -# shellcheck disable=SC2086 # $LIB_OBJS is intentionally word-split (may be empty). -"$CC" -nostdlib -I "$TCC_INC" -include "$TCC_INC/stdarg.h" \ - "$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" $LIB_OBJS \ - "$TCC_FLAT" -o "$OUT" diff --git a/scripts/boot-undef.sh b/scripts/boot-undef.sh @@ -1,60 +0,0 @@ -#!/bin/sh -## scripts/boot-undef.sh — list hex2pp references with no matching definition. -## -## Cheap-and-cheerful linker-diagnostic for the live boot pipeline. M1pp -## emits expanded.hex2pp in asm-style: `:label` defines, `&label` (and -## other-sigil) references. A symbol with refs but no def is unresolved -## — the same thing hex2pp would flag, except hex2pp only prints the -## first miss before bailing, so this dumps the full list. -## -## Defaults to the expanded.hex2pp produced by the most recent -## `make tcc-boot2 ARCH=<arch>` build. Run that first if missing. -## -## Caveats: -## - Reads post-m1pp output, so %la(...) macro args are already -## expanded. 
Running this on the raw .P1pp would miss them. -## - m1pp rewrites local labels (@body, @end, ...) to per-expansion suffixed -## names, so they appear under both refs and defs naturally. -## -## Usage: -## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<expanded.hex2pp>] - -set -eu - -ARCH=aarch64 -LINKED= -while [ $# -gt 0 ]; do - case "$1" in - --arch) ARCH=$2; shift 2 ;; - -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;; - --) shift; break ;; - -*) echo "unknown arg: $1" >&2; exit 2 ;; - *) LINKED=$1; shift ;; - esac -done - -ROOT=$(cd "$(dirname "$0")/.." && pwd) -: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/expanded.hex2pp}" - -[ -r "$LINKED" ] || { - echo "missing $LINKED" >&2 - echo " run: make tcc-boot2 ARCH=$ARCH" >&2 - exit 1 -} - -REFS=$(mktemp) -DEFS=$(mktemp) -trap 'rm -f "$REFS" "$DEFS"' EXIT - -grep -oE '&[a-zA-Z_][a-zA-Z_0-9]*' "$LINKED" | cut -c2- | sort -u > "$REFS" -grep -oE '^:[a-zA-Z_][a-zA-Z_0-9]*' "$LINKED" | cut -c2- | sort -u > "$DEFS" - -UNDEF=$(comm -23 "$REFS" "$DEFS") -NREF=$(wc -l < "$REFS" | tr -d ' ') -NDEF=$(wc -l < "$DEFS" | tr -d ' ') -NUND=$(printf '%s\n' "$UNDEF" | grep -c . || true) - -printf '[boot-undef %s] %s\n' "$ARCH" "$LINKED" >&2 -printf ' refs=%s defs=%s undef=%s\n' "$NREF" "$NDEF" "$NUND" >&2 - -[ "$NUND" -eq 0 ] || printf '%s\n' "$UNDEF" diff --git a/scripts/boot.sh b/scripts/boot.sh @@ -1,90 +0,0 @@ -#!/bin/sh -## boot.sh — drive boot0 → boot6 end-to-end under one driver. -## -## Usage: scripts/boot.sh <arch> -## DRIVER=seed scripts/boot.sh <amd64|aarch64|riscv64> -## DRIVER=podman scripts/boot.sh <amd64|aarch64|riscv64> -## -## DRIVER (default podman) is exported and consumed by each bootN.sh. -## Outputs land at build/$ARCH/$DRIVER/bootN/, so the two driver trees -## coexist on disk. 
DRIVER=seed runs the build pipeline on top of the -## podman-built boot6 kernel at build/$ARCH/podman/boot6/{Image,kernel.elf}; -## first-time setup therefore requires one prior podman pass: -## ./scripts/boot.sh <arch> # default DRIVER=podman -## DRIVER=seed ./scripts/boot.sh <arch> # re-run on tcc-built kernel -## Subsequent DRIVER=seed runs reuse the Image directly — no stashing. - -set -eu - -case "${1:-}" in - -h|--help) - cat <<'EOF' -boot.sh — drive boot0 → boot6 end-to-end under one driver. - -Usage: - scripts/boot.sh <aarch64|amd64|riscv64> - -Environment variables (all optional): - DRIVER podman (default) | seed. - podman: containerised builds. - seed: builds run inside the tcc-built - seed kernel under qemu (requires - one prior DRIVER=podman pass to - mint build/$ARCH/podman/boot6/). - BOOT3_TIMEOUT (default 1800) boot3 (scheme1) wall-clock seconds. - BOOT4_TIMEOUT (default 5400) boot4 (tcc1/2/3) wall-clock seconds. - BOOT5_TIMEOUT (default 7200) boot5 (musl) wall-clock seconds. - BOOT6_TIMEOUT (default 1200) boot6 (kernel) wall-clock seconds. - QEMU_MEM (default 3072M) guest RAM passed to the seed-driver qemu. - TCC_BOOTSTRAP_RELAX_FIXEDPOINT set to 1 in boot4 to accept tcc2 != tcc3. - After a codegen-altering tcc patch the - two-stage rule needs a third bounce to - converge; the next boot4 run, started - from this run's tcc3, will reach - tcc2 == tcc3 with no extra knob. -EOF - exit 0 - ;; -esac - -. scripts/lib-arch.sh -bootlib_init boot "${1:-}" - -if [ "$DRIVER" = seed ]; then - KERNEL=build/$ARCH/podman/boot6/$KERNEL_NAME - if [ ! -f "$KERNEL" ]; then - echo "[$BOOT_TAG] missing $KERNEL" >&2 - echo "[$BOOT_TAG] run './scripts/boot.sh $ARCH' first (default DRIVER=podman) to produce it" >&2 - exit 1 - fi -fi - -# Wipe only this driver's tree so the other driver's outputs survive -# (the seed driver consumes build/$ARCH/podman/boot6/$KERNEL_NAME). 
-rm -rf build/$ARCH/$DRIVER - -# Per-stage timing comes from each child's own bootlib EXIT trap -# (`[bootN/$DRIVER/$ARCH] done in Xs (cum Ys)`); this orchestrator only -# adds its own total at the end (also via the lib trap). - -# A0a: build the canonical generated source tree at build/$ARCH/src/. -# Boot stages read source from there exclusively (no flatten/unpack/ -# patch inside boot{N}.sh). -./scripts/prep-src.sh $ARCH - -./scripts/boot0.sh $ARCH -./scripts/boot1.sh $ARCH -./scripts/boot2.sh $ARCH -./scripts/boot3.sh $ARCH -./scripts/boot4.sh $ARCH - -# A0b: apply the per-arch musl skip filter (needs tcc3 from boot4 if -# the calibration list is missing; the committed list is the common -# case and runs without compiler). -./scripts/prep-musl.sh $ARCH - -./scripts/boot5.sh $ARCH - -# boot6 builds the seed-kernel ELF/Image with boot4's tcc3 (no `ld -T`, -# no objcopy). -./scripts/boot6.sh $ARCH diff --git a/scripts/boot0.sh b/scripts/boot0.sh @@ -1,48 +0,0 @@ -#!/bin/sh -## boot0.sh — seed bootstrap: hex0-seed → hex0 → hex1 → hex2 → catm → M0. -## -## Stage 0 of the README's chain. From the ~400-byte vendored hex0-seed, -## brings up the three binaries every later stage depends on (hex2, catm, -## M0). -## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/bin/hex0-seed -## build/$ARCH/src/src/vendor-seed/{hex0.hex0, hex1.hex0, hex2.hex1, -## catm.hex2, M0.hex2, ELF.hex2} -## -## ─── Outputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot0/{hex2, catm, M0} -## -## Usage: scripts/boot0.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. scripts/lib-arch.sh -bootlib_init boot0 "${1:-}" -driver_init scratch -require_src - -. 
scripts/lib-pipeline.sh -pipeline_init "$STAGE" "$OUT" "$DRIVER" - -# ─── inputs (from canonical src tree) ───────────────────────────────── -pipeline_input hex0-seed "build/$ARCH/src/bin/hex0-seed" -for f in hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2 ELF.hex2; do - pipeline_input_from_src "vendor-seed/$f" -done - -# ─── pipeline ───────────────────────────────────────────────────────── -echo "[$BOOT_TAG] hex0-seed -> hex0 -> hex1 -> hex2 -> catm -> M0" - -stage hex0-seed hex0.hex0 hex0 -- hex0.hex0 -- hex0 -stage hex0 hex1.hex0 hex1 -- hex1.hex0 -- hex1 -stage hex1 hex2.hex1 hex2 -- hex2.hex1 -- hex2 -stage hex2 catm.hex2 catm -- catm.hex2 -- catm -stage catm M0.combined.hex2 ELF.hex2 M0.hex2 -- ELF.hex2 M0.hex2 -- M0.combined.hex2 -stage hex2 M0.combined.hex2 M0 -- M0.combined.hex2 -- M0 - -pipeline_export hex2 catm M0 -pipeline_run - -echo "[$BOOT_TAG] OK -> $OUT/{hex2, catm, M0}" diff --git a/scripts/boot1.sh b/scripts/boot1.sh @@ -1,65 +0,0 @@ -#!/bin/sh -## boot1.sh — build the self-hosted M1pp + hex2pp pair. -## -## Stage 1 of the README's chain: produces M1pp and hex2pp from their -## .P1 sources via the seed M0 + hex2 chain. catm is rebuilt later in -## boot2 from catm.P1pp. -## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/src/M1pp/M1pp.P1 -## build/$ARCH/src/src/hex2pp/hex2pp.P1 -## build/$ARCH/src/src/P1/P1-$ARCH.M1 -## build/$ARCH/src/src/vendor-seed/ELF.hex2 -## -## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/$DRIVER/boot0/{hex2, M0, catm} -## -## ─── Outputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} -## -## Usage: scripts/boot1.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. scripts/lib-arch.sh -bootlib_init boot1 "${1:-}" -driver_init scratch -require_src - -BOOT0=build/$ARCH/$DRIVER/boot0 -require_prev "$BOOT0" hex2 M0 catm - -. 
scripts/lib-pipeline.sh -pipeline_init "$STAGE" "$OUT" "$DRIVER" - -# ─── inputs ─────────────────────────────────────────────────────────── -pipeline_input hex2 "$BOOT0/hex2" -pipeline_input M0 "$BOOT0/M0" -pipeline_input catm "$BOOT0/catm" -pipeline_input_from_src "P1/P1-$ARCH.M1" P1.M1 -pipeline_input_from_src vendor-seed/ELF.hex2 -pipeline_input_from_src M1pp/M1pp.P1 -pipeline_input_from_src hex2pp/hex2pp.P1 - -# ─── pipeline ───────────────────────────────────────────────────────── -# .P1 -> ELF, applied to each of M1pp.P1 and hex2pp.P1: -# catm P1.M1 + <src> -> combined.M1 -# M0 combined.M1 -> prog.hex2 -# catm ELF.hex2 + prog.hex2 -> linked.hex2 -# hex2 linked.hex2 -> ELF binary -build_p1() { # $1 = source .P1, $2 = output binary name - stage catm combined.M1 P1.M1 "$1" -- P1.M1 "$1" -- combined.M1 - stage M0 combined.M1 prog.hex2 -- combined.M1 -- prog.hex2 - stage catm linked.hex2 ELF.hex2 prog.hex2 -- ELF.hex2 prog.hex2 -- linked.hex2 - stage hex2 linked.hex2 "$2" -- linked.hex2 -- "$2" -} - -echo "[$BOOT_TAG] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp" -build_p1 M1pp.P1 M1pp -build_p1 hex2pp.P1 hex2pp - -pipeline_export M1pp hex2pp -pipeline_run - -echo "[$BOOT_TAG] OK -> $OUT/{M1pp, hex2pp}" diff --git a/scripts/boot2.sh b/scripts/boot2.sh @@ -1,73 +0,0 @@ -#!/bin/sh -## boot2.sh — rebuild catm via M1pp+hex2pp, then build scheme1. -## -## Stage 2 of the README's chain. First rebuilds catm from catm.P1pp via -## the freshly-built M1pp+hex2pp pipeline (replacing the seed boot0 catm -## so later stages have zero boot0 dependencies); then builds the -## scheme1 interpreter from scheme1.P1pp using the new catm. 
-## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/src/catm/catm.P1pp -## build/$ARCH/src/src/scheme1/scheme1.P1pp -## build/$ARCH/src/src/P1/{P1-$ARCH.M1pp, P1.M1pp, P1pp.P1pp} -## build/$ARCH/src/src/vendor-seed/ELF.hex2 -## -## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/$DRIVER/boot0/catm (only to bootstrap catm.P1pp build) -## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} -## -## ─── Outputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot2/{catm, scheme1} -## -## Usage: scripts/boot2.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. scripts/lib-arch.sh -bootlib_init boot2 "${1:-}" -driver_init scratch -require_src - -BOOT0=build/$ARCH/$DRIVER/boot0 -BOOT1=build/$ARCH/$DRIVER/boot1 -require_prev "$BOOT0" catm -require_prev "$BOOT1" M1pp hex2pp - -. scripts/lib-pipeline.sh -pipeline_init "$STAGE" "$OUT" "$DRIVER" - -# ─── inputs ─────────────────────────────────────────────────────────── -pipeline_input catm0 "$BOOT0/catm" # bootstrap; replaced by output 'catm' -pipeline_input M1pp "$BOOT1/M1pp" -pipeline_input hex2pp "$BOOT1/hex2pp" -pipeline_input_from_src "P1/P1-$ARCH.M1pp" backend.M1pp -pipeline_input_from_src P1/P1.M1pp frontend.M1pp -pipeline_input_from_src P1/P1pp.P1pp libp1pp.P1pp -pipeline_input_from_src vendor-seed/ELF.hex2 -pipeline_input_from_src catm/catm.P1pp -pipeline_input_from_src scheme1/scheme1.P1pp - -# ─── pipeline ───────────────────────────────────────────────────────── -# .P1pp -> ELF, applied to each of catm.P1pp and scheme1.P1pp: -# catm backend + frontend + libp1pp + <src> -> combined.M1pp -# M1pp combined.M1pp -> expanded.hex2pp -# catm ELF.hex2 + expanded.hex2pp -> linked.hex2pp -# hex2pp -B 0x600000 linked.hex2pp -> ELF binary -build_p1pp() { # $1 = catm-bin name (catm0 or catm), $2 = src .P1pp, $3 = out - _catm=$1; _src=$2; _out=$3 - stage "$_catm" 
combined.M1pp backend.M1pp frontend.M1pp libp1pp.P1pp "$_src" \ - -- backend.M1pp frontend.M1pp libp1pp.P1pp "$_src" -- combined.M1pp - stage M1pp combined.M1pp expanded.hex2pp -- combined.M1pp -- expanded.hex2pp - stage "$_catm" linked.hex2pp ELF.hex2 expanded.hex2pp -- ELF.hex2 expanded.hex2pp -- linked.hex2pp - stage hex2pp -B 0x600000 linked.hex2pp "$_out" -- linked.hex2pp -- "$_out" -} - -echo "[$BOOT_TAG] catm.P1pp -> catm; scheme1.P1pp -> scheme1" -build_p1pp catm0 catm.P1pp catm # bootstrap with boot0 catm -build_p1pp catm scheme1.P1pp scheme1 # uses just-built catm - -pipeline_export catm scheme1 -pipeline_run - -echo "[$BOOT_TAG] OK -> $OUT/{catm, scheme1}" diff --git a/scripts/boot3.sh b/scripts/boot3.sh @@ -1,88 +0,0 @@ -#!/bin/sh -## boot3.sh — bootstrap tcc0 from cc.scm. -## -## Stage A of the four-stage tcc chain: cc.scm compiles tcc.flat.c into -## tcc0. boot4 picks up tcc0 and self-hosts the rest of the chain -## (tcc0 → tcc1 → tcc2 → tcc3, with tcc2 == tcc3 as the fixed-point -## check). 
-## -## tcc0 = tcc-source compiled by cc.scm ← produced here -## tcc1 = tcc-source compiled by tcc0 ← boot4 -## tcc2 = tcc-source compiled by tcc1 ← boot4 -## tcc3 = tcc-source compiled by tcc2 ← boot4 -## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/src/scheme1/prelude.scm scheme bundle -## build/$ARCH/src/src/cc/{cc.scm, main.scm} scheme bundle -## build/$ARCH/src/src/P1/{P1-$ARCH.M1pp, P1.M1pp, P1pp.P1pp} M1pp pipeline -## build/$ARCH/src/src/P1/{entry-libc.P1pp, elf-end.P1pp} link framing -## build/$ARCH/src/src/vendor-seed/ELF.hex2 ELF header -## build/$ARCH/src/src/tcc/tcc.flat.c flattened tcc TU -## build/$ARCH/src/src/libc/libc.flat.c flattened mes-libc TU -## -## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/$DRIVER/boot1/{M1pp, hex2pp} -## build/$ARCH/$DRIVER/boot2/{catm, scheme1} -## -## ─── Tools ──────────────────────────────────────────────────────────── -## scheme1 evaluates scripts/boot3-run.scm against the flat staging -## root. Same run.scm drives both DRIVER=podman (cwd=/work) and -## DRIVER=seed (cwd=/). Stage A is pure scheme1 + M1pp + hex2pp; no -## asm step. -## -## ─── Outputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot3/tcc0 — cc.scm-built bootstrap tcc -## build/$ARCH/$DRIVER/boot3/libc.P1pp — cc.scm-built mes-libc (lib mode); -## consumed by the cc-libc test suite -## build/$ARCH/$DRIVER/boot3/tcc.flat.P1pp — cc.scm-built tcc TU (lib mode); -## debug/inspection artifact -## -## Usage: scripts/boot3.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. 
scripts/lib-arch.sh -bootlib_init boot3 "${1:-}" -driver_init empty -require_src - -BOOT1=build/$ARCH/$DRIVER/boot1 -BOOT2=build/$ARCH/$DRIVER/boot2 -SRC=build/$ARCH/src - -require_prev "$BOOT1" M1pp hex2pp -require_prev "$BOOT2" catm scheme1 - -# ── stage inputs and run scheme1 + boot3-run.scm under $DRIVER ──────── -. scripts/lib-runscm.sh -runscm_init "$STAGE" "$OUT" -runscm_scheme1 "$BOOT2/scheme1" -runscm_prelude "$SRC/src/scheme1/prelude.scm" -runscm_runscm scripts/boot3-run.scm - -runscm_input catm "$BOOT2/catm" -runscm_input M1pp "$BOOT1/M1pp" -runscm_input hex2pp "$BOOT1/hex2pp" -# scheme1 binary itself is staged by runscm_run (so a `(run "scheme1" …)` -# inside boot3-run.scm finds it at cwd-relative ./scheme1). - -runscm_input_from_src scheme1/prelude.scm -runscm_input_from_src cc/cc.scm -runscm_input_from_src cc/main.scm - -runscm_input_from_src "P1/P1-$ARCH.M1pp" backend.M1pp -runscm_input_from_src P1/P1.M1pp frontend.M1pp -runscm_input_from_src P1/P1pp.P1pp libp1pp.P1pp -runscm_input_from_src P1/entry-libc.P1pp -runscm_input_from_src P1/elf-end.P1pp -runscm_input_from_src vendor-seed/ELF.hex2 - -runscm_input_from_src tcc/tcc.flat.c -runscm_input_from_src libc/libc.flat.c - -runscm_export tcc0 libc.P1pp tcc.flat.P1pp -runscm_run "${BOOT3_TIMEOUT:-1800}" - -echo "[$BOOT_TAG] sizes: tcc0=$(wc -c <"$OUT/tcc0") libc.P1pp=$(wc -c <"$OUT/libc.P1pp")" -echo "[$BOOT_TAG] OK -> $OUT/tcc0" diff --git a/scripts/boot4-gen-runscm.sh b/scripts/boot4-gen-runscm.sh @@ -1,6 +1,6 @@ #!/bin/sh ## boot4-gen-runscm.sh — emit run.scm driving boot4's tcc0→tcc1→tcc2→tcc3 -## chain inside the seed kernel. Mirrors scripts/boot4.sh's per-stage shell +## chain inside the seed kernel. Mirrors boot/boot4.sh's per-stage shell ## emission; per-arch values resolved on the host so the .scm body is ## straight-line (run …) calls. ## @@ -89,7 +89,7 @@ emit_link_tcc() { { cat <<'PROLOGUE' ;; boot4 run.scm — drive tcc0 -> tcc1 -> tcc2 -> tcc3 inside seed kernel. 
-;; Generated by scripts/boot4-gen-runscm.sh; mirrors scripts/boot4.sh's +;; Generated by scripts/boot4-gen-runscm.sh; mirrors boot/boot4.sh's ;; podman path stage-for-stage. Reads use in/; writes (intermediates and ;; exports) use out/. tcc0 is staged as in/tcc0; tcc1/tcc2/tcc3 are ;; produced and exported under out/. diff --git a/scripts/boot4.sh b/scripts/boot4.sh @@ -1,133 +0,0 @@ -#!/bin/sh -## boot4.sh — self-host tcc rebuild stages on top of boot3's tcc0. -## -## boot3 produced tcc0 (cc.scm-built bootstrap). boot4 runs the rest of -## the four-stage chain: tcc0 → tcc1 → tcc2 → tcc3. The bootstrap -## fixed-point check is `tcc2 == tcc3`: once tcc is compiling itself -## with no help from cc.scm, the chain reaches a byte-identical fixed -## point. (See docs/PLAN.md for the cc.scm vs tcc codegen-divergence -## reasoning behind needing four stages rather than two.) -## -## tcc0 = tcc-source compiled by cc.scm ← boot3 -## tcc1 = tcc-source compiled by tcc0 ← produced here -## tcc2 = tcc-source compiled by tcc1 ← produced here -## tcc3 = tcc-source compiled by tcc2 ← produced here -## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/src/tcc-libc/$ARCH/{start.S, sys_stubs.S} -## build/$ARCH/src/src/tcc-cc/mem.c -## build/$ARCH/src/src/tcc/tcc-0.9.26-1147-gee75a10c/lib/<arch-specific> -## build/$ARCH/src/src/tcc/tcc.flat.c -## build/$ARCH/src/src/libc/libc.flat.c -## build/$ARCH/src/src/test-fixtures/boot-hello.c -## -## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/$DRIVER/boot3/tcc0 -## build/$ARCH/$DRIVER/boot2/{catm, scheme1} -## -## ─── Tools ──────────────────────────────────────────────────────────── -## scheme1 evaluates a host-generated run.scm (from boot4-gen-runscm.sh) -## against the flat staging root. Every arch has CONFIG_TCC_ASM and -## assembles .S inputs (start.S, sys_stubs.S) directly inside the -## container; no host asm step. 
The aarch64 assembler is the phase-1 -## arm64-asm.c that flatten patches into tcc-0.9.26 (see -## docs/TCC-ARM64-ASM.md). -## -## ─── Outputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot4/{tcc1, tcc2, tcc3} -## tcc2 and tcc3 are byte-identical (asserted -## below) — that equality is the fixed-point. -## build/$ARCH/$DRIVER/boot4/crt1.o -## tcc2-built startup object, kept outside -## libc.a because it must lead link lines. -## build/$ARCH/$DRIVER/boot4/libc.a -## tcc2-built archive of sys_stubs.o + mem.o -## + libc.o -## build/$ARCH/$DRIVER/boot4/libtcc1.a -## tcc2-built tcc compiler helper archive -## build/$ARCH/$DRIVER/boot4/hello — mes-libc-linked smoke binary -## -## ─── Env knobs ──────────────────────────────────────────────────────── -## TCC_BOOTSTRAP_RELAX_FIXEDPOINT=1 -## After a codegen-altering tcc patch, the two-stage rule needs a -## third bounce to converge. Set this to accept tcc3 even when -## tcc2 != tcc3; the next boot4 run, started from this run's -## tcc3, will reach tcc2 == tcc3 with no extra knob. -## -## Usage: scripts/boot4.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. 
scripts/lib-arch.sh -bootlib_init boot4 "${1:-}" -driver_init empty -require_src - -case "$ARCH" in - aarch64) LIBTCC1_C_SRCS="lib-arm64.c"; LIBTCC1_ASM_SRCS="" ;; - amd64) LIBTCC1_C_SRCS="libtcc1.c va_list.c"; LIBTCC1_ASM_SRCS="alloca86_64.S alloca86_64-bt.S" ;; - riscv64) LIBTCC1_C_SRCS="lib-arm64.c"; LIBTCC1_ASM_SRCS="" ;; -esac - -BOOT2=build/$ARCH/$DRIVER/boot2 -BOOT3=build/$ARCH/$DRIVER/boot3 -SRC=build/$ARCH/src - -TCC_PKG=tcc-0.9.26-1147-gee75a10c -TCC_LIB_REL=tcc/$TCC_PKG/lib - -# ── prerequisites ───────────────────────────────────────────────────── -require_prev "$BOOT3" tcc0 -require_prev "$BOOT2" catm scheme1 -for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do - require_file "$SRC/src/$TCC_LIB_REL/$f" -done - -# ── stage inputs and run scheme1 + boot4 run.scm under $DRIVER ──────── -. scripts/lib-runscm.sh -runscm_init "$STAGE" "$OUT" -runscm_gen scripts/boot4-gen-runscm.sh "$ARCH" - -runscm_scheme1 "$BOOT2/scheme1" -runscm_prelude "$SRC/src/scheme1/prelude.scm" - -runscm_input tcc0 "$BOOT3/tcc0" -runscm_input catm "$BOOT2/catm" - -runscm_input_from_src "tcc-libc/$ARCH/start.S" -runscm_input_from_src "tcc-libc/$ARCH/sys_stubs.S" -runscm_input_from_src tcc-cc/mem.c -for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do - runscm_input_from_src "$TCC_LIB_REL/$f" -done - -runscm_input_from_src tcc/tcc.flat.c -runscm_input_from_src libc/libc.flat.c -runscm_input_from_src test-fixtures/boot-hello.c hello.c - -runscm_export tcc1 tcc2 tcc3 s3-crt1.o s3-libc.a s3-libtcc1.a hello -runscm_run "${BOOT4_TIMEOUT:-5400}" - -# ── fixed-point check (host-side) ───────────────────────────────────── -if ! 
cmp -s "$OUT/tcc2" "$OUT/tcc3"; then - s2=$(wc -c <"$OUT/tcc2") - s3=$(wc -c <"$OUT/tcc3") - if [ "${TCC_BOOTSTRAP_RELAX_FIXEDPOINT:-0}" = 1 ]; then - echo "[$BOOT_TAG] WARN: tcc2 ($s2) != tcc3 ($s3); TCC_BOOTSTRAP_RELAX_FIXEDPOINT=1, accepting tcc3" >&2 - else - echo "[$BOOT_TAG] FIXED-POINT FAIL: tcc2 ($s2) != tcc3 ($s3)" >&2 - exit 1 - fi -fi - -# ── normalize output names (drop s3- prefix) ────────────────────────── -# tcc1 / tcc2 are kept on disk: the test path (tcc-cc / tcc-libc suites) -# uses them as stage-2 / stage-3 self-built tcc binaries. -mv "$OUT/s3-crt1.o" "$OUT/crt1.o" -mv "$OUT/s3-libc.a" "$OUT/libc.a" -mv "$OUT/s3-libtcc1.a" "$OUT/libtcc1.a" -chmod 0700 "$OUT/tcc1" "$OUT/tcc2" "$OUT/tcc3" "$OUT/hello" - -echo "[$BOOT_TAG] sizes: libtcc1.a=$(wc -c <"$OUT/libtcc1.a") libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")" -echo "[$BOOT_TAG] OK -> $OUT/{tcc3, crt1.o, libc.a, libtcc1.a, hello} (fixed point: tcc2 == tcc3)" diff --git a/scripts/boot5-calibrate.sh b/scripts/boot5-calibrate.sh @@ -1,164 +0,0 @@ -#!/bin/sh -## boot5-calibrate.sh — produce vendor/upstream/musl-1.2.5-skip-$ARCH.txt -## -## NOT on the boot.sh path. Generates the per-arch calibration list -## boot5.sh uses to drop skip-on-fail logic from the container. Run -## this once per architecture when the patch set, calibration arch, or -## tcc version changes; commit the resulting file alongside the rest of -## the vendored musl artifacts. -## -## What it does: -## 1. Stage the same prerequisites boot5.sh stages (boot4/tcc3, -## libtcc1.a, vendored overrides + deletes, pre-generated headers, -## stdarg bridge). -## 2. Run a skip-on-fail compile loop in the container over every -## musl source. Whatever tcc 0.9.26 cannot compile gets recorded. -## 3. Copy the resulting skip list out to -## vendor/upstream/musl-1.2.5-skip-$ARCH.txt. 
-## -## boot5.sh then enumerates sources on the host and subtracts this -## list, emitting a flat sequential build script with no in-container -## branch on $TCC's exit code. -## -## Usage: scripts/boot5-calibrate.sh <amd64|aarch64|riscv64> - -set -eu - -usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; } -[ "$#" -eq 1 ] || usage -ARCH=$1 - -case "$ARCH" in - amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;; - aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;; - riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;; - *) usage ;; -esac - -ROOT=$(cd "$(dirname "$0")/.." && pwd) -cd "$ROOT" - -IMAGE=boot2-scratch:$ARCH -BOOT4=build/$ARCH/boot4 -STAGE=build/$ARCH/.boot5-calibrate -MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz -MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides -MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt -MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH -BRIDGE_FILE=build/$ARCH/vendor/tcc/stdarg-bridge.h -SKIP_OUT=vendor/upstream/musl-1.2.5-skip-$ARCH.txt - -[ -x "$BOOT4/tcc3" ] || { echo "missing $BOOT4/tcc3 (run scripts/boot4.sh $ARCH)" >&2; exit 1; } -[ -e "$BOOT4/libtcc1.a" ] || { echo "missing $BOOT4/libtcc1.a" >&2; exit 1; } -[ -e "$MUSL_TARBALL" ] || { echo "missing $MUSL_TARBALL" >&2; exit 1; } -[ -d "$MUSL_OVERRIDES" ] || { echo "missing $MUSL_OVERRIDES" >&2; exit 1; } -[ -e "$MUSL_DELETES" ] || { echo "missing $MUSL_DELETES" >&2; exit 1; } -[ -d "$MUSL_GENERATED" ] || { echo "missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; } -[ -e "$BRIDGE_FILE" ] || { echo "missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; } - -if ! 
podman image exists "$IMAGE"; then - podman build --platform "$PLATFORM" -t "$IMAGE" \ - -f scripts/Containerfile.scratch scripts/ -fi - -rm -rf "$STAGE" -mkdir -p "$STAGE/in" "$STAGE/out" - -cp "$BOOT4/tcc3" "$STAGE/in/tcc" -cp "$BOOT4/libtcc1.a" "$STAGE/in/libtcc1.a" -tar xzf "$MUSL_TARBALL" -C "$STAGE/in/" -MUSL_DIR=$STAGE/in/musl-1.2.5 -cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/" -while read -r p; do - [ -n "$p" ] && rm -rf "$MUSL_DIR/$p" -done < "$MUSL_DELETES" -cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h" -cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h" -cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h" - -echo "[calibrate $ARCH] running skip-on-fail compile loop in container" -podman run --rm -i --pull=never --platform "$PLATFORM" \ - --tmpfs /tmp:size=1024M \ - -e MUSL_ARCH="$MUSL_ARCH" \ - -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ - sh -eu -s <<'CONTAINER' -IN=/work/in -OUT=/work/out -TCC=$IN/tcc - -cd /tmp -cp -R "$IN/musl-1.2.5" . -cd musl-1.2.5 - -mkdir -p obj/include/bits obj/src/internal -cp $IN/musl-alltypes.h obj/include/bits/alltypes.h -cp $IN/musl-syscall.h obj/include/bits/syscall.h -echo '#define VERSION "1.2.5-tcc-boot5"' > obj/src/internal/version.h - -CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing - -D_XOPEN_SOURCE=700 - -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/src/internal - -I./src/include -I./src/internal -Iobj/include -I./include - -O2 -fomit-frame-pointer - -Werror=implicit-function-declaration -Werror=implicit-int - -Werror=pointer-sign -Werror=pointer-arith" -CFLAGS_C="$CFLAGS_BASE -include $IN/tcc-stdarg-bridge.h" -CFLAGS_ASM="$CFLAGS_BASE" - -SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent - src/env src/errno src/exit src/fcntl src/fenv src/internal - src/ipc src/legacy src/linux src/locale src/malloc - src/malloc/mallocng src/math src/misc src/mman src/mq - src/multibyte src/network src/passwd src/prng src/process - src/regex src/sched src/search src/select src/setjmp src/signal 
- src/stat src/stdio src/stdlib src/string src/temp src/termios - src/thread src/time src/unistd" - -BASE_SRCS=""; ARCH_SRCS="" -for d in $SRC_TOP; do - [ -d "$d" ] || continue - for f in $d/*.c; do [ -f "$f" ] && BASE_SRCS="$BASE_SRCS $f"; done - for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do - [ -f "$f" ] && ARCH_SRCS="$ARCH_SRCS $f" - done -done -REPLACED="" -for a in $ARCH_SRCS; do - p=${a%.*} - head=${p%%/${MUSL_ARCH}/*} - tail=${p#*/${MUSL_ARCH}/} - REPLACED="$REPLACED $head/$tail" -done -KEEP="" -for b in $BASE_SRCS; do - stem=${b%.c}; skip=0 - for r in $REPLACED; do [ "$stem" = "$r" ] && { skip=1; break; }; done - [ $skip -eq 0 ] && KEEP="$KEEP $b" -done -KEEP="$KEEP $ARCH_SRCS" - -mkdir -p obj/lib -n=0; n_ok=0; n_skip=0 -: >$OUT/skipped.txt -for src in $KEEP; do - obj="obj/${src%.*}.o" - mkdir -p "$(dirname $obj)" - case "$src" in - *.c) flags="$CFLAGS_C" ;; - *.s | *.S) flags="$CFLAGS_ASM" ;; - *) flags="$CFLAGS_C" ;; - esac - if $TCC $flags -c "$src" -o "$obj" >/tmp/compile.log 2>&1; then - n_ok=$((n_ok+1)) - else - n_skip=$((n_skip+1)) - echo "$src" >>$OUT/skipped.txt - fi - n=$((n+1)) - [ $((n % 200)) -eq 0 ] && echo " $n done (ok=$n_ok skip=$n_skip)" -done -echo " compiled=$n_ok skipped=$n_skip total=$n" -CONTAINER - -sort -u "$STAGE/out/skipped.txt" > "$SKIP_OUT" -echo "[calibrate $ARCH] wrote $SKIP_OUT ($(wc -l <"$SKIP_OUT") entries)" diff --git a/scripts/boot5-gen-runscm.sh b/scripts/boot5-gen-runscm.sh @@ -1,6 +1,6 @@ #!/bin/sh ## boot5-gen-runscm.sh — emit run.scm driving boot5's musl + hello build -## inside the seed kernel. Mirrors scripts/boot5.sh's podman-path script +## inside the seed kernel. Mirrors boot/boot5.sh's podman-path script ## generation step-for-step: per-source `tcc -c`, per-arch CRT, archive, ## link hello. Source enumeration done by boot5.sh; this script consumes ## the resulting build-srcs.txt and emits one `(run "in/tcc" …)` form per TU. 
diff --git a/scripts/boot5.sh b/scripts/boot5.sh @@ -1,170 +0,0 @@ -#!/bin/sh -## boot5.sh — build musl-1.2.5 with boot4 artifacts and link hello. -## -## Builds on top of boot4's verified-fixed-point tcc (tcc2 == tcc3) and -## demonstrates that the same compiler can produce a working static libc -## from upstream musl source — patched only as far as needed to work -## around tcc's missing GCC extensions (register-asm-variable syscalls, -## attribute(alias) weak refs, _Complex, x86_64 SSE/x87 inline asm). -## -## ─── Inputs ────────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot4/tcc3 — boot4's verified self-host tcc -## build/$ARCH/$DRIVER/boot4/libtcc1.a — boot4's tcc runtime archive -## build/$ARCH/$DRIVER/boot2/{catm, scheme1} -## build/$ARCH/src/src/musl/ — canonical musl tree (overrides -## merged, deletes applied, -## alltypes.h/syscall.h generated, -## per-arch skip filter applied) -## build/$ARCH/src/src/tcc/stdarg-bridge.h -## build/$ARCH/src/src/test-fixtures/boot-hello.c -## -## ─── Tools ──────────────────────────────────────────────────────────── -## scheme1 evaluates a host-generated run.scm (from boot5-gen-runscm.sh) -## against the flat staging root. -## -## ─── Outputs ───────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot5/libc.a -## build/$ARCH/$DRIVER/boot5/{crt1.o, crti.o, crtn.o} -## build/$ARCH/$DRIVER/boot5/hello — static, runs in the container -## -## Usage: scripts/boot5.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. 
scripts/lib-arch.sh -bootlib_init boot5 "${1:-}" -driver_init empty -require_src - -BOOT2=build/$ARCH/$DRIVER/boot2 -BOOT4=build/$ARCH/$DRIVER/boot4 -SRC=build/$ARCH/src -MUSL_DIR=$SRC/src/musl - -# ── prerequisites ───────────────────────────────────────────────────── -require_prev "$BOOT4" tcc3 -require_prev "$BOOT2" catm scheme1 -require_file "$BOOT4/libtcc1.a" "run scripts/boot4.sh $ARCH" -require_file "$MUSL_DIR" "run scripts/prep-src.sh $ARCH and scripts/prep-musl.sh $ARCH" -require_file "$MUSL_DIR/skip.txt" "run scripts/prep-musl.sh $ARCH" -require_file "$SRC/src/tcc/stdarg-bridge.h" "run scripts/prep-src.sh $ARCH" - -# ── prepare staging dirs ────────────────────────────────────────────── -# $STAGE/in/ — read-only inputs (becomes /work/in or in/ in tmpfs) -# $STAGE/out/ — writable outputs (becomes /work/out or out/ in tmpfs) -# $STAGE/_host/ — host-side scratch (enumeration outputs); never -# visible to the container/kernel. -. scripts/lib-runscm.sh -runscm_init "$STAGE" "$OUT" -mkdir -p "$STAGE/_host" - -# ── enumerate musl sources from the canonical tree ──────────────────── -# Mirrors musl's Makefile rule: a per-arch override (under -# $d/$MUSL_ARCH/) replaces the same-stem base file (under $d/). The -# canonical tree already had the per-arch skip filter applied by -# prep-musl.sh, so no skip subtraction is needed here. 
-SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent - src/env src/errno src/exit src/fcntl src/fenv src/internal - src/ipc src/legacy src/linux src/locale src/malloc - src/malloc/mallocng src/math src/misc src/mman src/mq - src/multibyte src/network src/passwd src/prng src/process - src/regex src/sched src/search src/select src/setjmp src/signal - src/stat src/stdio src/stdlib src/string src/temp src/termios - src/thread src/time src/unistd" - -( - cd "$MUSL_DIR" - for d in $SRC_TOP; do - [ -d "$d" ] || continue - for f in $d/*.c; do [ -f "$f" ] && echo "$f"; done - done -) > "$STAGE/_host/base.txt" - -( - cd "$MUSL_DIR" - for d in $SRC_TOP; do - [ -d "$d/$MUSL_ARCH" ] || continue - for f in $d/$MUSL_ARCH/*.c $d/$MUSL_ARCH/*.s $d/$MUSL_ARCH/*.S; do - [ -f "$f" ] && echo "$f" - done - done -) > "$STAGE/_host/arch.txt" - -# REPLACED: bases that have arch-specific overrides (drop them from -# BASE). KEEP = (BASE - REPLACED) ∪ ARCH. -awk -v ARCH="$MUSL_ARCH" ' - { - sub(/\.[^.]*$/, "") # strip extension - slot = "/" ARCH "/" - i = index($0, slot) - head = substr($0, 1, i - 1) - tail = substr($0, i + length(slot)) - print head "/" tail - } -' "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/replaced.txt" - -# Filter base by removing stems that appear in replaced. -awk -v REPF="$STAGE/_host/replaced.txt" ' - BEGIN { while ((getline l < REPF) > 0) rep[l] = 1 } - { - stem = $0 - sub(/\.c$/, "", stem) - if (!(stem in rep)) print - } -' "$STAGE/_host/base.txt" > "$STAGE/_host/keep_base.txt" - -cat "$STAGE/_host/keep_base.txt" "$STAGE/_host/arch.txt" | sort -u > "$STAGE/_host/build-srcs.txt" - -n_src=$(wc -l < "$STAGE/_host/build-srcs.txt") -n_skip=$(grep -cv '^[[:space:]]*\(#\|$\)' "$MUSL_DIR/skip.txt" || true) -echo "[$BOOT_TAG] keep=$n_src skip=$n_skip (calibrated)" - -# Record CRT mode (asm vs c) so the gen-runscm step picks the right -# crti/crtn source set without re-checking $MUSL_DIR. 
-if [ -f "$MUSL_DIR/crt/$MUSL_ARCH/crti.s" ]; then - echo asm > "$STAGE/_host/crt-mode" -else - echo c > "$STAGE/_host/crt-mode" -fi - -# Pre-create per-source obj/ directories under $STAGE/out/obj/musl/ so -# scheme1's (run "in/tcc" -c …) doesn't need to mkdir at runtime (tcc -# errors out if the parent dir is missing, and scheme1 has no mkdir -# primitive). -awk ' - { - sub(/\.[^.]*$/, "") - if (match($0, /\/[^\/]*$/)) print substr($0, 1, RSTART - 1) - } -' "$STAGE/_host/build-srcs.txt" | sort -u > "$STAGE/_host/build-objdirs.txt" -COBJ=$STAGE/out/obj/musl -mkdir -p "$COBJ/crt" -while read -r d; do mkdir -p "$COBJ/$d"; done < "$STAGE/_host/build-objdirs.txt" - -# ── generate run.scm and stage chain binaries ───────────────────────── -runscm_gen scripts/boot5-gen-runscm.sh "$MUSL_ARCH" "$STAGE/_host" - -runscm_scheme1 "$BOOT2/scheme1" -runscm_prelude "$SRC/src/scheme1/prelude.scm" - -# Chain binaries staged at flat in/ root (cwd-relative names in run.scm). -runscm_input tcc "$BOOT4/tcc3" -runscm_input libtcc1.a "$BOOT4/libtcc1.a" -runscm_input catm "$BOOT2/catm" -runscm_input_from_src tcc/stdarg-bridge.h tcc-stdarg-bridge.h -runscm_input_from_src test-fixtures/boot-hello.c hello.c - -# Stage the canonical musl tree under in/musl/. Both drivers pick it -# up automatically (podman bind-mounts $STAGE/in; seed packs -# `find in -type f` into the cpio). -runscm_input_tree_from_src musl musl - -runscm_export libc.a crt1.o crti.o crtn.o hello - -# boot5 has ~1300 spawns + heavy tcc work; bump qemu memory + timeout for -# the seed driver. Podman ignores QEMU_MEM and uses host memory directly. -QEMU_MEM=${QEMU_MEM:-3072M} runscm_run "${BOOT5_TIMEOUT:-7200}" - -echo "[$BOOT_TAG] sizes: libc.a=$(wc -c <"$OUT/libc.a") hello=$(wc -c <"$OUT/hello")" -echo "[$BOOT_TAG] OK -> $OUT/{libc.a, crt1.o, crti.o, crtn.o, hello}" diff --git a/scripts/boot6.sh b/scripts/boot6.sh @@ -1,69 +0,0 @@ -#!/bin/sh -## boot6.sh — build the seed-kernel ELF/Image with boot4's tcc3. 
-## -## Drives tcc3 to compile + link the seed kernel directly: no `ld -T -## kernel.lds`, no objcopy. aarch64 emits the flat Image QEMU expects; -## amd64/riscv64 emit the ELF consumed by QEMU's -kernel path. -## -## ─── Inputs (sources, from canonical tree) ─────────────────────────── -## build/$ARCH/src/src/kernel/arch/$ARCH/{kernel.S, mmu.c, arch.h} -## build/$ARCH/src/src/kernel/kernel.c -## build/$ARCH/src/src/tcc-cc/mem.c -## -## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/$DRIVER/boot4/tcc3 -## build/$ARCH/$DRIVER/boot2/scheme1 -## -## ─── Tools ──────────────────────────────────────────────────────────── -## scheme1 evaluates a host-generated run.scm (from boot6-gen-runscm.sh) -## against the flat staging root. -## -## ─── Outputs ───────────────────────────────────────────────────────── -## build/$ARCH/$DRIVER/boot6/$KERNEL_NAME -## aarch64: Image — flat boot Image, byte-format identical to the gcc -## Makefile's `objcopy -O binary` output. QEMU's `-kernel` -## detects `ARM\x64` magic at file offset 0x38 and follows -## the arm64 boot protocol, putting DTB phys in x0 before -## jumping to _start. -## amd64/riscv64: kernel.elf — ELF consumed via QEMU's -kernel path. -## -## Usage: scripts/boot6.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} for either DRIVER (default podman). - -set -eu - -. scripts/lib-arch.sh -bootlib_init boot6 "${1:-}" -driver_init empty -require_src - -BOOT2=build/$ARCH/$DRIVER/boot2 -BOOT4=build/$ARCH/$DRIVER/boot4 -SRC=build/$ARCH/src - -# ── prerequisites ───────────────────────────────────────────────────── -require_prev "$BOOT4" tcc3 -require_prev "$BOOT2" scheme1 -for f in kernel/arch/$ARCH/kernel.S kernel/arch/$ARCH/mmu.c kernel/arch/$ARCH/arch.h kernel/kernel.c tcc-cc/mem.c; do - require_file "$SRC/src/$f" -done - -# ── stage inputs and run scheme1 + run.scm under $DRIVER ────────────── -. 
scripts/lib-runscm.sh -runscm_init "$STAGE" "$OUT" -runscm_gen scripts/boot6-gen-runscm.sh "$ARCH" - -runscm_scheme1 "$BOOT2/scheme1" -runscm_prelude "$SRC/src/scheme1/prelude.scm" - -runscm_input tcc3 "$BOOT4/tcc3" -runscm_input_from_src "kernel/arch/$ARCH/kernel.S" -runscm_input_from_src kernel/kernel.c -runscm_input_from_src "kernel/arch/$ARCH/arch.h" -runscm_input_from_src "kernel/arch/$ARCH/mmu.c" -runscm_input_from_src tcc-cc/mem.c - -runscm_export "$KERNEL_NAME" -runscm_run "${BOOT6_TIMEOUT:-1200}" - -echo "[$BOOT_TAG] OK -> $OUT/$KERNEL_NAME ($(wc -c <"$OUT/$KERNEL_NAME") bytes)" diff --git a/scripts/build-tcc-gcc.sh b/scripts/build-tcc-gcc.sh @@ -1,42 +0,0 @@ -#!/bin/sh -## build-tcc-gcc.sh — link tcc.flat.c + libc.flat.c with stock gcc. -## -## Sanity-check sibling of the cc.scm path. Inputs are the *same* -## flatten outputs the cc.scm pipeline consumes; harness sources -## (tcc-gcc/<arch>/{start.S,sys_stubs.c}) provide a minimal _start and -## syscall stubs so we don't need musl's crt0 or its libc. If -## tcc-gcc -version works and our cc.scm-built tcc-boot2 doesn't, the -## bug is downstream of the C source. -## -## Runs inside the boot2-alpine-gcc:<arch> image. -nodefaultlibs (not -## -nostdlib) keeps libgcc available for long-double soft-float -## helpers (__addtf3 etc., needed on aarch64 musl). 
-## -## Env: ARCH=aarch64 (only arch wired today; start.S is per-arch) -## Usage: build-tcc-gcc.sh <out> <tcc.flat.c> <libc.flat.c> - -set -eu - -: "${ARCH:?ARCH must be set}" -[ "$#" -eq 3 ] || { echo "usage: ARCH=<arch> $0 <out> <tcc.flat.c> <libc.flat.c>" >&2; exit 2; } - -OUT=$1 -TCC_FLAT=$2 -LIBC_FLAT=$3 - -HARNESS=tcc-gcc/$ARCH -[ -d "$HARNESS" ] || { echo "no harness for ARCH=$ARCH at $HARNESS" >&2; exit 1; } - -mkdir -p "$(dirname "$OUT")" - -gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \ - -fno-builtin \ - -Wno-implicit-function-declaration \ - -Wno-builtin-declaration-mismatch \ - -Wno-incompatible-pointer-types \ - -Wno-int-conversion \ - -e _start \ - "$HARNESS/start.S" "$HARNESS/sys_stubs.c" \ - tcc-cc/mem.c \ - "$TCC_FLAT" "$LIBC_FLAT" \ - -lgcc -o "$OUT" diff --git a/scripts/count-lines.sh b/scripts/count-lines.sh @@ -1,32 +0,0 @@ -#!/bin/sh -## count-lines.sh — line counts for the core sources. -## -## Skips ZERO/comment/blank lines per the existing cloc convention. -## Pass file paths as arguments, or pass none to read from stdin -## (one path per line). Prints `<count> <path>` per file plus a -## trailing total. 
-## -## Usage: -## sh scripts/count-lines.sh file1 file2 … -## printf '%s\n' file1 file2 | sh scripts/count-lines.sh - -set -eu - -if [ "$#" -gt 0 ]; then - FILES="$*" -else - FILES=$(cat) -fi - -total=0 -for f in $FILES; do - [ -e "$f" ] || { echo "count-lines: missing $f" >&2; exit 1; } - n=$(grep -v "^ZERO.*" "$f" \ - | grep -v "^[[:space:]]*#.*" \ - | grep -v "^[[:space:]]*;.*" \ - | grep -v "^$" \ - | wc -l) - printf '%6d %s\n' "$n" "$f" - total=$((total + n)) -done -printf '%6d total\n' "$total" diff --git a/scripts/diag-livebootstrap-qemu.sh b/scripts/diag-livebootstrap-qemu.sh @@ -1,193 +0,0 @@ -#!/bin/sh -## scripts/diag-livebootstrap-qemu.sh — DIAGNOSTIC ONLY -## -## Runs live-bootstrap's stage0 → tcc-0.9.27 chain inside a busybox:musl -## container under linux/amd64 QEMU emulation, to determine whether the -## tcc-boot0-mes startup SEGV (Issue §3 in docs/TCC.md) is QEMU's fault -## or specific to our build. -## -## **This script is diagnostic, not part of the bootstrap chain.** It -## intentionally invokes live-bootstrap's M2-Planet / Mes / MesCC path -## — the very chain our project replaces. Nothing it produces feeds -## into the project's deliverables. Read result, then ignore. -## -## Outcome interpretation: -## - chain reaches tcc-0.9.27 and `tcc -version` works: -## QEMU is sound. Our tcc-boot0-mes SEGV is build-specific -## (codegen/runtime bug). Action: backport tcc 0.9.28rc fixes -## or compare disasm against live-bootstrap's tcc-boot0. -## - chain SEGVs at tcc-mes / tcc-boot0 / tcc-0.9.27: -## QEMU is broken for these binaries on macOS arm64. Action: -## either run on native x86_64 hardware, or use the linux/386 -## (32-bit) path with QEMU and accept the arch mismatch. -## -## Setup (host): -## - LIVE_BOOTSTRAP=<path>: required env var pointing at a working -## live-bootstrap checkout. Distfiles are taken from -## $LIVE_BOOTSTRAP/../lb-work/distfiles (the sibling layout -## live-bootstrap's own scripts produce). 
The diagnostic also -## needs tcc-0.9.27.tar.bz2; this script fetches it via curl on -## first run if missing. Stage0-posix tools (M2-Planet, -## mescc-tools, etc.) are bundled in -## $LIVE_BOOTSTRAP/seed/stage0-posix and don't need distfiles. -## -## The script has no implicit `../live-bootstrap` lookup — the rest -## of the build is in-tree, and this diagnostic is the sole script -## that needs the upstream tree, so make the dependency explicit. -## -## Pipeline: -## 1. (host) populate distfiles (curl tcc-0.9.27 if needed) -## 2. (host) assemble rootfs at build/diag-livebootstrap/rootfs/ -## - copy seed/stage0-posix/* (the hex0 binaries + M2-Planet -## + mescc-tools sources) to / -## - copy seed/{after,seed,preseeded}.kaem to / -## - copy steps/, lib/ to / -## - copy distfiles to /distfiles -## - write /steps/bootstrap.cfg with ARCH=amd64, CHROOT=True -## - truncate /steps/manifest after `build: tcc-0.9.27` so the -## chain stops at our target instead of running 200+ builds -## 3. (busybox:musl, linux/amd64) chroot into rootfs and run -## /bootstrap-seeds/POSIX/AMD64/kaem-optional-seed -## -## Runtime: many hours under QEMU emulation on macOS arm64. Set aside a -## work block. Set DIAG_PREP_ONLY=1 to do steps 1-2 and skip the run. -## -## Usage: -## scripts/diag-livebootstrap-qemu.sh - -set -eu - -ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -: "${LIVE_BOOTSTRAP:?set LIVE_BOOTSTRAP=<path-to-live-bootstrap-checkout>}" -LB=$(cd "$LIVE_BOOTSTRAP" && pwd) -DISTFILES=${LB_DISTFILES:-$LB/../lb-work/distfiles} -WORK=$ROOT/build/diag-livebootstrap -ROOTFS=$WORK/rootfs - -[ -d "$LB" ] || { echo "missing live-bootstrap at $LB" >&2; exit 1; } -[ -d "$DISTFILES" ] || { echo "missing distfiles at $DISTFILES (override with LB_DISTFILES=<path>)" >&2; exit 1; } -command -v podman >/dev/null 2>&1 || { echo "podman required" >&2; exit 2; } - -# --- (1) ensure distfiles populated ---------------------------------- -NEED="mes-0.27.1.tar.gz tcc-0.9.26.tar.gz tcc-0.9.27.tar.bz2 nyacc-1.00.2-lb1.tar.gz" -for f in $NEED; do - if [ ! -r "$DISTFILES/$f" ]; then - echo "missing $DISTFILES/$f" - case "$f" in - tcc-0.9.27.tar.bz2) - echo "fetching from savannah..." - curl --fail --location \ - "https://download.savannah.gnu.org/releases/tinycc/$f" \ - -o "$DISTFILES/$f" - ;; - *) - echo " (cannot auto-fetch $f — please populate $DISTFILES/$f)" >&2 - exit 1 - ;; - esac - fi -done -echo "distfiles ok: $NEED" - -# --- (2) assemble rootfs -------------------------------------------- -echo "=== assembling rootfs at $ROOTFS ===" -rm -rf "$WORK" -mkdir -p "$ROOTFS" - -# seed/stage0-posix → / (stage0 tools, M2-Planet, mescc-tools, kaem etc) -cp -R "$LB/seed/stage0-posix/." 
"$ROOTFS/" - -# seed/*.kaem → / (preseeded.kaem, seed.kaem, after.kaem) -cp "$LB/seed/"*.kaem "$ROOTFS/" - -# Other seed files (configurator binaries, checksums) -cp "$LB/seed/configurator.c" "$LB/seed/configurator.amd64.checksums" "$ROOTFS/" 2>/dev/null || true -cp "$LB/seed/script-generator.c" "$LB/seed/script-generator.amd64.checksums" "$ROOTFS/" 2>/dev/null || true - -# steps/, lib/ from live-bootstrap -cp -R "$LB/steps" "$ROOTFS/" -cp -R "$LB/lib" "$ROOTFS/" - -# distfiles → /external/distfiles (live-bootstrap's steps/env sets -# DISTFILES=/external/distfiles, and helpers/build steps read from there) -mkdir -p "$ROOTFS/external/distfiles" -for f in $NEED; do - cp "$DISTFILES/$f" "$ROOTFS/external/distfiles/" -done - -# Truncate manifest to stop after the first `build: tcc-0.9.27`. The -# manifest has a header comment block (lines 1-33), then build steps -# starting at line 34. Line 38 is the first tcc-0.9.27 build. -awk ' - /^build: tcc-0\.9\.27/ && !seen_tcc27 { - print - seen_tcc27 = 1 - next - } - seen_tcc27 { - # drop everything after first tcc-0.9.27 build - next - } - { print } -' "$LB/steps/manifest" > "$ROOTFS/steps/manifest" - -# bootstrap.cfg — mirrors what rootfs.py would write for -# `--arch amd64 --chroot --mirrors file:///distfiles`. We disable -# every optional pipeline (kernels, configurator, fiwix) since this -# is a pass1-only diagnostic. -cat > "$ROOTFS/steps/bootstrap.cfg" <<'EOF' -ARCH=amd64 -ARCH_DIR=AMD64 -FORCE_TIMESTAMPS=False -CHROOT=True -UPDATE_CHECKSUMS=False -JOBS=2 -SWAP_SIZE=0 -FINAL_JOBS=2 -INTERNAL_CI=False -INTERACTIVE=False -QEMU=False -BARE_METAL=False -DISK=sda1 -KERNEL_BOOTSTRAP=False -BUILD_KERNELS=False -CONFIGURATOR=False -MIRRORS_LEN=0 -EOF - -echo "rootfs assembled." -du -sh "$ROOTFS" 2>/dev/null || true - -if [ "${DIAG_PREP_ONLY:-0}" = "1" ]; then - echo "DIAG_PREP_ONLY=1 — skipping container run." 
- exit 0 -fi - -# --- (3) run kaem-optional-seed in busybox:musl under linux/amd64 ---- -echo -echo "=== launching kaem-optional-seed via busybox:musl (linux/amd64 QEMU) ===" -echo " long-running. ctrl-C aborts. log lines stream below." -echo - -# busybox:musl ships chroot, sh, tar, awk, etc. — sufficient. -# /proc /dev /sys are mounted by podman; chroot inherits them via bind. -# We mount the rootfs as /work/rootfs inside the container, then chroot. -podman run --rm -i --platform linux/amd64 \ - -v "$ROOTFS":/rootfs \ - docker.io/library/busybox:musl sh -s <<'CONTAINER_SCRIPT' -set -eu - -# Ensure /proc /dev /sys exist inside the chroot for kaem etc. -mkdir -p /rootfs/proc /rootfs/dev /rootfs/sys /rootfs/tmp -mount -t proc proc /rootfs/proc 2>/dev/null || true -mount --rbind /dev /rootfs/dev 2>/dev/null || true -mount --rbind /sys /rootfs/sys 2>/dev/null || true - -echo "--- starting chroot kaem ---" -exec env -i PATH=/bin chroot /rootfs /bootstrap-seeds/POSIX/AMD64/kaem-optional-seed -CONTAINER_SCRIPT - -rc=$? -echo -echo "=== kaem exit=$rc ===" -exit "$rc" diff --git a/scripts/disasm-elf.sh b/scripts/disasm-elf.sh @@ -1,131 +0,0 @@ -#!/bin/sh -## disasm-elf.sh — disassemble a hex2pp-emitted ELF with llvm-objdump. -## -## Our seed ELF.hex2 sets ph_memsz to 512 MB (so the BSS region past -## ELF_end is mappable), but ph_filesz is just the on-disk size. -## llvm-objdump trusts memsz when laying out the segment for -## disassembly and runs off the end of the file with -## "The end of the file was unexpectedly encountered". The seed ELF -## also lacks section headers, so --start-address/--stop-address -## doesn't help on its own. -## -## Workaround: copy the ELF, patch ph_memsz down to ph_filesz, then -## disassemble. Output goes to stdout. -## -## We also auto-default --start-address to e_entry so the ELF header + -## program header bytes at the top of PT_LOAD aren't decoded as bogus -## instructions. Pass an explicit --start-address (e.g. 
0x600000) to -## override and see the header bytes. -## -## boot-build-p1*.sh writes a one-line sidecar at <elf>.workdir pointing -## at build/$ARCH/.work/<src-without-ext>/. P1pp builds store -## expanded.hex2pp there; legacy raw-P1 seed builds store prog.hex2. -## When that sidecar is present we extract a label map via -## scripts/m1-symbols.py and: -## - default --stop-address to :_text_end if that sentinel label is -## present, so trailing rodata doesn't decode as bogus instructions -## - inject "<label>:" headers and rewrite "<PT_LOAD#0+0xNNN>" xrefs -## in the disasm output -## Pass NO_LABELS=1 to disable both behaviors. -## -## Usage: disasm-elf.sh <elf> [llvm-objdump args...] -## defaults to `-d` (text only). For data + text, pass `-D`. - -set -eu - -[ "$#" -ge 1 ] || { echo "usage: $0 <elf> [llvm-objdump args...]" >&2; exit 2; } - -ELF=$1; shift -[ -e "$ELF" ] || { echo "missing $ELF" >&2; exit 1; } - -OBJDUMP=${LLVM_OBJDUMP:-llvm-objdump} -TRIPLE=${TRIPLE:-aarch64-linux-gnu} - -# ELF fields we read (little-endian 8-byte): -# e_entry at file offset 0x18 -# ph_filesz at file offset 0x60 (e_phoff 0x40 + 0x20) -# ph_memsz at file offset 0x68 (e_phoff 0x40 + 0x28) -# Single-program-header layout, per our seed ELF. -read_le8() { - od -An -tu8 -N8 -j"$2" "$1" | tr -d ' \n' -} -write_le8() { - # $1 file, $2 offset, $3 value - printf '%016x' "$3" \ - | sed 's/\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)/\8\7\6\5\4\3\2\1/' \ - | xxd -r -p \ - | dd of="$1" bs=1 seek="$2" count=8 conv=notrunc status=none -} - -ENTRY=$(read_le8 "$ELF" 24) -FILESZ=$(read_le8 "$ELF" 96) -MEMSZ=$(read_le8 "$ELF" 104) - -TMP=$(mktemp -t disasm-elf.XXXXXX) -trap 'rm -f "$TMP"' EXIT -cp "$ELF" "$TMP" -chmod u+w "$TMP" - -if [ "$MEMSZ" != "$FILESZ" ]; then - write_le8 "$TMP" 104 "$FILESZ" -fi - -# Default to -d if no objdump flags given. 
-[ "$#" -eq 0 ] && set -- -d - -# Auto-skip the ELF header + program header by defaulting -# --start-address to e_entry, unless the user supplied their own. -have_start=0 -have_stop=0 -for arg in "$@"; do - case "$arg" in - --start-address=*|--start-address) have_start=1;; - --stop-address=*|--stop-address) have_stop=1;; - esac -done -if [ "$have_start" -eq 0 ]; then - set -- "--start-address=0x$(printf '%x' "$ENTRY")" "$@" -fi - -# Locate expanded.hex2pp (new P1pp path) or prog.hex2 (legacy raw-P1 -# path) via the <elf>.workdir sidecar produced by boot-build-p1*.sh. -# The sidecar holds a repo-relative path (build/$ARCH/.work/<src>/), so -# resolve it against the repo root inferred from this script's location. -HERE=$(dirname "$0") -REPO_ROOT=$(cd "$HERE/.." && pwd) -HEX2="" -if [ -e "$ELF.workdir" ]; then - workdir=$(cat "$ELF.workdir") - case "$workdir" in - /*) ;; # absolute, leave alone - *) workdir="$REPO_ROOT/$workdir" ;; - esac - if [ -e "$workdir/expanded.hex2pp" ]; then - HEX2="$workdir/expanded.hex2pp" - elif [ -e "$workdir/prog.hex2" ]; then - HEX2="$workdir/prog.hex2" - else - echo "disasm-elf: $ELF.workdir -> $workdir, but no expanded.hex2pp or prog.hex2 there" >&2 - fi -elif [ "${NO_LABELS:-0}" != "1" ]; then - echo "disasm-elf: no $ELF.workdir sidecar; rebuild for label annotation" >&2 -fi -MAP="" -if [ "${NO_LABELS:-0}" != "1" ] && [ -n "$HEX2" ]; then - MAP=$(mktemp -t disasm-elf-map.XXXXXX) - trap 'rm -f "$TMP" "$MAP"' EXIT - "$HERE/m1-symbols.py" map "$HEX2" > "$MAP" - # Default --stop-address to :_text_end if no user value and the - # sentinel exists in the map. 
- if [ "$have_stop" -eq 0 ]; then - text_end=$(awk '$2 == "_text_end" {print $1; exit}' "$MAP") - [ -n "$text_end" ] && set -- "--stop-address=$text_end" "$@" - fi -fi - -if [ -n "$MAP" ]; then - "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" \ - | "$HERE/m1-symbols.py" annotate "$MAP" -else - exec "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" -fi diff --git a/scripts/lib-arch.sh b/scripts/lib-arch.sh @@ -1,177 +0,0 @@ -# lib-arch.sh — single source for arch + driver setup shared by -# scripts/boot.sh, scripts/boot{0..6}.sh, lib-pipeline.sh, lib-runscm.sh. -# -# Public entry points (call in this order from a bootN.sh): -# -# bootlib_init <stage> <arch> # validate <arch>, cd to repo root, -# # set ARCH/PLATFORM/KERNEL_NAME/ -# # MUSL_ARCH/DRIVER/BOOT_STAGE/BOOT_TAG. -# driver_init [<image-kind>] # set OUT/STAGE; podman: build IMAGE -# # if missing (image-kind ∈ scratch| -# # empty; default scratch); seed: -# # verify boot6 kernel exists. -# require_src # die if build/$ARCH/src/ missing. -# require_prev <dir> <name>... # die if any <dir>/<name> is -# # missing or non-executable. -# require_file <path> [<hint>] # die if <path> missing; print a -# # uniform diagnostic with hint. -# -# After bootlib_init, the following shell vars are set/exported: -# ARCH input architecture token (aarch64|amd64|riscv64) -# ROOT repo root (cwd is set to ROOT) -# DRIVER podman|seed (defaults to podman) -# PLATFORM linux/<arm64|amd64|riscv64> for podman --platform -# KERNEL_NAME Image (aarch64) | kernel.elf (amd64,riscv64) -# MUSL_ARCH aarch64 | x86_64 | riscv64 -# BOOT_TAG "<stage>/<driver>/<arch>" for log prefixes -# BOOT_STAGE stage name as passed in (boot0|boot1|...) 
-# -# After driver_init (boot stages only — prep-* skip it): -# OUT build/$ARCH/$DRIVER/$BOOT_STAGE (stage output dir) -# STAGE build/$ARCH/$DRIVER/.$BOOT_STAGE-stage (scratch staging dir) -# podman: IMAGE -# seed: KERNEL_IMAGE, EXTRACT, SEED_ARCH - -bootlib_init() { - _stage=$1; _arch=${2:-} - [ -n "$_stage" ] || { echo "lib-arch: bootlib_init: stage required" >&2; exit 2; } - case "$_arch" in - aarch64|amd64|riscv64) ;; - *) echo "usage: $0 <aarch64|amd64|riscv64>" >&2; exit 2 ;; - esac - ARCH=$_arch - ROOT=$(cd "$(dirname "$0")/.." && pwd) - cd "$ROOT" - DRIVER=${DRIVER:-podman} - case "$DRIVER" in - podman|seed) ;; - *) echo "[$_stage/$DRIVER/$ARCH] unknown DRIVER=$DRIVER (expected podman|seed)" >&2; exit 2 ;; - esac - BOOT_STAGE=$_stage - BOOT_TAG="$_stage/$DRIVER/$ARCH" - BOOT_T0=$(date +%s) - case "$ARCH" in - aarch64) PLATFORM=linux/arm64; KERNEL_NAME=Image; MUSL_ARCH=aarch64 ;; - amd64) PLATFORM=linux/amd64; KERNEL_NAME=kernel.elf; MUSL_ARCH=x86_64 ;; - riscv64) PLATFORM=linux/riscv64; KERNEL_NAME=kernel.elf; MUSL_ARCH=riscv64 ;; - esac - export ARCH ROOT DRIVER PLATFORM KERNEL_NAME MUSL_ARCH BOOT_TAG BOOT_STAGE BOOT_T0 - trap _bootlib_finish EXIT -} - -# _bootlib_finish — EXIT trap installed by bootlib_init. Prints -# `[$BOOT_TAG] done in Xs (cum Ys)` (or `failed after Xs` on error). -# On success, records the elapsed time so later stages can sum the -# chain. Cumulative = sum of all per-stage .timing files relevant to -# the current $ARCH/$DRIVER. -_bootlib_finish() { - _exit=$? - [ -n "${BOOT_T0:-}" ] || return 0 - _elapsed=$(( $(date +%s) - BOOT_T0 )) - if [ "$_exit" != 0 ]; then - echo "[$BOOT_TAG] failed after ${_elapsed}s (exit=$_exit)" >&2 - return 0 - fi - # Record this stage's time. Boot stages have OUT (set by driver_init); - # the orchestrator (BOOT_STAGE=boot) doesn't write — its time would - # double-count. Other stages without OUT (prep-src, prep-musl) write - # to a per-arch sidecar dir. 
- if [ "$BOOT_STAGE" != boot ]; then - if [ -n "${OUT:-}" ] && [ -d "$OUT" ]; then - echo "$_elapsed" > "$OUT/.timing" - elif [ -d "build/$ARCH" ]; then - mkdir -p "build/$ARCH/.timings" - echo "$_elapsed" > "build/$ARCH/.timings/$BOOT_STAGE" - fi - fi - # Cumulative: sum boot-stage timings for this driver + driver- - # independent prep timings. Glob may not match — guard each path. - _cum=0 - for _f in \ - "build/$ARCH/$DRIVER"/*/.timing \ - "build/$ARCH/.timings"/* - do - [ -f "$_f" ] || continue - _v=$(cat "$_f" 2>/dev/null) || continue - case "$_v" in *[!0-9]*|'') continue ;; esac - _cum=$((_cum + _v)) - done - echo "[$BOOT_TAG] done in ${_elapsed}s (cum ${_cum}s)" -} - -driver_init() { - _image_kind=${1:-scratch} - case "$_image_kind" in - scratch|empty) ;; - *) echo "[$BOOT_TAG] driver_init: image-kind must be scratch|empty (got $_image_kind)" >&2; exit 2 ;; - esac - OUT=build/$ARCH/$DRIVER/$BOOT_STAGE - STAGE=build/$ARCH/$DRIVER/.$BOOT_STAGE-stage - export OUT STAGE - case "$DRIVER" in - podman) - IMAGE=boot2-$_image_kind:$ARCH - if ! podman image exists "$IMAGE"; then - echo "[$BOOT_TAG] building $IMAGE" - # Containerfile.empty drops /etc resolver state etc.; no-cache - # avoids a stale layer surviving an upstream tag bump. - _no_cache= - [ "$_image_kind" = empty ] && _no_cache=--no-cache - podman build $_no_cache --platform "$PLATFORM" -t "$IMAGE" \ - -f scripts/Containerfile.$_image_kind scripts/ - fi - export IMAGE - ;; - seed) - # DRIVER=seed always consumes the podman-built boot6 kernel — - # tcc3 is platform-agnostic but we settled on a single canonical - # build location to reduce surface area. See docs/PLAN.md A3. 
- KERNEL_IMAGE=$ROOT/build/$ARCH/podman/boot6/$KERNEL_NAME - EXTRACT=$ROOT/seed-kernel/scripts/extract-blk.sh - [ -f "$KERNEL_IMAGE" ] || { - echo "[$BOOT_TAG] missing $KERNEL_IMAGE — run ./scripts/boot.sh $ARCH (default DRIVER=podman) first" >&2 - exit 1 - } - export KERNEL_IMAGE EXTRACT - export SEED_ARCH=$ARCH - ;; - esac -} - -require_prev() { - _dir=$1; shift - for _n in "$@"; do - [ -x "$_dir/$_n" ] || { - _stage_name=$(basename "$_dir") - case "$_stage_name" in - boot*) _hint="run scripts/$_stage_name.sh $ARCH" ;; - *) _hint="rebuild $_dir" ;; - esac - echo "[$BOOT_TAG] missing prerequisite: $_dir/$_n ($_hint)" >&2 - exit 1 - } - done -} - -# require_src — assert build/$ARCH/src/ exists (the canonical generated -# source tree built by scripts/prep-src.sh). Every bootN.sh needs it. -require_src() { - [ -d "build/$ARCH/src" ] || { - echo "[$BOOT_TAG] missing build/$ARCH/src — run scripts/prep-src.sh $ARCH" >&2 - exit 1 - } -} - -# require_file <path> [<hint>] — assert <path> exists; print a uniform -# "[$BOOT_TAG] missing <path> — <hint>" diagnostic on failure. -require_file() { - _path=$1; _hint=${2:-} - [ -e "$_path" ] || { - if [ -n "$_hint" ]; then - echo "[$BOOT_TAG] missing $_path — $_hint" >&2 - else - echo "[$BOOT_TAG] missing $_path" >&2 - fi - exit 1 - } -} diff --git a/scripts/lib-pipeline.sh b/scripts/lib-pipeline.sh @@ -1,306 +0,0 @@ -# lib-pipeline.sh — driver-agnostic DSL for boot stage pipelines. -# -# A bootN.sh's "wiring" is a sequence of file→file program invocations -# in a flat namespace. This library exposes that as four primitives so -# the same wiring can run under different transports: -# -# podman — accumulate stages into one /work/run.sh, run once in a -# container against $IMAGE / $PLATFORM (env-set by caller). -# seed — run each stage as one qemu boot of seed-kernel via -# tier1-gate.sh's pattern (cpio /init + in/<inputs> on -# virtio-blk hd0, output dumped to virtio-blk hd1 as SEEDFS, -# extract). aarch64 only. 
-# -# Both drivers respect the `in/`+`out/` convention: inputs read from -# `in/<name>`, outputs written to `out/<name>`. The stage primitive -# rewrites argv tokens that match input/output names with the -# appropriate prefix; bare flag/literal tokens pass through untouched. -# -# DSL (source as `. scripts/lib-pipeline.sh`): -# -# pipeline_init <staging-dir> <out-dir> <driver> -# pipeline_input <name> <host-path> # repeatable -# pipeline_input_from_src <subpath> [<name>] # from build/$ARCH/src/src/ -# stage <bin> <argv...> -- <inputs...> -- <outputs...> -# pipeline_export <name>... # one or more -# pipeline_run -# -# `stage` semantics: invoke `<bin>` with argv=[<bin>, <argv1>, ...]; the -# stage reads the listed input names and produces the listed output -# names. <bin> is also a name in the flat namespace — typically a -# pipeline_input, but may be the output of an earlier stage. -# -# Required env for podman driver: PLATFORM, IMAGE. -# Required env for seed driver: KERNEL_IMAGE, EXTRACT. - -P_DRIVER= -P_STAGE_DIR= -P_OUT_DIR= -P_SCRIPT= -P_IDX=0 -P_EXPORTS= -P_INPUT_NAMES= -P_PRODUCED_NAMES= - -pipeline_init() { - P_STAGE_DIR=$1; P_OUT_DIR=$2; P_DRIVER=$3 - rm -rf "$P_STAGE_DIR" - mkdir -p "$P_STAGE_DIR/in" "$P_STAGE_DIR/out" "$P_OUT_DIR" - P_IDX=0 - P_EXPORTS= - P_INPUT_NAMES= - P_PRODUCED_NAMES= - case "$P_DRIVER" in - podman) - P_SCRIPT=$P_STAGE_DIR/run.sh - { - echo '#!/bin/sh' - echo 'set -eu' - # Stage everything in /tmp (RAM tmpfs) — the seed-stage tools - # do one syscall per byte, virtiofs round-trips would dominate. - # Mirror the in/ + out/ split so argv references resolve. 
- echo 'mkdir -p /tmp/in /tmp/out' - echo 'cp /work/in/* /tmp/in/' - echo 'cd /tmp' - } > "$P_SCRIPT" - ;; - seed) - mkdir -p "$P_STAGE_DIR/work" - : "${KERNEL_IMAGE:?lib-pipeline:seed: KERNEL_IMAGE not set}" - : "${EXTRACT:?lib-pipeline:seed: EXTRACT not set}" - ;; - *) - echo "lib-pipeline: unknown driver '$P_DRIVER'" >&2; exit 2 ;; - esac -} - -pipeline_input() { - name=$1; src=$2 - cp "$src" "$P_STAGE_DIR/in/$name" - if [ "$P_DRIVER" = "seed" ]; then - cp "$src" "$P_STAGE_DIR/work/$name" - fi - P_INPUT_NAMES="$P_INPUT_NAMES $name" -} - -# pipeline_input_from_src <subpath> [<name>] -# Pull a file from the canonical generated source tree at -# build/$ARCH/src/src/<subpath>. Stages it under in/<name>; -# <name> defaults to basename(subpath). For the rare `bin/` case -# (the seed hex0-seed binary), call pipeline_input directly with -# build/$ARCH/src/bin/<file>. -pipeline_input_from_src() { - _subpath=$1; _name=${2:-} - [ -n "$_name" ] || _name=$(basename "$_subpath") - pipeline_input "$_name" "build/$ARCH/src/src/$_subpath" -} - -# Look up a token: if it names an input, prefix `in/`; if it names a -# previously produced output, prefix `out/`; else leave unchanged. -_p_lookup() { - tok=$1 - for n in $P_IN; do [ "$tok" = "$n" ] && { echo "in/$tok"; return; }; done - for n in $P_OUT; do [ "$tok" = "$n" ] && { echo "out/$tok"; return; }; done - echo "$tok" -} - -# Resolve where the bin binary lives: in/ if it's a pipeline_input, out/ -# if a prior stage produced it. Stages with the same name as both an -# input and a produced output use the produced one. -_p_bin_path() { - b=$1 - for n in $P_PRODUCED_NAMES; do [ "$b" = "$n" ] && { echo "out/$b"; return; }; done - for n in $P_INPUT_NAMES; do [ "$b" = "$n" ] && { echo "in/$b"; return; }; done - echo "$b" -} - -# stage <bin> <argv...> -- <inputs...> -- <outputs...> -# -# The explicit input/output lists look redundant — most names already -# appear in <argv...> — but they are not. 
argv positions are tool- -# specific: a token like `M0.combined.hex2` is an output of one stage -# (catm produces it) and an input of the next (hex2 reads it). The -# framework cannot tell which from the token alone, so each stage -# declares both lists. Don't try to "simplify" by inferring from argv. -stage() { - bin=$1; shift - P_HEAD_RAW=""; P_IN=""; P_OUT=""; _s=head - while [ $# -gt 0 ]; do - if [ "$1" = "--" ]; then - case "$_s" in - head) _s=in ;; - in) _s=out ;; - *) echo "lib-pipeline: too many --" >&2; exit 2 ;; - esac - shift; continue - fi - case "$_s" in - head) P_HEAD_RAW="$P_HEAD_RAW $1" ;; - in) P_IN="$P_IN $1" ;; - out) P_OUT="$P_OUT $1" ;; - esac - shift - done - [ "$_s" = "out" ] || { echo "lib-pipeline: stage needs '<bin> argv... -- inputs... -- outputs...'" >&2; exit 2; } - - # Rewrite head tokens with in/ or out/ prefixes. - P_HEAD="" - for tok in $P_HEAD_RAW; do - P_HEAD="$P_HEAD $(_p_lookup "$tok")" - done - P_BIN_PATH=$(_p_bin_path "$bin") - - P_IDX=$((P_IDX + 1)) - case "$P_DRIVER" in - podman) _stage_podman ;; - seed) _stage_seed ;; - esac - - # Track produced names so later stages can locate the binary if a - # subsequent `stage` uses one of these as its bin. - for o in $P_OUT; do P_PRODUCED_NAMES="$P_PRODUCED_NAMES $o"; done -} - -_stage_podman() { - { - echo "# stage $P_IDX: $bin$P_HEAD" - echo "chmod +x ./$P_BIN_PATH" - echo "./$P_BIN_PATH$P_HEAD" - # Mirror this stage's outputs back into in/ so a later stage that - # declares one of them as an input finds it under in/<name>. - # (The seed driver does this naturally via its per-stage cpio.) 
- for o in $P_OUT; do - echo "cp -f out/$o in/$o" - done - } >> "$P_SCRIPT" -} - -_stage_seed() { - cpio_dir=$P_STAGE_DIR/s$(printf '%02d' "$P_IDX") - rm -rf "$cpio_dir"; mkdir -p "$cpio_dir/cpio/in" - cp "$P_STAGE_DIR/work/$bin" "$cpio_dir/cpio/init" - chmod +x "$cpio_dir/cpio/init" - NAMES="init" - for inp in $P_IN; do - cp "$P_STAGE_DIR/work/$inp" "$cpio_dir/cpio/in/$inp" - NAMES="$NAMES -in/$inp" - done - ( cd "$cpio_dir/cpio" && printf '%s\n' "$NAMES" | cpio -o -H newc 2>/dev/null ) > "$cpio_dir/initramfs.cpio" - sz=$(wc -c < "$cpio_dir/initramfs.cpio") - pad=$(( (512 - sz % 512) % 512 )) - if [ "$pad" -gt 0 ]; then - head -c "$pad" /dev/zero >> "$cpio_dir/initramfs.cpio" - fi - mv "$cpio_dir/initramfs.cpio" "$cpio_dir/in.img" - truncate -s 256M "$cpio_dir/out.img" - - APPEND="$bin$P_HEAD" - TRANSCRIPT=$cpio_dir/transcript.txt - echo "[lib-pipeline:seed] stage $P_IDX:$P_HEAD (bin=$bin)" >&2 - seed_arch=${SEED_ARCH:-aarch64} - case "$seed_arch" in - aarch64) - qemu-system-aarch64 \ - -machine virt,gic-version=3,accel=hvf -cpu host -m 2048M \ - -nographic -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$cpio_dir/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$cpio_dir/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "$APPEND" \ - > "$TRANSCRIPT" 2>&1 & - ;; - riscv64) - qemu-system-riscv64 \ - -machine virt -m 2048M \ - -nographic -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$cpio_dir/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$cpio_dir/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "$APPEND" \ - > "$TRANSCRIPT" 2>&1 & - ;; - amd64) - qemu-system-x86_64 \ - -machine microvm,acpi=off,pic=off,pit=off,rtc=off,isa-serial=on,auto-kernel-cmdline=off \ - -cpu max -m 2048M \ - 
-nodefaults -display none -serial stdio -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -device isa-debug-exit,iobase=0x501,iosize=2 \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$cpio_dir/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$cpio_dir/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "$APPEND" \ - > "$TRANSCRIPT" 2>&1 & - ;; - *) echo "[lib-pipeline:seed] unsupported SEED_ARCH=$seed_arch" >&2; exit 2 ;; - esac - QPID=$! - ( sleep 240; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 & - WATCHER=$! - disown $WATCHER 2>/dev/null || true - wait $QPID 2>/dev/null || true - kill $WATCHER 2>/dev/null || true - - mkdir -p "$cpio_dir/dump" - if ! "$EXTRACT" "$cpio_dir/dump" "$cpio_dir/out.img" >/dev/null 2>&1; then - echo "[lib-pipeline:seed] FAIL stage $P_IDX (bin=$bin): extract-blk failed" >&2 - tail -40 "$TRANSCRIPT" >&2 - exit 3 - fi - - for o in $P_OUT; do - if [ ! -f "$cpio_dir/dump/$o" ]; then - echo "[lib-pipeline:seed] FAIL stage $P_IDX: missing output '$o'" >&2 - ls "$cpio_dir/dump" >&2 || true - exit 3 - fi - cp "$cpio_dir/dump/$o" "$P_STAGE_DIR/work/$o" - done -} - -pipeline_export() { - for _n in "$@"; do P_EXPORTS="$P_EXPORTS $_n"; done -} - -pipeline_run() { - case "$P_DRIVER" in - podman) _run_podman ;; - seed) : ;; - esac - for n in $P_EXPORTS; do - case "$P_DRIVER" in - podman) cp "$P_STAGE_DIR/out/$n" "$P_OUT_DIR/$n" ;; - seed) cp "$P_STAGE_DIR/work/$n" "$P_OUT_DIR/$n" ;; - esac - chmod 0700 "$P_OUT_DIR/$n" - done -} - -_run_podman() { - : "${PLATFORM:?lib-pipeline:podman: PLATFORM not set}" - : "${IMAGE:?lib-pipeline:podman: IMAGE not set}" - if [ -n "$P_EXPORTS" ]; then - cmd="cp" - for n in $P_EXPORTS; do cmd="$cmd out/$n"; done - cmd="$cmd /work/out/" - echo "$cmd" >> "$P_SCRIPT" - fi - chmod +x "$P_SCRIPT" - SDIR=$(cd "$P_STAGE_DIR" && pwd) - podman run --rm -i --pull=never --platform "$PLATFORM" \ - --tmpfs /tmp:size=512M \ - -v 
"$SDIR/run.sh:/work/run.sh:ro" \ - -v "$SDIR/in:/work/in:ro" \ - -v "$SDIR/out:/work/out:rw" \ - -w /work "$IMAGE" \ - sh -eu /work/run.sh -} diff --git a/scripts/lib-runscm.sh b/scripts/lib-runscm.sh @@ -1,257 +0,0 @@ -# lib-runscm.sh — driver-agnostic harness for run.scm-driven stages. -# -# Boot3/4/5 each drive a per-stage pipeline by invoking scheme1 against a -# host-generated run.scm. Two transports: -# DRIVER=podman → bind-mount in/ ro and out/ rw under /work in a -# scratch+busybox container, exec in/scheme1 -# in/combined.scm with cwd=/work. -# DRIVER=seed → pack the staging dir into a cpio on a virtio-blk read- -# only disk (init at cpio root + in/ subtree), boot the -# seed kernel with init=init and combined.scm, recover -# outputs via the SEEDFS dump on a second virtio-blk -# disk. The host extractor filters to out/-prefixed -# entries, strips the prefix, writes to $STAGE/out/. -# -# Both drivers see the same flat namespace; run.scm uses explicit -# in/<name> for reads and out/<name> for writes. -# -# DSL (source as `. scripts/lib-runscm.sh`): -# -# runscm_init <staging-dir> <out-dir> -# runscm_scheme1 <path> # init=scheme1 (boot2) -# runscm_prelude <path> # scheme1/prelude.scm -# runscm_runscm <path> # static driver script -# runscm_gen <gen-script> <args...> # OR generate run.scm, -# # log size, register it. -# runscm_input <name> <host-path> # repeatable; staged at in/<name> -# runscm_input_tree <prefix> <src-root> # repeatable; tree under in/<prefix> -# runscm_export <name>... 
# one or more output names -# runscm_run [timeout-s] # default 600s -# -# Required env per driver: -# podman: IMAGE, PLATFORM -# seed: KERNEL_IMAGE, EXTRACT, optional QEMU_MEM (default 2048M) -# both: DRIVER=podman|seed - -S_STAGE_DIR= -S_OUT_DIR= -S_SCHEME1= -S_PRELUDE= -S_RUNSCM= -S_EXPORTS= - -runscm_init() { - S_STAGE_DIR=$1; S_OUT_DIR=$2 - rm -rf "$S_STAGE_DIR" - mkdir -p "$S_STAGE_DIR/in" "$S_STAGE_DIR/out" "$S_OUT_DIR" - S_SCHEME1=; S_PRELUDE=; S_RUNSCM= - S_EXPORTS= -} - -runscm_scheme1() { S_SCHEME1=$1; } -runscm_prelude() { S_PRELUDE=$1; } -runscm_runscm() { S_RUNSCM=$1; } - -# runscm_gen <gen-script> <args...> -# Run a host-side generator that emits run.scm to $S_STAGE_DIR/run.scm, -# log its size, and register it as the driver script. Used by -# boot4/5/6 which build their run.scm dynamically. -runscm_gen() { - _gen=$1; shift - _runscm=$S_STAGE_DIR/run.scm - "$_gen" "$@" "$_runscm" - echo "[$BOOT_TAG] generated run.scm: $(wc -l <"$_runscm") lines, $(wc -c <"$_runscm") bytes" - S_RUNSCM=$_runscm -} - -runscm_input() { - name=$1; src=$2 - case "$name" in - */*) mkdir -p "$S_STAGE_DIR/in/$(dirname "$name")" ;; - esac - cp "$src" "$S_STAGE_DIR/in/$name" -} - -# Stage every regular file under <src-root> into in/<prefix>/..., -# preserving the relative directory tree. -runscm_input_tree() { - prefix=$1; src_root=$2 - [ -d "$src_root" ] || { echo "runscm: input_tree: $src_root not a dir" >&2; exit 2; } - ( cd "$src_root" && find . -type f ) | sed 's|^\./||' | sort | while read -r rel; do - [ -n "$rel" ] || continue - mkdir -p "$S_STAGE_DIR/in/$prefix/$(dirname "$rel")" - cp "$src_root/$rel" "$S_STAGE_DIR/in/$prefix/$rel" - done -} - -# runscm_input_from_src <subpath> [<name>] -# Pull a file from the canonical generated source tree at -# build/$ARCH/src/src/<subpath>. Stages it under in/<name>; -# <name> defaults to basename(subpath). For the rare `bin/` case, -# call runscm_input directly with build/$ARCH/src/bin/<file>. 
-runscm_input_from_src() { - _subpath=$1; _name=${2:-} - [ -n "$_name" ] || _name=$(basename "$_subpath") - runscm_input "$_name" "build/$ARCH/src/src/$_subpath" -} - -# runscm_input_tree_from_src <prefix> <subpath> -# Same as runscm_input_tree, but the source root is -# build/$ARCH/src/src/<subpath>. -runscm_input_tree_from_src() { - _prefix=$1; _subpath=$2 - runscm_input_tree "$_prefix" "build/$ARCH/src/src/$_subpath" -} - -runscm_export() { - for _n in "$@"; do S_EXPORTS="$S_EXPORTS $_n"; done -} - -runscm_run() { - timeout=${1:-600} - [ -n "$S_SCHEME1" ] || { echo "runscm: scheme1 not set" >&2; exit 2; } - [ -n "$S_PRELUDE" ] || { echo "runscm: prelude not set" >&2; exit 2; } - [ -n "$S_RUNSCM" ] || { echo "runscm: run.scm not set" >&2; exit 2; } - cp "$S_SCHEME1" "$S_STAGE_DIR/in/scheme1" - chmod +x "$S_STAGE_DIR/in/scheme1" - cat "$S_PRELUDE" "$S_RUNSCM" > "$S_STAGE_DIR/in/combined.scm" - # Top-level reference copy of run.scm for human inspection. - # boot4/5 gen scripts already write here; skip the self-copy. - case "$S_RUNSCM" in - "$S_STAGE_DIR/run.scm") : ;; - *) cp "$S_RUNSCM" "$S_STAGE_DIR/run.scm" ;; - esac - - case "${DRIVER:-podman}" in - podman) _runscm_run_podman "$timeout" ;; - seed) _runscm_run_seed "$timeout" ;; - *) echo "runscm: unknown DRIVER=$DRIVER (expected podman|seed)" >&2; exit 2 ;; - esac - - for n in $S_EXPORTS; do - if [ ! -f "$S_STAGE_DIR/out/$n" ]; then - echo "[runscm/$DRIVER] FAIL: missing output '$n'" >&2 - ls "$S_STAGE_DIR/out" >&2 || true - exit 5 - fi - cp "$S_STAGE_DIR/out/$n" "$S_OUT_DIR/$n" - chmod 0700 "$S_OUT_DIR/$n" - done -} - -# Podman: bind-mount in/ ro and out/ rw under /work; exec in/scheme1. -# Outputs land in $S_STAGE_DIR/out/ directly via the rw bind mount. 
-_runscm_run_podman() { - : "${IMAGE:?lib-runscm: IMAGE not set}" - : "${PLATFORM:?lib-runscm: PLATFORM not set}" - in_abs=$(cd "$S_STAGE_DIR/in" && pwd) - out_abs=$(cd "$S_STAGE_DIR/out" && pwd) - echo "[runscm/podman] scheme1 combined.scm under $IMAGE" >&2 - podman run --rm -i --pull=never --platform "$PLATFORM" \ - -v "$in_abs:/work/in:ro" \ - -v "$out_abs:/work/out:rw" \ - -w /work "$IMAGE" \ - in/scheme1 in/combined.scm -} - -# Seed: pack cpio with `init` at the root and the in/ subtree under it; -# boot kernel with init=init combined.scm; recover outputs from the -# SEEDFS dump on hd1; extract filters out/-prefixed entries directly -# into $S_STAGE_DIR/out/. -_runscm_run_seed() { - timeout=$1 - : "${KERNEL_IMAGE:?lib-runscm: KERNEL_IMAGE not set}" - : "${EXTRACT:?lib-runscm: EXTRACT not set}" - mem=${QEMU_MEM:-2048M} - cp "$S_STAGE_DIR/in/scheme1" "$S_STAGE_DIR/init" - chmod +x "$S_STAGE_DIR/init" - ( cd "$S_STAGE_DIR" && { echo init; find in -type f; } | sort -u | cpio -o -H newc 2>/dev/null ) \ - > "$S_STAGE_DIR/initramfs.cpio" - sz=$(wc -c < "$S_STAGE_DIR/initramfs.cpio") - pad=$(( (512 - sz % 512) % 512 )) - if [ "$pad" -gt 0 ]; then - head -c "$pad" /dev/zero >> "$S_STAGE_DIR/initramfs.cpio" - fi - mv "$S_STAGE_DIR/initramfs.cpio" "$S_STAGE_DIR/in.img" - truncate -s 256M "$S_STAGE_DIR/out.img" - - TRANSCRIPT=$S_STAGE_DIR/transcript.txt - echo "[runscm/seed] booting scheme1 + run.scm (timeout ${timeout}s)" >&2 - seed_arch=${SEED_ARCH:-aarch64} - case "$seed_arch" in - aarch64) - qemu-system-aarch64 \ - -machine virt,gic-version=3,accel=hvf -cpu host -m "$mem" \ - -nographic -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$S_STAGE_DIR/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$S_STAGE_DIR/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "init in/combined.scm" \ - > "$TRANSCRIPT" 2>&1 & - ;; - riscv64) - # No 
hvf accel on Apple Silicon for riscv64 — TCG only. - qemu-system-riscv64 \ - -machine virt -m "$mem" \ - -nographic -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$S_STAGE_DIR/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$S_STAGE_DIR/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "init in/combined.scm" \ - > "$TRANSCRIPT" 2>&1 & - ;; - amd64) - # microvm + isa-debug-exit mirrors seed-kernel/run.sh: the - # kernel writes to port 0x501 on user exit_group(0) so QEMU - # exits cleanly (no `-no-reboot` triple-fault gymnastics). - qemu-system-x86_64 \ - -machine microvm,acpi=off,pic=off,pit=off,rtc=off,isa-serial=on,auto-kernel-cmdline=off \ - -cpu max -m "$mem" \ - -nodefaults -display none -serial stdio -no-reboot \ - -global virtio-mmio.force-legacy=false \ - -device isa-debug-exit,iobase=0x501,iosize=2 \ - -kernel "$KERNEL_IMAGE" \ - -drive file="$S_STAGE_DIR/in.img",if=none,format=raw,id=hd0,readonly=on \ - -device virtio-blk-device,drive=hd0 \ - -drive file="$S_STAGE_DIR/out.img",if=none,format=raw,id=hd1 \ - -device virtio-blk-device,drive=hd1 \ - -append "init in/combined.scm" \ - > "$TRANSCRIPT" 2>&1 & - ;; - *) - echo "[runscm/seed] unsupported SEED_ARCH=$seed_arch" >&2 - exit 2 - ;; - esac - QPID=$! - ( sleep "$timeout"; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 & - WATCHER=$! - # `disown` removes the watcher from the shell's job table so that - # killing it on the happy path doesn't trigger bash's - # "Terminated: 15 PID ( sleep … )" job-status message — that - # message looks like a real failure but is just a noisy SIGTERM - # notification fired when qemu exited normally before the watcher's - # sleep elapsed. - disown $WATCHER 2>/dev/null || true - wait $QPID 2>/dev/null || true - kill $WATCHER 2>/dev/null || true - - if ! 
"$EXTRACT" "$S_STAGE_DIR/out" "$S_STAGE_DIR/out.img" >/dev/null 2>&1; then - echo "[runscm/seed] FAIL: extract-blk failed (kernel didn't reach exit?)" >&2 - tail -40 "$TRANSCRIPT" >&2 - exit 3 - fi - EXIT_LINE=$(grep -E "user exit_group" "$TRANSCRIPT" | tail -1 || true) - case "$EXIT_LINE" in - *"exit_group(0)"*) : ;; - *) echo "[runscm/seed] FAIL: driver did not exit 0: $EXIT_LINE" >&2 - tail -40 "$TRANSCRIPT" >&2 - exit 4 ;; - esac -} diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh @@ -1,205 +0,0 @@ -#!/bin/sh -## libc-flatten.sh — flatten the vendored mes-libc + boot2-syscall.c -## into a single libc.flat.c using the host preprocessor. Mirrors -## stage1-flatten.sh; runs on the host, no container — hence the -## non-`boot-` name (the convention in scripts/ is that boot-*.sh -## runs inside the minimal container). -## -## Steps: -## 1. stage vendor/mes-libc → build/<arch>/vendor/mes-libc/libc-stage/ -## 2. apply simple-patches (literal-block replacement, idempotent) -## 3. HOST_CC -E -nostdinc -I staging/include … staging/unified-libc.c -## → build/<arch>/vendor/mes-libc/libc.flat.c -## -## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule -## that reuses scripts/boot-build-cc.sh inside the per-arch container. -## -## ARCH selects the boot2 target (aarch64/amd64/riscv64). MES_ARCH is -## the mes header tree we hand the host preprocessor; mes ships -## x86_64/riscv64 only, so aarch64 builds borrow riscv64's headers (the -## resulting libc.flat.c references no SYS_* / kernel-stat fields, so -## the choice only affects type widths, all 64-bit Linux-identical). 
-## -## Usage: scripts/libc-flatten.sh [--arch <aarch64|amd64|riscv64>] - -set -eu - -ARCH=aarch64 -while [ $# -gt 0 ]; do - case "$1" in - --arch) ARCH=$2; shift 2 ;; - -h|--help) awk '/^##/ { sub(/^## ?/, ""); print }' "$0"; exit 0 ;; - *) echo "unknown arg: $1" >&2; exit 2 ;; - esac -done - -case "$ARCH" in - aarch64) MES_ARCH=riscv64 ;; - amd64) MES_ARCH=x86_64 ;; - riscv64) MES_ARCH=riscv64 ;; - *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;; -esac - -ROOT=$(cd "$(dirname "$0")/.." && pwd) -VENDOR=$ROOT/vendor/mes-libc -WORK=$ROOT/build/$ARCH/vendor/mes-libc -STAGE=$WORK/libc-stage -FLAT=$WORK/libc.flat.c - -[ -d "$VENDOR" ] || { echo "missing $VENDOR" >&2; exit 1; } -[ -d "$VENDOR/include" ] || { echo "missing $VENDOR/include" >&2; exit 1; } -[ -d "$VENDOR/include/linux/$MES_ARCH" ] \ - || { echo "missing $VENDOR/include/linux/$MES_ARCH" >&2; exit 1; } - -# --- (1) stage -------------------------------------------------------- -mkdir -p "$WORK" -rm -rf "$STAGE" -mkdir -p "$STAGE" -# cp -R copies symlinks as files; staging is our writable scratch. -cp -R "$VENDOR/." "$STAGE/" - -# mes's sys/stat.h, signal.h, dirent.h reach for <arch/kernel-stat.h> -# and similar; the per-arch tree under include/linux/<MES_ARCH>/ is what -# they want. Copy the per-arch tree into include/arch so the unprefixed -# `arch/...` includes resolve. (cp -R, not ln -sfn — keeps the dep set -# down to coreutils we already use.) -cp -R "$STAGE/include/linux/$MES_ARCH" "$STAGE/include/arch" - - -# --- (2) patches ------------------------------------------------------ -# Same literal-block replacer as stage1-flatten.sh apply_simple_patch. 
-apply_simple_patch() { - target=$1; before=$2; after=$3 - [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; } - [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; } - [ -r "$after" ] || { echo "patch after missing: $after" >&2; exit 1; } - awk -v BFILE="$before" -v AFILE="$after" ' - BEGIN { - while ((getline line < BFILE) > 0) bef = bef line "\n"; - close(BFILE); - while ((getline line < AFILE) > 0) aft = aft line "\n"; - close(AFILE); - } - { src = src $0 "\n" } - END { - if (index(src, aft) > 0) { - printf "%s", src; - exit 0; - } - i = index(src, bef); - if (i == 0) { print "patch did not match" > "/dev/stderr"; exit 1 } - printf "%s%s%s", - substr(src, 1, i - 1), - aft, - substr(src, i + length(bef)); - } - ' "$target" > "$target.new" - mv "$target.new" "$target" -} - -PATCHES=$STAGE/patches -apply_simple_patch \ - "$STAGE/linux/malloc.c" \ - "$PATCHES/malloc-max-align.before" \ - "$PATCHES/malloc-max-align.after" -apply_simple_patch \ - "$STAGE/linux/malloc.c" \ - "$PATCHES/malloc-brk-check.before" \ - "$PATCHES/malloc-brk-check.after" -apply_simple_patch \ - "$STAGE/string/strstr.c" \ - "$PATCHES/strstr-drop-mman.before" \ - "$PATCHES/strstr-drop-mman.after" -apply_simple_patch \ - "$STAGE/include/mes/lib-mini.h" \ - "$PATCHES/libmini-write-proto.before" \ - "$PATCHES/libmini-write-proto.after" -apply_simple_patch \ - "$STAGE/include/mes/lib-mini.h" \ - "$PATCHES/libmini-write-proto2.before" \ - "$PATCHES/libmini-write-proto2.after" -apply_simple_patch \ - "$STAGE/include/mes/lib.h" \ - "$PATCHES/lib-mes-debug-proto.before" \ - "$PATCHES/lib-mes-debug-proto.after" -apply_simple_patch \ - "$STAGE/mes/ntoab.c" \ - "$PATCHES/ntoab-inline-defined.before" \ - "$PATCHES/ntoab-inline-defined.after" -# stdio/{printf,sprintf,snprintf}.c carry a mes-mescc-specific -# `ap += (__FOO_VARARGS + ...)` block guarded by `__GNUC__ && __x86_64__`. 
-# That arithmetic is meaningful only inside mes's compiler; under stock -# gcc preprocessing for amd64 it expands to a reference to an undefined -# `__FOO_VARARGS` and breaks cc.scm. Strip the block — the va_start that -# follows handles varargs correctly under any standard C compiler. -apply_simple_patch \ - "$STAGE/stdio/printf.c" \ - "$PATCHES/printf-mes-varargs.before" \ - "$PATCHES/printf-mes-varargs.after" -apply_simple_patch \ - "$STAGE/stdio/sprintf.c" \ - "$PATCHES/sprintf-mes-varargs.before" \ - "$PATCHES/sprintf-mes-varargs.after" -apply_simple_patch \ - "$STAGE/stdio/snprintf.c" \ - "$PATCHES/snprintf-mes-varargs.before" \ - "$PATCHES/snprintf-mes-varargs.after" -# stdio/vfprintf.c and stdio/vsnprintf.c read every integer / char -# variadic via `va_arg(ap, long)`. On amd64 SysV an `int` arg occupies -# an 8-byte reg-save slot whose upper 32 bits are unspecified — tcc's -# codegen (and most other compilers') doesn't sign-extend ints into -# the slot. Reading as `long` then leaks the garbage upper bits. Track -# the `l` length modifier and dispatch the va_arg type accordingly. -apply_simple_patch \ - "$STAGE/stdio/vfprintf.c" \ - "$PATCHES/printf-int-promo.before" \ - "$PATCHES/printf-int-promo.after" -apply_simple_patch \ - "$STAGE/stdio/vsnprintf.c" \ - "$PATCHES/vsnprintf-int-promo.before" \ - "$PATCHES/vsnprintf-int-promo.after" -# --- (3) flatten via host preprocessor -------------------------------- -HOST_CC=${HOST_CC:-cc} - -# Bridge file: post-patch tcc <stdarg.h>. Written by stage1-flatten.sh, -# which boot3.sh / Makefile run first. Required so we can prepend the -# per-arch va_list typedef + __builtin_va_* → tcc __va_* mapping into -# libc.flat.c, eliminating the need for `-I /work/in/tcc-include -# -include /work/in/tcc-include/stdarg.h` on every in-container compile. 
-BRIDGE=$ROOT/build/$ARCH/vendor/tcc/stdarg-bridge.h -[ -e "$BRIDGE" ] || { echo "missing $BRIDGE — run scripts/stage1-flatten.sh first" >&2; exit 1; } - -# -I order matters: vendor/boot2-include first so our stdarg.h shim -# (routes va_* through __builtin_va_*; see comment in that file) wins -# over mes's. Then $STAGE/include for everything else — <signal.h>, -# <stdio.h>, etc. hit the canonical mes/include versions; arch/<…> -# resolves through the include/arch symlink to include/linux/$MES_ARCH. -# Putting the per-arch directory ahead of include/ makes <signal.h> -# resolve to the partial arch-specific snippet (no stack_t typedef etc) -# and the build breaks. -"$HOST_CC" -E -P \ - -nostdinc \ - -I "$ROOT/vendor/boot2-include" \ - -I "$STAGE/include" \ - -I "$STAGE" \ - -D HAVE_CONFIG_H=0 \ - -D __linux__=1 \ - -D __${MES_ARCH}__=1 \ - -D __riscv_xlen=64 \ - -D inline= \ - "$STAGE/unified-libc.c" > "$FLAT.body" - -# Prepend the bridge, guarded by !CCSCM (cc.scm predefines CCSCM and -# handles __builtin_va_* natively, so it must skip this block). Under -# tcc, the per-arch #ifdefs inside the bridge resolve and provide the -# va_list typedef + __builtin_va_* → tcc native __va_* macros. -{ - echo '#ifndef CCSCM' - cat "$BRIDGE" - echo '#endif' - cat "$FLAT.body" -} > "$FLAT" -rm -f "$FLAT.body" - -BYTES=$(wc -c < "$FLAT") -echo "produced $FLAT ($BYTES bytes)" diff --git a/scripts/mk-seed-tools.sh b/scripts/mk-seed-tools.sh @@ -1,47 +0,0 @@ -#!/bin/sh -## mk-seed-tools.sh — Makefile-internal: stage 1 of the bootstrap chain. -## -## Standalone equivalent: scripts/boot0.sh. -## -## In-container script. Brings up M0/hex2-0/catm from the ~400-byte -## hex0-seed by chaining stage0-posix's first three phases. All produced -## binaries are target-arch Linux ELF. 
-## -## Inputs (read): vendor/seed/$ARCH/{hex0-seed,hex0.hex0,hex1.hex0, -## hex2.hex1,catm.hex2,M0.hex2,ELF.hex2} -## Outputs: build/$ARCH/tools/{hex0,hex1,hex2-0,catm,M0} -## -## Phase map (stage0-posix mescc-tools-{seed,mini}-kaem.kaem phases 0-3): -## 0) hex0-seed + hex0.hex0 -> hex0 -## 1) hex0 + hex1.hex0 -> hex1 -## 2) hex1 + hex2.hex1 -> hex2-0 -## 2b) hex2-0 + catm.hex2 -> catm -## 3a) catm : ELF.hex2 + M0.hex2 -> M0.combined.hex2 -## 3b) hex2-0 : M0.combined.hex2 -> M0 -## -## Env: ARCH=aarch64|amd64|riscv64 - -set -eu - -: "${ARCH:?ARCH must be set}" - -case "$ARCH" in - aarch64|amd64|riscv64) ;; - *) echo "boot1.sh: unsupported arch '$ARCH'" >&2 ; exit 1 ;; -esac - -S=vendor/seed/$ARCH -OUT=build/$ARCH/tools -mkdir -p "$OUT" - -## Build everything in /tmp (RAM tmpfs — see PODMAN macro in Makefile), -## then cp the final binaries to the bind-mounted $OUT. Stage0 tools do -## one syscall per byte; staying off virtiofs for intermediates is ~5x. -"$S/hex0-seed" "$S/hex0.hex0" /tmp/hex0 -/tmp/hex0 "$S/hex1.hex0" /tmp/hex1 -/tmp/hex1 "$S/hex2.hex1" /tmp/hex2-0 -/tmp/hex2-0 "$S/catm.hex2" /tmp/catm -/tmp/catm /tmp/M0.combined.hex2 "$S/ELF.hex2" "$S/M0.hex2" -/tmp/hex2-0 /tmp/M0.combined.hex2 /tmp/M0 - -cp /tmp/hex0 /tmp/hex1 /tmp/hex2-0 /tmp/catm /tmp/M0 "$OUT/" diff --git a/scripts/musl-vendor.sh b/scripts/musl-vendor.sh @@ -1,172 +0,0 @@ -#!/bin/sh -## musl-vendor.sh — regenerate vendor/upstream/musl-1.2.5-overrides/ and -## musl-1.2.5-deletes.txt from the upstream tarball + tcc-compat patch. -## -## NOT on the boot.sh path. This is a vendoring helper run on a dev host -## any time vendor/upstream/musl-1.2.5-tcc.patch changes; it requires -## the host's `patch` binary. The output it produces (the overrides -## directory tree + the deletes list) is what boot5.sh consumes — boot5 -## itself never invokes `patch`. -## -## What the script does: -## 1. Extract a pristine copy of musl-1.2.5.tar.gz to a scratch dir. -## 2. Apply musl-1.2.5-tcc.patch. -## 3. 
For every file the patch touched: -## - if non-empty in the patched tree → vendor it into -## musl-1.2.5-overrides/<path>, mirroring the musl-1.2.5/ -## subtree layout. -## - if 0-byte (the patch's deleted-file marker) → record the -## path in musl-1.2.5-deletes.txt. -## 4. Append the per-arch sweep deletes (aarch64+riscv64 overrides -## arm64-asm.c phase 1+2 / riscv64-asm.c can't yet handle). -## 5. Sort + dedupe the deletes list. -## -## Boot4 then mirrors this state at build time without `patch`: -## cp -R overrides/. musl-1.2.5/ -## while read p; do rm -rf "musl-1.2.5/$p"; done < deletes.txt - -set -eu - -ROOT=$(cd "$(dirname "$0")/.." && pwd) -cd "$ROOT" - -TARBALL=vendor/upstream/musl-1.2.5.tar.gz -PATCH_FILE=vendor/upstream/musl-1.2.5-tcc.patch -OVERRIDES=vendor/upstream/musl-1.2.5-overrides -DELETES=vendor/upstream/musl-1.2.5-deletes.txt -GENERATED=vendor/upstream/musl-1.2.5-generated -MKALLTYPES_AWK=scripts/mkalltypes.awk - -[ -e "$TARBALL" ] || { echo "missing $TARBALL" >&2; exit 1; } -[ -e "$PATCH_FILE" ] || { echo "missing $PATCH_FILE" >&2; exit 1; } -command -v patch >/dev/null || { echo "host patch not found" >&2; exit 1; } - -WORK=$(mktemp -d) -trap 'rm -rf "$WORK"' EXIT - -# (1) extract -tar xzf "$TARBALL" -C "$WORK" -SRC=$WORK/musl-1.2.5 -[ -d "$SRC" ] || { echo "tarball did not produce musl-1.2.5/"; exit 1; } - -# (2) apply patch (cwd = parent of musl-1.2.5/, -p1 strips the a/ prefix) -( cd "$WORK" && patch -p1 < "$ROOT/$PATCH_FILE" ) >"$WORK/patch.log" 2>&1 \ - || { tail -40 "$WORK/patch.log" >&2; exit 1; } - -# (3) mirror touched files into overrides/, record empty ones as deletes -rm -rf "$OVERRIDES" -mkdir -p "$OVERRIDES" -: > "$DELETES" - -# Each diff in the patch starts with `diff -urN ... patched_musl/musl-1.2.5/<rel>`. -# Strip the "patched_musl/musl-1.2.5/" prefix to get a path under SRC. 
-awk '/^diff -urN/ { - sub(/^patched_musl\/musl-1.2.5\//, "", $4); print $4 - }' "$PATCH_FILE" | -while read -r rel; do - f=$SRC/$rel - if [ -s "$f" ]; then - mkdir -p "$OVERRIDES/$(dirname "$rel")" - cp "$f" "$OVERRIDES/$rel" - else - echo "$rel" >> "$DELETES" - fi -done - -# (4) per-arch sweep: aarch64 + riscv64 files boot5 can't compile yet. -# Globs are expanded against the upstream tree, so every entry is a -# concrete file path (no glob in the deletes list itself). -for f in "$SRC"/src/math/aarch64/*.c; do - [ -e "$f" ] && echo "${f#$SRC/}" >> "$DELETES" -done -for f in "$SRC"/src/math/riscv64/*.c; do - [ -e "$f" ] && echo "${f#$SRC/}" >> "$DELETES" -done -{ - echo src/string/aarch64/memset.S - echo src/string/aarch64/memcpy.S - echo src/fenv/aarch64/fenv.s - echo src/thread/aarch64/clone.s - echo src/thread/aarch64/syscall_cp.s - echo src/thread/aarch64/__unmapself.s - echo src/setjmp/aarch64/setjmp.s - echo src/setjmp/aarch64/longjmp.s - echo src/signal/aarch64/sigsetjmp.s - echo src/fenv/riscv64/fenv.S - echo src/fenv/riscv64/fenv-sf.c - echo src/setjmp/riscv64/setjmp.S - echo src/setjmp/riscv64/longjmp.S - echo src/signal/riscv64/sigsetjmp.s - echo src/signal/riscv64/restore.s - echo src/thread/riscv64/clone.s - echo src/thread/riscv64/syscall_cp.s - echo src/thread/riscv64/__unmapself.s - echo src/process/riscv64/vfork.s - # src/complex/ is gutted by the patch (every .c file becomes 0-byte - # in the patched tree, so it's already in DELETES). The dir entry - # cleans up the empty directory itself. - echo src/complex -} >> "$DELETES" - -# (5) sort + dedupe -sort -u "$DELETES" -o "$DELETES" - -# (6) pre-generate per-arch alltypes.h + syscall.h. -# These are deterministic given the upstream tree + chosen arch (musl's -# Makefile runs the same two transformations at build time). Vendoring -# them lets the boot5 container drop awk entirely — it just `cp`s the -# right file in. 
mkalltypes runs on the post-overrides tree (overrides -# don't touch alltypes.h.in, but applying them keeps the procedure -# coherent). Apply overrides + deletes to a fresh post-patch copy and -# generate from there. -POST=$WORK/post -mkdir -p "$POST" -cp -R "$SRC" "$POST/" -cp -R "$ROOT/$OVERRIDES/." "$POST/musl-1.2.5/" -while read -r p; do - [ -n "$p" ] && rm -rf "$POST/musl-1.2.5/$p" -done < "$ROOT/$DELETES" - -rm -rf "$ROOT/$GENERATED" -for MARCH in aarch64 x86_64 riscv64; do - out=$ROOT/$GENERATED/$MARCH - mkdir -p "$out" - # Post-process: simplify struct timespec to drop the trailing - # zero-width bitfield (see comment above the awk pipeline below). - awk -f "$ROOT/$MKALLTYPES_AWK" \ - "$POST/musl-1.2.5/arch/$MARCH/bits/alltypes.h.in" \ - "$POST/musl-1.2.5/include/alltypes.h.in" \ - | awk ' - /^struct timespec \{.*tv_nsec/ { - print "struct timespec { time_t tv_sec; long tv_nsec; };" - next - } - { print } - ' > "$out/alltypes.h" - # Why the post-process: tcc 0.9.26 emits "will touch memory past - # end of the struct (internal limitation)" for any struct ending - # in a zero-width bitfield. The musl alltypes.h definition wraps - # tv_nsec in two bitfield-padding tricks for 32-bit-time_t arches: - # - # struct timespec { time_t tv_sec; - # int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER==4321); - # long tv_nsec; - # int :8*(sizeof(time_t)-sizeof(long))*(__BYTE_ORDER!=4321); - # }; - # - # On all three boot5 arches sizeof(time_t)==sizeof(long)==8, so - # both bitfields are 0 width and the layout is identical to the - # simple two-field form. The leading bitfield does not warn (a - # sibling follows); the trailing one does, 387 times per build. 
- cp "$POST/musl-1.2.5/arch/$MARCH/bits/syscall.h.in" "$out/syscall.h" - awk 'sub(/__NR_/, "SYS_") { print }' \ - < "$POST/musl-1.2.5/arch/$MARCH/bits/syscall.h.in" \ - >> "$out/syscall.h" -done - -n_files=$(find "$OVERRIDES" -type f | wc -l) -n_dels=$(wc -l < "$DELETES") -n_gen=$(find "$GENERATED" -type f | wc -l) -echo "musl-vendor: overrides=$n_files deletes=$n_dels generated=$n_gen" -echo " $OVERRIDES" -echo " $DELETES" diff --git a/scripts/prep-musl.sh b/scripts/prep-musl.sh @@ -1,73 +0,0 @@ -#!/bin/sh -## prep-musl.sh — A0b: apply the per-arch musl skip filter on top of -## build/<arch>/src/src/musl/. -## -## prep-src.sh (A0a) leaves the musl tree at build/<arch>/src/src/musl/ -## with overrides merged, deletes applied, and pre-generated alltypes.h -## / syscall.h dropped in. boot5-calibrate.sh's per-arch skip list (the -## set of musl translation units tcc 0.9.26 cannot compile) needs a -## working tcc3, so it can't be folded into A0a. -## -## A0b is a single transform: read the skip list (committed or freshly -## calibrated), copy it into the canonical tree as skip.txt, and remove -## every listed path from src/musl/. After A0b the tree is the exact -## set of files boot5 will compile — no skip enumeration at boot time. -## -## Skip-list source policy: -## - if vendor/upstream/musl-1.2.5-skip-<arch>.txt exists, use it -## verbatim (the common case — calibrations are committed). -## - else run scripts/boot5-calibrate.sh <arch>, which itself depends -## on boot4/tcc3. The script writes the committed file for us. -## -## Usage: scripts/prep-musl.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} - -set -eu - -. 
scripts/lib-arch.sh -bootlib_init prep-musl "${1:-}" - -DST=$ROOT/build/$ARCH/src -DST_MUSL=$DST/src/musl -SKIP_COMMITTED=vendor/upstream/musl-1.2.5-skip-$ARCH.txt - -TAG="[$BOOT_TAG]" - -[ -d "$DST_MUSL" ] || { - echo "$TAG missing $DST_MUSL — run scripts/prep-src.sh $ARCH first" >&2 - exit 1 -} - -# ── (1) materialize the skip list ───────────────────────────────────── -if [ ! -e "$SKIP_COMMITTED" ]; then - echo "$TAG no committed skip list at $SKIP_COMMITTED — calibrating" - scripts/boot5-calibrate.sh "$ARCH" - [ -e "$SKIP_COMMITTED" ] || { - echo "$TAG calibration did not produce $SKIP_COMMITTED" >&2 - exit 1 - } -fi -cp "$SKIP_COMMITTED" "$DST_MUSL/skip.txt" - -# ── (2) apply filter — drop every listed path from src/musl/ ────────── -n_skip=0 -n_missing=0 -while read -r rel; do - [ -n "$rel" ] || continue - case "$rel" in - \#*) continue ;; - esac - if [ -e "$DST_MUSL/$rel" ]; then - rm -rf "$DST_MUSL/$rel" - n_skip=$((n_skip + 1)) - else - n_missing=$((n_missing + 1)) - fi -done < "$DST_MUSL/skip.txt" - -if [ "$n_missing" -gt 0 ]; then - echo "$TAG WARN: $n_missing skip-list entries were not present in $DST_MUSL" >&2 -fi - -n_remaining=$(find "$DST_MUSL" -type f | wc -l | tr -d ' ') -echo "$TAG OK filtered=$n_skip remaining=$n_remaining files in $DST_MUSL" diff --git a/scripts/prep-src.sh b/scripts/prep-src.sh @@ -1,196 +0,0 @@ -#!/bin/sh -## prep-src.sh — A0a: build the canonical generated source tree. -## -## All host-side source preparation happens once, up front, into a -## single canonical tree at build/<arch>/src/. This tree is the audit -## basis and the only thing boot stages should read for source. Boot -## stages do no flattening, no unpacking, no patching, no calibration. 
-## -## Layout produced (see docs/PLAN.md §A0): -## build/<arch>/src/ -## bin/ binary inputs not built by a stage -## hex0-seed vendored seed only -## src/ everything textual -## vendor-seed/ ELF.hex2 + *.hex0|*.hex1|*.hex2 -## M1pp/ M1pp.P1 -## hex2pp/ hex2pp.P1 -## P1/ P1*.{M1,M1pp,P1pp}, entry-*.P1pp, -## elf-end.P1pp -## catm/ catm.P1pp -## scheme1/ scheme1.P1pp, prelude.scm -## cc/ cc.scm, main.scm -## tcc/ tcc.flat.c, stdarg-bridge.h, plus -## tcc-0.9.26-1147-gee75a10c/{include,lib} -## tcc-libc/$ARCH/ start.S, sys_stubs.S -## tcc-cc/ mem.c (memcpy/memmove/memset/memcmp) -## libc/ libc.flat.c (mes-libc flattened) -## musl/ filtered musl-1.2.5 tree (overrides -## merged, deletes applied, generated -## alltypes.h/syscall.h dropped in). -## prep-musl.sh applies the per-arch -## skip filter on top. -## kernel/ seed-kernel sources for this arch -## test-fixtures/ boot-hello.c smoke binary -## -## A0 is split: prep-src.sh runs before boot0 and produces everything -## that doesn't need a working compiler. prep-musl.sh runs after boot4 -## (or copies the committed skip list) and applies the calibration -## filter on top of src/musl/. -## -## Usage: scripts/prep-src.sh <arch> -## <arch> ∈ {aarch64, amd64, riscv64} - -set -eu - -. 
scripts/lib-arch.sh -bootlib_init prep-src "${1:-}" - -DST=$ROOT/build/$ARCH/src -DST_BIN=$DST/bin -DST_SRC=$DST/src - -TAG="[$BOOT_TAG]" - -# ── (0) reset destination ───────────────────────────────────────────── -rm -rf "$DST" -mkdir -p "$DST_BIN" "$DST_SRC" - -# ── (1) vendored seed (pre-built binary + textual sources) ──────────── -SEED=vendor/seed/$ARCH -[ -d "$SEED" ] || { echo "$TAG missing $SEED" >&2; exit 1; } - -cp "$SEED/hex0-seed" "$DST_BIN/hex0-seed" - -mkdir -p "$DST_SRC/vendor-seed" -for f in ELF.hex2 hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2; do - [ -e "$SEED/$f" ] || { echo "$TAG missing $SEED/$f" >&2; exit 1; } - cp "$SEED/$f" "$DST_SRC/vendor-seed/$f" -done - -# ── (2) repo-tree textual sources ───────────────────────────────────── -mkdir -p "$DST_SRC/M1pp" -cp M1pp/M1pp.P1 "$DST_SRC/M1pp/M1pp.P1" - -mkdir -p "$DST_SRC/hex2pp" -cp hex2pp/hex2pp.P1 "$DST_SRC/hex2pp/hex2pp.P1" - -mkdir -p "$DST_SRC/P1" -cp "P1/P1.M1pp" "$DST_SRC/P1/P1.M1pp" -cp "P1/P1-$ARCH.M1" "$DST_SRC/P1/P1-$ARCH.M1" -cp "P1/P1-$ARCH.M1pp" "$DST_SRC/P1/P1-$ARCH.M1pp" -cp "P1/P1pp.P1pp" "$DST_SRC/P1/P1pp.P1pp" -cp "P1/entry-libc.P1pp" "$DST_SRC/P1/entry-libc.P1pp" -cp "P1/entry-plain.P1pp" "$DST_SRC/P1/entry-plain.P1pp" -cp "P1/elf-end.P1pp" "$DST_SRC/P1/elf-end.P1pp" - -mkdir -p "$DST_SRC/catm" -cp catm/catm.P1pp "$DST_SRC/catm/catm.P1pp" - -mkdir -p "$DST_SRC/scheme1" -cp scheme1/scheme1.P1pp "$DST_SRC/scheme1/scheme1.P1pp" -cp scheme1/prelude.scm "$DST_SRC/scheme1/prelude.scm" - -mkdir -p "$DST_SRC/cc" -cp cc/cc.scm "$DST_SRC/cc/cc.scm" -cp cc/main.scm "$DST_SRC/cc/main.scm" - -# tcc-libc: per-arch _start + sys_* wrappers consumed by boot4. -mkdir -p "$DST_SRC/tcc-libc/$ARCH" -cp "tcc-libc/$ARCH/start.S" "$DST_SRC/tcc-libc/$ARCH/start.S" -cp "tcc-libc/$ARCH/sys_stubs.S" "$DST_SRC/tcc-libc/$ARCH/sys_stubs.S" - -# tcc-cc: tiny mem helpers consumed by boot4 + boot6. 
-mkdir -p "$DST_SRC/tcc-cc" -cp tcc-cc/mem.c "$DST_SRC/tcc-cc/mem.c" - -# Smoke binary linked by boot4 + boot5. -mkdir -p "$DST_SRC/test-fixtures" -cp scripts/boot-hello.c "$DST_SRC/test-fixtures/boot-hello.c" - -# ── (3) seed-kernel sources for this arch ───────────────────────────── -mkdir -p "$DST_SRC/kernel/arch/$ARCH" "$DST_SRC/kernel/user" -cp seed-kernel/kernel.c "$DST_SRC/kernel/kernel.c" -for f in seed-kernel/arch/$ARCH/*; do - [ -f "$f" ] || continue - cp "$f" "$DST_SRC/kernel/arch/$ARCH/$(basename "$f")" -done -for f in seed-kernel/user/*; do - [ -f "$f" ] || continue - cp "$f" "$DST_SRC/kernel/user/$(basename "$f")" -done - -# ── (4) tcc flatten ─────────────────────────────────────────────────── -# stage1-flatten.sh writes to build/<arch>/vendor/tcc/. Run it (it's -# idempotent) and mirror the relevant artifacts into src/tcc/. -echo "$TAG flatten tcc.flat.c (host)" -scripts/stage1-flatten.sh --arch "$ARCH" - -TCC_VENDOR=$ROOT/build/$ARCH/vendor/tcc -TCC_PKG=tcc-0.9.26-1147-gee75a10c -[ -e "$TCC_VENDOR/tcc.flat.c" ] || { echo "$TAG flatten produced no tcc.flat.c" >&2; exit 1; } -[ -e "$TCC_VENDOR/stdarg-bridge.h" ] || { echo "$TAG flatten produced no stdarg-bridge.h" >&2; exit 1; } -[ -d "$TCC_VENDOR/$TCC_PKG/include" ] || { echo "$TAG flatten produced no $TCC_PKG/include" >&2; exit 1; } -[ -d "$TCC_VENDOR/$TCC_PKG/lib" ] || { echo "$TAG flatten produced no $TCC_PKG/lib" >&2; exit 1; } - -mkdir -p "$DST_SRC/tcc" -cp "$TCC_VENDOR/tcc.flat.c" "$DST_SRC/tcc/tcc.flat.c" -cp "$TCC_VENDOR/stdarg-bridge.h" "$DST_SRC/tcc/stdarg-bridge.h" -mkdir -p "$DST_SRC/tcc/$TCC_PKG" -cp -R "$TCC_VENDOR/$TCC_PKG/include" "$DST_SRC/tcc/$TCC_PKG/include" -cp -R "$TCC_VENDOR/$TCC_PKG/lib" "$DST_SRC/tcc/$TCC_PKG/lib" - -# ── (5) mes-libc flatten ────────────────────────────────────────────── -echo "$TAG flatten libc.flat.c (host)" -scripts/libc-flatten.sh --arch "$ARCH" - -LIBC_VENDOR=$ROOT/build/$ARCH/vendor/mes-libc -[ -e "$LIBC_VENDOR/libc.flat.c" ] || { echo "$TAG flatten 
produced no libc.flat.c" >&2; exit 1; } - -mkdir -p "$DST_SRC/libc" -cp "$LIBC_VENDOR/libc.flat.c" "$DST_SRC/libc/libc.flat.c" - -# ── (6) musl unpack + overrides + deletes + generated headers ───────── -MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz -MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides -MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt -MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH - -[ -e "$MUSL_TARBALL" ] || { echo "$TAG missing $MUSL_TARBALL" >&2; exit 1; } -[ -d "$MUSL_OVERRIDES" ] || { echo "$TAG missing $MUSL_OVERRIDES" >&2; exit 1; } -[ -e "$MUSL_DELETES" ] || { echo "$TAG missing $MUSL_DELETES" >&2; exit 1; } -[ -d "$MUSL_GENERATED" ] || { echo "$TAG missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; } - -echo "$TAG unpack musl-1.2.5 + apply overrides/deletes" -MUSL_TMP=$(mktemp -d) -trap 'rm -rf "$MUSL_TMP"' EXIT -tar xzf "$MUSL_TARBALL" -C "$MUSL_TMP" -[ -d "$MUSL_TMP/musl-1.2.5" ] || { echo "$TAG musl tarball did not unpack to musl-1.2.5/" >&2; exit 1; } - -cp -R "$MUSL_OVERRIDES/." "$MUSL_TMP/musl-1.2.5/" -while read -r p; do - [ -n "$p" ] && rm -rf "$MUSL_TMP/musl-1.2.5/$p" -done < "$MUSL_DELETES" - -# Drop pre-generated arch headers + version.h into the same obj/ layout -# boot5 expects. -mkdir -p "$MUSL_TMP/musl-1.2.5/obj/include/bits" \ - "$MUSL_TMP/musl-1.2.5/obj/src/internal" -cp "$MUSL_GENERATED/alltypes.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/alltypes.h" -cp "$MUSL_GENERATED/syscall.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/syscall.h" -echo '#define VERSION "1.2.5-tcc-boot5"' > "$MUSL_TMP/musl-1.2.5/obj/src/internal/version.h" - -mkdir -p "$DST_SRC/musl" -# Move into place — the canonical tree owns this from now on. -( cd "$MUSL_TMP/musl-1.2.5" && tar cf - . ) | ( cd "$DST_SRC/musl" && tar xf - ) - -# Seed src/musl/skip.txt with the committed skip list when one exists, -# so the canonical tree carries metadata even before prep-musl.sh -# applies the filter. 
prep-musl.sh refreshes/regenerates this. -SKIP_COMMITTED=vendor/upstream/musl-1.2.5-skip-$ARCH.txt -if [ -e "$SKIP_COMMITTED" ]; then - cp "$SKIP_COMMITTED" "$DST_SRC/musl/skip.txt" -fi - -# ── summary ─────────────────────────────────────────────────────────── -n_files=$(find "$DST" -type f | wc -l | tr -d ' ') -echo "$TAG OK -> $DST ($n_files files)" diff --git a/scripts/run-gcc-libc-flat-tcc.sh b/scripts/run-gcc-libc-flat-tcc.sh @@ -1,154 +0,0 @@ -#!/bin/sh -## run-gcc-libc-flat-tcc.sh — tcc-gcc baseline runner. -## -## Builds the tcc.flat.c-built tcc-gcc against mes-libc's mem* sources -## into a runtime archive, then walks tests/cc/<name>.c through tcc-gcc -## linking against that archive. The control reference for the -## tcc-cc/tcc-libc suites — if a fixture passes here but fails through -## cc.scm + tcc-boot2 / tcc-tcc, the bug lives in our pipeline rather -## than in tcc-0.9.26 itself. -## -## Env: ARCH=aarch64 (default) | amd64 -## TCC=<path> overrides the per-arch tcc-gcc binary -## Usage: scripts/run-gcc-libc-flat-tcc.sh [<test-name>...] - -set -eu - -ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -cd "$ROOT" - -ARCH=${ARCH:-aarch64} - -case "$ARCH" in - aarch64) TCC_TARGET=ARM64; RUNTIME_TARGET_DEFINES="-D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1"; LIB_HELPER_SRC=lib/lib-arm64.c; LIB_HELPER_NAME=lib-arm64.o ;; - amd64) TCC_TARGET=X86_64; RUNTIME_TARGET_DEFINES="-D TCC_TARGET_X86_64=1"; LIB_HELPER_SRC=; LIB_HELPER_NAME= ;; - *) echo "$0: unsupported ARCH '$ARCH' (aarch64|amd64)" >&2; exit 2 ;; -esac - -TCC=${TCC:-build/$ARCH/tcc-gcc/tcc-gcc} -START=build/$ARCH/tcc-cc/start.o -OUT_ROOT=build/$ARCH/tests/gcc-libc-flat-tcc -WORK_ROOT=build/$ARCH/.work/tests/gcc-libc-flat-tcc -TCC_SRC=build/$ARCH/vendor/tcc/tcc-0.9.26-1147-gee75a10c -MES_INC=vendor/mes-libc/include -case "$ARCH" in - aarch64) MES_LINUX_INC=vendor/mes-libc/include/linux/riscv64 ;; - amd64) MES_LINUX_INC=vendor/mes-libc/include/linux/x86_64 ;; -esac -RUNTIME=$WORK_ROOT/runtime.a - -[ -x "$TCC" ] || { - echo "missing $TCC; build it with scripts/build-tcc-gcc.sh and $TCC_SRC/../tcc.flat.c" >&2 - exit 2 -} -[ -r "$START" ] || { echo "missing $START" >&2; exit 2; } - -mkdir -p "$OUT_ROOT" "$WORK_ROOT" - -"$TCC" -v - -build_runtime() { - rm -rf "$WORK_ROOT/runtime-objs" - mkdir -p "$WORK_ROOT/runtime-objs" - - # shellcheck disable=SC2086 # RUNTIME_TARGET_DEFINES is intentionally word-split. - "$TCC" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ - $RUNTIME_TARGET_DEFINES \ - -I "$TCC_SRC" -I "$TCC_SRC/include" \ - -o "$WORK_ROOT/runtime-objs/libtcc1.o" "$TCC_SRC/lib/libtcc1.c" - if [ -n "$LIB_HELPER_SRC" ]; then - # shellcheck disable=SC2086 # RUNTIME_TARGET_DEFINES is intentionally word-split. 
- "$TCC" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ - $RUNTIME_TARGET_DEFINES \ - -I "$TCC_SRC" -I "$TCC_SRC/include" \ - -o "$WORK_ROOT/runtime-objs/$LIB_HELPER_NAME" "$TCC_SRC/$LIB_HELPER_SRC" - fi - - for src in string/memcpy.c string/memmove.c string/memset.c string/memcmp.c; do - obj=$WORK_ROOT/runtime-objs/$(basename "$src" .c).o - "$TCC" -c -D HAVE_CONFIG_H=1 -I "$MES_INC" -I "$MES_LINUX_INC" \ - -o "$obj" "vendor/mes-libc/$src" - done - - "$TCC" -ar cr "$RUNTIME" "$WORK_ROOT"/runtime-objs/*.o -} - -build_runtime - -if [ "$#" -gt 0 ]; then - NAMES="$*" -else - NAMES=$( - ls tests/cc 2>/dev/null \ - | sed -n 's/^\([^_][^.]*\)\.c$/\1/p' \ - | sort -u - ) -fi - -pass=0 -fail=0 - -check_one() { - name=$1 - src=tests/cc/$name.c - exe=$OUT_ROOT/$name - work=$WORK_ROOT/$name - tcc_log=$work/tcc.log - out=$work/stdout - mkdir -p "$work" - - if [ -e tests/cc/$name.expected ]; then - expout=$(cat tests/cc/$name.expected) - else - expout= - fi - if [ -e tests/cc/$name.expected-exit ]; then - expexit=$(cat tests/cc/$name.expected-exit) - else - expexit=0 - fi - - if ! "$TCC" -nostdlib -I "$TCC_SRC/include" \ - "$START" "$src" "$RUNTIME" -o "$exe" >"$tcc_log" 2>&1; then - echo " FAIL gcc-libc-flat-tcc/$name" - echo " tcc compile/link failed:" - sed 's/^/ /' "$tcc_log" - fail=$((fail + 1)) - return - fi - - if "$exe" >"$out" 2>&1; then - actexit=0 - else - actexit=$? 
- fi - actout=$(cat "$out") - - if [ "$actexit" != "$expexit" ]; then - echo " FAIL gcc-libc-flat-tcc/$name" - echo " exit: expected $expexit, got $actexit" - fail=$((fail + 1)) - return - fi - - if [ "$actout" != "$expout" ]; then - echo " FAIL gcc-libc-flat-tcc/$name" - echo " --- expected ---" - printf '%s\n' "$expout" | sed 's/^/ /' - echo " --- actual ---" - printf '%s\n' "$actout" | sed 's/^/ /' - fail=$((fail + 1)) - return - fi - - echo " PASS gcc-libc-flat-tcc/$name" - pass=$((pass + 1)) -} - -for name in $NAMES; do - [ -e tests/cc/$name.c ] || continue - check_one "$name" -done - -echo "$pass passed, $fail failed" -[ "$fail" -eq 0 ] diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh @@ -1,374 +0,0 @@ -#!/bin/sh -## scripts/stage1-flatten.sh — flatten upstream tcc-0.9.26 into a single -## C bytestream (tcc.flat.c) using only the host preprocessor. -## -## This is the first of three stages building tcc-0.9.26 without -## M2-Planet, MesCC, or Mes Scheme. See docs/TCC.md. -## -## Stages: -## 1. unpack tcc-0.9.26-1147-gee75a10c.tar.gz -## 2. apply live-bootstrap simple-patches (tcctools.c file-open reorder) -## 3. host cc -E -nostdinc with mes-bundled headers + tcc-mes defines -## 4. emit build/<arch>/vendor/tcc/tcc.flat.c -## 5. (--verify) compile tcc.flat.c with host cc to confirm well-formedness -## -## Stage 1 deliberately stays on the host: it is just text manipulation -## (preprocess + concat) and the resulting tcc.flat.c is a portable -## artifact downstream stages consume. No container needed. 
-## -## Usage: -## scripts/stage1-flatten.sh [--arch <X86_64|I386|RISCV64|ARM64|AARCH64>] [--verify] - -set -eu - -# --- arg parse -------------------------------------------------------- -ARCH=X86_64 -VERIFY=0 -while [ $# -gt 0 ]; do - case "$1" in - --arch) ARCH=$2; shift 2 ;; - --verify) VERIFY=1; shift ;; - -h|--help) awk '/^##/ { sub(/^## ?/, ""); print }' "$0"; exit 0 ;; - *) echo "unknown arg: $1" >&2; exit 2 ;; - esac -done - -case "$ARCH" in - X86_64|x86_64|amd64) BOOT_ARCH=amd64; MES_ARCH=x86_64; HAVE_LL=1; TCC_TARGET_DEFINE=X86_64; CPP_ARCH=x86_64 ;; - I386|i386) BOOT_ARCH=i386; MES_ARCH=x86; HAVE_LL=0; TCC_TARGET_DEFINE=I386; CPP_ARCH=x86 ;; - RISCV64|riscv64) BOOT_ARCH=riscv64; MES_ARCH=riscv64; HAVE_LL=1; TCC_TARGET_DEFINE=RISCV64; CPP_ARCH=riscv64 ;; - ARM64|arm64|AARCH64|aarch64) - BOOT_ARCH=aarch64; MES_ARCH=riscv64; HAVE_LL=1; TCC_TARGET_DEFINE=ARM64; CPP_ARCH=aarch64 ;; - *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;; -esac - -# --- paths ------------------------------------------------------------ -# Everything used by this script is in-tree under $ROOT. No reach into -# sibling repos. -# -# vendor/upstream/tcc-0.9.26.tar.gz — pristine upstream tarball -# scripts/simple-patches/tcc-0.9.26-lb/ — live-bootstrap's tcc -# simple-patches, copied in -# for in-tree builds -# scripts/simple-patches/tcc-0.9.26/ — our own tcc patches -# vendor/mes-libc/include/ — vendored mes-libc headers -# (byte-identical to upstream -# mes/include) -# vendor/boot2-include/ — our own header shim, wins -# -I priority for stdarg.h -ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc -DISTFILES=$ROOT/vendor/upstream -LB_PATCHES=$ROOT/scripts/simple-patches/tcc-0.9.26-lb -OUR_PATCHES=$ROOT/scripts/simple-patches/tcc-0.9.26 -MES_INCLUDE=$ROOT/vendor/mes-libc/include -MES_INCLUDE_LINUX=$MES_INCLUDE/linux/$MES_ARCH - -TCC_TAR=$DISTFILES/tcc-0.9.26.tar.gz -TCC_PKG=tcc-0.9.26-1147-gee75a10c - -[ -r "$TCC_TAR" ] || { echo "missing $TCC_TAR" >&2; exit 1; } -[ -d "$LB_PATCHES" ] || { echo "missing $LB_PATCHES" >&2; exit 1; } -[ -d "$OUR_PATCHES" ] || { echo "missing $OUR_PATCHES" >&2; exit 1; } -[ -d "$MES_INCLUDE" ] || { echo "missing $MES_INCLUDE" >&2; exit 1; } -[ -d "$MES_INCLUDE_LINUX" ] || { echo "missing $MES_INCLUDE_LINUX" >&2; exit 1; } - -# --- (1) unpack ------------------------------------------------------- -mkdir -p "$WORK" -rm -rf "$WORK/$TCC_PKG" -tar -xzf "$TCC_TAR" -C "$WORK" - -SRC=$WORK/$TCC_PKG - -# --- (2) simple-patches ---------------------------------------------- -# Both patches edit tcctools.c. The pair (remove-fileopen, addback-fileopen) -# moves a fopen() block earlier in the function. We implement live-bootstrap's -# simple-patch as an awk literal-block replacer; no binary dep. 
-apply_simple_patch() { - target=$1; before=$2; after=$3 - [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; } - [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; } - [ -r "$after" ] || { echo "patch after missing: $after" >&2; exit 1; } - awk -v BFILE="$before" -v AFILE="$after" ' - BEGIN { - while ((getline line < BFILE) > 0) bef = bef line "\n"; - close(BFILE); - while ((getline line < AFILE) > 0) aft = aft line "\n"; - close(AFILE); - } - { src = src $0 "\n" } - END { - i = index(src, bef); - if (i == 0) { print "patch did not match" > "/dev/stderr"; exit 1 } - printf "%s%s%s", - substr(src, 1, i - 1), - aft, - substr(src, i + length(bef)); - } - ' "$target" > "$target.new" - mv "$target.new" "$target" -} - -apply_simple_patch \ - "$SRC/tcctools.c" \ - "$LB_PATCHES/remove-fileopen.before" \ - "$LB_PATCHES/remove-fileopen.after" - -apply_simple_patch \ - "$SRC/tcctools.c" \ - "$LB_PATCHES/addback-fileopen.before" \ - "$LB_PATCHES/addback-fileopen.after" - -# Bootstrap stub patches — eliminate libc symbols not provided by mes-mini-libc -# (mprotect, getcwd, getenv, gettimeofday, ldexp, time, localtime, sscanf) by -# gating call sites on the existing BOOTSTRAP CPP define. -apply_our_patch() { - name=$1; target=$2 - apply_simple_patch \ - "$target" \ - "$OUR_PATCHES/$name.before" \ - "$OUR_PATCHES/$name.after" -} - -apply_our_patch tcc-is-native-stub "$SRC/tcc.h" -apply_our_patch tccrun-include "$SRC/libtcc.c" -apply_our_patch tinyc-define "$SRC/libtcc.c" -apply_our_patch longjmp-stub "$SRC/libtcc.c" -apply_our_patch set-environment-stub "$SRC/tcc.c" -apply_our_patch getclock-ms-stub "$SRC/tcc.c" -apply_our_patch getcwd-stub "$SRC/tccgen.c" -apply_our_patch strip-file-prefix "$SRC/tccgen.c" -apply_our_patch ldexp-stub "$SRC/tccpp.c" -apply_our_patch date-time-stub "$SRC/tccpp.c" -apply_our_patch lex-char-unsigned "$SRC/tccpp.c" - -# LP64 constants: upstream's parser treats one `L` suffix as 64-bit -# only on x86_64. 
ARM64/RISCV64 are LP64 too; without this, `-4096UL` -# is zero-extended from 32 bits and musl's __syscall_ret rejects valid -# high mmap addresses as errors. -apply_our_patch lp64-long-constant "$SRC/tccpp.c" -apply_our_patch elfinterp-stub "$SRC/tccelf.c" - -# Auto-define `__bss_start` alongside tcc's existing `_end` symbol so a -# freestanding image (kernel.S) can zero its .bss with start/end anchors -# without an ld script. Mirrors the live-bootstrap convention. -apply_our_patch bss-start-symbol "$SRC/tccelf.c" - -# x86_64 static-link PLT32 collapse: under BOOTSTRAP we force -# static_link=1 with no .dynamic / no PT_INTERP, so the runtime linker -# never fills the PLT's GOT slots. Upstream tcc 0.9.26 only collapses -# PLT32→PC32 for hidden-visibility or LOCAL symbols, leaving global -# defined symbols going through unfilled PLT entries. The patch widens -# the condition to any symbol defined in this binary (st_shndx != -# SHN_UNDEF), which matches the aarch64 path's behavior. Harmless on -# other arches: the block is gated `#ifdef TCC_TARGET_X86_64`. -apply_our_patch x86_64-static-plt32 "$SRC/tccelf.c" - -# AT.2: native PT_NOTE for PVH boot. Stock tcc tags `.note.*` sections -# as SHT_PROGBITS and never emits a PT_NOTE phdr, so QEMU's PVH -# `-kernel` path on amd64 (which scans PT_NOTE for the Xen 18 entry) -# rejects the kernel. Three patches: (1) retype implicitly-created -# `.note*` sections to SHT_NOTE; (2) allocate a PT_NOTE phdr covering -# every SHT_NOTE+SHF_ALLOC section; (3) accept SHT_NOTE in the .o -# loader so kernel-asm.o's .note.Xen merges into the link output (else -# the subsequent .rela.note.Xen merge derefs sm_table[].s == NULL). -# The phnum bump in (2) is gated on actual presence so aarch64/riscv64 -# (no .note sections) keep their existing phdr count and stay -# byte-identical to pre-patch output. 
-apply_our_patch note-section-sht-note "$SRC/tccelf.c" -apply_our_patch pt-note-phdr "$SRC/tccelf.c" -apply_our_patch load-obj-accept-sht-note "$SRC/tccelf.c" - -# x86_64 va_list runtime: tcc's lib/va_list.c declares `extern void -# abort(void)` and calls it in an unreachable default branch of the -# arg-type switch. Under -nostdlib that abort() symbol is unresolved -# and the link fails. Replace with an inline spin — same effect, no -# libc dependency. Unconditional patch: lib/va_list.c is only -# compiled on amd64, but the .before block is gated by the file's -# `#if defined TCC_TARGET_X86_64` so other arches see the patch -# inert. -apply_our_patch va_list-no-abort "$SRC/lib/va_list.c" - -# Const-expr short-circuit: gen_opic/gen_opif must respect nocode_wanted -# so 1 || (1/0), 0 && (1/0), 1 ? 2 : 1/0 etc. don't abort with "division -# by zero in constant" in their unevaluated arms (C11 §6.6¶3). -apply_our_patch const-divzero-shortcircuit-int "$SRC/tccgen.c" -apply_our_patch const-divzero-shortcircuit-float "$SRC/tccgen.c" - -# AArch64 vararg fixes — only relevant when targeting ARM64; harmless -# to apply unconditionally since neither file is read on other arches. -apply_our_patch aarch64-stdarg-array "$SRC/include/stdarg.h" -apply_our_patch arm64-va-pointer-operand "$SRC/arm64-gen.c" -apply_our_patch arm64-va-arg-pointer "$SRC/arm64-gen.c" - -# AArch64 codegen: store/load through a literal integer address -# (VT_CONST | VT_LVAL without VT_SYM). Stock arm64-gen.c only handles -# the |VT_SYM case; bare integer addresses fall through to the -# `printf + assert(0)` tail. Hits in musl when tcc folds weak-hidden -# refs in __libc_start_main/mallocng. Patch is gated by the -# surrounding store/load functions which exist only under -# TCC_TARGET_ARM64. -apply_our_patch arm64-store-const-lvalue "$SRC/arm64-gen.c" -apply_our_patch arm64-load-const-lvalue "$SRC/arm64-gen.c" - -# Stock arm64-gen.c truncates SValue::c.i to uint32_t at the top of -# both load() and store(). 
Fine for struct-field offsets, fatal for -# pointer-sized constant addresses (e.g., the seed-kernel writing to -# the device alias VA 0x109000000 for the PL011 UART). Drop the -# truncation; signed 9-bit ldur/stur offsets fit regardless. -apply_our_patch arm64-svcul-no-truncate "$SRC/arm64-gen.c" -apply_our_patch arm64-svcul-no-truncate-store "$SRC/arm64-gen.c" - -# AArch64 assembler — phase 1. Drops in arm64-asm.c + arm64-tok.h and -# wires their includes into tcc.h, libtcc.c, and tcctok.h. Patches are -# gated by TCC_TARGET_ARM64 in the surrounding source so they no-op on -# other arches even when applied. See docs/TCC-ARM64-ASM.md. -cp "$OUR_PATCHES/files/arm64-asm.c" "$SRC/arm64-asm.c" -cp "$OUR_PATCHES/files/arm64-tok.h" "$SRC/arm64-tok.h" -apply_our_patch arm64-asm-include-tcc-h "$SRC/tcc.h" -apply_our_patch arm64-asm-include-libtcc-c "$SRC/libtcc.c" -apply_our_patch arm64-tok-include-tcctok-h "$SRC/tcctok.h" - -# arm64-asm.c emits gen_expr64 for `.quad sym - sym2`; declare it for -# arm64 too (was x86_64-only). -apply_our_patch tcc-h-gen-expr64-arm64 "$SRC/tcc.h" - -# Route .quad through asm_data on arm64 so symbol-difference expressions -# emit a relocation (R_AARCH64_PREL64) instead of failing to parse. -apply_our_patch tccasm-arm64-quad "$SRC/tccasm.c" -apply_our_patch tccasm-arm64-quad-asm-data "$SRC/tccasm.c" - -# Enable the relocations the assembler now emits: PREL64 (data symbol -# difference), CONDBR19 + TSTBR14 (forward conditional branch / tbz). -apply_our_patch arm64-link-asm-relocs "$SRC/arm64-link.c" -apply_our_patch arm64-link-prel64-condbr "$SRC/arm64-link.c" - -# tcc's lexer in ASM_FILE mode swallows mid-line '#' as a line comment, -# which kills the ARM/AArch64 '#imm' immediate prefix. Restrict the -# '#'-as-line-comment behavior to start-of-line so '#' tokenizes as -# itself in operand position. gas's own '#' line-comment rule is BOL -# only, so this matches stock gas semantics. 
Other arches' assemblers -# don't use '#' as an immediate prefix, so they're unaffected. -apply_our_patch asm-hash-bol-only "$SRC/tccpp.c" - -# Side effect of the patch above: alloca86_64-bt.S has two tab-prefixed -# tail comments (`mov %rax,%rsi # size, a second parm…`) that the -# x86_64 assembler now rejects with "end of line expected". They are -# inert documentation; strip them. The file is only compiled when -# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot4.sh), so this -# rewrite is a no-op on aarch64/riscv64 builds. -awk '{ sub(/\t#.*$/, ""); print }' "$SRC/lib/alloca86_64-bt.S" \ - > "$SRC/lib/alloca86_64-bt.S.tmp" -mv "$SRC/lib/alloca86_64-bt.S.tmp" "$SRC/lib/alloca86_64-bt.S" - -# riscv64 int->llong cast: stock tcc 0.9.26 leaves unsigned int values -# in their native register width, but RV64 32-bit ops sign-extend bits -# 63:32, so widening an `unsigned int` to `unsigned long` reads garbage -# upper bits. Make gen_cvt_sxtw do the right thing for both signs, and -# always invoke it on riscv64. Hits be64() in the seed kernel's DTB -# parser; without the fix the kernel sees mem_start sign-extended to -# 0xffffffff80000000 and the boot panics during MMU bring-up. Patch is -# gated by the call-site / function name so it no-ops on other arches. -apply_our_patch riscv64-cvt-int-zext "$SRC/tccgen.c" -apply_our_patch riscv64-gen-cvt-sxtw "$SRC/riscv64-gen.c" -apply_our_patch riscv64-load-ptr-zext "$SRC/riscv64-gen.c" - -# riscv64 ELF default load address — stock tcc lands binaries at -# 0x10000, below the seed kernel's USER_VA_LO=0x200000. Move the -# default to 0x600000 so tcc-emitted ELFs slot into the user pool -# without per-link `-Wl,-Ttext=` overrides. Patch is gated by the -# stock literal in the before-block, so it no-ops elsewhere. -apply_our_patch riscv64-elf-start-addr "$SRC/riscv64-link.c" - -# riscv64 stdarg.h order fix — the upstream `#elif __riscv` branch -# uses `__builtin_va_list` before it's typedef'd. 
Stock tcc treats -# `__builtin_va_list` as a built-in keyword and forgives the forward -# reference; tcc-boot2's frontend does not. Swap the two typedefs so -# the base `char *__builtin_va_list` is in scope before va_list claims -# it. Affects only the riscv branch — the patch is gated by the -# `#elif __riscv` line in the before-block, so it's a no-op when that -# branch is absent (other tcc trees). -apply_our_patch riscv-stdarg-fix "$SRC/include/stdarg.h" - -# gcc/clang __builtin_va_* spelling bridge — append aliases at the end -# of tcc's <stdarg.h> so the same flat.c (which uses the gcc spelling -# because that's what cc.scm recognizes) also compiles back through -# tcc on amd64/aarch64. Gated `#ifndef __riscv` inside .after — the -# __riscv branch already maps these names natively. See the .after -# block for the full rationale. -apply_our_patch stdarg-builtin-aliases "$SRC/include/stdarg.h" - -# Empty config.h shims — pass1.kaem creates these via `catm <out>` (line 27-28). -: > "$SRC/config.h" -mkdir -p "$WORK/mes-overlay/mes" -: > "$WORK/mes-overlay/mes/config.h" - -# --- (3) flatten via host preprocessor -------------------------------- -HOST_CC=${HOST_CC:-cc} -FLAT=$WORK/tcc.flat.c - -"$HOST_CC" -E -P \ - -nostdinc \ - -I "$SRC" \ - -I "$WORK/mes-overlay" \ - -I "$ROOT/vendor/boot2-include" \ - -I "$MES_INCLUDE_LINUX" \ - -I "$MES_INCLUDE" \ - -D __linux__=1 \ - -D __${CPP_ARCH}__=1 \ - -D BOOTSTRAP=1 \ - -D HAVE_LONG_LONG=$HAVE_LL \ - -D inline= \ - -D "CONFIG_TCCDIR=\"/lib/tcc\"" \ - -D "CONFIG_SYSROOT=\"/\"" \ - -D "CONFIG_TCC_CRTPREFIX=\"/lib\"" \ - -D "CONFIG_TCC_ELFINTERP=\"/mes/loader\"" \ - -D "CONFIG_TCC_SYSINCLUDEPATHS=\"/include/mes\"" \ - -D "TCC_LIBGCC=\"/lib/libc.a\"" \ - -D CONFIG_TCC_LIBTCC1_MES=0 \ - -D CONFIG_TCCBOOT=1 \ - -D CONFIG_TCC_STATIC=1 \ - -D CONFIG_USE_LIBGCC=1 \ - -D "TCC_VERSION=\"0.9.26\"" \ - -D ONE_SOURCE=1 \ - -D TCC_TARGET_${TCC_TARGET_DEFINE}=1 \ - "$SRC/tcc.c" > "$FLAT.body" - -# Publish the post-patch tcc <stdarg.h> 
as a per-arch bridge file -# alongside tcc.flat.c. libc-flatten.sh prepends the same bridge to -# libc.flat.c, so the boot3/boot4 container compiles no longer need -# `-I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h`. -# The patched stdarg.h is byte-identical across X86_64 / ARM64 / RISCV64 -# (per-arch logic lives inside its #ifdefs); we still write a per-arch -# copy so every artifact under build/<arch>/ comes from a single -# `boot.sh <arch>` invocation, with nothing shared across arches. -BRIDGE=$WORK/stdarg-bridge.h -cp "$SRC/include/stdarg.h" "$BRIDGE" - -# Prepend the bridge into tcc.flat.c, guarded by !CCSCM so cc.scm -# (which has __builtin_va_list / __builtin_va_* as native frontend -# keywords and predefines CCSCM) skips the whole block. Under tcc, -# the per-arch #ifdef branches inside the bridge resolve and define -# the va_list typedef + __builtin_va_* → tcc native __va_* macros -# that flat.c needs. -{ - echo '#ifndef CCSCM' - cat "$BRIDGE" - echo '#endif' - cat "$FLAT.body" -} > "$FLAT" -rm -f "$FLAT.body" - -BYTES=$(wc -c < "$FLAT") -echo "produced $FLAT ($BYTES bytes)" - -# --- (4) optional verify --------------------------------------------- -if [ "$VERIFY" -eq 1 ]; then - HOST_OBJ=$WORK/tcc.flat.o - if "$HOST_CC" -c -w -o "$HOST_OBJ" "$FLAT" 2>"$WORK/host-cc.log"; then - echo "host cc: tcc.flat.c compiles cleanly to $HOST_OBJ" - else - echo "host cc: tcc.flat.c FAILED to compile; see $WORK/host-cc.log" >&2 - exit 1 - fi -fi diff --git a/scripts/stage2-alpine.sh b/scripts/stage2-alpine.sh @@ -1,287 +0,0 @@ -#!/bin/sh -## scripts/stage2-alpine.sh — build tcc-boot0-mes in an alpine container. -## -## This is the stand-in slot for our scheme1-hosted C compiler: a real -## native gcc plays the role our scheme CC will eventually fill. Its -## job is to take stage 1's tcc.flat.c, build a working tcc, then use -## that tcc to compile mes libc and link a final static tcc-0.9.26 -## binary against it. See docs/TCC.md. 
-## -## We picked alpine (musl) over debian (glibc) because mes headers -## declare errno as a plain global, while glibc declares it TLS — a -## non-TLS / TLS clash at link time. musl exposes errno only via -## __errno_location(), so a one-line `int errno;` shim is the sole -## definition and links cleanly. -## -## Pre-condition: -## build/amd64/vendor/tcc/tcc.flat.c (run scripts/stage1-flatten.sh) -## -## Inside alpine:latest (linux/amd64): -## 1. apk add gcc musl-dev -## 2. gcc -static tcc.flat.c errno-shim.c -> tcc-host -## (validates the flatten output as well-formed C; tcc-host is a -## working musl-linked tcc-0.9.26 binary) -## 3. unpack mes-0.27.1, set up include tree with arch symlink -## (Issue §1 workaround — tcc 0.9.26 SEGVs on missing include) -## 4. tcc-host compiles each mes libc .c file individually -## (Issue §2 workaround — concatenated TU SEGVs around 22+ files) -## 5. tcc-host -ar -> /lib/libc.a + /lib/tcc/libtcc1.a, build crt1.o -## 6. tcc-host -static compiles patched real tcc.c against mes libc -## directly into tcc-boot0-mes — mirrors live-bootstrap's tcc-boot0 -## invocation. We skip the tcc-self.o intermediate that an older -## iteration used: it was a redundant round-trip and exposed a tcc -## 0.9.26 bug in the static-link codepath. -## 7. (best-effort) tcc-boot0-mes -version -## Expected to segfault under QEMU x86_64 emulation on macOS arm64 -## (Issue §3); native x86_64 needed to verify cleanly. -## -## Output: build/amd64/vendor/tcc/tcc-boot0-mes (static, mes-libc-linked). -## This artifact is what stage 3 (busybox) consumes to drive the -## tcc-boot1 / tcc-boot2 chain. 
-## -## Usage: -## scripts/stage2-alpine.sh [--arch X86_64] - -set -eu - -ARCH=X86_64 -while [ $# -gt 0 ]; do - case "$1" in - --arch) ARCH=$2; shift 2 ;; - -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;; - *) echo "unknown arg: $1" >&2; exit 2 ;; - esac -done - -if [ "$ARCH" != "X86_64" ]; then - echo "stage2 currently only supports X86_64 (live-bootstrap reference path)" >&2 - exit 2 -fi -MES_ARCH=x86_64 -BOOT_ARCH=amd64 - -ROOT=$(cd "$(dirname "$0")/.." && pwd) -WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc -# This is the legacy gcc-driven path the cc.scm tcc-boot2 chain -# replaces (see docs/TCC.md). Not on the main `make tcc-boot2` -# build path; kept around as a verification fallback. Requires the -# upstream mes tarball, which lives outside the repo — set -# LIVE_BOOTSTRAP=<path> or MES_TAR=<path-to-mes-0.27.1.tar.gz> -# explicitly. See scripts/diag-livebootstrap-qemu.sh for the same -# pattern. -: "${MES_TAR:=}" -if [ -z "$MES_TAR" ]; then - : "${LIVE_BOOTSTRAP:?set LIVE_BOOTSTRAP=<path-to-live-bootstrap-checkout> or MES_TAR=<mes-0.27.1.tar.gz>}" - MES_TAR=$LIVE_BOOTSTRAP/../lb-work/distfiles/mes-0.27.1.tar.gz -fi -MES_PKG=mes-0.27.1 -FLAT=$WORK/tcc.flat.c - -[ -r "$FLAT" ] || { echo "missing $FLAT — run scripts/stage1-flatten.sh first" >&2; exit 1; } -[ -r "$MES_TAR" ] || { echo "missing $MES_TAR" >&2; exit 1; } -command -v podman >/dev/null 2>&1 || { echo "podman required" >&2; exit 2; } - -# Unpack mes outside the container so it lands on the bind mount. -if [ ! -d "$WORK/$MES_PKG" ]; then - tar -xzf "$MES_TAR" -C "$WORK" -fi -mkdir -p "$WORK/$MES_PKG/include/mes" -: > "$WORK/$MES_PKG/include/mes/config.h" - -# errno shim: tcc.flat.c references errno as a plain global (mes-libc -# convention). musl provides errno only via __errno_location(), so this -# one-line int errno; is the sole storage. Without it the link fails. 
-printf 'int errno;\n' > "$WORK/errno-shim.c" - -echo "=== stage 2: tcc-boot0-mes via alpine:latest ===" -echo "(slow on macOS arm64 — runs under QEMU linux/amd64)" - -TCC_PKG=tcc-0.9.26-1147-gee75a10c - -podman run --rm -i --platform linux/amd64 \ - -v "$ROOT":/work -w /work alpine:latest sh -s "$ARCH" "$MES_ARCH" "$MES_PKG" "$TCC_PKG" "$BOOT_ARCH" <<'CONTAINER_SCRIPT' -set -eu -ARCH=$1 -MES_ARCH=$2 -MES_PKG=$3 -TCC_PKG=$4 -BOOT_ARCH=$5 -WORK=/work/build/$BOOT_ARCH/vendor/tcc - -# --- (1) install gcc + musl-dev (provides libc.a for -static) -------- -apk add --no-cache gcc musl-dev >/dev/null -echo "host gcc: $(gcc --version | head -1)" - -# --- (2) gcc tcc.flat.c -> tcc-host ---------------------------------- -echo "--- gcc -static -> tcc-host ---" -gcc -w -static -no-pie -o "$WORK/tcc-host" \ - "$WORK/tcc.flat.c" "$WORK/errno-shim.c" -"$WORK/tcc-host" -version - -# --- (3) sanitized include tree -------------------------------------- -MES_SRC=$WORK/$MES_PKG -INC=/tmp/mes-inc -rm -rf $INC -mkdir -p $INC -cp -r $MES_SRC/include/. 
$INC/ -ln -sfn linux/$MES_ARCH $INC/arch - -# --- (4) compile mes libc per-file ----------------------------------- -echo "--- tcc-host compiling mes libc (per-file to dodge Issue §2) ---" -mkdir -p /tmp/objs -cd $MES_SRC/lib - -ALL_FILES="ctype/isalnum.c ctype/isalpha.c ctype/isascii.c ctype/iscntrl.c \ -ctype/isdigit.c ctype/isgraph.c ctype/islower.c ctype/isnumber.c \ -ctype/isprint.c ctype/ispunct.c ctype/isspace.c ctype/isupper.c \ -ctype/isxdigit.c ctype/tolower.c ctype/toupper.c \ -dirent/closedir.c dirent/__getdirentries.c dirent/opendir.c \ -linux/readdir.c linux/access.c linux/brk.c linux/chdir.c linux/chmod.c \ -linux/clock_gettime.c linux/close.c linux/dup2.c linux/dup.c linux/execve.c \ -linux/fcntl.c linux/fork.c linux/fsync.c linux/fstat.c linux/_getcwd.c \ -linux/getdents.c linux/getegid.c linux/geteuid.c linux/getgid.c linux/getpid.c \ -linux/getppid.c linux/getrusage.c linux/gettimeofday.c linux/getuid.c \ -linux/ioctl.c linux/ioctl3.c linux/kill.c linux/link.c linux/lseek.c \ -linux/lstat.c linux/malloc.c linux/mkdir.c linux/mknod.c linux/nanosleep.c \ -linux/_open3.c linux/pipe.c linux/_read.c linux/readlink.c linux/rename.c \ -linux/rmdir.c linux/setgid.c linux/settimer.c linux/setuid.c linux/signal.c \ -linux/sigprogmask.c linux/symlink.c linux/stat.c linux/time.c linux/unlink.c \ -linux/waitpid.c linux/wait4.c \ -linux/${MES_ARCH}-mes-gcc/_exit.c linux/${MES_ARCH}-mes-gcc/syscall.c \ -linux/${MES_ARCH}-mes-gcc/_write.c \ -math/ceil.c math/fabs.c math/floor.c \ -mes/abtod.c mes/abtol.c mes/__assert_fail.c mes/assert_msg.c \ -mes/__buffered_read.c mes/__init_io.c mes/cast.c mes/dtoab.c \ -mes/eputc.c mes/eputs.c mes/fdgetc.c mes/fdgets.c mes/fdputc.c mes/fdputs.c \ -mes/fdungetc.c mes/globals.c mes/itoa.c mes/ltoab.c mes/ltoa.c \ -mes/__mes_debug.c mes/mes_open.c mes/ntoab.c mes/oputc.c mes/oputs.c \ -mes/search-path.c mes/ultoa.c mes/utoa.c \ -posix/alarm.c posix/buffered-read.c posix/execl.c posix/execlp.c \ -posix/execv.c posix/execvp.c 
posix/getcwd.c posix/getenv.c posix/isatty.c \ -posix/mktemp.c posix/open.c posix/pathconf.c posix/raise.c posix/sbrk.c \ -posix/setenv.c posix/sleep.c posix/unsetenv.c posix/wait.c posix/write.c \ -stdio/clearerr.c stdio/fclose.c stdio/fdopen.c stdio/feof.c stdio/ferror.c \ -stdio/fflush.c stdio/fgetc.c stdio/fgets.c stdio/fileno.c stdio/fopen.c \ -stdio/fprintf.c stdio/fputc.c stdio/fputs.c stdio/fread.c stdio/freopen.c \ -stdio/fscanf.c stdio/fseek.c stdio/ftell.c stdio/fwrite.c stdio/getc.c \ -stdio/getchar.c stdio/perror.c stdio/printf.c stdio/putc.c stdio/putchar.c \ -stdio/remove.c stdio/snprintf.c stdio/sprintf.c stdio/sscanf.c stdio/ungetc.c \ -stdio/vfprintf.c stdio/vfscanf.c stdio/vprintf.c stdio/vsnprintf.c \ -stdio/vsprintf.c stdio/vsscanf.c \ -stdlib/abort.c stdlib/abs.c stdlib/alloca.c stdlib/atexit.c stdlib/atof.c \ -stdlib/atoi.c stdlib/atol.c stdlib/calloc.c stdlib/__exit.c stdlib/exit.c \ -stdlib/free.c stdlib/mbstowcs.c stdlib/puts.c stdlib/qsort.c stdlib/realloc.c \ -stdlib/strtod.c stdlib/strtof.c stdlib/strtol.c stdlib/strtold.c \ -stdlib/strtoll.c stdlib/strtoul.c stdlib/strtoull.c \ -string/bcmp.c string/bcopy.c string/bzero.c string/index.c string/memchr.c \ -string/memcmp.c string/memcpy.c string/memmem.c string/memmove.c string/memset.c \ -string/rindex.c string/strcat.c string/strchr.c string/strcmp.c string/strcpy.c \ -string/strcspn.c string/strdup.c string/strerror.c string/strlen.c \ -string/strlwr.c string/strncat.c string/strncmp.c string/strncpy.c \ -string/strpbrk.c string/strrchr.c string/strspn.c string/strstr.c string/strupr.c \ -stub/atan2.c stub/bsearch.c stub/chown.c stub/__cleanup.c stub/cos.c \ -stub/ctime.c stub/exp.c stub/fpurge.c stub/freadahead.c stub/frexp.c \ -stub/getgrgid.c stub/getgrnam.c stub/getlogin.c stub/getpgid.c stub/getpgrp.c \ -stub/getpwnam.c stub/getpwuid.c stub/gmtime.c stub/ldexp.c stub/localtime.c \ -stub/log.c stub/mktime.c stub/modf.c stub/mprotect.c stub/pclose.c \ -stub/popen.c stub/pow.c 
stub/putenv.c stub/rand.c stub/realpath.c stub/rewind.c \ -stub/setbuf.c stub/setgrent.c stub/setlocale.c stub/setvbuf.c stub/sigaction.c \ -stub/sigaddset.c stub/sigblock.c stub/sigdelset.c stub/sigemptyset.c \ -stub/sigsetmask.c stub/sin.c stub/sys_siglist.c stub/system.c stub/sqrt.c \ -stub/strftime.c stub/times.c stub/ttyname.c stub/umask.c stub/utime.c \ -${MES_ARCH}-mes-gcc/setjmp.c" - -OBJS= -n_compiled=0 -n_failed=0 -for f in $ALL_FILES; do - name=$(echo "$f" | tr / _) - o=/tmp/objs/${name%.c}.o - if "$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \ - -o "$o" "$f" 2>/dev/null; then - OBJS="$OBJS $o" - n_compiled=$((n_compiled+1)) - else - echo "compile failed: $f" >&2 - n_failed=$((n_failed+1)) - fi -done -echo "compiled $n_compiled libc .o files (failed: $n_failed)" -[ "$n_failed" -eq 0 ] || { echo "abort: some libc files failed" >&2; exit 1; } - -# --- (5) ar -> libc.a, crt1.o, libtcc1.a ----------------------------- -mkdir -p /lib/tcc /include/mes -"$WORK/tcc-host" -ar cr /lib/libc.a $OBJS - -"$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \ - -o /lib/crt1.o "linux/$MES_ARCH-mes-gcc/crt1.c" -: > /lib/crtn.o -: > /lib/crti.o - -"$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ - -I "$INC" -I "$INC/linux/$MES_ARCH" \ - -o /tmp/libtcc1.o libtcc1.c -"$WORK/tcc-host" -ar cr /lib/tcc/libtcc1.a /tmp/libtcc1.o - -cp -r "$INC/." /include/mes/ -ls -la /lib/crt1.o /lib/libc.a /lib/tcc/libtcc1.a - -# --- (6) tcc-host -static compile real tcc.c -> tcc-boot0-mes -------- -# Mirrors live-bootstrap pass1.kaem's tcc-boot0 invocation: same flags, -# same source tree, just driven by tcc-host instead of tcc-mes. Direct -# compile+link in one shot — no intermediate .o. 
-echo "--- tcc-host -static compile+link real tcc.c -> tcc-boot0-mes ---" -cd "$WORK/$TCC_PKG" -"$WORK/tcc-host" \ - -g \ - -static \ - -o "$WORK/tcc-boot0-mes" \ - -D BOOTSTRAP=1 \ - -D HAVE_FLOAT=1 \ - -D HAVE_BITFIELD=1 \ - -D HAVE_LONG_LONG=1 \ - -D HAVE_SETJMP=1 \ - -I . \ - -I /include/mes \ - -D TCC_TARGET_${ARCH}=1 \ - -D CONFIG_TCCDIR=\"/lib/tcc\" \ - -D CONFIG_TCC_CRTPREFIX=\"/lib\" \ - -D CONFIG_TCC_ELFINTERP=\"/mes/loader\" \ - -D CONFIG_TCC_LIBPATHS=\"/lib:/lib/tcc\" \ - -D CONFIG_TCC_SYSINCLUDEPATHS=\"/include/mes\" \ - -D TCC_LIBGCC=\"/lib/libc.a\" \ - -D TCC_LIBTCC1=\"libtcc1.a\" \ - -D CONFIG_TCCBOOT=1 \ - -D CONFIG_TCC_STATIC=1 \ - -D CONFIG_USE_LIBGCC=1 \ - -D TCC_VERSION=\"0.9.26\" \ - -D ONE_SOURCE=1 \ - -L . \ - -L /lib \ - tcc.c -ls -la "$WORK/tcc-boot0-mes" - -# Stage out mes libc + libtcc1 + crt1.o + headers so stage 3 can mount -# them in next to tcc-boot0-mes without re-running stage 2. -STAGE3=$WORK/stage3-input -rm -rf "$STAGE3" -mkdir -p "$STAGE3/lib/tcc" "$STAGE3/include" -cp /lib/libc.a "$STAGE3/lib/" -cp /lib/crt1.o "$STAGE3/lib/" -cp /lib/crtn.o "$STAGE3/lib/" -cp /lib/crti.o "$STAGE3/lib/" -cp /lib/tcc/libtcc1.a "$STAGE3/lib/tcc/" -cp -r "$INC/." "$STAGE3/include/mes/" -echo "staged mes libc bits into $STAGE3 for stage 3" - -# --- (7) best-effort version probe ----------------------------------- -echo -echo "--- tcc-boot0-mes -version (Issue §3: expected SEGV under QEMU) ---" -rc=0; "$WORK/tcc-boot0-mes" -version 2>&1 || rc=$? -echo "exit=$rc" -CONTAINER_SCRIPT - -echo -echo "=== stage 2 artifacts ===" -ls -la "$WORK/tcc-host" "$WORK/tcc-boot0-mes" 2>/dev/null || \ - echo "(some artifacts missing — see container output above)" diff --git a/scripts/stage3-rebuild.sh b/scripts/stage3-rebuild.sh @@ -1,236 +0,0 @@ -#!/bin/sh -## scripts/stage3-rebuild.sh — drive the tcc-boot1 / tcc-boot2 chain -## inside a busybox container, consuming stage 2's tcc-boot0-mes. 
-## -## Mirrors live-bootstrap's pass1.kaem (steps/tcc-0.9.26) chain: -## tcc-boot0-mes (= tcc-boot0 slot) -## → rebuild libc → tcc-boot1 -## → rebuild libc → tcc-boot2 (= "final 0.9.26") -## -## Each rebuild compiles the **real**, unflattened tcc-0.9.26 sources -## (the patched tree at $WORK/tcc-0.9.26-1147-gee75a10c) using the -## previous-stage tcc, with the full live-bootstrap define set -## (HAVE_FLOAT, HAVE_BITFIELD, HAVE_SETJMP added on top of the stage 1 -## flatten set). -## -## Pre-condition: -## build/amd64/vendor/tcc/tcc-boot0-mes -## build/amd64/vendor/tcc/stage3-input/ (staged by stage 2) -## build/amd64/vendor/tcc/tcc-0.9.26-1147-gee75a10c/ (patched, from stage 1) -## build/amd64/vendor/tcc/mes-0.27.1/ (from stage 2) -## -## Container: docker.io/library/busybox:musl on linux/amd64. -## Tools used inside: busybox sh + tcc-boot0-mes (which provides its own -## preprocessor, assembler, linker, and `-ar`). -## -## Status: stage 3 cannot complete on macOS arm64 hosts today — Issue §3 -## (tcc-boot0-mes segfaults at startup under QEMU x86_64) blocks the -## very first step. The script is correct; it will run end-to-end on -## native x86_64 hardware or once a tcc 0.9.28rc backport patches the -## prologue/_DYNAMIC issues. The failure is reported clearly so the -## blocker is visible. -## -## Usage: -## scripts/stage3-rebuild.sh [--arch X86_64] - -set -eu - -ARCH=X86_64 -while [ $# -gt 0 ]; do - case "$1" in - --arch) ARCH=$2; shift 2 ;; - -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;; - *) echo "unknown arg: $1" >&2; exit 2 ;; - esac -done - -if [ "$ARCH" != "X86_64" ]; then - echo "stage3 currently only supports X86_64" >&2 - exit 2 -fi -MES_ARCH=x86_64 -BOOT_ARCH=amd64 - -ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc -TCC_PKG=tcc-0.9.26-1147-gee75a10c -MES_PKG=mes-0.27.1 - -[ -x "$WORK/tcc-boot0-mes" ] || { echo "missing $WORK/tcc-boot0-mes — run stage2-alpine.sh" >&2; exit 1; } -[ -d "$WORK/stage3-input" ] || { echo "missing $WORK/stage3-input — run stage2-alpine.sh" >&2; exit 1; } -[ -d "$WORK/$TCC_PKG" ] || { echo "missing $WORK/$TCC_PKG — run stage1-flatten.sh" >&2; exit 1; } -[ -d "$WORK/$MES_PKG" ] || { echo "missing $WORK/$MES_PKG — run stage2-alpine.sh" >&2; exit 1; } -command -v podman >/dev/null 2>&1 || { echo "podman required" >&2; exit 2; } - -echo "=== stage 3: tcc-boot1 / tcc-boot2 via busybox:musl ===" -echo "(Issue §3 may block on macOS arm64 / QEMU; native x86_64 expected to succeed)" - -podman run --rm -i --platform linux/amd64 \ - -v "$ROOT":/work -w /work \ - docker.io/library/busybox:musl sh -s "$ARCH" "$MES_ARCH" "$TCC_PKG" "$MES_PKG" "$BOOT_ARCH" <<'CONTAINER_SCRIPT' -set -eu -ARCH=$1 -MES_ARCH=$2 -TCC_PKG=$3 -MES_PKG=$4 -BOOT_ARCH=$5 -WORK=/work/build/$BOOT_ARCH/vendor/tcc - -# --- install tcc-boot0-mes + mes libc bits at baked-in paths -------- -mkdir -p /lib/tcc /include/mes /bin -cp "$WORK/stage3-input/lib/libc.a" /lib/libc.a -cp "$WORK/stage3-input/lib/crt1.o" /lib/crt1.o -cp "$WORK/stage3-input/lib/crtn.o" /lib/crtn.o -cp "$WORK/stage3-input/lib/crti.o" /lib/crti.o -cp "$WORK/stage3-input/lib/tcc/libtcc1.a" /lib/tcc/libtcc1.a -cp -r "$WORK/stage3-input/include/mes/." 
/include/mes/ - -TCC=$WORK/tcc-boot0-mes - -echo "--- tcc-boot0-mes -version (smoke; Issue §3 may SEGV here) ---" -"$TCC" -version - -INC_MES=/include/mes -TCC_DEFS_BUILD="-D BOOTSTRAP=1 \ - -D HAVE_FLOAT=1 \ - -D HAVE_BITFIELD=1 \ - -D HAVE_LONG_LONG=1 \ - -D HAVE_SETJMP=1 \ - -D TCC_TARGET_${ARCH}=1 \ - -D CONFIG_TCCDIR=\"/lib/tcc\" \ - -D CONFIG_TCC_CRTPREFIX=\"/lib\" \ - -D CONFIG_TCC_ELFINTERP=\"/mes/loader\" \ - -D CONFIG_TCC_LIBPATHS=\"/lib:/lib/tcc\" \ - -D CONFIG_TCC_SYSINCLUDEPATHS=\"/include/mes\" \ - -D TCC_LIBGCC=\"/lib/libc.a\" \ - -D TCC_LIBTCC1=\"libtcc1.a\" \ - -D CONFIG_TCCBOOT=1 \ - -D CONFIG_TCC_STATIC=1 \ - -D CONFIG_USE_LIBGCC=1 \ - -D TCC_VERSION=\"0.9.26\" \ - -D ONE_SOURCE=1" - -# Helper: rebuild mes libc with the given tcc binary, install to /lib + /lib/tcc. -# Mirrors pass1.kaem's "Recompile libc" block after each tcc-bootN. -rebuild_libc() { - cc=$1 - label=$2 - echo "--- $label: rebuilding mes libc ---" - cd "$WORK/$MES_PKG" - "$cc" -c -D HAVE_CONFIG_H=1 -I include -I include/linux/$MES_ARCH \ - -o /lib/crt1.o lib/linux/$MES_ARCH-mes-gcc/crt1.c - - "$cc" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ - -I include -I include/linux/$MES_ARCH \ - -o /tmp/libtcc1.o lib/libtcc1.c - "$cc" -ar cr /lib/tcc/libtcc1.a /tmp/libtcc1.o - - # libc.a: same per-file approach as stage 2 (Issue §2 workaround). - rm -rf /tmp/objs && mkdir -p /tmp/objs - cd lib - OBJS= - for f in $ALL_LIBC_FILES; do - name=$(echo "$f" | tr / _) - o=/tmp/objs/${name%.c}.o - "$cc" -c -D HAVE_CONFIG_H=1 -I ../include -I ../include/linux/$MES_ARCH \ - -o "$o" "$f" - OBJS="$OBJS $o" - done - "$cc" -ar cr /lib/libc.a $OBJS - cd "$WORK/$MES_PKG/.." -} - -# Helper: compile tcc.c (real, unflattened) into a new tcc binary. -# Mirrors pass1.kaem's tcc-boot0/1/2 invocations. -build_tcc() { - cc=$1 - out=$2 - echo "--- $out: $cc compiling real tcc.c ---" - cd "$WORK/$TCC_PKG" - eval "\"$cc\" -g -static -o \"$WORK/$out\" \ - $TCC_DEFS_BUILD \ - -I . -I $INC_MES \ - -L . 
-L /lib \ - tcc.c" - "$WORK/$out" -version -} - -# Same canonical mes libc list as stage 2 + pass1.kaem. -ALL_LIBC_FILES="ctype/isalnum.c ctype/isalpha.c ctype/isascii.c ctype/iscntrl.c \ -ctype/isdigit.c ctype/isgraph.c ctype/islower.c ctype/isnumber.c \ -ctype/isprint.c ctype/ispunct.c ctype/isspace.c ctype/isupper.c \ -ctype/isxdigit.c ctype/tolower.c ctype/toupper.c \ -dirent/closedir.c dirent/__getdirentries.c dirent/opendir.c \ -linux/readdir.c linux/access.c linux/brk.c linux/chdir.c linux/chmod.c \ -linux/clock_gettime.c linux/close.c linux/dup2.c linux/dup.c linux/execve.c \ -linux/fcntl.c linux/fork.c linux/fsync.c linux/fstat.c linux/_getcwd.c \ -linux/getdents.c linux/getegid.c linux/geteuid.c linux/getgid.c linux/getpid.c \ -linux/getppid.c linux/getrusage.c linux/gettimeofday.c linux/getuid.c \ -linux/ioctl.c linux/ioctl3.c linux/kill.c linux/link.c linux/lseek.c \ -linux/lstat.c linux/malloc.c linux/mkdir.c linux/mknod.c linux/nanosleep.c \ -linux/_open3.c linux/pipe.c linux/_read.c linux/readlink.c linux/rename.c \ -linux/rmdir.c linux/setgid.c linux/settimer.c linux/setuid.c linux/signal.c \ -linux/sigprogmask.c linux/symlink.c linux/stat.c linux/time.c linux/unlink.c \ -linux/waitpid.c linux/wait4.c \ -linux/${MES_ARCH}-mes-gcc/_exit.c linux/${MES_ARCH}-mes-gcc/syscall.c \ -linux/${MES_ARCH}-mes-gcc/_write.c \ -math/ceil.c math/fabs.c math/floor.c \ -mes/abtod.c mes/abtol.c mes/__assert_fail.c mes/assert_msg.c \ -mes/__buffered_read.c mes/__init_io.c mes/cast.c mes/dtoab.c \ -mes/eputc.c mes/eputs.c mes/fdgetc.c mes/fdgets.c mes/fdputc.c mes/fdputs.c \ -mes/fdungetc.c mes/globals.c mes/itoa.c mes/ltoab.c mes/ltoa.c \ -mes/__mes_debug.c mes/mes_open.c mes/ntoab.c mes/oputc.c mes/oputs.c \ -mes/search-path.c mes/ultoa.c mes/utoa.c \ -posix/alarm.c posix/buffered-read.c posix/execl.c posix/execlp.c \ -posix/execv.c posix/execvp.c posix/getcwd.c posix/getenv.c posix/isatty.c \ -posix/mktemp.c posix/open.c posix/pathconf.c posix/raise.c posix/sbrk.c \ 
-posix/setenv.c posix/sleep.c posix/unsetenv.c posix/wait.c posix/write.c \ -stdio/clearerr.c stdio/fclose.c stdio/fdopen.c stdio/feof.c stdio/ferror.c \ -stdio/fflush.c stdio/fgetc.c stdio/fgets.c stdio/fileno.c stdio/fopen.c \ -stdio/fprintf.c stdio/fputc.c stdio/fputs.c stdio/fread.c stdio/freopen.c \ -stdio/fscanf.c stdio/fseek.c stdio/ftell.c stdio/fwrite.c stdio/getc.c \ -stdio/getchar.c stdio/perror.c stdio/printf.c stdio/putc.c stdio/putchar.c \ -stdio/remove.c stdio/snprintf.c stdio/sprintf.c stdio/sscanf.c stdio/ungetc.c \ -stdio/vfprintf.c stdio/vfscanf.c stdio/vprintf.c stdio/vsnprintf.c \ -stdio/vsprintf.c stdio/vsscanf.c \ -stdlib/abort.c stdlib/abs.c stdlib/alloca.c stdlib/atexit.c stdlib/atof.c \ -stdlib/atoi.c stdlib/atol.c stdlib/calloc.c stdlib/__exit.c stdlib/exit.c \ -stdlib/free.c stdlib/mbstowcs.c stdlib/puts.c stdlib/qsort.c stdlib/realloc.c \ -stdlib/strtod.c stdlib/strtof.c stdlib/strtol.c stdlib/strtold.c \ -stdlib/strtoll.c stdlib/strtoul.c stdlib/strtoull.c \ -string/bcmp.c string/bcopy.c string/bzero.c string/index.c string/memchr.c \ -string/memcmp.c string/memcpy.c string/memmem.c string/memmove.c string/memset.c \ -string/rindex.c string/strcat.c string/strchr.c string/strcmp.c string/strcpy.c \ -string/strcspn.c string/strdup.c string/strerror.c string/strlen.c \ -string/strlwr.c string/strncat.c string/strncmp.c string/strncpy.c \ -string/strpbrk.c string/strrchr.c string/strspn.c string/strstr.c string/strupr.c \ -stub/atan2.c stub/bsearch.c stub/chown.c stub/__cleanup.c stub/cos.c \ -stub/ctime.c stub/exp.c stub/fpurge.c stub/freadahead.c stub/frexp.c \ -stub/getgrgid.c stub/getgrnam.c stub/getlogin.c stub/getpgid.c stub/getpgrp.c \ -stub/getpwnam.c stub/getpwuid.c stub/gmtime.c stub/ldexp.c stub/localtime.c \ -stub/log.c stub/mktime.c stub/modf.c stub/mprotect.c stub/pclose.c \ -stub/popen.c stub/pow.c stub/putenv.c stub/rand.c stub/realpath.c stub/rewind.c \ -stub/setbuf.c stub/setgrent.c stub/setlocale.c stub/setvbuf.c 
stub/sigaction.c \ -stub/sigaddset.c stub/sigblock.c stub/sigdelset.c stub/sigemptyset.c \ -stub/sigsetmask.c stub/sin.c stub/sys_siglist.c stub/system.c stub/sqrt.c \ -stub/strftime.c stub/times.c stub/ttyname.c stub/umask.c stub/utime.c \ -${MES_ARCH}-mes-gcc/setjmp.c" - -# --- Pass 1: tcc-boot0-mes -> tcc-boot1 ----------------------------- -rebuild_libc "$TCC" "tcc-boot0-mes" -build_tcc "$TCC" "tcc-boot1" - -# --- Pass 2: tcc-boot1 -> tcc-boot2 (final 0.9.26) ------------------ -TCC=$WORK/tcc-boot1 -rebuild_libc "$TCC" "tcc-boot1" -build_tcc "$TCC" "tcc-boot2" - -echo -echo "=== stage 3: tcc-boot2 is the final 0.9.26 build ===" -ls -la "$WORK/tcc-boot1" "$WORK/tcc-boot2" -CONTAINER_SCRIPT - -echo -echo "=== stage 3 artifacts ===" -ls -la "$WORK/tcc-boot1" "$WORK/tcc-boot2" 2>/dev/null || \ - echo "(stage 3 did not complete — see container output above; likely Issue §3)" diff --git a/seed-kernel/Makefile b/seed-kernel/Makefile @@ -70,10 +70,10 @@ $(OUT)/mmu.o: $(ARCHDIR)/mmu.c $(ARCHDIR)/arch.h | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< # Shared mem helpers (memcpy/memset/memmove/memcmp). Lives in -# tcc-cc/mem.c so the tcc-built and gcc-built kernels link the same +# tcc/cc/mem.c so the tcc-built and gcc-built kernels link the same # implementation; tcc lowers some struct copies to memmove() that gcc # inlines, so the kernel needs all three regardless of the compiler. -$(OUT)/mem.o: ../tcc-cc/mem.c | $(OUT) +$(OUT)/mem.o: ../tcc/cc/mem.c | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< $(KIMAGE): $(KOBJS) $(ARCHDIR)/kernel.lds diff --git a/seed-kernel/kernel.c b/seed-kernel/kernel.c @@ -51,10 +51,10 @@ __attribute__((noreturn)) static void hang(void) { for (;;) arch_pause(); } /* ─── Tiny libc-ish helpers ─────────────────────────────────────────────── */ -/* memcpy / memset / memmove come from tcc-cc/mem.c, linked alongside. +/* memcpy / memset / memmove come from tcc/cc/mem.c, linked alongside. 
* Both gcc and tcc emit calls to these for struct copies and bulk * zero-init past their inline thresholds; centralising them in - * tcc-cc/mem.c keeps the tcc-built and gcc-built kernels in sync. */ + * tcc/cc/mem.c keeps the tcc-built and gcc-built kernels in sync. */ void *memcpy(void *dst, const void *src, u64 n); void *memset(void *dst, int c, u64 n); void *memmove(void *dst, const void *src, u64 n); diff --git a/tcc-cc/riscv64/start.S b/tcc-cc/riscv64/start.S @@ -1,11 +0,0 @@ -/* Linux riscv64 entry stub for the tcc-cc suite — same shape as - * tcc-cc/aarch64/start.S: pull argc/argv off the kernel-supplied - * stack frame, call main, exit with main's return value. */ - - .globl _start -_start: - ld a0, 0(sp) /* argc */ - addi a1, sp, 8 /* argv */ - call main - li a7, 93 /* NR_exit */ - ecall diff --git a/tcc-libc/riscv64/start.S b/tcc-libc/riscv64/start.S @@ -1,42 +0,0 @@ -/* tcc-libc entry stub — riscv64 sibling of tcc-libc/aarch64/start.S. - * Linux brings argc at [sp] and argv at sp+8 on entry. Call - * __libc_init(argc, argv) so `environ` is set, then main(argc, argv), - * then exit with main's return value. - * - * Built by two assemblers from the same source: GAS (host alpine-gcc - * for the Makefile harness path) and tcc 0.9.26's riscv64-asm.c (in - * scripts/boot3.sh, scratch container). They agree on most mnemonics - * but diverge on load/store memory syntax: GAS uses `ld rd, off(rs)`, - * tcc-asm uses the 3-operand `ld rd, rs, off`. The LD/SD macros below - * branch on __TINYC__ to keep one source of truth. - * - * `jal ra, sym` and `jalr zero, ra, 0` are the canonical 2/3-operand - * forms both assemblers accept (GAS's `call` / `ret` / `j` pseudos are - * not in tcc-asm), so callsites use those directly. */ - -/* tcc-asm's S-type encoding takes the base register first, opposite of - * GAS: `sd base, src, off` vs GAS's `sd src, off(base)`. Hide the - * difference behind ST8(src, base, off). 
*/ -#ifdef __TINYC__ -# define LD8(rd, base, off) ld rd, base, off -# define ST8(src, base, off) sd base, src, off -#else -# define LD8(rd, base, off) ld rd, off(base) -# define ST8(src, base, off) sd src, off(base) -#endif - - .globl _start -_start: - LD8(a0, sp, 0) /* argc */ - addi a1, sp, 8 /* argv */ - addi sp, sp, -16 /* save argc/argv across __libc_init */ - ST8(a0, sp, 0) - ST8(a1, sp, 8) - jal ra, __libc_init - LD8(a0, sp, 0) - LD8(a1, sp, 8) - addi sp, sp, 16 - jal ra, main - /* main's return is in a0 — feed it to exit(2). */ - li a7, 93 /* NR_exit */ - ecall diff --git a/tcc-cc/aarch64/start.S b/tcc/cc/aarch64/start.S diff --git a/tcc-cc/amd64/start.S b/tcc/cc/amd64/start.S diff --git a/tcc-cc/mem.c b/tcc/cc/mem.c diff --git a/tcc/cc/riscv64/start.S b/tcc/cc/riscv64/start.S @@ -0,0 +1,11 @@ +/* Linux riscv64 entry stub for the tcc-cc suite — same shape as + * tcc/cc/aarch64/start.S: pull argc/argv off the kernel-supplied + * stack frame, call main, exit with main's return value. */ + + .globl _start +_start: + ld a0, 0(sp) /* argc */ + addi a1, sp, 8 /* argv */ + call main + li a7, 93 /* NR_exit */ + ecall diff --git a/tcc-gcc/aarch64/start.S b/tcc/gcc/aarch64/start.S diff --git a/tcc-gcc/aarch64/sys_stubs.c b/tcc/gcc/aarch64/sys_stubs.c diff --git a/tcc-gcc/amd64/start.S b/tcc/gcc/amd64/start.S diff --git a/tcc-gcc/amd64/sys_stubs.c b/tcc/gcc/amd64/sys_stubs.c diff --git a/tcc-libc/aarch64/start.S b/tcc/libc/aarch64/start.S diff --git a/tcc-libc/aarch64/sys_stubs.S b/tcc/libc/aarch64/sys_stubs.S diff --git a/tcc-libc/amd64/start.S b/tcc/libc/amd64/start.S diff --git a/tcc-libc/amd64/sys_stubs.S b/tcc/libc/amd64/sys_stubs.S diff --git a/tcc/libc/riscv64/start.S b/tcc/libc/riscv64/start.S @@ -0,0 +1,42 @@ +/* tcc-libc entry stub — riscv64 sibling of tcc/libc/aarch64/start.S. + * Linux brings argc at [sp] and argv at sp+8 on entry. Call + * __libc_init(argc, argv) so `environ` is set, then main(argc, argv), + * then exit with main's return value. 
+ * + * Built by two assemblers from the same source: GAS (host alpine-gcc + * for the Makefile harness path) and tcc 0.9.26's riscv64-asm.c (in + * boot/boot3.sh, scratch container). They agree on most mnemonics + * but diverge on load/store memory syntax: GAS uses `ld rd, off(rs)`, + * tcc-asm uses the 3-operand `ld rd, rs, off`. The LD8/ST8 macros below + * branch on __TINYC__ to keep one source of truth. + * + * `jal ra, sym` and `jalr zero, ra, 0` are the canonical 2/3-operand + * forms both assemblers accept (GAS's `call` / `ret` / `j` pseudos are + * not in tcc-asm), so callsites use those directly. */ + +/* tcc-asm's S-type encoding takes the base register first, opposite of + * GAS: `sd base, src, off` vs GAS's `sd src, off(base)`. Hide the + * difference behind ST8(src, base, off). */ +#ifdef __TINYC__ +# define LD8(rd, base, off) ld rd, base, off +# define ST8(src, base, off) sd base, src, off +#else +# define LD8(rd, base, off) ld rd, off(base) +# define ST8(src, base, off) sd src, off(base) +#endif + + .globl _start +_start: + LD8(a0, sp, 0) /* argc */ + addi a1, sp, 8 /* argv */ + addi sp, sp, -16 /* save argc/argv across __libc_init */ + ST8(a0, sp, 0) + ST8(a1, sp, 8) + jal ra, __libc_init + LD8(a0, sp, 0) + LD8(a1, sp, 8) + addi sp, sp, 16 + jal ra, main + /* main's return is in a0 — feed it to exit(2). */ + li a7, 93 /* NR_exit */ + ecall diff --git a/tcc-libc/riscv64/sys_stubs.S b/tcc/libc/riscv64/sys_stubs.S diff --git a/tcc/scripts/build-tcc-gcc.sh b/tcc/scripts/build-tcc-gcc.sh @@ -0,0 +1,42 @@ +#!/bin/sh +## build-tcc-gcc.sh — link tcc.flat.c + libc.flat.c with stock gcc. +## +## Sanity-check sibling of the cc.scm path. Inputs are the *same* +## flatten outputs the cc.scm pipeline consumes; harness sources +## (tcc/gcc/<arch>/{start.S,sys_stubs.c}) provide a minimal _start and +## syscall stubs so we don't need musl's crt0 or its libc.
If +## tcc-gcc -version works and our cc.scm-built tcc-boot2 doesn't, the +## bug is downstream of the C source. +## +## Runs inside the boot2-alpine-gcc:<arch> image. -nodefaultlibs (not +## -nostdlib) keeps libgcc available for long-double soft-float +## helpers (__addtf3 etc., needed on aarch64 musl). +## +## Env: ARCH=aarch64 (only arch wired today; start.S is per-arch) +## Usage: build-tcc-gcc.sh <out> <tcc.flat.c> <libc.flat.c> + +set -eu + +: "${ARCH:?ARCH must be set}" +[ "$#" -eq 3 ] || { echo "usage: ARCH=<arch> $0 <out> <tcc.flat.c> <libc.flat.c>" >&2; exit 2; } + +OUT=$1 +TCC_FLAT=$2 +LIBC_FLAT=$3 + +HARNESS=tcc/gcc/$ARCH +[ -d "$HARNESS" ] || { echo "no harness for ARCH=$ARCH at $HARNESS" >&2; exit 1; } + +mkdir -p "$(dirname "$OUT")" + +gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \ + -fno-builtin \ + -Wno-implicit-function-declaration \ + -Wno-builtin-declaration-mismatch \ + -Wno-incompatible-pointer-types \ + -Wno-int-conversion \ + -e _start \ + "$HARNESS/start.S" "$HARNESS/sys_stubs.c" \ + tcc/cc/mem.c \ + "$TCC_FLAT" "$LIBC_FLAT" \ + -lgcc -o "$OUT" diff --git a/tcc/scripts/run-gcc-libc-flat-tcc.sh b/tcc/scripts/run-gcc-libc-flat-tcc.sh @@ -0,0 +1,155 @@ +#!/bin/sh +## run-gcc-libc-flat-tcc.sh — tcc-gcc baseline runner. +## +## Builds the tcc.flat.c-built tcc-gcc against mes-libc's mem* sources +## into a runtime archive, then walks tests/cc/<name>.c through tcc-gcc +## linking against that archive. The control reference for the +## tcc-cc/tcc-libc suites — if a fixture passes here but fails through +## cc.scm + tcc-boot2 / tcc-tcc, the bug lives in our pipeline rather +## than in tcc-0.9.26 itself. +## +## Env: ARCH=aarch64 (default) | amd64 +## TCC=<path> overrides the per-arch tcc-gcc binary +## Usage: tcc/scripts/run-gcc-libc-flat-tcc.sh [<test-name>...] + +set -eu + +# This script lives in tcc/scripts/, two levels below the repo root that every relative path below (build/, tests/cc, vendor/) assumes. +ROOT=$(cd "$(dirname "$0")/../.."
&& pwd) +cd "$ROOT" + +ARCH=${ARCH:-aarch64} + +case "$ARCH" in + aarch64) TCC_TARGET=ARM64; RUNTIME_TARGET_DEFINES="-D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1"; LIB_HELPER_SRC=lib/lib-arm64.c; LIB_HELPER_NAME=lib-arm64.o ;; + amd64) TCC_TARGET=X86_64; RUNTIME_TARGET_DEFINES="-D TCC_TARGET_X86_64=1"; LIB_HELPER_SRC=; LIB_HELPER_NAME= ;; + *) echo "$0: unsupported ARCH '$ARCH' (aarch64|amd64)" >&2; exit 2 ;; +esac + +TCC=${TCC:-build/$ARCH/tcc/gcc/tcc-gcc} +START=build/$ARCH/tcc/cc/start.o +OUT_ROOT=build/$ARCH/tests/gcc-libc-flat-tcc +WORK_ROOT=build/$ARCH/.work/tests/gcc-libc-flat-tcc +TCC_SRC=build/$ARCH/vendor/tcc/tcc-0.9.26-1147-gee75a10c +MES_INC=vendor/mes-libc/include +case "$ARCH" in + aarch64) MES_LINUX_INC=vendor/mes-libc/include/linux/riscv64 ;; + amd64) MES_LINUX_INC=vendor/mes-libc/include/linux/x86_64 ;; +esac +RUNTIME=$WORK_ROOT/runtime.a + +[ -x "$TCC" ] || { + echo "missing $TCC; build it with tcc/scripts/build-tcc-gcc.sh and $TCC_SRC/../tcc.flat.c" >&2 + exit 2 +} +[ -r "$START" ] || { echo "missing $START" >&2; exit 2; } + +mkdir -p "$OUT_ROOT" "$WORK_ROOT" + +"$TCC" -v + +build_runtime() { + rm -rf "$WORK_ROOT/runtime-objs" + mkdir -p "$WORK_ROOT/runtime-objs" + + # shellcheck disable=SC2086 # RUNTIME_TARGET_DEFINES is intentionally word-split. + "$TCC" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ + $RUNTIME_TARGET_DEFINES \ + -I "$TCC_SRC" -I "$TCC_SRC/include" \ + -o "$WORK_ROOT/runtime-objs/libtcc1.o" "$TCC_SRC/lib/libtcc1.c" + if [ -n "$LIB_HELPER_SRC" ]; then + # shellcheck disable=SC2086 # RUNTIME_TARGET_DEFINES is intentionally word-split. 
+ "$TCC" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ + $RUNTIME_TARGET_DEFINES \ + -I "$TCC_SRC" -I "$TCC_SRC/include" \ + -o "$WORK_ROOT/runtime-objs/$LIB_HELPER_NAME" "$TCC_SRC/$LIB_HELPER_SRC" + fi + + for src in string/memcpy.c string/memmove.c string/memset.c string/memcmp.c; do + obj=$WORK_ROOT/runtime-objs/$(basename "$src" .c).o + "$TCC" -c -D HAVE_CONFIG_H=1 -I "$MES_INC" -I "$MES_LINUX_INC" \ + -o "$obj" "vendor/mes-libc/$src" + done + + "$TCC" -ar cr "$RUNTIME" "$WORK_ROOT"/runtime-objs/*.o +} + +build_runtime + +if [ "$#" -gt 0 ]; then + NAMES="$*" +else + NAMES=$( + ls tests/cc 2>/dev/null \ + | sed -n 's/^\([^_][^.]*\)\.c$/\1/p' \ + | sort -u + ) +fi + +pass=0 +fail=0 + +check_one() { + name=$1 + src=tests/cc/$name.c + exe=$OUT_ROOT/$name + work=$WORK_ROOT/$name + tcc_log=$work/tcc.log + out=$work/stdout + mkdir -p "$work" + + if [ -e tests/cc/$name.expected ]; then + expout=$(cat tests/cc/$name.expected) + else + expout= + fi + if [ -e tests/cc/$name.expected-exit ]; then + expexit=$(cat tests/cc/$name.expected-exit) + else + expexit=0 + fi + + if ! "$TCC" -nostdlib -I "$TCC_SRC/include" \ + "$START" "$src" "$RUNTIME" -o "$exe" >"$tcc_log" 2>&1; then + echo " FAIL gcc-libc-flat-tcc/$name" + echo " tcc compile/link failed:" + sed 's/^/ /' "$tcc_log" + fail=$((fail + 1)) + return + fi + + if "$exe" >"$out" 2>&1; then + actexit=0 + else + actexit=$? 
+ fi + actout=$(cat "$out") + + if [ "$actexit" != "$expexit" ]; then + echo " FAIL gcc-libc-flat-tcc/$name" + echo " exit: expected $expexit, got $actexit" + fail=$((fail + 1)) + return + fi + + if [ "$actout" != "$expout" ]; then + echo " FAIL gcc-libc-flat-tcc/$name" + echo " --- expected ---" + printf '%s\n' "$expout" | sed 's/^/ /' + echo " --- actual ---" + printf '%s\n' "$actout" | sed 's/^/ /' + fail=$((fail + 1)) + return + fi + + echo " PASS gcc-libc-flat-tcc/$name" + pass=$((pass + 1)) +} + +for name in $NAMES; do + [ -e tests/cc/$name.c ] || continue + check_one "$name" +done + +echo "$pass passed, $fail failed" +[ "$fail" -eq 0 ] diff --git a/tests/Makefile b/tests/Makefile @@ -50,19 +50,19 @@ IMAGE_STAMPS := $(foreach a,$(ALL_ARCHES),build/$(a)/.image) .PHONY: image image: $(IMAGE_STAMP) -$(IMAGE_STAMPS): build/%/.image: scripts/Containerfile.busybox +$(IMAGE_STAMPS): build/%/.image: tests/containers/Containerfile.busybox mkdir -p $(@D) podman build --platform $(PLATFORM_$*) -t boot2-busybox:$* \ - -f scripts/Containerfile.busybox scripts/ + -f tests/containers/Containerfile.busybox tests/containers/ @touch $@ ALPINE_GCC_IMAGES := $(foreach a,$(ALL_ARCHES),build/$(a)/.image-alpine-gcc) -$(ALPINE_GCC_IMAGES): build/%/.image-alpine-gcc: scripts/Containerfile.alpine-gcc +$(ALPINE_GCC_IMAGES): build/%/.image-alpine-gcc: tests/containers/Containerfile.alpine-gcc mkdir -p $(@D) podman build --platform $(PLATFORM_$*) \ -t boot2-alpine-gcc:$* \ - -f scripts/Containerfile.alpine-gcc scripts/ + -f tests/containers/Containerfile.alpine-gcc tests/containers/ @touch $@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ @@ -88,28 +88,28 @@ build/%/P1/P1.M1: $(wildcard P1/gen/*.py) .SECONDARY: $(foreach a,$(ALL_ARCHES),build/$(a)/P1/P1.M1) -P1/P1-%.M1: build/%/P1/P1.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS) - sh scripts/prune-p1-table.sh $< $@ $(P1_PRUNE_SRCS) +P1/P1-%.M1: build/%/P1/P1.M1 bootprep/prune-p1-table.sh $(P1_PRUNE_SRCS) + sh 
bootprep/prune-p1-table.sh $< $@ $(P1_PRUNE_SRCS) # ── tcc-gcc: same flatten, stock gcc (sanity check) ────────────────────── TCC_PKG_INCLUDE = build/$(ARCH)/src/src/tcc/$(TCC_PKG)/include TCC_PKG_LIBDIR = build/$(ARCH)/src/src/tcc/$(TCC_PKG)/lib -TCC_GCC_BIN := build/$(ARCH)/tcc-gcc/tcc-gcc +TCC_GCC_BIN := build/$(ARCH)/tcc/gcc/tcc-gcc TCC_GCC_IMAGE := build/$(ARCH)/.image-alpine-gcc -TCC_GCC_HARNESS := tcc-gcc/$(ARCH)/start.S tcc-gcc/$(ARCH)/sys_stubs.c +TCC_GCC_HARNESS := tcc/gcc/$(ARCH)/start.S tcc/gcc/$(ARCH)/sys_stubs.c TCC_GCC_TCC_FLAT := build/$(ARCH)/src/src/tcc/tcc.flat.c TCC_GCC_LIBC_FLAT := build/$(ARCH)/src/src/libc/libc.flat.c $(TCC_GCC_BIN): $(TCC_GCC_TCC_FLAT) $(TCC_GCC_LIBC_FLAT) \ - $(TCC_GCC_HARNESS) scripts/build-tcc-gcc.sh $(TCC_GCC_IMAGE) \ + $(TCC_GCC_HARNESS) tcc/scripts/build-tcc-gcc.sh $(TCC_GCC_IMAGE) \ build/$(ARCH)/src/.stamp mkdir -p $(@D) podman run --rm --pull=never --platform $(PLATFORM_$(ARCH)) \ -e ARCH=$(ARCH) \ -v $(CURDIR):/work -w /work boot2-alpine-gcc:$(ARCH) \ - sh scripts/build-tcc-gcc.sh $@ $(TCC_GCC_TCC_FLAT) $(TCC_GCC_LIBC_FLAT) + sh tcc/scripts/build-tcc-gcc.sh $@ $(TCC_GCC_TCC_FLAT) $(TCC_GCC_LIBC_FLAT) # ── tcc-cc / tcc-libc test stubs (per-arch start.S / sys_stubs.S) ──────── # @@ -142,24 +142,24 @@ TCC_ASM_DEPS := TCC_ASM = $(HOST_CC) -target $(HOST_CC_TARGET) -c -o $(1) -x assembler $(2) endif -TCC_CC_START := build/$(ARCH)/tcc-cc/start.o +TCC_CC_START := build/$(ARCH)/tcc/cc/start.o ifeq ($(ARCH),amd64) # x86_64 va_arg intrinsics (__va_start / __va_arg). On other arches tcc # lowers va_arg without out-of-line helpers. 
-TCC_CC_VA_LIST := build/$(ARCH)/tcc-cc/va_list.o +TCC_CC_VA_LIST := build/$(ARCH)/tcc/cc/va_list.o else TCC_CC_VA_LIST := endif -TCC_LIBC_START := build/$(ARCH)/tcc-libc/start.o -TCC_LIBC_SYS_STUBS := build/$(ARCH)/tcc-libc/sys_stubs.o +TCC_LIBC_START := build/$(ARCH)/tcc/libc/start.o +TCC_LIBC_SYS_STUBS := build/$(ARCH)/tcc/libc/sys_stubs.o -$(TCC_CC_START): tcc-cc/$(ARCH)/start.S $(TCC_ASM_DEPS) +$(TCC_CC_START): tcc/cc/$(ARCH)/start.S $(TCC_ASM_DEPS) mkdir -p $(@D) $(call TCC_ASM,$@,$<) -build/amd64/tcc-cc/va_list.o: \ +build/amd64/tcc/cc/va_list.o: \ build/amd64/src/src/tcc/$(TCC_PKG)/lib/va_list.c \ $(call boot3,amd64)/tcc0 build/amd64/.image \ build/amd64/src/.stamp @@ -170,11 +170,11 @@ build/amd64/tcc-cc/va_list.o: \ -D TCC_TARGET_X86_64=1 \ -c -o $@ build/amd64/src/src/tcc/$(TCC_PKG)/lib/va_list.c -$(TCC_LIBC_START): tcc-libc/$(ARCH)/start.S $(TCC_ASM_DEPS) +$(TCC_LIBC_START): tcc/libc/$(ARCH)/start.S $(TCC_ASM_DEPS) mkdir -p $(@D) $(call TCC_ASM,$@,$<) -$(TCC_LIBC_SYS_STUBS): tcc-libc/$(ARCH)/sys_stubs.S $(TCC_ASM_DEPS) +$(TCC_LIBC_SYS_STUBS): tcc/libc/$(ARCH)/sys_stubs.S $(TCC_ASM_DEPS) mkdir -p $(@D) $(call TCC_ASM,$@,$<) diff --git a/tests/README.md b/tests/README.md @@ -105,6 +105,6 @@ tests/seed-accept.sh boot5 # boot5 byte-eq vs podman ``` ARCH is fixed to `aarch64` since that's the only seed-driver-complete -arch today. Prereq for every mode: `./scripts/boot.sh aarch64` has run +arch today. Prereq for every mode: `./boot/boot.sh aarch64` has run under the default `DRIVER=podman` so `build/aarch64/podman/boot{0..6}/` is populated. diff --git a/scripts/boot-run-scheme1.sh b/tests/boot-run-scheme1.sh diff --git a/tests/build-p1.sh b/tests/build-p1.sh @@ -18,7 +18,7 @@ ## build/$ARCH/.work/<src-without-ext>/, mirroring the source path under ## the repo root (e.g. tests/P1/00-hello.P1 -> build/aarch64/.work/ ## tests/P1/00-hello/). 
A one-line sidecar at <out>.workdir records -## that path so tooling (scripts/disasm-elf.sh) can find the artifacts +## that path so tooling (tools/disasm-elf.sh) can find the artifacts ## from the binary alone. ## ## Env: ARCH=aarch64|amd64|riscv64 diff --git a/tests/build-p1pp.sh b/tests/build-p1pp.sh @@ -41,7 +41,7 @@ ## or a generated build/.../*.P1pp), the caller MUST set WORK_SUBPATH ## explicitly so the work dir mirrors the logical primary source path ## (e.g. tests/cc-libc/000-exit). A one-line sidecar at <out>.workdir -## records the resolved work dir so tooling (scripts/disasm-elf.sh) can +## records the resolved work dir so tooling (tools/disasm-elf.sh) can ## locate the artifacts from the binary alone. ## ## Env: ARCH=aarch64|amd64|riscv64 diff --git a/tests/cc/338-literal-addr-deref.c b/tests/cc/338-literal-addr-deref.c @@ -4,7 +4,7 @@ * out on bare VT_CONST | VT_LVAL — i.e. `*(volatile T*)0x1234`. The * matching x86_64 path goes through gen_modrm and the riscv64 path has * an explicit fr==VT_CONST branch, so neither tripped before. The - * scripts/simple-patches/tcc-0.9.26/arm64-{store,load}-const-lvalue + * vendor/tcc/patches/arm64-{store,load}-const-lvalue * pair adds the missing case. * * The volatile global keeps tcc from constant-propagating the diff --git a/tests/containers/Containerfile.alpine-gcc b/tests/containers/Containerfile.alpine-gcc @@ -0,0 +1,18 @@ +## Alpine with gcc + musl-dev + binutils baked in, used by the +## `make tcc-gcc` sanity-check target (Makefile). +## +## tcc-gcc compiles the same tcc.flat.c + libc.flat.c our cc.scm path +## consumes, but with stock gcc + libgcc soft-float helpers + a tiny +## hand-rolled _start / sys_* shim (tcc/gcc/<arch>/). It's a known-good +## reference: if our cc.scm-built tcc-boot2 misbehaves and tcc-gcc +## doesn't, the bug is in our codegen, not the source. +## +## Built per --platform; tag as boot2-alpine-gcc:<arch>. 
We don't pin +## a digest here because alpine:3.20 is only used by this opt-in +## harness — re-pin if the harness becomes load-bearing. + +FROM docker.io/library/alpine:3.20 + +RUN apk add --no-cache gcc musl-dev binutils + +CMD ["/bin/sh"] diff --git a/scripts/Containerfile.busybox b/tests/containers/Containerfile.busybox diff --git a/tests/run-suite.sh b/tests/run-suite.sh @@ -10,7 +10,7 @@ ## the host greps them to update its totals, then prints the final ## summary itself. Fixture-name discovery happens here when no names ## are passed (so the host can stay agnostic about each suite's layout), -## except for m1pp: scripts/lint.sh runs python on the host, so the +## except for m1pp: tools/lint.sh runs python on the host, so the ## host preflights lint and passes the explicit kept list down. ## ## Env: ARCH=aarch64|amd64|riscv64 @@ -155,7 +155,7 @@ run_scheme1_suite() { fi tmp_stdout=$(mktemp) - if sh scripts/boot-run-scheme1.sh "$fixture" >"$tmp_stdout" 2>&1; then + if sh tests/boot-run-scheme1.sh "$fixture" >"$tmp_stdout" 2>&1; then actual_exit=0 else actual_exit=$? @@ -579,13 +579,13 @@ run_tcc_cc_suite() { return ;; esac - start=build/$ARCH/tcc-cc/start.o + start=build/$ARCH/tcc/cc/start.o libtcc1=build/$ARCH/podman/boot4/libtcc1.a tcc_include=build/$ARCH/src/src/tcc/tcc-0.9.26-1147-gee75a10c/include # x86_64 only: __va_start / __va_arg intrinsics for variadic # functions. Other arches lower va_arg without out-of-line helpers. 
if [ "$ARCH" = "amd64" ]; then - va_list=build/$ARCH/tcc-cc/va_list.o + va_list=build/$ARCH/tcc/cc/va_list.o else va_list= fi @@ -630,8 +630,8 @@ run_tcc_cc_suite() { expexit=0 fi - elf=build/$ARCH/tests/tcc-cc/$stage_tag/$name - workdir=build/$ARCH/.work/tests/tcc-cc/$stage_tag/$name + elf=build/$ARCH/tests/tcc/cc/$stage_tag/$name + workdir=build/$ARCH/.work/tests/tcc/cc/$stage_tag/$name label="[$ARCH] tcc-cc[$stage_tag]/$name" mkdir -p "$(dirname "$elf")" "$workdir" @@ -688,15 +688,15 @@ run_tcc_libc_suite() { return ;; esac - start=build/$ARCH/tcc-libc/start.o - sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o + start=build/$ARCH/tcc/libc/start.o + sys_stubs=build/$ARCH/tcc/libc/sys_stubs.o libtcc1=build/$ARCH/podman/boot4/libtcc1.a libc=build/$ARCH/podman/boot5/libc.a tcc_include=build/$ARCH/src/src/tcc/tcc-0.9.26-1147-gee75a10c/include # x86_64 only: __va_start / __va_arg intrinsics for variadic # functions. mes-libc's printf family hits this directly. if [ "$ARCH" = "amd64" ]; then - va_list=build/$ARCH/tcc-cc/va_list.o + va_list=build/$ARCH/tcc/cc/va_list.o else va_list= fi @@ -733,8 +733,8 @@ run_tcc_libc_suite() { expexit=0 fi - elf=build/$ARCH/tests/tcc-libc/$stage_tag/$name - workdir=build/$ARCH/.work/tests/tcc-libc/$stage_tag/$name + elf=build/$ARCH/tests/tcc/libc/$stage_tag/$name + workdir=build/$ARCH/.work/tests/tcc/libc/$stage_tag/$name label="[$ARCH] tcc-libc[$stage_tag]/$name" mkdir -p "$(dirname "$elf")" "$workdir" diff --git a/tests/run.sh b/tests/run.sh @@ -9,7 +9,7 @@ ## a whole arch's suite is one podman invocation. ## ## The one bit of work that stays on the host is the lint preflight -## for the m1pp and p1 suites: scripts/lint.sh runs python, which the +## for the m1pp and p1 suites: tools/lint.sh runs python, which the ## busybox container doesn't carry. Names that fail lint are reported ## here (FAIL + diagnostic) and excluded from the in-container batch. 
## @@ -128,9 +128,9 @@ lint_preflight() { for name in $all; do raw_src=$dir/$name.$raw_ext if [ -e "$raw_src" ] \ - && ! ARCH=$arch sh scripts/lint.sh "$raw_src" >/dev/null 2>&1; then + && ! ARCH=$arch sh tools/lint.sh "$raw_src" >/dev/null 2>&1; then echo " FAIL [$arch] $name" - ARCH=$arch sh scripts/lint.sh "$raw_src" 2>&1 \ + ARCH=$arch sh tools/lint.sh "$raw_src" 2>&1 \ | sed 's/^/ /' >&2 || true FAIL=$((FAIL + 1)) else diff --git a/tests/seed-accept.sh b/tests/seed-accept.sh @@ -20,7 +20,7 @@ ## ## All three modes target ARCH=aarch64 (the only seed-driver-complete ## arch today). Prereq for every mode: build/aarch64/podman/boot{0..6}/ -## populated via `./scripts/boot.sh aarch64` (default DRIVER=podman), +## populated via `./boot/boot.sh aarch64` (default DRIVER=podman), ## including boot6/Image as the seed kernel. ## ## Usage: @@ -46,19 +46,19 @@ SEED=build/$ARCH/seed KERNEL=$PODMAN/boot6/Image [ -f "$KERNEL" ] || { - echo "missing $KERNEL — run ./scripts/boot.sh $ARCH (default DRIVER=podman) first" >&2 + echo "missing $KERNEL — run ./boot/boot.sh $ARCH (default DRIVER=podman) first" >&2 exit 1 } # ─── Mode: boot34 ───────────────────────────────────────────────────── if [ "$MODE" = "boot34" ]; then [ -x $PODMAN/boot3/tcc0 ] || { - echo "$PODMAN/boot3/tcc0 missing — run scripts/boot3.sh aarch64" >&2 + echo "$PODMAN/boot3/tcc0 missing — run boot/boot3.sh aarch64" >&2 exit 1 } - echo "[seed-accept boot34] DRIVER=seed scripts/boot3.sh $ARCH" - DRIVER=seed scripts/boot3.sh $ARCH + echo "[seed-accept boot34] DRIVER=seed boot/boot3.sh $ARCH" + DRIVER=seed boot/boot3.sh $ARCH if ! 
cmp -s $SEED/boot3/tcc0 $PODMAN/boot3/tcc0; then s_seed=$(wc -c < $SEED/boot3/tcc0) @@ -73,12 +73,12 @@ if [ "$MODE" = "boot34" ]; then fi [ -x $PODMAN/boot4/tcc3 ] || { - echo "$PODMAN/boot4/tcc3 missing — run scripts/boot4.sh aarch64 under podman first" >&2 + echo "$PODMAN/boot4/tcc3 missing — run boot/boot4.sh aarch64 under podman first" >&2 exit 1 } - echo "[seed-accept boot34] DRIVER=seed scripts/boot4.sh $ARCH" - DRIVER=seed scripts/boot4.sh $ARCH + echo "[seed-accept boot34] DRIVER=seed boot/boot4.sh $ARCH" + DRIVER=seed boot/boot4.sh $ARCH fail=0 # All boot4 outputs — including the intermediate crt1.o / libc.a / @@ -102,18 +102,18 @@ fi # ─── Mode: boot5 ────────────────────────────────────────────────────── if [ "$MODE" = "boot5" ]; then [ -d $PODMAN/boot5 ] || { - echo "$PODMAN/boot5 missing — run scripts/boot5.sh aarch64" >&2 + echo "$PODMAN/boot5 missing — run boot/boot5.sh aarch64" >&2 exit 1 } for f in libc.a crt1.o crti.o crtn.o hello; do [ -e $PODMAN/boot5/$f ] || { - echo "$PODMAN/boot5/$f missing — run scripts/boot5.sh aarch64" >&2 + echo "$PODMAN/boot5/$f missing — run boot/boot5.sh aarch64" >&2 exit 1 } done - echo "[seed-accept boot5] DRIVER=seed scripts/boot5.sh $ARCH" - DRIVER=seed scripts/boot5.sh $ARCH + echo "[seed-accept boot5] DRIVER=seed boot/boot5.sh $ARCH" + DRIVER=seed boot/boot5.sh $ARCH fails=0 for f in libc.a crt1.o crti.o crtn.o hello; do diff --git a/tools/count-lines.sh b/tools/count-lines.sh @@ -0,0 +1,32 @@ +#!/bin/sh +## count-lines.sh — line counts for the core sources. +## +## Skips ZERO/comment/blank lines per the existing cloc convention. +## Pass file paths as arguments, or pass none to read from stdin +## (one path per line). Prints `<count> <path>` per file plus a +## trailing total. 
+## +## Usage: +## sh tools/count-lines.sh file1 file2 … +## printf '%s\n' file1 file2 | sh tools/count-lines.sh + +set -eu + +if [ "$#" -gt 0 ]; then + FILES="$*" +else + FILES=$(cat) +fi + +total=0 +for f in $FILES; do + [ -e "$f" ] || { echo "count-lines: missing $f" >&2; exit 1; } + n=$(grep -v "^ZERO.*" "$f" \ + | grep -v "^[[:space:]]*#.*" \ + | grep -v "^[[:space:]]*;.*" \ + | grep -v "^$" \ + | wc -l) + printf '%6d %s\n' "$n" "$f" + total=$((total + n)) +done +printf '%6d total\n' "$total" diff --git a/tools/diag-livebootstrap-qemu.sh b/tools/diag-livebootstrap-qemu.sh @@ -0,0 +1,193 @@ +#!/bin/sh +## tools/diag-livebootstrap-qemu.sh — DIAGNOSTIC ONLY +## +## Runs live-bootstrap's stage0 → tcc-0.9.27 chain inside a busybox:musl +## container under linux/amd64 QEMU emulation, to determine whether the +## tcc-boot0-mes startup SEGV (Issue §3 in docs/TCC.md) is QEMU's fault +## or specific to our build. +## +## **This script is diagnostic, not part of the bootstrap chain.** It +## intentionally invokes live-bootstrap's M2-Planet / Mes / MesCC path +## — the very chain our project replaces. Nothing it produces feeds +## into the project's deliverables. Read result, then ignore. +## +## Outcome interpretation: +## - chain reaches tcc-0.9.27 and `tcc -version` works: +## QEMU is sound. Our tcc-boot0-mes SEGV is build-specific +## (codegen/runtime bug). Action: backport tcc 0.9.28rc fixes +## or compare disasm against live-bootstrap's tcc-boot0. +## - chain SEGVs at tcc-mes / tcc-boot0 / tcc-0.9.27: +## QEMU is broken for these binaries on macOS arm64. Action: +## either run on native x86_64 hardware, or use the linux/386 +## (32-bit) path with QEMU and accept the arch mismatch. +## +## Setup (host): +## - LIVE_BOOTSTRAP=<path>: required env var pointing at a working +## live-bootstrap checkout. Distfiles are taken from +## $LIVE_BOOTSTRAP/../lb-work/distfiles (the sibling layout +## live-bootstrap's own scripts produce). 
The diagnostic also +## needs tcc-0.9.27.tar.bz2; this script fetches it via curl on +## first run if missing. Stage0-posix tools (M2-Planet, +## mescc-tools, etc.) are bundled in +## $LIVE_BOOTSTRAP/seed/stage0-posix and don't need distfiles. +## +## The script has no implicit `../live-bootstrap` lookup — the rest +## of the build is in-tree, and this diagnostic is the sole script +## that needs the upstream tree, so make the dependency explicit. +## +## Pipeline: +## 1. (host) populate distfiles (curl tcc-0.9.27 if needed) +## 2. (host) assemble rootfs at build/diag-livebootstrap/rootfs/ +## - copy seed/stage0-posix/* (the hex0 binaries + M2-Planet +## + mescc-tools sources) to / +## - copy seed/{after,seed,preseeded}.kaem to / +## - copy steps/, lib/ to / +## - copy distfiles to /external/distfiles +## - write /steps/bootstrap.cfg with ARCH=amd64, CHROOT=True +## - truncate /steps/manifest after `build: tcc-0.9.27` so the +## chain stops at our target instead of running 200+ builds +## 3. (busybox:musl, linux/amd64) chroot into rootfs and run +## /bootstrap-seeds/POSIX/AMD64/kaem-optional-seed +## +## Runtime: many hours under QEMU emulation on macOS arm64. Set aside a +## work block. Set DIAG_PREP_ONLY=1 to do steps 1-2 and skip the run. +## +## Usage: +## LIVE_BOOTSTRAP=<path> tools/diag-livebootstrap-qemu.sh + +set -eu + +ROOT=$(cd "$(dirname "$0")/.."
&& pwd) +: "${LIVE_BOOTSTRAP:?set LIVE_BOOTSTRAP=<path-to-live-bootstrap-checkout>}" +LB=$(cd "$LIVE_BOOTSTRAP" && pwd) +DISTFILES=${LB_DISTFILES:-$LB/../lb-work/distfiles} +WORK=$ROOT/build/diag-livebootstrap +ROOTFS=$WORK/rootfs + +[ -d "$LB" ] || { echo "missing live-bootstrap at $LB" >&2; exit 1; } +[ -d "$DISTFILES" ] || { echo "missing distfiles at $DISTFILES (override with LB_DISTFILES=<path>)" >&2; exit 1; } +command -v podman >/dev/null 2>&1 || { echo "podman required" >&2; exit 2; } + +# --- (1) ensure distfiles populated ---------------------------------- +NEED="mes-0.27.1.tar.gz tcc-0.9.26.tar.gz tcc-0.9.27.tar.bz2 nyacc-1.00.2-lb1.tar.gz" +for f in $NEED; do + if [ ! -r "$DISTFILES/$f" ]; then + echo "missing $DISTFILES/$f" + case "$f" in + tcc-0.9.27.tar.bz2) + echo "fetching from savannah..." + curl --fail --location \ + "https://download.savannah.gnu.org/releases/tinycc/$f" \ + -o "$DISTFILES/$f" + ;; + *) + echo " (cannot auto-fetch $f — please populate $DISTFILES/$f)" >&2 + exit 1 + ;; + esac + fi +done +echo "distfiles ok: $NEED" + +# --- (2) assemble rootfs -------------------------------------------- +echo "=== assembling rootfs at $ROOTFS ===" +rm -rf "$WORK" +mkdir -p "$ROOTFS" + +# seed/stage0-posix → / (stage0 tools, M2-Planet, mescc-tools, kaem etc) +cp -R "$LB/seed/stage0-posix/." 
"$ROOTFS/" + +# seed/*.kaem → / (preseeded.kaem, seed.kaem, after.kaem) +cp "$LB/seed/"*.kaem "$ROOTFS/" + +# Other seed files (configurator binaries, checksums) +cp "$LB/seed/configurator.c" "$LB/seed/configurator.amd64.checksums" "$ROOTFS/" 2>/dev/null || true +cp "$LB/seed/script-generator.c" "$LB/seed/script-generator.amd64.checksums" "$ROOTFS/" 2>/dev/null || true + +# steps/, lib/ from live-bootstrap +cp -R "$LB/steps" "$ROOTFS/" +cp -R "$LB/lib" "$ROOTFS/" + +# distfiles → /external/distfiles (live-bootstrap's steps/env sets +# DISTFILES=/external/distfiles, and helpers/build steps read from there) +mkdir -p "$ROOTFS/external/distfiles" +for f in $NEED; do + cp "$DISTFILES/$f" "$ROOTFS/external/distfiles/" +done + +# Truncate manifest to stop after the first `build: tcc-0.9.27`. The +# manifest has a header comment block (lines 1-33), then build steps +# starting at line 34. Line 38 is the first tcc-0.9.27 build. +awk ' + /^build: tcc-0\.9\.27/ && !seen_tcc27 { + print + seen_tcc27 = 1 + next + } + seen_tcc27 { + # drop everything after first tcc-0.9.27 build + next + } + { print } +' "$LB/steps/manifest" > "$ROOTFS/steps/manifest" + +# bootstrap.cfg — mirrors what rootfs.py would write for +# `--arch amd64 --chroot --mirrors file:///distfiles`. We disable +# every optional pipeline (kernels, configurator, fiwix) since this +# is a pass1-only diagnostic. +cat > "$ROOTFS/steps/bootstrap.cfg" <<'EOF' +ARCH=amd64 +ARCH_DIR=AMD64 +FORCE_TIMESTAMPS=False +CHROOT=True +UPDATE_CHECKSUMS=False +JOBS=2 +SWAP_SIZE=0 +FINAL_JOBS=2 +INTERNAL_CI=False +INTERACTIVE=False +QEMU=False +BARE_METAL=False +DISK=sda1 +KERNEL_BOOTSTRAP=False +BUILD_KERNELS=False +CONFIGURATOR=False +MIRRORS_LEN=0 +EOF + +echo "rootfs assembled." +du -sh "$ROOTFS" 2>/dev/null || true + +if [ "${DIAG_PREP_ONLY:-0}" = "1" ]; then + echo "DIAG_PREP_ONLY=1 — skipping container run." 
+ exit 0 +fi + +# --- (3) run kaem-optional-seed in busybox:musl under linux/amd64 ---- +echo +echo "=== launching kaem-optional-seed via busybox:musl (linux/amd64 QEMU) ===" +echo " long-running. ctrl-C aborts. log lines stream below." +echo + +# busybox:musl ships chroot, sh, tar, awk, etc. — sufficient. +# /proc /dev /sys are mounted by podman; chroot inherits them via bind. +# We mount the rootfs as /rootfs inside the container, then chroot. +podman run --rm -i --platform linux/amd64 \ + -v "$ROOTFS":/rootfs \ + docker.io/library/busybox:musl sh -s <<'CONTAINER_SCRIPT' +set -eu + +# Ensure /proc /dev /sys exist inside the chroot for kaem etc. +mkdir -p /rootfs/proc /rootfs/dev /rootfs/sys /rootfs/tmp +mount -t proc proc /rootfs/proc 2>/dev/null || true +mount --rbind /dev /rootfs/dev 2>/dev/null || true +mount --rbind /sys /rootfs/sys 2>/dev/null || true + +echo "--- starting chroot kaem ---" +exec env -i PATH=/bin chroot /rootfs /bootstrap-seeds/POSIX/AMD64/kaem-optional-seed +CONTAINER_SCRIPT + +rc=$? +echo +echo "=== kaem exit=$rc ===" +exit "$rc" diff --git a/tools/disasm-elf.sh b/tools/disasm-elf.sh @@ -0,0 +1,131 @@ +#!/bin/sh +## disasm-elf.sh — disassemble a hex2pp-emitted ELF with llvm-objdump. +## +## Our seed ELF.hex2 sets ph_memsz to 512 MB (so the BSS region past +## ELF_end is mappable), but ph_filesz is just the on-disk size. +## llvm-objdump trusts memsz when laying out the segment for +## disassembly and runs off the end of the file with +## "The end of the file was unexpectedly encountered". The seed ELF +## also lacks section headers, so --start-address/--stop-address +## doesn't help on its own. +## +## Workaround: copy the ELF, patch ph_memsz down to ph_filesz, then +## disassemble. Output goes to stdout. +## +## We also auto-default --start-address to e_entry so the ELF header + +## program header bytes at the top of PT_LOAD aren't decoded as bogus +## instructions. Pass an explicit --start-address (e.g.
0x600000) to
+## override and see the header bytes.
+##
+## boot-build-p1*.sh writes a one-line sidecar at <elf>.workdir pointing
+## at build/$ARCH/.work/<src-without-ext>/. P1pp builds store
+## expanded.hex2pp there; legacy raw-P1 seed builds store prog.hex2.
+## When that sidecar is present we extract a label map via
+## tools/m1-symbols.py and:
+##   - default --stop-address to :_text_end if that sentinel label is
+##     present, so trailing rodata doesn't decode as bogus instructions
+##   - inject "<label>:" headers and rewrite "<PT_LOAD#0+0xNNN>" xrefs
+##     in the disasm output
+## Pass NO_LABELS=1 to disable both behaviors.
+##
+## Usage: disasm-elf.sh <elf> [llvm-objdump args...]
+##   defaults to `-d` (text only). For data + text, pass `-D`.
+
+set -eu
+
+[ "$#" -ge 1 ] || { echo "usage: $0 <elf> [llvm-objdump args...]" >&2; exit 2; }
+
+ELF=$1; shift
+[ -e "$ELF" ] || { echo "missing $ELF" >&2; exit 1; }
+
+OBJDUMP=${LLVM_OBJDUMP:-llvm-objdump}
+TRIPLE=${TRIPLE:-aarch64-linux-gnu}
+
+# ELF fields we read (little-endian 8-byte):
+#   e_entry at file offset 0x18
+#   ph_filesz at file offset 0x60 (e_phoff 0x40 + 0x20)
+#   ph_memsz at file offset 0x68 (e_phoff 0x40 + 0x28)
+# Single-program-header layout, per our seed ELF.
+read_le8() {
+    od -An -tu8 -N8 -j"$2" "$1" | tr -d ' \n'
+}
+write_le8() {
+    # $1 file, $2 offset, $3 value
+    printf '%016x' "$3" \
+        | sed 's/\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)/\8\7\6\5\4\3\2\1/' \
+        | xxd -r -p \
+        | dd of="$1" bs=1 seek="$2" count=8 conv=notrunc status=none
+}
+
+ENTRY=$(read_le8 "$ELF" 24)
+FILESZ=$(read_le8 "$ELF" 96)
+MEMSZ=$(read_le8 "$ELF" 104)
+
+TMP=$(mktemp -t disasm-elf.XXXXXX)
+trap 'rm -f "$TMP"' EXIT
+cp "$ELF" "$TMP"
+chmod u+w "$TMP"
+
+if [ "$MEMSZ" != "$FILESZ" ]; then
+    write_le8 "$TMP" 104 "$FILESZ"
+fi
+
+# Default to -d if no objdump flags given.
+[ "$#" -eq 0 ] && set -- -d
+
+# Auto-skip the ELF header + program header by defaulting
+# --start-address to e_entry, unless the user supplied their own.
+have_start=0
+have_stop=0
+for arg in "$@"; do
+    case "$arg" in
+        --start-address=*|--start-address) have_start=1;;
+        --stop-address=*|--stop-address) have_stop=1;;
+    esac
+done
+if [ "$have_start" -eq 0 ]; then
+    set -- "--start-address=0x$(printf '%x' "$ENTRY")" "$@"
+fi
+
+# Locate expanded.hex2pp (new P1pp path) or prog.hex2 (legacy raw-P1
+# path) via the <elf>.workdir sidecar produced by boot-build-p1*.sh.
+# The sidecar holds a repo-relative path (build/$ARCH/.work/<src>/), so
+# resolve it against the repo root inferred from this script's location.
+HERE=$(dirname "$0")
+REPO_ROOT=$(cd "$HERE/.." && pwd)
+HEX2=""
+if [ -e "$ELF.workdir" ]; then
+    workdir=$(cat "$ELF.workdir")
+    case "$workdir" in
+        /*) ;; # absolute, leave alone
+        *) workdir="$REPO_ROOT/$workdir" ;;
+    esac
+    if [ -e "$workdir/expanded.hex2pp" ]; then
+        HEX2="$workdir/expanded.hex2pp"
+    elif [ -e "$workdir/prog.hex2" ]; then
+        HEX2="$workdir/prog.hex2"
+    else
+        echo "disasm-elf: $ELF.workdir -> $workdir, but no expanded.hex2pp or prog.hex2 there" >&2
+    fi
+elif [ "${NO_LABELS:-0}" != "1" ]; then
+    echo "disasm-elf: no $ELF.workdir sidecar; rebuild for label annotation" >&2
+fi
+MAP=""
+if [ "${NO_LABELS:-0}" != "1" ] && [ -n "$HEX2" ]; then
+    MAP=$(mktemp -t disasm-elf-map.XXXXXX)
+    trap 'rm -f "$TMP" "$MAP"' EXIT
+    "$HERE/m1-symbols.py" map "$HEX2" > "$MAP"
+    # Default --stop-address to :_text_end if no user value and the
+    # sentinel exists in the map.
+    if [ "$have_stop" -eq 0 ]; then
+        text_end=$(awk '$2 == "_text_end" {print $1; exit}' "$MAP")
+        [ -n "$text_end" ] && set -- "--stop-address=$text_end" "$@"
+    fi
+fi
+
+if [ -n "$MAP" ]; then
+    "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" \
+        | "$HERE/m1-symbols.py" annotate "$MAP"
+else
+    "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" # not exec'd: exec would skip the EXIT trap and leak $TMP
+fi
diff --git a/scripts/lint.sh b/tools/lint.sh
diff --git a/scripts/m1-symbols.py b/tools/m1-symbols.py
diff --git a/vendor/boot2-include/stdarg.h b/vendor/boot2-include/stdarg.h
@@ -1,39 +0,0 @@
-/* boot2 stdarg.h — shadows mes/include/stdarg.h for both flatten
- * paths (scripts/{stage1,libc}-flatten.sh both have -I on the
- * containing dir ahead of mes's include tree). Routes va_* through
- * __builtin_va_*, so tcc.flat.c and libc.flat.c compile cleanly
- * under both our cc.scm (which recognizes __builtin_va_list and
- * __builtin_va_start/arg/end) and stock gcc/clang (where they're
- * native).
- *
- * Mes's stdarg.h has a similar __builtin-routed branch but only
- * activates under __riscv. We can't set -D __riscv at flatten time
- * without also flipping setjmp.h and tcc-internal arch logic, so we
- * shadow the whole header instead.
- */
-#ifndef __MES_STDARG_H
-#define __MES_STDARG_H 1
-
-typedef __builtin_va_list va_list;
-
-#define va_start(v, l) __builtin_va_start((v), (l))
-#define va_end(v) __builtin_va_end((v))
-#define va_arg(v, t) __builtin_va_arg((v), t)
-#define va_arg8(ap, type) va_arg((ap), type)
-#define va_copy(d, s) __builtin_va_copy((d), (s))
-
-/* mes/include/stdarg.h forward-declares the v* family here (instead
- * of in <stdio.h>); tcc.c calls vsnprintf without ever including
- * <stdio.h>, so dropping mes's stdarg.h in favor of this shim must
- * still leak these prototypes. FILE and size_t come from a prior
- * include in mes-libc TUs; tcc.c works because it includes
- * <sys/types.h> for size_t and uses (FILE*) implicitly.
*/ -int vexec (char const *file_name, va_list ap); -int vfprintf (FILE *stream, char const *template, va_list ap); -int vfscanf (FILE *stream, char const *template, va_list ap); -int vprintf (char const *format, va_list ap); -int vsprintf (char *str, char const *format, va_list ap); -int vsnprintf(char *str, size_t size, char const *format, va_list ap); -int vsscanf (char const *s, char const *template, va_list ap); - -#endif /* __MES_STDARG_H */ diff --git a/vendor/upstream/musl-1.2.5.tar.gz b/vendor/musl/1.2.5.tar.gz Binary files differ. diff --git a/vendor/upstream/musl-1.2.5-deletes.txt b/vendor/musl/deletes.txt diff --git a/vendor/upstream/musl-1.2.5-generated/aarch64/alltypes.h b/vendor/musl/generated/aarch64/alltypes.h diff --git a/vendor/upstream/musl-1.2.5-generated/aarch64/syscall.h b/vendor/musl/generated/aarch64/syscall.h diff --git a/vendor/upstream/musl-1.2.5-generated/riscv64/alltypes.h b/vendor/musl/generated/riscv64/alltypes.h diff --git a/vendor/upstream/musl-1.2.5-generated/riscv64/syscall.h b/vendor/musl/generated/riscv64/syscall.h diff --git a/vendor/upstream/musl-1.2.5-generated/x86_64/alltypes.h b/vendor/musl/generated/x86_64/alltypes.h diff --git a/vendor/upstream/musl-1.2.5-generated/x86_64/syscall.h b/vendor/musl/generated/x86_64/syscall.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/aarch64/atomic_arch.h b/vendor/musl/overrides/arch/aarch64/atomic_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/aarch64/crt_arch.h b/vendor/musl/overrides/arch/aarch64/crt_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/aarch64/pthread_arch.h b/vendor/musl/overrides/arch/aarch64/pthread_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/aarch64/syscall_arch.h b/vendor/musl/overrides/arch/aarch64/syscall_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/riscv64/atomic_arch.h b/vendor/musl/overrides/arch/riscv64/atomic_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/riscv64/crt_arch.h 
b/vendor/musl/overrides/arch/riscv64/crt_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/riscv64/pthread_arch.h b/vendor/musl/overrides/arch/riscv64/pthread_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/riscv64/syscall_arch.h b/vendor/musl/overrides/arch/riscv64/syscall_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/arch/x86_64/syscall_arch.h b/vendor/musl/overrides/arch/x86_64/syscall_arch.h diff --git a/vendor/upstream/musl-1.2.5-overrides/include/complex.h b/vendor/musl/overrides/include/complex.h diff --git a/vendor/upstream/musl-1.2.5-overrides/src/include/features.h b/vendor/musl/overrides/src/include/features.h diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/aarch64/atomic.s b/vendor/musl/overrides/src/internal/aarch64/atomic.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/aarch64/get_tp.s b/vendor/musl/overrides/src/internal/aarch64/get_tp.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/aarch64/syscall.s b/vendor/musl/overrides/src/internal/aarch64/syscall.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/riscv64/get_tp.s b/vendor/musl/overrides/src/internal/riscv64/get_tp.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/riscv64/syscall.s b/vendor/musl/overrides/src/internal/riscv64/syscall.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/syscall.h b/vendor/musl/overrides/src/internal/syscall.h diff --git a/vendor/upstream/musl-1.2.5-overrides/src/internal/x86_64/syscall.s b/vendor/musl/overrides/src/internal/x86_64/syscall.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/network/lookup.h b/vendor/musl/overrides/src/network/lookup.h diff --git a/vendor/upstream/musl-1.2.5-overrides/src/network/lookup_ipliteral.c b/vendor/musl/overrides/src/network/lookup_ipliteral.c diff --git a/vendor/upstream/musl-1.2.5-overrides/src/network/lookup_name.c b/vendor/musl/overrides/src/network/lookup_name.c diff --git 
a/vendor/upstream/musl-1.2.5-overrides/src/network/lookup_serv.c b/vendor/musl/overrides/src/network/lookup_serv.c diff --git a/vendor/upstream/musl-1.2.5-overrides/src/thread/aarch64/__set_thread_area.s b/vendor/musl/overrides/src/thread/aarch64/__set_thread_area.s diff --git a/vendor/upstream/musl-1.2.5-overrides/src/thread/riscv64/__set_thread_area.s b/vendor/musl/overrides/src/thread/riscv64/__set_thread_area.s diff --git a/vendor/upstream/musl-1.2.5-tcc.patch b/vendor/musl/patches/tcc.patch diff --git a/vendor/upstream/musl-1.2.5-skip-aarch64.txt b/vendor/musl/skip-aarch64.txt diff --git a/vendor/upstream/musl-1.2.5-skip-amd64.txt b/vendor/musl/skip-amd64.txt diff --git a/vendor/upstream/musl-1.2.5-skip-riscv64.txt b/vendor/musl/skip-riscv64.txt diff --git a/vendor/upstream/tcc-0.9.26.tar.gz b/vendor/tcc/0.9.26.tar.gz Binary files differ. diff --git a/scripts/simple-patches/tcc-0.9.26-lb/addback-fileopen.after b/vendor/tcc/patches-lb/addback-fileopen.after diff --git a/scripts/simple-patches/tcc-0.9.26-lb/addback-fileopen.before b/vendor/tcc/patches-lb/addback-fileopen.before diff --git a/scripts/simple-patches/tcc-0.9.26-lb/remove-fileopen.after b/vendor/tcc/patches-lb/remove-fileopen.after diff --git a/scripts/simple-patches/tcc-0.9.26-lb/remove-fileopen.before b/vendor/tcc/patches-lb/remove-fileopen.before diff --git a/scripts/simple-patches/tcc-0.9.26/aarch64-stdarg-array.after b/vendor/tcc/patches/aarch64-stdarg-array.after diff --git a/scripts/simple-patches/tcc-0.9.26/aarch64-stdarg-array.before b/vendor/tcc/patches/aarch64-stdarg-array.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-asm-include-libtcc-c.after b/vendor/tcc/patches/arm64-asm-include-libtcc-c.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-asm-include-libtcc-c.before b/vendor/tcc/patches/arm64-asm-include-libtcc-c.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-asm-include-tcc-h.after b/vendor/tcc/patches/arm64-asm-include-tcc-h.after diff --git 
a/scripts/simple-patches/tcc-0.9.26/arm64-asm-include-tcc-h.before b/vendor/tcc/patches/arm64-asm-include-tcc-h.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.after b/vendor/tcc/patches/arm64-link-asm-relocs.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-asm-relocs.before b/vendor/tcc/patches/arm64-link-asm-relocs.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.after b/vendor/tcc/patches/arm64-link-prel64-condbr.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-link-prel64-condbr.before b/vendor/tcc/patches/arm64-link-prel64-condbr.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-load-const-lvalue.after b/vendor/tcc/patches/arm64-load-const-lvalue.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-load-const-lvalue.before b/vendor/tcc/patches/arm64-load-const-lvalue.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-store-const-lvalue.after b/vendor/tcc/patches/arm64-store-const-lvalue.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-store-const-lvalue.before b/vendor/tcc/patches/arm64-store-const-lvalue.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.after b/vendor/tcc/patches/arm64-svcul-no-truncate-store.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate-store.before b/vendor/tcc/patches/arm64-svcul-no-truncate-store.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.after b/vendor/tcc/patches/arm64-svcul-no-truncate.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-svcul-no-truncate.before b/vendor/tcc/patches/arm64-svcul-no-truncate.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-tok-include-tcctok-h.after b/vendor/tcc/patches/arm64-tok-include-tcctok-h.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-tok-include-tcctok-h.before b/vendor/tcc/patches/arm64-tok-include-tcctok-h.before diff --git 
a/scripts/simple-patches/tcc-0.9.26/arm64-va-arg-pointer.after b/vendor/tcc/patches/arm64-va-arg-pointer.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-va-arg-pointer.before b/vendor/tcc/patches/arm64-va-arg-pointer.before diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-va-pointer-operand.after b/vendor/tcc/patches/arm64-va-pointer-operand.after diff --git a/scripts/simple-patches/tcc-0.9.26/arm64-va-pointer-operand.before b/vendor/tcc/patches/arm64-va-pointer-operand.before diff --git a/scripts/simple-patches/tcc-0.9.26/asm-hash-bol-only.after b/vendor/tcc/patches/asm-hash-bol-only.after diff --git a/scripts/simple-patches/tcc-0.9.26/asm-hash-bol-only.before b/vendor/tcc/patches/asm-hash-bol-only.before diff --git a/scripts/simple-patches/tcc-0.9.26/bss-start-symbol.after b/vendor/tcc/patches/bss-start-symbol.after diff --git a/scripts/simple-patches/tcc-0.9.26/bss-start-symbol.before b/vendor/tcc/patches/bss-start-symbol.before diff --git a/scripts/simple-patches/tcc-0.9.26/const-divzero-shortcircuit-float.after b/vendor/tcc/patches/const-divzero-shortcircuit-float.after diff --git a/scripts/simple-patches/tcc-0.9.26/const-divzero-shortcircuit-float.before b/vendor/tcc/patches/const-divzero-shortcircuit-float.before diff --git a/scripts/simple-patches/tcc-0.9.26/const-divzero-shortcircuit-int.after b/vendor/tcc/patches/const-divzero-shortcircuit-int.after diff --git a/scripts/simple-patches/tcc-0.9.26/const-divzero-shortcircuit-int.before b/vendor/tcc/patches/const-divzero-shortcircuit-int.before diff --git a/scripts/simple-patches/tcc-0.9.26/date-time-stub.after b/vendor/tcc/patches/date-time-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/date-time-stub.before b/vendor/tcc/patches/date-time-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/elfinterp-stub.after b/vendor/tcc/patches/elfinterp-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/elfinterp-stub.before b/vendor/tcc/patches/elfinterp-stub.before diff --git 
a/scripts/simple-patches/tcc-0.9.26/files/arm64-asm.c b/vendor/tcc/patches/files/arm64-asm.c diff --git a/scripts/simple-patches/tcc-0.9.26/files/arm64-tok.h b/vendor/tcc/patches/files/arm64-tok.h diff --git a/scripts/simple-patches/tcc-0.9.26/getclock-ms-stub.after b/vendor/tcc/patches/getclock-ms-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/getclock-ms-stub.before b/vendor/tcc/patches/getclock-ms-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/getcwd-stub.after b/vendor/tcc/patches/getcwd-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/getcwd-stub.before b/vendor/tcc/patches/getcwd-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/ldexp-stub.after b/vendor/tcc/patches/ldexp-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/ldexp-stub.before b/vendor/tcc/patches/ldexp-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/lex-char-unsigned.after b/vendor/tcc/patches/lex-char-unsigned.after diff --git a/scripts/simple-patches/tcc-0.9.26/lex-char-unsigned.before b/vendor/tcc/patches/lex-char-unsigned.before diff --git a/scripts/simple-patches/tcc-0.9.26/load-obj-accept-sht-note.after b/vendor/tcc/patches/load-obj-accept-sht-note.after diff --git a/scripts/simple-patches/tcc-0.9.26/load-obj-accept-sht-note.before b/vendor/tcc/patches/load-obj-accept-sht-note.before diff --git a/scripts/simple-patches/tcc-0.9.26/longjmp-stub.after b/vendor/tcc/patches/longjmp-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/longjmp-stub.before b/vendor/tcc/patches/longjmp-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/lp64-long-constant.after b/vendor/tcc/patches/lp64-long-constant.after diff --git a/scripts/simple-patches/tcc-0.9.26/lp64-long-constant.before b/vendor/tcc/patches/lp64-long-constant.before diff --git a/scripts/simple-patches/tcc-0.9.26/note-section-sht-note.after b/vendor/tcc/patches/note-section-sht-note.after diff --git a/scripts/simple-patches/tcc-0.9.26/note-section-sht-note.before 
b/vendor/tcc/patches/note-section-sht-note.before diff --git a/scripts/simple-patches/tcc-0.9.26/pt-note-phdr.after b/vendor/tcc/patches/pt-note-phdr.after diff --git a/scripts/simple-patches/tcc-0.9.26/pt-note-phdr.before b/vendor/tcc/patches/pt-note-phdr.before diff --git a/scripts/simple-patches/tcc-0.9.26/riscv-stdarg-fix.after b/vendor/tcc/patches/riscv-stdarg-fix.after diff --git a/scripts/simple-patches/tcc-0.9.26/riscv-stdarg-fix.before b/vendor/tcc/patches/riscv-stdarg-fix.before diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-cvt-int-zext.after b/vendor/tcc/patches/riscv64-cvt-int-zext.after diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-cvt-int-zext.before b/vendor/tcc/patches/riscv64-cvt-int-zext.before diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-elf-start-addr.after b/vendor/tcc/patches/riscv64-elf-start-addr.after diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-elf-start-addr.before b/vendor/tcc/patches/riscv64-elf-start-addr.before diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-gen-cvt-sxtw.after b/vendor/tcc/patches/riscv64-gen-cvt-sxtw.after diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-gen-cvt-sxtw.before b/vendor/tcc/patches/riscv64-gen-cvt-sxtw.before diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-load-ptr-zext.after b/vendor/tcc/patches/riscv64-load-ptr-zext.after diff --git a/scripts/simple-patches/tcc-0.9.26/riscv64-load-ptr-zext.before b/vendor/tcc/patches/riscv64-load-ptr-zext.before diff --git a/scripts/simple-patches/tcc-0.9.26/set-environment-stub.after b/vendor/tcc/patches/set-environment-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/set-environment-stub.before b/vendor/tcc/patches/set-environment-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after b/vendor/tcc/patches/stdarg-builtin-aliases.after diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before b/vendor/tcc/patches/stdarg-builtin-aliases.before diff 
--git a/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.after b/vendor/tcc/patches/strip-file-prefix.after diff --git a/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.before b/vendor/tcc/patches/strip-file-prefix.before diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.after b/vendor/tcc/patches/tcc-h-gen-expr64-arm64.after diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-h-gen-expr64-arm64.before b/vendor/tcc/patches/tcc-h-gen-expr64-arm64.before diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-is-native-stub.after b/vendor/tcc/patches/tcc-is-native-stub.after diff --git a/scripts/simple-patches/tcc-0.9.26/tcc-is-native-stub.before b/vendor/tcc/patches/tcc-is-native-stub.before diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.after b/vendor/tcc/patches/tccasm-arm64-quad-asm-data.after diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad-asm-data.before b/vendor/tcc/patches/tccasm-arm64-quad-asm-data.before diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.after b/vendor/tcc/patches/tccasm-arm64-quad.after diff --git a/scripts/simple-patches/tcc-0.9.26/tccasm-arm64-quad.before b/vendor/tcc/patches/tccasm-arm64-quad.before diff --git a/scripts/simple-patches/tcc-0.9.26/tccrun-include.after b/vendor/tcc/patches/tccrun-include.after diff --git a/scripts/simple-patches/tcc-0.9.26/tccrun-include.before b/vendor/tcc/patches/tccrun-include.before diff --git a/scripts/simple-patches/tcc-0.9.26/tinyc-define.after b/vendor/tcc/patches/tinyc-define.after diff --git a/scripts/simple-patches/tcc-0.9.26/tinyc-define.before b/vendor/tcc/patches/tinyc-define.before diff --git a/scripts/simple-patches/tcc-0.9.26/va_list-no-abort.after b/vendor/tcc/patches/va_list-no-abort.after diff --git a/scripts/simple-patches/tcc-0.9.26/va_list-no-abort.before b/vendor/tcc/patches/va_list-no-abort.before diff --git a/scripts/simple-patches/tcc-0.9.26/x86_64-static-plt32.after 
b/vendor/tcc/patches/x86_64-static-plt32.after diff --git a/scripts/simple-patches/tcc-0.9.26/x86_64-static-plt32.before b/vendor/tcc/patches/x86_64-static-plt32.before