commit 6c7c1475733065ae0d98ff556718651667d1891d
parent 0ff7891d3dbe43aae2d5fc549d86b2e7e01ab029
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 6 May 2026 12:07:41 -0700
A0: prep-src.sh + prep-musl.sh build canonical src tree at build/<arch>/src/
Two host-side source-prep scripts implement docs/PLAN.md §A0:
- prep-src.sh (A0a): vendored seed copy, P1/M1pp/hex2pp/catm/scheme1/cc
sources, seed-kernel sources, tcc + libc flatten, musl unpack with
overrides + deletes + generated alltypes.h/syscall.h applied.
- prep-musl.sh (A0b): copies the committed per-arch skip list into
src/musl/skip.txt and removes every listed path from the tree, so
what's left is exactly what boot5 will compile.
Boot scripts unchanged — they still read from their current paths.
This lands the canonical tree as a parallel artifact; the bootN switch
is a follow-up.
Diffstat:
| A | scripts/prep-musl.sh | | | 73 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | scripts/prep-src.sh | | | 183 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 256 insertions(+), 0 deletions(-)
diff --git a/scripts/prep-musl.sh b/scripts/prep-musl.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+## prep-musl.sh — A0b: apply the per-arch musl skip filter on top of
+## build/<arch>/src/src/musl/.
+##
+## prep-src.sh (A0a) leaves the musl tree at build/<arch>/src/src/musl/
+## with overrides merged, deletes applied, and pre-generated alltypes.h
+## / syscall.h dropped in. boot5-calibrate.sh's per-arch skip list (the
+## set of musl translation units tcc 0.9.26 cannot compile) needs a
+## working tcc3, so it can't be folded into A0a.
+##
+## A0b is a single transform: read the skip list (committed or freshly
+## calibrated), copy it into the canonical tree as skip.txt, and remove
+## every listed path from src/musl/. After A0b the tree is the exact
+## set of files boot5 will compile — no skip enumeration at boot time.
+##
+## Skip-list source policy:
+## - if vendor/upstream/musl-1.2.5-skip-<arch>.txt exists, use it
+## verbatim (the common case — calibrations are committed).
+## - else run scripts/boot5-calibrate.sh <arch>, which itself depends
+## on boot4/tcc3. The script writes the committed file for us.
+##
+## Usage: scripts/prep-musl.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
+
+set -eu
+
+. scripts/lib-arch.sh
+bootlib_init prep-musl "${1:-}"
+
+DST=$ROOT/build/$ARCH/src
+DST_MUSL=$DST/src/musl
+SKIP_COMMITTED=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
+
+TAG="[$BOOT_TAG]"
+
+[ -d "$DST_MUSL" ] || {
+ echo "$TAG missing $DST_MUSL — run scripts/prep-src.sh $ARCH first" >&2
+ exit 1
+}
+
+# ── (1) materialize the skip list ─────────────────────────────────────
+# Use the committed list when it exists. Otherwise run the calibration
+# script and verify afterwards that it really wrote the file.
+[ -e "$SKIP_COMMITTED" ] || {
+    echo "$TAG no committed skip list at $SKIP_COMMITTED — calibrating"
+    scripts/boot5-calibrate.sh "$ARCH"
+    if [ ! -e "$SKIP_COMMITTED" ]; then
+        echo "$TAG calibration did not produce $SKIP_COMMITTED" >&2
+        exit 1
+    fi
+}
+# Keep a copy of the list inside the canonical musl tree.
+cp "$SKIP_COMMITTED" "$DST_MUSL/skip.txt"
+
+# ── (2) apply filter — drop every listed path from src/musl/ ──────────
+# skip.txt holds one path per line, relative to $DST_MUSL. Blank lines
+# and lines starting with '#' are ignored. Each listed path that exists
+# is removed recursively; listed paths that are absent are only counted.
+n_skip=0
+n_missing=0
+# `|| [ -n "$rel" ]` keeps a final entry that lacks a trailing newline:
+# read returns non-zero at EOF but has still assigned the partial line.
+# IFS is left at its default on purpose so surrounding whitespace on an
+# entry is trimmed.
+while read -r rel || [ -n "$rel" ]; do
+    case "$rel" in
+        '' | \#*) continue ;;
+    esac
+    if [ -e "$DST_MUSL/$rel" ]; then
+        rm -rf "$DST_MUSL/$rel"
+        n_skip=$((n_skip + 1))
+    else
+        n_missing=$((n_missing + 1))
+    fi
+done < "$DST_MUSL/skip.txt"
+
+# Entries that matched nothing are reported as a warning, not an error.
+if [ "$n_missing" -gt 0 ]; then
+    echo "$TAG WARN: $n_missing skip-list entries were not present in $DST_MUSL" >&2
+fi
+
+# Summary line: paths removed, and regular files left under $DST_MUSL.
+# tr strips the padding some wc implementations put around the count.
+files_left=$(find "$DST_MUSL" -type f -print | wc -l | tr -d ' ')
+echo "$TAG OK filtered=$n_skip remaining=$files_left files in $DST_MUSL"
diff --git a/scripts/prep-src.sh b/scripts/prep-src.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+## prep-src.sh — A0a: build the canonical generated source tree.
+##
+## All host-side source preparation happens once, up front, into a
+## single canonical tree at build/<arch>/src/. This tree is the audit
+## basis and the only thing boot stages should read for source. Boot
+## stages do no flattening, no unpacking, no patching, no calibration.
+##
+## Layout produced (see docs/PLAN.md §A0):
+## build/<arch>/src/
+## bin/ binary inputs not built by a stage
+## hex0-seed vendored seed only
+## src/ everything textual
+## vendor-seed/ ELF.hex2 + *.hex0|*.hex1|*.hex2
+## M1pp/ M1pp.P1
+## hex2pp/ hex2pp.P1
+## P1/ P1*.{M1,M1pp,P1pp}, entry-*.P1pp,
+## elf-end.P1pp
+## catm/ catm.P1pp
+## scheme1/ scheme1.P1pp, prelude.scm
+## cc/ cc.scm, main.scm
+## tcc/ tcc.flat.c, stdarg-bridge.h, plus
+## tcc-0.9.26-1147-gee75a10c/{include,lib}
+## libc/ libc.flat.c (mes-libc flattened)
+## musl/ filtered musl-1.2.5 tree (overrides
+## merged, deletes applied, generated
+## alltypes.h/syscall.h dropped in).
+## prep-musl.sh applies the per-arch
+## skip filter on top.
+## kernel/ seed-kernel sources for this arch
+##
+## A0 is split: prep-src.sh runs before boot0 and produces everything
+## that doesn't need a working compiler. prep-musl.sh runs after boot4
+## (or copies the committed skip list) and applies the calibration
+## filter on top of src/musl/.
+##
+## Usage: scripts/prep-src.sh <arch>
+## <arch> ∈ {aarch64, amd64, riscv64}
+
+set -eu
+
+# Shared helpers; the relative path means this is sourced from the
+# current working directory.
+. scripts/lib-arch.sh
+bootlib_init prep-src "${1:-}"
+
+# NOTE(review): ROOT, ARCH and BOOT_TAG are not assigned in this script;
+# presumably bootlib_init provides them — confirm in scripts/lib-arch.sh.
+
+# Prefix for every message this script prints.
+TAG="[$BOOT_TAG]"
+
+# Destination layout: the canonical tree with its bin/ and src/ halves.
+DST="$ROOT/build/$ARCH/src"
+DST_BIN="$DST/bin"
+DST_SRC="$DST/src"
+
+# ── (0) reset destination ─────────────────────────────────────────────
+# Every run starts from an empty tree, so nothing from a previous run
+# can survive into this one.
+rm -rf "$DST"
+mkdir -p "$DST_BIN" "$DST_SRC"
+
+# ── (1) vendored seed (pre-built binary + textual sources) ────────────
+SEED=vendor/seed/$ARCH
+if [ ! -d "$SEED" ]; then
+    echo "$TAG missing $SEED" >&2
+    exit 1
+fi
+
+# The pre-built hex0 seed is the one file that lands under bin/.
+cp "$SEED/hex0-seed" "$DST_BIN/hex0-seed"
+
+# The textual seed sources land under src/vendor-seed/. Each file in the
+# list is mandatory: the first one that is absent aborts the script.
+mkdir -p "$DST_SRC/vendor-seed"
+for seed_file in ELF.hex2 hex0.hex0 hex1.hex0 hex2.hex1 catm.hex2 M0.hex2; do
+    if [ ! -e "$SEED/$seed_file" ]; then
+        echo "$TAG missing $SEED/$seed_file" >&2
+        exit 1
+    fi
+    cp "$SEED/$seed_file" "$DST_SRC/vendor-seed/$seed_file"
+done
+
+# ── (2) repo-tree textual sources ─────────────────────────────────────
+# Every file below keeps its repo-relative path inside $DST_SRC, so one
+# list drives all of the copies. Create the target directories first.
+mkdir -p \
+    "$DST_SRC/M1pp" \
+    "$DST_SRC/hex2pp" \
+    "$DST_SRC/P1" \
+    "$DST_SRC/catm" \
+    "$DST_SRC/scheme1" \
+    "$DST_SRC/cc"
+
+for src_rel in \
+    M1pp/M1pp.P1 \
+    hex2pp/hex2pp.P1 \
+    P1/P1.M1pp \
+    "P1/P1-$ARCH.M1" \
+    "P1/P1-$ARCH.M1pp" \
+    P1/P1pp.P1pp \
+    P1/entry-libc.P1pp \
+    P1/entry-plain.P1pp \
+    P1/elf-end.P1pp \
+    catm/catm.P1pp \
+    scheme1/scheme1.P1pp \
+    scheme1/prelude.scm \
+    cc/cc.scm \
+    cc/main.scm
+do
+    # A missing source makes cp fail, which aborts the script (set -e).
+    cp "$src_rel" "$DST_SRC/$src_rel"
+done
+
+# ── (3) seed-kernel sources for this arch ─────────────────────────────
+mkdir -p "$DST_SRC/kernel/arch/$ARCH" "$DST_SRC/kernel/user" "$DST_SRC/kernel/scripts"
+cp seed-kernel/kernel.c "$DST_SRC/kernel/kernel.c"
+# Copy the regular files that sit directly inside arch/<arch>/ and user/
+# (no recursion). The directory part of the pattern is quoted so it is
+# never word-split or glob-expanded; only the trailing * is a pattern.
+for sub in "arch/$ARCH" user; do
+    for f in seed-kernel/"$sub"/*; do
+        # Skips subdirectories, and also the unexpanded pattern itself
+        # when the directory is empty or missing.
+        [ -f "$f" ] || continue
+        # ${f##*/} is the file name; no basename process per file.
+        cp "$f" "$DST_SRC/kernel/$sub/${f##*/}"
+    done
+done
+# elf-pvh-note.c is consumed by boot6 on amd64; keep it in the canonical
+# tree on every arch so the layout is uniform.
+cp seed-kernel/scripts/elf-pvh-note.c "$DST_SRC/kernel/scripts/elf-pvh-note.c"
+
+# ── (4) tcc flatten ───────────────────────────────────────────────────
+# stage1-flatten.sh writes to build/<arch>/vendor/tcc/. Run it (it's
+# idempotent) and mirror the relevant artifacts into src/tcc/.
+echo "$TAG flatten tcc.flat.c (host)"
+scripts/stage1-flatten.sh --arch "$ARCH"
+
+TCC_VENDOR=$ROOT/build/$ARCH/vendor/tcc
+TCC_PKG=tcc-0.9.26-1147-gee75a10c
+
+# Verify the expected outputs before copying: the two files first, then
+# the two directories of the tcc package.
+for out in tcc.flat.c stdarg-bridge.h; do
+    if [ ! -e "$TCC_VENDOR/$out" ]; then
+        echo "$TAG flatten produced no $out" >&2
+        exit 1
+    fi
+done
+for out in include lib; do
+    if [ ! -d "$TCC_VENDOR/$TCC_PKG/$out" ]; then
+        echo "$TAG flatten produced no $TCC_PKG/$out" >&2
+        exit 1
+    fi
+done
+
+# A single mkdir -p creates src/tcc/ and the package directory below it.
+mkdir -p "$DST_SRC/tcc/$TCC_PKG"
+for out in tcc.flat.c stdarg-bridge.h; do
+    cp "$TCC_VENDOR/$out" "$DST_SRC/tcc/$out"
+done
+# The destination directories do not exist yet, so cp -R creates each
+# one as a copy of its source rather than nesting inside it.
+for out in include lib; do
+    cp -R "$TCC_VENDOR/$TCC_PKG/$out" "$DST_SRC/tcc/$TCC_PKG/$out"
+done
+
+# ── (5) mes-libc flatten ──────────────────────────────────────────────
+echo "$TAG flatten libc.flat.c (host)"
+scripts/libc-flatten.sh --arch "$ARCH"
+
+# The flattened file is looked up under build/<arch>/vendor/mes-libc/;
+# stop if it is not there after the flatten script has run.
+LIBC_VENDOR=$ROOT/build/$ARCH/vendor/mes-libc
+if [ ! -e "$LIBC_VENDOR/libc.flat.c" ]; then
+    echo "$TAG flatten produced no libc.flat.c" >&2
+    exit 1
+fi
+
+mkdir -p "$DST_SRC/libc"
+cp "$LIBC_VENDOR/libc.flat.c" "$DST_SRC/libc/libc.flat.c"
+
+# ── (6) musl unpack + overrides + deletes + generated headers ─────────
+MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz
+MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides
+MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt
+# NOTE(review): MUSL_ARCH is not assigned in this script; presumably
+# bootlib_init provides it — confirm in scripts/lib-arch.sh.
+MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH
+
+# All four inputs must be present before any work starts.
+[ -e "$MUSL_TARBALL" ] || { echo "$TAG missing $MUSL_TARBALL" >&2; exit 1; }
+[ -d "$MUSL_OVERRIDES" ] || { echo "$TAG missing $MUSL_OVERRIDES" >&2; exit 1; }
+[ -e "$MUSL_DELETES" ] || { echo "$TAG missing $MUSL_DELETES" >&2; exit 1; }
+[ -d "$MUSL_GENERATED" ] || { echo "$TAG missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; }
+
+echo "$TAG unpack musl-1.2.5 + apply overrides/deletes"
+# Assemble the tree in a scratch directory; the EXIT trap removes it
+# when the script exits.
+MUSL_TMP=$(mktemp -d)
+trap 'rm -rf "$MUSL_TMP"' EXIT
+tar xzf "$MUSL_TARBALL" -C "$MUSL_TMP"
+[ -d "$MUSL_TMP/musl-1.2.5" ] || { echo "$TAG musl tarball did not unpack to musl-1.2.5/" >&2; exit 1; }
+
+# Overlay the override files onto the unpacked tree.
+cp -R "$MUSL_OVERRIDES/." "$MUSL_TMP/musl-1.2.5/"
+
+# Remove every path named in the deletes file (one per line, relative to
+# the musl root). Blank lines and '#' comment lines are ignored, the
+# same rules prep-musl.sh applies to its skip list.
+# `|| [ -n "$p" ]` keeps a final entry that lacks a trailing newline:
+# read returns non-zero at EOF but has still assigned the partial line.
+while read -r p || [ -n "$p" ]; do
+    case "$p" in
+        '' | \#*) continue ;;
+    esac
+    rm -rf "$MUSL_TMP/musl-1.2.5/$p"
+done < "$MUSL_DELETES"
+
+# Drop pre-generated arch headers + version.h into the same obj/ layout
+# boot5 expects.
+mkdir -p "$MUSL_TMP/musl-1.2.5/obj/include/bits" \
+    "$MUSL_TMP/musl-1.2.5/obj/src/internal"
+cp "$MUSL_GENERATED/alltypes.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/alltypes.h"
+cp "$MUSL_GENERATED/syscall.h" "$MUSL_TMP/musl-1.2.5/obj/include/bits/syscall.h"
+echo '#define VERSION "1.2.5-tcc-boot5"' > "$MUSL_TMP/musl-1.2.5/obj/src/internal/version.h"
+
+mkdir -p "$DST_SRC/musl"
+# Move into place — the canonical tree owns this from now on.
+( cd "$MUSL_TMP/musl-1.2.5" && tar cf - . ) | ( cd "$DST_SRC/musl" && tar xf - )
+
+# Seed src/musl/skip.txt with the committed skip list when one exists,
+# so the canonical tree carries metadata even before prep-musl.sh
+# applies the filter. prep-musl.sh refreshes/regenerates this.
+SKIP_COMMITTED=vendor/upstream/musl-1.2.5-skip-$ARCH.txt
+if [ -e "$SKIP_COMMITTED" ]; then
+ cp "$SKIP_COMMITTED" "$DST_SRC/musl/skip.txt"
+fi
+
+# ── summary ───────────────────────────────────────────────────────────
+# Count the regular files in the finished tree. tr strips the padding
+# some wc implementations put around the count.
+total_files=$(find "$DST" -type f -print | wc -l | tr -d ' ')
+echo "$TAG OK -> $DST ($total_files files)"