commit 7dd647f943c4392b22761a9ec71c49466f885d6e
parent 79c5cce3f31c1bb066907dc8c456db129d5922b0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 16:34:40 -0700
boot{1,2,3}: move catm rebuild from boot1 to boot2
boot1 only needs P1.M1 to build M1pp + hex2pp from their .P1 sources;
the libp1pp + P1.M1pp inputs were there solely for the catm.P1pp
rebuild bundled into the same script. Splitting that rebuild into
boot2 (which already runs the .P1pp -> ELF pipeline for scheme1)
shrinks boot1's input surface and keeps each stage focused on a
single concern.
boot2 now bootstraps with boot0 catm, builds the new catm via the
boot1 M1pp/hex2pp, then uses the new catm to build scheme1. boot3
sources catm from $BOOT2 instead of $BOOT1.
Diffstat:
4 files changed, 74 insertions(+), 67 deletions(-)
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
(define catm (hex2 catm.hex2))
(define M0 (hex2 (catm ELF.hex2 M0.hex2)))
-;; ── boot1.sh ── Self-host m1pp + hex2pp + catm ───────────────────────
+;; ── boot1.sh ── Self-host m1pp + hex2pp ──────────────────────────────
;; Compile+Link for arch-specific M1 source.
(defn exe (M1-src) (hex2 (catm ELF.hex2 (M0 M1-src))))
@@ -19,6 +19,7 @@
(define m1pp (exe (catm P1A.M1 m1pp.P1)))
(define hex2pp (exe (catm P1A.M1 hex2pp.P1)))
+;; ── boot2.sh ── Self-host catm + Scheme ──────────────────────────────
;; P1pp — P1 rewritten with m1pp macros. Assemble any P1pp source via m1pp.
;; P1A.M1pp is the arch-specific backend, rewritten to use M1pp.
;; P1.M1pp is the arch-agnostic interface.
@@ -28,11 +29,9 @@
(hex2pp (catm ELF.hex2 (m1pp (catm P1A.M1pp P1.M1pp P1pp.P1pp src)))))
;; Rebuild catm from P1pp; after this stage the seed boot0 catm is
-;; no longer needed and boot2/boot3 run with only boot1 binaries.
-(define catm (ppexe catm.P1pp))
-
-;; ── boot2.sh ── Scheme ───────────────────────────────────────────────
-(define scheme (ppexe scheme1.P1pp))
+;; no longer needed and boot3 runs with only boot1 + boot2 binaries.
+(define catm (ppexe catm.P1pp))
+(define scheme (ppexe scheme1.P1pp))
;; ── boot3.sh ── C ────────────────────────────────────────────────────
(defn scc (C-src) (ppexe (scheme cc.scm C-src)))
diff --git a/scripts/boot1.sh b/scripts/boot1.sh
@@ -1,20 +1,14 @@
#!/bin/sh
-## boot1.sh — standalone build of M1pp + hex2pp + catm.
+## boot1.sh — standalone build of M1pp + hex2pp.
##
-## Stage 1 of the README's chain: produces the self-hosted tools the
-## rest of the boot chain runs on. M1pp + hex2pp are built from their
-## .P1 sources via the seed M0 + hex2 chain; catm is then rebuilt from
-## catm.P1pp through the freshly-built M1pp + hex2pp pipeline so later
-## stages can run with zero boot0 dependencies.
+## Stage 1 of the README's chain: produces the self-hosted M1pp +
+## hex2pp pair, built from their .P1 sources via the seed M0 + hex2
+## chain. catm is rebuilt from catm.P1pp in boot2.
##
## ─── Inputs (sources, copied into staging) ────────────────────────────
## M1pp/M1pp.P1 — M1pp expander, P1 source
## hex2pp/hex2pp.P1 — hex2pp assembler/linker, P1 source
-## catm/catm.P1pp — catm, P1pp source
## P1/P1-$ARCH.M1 — pre-pruned per-arch P1 backend
-## P1/P1-$ARCH.M1pp — per-arch P1pp backend (catm.P1pp)
-## P1/P1.M1pp — arch-agnostic P1pp frontend
-## P1/P1pp.P1pp — libp1pp standard library
## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input)
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
@@ -27,7 +21,6 @@
## ─── Outputs ──────────────────────────────────────────────────────────
## build/$ARCH/boot1/M1pp — M1pp expander ELF
## build/$ARCH/boot1/hex2pp — hex2pp assembler/linker ELF
-## build/$ARCH/boot1/catm — catm ELF (rebuilt via M1pp+hex2pp)
##
## Usage: scripts/boot1.sh <arch>
## <arch> ∈ {aarch64, amd64, riscv64}
@@ -75,34 +68,21 @@ mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
cp "$BOOT0/hex2" "$BOOT0/M0" "$BOOT0/catm" "$STAGE/in/"
cp M1pp/M1pp.P1 "$STAGE/in/M1pp.P1"
cp hex2pp/hex2pp.P1 "$STAGE/in/hex2pp.P1"
-cp catm/catm.P1pp "$STAGE/in/catm.P1pp"
cp "P1/P1-$ARCH.M1" "$STAGE/in/P1.M1"
-cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
-cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
-cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
-# ── run the build pipelines ───────────────────────────────────────────
-# Two pipelines, run back to back in a single container:
-#
-# .P1 -> ELF (M0 + hex2) for M1pp and hex2pp:
+# ── run the build pipeline ────────────────────────────────────────────
+# .P1 -> ELF via M0 + hex2 (seed), run twice — for M1pp and hex2pp:
# catm combined.M1 P1.M1 src (per-arch backend prepended)
# M0 combined.M1 -> prog.hex2
# catm linked.hex2 ELF.hex2 prog.hex2
# hex2 linked.hex2 -> ELF binary
#
-# .P1pp -> ELF (M1pp + hex2pp) for catm:
-# catm combined.M1pp backend.M1pp frontend.M1pp libp1pp.P1pp catm.P1pp
-# M1pp combined.M1pp -> expanded.hex2pp
-# catm linked.hex2pp ELF.hex2 expanded.hex2pp
-# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
-#
# Stages everything through /tmp because the M0/hex2 seed tools do one
# syscall per byte; virtiofs round-trips would dominate.
cat > "$STAGE/in/run.sh" <<'RUN'
#!/bin/sh
set -eu
-# .P1 -> ELF via M0 + hex2 (seed). Two of these — for M1pp and hex2pp.
# Inlined (no function) so the container shell sees only sequential
# exec — kaem-friendly.
@@ -119,29 +99,19 @@ chmod +x /work/out/M1pp
/work/in/catm /tmp/linked.hex2 /work/in/ELF.hex2 /tmp/prog.hex2
/work/in/hex2 /tmp/linked.hex2 /work/out/hex2pp
chmod +x /work/out/hex2pp
-
-# .P1pp -> ELF via the just-built M1pp + hex2pp. catm-from-boot0 still
-# does the M1/hex2 concatenation; the produced binary then replaces it
-# in boot2/boot3.
-/work/in/catm /tmp/combined.M1pp \
- /work/in/backend.M1pp /work/in/frontend.M1pp \
- /work/in/libp1pp.P1pp /work/in/catm.P1pp
-/work/out/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
-/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp
-/work/out/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm
RUN
chmod +x "$STAGE/in/run.sh"
-echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp; catm.P1pp -> catm"
+echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=512M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu /work/in/run.sh
# ── copy outputs to final destination ─────────────────────────────────
-for f in M1pp hex2pp catm; do
+for f in M1pp hex2pp; do
cp "$STAGE/out/$f" "$OUT/$f"
chmod 0700 "$OUT/$f"
done
-echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp, catm}"
+echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp}"
diff --git a/scripts/boot2.sh b/scripts/boot2.sh
@@ -1,11 +1,15 @@
#!/bin/sh
-## boot2.sh — standalone build of scheme1 from scheme1.P1pp.
+## boot2.sh — rebuild catm via M1pp+hex2pp, then build scheme1.
##
-## Stage 2 of the README's chain: produces the scheme1 interpreter ELF.
+## Stage 2 of the README's chain: first rebuilds catm from catm.P1pp
+## via the freshly-built M1pp + hex2pp pipeline (replacing the seed
+## boot0 catm so later stages run with zero boot0 dependencies); then
+## builds the scheme1 interpreter from scheme1.P1pp using the new catm.
## End-to-end through M1pp + hex2pp (no seed M0/hex2 anywhere on the
## .P1pp pipeline).
##
## ─── Inputs (sources, copied into staging) ────────────────────────────
+## catm/catm.P1pp — catm, P1pp source
## scheme1/scheme1.P1pp — interpreter source
## P1/P1-$ARCH.M1pp — per-arch M1pp backend
## P1/P1.M1pp — arch-agnostic P1pp frontend
@@ -13,13 +17,18 @@
## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
-## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh
+## build/$ARCH/boot0/catm — built by scripts/boot0.sh
+## (used only to bootstrap the
+## catm.P1pp build; replaced by
+## the new catm afterwards)
+## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
##
## ─── Tools (in container) ─────────────────────────────────────────────
## busybox sh + cp + mkdir + chmod (scratch + busybox image only).
-## Plus the boot1 binaries (M1pp, hex2pp, catm), staged in.
+## Plus the boot0 catm and boot1 M1pp + hex2pp, staged in.
##
## ─── Outputs ──────────────────────────────────────────────────────────
+## build/$ARCH/boot2/catm — catm ELF (rebuilt via M1pp+hex2pp)
## build/$ARCH/boot2/scheme1 — scheme1 interpreter ELF
##
## Usage: scripts/boot2.sh <arch>
@@ -42,6 +51,7 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT"
IMAGE=boot2-scratch:$ARCH
+BOOT0=build/$ARCH/boot0
BOOT1=build/$ARCH/boot1
OUT=build/$ARCH/boot2
STAGE=build/$ARCH/.boot2-stage
@@ -53,8 +63,12 @@ if ! podman image exists "$IMAGE"; then
-f scripts/Containerfile.scratch scripts/
fi
-# ── prerequisite: boot1 binaries must exist ───────────────────────────
-for bin in M1pp hex2pp catm; do
+# ── prerequisite: prior-stage binaries must exist ─────────────────────
+[ -x "$BOOT0/catm" ] || {
+ echo "[boot2 $ARCH] missing prerequisite: $BOOT0/catm (run scripts/boot0.sh $ARCH)" >&2
+ exit 1
+}
+for bin in M1pp hex2pp; do
[ -x "$BOOT1/$bin" ] || {
echo "[boot2 $ARCH] missing prerequisite: $BOOT1/$bin (run scripts/boot1.sh $ARCH)" >&2
exit 1
@@ -65,38 +79,62 @@ done
rm -rf "$STAGE"
mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
-cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$BOOT1/catm" "$STAGE/in/"
+cp "$BOOT0/catm" "$STAGE/in/catm"
+cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$STAGE/in/"
+cp catm/catm.P1pp "$STAGE/in/catm.P1pp"
cp scheme1/scheme1.P1pp "$STAGE/in/scheme1.P1pp"
cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
-# ── run the .P1pp -> ELF pipeline ─────────────────────────────────────
-# catm combined.M1pp backend + frontend + libp1pp + scheme1.P1pp
-# M1pp combined.M1pp -> expanded.hex2pp
-# catm linked.hex2pp ELF.hex2 expanded.hex2pp
-# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
+# ── run the build pipelines ───────────────────────────────────────────
+# Two .P1pp -> ELF pipelines, run back to back in a single container:
+#
+# catm.P1pp -> catm (boot0 catm bootstraps; produced binary then
+# takes over for the rest of this script + boot3)
+# catm combined.M1pp backend + frontend + libp1pp + catm.P1pp
+# M1pp combined.M1pp -> expanded.hex2pp
+# catm linked.hex2pp ELF.hex2 expanded.hex2pp
+# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
+#
+# scheme1.P1pp -> scheme1 (uses the just-built catm)
+# (same shape, with /work/in/scheme1.P1pp in place of catm.P1pp)
cat > "$STAGE/in/run.sh" <<'RUN'
#!/bin/sh
set -eu
+# Inlined (no function) so the container shell sees only sequential
+# exec — kaem-friendly.
+
+# catm.P1pp -> catm (bootstrap with boot0 catm)
/work/in/catm /tmp/combined.M1pp \
/work/in/backend.M1pp /work/in/frontend.M1pp \
- /work/in/libp1pp.P1pp /work/in/scheme1.P1pp
+ /work/in/libp1pp.P1pp /work/in/catm.P1pp
/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp
+/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm
+chmod +x /work/out/catm
+
+# scheme1.P1pp -> scheme1 (uses the just-built catm)
+/work/out/catm /tmp/combined.M1pp \
+ /work/in/backend.M1pp /work/in/frontend.M1pp \
+ /work/in/libp1pp.P1pp /work/in/scheme1.P1pp
+/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
+/work/out/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp
/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/scheme1
RUN
chmod +x "$STAGE/in/run.sh"
-echo "[boot2 $ARCH] scheme1.P1pp -> scheme1"
+echo "[boot2 $ARCH] catm.P1pp -> catm; scheme1.P1pp -> scheme1"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=512M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu /work/in/run.sh
-# ── copy output to final destination ──────────────────────────────────
-cp "$STAGE/out/scheme1" "$OUT/scheme1"
-chmod 0700 "$OUT/scheme1"
+# ── copy outputs to final destination ─────────────────────────────────
+for f in catm scheme1; do
+ cp "$STAGE/out/$f" "$OUT/$f"
+ chmod 0700 "$OUT/$f"
+done
-echo "[boot2 $ARCH] OK -> $OUT/scheme1"
+echo "[boot2 $ARCH] OK -> $OUT/{catm, scheme1}"
diff --git a/scripts/boot3.sh b/scripts/boot3.sh
@@ -59,8 +59,8 @@
## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
-## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh
-## build/$ARCH/boot2/scheme1 — built by scripts/boot2.sh
+## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
+## build/$ARCH/boot2/{catm, scheme1} — built by scripts/boot2.sh
##
## ─── Tools ────────────────────────────────────────────────────────────
## In container: scratch + busybox (no libc, no /etc, no resolver).
@@ -143,9 +143,9 @@ if ! podman image exists "$IMAGE"; then
fi
# ── prerequisite: prior-stage binaries ────────────────────────────────
-[ -x "$BOOT1/catm" ] || { echo "[boot3 $ARCH] missing $BOOT1/catm (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT1/M1pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/M1pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT1/hex2pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/hex2pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
+[ -x "$BOOT2/catm" ] || { echo "[boot3 $ARCH] missing $BOOT2/catm (run scripts/boot2.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT2/scheme1" ] || { echo "[boot3 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; }
# ── prerequisite: host-flattened sources + unpacked tcc tree ──────────
@@ -171,9 +171,9 @@ rm -f "$OUT/tcc0" "$OUT/tcc1" "$OUT/tcc2" \
"$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o"
# Prior-stage binaries
-cp "$BOOT1/catm" "$STAGE/in/catm"
cp "$BOOT1/M1pp" "$STAGE/in/M1pp"
cp "$BOOT1/hex2pp" "$STAGE/in/hex2pp"
+cp "$BOOT2/catm" "$STAGE/in/catm"
cp "$BOOT2/scheme1" "$STAGE/in/scheme1"
# cc.scm bundle inputs