boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 7dd647f943c4392b22761a9ec71c49466f885d6e
parent 79c5cce3f31c1bb066907dc8c456db129d5922b0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  4 May 2026 16:34:40 -0700

boot{1,2,3}: move catm rebuild from boot1 to boot2

boot1 only needs P1.M1 to build M1pp + hex2pp from their .P1 sources;
the libp1pp + P1.M1pp inputs were there solely for the catm.P1pp
rebuild bundled into the same script. Splitting that rebuild into
boot2 (which already runs the .P1pp -> ELF pipeline for scheme1)
shrinks boot1's input surface and keeps each stage focused on a
single concern.

boot2 now bootstraps with boot0 catm, builds the new catm via the
boot1 M1pp/hex2pp, then uses the new catm to build scheme1. boot3
sources catm from $BOOT2 instead of $BOOT1.

Diffstat:
M README.md | 11 +++++------
M scripts/boot1.sh | 48 +++++++++---------------------------------------
M scripts/boot2.sh | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M scripts/boot3.sh | 8 ++++----
4 files changed, 74 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md @@ -8,7 +8,7 @@ (define catm (hex2 catm.hex2)) (define M0 (hex2 (catm ELF.hex2 M0.hex2))) -;; ── boot1.sh ── Self-host m1pp + hex2pp + catm ─────────────────────── +;; ── boot1.sh ── Self-host m1pp + hex2pp ────────────────────────────── ;; Compile+Link for arch-specific M1 source. (defn exe (M1-src) (hex2 (catm ELF.hex2 (M0 M1-src)))) @@ -19,6 +19,7 @@ (define m1pp (exe (catm P1A.M1 m1pp.P1))) (define hex2pp (exe (catm P1A.M1 hex2pp.P1))) +;; ── boot2.sh ── Self-host catm + Scheme ────────────────────────────── ;; P1pp — P1 rewritten with m1pp macros. Assemble any P1pp source via m1pp. ;; P1A.M1pp is the arch-specific backend, rewritten to use M1pp. ;; P1.M1pp is the arch-agnostic interface. @@ -28,11 +29,9 @@ (hex2pp (catm ELF.hex2 (m1pp (catm P1A.M1pp P1.M1pp P1pp.P1pp src))))) ;; Rebuild catm from P1pp; after this stage the seed boot0 catm is -;; no longer needed and boot2/boot3 run with only boot1 binaries. -(define catm (ppexe catm.P1pp)) - -;; ── boot2.sh ── Scheme ─────────────────────────────────────────────── -(define scheme (ppexe scheme1.P1pp)) +;; no longer needed and boot3 runs with only boot1 + boot2 binaries. +(define catm (ppexe catm.P1pp)) +(define scheme (ppexe scheme1.P1pp)) ;; ── boot3.sh ── C ──────────────────────────────────────────────────── (defn scc (C-src) (ppexe (scheme cc.scm C-src))) diff --git a/scripts/boot1.sh b/scripts/boot1.sh @@ -1,20 +1,14 @@ #!/bin/sh -## boot1.sh — standalone build of M1pp + hex2pp + catm. +## boot1.sh — standalone build of M1pp + hex2pp. ## -## Stage 1 of the README's chain: produces the self-hosted tools the -## rest of the boot chain runs on. M1pp + hex2pp are built from their -## .P1 sources via the seed M0 + hex2 chain; catm is then rebuilt from -## catm.P1pp through the freshly-built M1pp + hex2pp pipeline so later -## stages can run with zero boot0 dependencies. 
+## Stage 1 of the README's chain: produces the self-hosted M1pp + +## hex2pp pair, built from their .P1 sources via the seed M0 + hex2 +## chain. catm is rebuilt from catm.P1pp in boot2. ## ## ─── Inputs (sources, copied into staging) ──────────────────────────── ## M1pp/M1pp.P1 — M1pp expander, P1 source ## hex2pp/hex2pp.P1 — hex2pp assembler/linker, P1 source -## catm/catm.P1pp — catm, P1pp source ## P1/P1-$ARCH.M1 — pre-pruned per-arch P1 backend -## P1/P1-$ARCH.M1pp — per-arch P1pp backend (catm.P1pp) -## P1/P1.M1pp — arch-agnostic P1pp frontend -## P1/P1pp.P1pp — libp1pp standard library ## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input) ## ## ─── Inputs (binaries from prior stages) ────────────────────────────── @@ -27,7 +21,6 @@ ## ─── Outputs ────────────────────────────────────────────────────────── ## build/$ARCH/boot1/M1pp — M1pp expander ELF ## build/$ARCH/boot1/hex2pp — hex2pp assembler/linker ELF -## build/$ARCH/boot1/catm — catm ELF (rebuilt via M1pp+hex2pp) ## ## Usage: scripts/boot1.sh <arch> ## <arch> ∈ {aarch64, amd64, riscv64} @@ -75,34 +68,21 @@ mkdir -p "$STAGE/in" "$STAGE/out" "$OUT" cp "$BOOT0/hex2" "$BOOT0/M0" "$BOOT0/catm" "$STAGE/in/" cp M1pp/M1pp.P1 "$STAGE/in/M1pp.P1" cp hex2pp/hex2pp.P1 "$STAGE/in/hex2pp.P1" -cp catm/catm.P1pp "$STAGE/in/catm.P1pp" cp "P1/P1-$ARCH.M1" "$STAGE/in/P1.M1" -cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp" -cp P1/P1.M1pp "$STAGE/in/frontend.M1pp" -cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp" cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2" -# ── run the build pipelines ─────────────────────────────────────────── -# Two pipelines, run back to back in a single container: -# -# .P1 -> ELF (M0 + hex2) for M1pp and hex2pp: +# ── run the build pipeline ──────────────────────────────────────────── +# .P1 -> ELF via M0 + hex2 (seed), run twice — for M1pp and hex2pp: # catm combined.M1 P1.M1 src (per-arch backend prepended) # M0 combined.M1 -> prog.hex2 # catm linked.hex2 ELF.hex2 prog.hex2 # hex2 
linked.hex2 -> ELF binary # -# .P1pp -> ELF (M1pp + hex2pp) for catm: -# catm combined.M1pp backend.M1pp frontend.M1pp libp1pp.P1pp catm.P1pp -# M1pp combined.M1pp -> expanded.hex2pp -# catm linked.hex2pp ELF.hex2 expanded.hex2pp -# hex2pp -B 0x600000 linked.hex2pp -> ELF binary -# # Stages everything through /tmp because the M0/hex2 seed tools do one # syscall per byte; virtiofs round-trips would dominate. cat > "$STAGE/in/run.sh" <<'RUN' #!/bin/sh set -eu -# .P1 -> ELF via M0 + hex2 (seed). Two of these — for M1pp and hex2pp. # Inlined (no function) so the container shell sees only sequential # exec — kaem-friendly. @@ -119,29 +99,19 @@ chmod +x /work/out/M1pp /work/in/catm /tmp/linked.hex2 /work/in/ELF.hex2 /tmp/prog.hex2 /work/in/hex2 /tmp/linked.hex2 /work/out/hex2pp chmod +x /work/out/hex2pp - -# .P1pp -> ELF via the just-built M1pp + hex2pp. catm-from-boot0 still -# does the M1/hex2 concatenation; the produced binary then replaces it -# in boot2/boot3. -/work/in/catm /tmp/combined.M1pp \ - /work/in/backend.M1pp /work/in/frontend.M1pp \ - /work/in/libp1pp.P1pp /work/in/catm.P1pp -/work/out/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp -/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp -/work/out/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm RUN chmod +x "$STAGE/in/run.sh" -echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp; catm.P1pp -> catm" +echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp" podman run --rm -i --pull=never --platform "$PLATFORM" \ --tmpfs /tmp:size=512M \ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ sh -eu /work/in/run.sh # ── copy outputs to final destination ───────────────────────────────── -for f in M1pp hex2pp catm; do +for f in M1pp hex2pp; do cp "$STAGE/out/$f" "$OUT/$f" chmod 0700 "$OUT/$f" done -echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp, catm}" +echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp}" diff --git a/scripts/boot2.sh b/scripts/boot2.sh @@ -1,11 +1,15 @@ #!/bin/sh -## boot2.sh — 
standalone build of scheme1 from scheme1.P1pp. +## boot2.sh — rebuild catm via M1pp+hex2pp, then build scheme1. ## -## Stage 2 of the README's chain: produces the scheme1 interpreter ELF. +## Stage 2 of the README's chain: first rebuilds catm from catm.P1pp +## via the freshly-built M1pp + hex2pp pipeline (replacing the seed +## boot0 catm so later stages run with zero boot0 dependencies); then +## builds the scheme1 interpreter from scheme1.P1pp using the new catm. ## End-to-end through M1pp + hex2pp (no seed M0/hex2 anywhere on the ## .P1pp pipeline). ## ## ─── Inputs (sources, copied into staging) ──────────────────────────── +## catm/catm.P1pp — catm, P1pp source ## scheme1/scheme1.P1pp — interpreter source ## P1/P1-$ARCH.M1pp — per-arch M1pp backend ## P1/P1.M1pp — arch-agnostic P1pp frontend @@ -13,13 +17,18 @@ ## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment ## ## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh +## build/$ARCH/boot0/catm — built by scripts/boot0.sh +## (used only to bootstrap the +## catm.P1pp build; replaced by +## the new catm afterwards) +## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh ## ## ─── Tools (in container) ───────────────────────────────────────────── ## busybox sh + cp + mkdir + chmod (scratch + busybox image only). -## Plus the boot1 binaries (M1pp, hex2pp, catm), staged in. +## Plus the boot0 catm and boot1 M1pp + hex2pp, staged in. ## ## ─── Outputs ────────────────────────────────────────────────────────── +## build/$ARCH/boot2/catm — catm ELF (rebuilt via M1pp+hex2pp) ## build/$ARCH/boot2/scheme1 — scheme1 interpreter ELF ## ## Usage: scripts/boot2.sh <arch> @@ -42,6 +51,7 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" IMAGE=boot2-scratch:$ARCH +BOOT0=build/$ARCH/boot0 BOOT1=build/$ARCH/boot1 OUT=build/$ARCH/boot2 STAGE=build/$ARCH/.boot2-stage @@ -53,8 +63,12 @@ if ! 
podman image exists "$IMAGE"; then -f scripts/Containerfile.scratch scripts/ fi -# ── prerequisite: boot1 binaries must exist ─────────────────────────── -for bin in M1pp hex2pp catm; do +# ── prerequisite: prior-stage binaries must exist ───────────────────── +[ -x "$BOOT0/catm" ] || { + echo "[boot2 $ARCH] missing prerequisite: $BOOT0/catm (run scripts/boot0.sh $ARCH)" >&2 + exit 1 +} +for bin in M1pp hex2pp; do [ -x "$BOOT1/$bin" ] || { echo "[boot2 $ARCH] missing prerequisite: $BOOT1/$bin (run scripts/boot1.sh $ARCH)" >&2 exit 1 @@ -65,38 +79,62 @@ done rm -rf "$STAGE" mkdir -p "$STAGE/in" "$STAGE/out" "$OUT" -cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$BOOT1/catm" "$STAGE/in/" +cp "$BOOT0/catm" "$STAGE/in/catm" +cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$STAGE/in/" +cp catm/catm.P1pp "$STAGE/in/catm.P1pp" cp scheme1/scheme1.P1pp "$STAGE/in/scheme1.P1pp" cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp" cp P1/P1.M1pp "$STAGE/in/frontend.M1pp" cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp" cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2" -# ── run the .P1pp -> ELF pipeline ───────────────────────────────────── -# catm combined.M1pp backend + frontend + libp1pp + scheme1.P1pp -# M1pp combined.M1pp -> expanded.hex2pp -# catm linked.hex2pp ELF.hex2 expanded.hex2pp -# hex2pp -B 0x600000 linked.hex2pp -> ELF binary +# ── run the build pipelines ─────────────────────────────────────────── +# Two .P1pp -> ELF pipelines, run back to back in a single container: +# +# catm.P1pp -> catm (boot0 catm bootstraps; produced binary then +# takes over for the rest of this script + boot3) +# catm combined.M1pp backend + frontend + libp1pp + catm.P1pp +# M1pp combined.M1pp -> expanded.hex2pp +# catm linked.hex2pp ELF.hex2 expanded.hex2pp +# hex2pp -B 0x600000 linked.hex2pp -> ELF binary +# +# scheme1.P1pp -> scheme1 (uses the just-built catm) +# (same shape, with /work/in/scheme1.P1pp in place of catm.P1pp) cat > "$STAGE/in/run.sh" <<'RUN' #!/bin/sh set -eu +# Inlined (no function) so the container 
shell sees only sequential +# exec — kaem-friendly. + +# catm.P1pp -> catm (bootstrap with boot0 catm) /work/in/catm /tmp/combined.M1pp \ /work/in/backend.M1pp /work/in/frontend.M1pp \ - /work/in/libp1pp.P1pp /work/in/scheme1.P1pp + /work/in/libp1pp.P1pp /work/in/catm.P1pp /work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp /work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp +/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm +chmod +x /work/out/catm + +# scheme1.P1pp -> scheme1 (uses the just-built catm) +/work/out/catm /tmp/combined.M1pp \ + /work/in/backend.M1pp /work/in/frontend.M1pp \ + /work/in/libp1pp.P1pp /work/in/scheme1.P1pp +/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp +/work/out/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp /work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/scheme1 RUN chmod +x "$STAGE/in/run.sh" -echo "[boot2 $ARCH] scheme1.P1pp -> scheme1" +echo "[boot2 $ARCH] catm.P1pp -> catm; scheme1.P1pp -> scheme1" podman run --rm -i --pull=never --platform "$PLATFORM" \ --tmpfs /tmp:size=512M \ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ sh -eu /work/in/run.sh -# ── copy output to final destination ────────────────────────────────── -cp "$STAGE/out/scheme1" "$OUT/scheme1" -chmod 0700 "$OUT/scheme1" +# ── copy outputs to final destination ───────────────────────────────── +for f in catm scheme1; do + cp "$STAGE/out/$f" "$OUT/$f" + chmod 0700 "$OUT/$f" +done -echo "[boot2 $ARCH] OK -> $OUT/scheme1" +echo "[boot2 $ARCH] OK -> $OUT/{catm, scheme1}" diff --git a/scripts/boot3.sh b/scripts/boot3.sh @@ -59,8 +59,8 @@ ## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU ## ## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh -## build/$ARCH/boot2/scheme1 — built by scripts/boot2.sh +## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh +## build/$ARCH/boot2/{catm, scheme1} — 
built by scripts/boot2.sh ## ## ─── Tools ──────────────────────────────────────────────────────────── ## In container: scratch + busybox (no libc, no /etc, no resolver). @@ -143,9 +143,9 @@ if ! podman image exists "$IMAGE"; then fi # ── prerequisite: prior-stage binaries ──────────────────────────────── -[ -x "$BOOT1/catm" ] || { echo "[boot3 $ARCH] missing $BOOT1/catm (run scripts/boot1.sh $ARCH)" >&2; exit 1; } [ -x "$BOOT1/M1pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/M1pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; } [ -x "$BOOT1/hex2pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/hex2pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; } +[ -x "$BOOT2/catm" ] || { echo "[boot3 $ARCH] missing $BOOT2/catm (run scripts/boot2.sh $ARCH)" >&2; exit 1; } [ -x "$BOOT2/scheme1" ] || { echo "[boot3 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; } # ── prerequisite: host-flattened sources + unpacked tcc tree ────────── @@ -171,9 +171,9 @@ rm -f "$OUT/tcc0" "$OUT/tcc1" "$OUT/tcc2" \ "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o" # Prior-stage binaries -cp "$BOOT1/catm" "$STAGE/in/catm" cp "$BOOT1/M1pp" "$STAGE/in/M1pp" cp "$BOOT1/hex2pp" "$STAGE/in/hex2pp" +cp "$BOOT2/catm" "$STAGE/in/catm" cp "$BOOT2/scheme1" "$STAGE/in/scheme1" # cc.scm bundle inputs