boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 46357be005c8c1f148a395e799779dd19dbaafaf
parent 8de854258970183eae63c3fe14bacf727a9e89c2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  4 May 2026 09:19:49 -0700

catm.P1pp

Diffstat:
M README.md | 10 +++++++---
A catm/catm.P1pp | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/boot1.sh | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M scripts/boot2.sh | 25 ++++++++++++-------------
4 files changed, 235 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md @@ -8,18 +8,17 @@ (define catm (hex2 catm.hex2)) (define M0 (hex2 (catm ELF.hex2 M0.hex2))) -;; ── boot1.sh ── Self-host m1pp + hex2pp ────────────────────────────── +;; ── boot1.sh ── Self-host m1pp + hex2pp + catm ─────────────────────── ;; Compile+Link for arch-specific M1 source. (defn exe (M1-src) (hex2 (catm ELF.hex2 (M0 M1-src)))) ;; P1 — portable pseudo-ISA at the M1 level. ;; P1A.M1 is the arch-specific backend. -;; m1pp and hex2pp are themselves P1 programs; after this stage they +;; m1pp and hex2pp are themselves P1 programs; after these stages they ;; replace M0 + hex2 for everything downstream. (define m1pp (exe (catm P1A.M1 m1pp.P1))) (define hex2pp (exe (catm P1A.M1 hex2pp.P1))) -;; ── boot2.sh ── Scheme ─────────────────────────────────────────────── ;; P1pp — P1 rewritten with m1pp macros. Assemble any P1pp source via m1pp. ;; P1A.M1pp is the arch-specific backend, rewritten to use M1pp. ;; P1.M1pp is the arch-agnostic interface. @@ -28,6 +27,11 @@ (defn ppexe (src) (hex2pp (catm ELF.hex2 (m1pp (catm P1A.M1pp P1.M1pp P1pp.P1pp src))))) +;; Rebuild catm from P1pp; after this stage the seed boot0 catm is +;; no longer needed and boot2/boot3 run with only boot1 binaries. +(define catm (ppexe catm.P1pp)) + +;; ── boot2.sh ── Scheme ─────────────────────────────────────────────── (define scheme (ppexe scheme1.P1pp)) ;; ── boot3.sh ── C ──────────────────────────────────────────────────── diff --git a/catm/catm.P1pp b/catm/catm.P1pp @@ -0,0 +1,166 @@ +# catm.P1pp -- P1pp implementation of `catm`. +# +# Mirrors vendor/seed/$ARCH/catm.hex2 (Jeremiah Orians' stage0 catm): +# catm OUT IN1 IN2 ... -> writes the concatenation of IN1..INn to OUT. +# +# OUT is opened O_WRONLY|O_CREAT|O_TRUNC with mode 0640. With zero inputs +# OUT is created and left empty. Errors (open/read/write) print to stderr +# and exit nonzero. +# +# Built once M1pp + hex2pp exist (boot1) so later stages can replace the +# seed boot0 catm. 
The pipeline is the standard P1pp shape: +# +# catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp catm/catm.P1pp -> combined.M1pp +# M1pp combined.M1pp -> expanded.hex2pp +# catm ELF.hex2 expanded.hex2pp -> linked.hex2pp +# hex2pp -B 0x600000 linked.hex2pp -> ELF binary + +%macro CATM_BUFSIZE() 0x100000 %endm # 1 MiB read/write chunk +%macro CATM_O_RDONLY() 0 %endm +%macro CATM_O_WRONLY_CREAT_TRUNC() 0x241 %endm # O_WRONLY|O_CREAT|O_TRUNC +%macro CATM_MODE_0640() 0x1A0 %endm # 0640 octal + +# 8-aligned NUL-terminated string (mirrors scheme1.P1pp's helper). +%macro cstr8(str) + str + 00 + .align 8 +%endm + +# argc / argv / out_fd / i live in callee-saved s0..s3 across the whole +# function. The inner copy/write loop spills in_fd / remaining-bytes / +# write-cursor to the frame because t-regs are clobbered by the sys_* +# %calls. %fn2 synthesizes a p1_main_FRAME struct from the local list. +%fn2(p1_main, {in_fd, remain, wptr}, { + %mov(s0, a0) # s0 = argc + %mov(s1, a1) # s1 = argv + + # Initialize buf_ptr from &ELF_end via libp1pp's arena helper. + %la(a0, &ELF_end) + %la(a1, &arena_table) + %la(a2, &arena_table_end) + %call(&init_arenas) + + # Need at least: catm OUT + %li(t0, 2) + %bltu(s0, t0, &.usage) + + # Open OUT = argv[1]. 
+ %ld(a0, s1, 8) + %li(a1, %CATM_O_WRONLY_CREAT_TRUNC) + %li(a2, %CATM_MODE_0640) + %call(&sys_open) + %bltz(a0, &.open_fail) + %mov(s2, a0) # s2 = out_fd + + # i = 2; while (i < argc) { copy argv[i] -> out_fd; i++ } + %li(s3, 2) + :.arg_loop + %beq(s3, s0, &.arg_done) # i == argc -> done + + # in_path = argv[i] + %shli(t0, s3, 3) + %add(t0, s1, t0) + %ld(a0, t0, 0) + %li(a1, %CATM_O_RDONLY) + %li(a2, 0) + %call(&sys_open) + %bltz(a0, &.open_fail) + %stl(a0, in_fd) + + :.copy_loop + %ldl(a0, in_fd) + %ld_global(a1, &buf_ptr) + %li(a2, %CATM_BUFSIZE) + %call(&sys_read) + %beqz(a0, &.copy_done) # EOF + %bltz(a0, &.read_fail) + + # write_all(out_fd, buf, n) + %stl(a0, remain) + %ld_global(t0, &buf_ptr) + %stl(t0, wptr) + :.write_loop + %ldl(a2, remain) + %beqz(a2, &.write_done) + %mov(a0, s2) + %ldl(a1, wptr) + %call(&sys_write) + %bltz(a0, &.write_fail) + %ldl(t0, wptr) + %add(t0, t0, a0) + %stl(t0, wptr) + %ldl(t0, remain) + %sub(t0, t0, a0) + %stl(t0, remain) + %b(&.write_loop) + :.write_done + + %b(&.copy_loop) + :.copy_done + + %ldl(a0, in_fd) + %call(&sys_close) + + %addi(s3, s3, 1) + %b(&.arg_loop) + :.arg_done + + %mov(a0, s2) + %call(&sys_close) + %li(a0, 0) + %b(&.exit) + + :.usage + %la(a0, &msg_usage) + %call(&eprint_cstr) + %li(a0, 2) + %b(&.exit) + + :.open_fail + %la(a0, &msg_open_fail) + %call(&eprint_cstr) + %li(a0, 1) + %b(&.exit) + + :.read_fail + %la(a0, &msg_read_fail) + %call(&eprint_cstr) + %li(a0, 1) + %b(&.exit) + + :.write_fail + %la(a0, &msg_write_fail) + %call(&eprint_cstr) + %li(a0, 1) + + :.exit +}) + +# ---- read-only data ------------------------------------------------------- + +:msg_usage %cstr8("usage: catm OUT [IN ...] +") +:msg_open_fail %cstr8("catm: open failed +") +:msg_read_fail %cstr8("catm: read failed +") +:msg_write_fail %cstr8("catm: write failed +") + +# ---- BSS arena table ------------------------------------------------------ +# +# One arena past :ELF_end: a single 1 MiB read/write buffer. 
init_arenas +# walks the (slot, size) rows once at startup and writes &ELF_end into +# &buf_ptr. The ELF p_memsz reservation in vendor/seed/$ARCH/ELF.hex2 +# (512 MiB) covers it with plenty of headroom. + +:arena_table +%arena_entry(&buf_ptr, %CATM_BUFSIZE) +:arena_table_end + +# ---- BSS slots (file-resident, zero-initialized) -------------------------- + +:buf_ptr $(0) + +:ELF_end diff --git a/scripts/boot1.sh b/scripts/boot1.sh @@ -1,27 +1,33 @@ #!/bin/sh -## boot1.sh — standalone build of M1pp + hex2pp from .P1 sources. +## boot1.sh — standalone build of M1pp + hex2pp + catm. ## -## Stage 1 of the README's chain: produces the two self-hosted tools the -## rest of the boot chain runs on (M1pp expander + hex2pp assembler/ -## linker), built once via the seed M0 + hex2 chain. After this stage -## the seed binaries are no longer needed. +## Stage 1 of the README's chain: produces the self-hosted tools the +## rest of the boot chain runs on. M1pp + hex2pp are built from their +## .P1 sources via the seed M0 + hex2 chain; catm is then rebuilt from +## catm.P1pp through the freshly-built M1pp + hex2pp pipeline so later +## stages can run with zero boot0 dependencies. ## ## ─── Inputs (sources, copied into staging) ──────────────────────────── ## M1pp/M1pp.P1 — M1pp expander, P1 source ## hex2pp/hex2pp.P1 — hex2pp assembler/linker, P1 source +## catm/catm.P1pp — catm, P1pp source ## P1/P1-$ARCH.M1 — pre-pruned per-arch P1 backend +## P1/P1-$ARCH.M1pp — per-arch P1pp backend (catm.P1pp) +## P1/P1.M1pp — arch-agnostic P1pp frontend +## P1/P1pp.P1pp — libp1pp standard library ## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input) ## ## ─── Inputs (binaries from prior stages) ────────────────────────────── ## build/$ARCH/boot0/{hex2, M0, catm} — built by scripts/boot0.sh ## ## ─── Tools (in container) ───────────────────────────────────────────── -## busybox sh + cat + cp + mkdir + chmod (scratch + busybox image only). 
+## busybox sh + cp + mkdir + chmod (scratch + busybox image only). ## Plus the boot0 binaries (M0, catm, hex2), staged in. ## ## ─── Outputs ────────────────────────────────────────────────────────── ## build/$ARCH/boot1/M1pp — M1pp expander ELF ## build/$ARCH/boot1/hex2pp — hex2pp assembler/linker ELF +## build/$ARCH/boot1/catm — catm ELF (rebuilt via M1pp+hex2pp) ## ## Usage: scripts/boot1.sh <arch> ## <arch> ∈ {aarch64, amd64, riscv64} @@ -69,40 +75,63 @@ mkdir -p "$STAGE/in" "$STAGE/out" "$OUT" cp "$BOOT0/hex2" "$BOOT0/M0" "$BOOT0/catm" "$STAGE/in/" cp M1pp/M1pp.P1 "$STAGE/in/M1pp.P1" cp hex2pp/hex2pp.P1 "$STAGE/in/hex2pp.P1" +cp catm/catm.P1pp "$STAGE/in/catm.P1pp" cp "P1/P1-$ARCH.M1" "$STAGE/in/P1.M1" +cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp" +cp P1/P1.M1pp "$STAGE/in/frontend.M1pp" +cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp" cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2" -# ── run the per-source .P1 -> ELF pipeline twice ────────────────────── -# For each src in {M1pp.P1, hex2pp.P1}: -# cat P1.M1 src > combined.M1 (per-arch backend prepended to source) -# M0 combined.M1 -> prog.hex2 -# catm linked.hex2 ELF.hex2 prog.hex2 -# hex2 linked.hex2 -> ELF binary +# ── run the build pipelines ─────────────────────────────────────────── +# Two pipelines, run back to back in a single container: # -# Stages everything through /tmp because stage0 tools do one syscall per -# byte; virtiofs round-trips would dominate. 
-echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp" +# .P1 -> ELF (M0 + hex2) for M1pp and hex2pp: +# catm combined.M1 P1.M1 src (per-arch backend prepended) +# M0 combined.M1 -> prog.hex2 +# catm linked.hex2 ELF.hex2 prog.hex2 +# hex2 linked.hex2 -> ELF binary +# +# .P1pp -> ELF (M1pp + hex2pp) for catm: +# catm combined.M1pp backend.M1pp frontend.M1pp libp1pp.P1pp catm.P1pp +# M1pp combined.M1pp -> expanded.hex2pp +# catm linked.hex2pp ELF.hex2 expanded.hex2pp +# hex2pp -B 0x600000 linked.hex2pp -> ELF binary +# +# Stages everything through /tmp because the M0/hex2 seed tools do one +# syscall per byte; virtiofs round-trips would dominate. +echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp; catm.P1pp -> catm" podman run --rm -i --pull=never --platform "$PLATFORM" \ --tmpfs /tmp:size=512M \ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ sh -eu -s <<'CONTAINER' -build_one() { +# .P1 -> ELF via M0 + hex2 (seed). +build_p1() { src=$1 out=$2 - cat /work/in/P1.M1 "/work/in/$src" > /tmp/combined.M1 + /work/in/catm /tmp/combined.M1 /work/in/P1.M1 "/work/in/$src" /work/in/M0 /tmp/combined.M1 /tmp/prog.hex2 /work/in/catm /tmp/linked.hex2 /work/in/ELF.hex2 /tmp/prog.hex2 /work/in/hex2 /tmp/linked.hex2 "/work/out/$out" chmod +x "/work/out/$out" } -build_one M1pp.P1 M1pp -build_one hex2pp.P1 hex2pp +build_p1 M1pp.P1 M1pp +build_p1 hex2pp.P1 hex2pp + +# .P1pp -> ELF via the just-built M1pp + hex2pp. catm-from-boot0 still +# does the M1/hex2 concatenation; the produced binary then replaces it +# in boot2/boot3. 
+/work/in/catm /tmp/combined.M1pp \ + /work/in/backend.M1pp /work/in/frontend.M1pp \ + /work/in/libp1pp.P1pp /work/in/catm.P1pp +/work/out/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp +/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp +/work/out/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm CONTAINER # ── copy outputs to final destination ───────────────────────────────── -for f in M1pp hex2pp; do +for f in M1pp hex2pp catm; do cp "$STAGE/out/$f" "$OUT/$f" chmod 0700 "$OUT/$f" done -echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp}" +echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp, catm}" diff --git a/scripts/boot2.sh b/scripts/boot2.sh @@ -13,11 +13,11 @@ ## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment ## ## ─── Inputs (binaries from prior stages) ────────────────────────────── -## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh +## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh ## ## ─── Tools (in container) ───────────────────────────────────────────── -## busybox sh + cat + cp + mkdir + chmod (scratch + busybox image only). -## Plus the boot1 binaries (M1pp, hex2pp), staged in. +## busybox sh + cp + mkdir + chmod (scratch + busybox image only). +## Plus the boot1 binaries (M1pp, hex2pp, catm), staged in. ## ## ─── Outputs ────────────────────────────────────────────────────────── ## build/$ARCH/boot2/scheme1 — scheme1 interpreter ELF @@ -54,7 +54,7 @@ if ! 
podman image exists "$IMAGE"; then fi # ── prerequisite: boot1 binaries must exist ─────────────────────────── -for bin in M1pp hex2pp; do +for bin in M1pp hex2pp catm; do [ -x "$BOOT1/$bin" ] || { echo "[boot2 $ARCH] missing prerequisite: $BOOT1/$bin (run scripts/boot1.sh $ARCH)" >&2 exit 1 @@ -65,7 +65,7 @@ done rm -rf "$STAGE" mkdir -p "$STAGE/in" "$STAGE/out" "$OUT" -cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$STAGE/in/" +cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$BOOT1/catm" "$STAGE/in/" cp scheme1/scheme1.P1pp "$STAGE/in/scheme1.P1pp" cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp" cp P1/P1.M1pp "$STAGE/in/frontend.M1pp" @@ -73,22 +73,21 @@ cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp" cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2" # ── run the .P1pp -> ELF pipeline ───────────────────────────────────── -# cat backend + frontend + libp1pp + scheme1.P1pp -> combined.M1pp -# M1pp combined.M1pp -> expanded.hex2pp -# cat ELF.hex2 expanded.hex2pp -> linked.hex2pp +# catm combined.M1pp backend + frontend + libp1pp + scheme1.P1pp +# M1pp combined.M1pp -> expanded.hex2pp +# catm linked.hex2pp ELF.hex2 expanded.hex2pp # hex2pp -B 0x600000 linked.hex2pp -> ELF binary echo "[boot2 $ARCH] scheme1.P1pp -> scheme1" podman run --rm -i --pull=never --platform "$PLATFORM" \ --tmpfs /tmp:size=512M \ -v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \ sh -eu -s <<'CONTAINER' -cat /work/in/backend.M1pp /work/in/frontend.M1pp \ - /work/in/libp1pp.P1pp /work/in/scheme1.P1pp \ - > /tmp/combined.M1pp +/work/in/catm /tmp/combined.M1pp \ + /work/in/backend.M1pp /work/in/frontend.M1pp \ + /work/in/libp1pp.P1pp /work/in/scheme1.P1pp /work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp -cat /work/in/ELF.hex2 /tmp/expanded.hex2pp > /tmp/linked.hex2pp +/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp /work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/scheme1 -chmod +x /work/out/scheme1 CONTAINER # ── copy output to final destination ──────────────────────────────────