commit 46357be005c8c1f148a395e799779dd19dbaafaf
parent 8de854258970183eae63c3fe14bacf727a9e89c2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 09:19:49 -0700
catm.P1pp
Diffstat:
4 files changed, 235 insertions(+), 37 deletions(-)
diff --git a/README.md b/README.md
@@ -8,18 +8,17 @@
(define catm (hex2 catm.hex2))
(define M0 (hex2 (catm ELF.hex2 M0.hex2)))
-;; ── boot1.sh ── Self-host m1pp + hex2pp ──────────────────────────────
+;; ── boot1.sh ── Self-host m1pp + hex2pp + catm ───────────────────────
;; Compile+Link for arch-specific M1 source.
(defn exe (M1-src) (hex2 (catm ELF.hex2 (M0 M1-src))))
;; P1 — portable pseudo-ISA at the M1 level.
;; P1A.M1 is the arch-specific backend.
-;; m1pp and hex2pp are themselves P1 programs; after this stage they
+;; m1pp and hex2pp are themselves P1 programs; after these stages they
;; replace M0 + hex2 for everything downstream.
(define m1pp (exe (catm P1A.M1 m1pp.P1)))
(define hex2pp (exe (catm P1A.M1 hex2pp.P1)))
-;; ── boot2.sh ── Scheme ───────────────────────────────────────────────
;; P1pp — P1 rewritten with m1pp macros. Assemble any P1pp source via m1pp.
;; P1A.M1pp is the arch-specific backend, rewritten to use M1pp.
;; P1.M1pp is the arch-agnostic interface.
@@ -28,6 +27,11 @@
(defn ppexe (src)
(hex2pp (catm ELF.hex2 (m1pp (catm P1A.M1pp P1.M1pp P1pp.P1pp src)))))
+;; Rebuild catm from P1pp; after this stage the seed boot0 catm is
+;; no longer needed and boot2/boot3 run with only boot1 binaries.
+(define catm (ppexe catm.P1pp))
+
+;; ── boot2.sh ── Scheme ───────────────────────────────────────────────
(define scheme (ppexe scheme1.P1pp))
;; ── boot3.sh ── C ────────────────────────────────────────────────────
diff --git a/catm/catm.P1pp b/catm/catm.P1pp
@@ -0,0 +1,166 @@
+# catm.P1pp -- P1pp implementation of `catm`.
+#
+# Mirrors vendor/seed/$ARCH/catm.hex2 (Jeremiah Orians' stage0 catm):
+# catm OUT IN1 IN2 ... -> writes the concatenation of IN1..INn to OUT.
+#
+# OUT is opened O_WRONLY|O_CREAT|O_TRUNC with mode 0640. With zero inputs
+# OUT is created and left empty. Errors (open/read/write) print to stderr
+# and exit nonzero.
+#
+# Built once M1pp + hex2pp exist (boot1) so later stages can replace the
+# seed boot0 catm. The pipeline is the standard P1pp shape:
+#
+# catm combined.M1pp P1-<arch>.M1pp P1.M1pp P1pp.P1pp catm/catm.P1pp
+# M1pp combined.M1pp -> expanded.hex2pp
+# catm linked.hex2pp ELF.hex2 expanded.hex2pp
+# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
+
+# Compile-time constants. Each is a zero-argument macro that is spliced
+# in as %NAME at the use site, e.g. %li(a2, %CATM_BUFSIZE).
+%macro CATM_BUFSIZE() 0x100000 %endm # 1 MiB read/write chunk; also the size of the buf_ptr arena
+%macro CATM_O_RDONLY() 0 %endm # open flags used for every input file
+%macro CATM_O_WRONLY_CREAT_TRUNC() 0x241 %endm # O_WRONLY|O_CREAT|O_TRUNC (0x1|0x40|0x200, the Linux encoding)
+%macro CATM_MODE_0640() 0x1A0 %endm # 0640 octal (rw-r-----), creation mode for OUT
+
+# cstr8(str): emits `str`, then a 00 terminator byte, then `.align 8`
+# (presumably padding so whatever follows starts on an 8-byte boundary).
+# Used below for the msg_* strings. Mirrors scheme1.P1pp's helper.
+%macro cstr8(str)
+ str
+ 00
+ .align 8
+%endm
+
+# p1_main -- program entry for `catm OUT [IN ...]`.
+#
+# In:  a0 = argc, a1 = argv.
+# Out: a0 holds the exit status when control reaches .exit:
+#        0  every input was copied and OUT was closed
+#        2  usage error (argc < 2, i.e. no OUT argument)
+#        1  open / read / write failure (message printed via eprint_cstr)
+#
+# argc / argv / out_fd / i live in callee-saved s0..s3 across the whole
+# function. The inner copy/write loop spills in_fd / remaining-bytes /
+# write-cursor to the frame because t-regs are clobbered by the sys_*
+# %calls. %fn2 synthesizes a p1_main_FRAME struct from the local list.
+#
+# A negative result from sys_open / sys_read / sys_write is treated as
+# failure. Error paths branch straight to .exit without closing in_fd
+# or out_fd.
+%fn2(p1_main, {in_fd, remain, wptr}, {
+ %mov(s0, a0) # s0 = argc
+ %mov(s1, a1) # s1 = argv
+
+ # Initialize buf_ptr from &ELF_end via libp1pp's arena helper.
+ # a0 = arena base, a1 / a2 = start / end of the (slot, size) table.
+ %la(a0, &ELF_end)
+ %la(a1, &arena_table)
+ %la(a2, &arena_table_end)
+ %call(&init_arenas)
+
+ # Need at least: catm OUT
+ %li(t0, 2)
+ %bltu(s0, t0, &.usage)
+
+ # Open OUT = argv[1].
+ %ld(a0, s1, 8)
+ %li(a1, %CATM_O_WRONLY_CREAT_TRUNC)
+ %li(a2, %CATM_MODE_0640)
+ %call(&sys_open)
+ %bltz(a0, &.open_fail)
+ %mov(s2, a0) # s2 = out_fd
+
+ # i = 2; while (i < argc) { copy argv[i] -> out_fd; i++ }
+ # argc >= 2 was checked above, so testing i == argc is enough.
+ %li(s3, 2)
+ :.arg_loop
+ %beq(s3, s0, &.arg_done) # i == argc -> done
+
+ # in_path = argv[i]
+ %shli(t0, s3, 3) # t0 = i * 8 (pointer-sized argv slots)
+ %add(t0, s1, t0)
+ %ld(a0, t0, 0)
+ %li(a1, %CATM_O_RDONLY)
+ %li(a2, 0)
+ %call(&sys_open)
+ %bltz(a0, &.open_fail)
+ %stl(a0, in_fd)
+
+ # Read up to CATM_BUFSIZE bytes at a time until sys_read returns 0.
+ :.copy_loop
+ %ldl(a0, in_fd)
+ %ld_global(a1, &buf_ptr)
+ %li(a2, %CATM_BUFSIZE)
+ %call(&sys_read)
+ %beqz(a0, &.copy_done) # EOF
+ %bltz(a0, &.read_fail)
+
+ # write_all(out_fd, buf, n): keep writing until all n bytes are out,
+ # advancing wptr and shrinking remain after each partial write.
+ %stl(a0, remain)
+ %ld_global(t0, &buf_ptr)
+ %stl(t0, wptr)
+ :.write_loop
+ %ldl(a2, remain)
+ %beqz(a2, &.write_done) # chunk fully flushed
+ %mov(a0, s2)
+ %ldl(a1, wptr)
+ %call(&sys_write)
+ %bltz(a0, &.write_fail)
+ # remain was nonzero, so a 0 return means no progress was made.
+ # Report it as a write failure instead of retrying forever.
+ %beqz(a0, &.write_fail)
+ %ldl(t0, wptr)
+ %add(t0, t0, a0) # wptr += bytes written
+ %stl(t0, wptr)
+ %ldl(t0, remain)
+ %sub(t0, t0, a0) # remain -= bytes written
+ %stl(t0, remain)
+ %b(&.write_loop)
+ :.write_done
+
+ %b(&.copy_loop)
+ :.copy_done
+
+ # Done with this input: close it and move on to argv[i + 1].
+ %ldl(a0, in_fd)
+ %call(&sys_close)
+
+ %addi(s3, s3, 1)
+ %b(&.arg_loop)
+ :.arg_done
+
+ # All inputs copied (or there were none): close OUT, status 0.
+ %mov(a0, s2)
+ %call(&sys_close)
+ %li(a0, 0)
+ %b(&.exit)
+
+ :.usage
+ %la(a0, &msg_usage)
+ %call(&eprint_cstr)
+ %li(a0, 2)
+ %b(&.exit)
+
+ # Shared by the OUT open and every input open.
+ :.open_fail
+ %la(a0, &msg_open_fail)
+ %call(&eprint_cstr)
+ %li(a0, 1)
+ %b(&.exit)
+
+ :.read_fail
+ %la(a0, &msg_read_fail)
+ %call(&eprint_cstr)
+ %li(a0, 1)
+ %b(&.exit)
+
+ # Last error path: falls through into .exit.
+ :.write_fail
+ %la(a0, &msg_write_fail)
+ %call(&eprint_cstr)
+ %li(a0, 1)
+
+ :.exit
+})
+
+# ---- read-only data -------------------------------------------------------
+#
+# Diagnostics that p1_main hands to eprint_cstr. Each literal ends with a
+# newline, which is why its closing quote sits on the following line.
+
+:msg_usage %cstr8("usage: catm OUT [IN ...]
+")
+:msg_open_fail %cstr8("catm: open failed
+")
+:msg_read_fail %cstr8("catm: read failed
+")
+:msg_write_fail %cstr8("catm: write failed
+")
+
+# ---- BSS arena table ------------------------------------------------------
+#
+# One arena past :ELF_end: a single 1 MiB read/write buffer. init_arenas
+# walks the (slot, size) rows once at startup and writes &ELF_end into
+# &buf_ptr. The ELF p_memsz reservation in vendor/seed/$ARCH/ELF.hex2
+# (512 MiB) covers it with plenty of headroom.
+
+:arena_table
+%arena_entry(&buf_ptr, %CATM_BUFSIZE)
+:arena_table_end
+
+# ---- BSS slots (file-resident, zero-initialized) --------------------------
+
+# buf_ptr: base address of the I/O buffer. Stored as 0 in the image and
+# filled in by p1_main's init_arenas call before the first read.
+:buf_ptr $(0)
+
+# ELF_end: last label in this file. p1_main passes its address to
+# init_arenas as the place where the arena begins.
+:ELF_end
diff --git a/scripts/boot1.sh b/scripts/boot1.sh
@@ -1,27 +1,33 @@
#!/bin/sh
-## boot1.sh — standalone build of M1pp + hex2pp from .P1 sources.
+## boot1.sh — standalone build of M1pp + hex2pp + catm.
##
-## Stage 1 of the README's chain: produces the two self-hosted tools the
-## rest of the boot chain runs on (M1pp expander + hex2pp assembler/
-## linker), built once via the seed M0 + hex2 chain. After this stage
-## the seed binaries are no longer needed.
+## Stage 1 of the README's chain: produces the self-hosted tools the
+## rest of the boot chain runs on. M1pp + hex2pp are built from their
+## .P1 sources via the seed M0 + hex2 chain; catm is then rebuilt from
+## catm.P1pp through the freshly-built M1pp + hex2pp pipeline so later
+## stages can run with zero boot0 dependencies.
##
## ─── Inputs (sources, copied into staging) ────────────────────────────
## M1pp/M1pp.P1 — M1pp expander, P1 source
## hex2pp/hex2pp.P1 — hex2pp assembler/linker, P1 source
+## catm/catm.P1pp — catm, P1pp source
## P1/P1-$ARCH.M1 — pre-pruned per-arch P1 backend
+## P1/P1-$ARCH.M1pp — per-arch P1pp backend (catm.P1pp)
+## P1/P1.M1pp — arch-agnostic P1pp frontend
+## P1/P1pp.P1pp — libp1pp standard library
## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment (catm input)
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
## build/$ARCH/boot0/{hex2, M0, catm} — built by scripts/boot0.sh
##
## ─── Tools (in container) ─────────────────────────────────────────────
-## busybox sh + cat + cp + mkdir + chmod (scratch + busybox image only).
+## busybox sh + cp + mkdir + chmod (scratch + busybox image only).
## Plus the boot0 binaries (M0, catm, hex2), staged in.
##
## ─── Outputs ──────────────────────────────────────────────────────────
## build/$ARCH/boot1/M1pp — M1pp expander ELF
## build/$ARCH/boot1/hex2pp — hex2pp assembler/linker ELF
+## build/$ARCH/boot1/catm — catm ELF (rebuilt via M1pp+hex2pp)
##
## Usage: scripts/boot1.sh <arch>
## <arch> ∈ {aarch64, amd64, riscv64}
@@ -69,40 +75,63 @@ mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
cp "$BOOT0/hex2" "$BOOT0/M0" "$BOOT0/catm" "$STAGE/in/"
cp M1pp/M1pp.P1 "$STAGE/in/M1pp.P1"
cp hex2pp/hex2pp.P1 "$STAGE/in/hex2pp.P1"
+cp catm/catm.P1pp "$STAGE/in/catm.P1pp"
cp "P1/P1-$ARCH.M1" "$STAGE/in/P1.M1"
+cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
+cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
+cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
-# ── run the per-source .P1 -> ELF pipeline twice ──────────────────────
-# For each src in {M1pp.P1, hex2pp.P1}:
-# cat P1.M1 src > combined.M1 (per-arch backend prepended to source)
-# M0 combined.M1 -> prog.hex2
-# catm linked.hex2 ELF.hex2 prog.hex2
-# hex2 linked.hex2 -> ELF binary
+# ── run the build pipelines ───────────────────────────────────────────
+# Two pipelines, run back to back in a single container:
#
-# Stages everything through /tmp because stage0 tools do one syscall per
-# byte; virtiofs round-trips would dominate.
-echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp"
+# .P1 -> ELF (M0 + hex2) for M1pp and hex2pp:
+# catm combined.M1 P1.M1 src (per-arch backend prepended)
+# M0 combined.M1 -> prog.hex2
+# catm linked.hex2 ELF.hex2 prog.hex2
+# hex2 linked.hex2 -> ELF binary
+#
+# .P1pp -> ELF (M1pp + hex2pp) for catm:
+# catm combined.M1pp backend.M1pp frontend.M1pp libp1pp.P1pp catm.P1pp
+# M1pp combined.M1pp -> expanded.hex2pp
+# catm linked.hex2pp ELF.hex2 expanded.hex2pp
+# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
+#
+# Stages everything through /tmp because the M0/hex2 seed tools do one
+# syscall per byte; virtiofs round-trips would dominate.
+echo "[boot1 $ARCH] M1pp.P1 + hex2pp.P1 -> M1pp + hex2pp; catm.P1pp -> catm"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=512M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu -s <<'CONTAINER'
-build_one() {
+# .P1 -> ELF via M0 + hex2 (seed).
+#   $1  source file name under /work/in (e.g. M1pp.P1)
+#   $2  name of the ELF to write under /work/out
+# Intermediates use fixed /tmp paths and are overwritten on every call.
+# The result is marked executable because the .P1pp pipeline further down
+# runs /work/out/M1pp and /work/out/hex2pp inside this same container.
+build_p1() {
src=$1
out=$2
- cat /work/in/P1.M1 "/work/in/$src" > /tmp/combined.M1
+ /work/in/catm /tmp/combined.M1 /work/in/P1.M1 "/work/in/$src"
/work/in/M0 /tmp/combined.M1 /tmp/prog.hex2
/work/in/catm /tmp/linked.hex2 /work/in/ELF.hex2 /tmp/prog.hex2
/work/in/hex2 /tmp/linked.hex2 "/work/out/$out"
chmod +x "/work/out/$out"
}
-build_one M1pp.P1 M1pp
-build_one hex2pp.P1 hex2pp
+build_p1 M1pp.P1 M1pp
+build_p1 hex2pp.P1 hex2pp
+
+# .P1pp -> ELF via the just-built M1pp + hex2pp. catm-from-boot0 still
+# does the .M1pp/.hex2pp concatenation; the produced binary then
+# replaces it in boot2/boot3.
+/work/in/catm /tmp/combined.M1pp \
+ /work/in/backend.M1pp /work/in/frontend.M1pp \
+ /work/in/libp1pp.P1pp /work/in/catm.P1pp
+/work/out/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
+/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp
+/work/out/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/catm
CONTAINER
# ── copy outputs to final destination ─────────────────────────────────
-for f in M1pp hex2pp; do
+for f in M1pp hex2pp catm; do
cp "$STAGE/out/$f" "$OUT/$f"
chmod 0700 "$OUT/$f"
done
-echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp}"
+echo "[boot1 $ARCH] OK -> $OUT/{M1pp, hex2pp, catm}"
diff --git a/scripts/boot2.sh b/scripts/boot2.sh
@@ -13,11 +13,11 @@
## vendor/seed/$ARCH/ELF.hex2 — ELF header fragment
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
-## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
+## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh
##
## ─── Tools (in container) ─────────────────────────────────────────────
-## busybox sh + cat + cp + mkdir + chmod (scratch + busybox image only).
-## Plus the boot1 binaries (M1pp, hex2pp), staged in.
+## busybox sh + cp + mkdir + chmod (scratch + busybox image only).
+## Plus the boot1 binaries (M1pp, hex2pp, catm), staged in.
##
## ─── Outputs ──────────────────────────────────────────────────────────
## build/$ARCH/boot2/scheme1 — scheme1 interpreter ELF
@@ -54,7 +54,7 @@ if ! podman image exists "$IMAGE"; then
fi
# ── prerequisite: boot1 binaries must exist ───────────────────────────
-for bin in M1pp hex2pp; do
+for bin in M1pp hex2pp catm; do
[ -x "$BOOT1/$bin" ] || {
echo "[boot2 $ARCH] missing prerequisite: $BOOT1/$bin (run scripts/boot1.sh $ARCH)" >&2
exit 1
@@ -65,7 +65,7 @@ done
rm -rf "$STAGE"
mkdir -p "$STAGE/in" "$STAGE/out" "$OUT"
-cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$STAGE/in/"
+cp "$BOOT1/M1pp" "$BOOT1/hex2pp" "$BOOT1/catm" "$STAGE/in/"
cp scheme1/scheme1.P1pp "$STAGE/in/scheme1.P1pp"
cp "P1/P1-$ARCH.M1pp" "$STAGE/in/backend.M1pp"
cp P1/P1.M1pp "$STAGE/in/frontend.M1pp"
@@ -73,22 +73,21 @@ cp P1/P1pp.P1pp "$STAGE/in/libp1pp.P1pp"
cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
# ── run the .P1pp -> ELF pipeline ─────────────────────────────────────
-# cat backend + frontend + libp1pp + scheme1.P1pp -> combined.M1pp
-# M1pp combined.M1pp -> expanded.hex2pp
-# cat ELF.hex2 expanded.hex2pp -> linked.hex2pp
+# catm combined.M1pp backend + frontend + libp1pp + scheme1.P1pp
+# M1pp combined.M1pp -> expanded.hex2pp
+# catm linked.hex2pp ELF.hex2 expanded.hex2pp
# hex2pp -B 0x600000 linked.hex2pp -> ELF binary
echo "[boot2 $ARCH] scheme1.P1pp -> scheme1"
podman run --rm -i --pull=never --platform "$PLATFORM" \
--tmpfs /tmp:size=512M \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu -s <<'CONTAINER'
-cat /work/in/backend.M1pp /work/in/frontend.M1pp \
- /work/in/libp1pp.P1pp /work/in/scheme1.P1pp \
- > /tmp/combined.M1pp
+/work/in/catm /tmp/combined.M1pp \
+ /work/in/backend.M1pp /work/in/frontend.M1pp \
+ /work/in/libp1pp.P1pp /work/in/scheme1.P1pp
/work/in/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
-cat /work/in/ELF.hex2 /tmp/expanded.hex2pp > /tmp/linked.hex2pp
+/work/in/catm /tmp/linked.hex2pp /work/in/ELF.hex2 /tmp/expanded.hex2pp
/work/in/hex2pp -B 0x600000 /tmp/linked.hex2pp /work/out/scheme1
-chmod +x /work/out/scheme1
CONTAINER
# ── copy output to final destination ──────────────────────────────────