commit c352715fd06e83f2dc4ebd6094943a4c5626a203
parent 505d30e7b343b771d0225501634bf5e9c1ca1b7f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 20 Apr 2026 14:47:16 -0700
Bootstrap the P1 toolchain from hex0-seed instead of gcc
Replace the alpine+gcc builder that compiled mescc-tools M1 and hex2
with bootstrap.sh, which drives the stage0-posix phase chain
hex0-seed -> hex0 -> hex1 -> hex2-0 -> catm -> M0 per arch. Nothing
above M0 is built, so no C compiler is involved anywhere in the flow.
Makefile collapses to a single alpine image per target arch: toolchain
build, assembly via M0, linking via hex2-0, and running the final ELF
all run under the target Linux ABI. The Containerfile is no longer
needed.
Diffstat:
| D | Containerfile | | | 6 | ------ |
| M | Makefile | | | 123 | +++++++++++++++++++++++++++++++++---------------------------------------------- |
| A | bootstrap.sh | | | 67 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 119 insertions(+), 77 deletions(-)
diff --git a/Containerfile b/Containerfile
@@ -1,6 +0,0 @@
-# Builder image: alpine + gcc, used only to compile M1 and hex2 statically.
-# The compiled binaries are then run inside a pristine alpine:latest with
-# nothing else added.
-FROM alpine:latest
-RUN apk add --no-cache gcc musl-dev
-WORKDIR /work
diff --git a/Makefile b/Makefile
@@ -1,18 +1,15 @@
-# lispcc — P1 portable pseudo-ISA demo.
+# lispcc — P1 portable pseudo-ISA demo, built from seed0.
#
-# hello.M1 is written in P1 mnemonics and assembles unchanged for all
-# three targets. The backing defs file (p1_<arch>.M1) is the only per-
-# arch source. See P1.md.
+# The toolchain (M0, hex2-0, catm) is bootstrapped per arch from a 400-byte
+# hand-assembled hex0-seed via the stage0-posix chain: no C compiler is
+# involved, not even the pre-M2-Planet cc_<arch>. See bootstrap.sh.
#
-# Two-image setup:
-# - lispcc-builder (alpine + gcc, host-arch): builds M1 and hex2
-# statically. These tools run natively regardless of ARCH.
-# - alpine:latest (pristine, per-arch): runs the assembled hello under
-# the target's Linux ABI. Non-native targets go through podman's
-# binfmt + qemu-user path.
+# One podman image is used for everything: alpine at the target arch. The
+# seed binaries are target-arch ELF, so the whole chain — toolchain build,
+# assembly, linking, and running the final program — runs under the target
+# Linux ABI. Foreign arches transparently use podman's binfmt + qemu-user.
#
# Usage:
-# make image Build the builder image (one-time)
# make Build hello for default ARCH=aarch64
# make ARCH=amd64 Targeting amd64
# make PROG=demo Build the broader-ISA demo program
@@ -23,11 +20,10 @@
# --- Configuration ---------------------------------------------------------
-# PROG = basename of the <prog>.M1 source to assemble. hello / demo.
PROG ?= hello
ARCH ?= aarch64
-# Map P1 ARCH -> Linux-platform tag for the runtime container.
+# Map P1 ARCH -> Linux-platform tag for the container.
PLATFORM_aarch64 := linux/arm64
PLATFORM_amd64 := linux/amd64
PLATFORM_riscv64 := linux/riscv64
@@ -36,83 +32,68 @@ ifeq ($(PLATFORM),)
$(error ARCH '$(ARCH)' not supported — use aarch64, amd64, or riscv64)
endif
-HOST_ROOT := $(abspath $(CURDIR)/..)
-TOOLCHAIN_SRC := /work/live-bootstrap/seed/stage0-posix/mescc-tools
-BUILDER_IMAGE := lispcc-builder:latest
-RUNTIME_IMAGE := docker.io/library/alpine:latest
+UPSTREAM := $(abspath $(CURDIR)/../live-bootstrap)
+RUNTIME_IMAGE := public.ecr.aws/docker/library/alpine:latest
-OUT_DIR := build/$(ARCH)
+OUT_DIR := build/$(ARCH)
+TOOLS_DIR := $(OUT_DIR)/tools
-# Builder: alpine + gcc at host arch, mounts the bootstrap-explore parent
-# dir at /work so we can reach upstream mescc-tools C source.
-PODMAN_BUILD := podman run --rm \
- -v $(HOST_ROOT):/work \
+# Two container views:
+# PODMAN_BOOTSTRAP — toolchain build. Needs read-only access to stage0-posix
+# under ../live-bootstrap; writes only into build/$(ARCH)/tools.
+# PODMAN — assemble / link / run. Sees only the lispcc dir.
+PODMAN_BOOTSTRAP := podman run --rm --platform $(PLATFORM) \
+ -v $(UPSTREAM):/work/live-bootstrap:ro \
+ -v $(CURDIR):/work/lispcc \
-w /work/lispcc \
- $(BUILDER_IMAGE)
-
-# Native alpine — used to run the static M1/hex2 binaries (which are
-# built for host arch and don't care about P1 target arch).
-PODMAN_RUN_NATIVE := podman run --rm \
- -v $(CURDIR):/work \
- -w /work \
$(RUNTIME_IMAGE)
-# Target alpine — used to run the generated hello binary on its own
-# Linux ABI. Foreign arches transparently use binfmt + qemu-user.
-PODMAN_RUN_TARGET := podman run --rm --platform $(PLATFORM) \
+PODMAN := podman run --rm --platform $(PLATFORM) \
-v $(CURDIR):/work \
-w /work \
$(RUNTIME_IMAGE)
-# Static linking so M1/hex2 have no libc dep at runtime.
-CFLAGS := -D_GNU_SOURCE -std=c99 -ggdb -fno-common -static
-
# --- Targets ---------------------------------------------------------------
-.PHONY: all image toolchain run run-all clean
+.PHONY: all toolchain run run-all clean
all: $(OUT_DIR)/$(PROG)
-image:
- podman build -t $(BUILDER_IMAGE) .
-
-toolchain: build/M1 build/hex2
+toolchain: $(TOOLS_DIR)/M0
-build $(OUT_DIR):
+$(OUT_DIR) $(TOOLS_DIR):
mkdir -p $@
-build/M1: | build
- $(PODMAN_BUILD) gcc $(CFLAGS) \
- $(TOOLCHAIN_SRC)/M1-macro.c \
- $(TOOLCHAIN_SRC)/stringify.c \
- $(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \
- -o build/M1
-
-build/hex2: | build
- $(PODMAN_BUILD) gcc $(CFLAGS) \
- $(TOOLCHAIN_SRC)/hex2.c \
- $(TOOLCHAIN_SRC)/hex2_linker.c \
- $(TOOLCHAIN_SRC)/hex2_word.c \
- $(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \
- -o build/hex2
-
-$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR)
- $(PODMAN_RUN_NATIVE) ./build/M1 \
- -f p1_$(ARCH).M1 \
- -f $(PROG).M1 \
- --little-endian --architecture $(ARCH) \
- -o $(OUT_DIR)/$(PROG).hex2
-
-$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 build/hex2
- $(PODMAN_RUN_NATIVE) ./build/hex2 \
- -f ELF-$(ARCH).hex2 \
- -f $(OUT_DIR)/$(PROG).hex2 \
- --little-endian --architecture $(ARCH) \
- --base-address 0x400000 \
- -o $(OUT_DIR)/$(PROG)
+# Bootstrap M0, hex2-0, catm (and the throwaway hex0/hex1) from hex0-seed.
+# One shot per arch — see bootstrap.sh for the phase-by-phase chain.
+#
+# Grouped target (&:, GNU Make 4.3+) so all five outputs come from a single recipe run.
+$(TOOLS_DIR)/M0 $(TOOLS_DIR)/hex2-0 $(TOOLS_DIR)/catm $(TOOLS_DIR)/hex0 $(TOOLS_DIR)/hex1 &: bootstrap.sh | $(TOOLS_DIR)
+ $(PODMAN_BOOTSTRAP) sh bootstrap.sh $(ARCH) /work/lispcc/$(TOOLS_DIR)
+
+# Assemble: combine per-arch defs + program, feed to M0.
+#
+# M0 takes a single positional input (no -f flag), so we catm the two
+# sources together first. The intermediate .combined.M1 is kept in OUT_DIR
+# so it gets cleaned along with everything else.
+$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 $(TOOLS_DIR)/M0 $(TOOLS_DIR)/catm | $(OUT_DIR)
+ $(PODMAN) sh -ec ' \
+ $(TOOLS_DIR)/catm $(OUT_DIR)/$(PROG).combined.M1 p1_$(ARCH).M1 $(PROG).M1 ; \
+ $(TOOLS_DIR)/M0 $(OUT_DIR)/$(PROG).combined.M1 $(OUT_DIR)/$(PROG).hex2'
+
+# Link: prepend the ELF header and feed to hex2-0.
+#
+# hex2-0 is strictly positional too, so again catm first. hex2-0 hardcodes
+# base address 0x00600000 (no --base-address flag), which is why the ELF
+# header references `&ELF_base` symbolically rather than baking in a
+# concrete VA — the header travels to whatever base the linker chose.
+$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 $(TOOLS_DIR)/hex2-0 $(TOOLS_DIR)/catm
+ $(PODMAN) sh -ec ' \
+ $(TOOLS_DIR)/catm $(OUT_DIR)/$(PROG).linked.hex2 ELF-$(ARCH).hex2 $(OUT_DIR)/$(PROG).hex2 ; \
+ $(TOOLS_DIR)/hex2-0 $(OUT_DIR)/$(PROG).linked.hex2 $(OUT_DIR)/$(PROG)'
run: $(OUT_DIR)/$(PROG)
- $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/$(PROG)
+ $(PODMAN) ./$(OUT_DIR)/$(PROG)
# `-` prefix: continue past non-zero exit. demo.M1 exits with the computed
# result (5), which is a legitimate program outcome, not a make failure.
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+# Bootstrap the seed0 toolchain we need to assemble + link P1 programs:
+# hex0-seed -> hex0 -> hex1 -> hex2-0 -> catm -> M0.
+#
+# Runs inside a target-arch alpine container. All produced binaries are
+# target-arch Linux ELF and are written to $OUT as: hex0 hex1 hex2-0 catm M0 (the intermediate M0.hex2 and, on amd64, a patched hex0-seed copy land there too).
+#
+# The only non-source input is bootstrap-seeds/POSIX/<Arch>/hex0-seed (~400
+# bytes, hand-assembled, shipped by stage0-posix). Nothing above M0 is built,
+# which is the whole point — no C compiler is involved, not even cc_<arch>.
+#
+# Phase map (stage0-posix mescc-tools-{seed,mini}-kaem.kaem phases 0-3):
+# 0) hex0-seed + hex0_<A>.hex0 -> hex0
+# 1) hex0 + hex1_<A>.hex0 -> hex1
+# 2) hex1 + hex2_<A>.hex1 -> hex2-0
+# 2b) (hex1|hex2-0) + catm_<A>.(hex1|hex2) -> catm (arch-specific)
+# 3a) catm : ELF header + M0_<A>.hex2 -> M0.hex2
+# 3b) hex2-0 : M0.hex2 -> M0
+#
+# Usage: bootstrap.sh <arch> <out-dir>
+# arch: aarch64 | amd64 | riscv64
+# out-dir: absolute path where tool binaries should land
+set -eu
+
+ARCH=$1
+OUT=$2
+
+# Map lispcc's lowercase ARCH to stage0-posix's dir name, plus the two catm
+# inputs that differ across arches: its source file and the tool that assembles it.
+case "$ARCH" in
+ aarch64) A=AArch64 ; CATM_SRC=catm_AArch64.hex1 ; CATM_ASM=hex1 ;;
+ amd64) A=AMD64 ; CATM_SRC=catm_AMD64.hex2 ; CATM_ASM=hex2-0 ;;
+ riscv64) A=riscv64 ; CATM_SRC=catm_riscv64.hex2 ; CATM_ASM=hex2-0 ;;
+ *) echo "bootstrap.sh: unsupported arch '$ARCH'" >&2 ; exit 1 ;;
+esac
+
+S=/work/live-bootstrap/seed/stage0-posix
+mkdir -p "$OUT"
+cd "$S"
+
+# qemu-user amd64 workaround: the shipped hex0-seed and the hex0 it produces
+# both have a program header with p_flags=0x01 (PF_X only, no PF_R). Native
+# x86-64 still runs it, because the hardware in effect makes executable pages
+# readable, but qemu-user's stricter ELF loader faults when fetching
+# instructions from an unreadable segment. Copy the seed to a writable location
+# and flip p_flags to 0x05 (PF_R|PF_X) before use. All later seed sources already use 0x07.
+#
+# This only matters for amd64 builds emulated on non-amd64 hosts; on a native
+# amd64 host the patch is harmless (binary would load fine either way).
+SEED=./bootstrap-seeds/POSIX/"$A"/hex0-seed
+if [ "$ARCH" = amd64 ]; then
+ cp "$SEED" "$OUT"/hex0-seed
+ printf '\5' | dd of="$OUT"/hex0-seed bs=1 seek=68 count=1 conv=notrunc status=none
+ chmod +x "$OUT"/hex0-seed
+ SEED="$OUT"/hex0-seed
+fi
+
+"$SEED" "$A"/hex0_"$A".hex0 "$OUT"/hex0
+if [ "$ARCH" = amd64 ]; then
+ printf '\5' | dd of="$OUT"/hex0 bs=1 seek=68 count=1 conv=notrunc status=none
+fi
+
+"$OUT"/hex0 "$A"/hex1_"$A".hex0 "$OUT"/hex1
+"$OUT"/hex1 "$A"/hex2_"$A".hex1 "$OUT"/hex2-0
+"$OUT"/"$CATM_ASM" "$A"/"$CATM_SRC" "$OUT"/catm
+"$OUT"/catm "$OUT"/M0.hex2 "$A"/ELF-"$ARCH".hex2 "$A"/M0_"$A".hex2
+"$OUT"/hex2-0 "$OUT"/M0.hex2 "$OUT"/M0