boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit c352715fd06e83f2dc4ebd6094943a4c5626a203
parent 505d30e7b343b771d0225501634bf5e9c1ca1b7f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 20 Apr 2026 14:47:16 -0700

Bootstrap the P1 toolchain from hex0-seed instead of gcc

Replace the alpine+gcc builder that compiled mescc-tools M1 and hex2
with bootstrap.sh, which drives the stage0-posix phase chain
hex0-seed -> hex0 -> hex1 -> hex2-0 -> catm -> M0 per arch. Nothing
above M0 is built, so no C compiler is involved anywhere in the flow.

Makefile collapses to a single alpine image per target arch: toolchain
build, assembly via M0, linking via hex2-0, and running the final ELF
all run under the target Linux ABI. The Containerfile is no longer
needed.

Diffstat:
D Containerfile | 6 ------
M Makefile | 123 +++++++++++++++++++++++++++++++++----------------------------------------------
A bootstrap.sh | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 119 insertions(+), 77 deletions(-)

diff --git a/Containerfile b/Containerfile @@ -1,6 +0,0 @@ -# Builder image: alpine + gcc, used only to compile M1 and hex2 statically. -# The compiled binaries are then run inside a pristine alpine:latest with -# nothing else added. -FROM alpine:latest -RUN apk add --no-cache gcc musl-dev -WORKDIR /work diff --git a/Makefile b/Makefile @@ -1,18 +1,15 @@ -# lispcc — P1 portable pseudo-ISA demo. +# lispcc — P1 portable pseudo-ISA demo, built from seed0. # -# hello.M1 is written in P1 mnemonics and assembles unchanged for all -# three targets. The backing defs file (p1_<arch>.M1) is the only per- -# arch source. See P1.md. +# The toolchain (M0, hex2-0, catm) is bootstrapped per arch from a 400-byte +# hand-assembled hex0-seed via the stage0-posix chain: no C compiler is +# involved, not even the pre-M2-Planet cc_<arch>. See bootstrap.sh. # -# Two-image setup: -# - lispcc-builder (alpine + gcc, host-arch): builds M1 and hex2 -# statically. These tools run natively regardless of ARCH. -# - alpine:latest (pristine, per-arch): runs the assembled hello under -# the target's Linux ABI. Non-native targets go through podman's -# binfmt + qemu-user path. +# One podman image is used for everything: alpine at the target arch. The +# seed binaries are target-arch ELF, so the whole chain — toolchain build, +# assembly, linking, and running the final program — runs under the target +# Linux ABI. Foreign arches transparently use podman's binfmt + qemu-user. # # Usage: -# make image Build the builder image (one-time) # make Build hello for default ARCH=aarch64 # make ARCH=amd64 Targeting amd64 # make PROG=demo Build the broader-ISA demo program @@ -23,11 +20,10 @@ # --- Configuration --------------------------------------------------------- -# PROG = basename of the <prog>.M1 source to assemble. hello / demo. PROG ?= hello ARCH ?= aarch64 -# Map P1 ARCH -> Linux-platform tag for the runtime container. +# Map P1 ARCH -> Linux-platform tag for the container. 
PLATFORM_aarch64 := linux/arm64 PLATFORM_amd64 := linux/amd64 PLATFORM_riscv64 := linux/riscv64 @@ -36,83 +32,68 @@ ifeq ($(PLATFORM),) $(error ARCH '$(ARCH)' not supported — use aarch64, amd64, or riscv64) endif -HOST_ROOT := $(abspath $(CURDIR)/..) -TOOLCHAIN_SRC := /work/live-bootstrap/seed/stage0-posix/mescc-tools -BUILDER_IMAGE := lispcc-builder:latest -RUNTIME_IMAGE := docker.io/library/alpine:latest +UPSTREAM := $(abspath $(CURDIR)/../live-bootstrap) +RUNTIME_IMAGE := public.ecr.aws/docker/library/alpine:latest -OUT_DIR := build/$(ARCH) +OUT_DIR := build/$(ARCH) +TOOLS_DIR := $(OUT_DIR)/tools -# Builder: alpine + gcc at host arch, mounts the bootstrap-explore parent -# dir at /work so we can reach upstream mescc-tools C source. -PODMAN_BUILD := podman run --rm \ - -v $(HOST_ROOT):/work \ +# Two container views: +# PODMAN_BOOTSTRAP — toolchain build. Needs read-only access to stage0-posix +# under ../live-bootstrap; writes only into build/$(ARCH)/tools. +# PODMAN — assemble / link / run. Sees only the lispcc dir. +PODMAN_BOOTSTRAP := podman run --rm --platform $(PLATFORM) \ + -v $(UPSTREAM):/work/live-bootstrap:ro \ + -v $(CURDIR):/work/lispcc \ -w /work/lispcc \ - $(BUILDER_IMAGE) - -# Native alpine — used to run the static M1/hex2 binaries (which are -# built for host arch and don't care about P1 target arch). -PODMAN_RUN_NATIVE := podman run --rm \ - -v $(CURDIR):/work \ - -w /work \ $(RUNTIME_IMAGE) -# Target alpine — used to run the generated hello binary on its own -# Linux ABI. Foreign arches transparently use binfmt + qemu-user. -PODMAN_RUN_TARGET := podman run --rm --platform $(PLATFORM) \ +PODMAN := podman run --rm --platform $(PLATFORM) \ -v $(CURDIR):/work \ -w /work \ $(RUNTIME_IMAGE) -# Static linking so M1/hex2 have no libc dep at runtime. 
-CFLAGS := -D_GNU_SOURCE -std=c99 -ggdb -fno-common -static - # --- Targets --------------------------------------------------------------- -.PHONY: all image toolchain run run-all clean +.PHONY: all toolchain run run-all clean all: $(OUT_DIR)/$(PROG) -image: - podman build -t $(BUILDER_IMAGE) . - -toolchain: build/M1 build/hex2 +toolchain: $(TOOLS_DIR)/M0 -build $(OUT_DIR): +$(OUT_DIR) $(TOOLS_DIR): mkdir -p $@ -build/M1: | build - $(PODMAN_BUILD) gcc $(CFLAGS) \ - $(TOOLCHAIN_SRC)/M1-macro.c \ - $(TOOLCHAIN_SRC)/stringify.c \ - $(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \ - -o build/M1 - -build/hex2: | build - $(PODMAN_BUILD) gcc $(CFLAGS) \ - $(TOOLCHAIN_SRC)/hex2.c \ - $(TOOLCHAIN_SRC)/hex2_linker.c \ - $(TOOLCHAIN_SRC)/hex2_word.c \ - $(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \ - -o build/hex2 - -$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR) - $(PODMAN_RUN_NATIVE) ./build/M1 \ - -f p1_$(ARCH).M1 \ - -f $(PROG).M1 \ - --little-endian --architecture $(ARCH) \ - -o $(OUT_DIR)/$(PROG).hex2 - -$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 build/hex2 - $(PODMAN_RUN_NATIVE) ./build/hex2 \ - -f ELF-$(ARCH).hex2 \ - -f $(OUT_DIR)/$(PROG).hex2 \ - --little-endian --architecture $(ARCH) \ - --base-address 0x400000 \ - -o $(OUT_DIR)/$(PROG) +# Bootstrap M0, hex2-0, catm (and the throwaway hex0/hex1) from hex0-seed. +# One shot per arch — see bootstrap.sh for the phase-by-phase chain. +# +# Grouped target (&:) so all five outputs come from a single recipe run. +$(TOOLS_DIR)/M0 $(TOOLS_DIR)/hex2-0 $(TOOLS_DIR)/catm $(TOOLS_DIR)/hex0 $(TOOLS_DIR)/hex1 &: bootstrap.sh | $(TOOLS_DIR) + $(PODMAN_BOOTSTRAP) sh bootstrap.sh $(ARCH) /work/lispcc/$(TOOLS_DIR) + +# Assemble: combine per-arch defs + program, feed to M0. +# +# M0 takes a single positional input (no -f flag), so we catm the two +# sources together first. The intermediate .combined.M1 is kept in OUT_DIR +# so it gets cleaned along with everything else. 
+$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 $(TOOLS_DIR)/M0 $(TOOLS_DIR)/catm | $(OUT_DIR) + $(PODMAN) sh -ec ' \ + $(TOOLS_DIR)/catm $(OUT_DIR)/$(PROG).combined.M1 p1_$(ARCH).M1 $(PROG).M1 ; \ + $(TOOLS_DIR)/M0 $(OUT_DIR)/$(PROG).combined.M1 $(OUT_DIR)/$(PROG).hex2' + +# Link: prepend the ELF header and feed to hex2-0. +# +# hex2-0 is strictly positional too, so again catm first. hex2-0 hardcodes +# base address 0x00600000 (no --base-address flag), which is why the ELF +# header references `&ELF_base` symbolically rather than baking in a +# concrete VA — the header travels to whatever base the linker chose. +$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 $(TOOLS_DIR)/hex2-0 $(TOOLS_DIR)/catm + $(PODMAN) sh -ec ' \ + $(TOOLS_DIR)/catm $(OUT_DIR)/$(PROG).linked.hex2 ELF-$(ARCH).hex2 $(OUT_DIR)/$(PROG).hex2 ; \ + $(TOOLS_DIR)/hex2-0 $(OUT_DIR)/$(PROG).linked.hex2 $(OUT_DIR)/$(PROG)' run: $(OUT_DIR)/$(PROG) - $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/$(PROG) + $(PODMAN) ./$(OUT_DIR)/$(PROG) # `-` prefix: continue past non-zero exit. demo.M1 exits with the computed # result (5), which is a legitimate program outcome, not a make failure. diff --git a/bootstrap.sh b/bootstrap.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# Bootstrap the seed0 toolchain we need to assemble + link P1 programs: +# hex0-seed -> hex0 -> hex1 -> hex2-0 -> catm -> M0. +# +# Runs inside a target-arch alpine container. All produced binaries are +# target-arch Linux ELF and are written to $OUT as: hex0 hex1 hex2-0 catm M0. +# +# The only non-source input is bootstrap-seeds/POSIX/<Arch>/hex0-seed (~400 +# bytes, hand-assembled, shipped by stage0-posix). Nothing above M0 is built, +# which is the whole point — no C compiler is involved, not even cc_<arch>. 
+# +# Phase map (stage0-posix mescc-tools-{seed,mini}-kaem.kaem phases 0-3): +# 0) hex0-seed + hex0_<A>.hex0 -> hex0 +# 1) hex0 + hex1_<A>.hex0 -> hex1 +# 2) hex1 + hex2_<A>.hex1 -> hex2-0 +# 2b) (hex1|hex2-0) + catm_<A>.(hex1|hex2) -> catm (arch-specific) +# 3a) catm : ELF header + M0_<A>.hex2 -> M0.hex2 +# 3b) hex2-0 : M0.hex2 -> M0 +# +# Usage: bootstrap.sh <arch> <out-dir> +# arch: aarch64 | amd64 | riscv64 +# out-dir: absolute path where tool binaries should land +set -eu + +ARCH=$1 +OUT=$2 + +# Map lispcc's lowercase ARCH to stage0-posix's dir name and the two files +# whose extensions differ across arches (catm's source + assembler). +case "$ARCH" in + aarch64) A=AArch64 ; CATM_SRC=catm_AArch64.hex1 ; CATM_ASM=hex1 ;; + amd64) A=AMD64 ; CATM_SRC=catm_AMD64.hex2 ; CATM_ASM=hex2-0 ;; + riscv64) A=riscv64 ; CATM_SRC=catm_riscv64.hex2 ; CATM_ASM=hex2-0 ;; + *) echo "bootstrap.sh: unsupported arch '$ARCH'" >&2 ; exit 1 ;; +esac + +S=/work/live-bootstrap/seed/stage0-posix +mkdir -p "$OUT" +cd "$S" + +# qemu-user amd64 workaround: the shipped hex0-seed and the hex0 it produces +# both have a program header with p_flags=0x01 (PF_X only, no PF_R). A native +# Linux kernel treats x86-64's hardware coercion as making this loadable, but +# qemu-user's stricter ELF loader faults when fetching instructions from an +# unreadable segment. Copy the seed to a writable location and flip p_flags +# to 0x05 (PF_R|PF_X) before use. All later seed sources already use 0x07. +# +# This only affects foreign-arch builds on non-amd64 hosts; on a native amd64 +# host the patch is a no-op (binary would load fine either way). 
+SEED=./bootstrap-seeds/POSIX/"$A"/hex0-seed +if [ "$ARCH" = amd64 ]; then + cp "$SEED" "$OUT"/hex0-seed + printf '\5' | dd of="$OUT"/hex0-seed bs=1 seek=68 count=1 conv=notrunc status=none + chmod +x "$OUT"/hex0-seed + SEED="$OUT"/hex0-seed +fi + +"$SEED" "$A"/hex0_"$A".hex0 "$OUT"/hex0 +if [ "$ARCH" = amd64 ]; then + printf '\5' | dd of="$OUT"/hex0 bs=1 seek=68 count=1 conv=notrunc status=none +fi + +"$OUT"/hex0 "$A"/hex1_"$A".hex0 "$OUT"/hex1 +"$OUT"/hex1 "$A"/hex2_"$A".hex1 "$OUT"/hex2-0 +"$OUT"/"$CATM_ASM" "$A"/"$CATM_SRC" "$OUT"/catm +"$OUT"/catm "$OUT"/M0.hex2 "$A"/ELF-"$ARCH".hex2 "$A"/M0_"$A".hex2 +"$OUT"/hex2-0 "$OUT"/M0.hex2 "$OUT"/M0