commit a570d90096e6f998b88f626c4ea05d870e80e84f
parent 3b6a3c115744b6b268f83c3a7e122ab4155e78c2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 24 Apr 2026 05:44:59 -0700
Speed up P1 build/test loop ~30-70x via native mescc-tools
Default path for m1pp/build.sh now runs host-compiled mescc-tools M1/hex2
instead of the stage0 M0/hex2-0 inside the linux/arm64 VM, cutting a
full P1 test from ~110s to ~3.9s cold / ~1.6s warm. The bootstrap tools
still produce byte-exact output and stay the source of truth; set
M1PP_BOOTSTRAP_TOOLS=1 to force the original container path.
Also caches the built m1pp/m1pp by mtime in both test runners so repeat
runs skip the full expander rebuild when m1pp.M1 and the P1 defs are
unchanged.
Diffstat:
4 files changed, 194 insertions(+), 43 deletions(-)
diff --git a/m1pp/build-native-tools.sh b/m1pp/build-native-tools.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+## build-native-tools.sh — compile mescc-tools M1 and hex2 natively for
+## dev-loop speed. These are NOT in the bootstrap chain; they are a fast
+## substitute for stage0 M0/hex2-0, used by m1pp/build.sh's default mode.
+##
+## Output is verified byte-exact with the bootstrap tools when the right
+## flags are passed (see m1pp/build.sh). Using them saves ~150× wall time
+## on P1 builds / tests by avoiding the per-byte syscall storm the
+## stage0 tools incur under Apple's linux/arm64 VM.
+##
+## Source lookup, relative to the repo root (first complete tree wins):
+## 1. $MESCC_TOOLS_SRC (if set; an incomplete tree is an error, no fallback)
+## 2. ../live-bootstrap/seed/stage0-posix/mescc-tools
+## 3. ../mescc-tools (if M2libc is populated)
+
+set -eu
+
+REPO=$(cd "$(dirname "$0")/.." && pwd)
+cd "$REPO"
+
+OUT=build/native-tools
+mkdir -p "$OUT"
+
+find_src() {
+  ## Print the mescc-tools tree to build from. A set override is used or rejected outright.
+  if [ -n "${MESCC_TOOLS_SRC:-}" ]; then
+    if [ ! -f "$MESCC_TOOLS_SRC/M1-macro.c" ] || [ ! -f "$MESCC_TOOLS_SRC/M2libc/bootstrappable.c" ]; then
+      echo "build-native-tools.sh: MESCC_TOOLS_SRC=$MESCC_TOOLS_SRC is not a complete mescc-tools tree" >&2
+      return 1
+    fi
+    echo "$MESCC_TOOLS_SRC"
+    return 0
+  fi
+  for tree in "$REPO/../live-bootstrap/seed/stage0-posix/mescc-tools" \
+    "$REPO/../mescc-tools"; do
+    [ -f "$tree/M1-macro.c" ] && [ -f "$tree/M2libc/bootstrappable.c" ] || continue
+    echo "$tree"
+    return 0
+  done
+  {
+    echo "build-native-tools.sh: no mescc-tools source found."
+    echo " set MESCC_TOOLS_SRC to a directory containing M1-macro.c and M2libc/,"
+    echo " or fall back to the bootstrap path with M1PP_BOOTSTRAP_TOOLS=1."
+  } >&2
+  return 1
+}
+
+SRC=$(find_src)
+
+: "${CC:=cc}"
+CFLAGS="-O2 -std=c99 -D_GNU_SOURCE"
+
+## Only rebuild if sources are newer than the cached binary.
+m1_fresh() {
+  ## Fresh means: the binary is executable and newer than every M1 source.
+  [ -x "$OUT/M1" ] &&
+    [ "$OUT/M1" -nt "$SRC/M1-macro.c" ] &&
+    [ "$OUT/M1" -nt "$SRC/stringify.c" ] &&
+    [ "$OUT/M1" -nt "$SRC/M2libc/bootstrappable.c" ]
+}
+hex2_fresh() {
+  ## Fresh means: the binary is executable and newer than every hex2 source.
+  [ -x "$OUT/hex2" ] && [ "$OUT/hex2" -nt "$SRC/hex2.c" ] &&
+    [ "$OUT/hex2" -nt "$SRC/hex2_linker.c" ] &&
+    [ "$OUT/hex2" -nt "$SRC/hex2_word.c" ] &&
+    [ "$OUT/hex2" -nt "$SRC/M2libc/bootstrappable.c" ]
+}
+
+if ! m1_fresh; then
+ echo " compiling $OUT/M1 from $SRC"
+ $CC $CFLAGS \
+ "$SRC/M1-macro.c" "$SRC/stringify.c" "$SRC/M2libc/bootstrappable.c" \
+ -o "$OUT/M1"
+fi
+
+if ! hex2_fresh; then
+ echo " compiling $OUT/hex2 from $SRC"
+ $CC $CFLAGS \
+ "$SRC/hex2.c" "$SRC/hex2_linker.c" "$SRC/hex2_word.c" "$SRC/M2libc/bootstrappable.c" \
+ -o "$OUT/hex2"
+fi
diff --git a/m1pp/build.sh b/m1pp/build.sh
@@ -11,15 +11,23 @@
## 1. lint — assert every P1v2 op token in source.M1 is defined
## 2. prune — strip DEFINEs the source doesn't reference
## 3. catm — pruned defs ++ source.M1 -> combined.M1
-## 4. M0 — combined.M1 -> .hex2
+## 4. M1 — combined.M1 -> .hex2 (M0 in bootstrap mode)
## 5. catm — ELF header ++ .hex2 -> linked.hex2
-## 6. hex2-0 — linked.hex2 -> raw ELF
+## 6. hex2 — linked.hex2 -> raw ELF (hex2-0 in bootstrap mode)
## 7. chmod 0700, deposit at <output_binary>
##
+## Default mode uses native-compiled mescc-tools M1/hex2 on the host:
+## ~150× faster than the bootstrap M0/hex2-0 on the linux/arm64 VM (the
+## bootstrap tools do one syscall per byte; mescc-tools uses buffered
+## stdio). Output is byte-exact — verified against the bootstrap chain
+## with --little-endian on both tools.
+##
+## This is a dev-loop convenience. It does not alter the bootstrap chain
+## itself: the stage0 M0/hex2-0 binaries under build/$ARCH/tools remain
+## the source of truth. Set M1PP_BOOTSTRAP_TOOLS=1 to force the original
+## container-based path (verification, or when mescc-tools is unavailable).
+##
## Intermediates land in build/m1pp/<basename>.* for later inspection.
-## All M0/hex2-0 I/O stages through container /tmp (overlayfs) instead of
-## the bind-mounted /work to dodge per-byte virtiofs overhead — same trick
-## the Makefile uses for the existing PROG=m1m flow.
set -eu
@@ -40,20 +48,21 @@ CONTAINERFILE=Containerfile.busybox
P1_DEFS=build/p1v2/$ARCH/p1_$ARCH.M1
TOOLS=build/$ARCH/tools
ELF_HDR=build/upstream/AArch64/ELF-aarch64.hex2
+BASE_ADDR=0x600000 ## must match the load address encoded in $ELF_HDR
+
+NATIVE_TOOLS_DIR=build/native-tools
+NATIVE_M1=$NATIVE_TOOLS_DIR/M1
+NATIVE_HEX2=$NATIVE_TOOLS_DIR/hex2
cd "$REPO"
-for f in "$P1_DEFS" "$TOOLS/M0" "$TOOLS/hex2-0" "$TOOLS/catm" "$ELF_HDR" lint.sh "$SRC"; do
+for f in "$P1_DEFS" "$ELF_HDR" lint.sh "$SRC"; do
if [ ! -e "$f" ]; then
echo "build.sh: missing dependency: $f" >&2
exit 1
fi
done
-if ! podman image exists "$IMAGE"; then
- podman build -f "$CONTAINERFILE" -t "$IMAGE" .
-fi
-
NAME=$(basename "$SRC" .M1)
WORK=build/m1pp/$NAME.work
mkdir -p "$WORK" "$(dirname "$OUT")"
@@ -68,29 +77,62 @@ sh lint.sh "$P1_DEFS" "$SRC"
awk 'NR==FNR{for(i=1;i<=NF;i++)u[$i]=1;next} /^DEFINE /{if($2 in u)print;next} {print}' \
"$SRC" "$P1_DEFS" > "$PRUNED"
-## Steps 3-7: run inside the alpine container so M0/hex2-0 are native arm64.
-## Stage everything in /tmp inside the container to avoid virtiofs syscall
-## overhead for the per-byte fputc tools, then cp results back.
-podman run --rm --pull=never --platform "$PLATFORM" \
- -v "$REPO":/work \
- -w /work \
- "$IMAGE" sh -ec "
- set -eu
- cp $PRUNED /tmp/p1.M1
- cp $SRC /tmp/prog.M1
- $TOOLS/catm /tmp/combined.M1 /tmp/p1.M1 /tmp/prog.M1
- $TOOLS/M0 /tmp/combined.M1 /tmp/prog.hex2
-
- cp $ELF_HDR /tmp/elf.hex2
- $TOOLS/catm /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2
- $TOOLS/hex2-0 /tmp/linked.hex2 /tmp/prog
- chmod 0700 /tmp/prog
-
- cp /tmp/combined.M1 $WORK/combined.M1
- cp /tmp/prog.hex2 $WORK/prog.hex2
- cp /tmp/linked.hex2 $WORK/linked.hex2
- cp /tmp/prog $WORK/prog
- "
+## Steps 3-7: mode selection.
+if [ "${M1PP_BOOTSTRAP_TOOLS:-0}" = 1 ]; then
+ ## Bootstrap mode: run stage0 M0/hex2-0 inside the container. Slow
+ ## (~110s on 1 MB m1pp.M1) but exercises the same code path the
+ ## seed/tcc-boot chain does. Stage through /tmp to dodge virtiofs.
+ for f in "$TOOLS/M0" "$TOOLS/hex2-0" "$TOOLS/catm"; do
+ if [ ! -e "$f" ]; then
+ echo "build.sh: missing bootstrap dependency: $f" >&2
+ exit 1
+ fi
+ done
+ if ! podman image exists "$IMAGE"; then
+ podman build -f "$CONTAINERFILE" -t "$IMAGE" .
+ fi
+ podman run --rm --pull=never --platform "$PLATFORM" \
+ -v "$REPO":/work \
+ -w /work \
+ "$IMAGE" sh -ec "
+ set -eu
+ cp $PRUNED /tmp/p1.M1
+ cp $SRC /tmp/prog.M1
+ $TOOLS/catm /tmp/combined.M1 /tmp/p1.M1 /tmp/prog.M1
+ $TOOLS/M0 /tmp/combined.M1 /tmp/prog.hex2
+
+ cp $ELF_HDR /tmp/elf.hex2
+ $TOOLS/catm /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2
+ $TOOLS/hex2-0 /tmp/linked.hex2 /tmp/prog
+ chmod 0700 /tmp/prog
+
+ cp /tmp/combined.M1 $WORK/combined.M1
+ cp /tmp/prog.hex2 $WORK/prog.hex2
+ cp /tmp/linked.hex2 $WORK/linked.hex2
+ cp /tmp/prog $WORK/prog
+ "
+else
+ ## Native mode (default): host-compiled mescc-tools M1/hex2. The
+ ## bootstrap chain is not invoked. Flags are chosen to make the
+ ## emitted aarch64 ELF byte-exact with the bootstrap build:
+ ## M1: --little-endian (default is big-endian; --architecture
+ ## aarch64 does NOT override that — see M1-macro.c:786).
+ ## hex2: --little-endian, and --base-address $BASE_ADDR to match $ELF_HDR.
+ if [ ! -x "$NATIVE_M1" ] || [ ! -x "$NATIVE_HEX2" ]; then
+ sh m1pp/build-native-tools.sh
+ fi
+
+ ## catm on the bootstrap side is just multi-file concat → shell cat
+ ## is equivalent.
+ cat "$PRUNED" "$SRC" > "$WORK/combined.M1"
+ "$NATIVE_M1" --architecture "$ARCH" --little-endian \
+ -f "$WORK/combined.M1" -o "$WORK/prog.hex2"
+ cat "$ELF_HDR" "$WORK/prog.hex2" > "$WORK/linked.hex2"
+ "$NATIVE_HEX2" --architecture "$ARCH" --little-endian \
+ --base-address "$BASE_ADDR" \
+ -f "$WORK/linked.hex2" -o "$WORK/prog"
+ chmod 0700 "$WORK/prog"
+fi
cp "$WORK/prog" "$OUT"
chmod 0700 "$OUT"
diff --git a/m1pp/test.sh b/m1pp/test.sh
@@ -30,15 +30,28 @@ IMAGE=localhost/distroless-busybox:latest
cd "$REPO"
EXPANDER_BIN=build/m1pp/m1pp
+EXPANDER_SRC=m1pp/m1pp.M1
+EXPANDER_DEFS=build/p1v2/aarch64/p1_aarch64.M1
EXPANDER_BUILT=0
+expander_up_to_date() {
+  ## Reusable = executable and newer than both inputs (source and P1 defs).
+  ## NOTE(review): mode-agnostic; M1PP_BOOTSTRAP_TOOLS=1 won't force a rebuild.
+  [ -x "$EXPANDER_BIN" ] && [ "$EXPANDER_BIN" -nt "$EXPANDER_SRC" ] &&
+    [ "$EXPANDER_BIN" -nt "$EXPANDER_DEFS" ]
+}
+
build_expander() {
if [ "$EXPANDER_BUILT" = 0 ]; then
- sh m1pp/build.sh m1pp/m1pp.M1 "$EXPANDER_BIN" >/dev/null 2>&1 || {
- echo "FATAL: failed to build m1pp/m1pp.M1" >&2
- sh m1pp/build.sh m1pp/m1pp.M1 "$EXPANDER_BIN" 2>&1 | sed 's/^/ /' >&2
- exit 1
- }
+ if expander_up_to_date; then
+ echo " (m1pp up to date, skipping rebuild)"
+ else ## build once; keep the log so a failure is shown without a rerun
+ expander_log=$(sh m1pp/build.sh "$EXPANDER_SRC" "$EXPANDER_BIN" 2>&1) || {
+ echo "FATAL: failed to build $EXPANDER_SRC" >&2
+ printf '%s\n' "$expander_log" | sed 's/^/ /' >&2
+ exit 1
+ }
+ fi
EXPANDER_BUILT=1
fi
}
diff --git a/tests/p1/test.sh b/tests/p1/test.sh
@@ -21,17 +21,32 @@ PLATFORM=linux/arm64
IMAGE=localhost/distroless-busybox:latest
EXPANDER_BIN=build/m1pp/m1pp
+EXPANDER_SRC=m1pp/m1pp.M1
+EXPANDER_DEFS=build/p1v2/aarch64/p1_aarch64.M1
EXPANDER_BUILT=0
cd "$REPO"
+## Rebuild m1pp only if its inputs are newer than the cached binary.
+## A bootstrap-mode build is ~110s; skipping it when unchanged is the iteration win.
+expander_up_to_date() {
+  ## Reusable = executable and newer than both inputs (source and P1 defs).
+  ## NOTE(review): mode-agnostic; M1PP_BOOTSTRAP_TOOLS=1 won't force a rebuild.
+  [ -x "$EXPANDER_BIN" ] && [ "$EXPANDER_BIN" -nt "$EXPANDER_SRC" ] &&
+    [ "$EXPANDER_BIN" -nt "$EXPANDER_DEFS" ]
+}
+
build_expander() {
if [ "$EXPANDER_BUILT" = 0 ]; then
- sh m1pp/build.sh m1pp/m1pp.M1 "$EXPANDER_BIN" >/dev/null 2>&1 || {
- echo "FATAL: failed to build m1pp/m1pp.M1" >&2
- sh m1pp/build.sh m1pp/m1pp.M1 "$EXPANDER_BIN" 2>&1 | sed 's/^/ /' >&2
- exit 1
- }
+ if expander_up_to_date; then
+ echo " (m1pp up to date, skipping rebuild)"
+ else ## build once; keep the log so a failure is shown without a rerun
+ expander_log=$(sh m1pp/build.sh "$EXPANDER_SRC" "$EXPANDER_BIN" 2>&1) || {
+ echo "FATAL: failed to build $EXPANDER_SRC" >&2
+ printf '%s\n' "$expander_log" | sed 's/^/ /' >&2
+ exit 1
+ }
+ fi
EXPANDER_BUILT=1
fi
}