commit 049c1ea756097264ad88f7a22003d26259543ee3
parent 92f3cc91a2b73dc490ae6048309beb6da7382c96
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 13:19:28 -0700
m1pp: Phase 0 build infra; split aarch64 LI/LA literal prefix
Add m1pp/build.sh and m1pp/test.sh: build a P1v2 .M1 fixture against
build/p1v2/aarch64/p1_aarch64.M1, run it in the aarch64 alpine
container, diff against .expected. Wired in via `make test-m1pp`.
Phase 0 fixture tests/m1pp/00-hello.M1 is a hello-world that exercises
the full pipeline end to end.
Split the aarch64 literal-pool prefix. LI stays 64-bit (ldr x, b PC+12)
so any one-word constant fits. LA and LA_BR move to a 32-bit zero-
extending prefix (ldr w, b PC+8). Label addresses fit in 32 bits under
the stage0 layout, so source now writes `la_a2 &msg` without padding
the literal pool entry to 8 bytes. Codified in docs/P1v2.md.
Moves the full-parity fixture from tests/m1m/ to tests/m1pp/ so per-
phase fixtures live together.
The prior 64-bit prefix had b PC+8 where it needed b PC+12 and would
have SIGILL'd any call site; no in-tree code had run against it, so
the bug surfaced only when Phase 0 drove it.
Diffstat:
11 files changed, 339 insertions(+), 25 deletions(-)
diff --git a/Makefile b/Makefile
@@ -120,7 +120,7 @@ IMAGE_STAMP := $(OUT_DIR)/.image
# --- Targets ---------------------------------------------------------------
-.PHONY: all toolchain populate-upstream run run-all test-lisp test-lisp-all clean
+.PHONY: all toolchain populate-upstream run run-all test-lisp test-lisp-all test-m1pp clean
all: $(OUT_DIR)/$(PROG)
@@ -307,6 +307,17 @@ test-lisp-all:
$(MAKE) --no-print-directory ARCH=amd64 test-lisp
$(MAKE) --no-print-directory ARCH=riscv64 test-lisp
+# m1pp port test suite (P1v2, aarch64-only). Drives m1pp/build.sh + run.
+# Fixtures live in tests/m1pp/; see m1pp/test.sh for the runner contract.
+test-m1pp: build/p1v2/aarch64/p1_aarch64.M1 $(TOOLS_DIR)/M0 | $(IMAGE_STAMP)
+ sh m1pp/test.sh
+
+# P1v2 DEFINE table for aarch64. Generated by p1/p1_gen.py from
+# p1/aarch64.py. Used by the m1pp port (build/m1pp/) — distinct from the
+# legacy build/aarch64/p1_aarch64.M1 used by PROG=hello/lisp/m1m.
+build/p1v2/aarch64/p1_aarch64.M1: p1/p1_gen.py p1/aarch64.py p1/common.py
+ python3 p1/p1_gen.py --arch aarch64 build/p1v2
+
clean:
rm -rf build/
diff --git a/docs/M1M-IMPL.md b/docs/M1M-IMPL.md
@@ -376,23 +376,68 @@ process_tokens:
### Implementation Slices
-1. Replace structural `%macro` skipping with `define_macro` storage
- only. Verify definition-only inputs still match the C oracle.
-2. Add the stream stack and make pass-through processing read from
- streams instead of `proc_pos`.
-3. Add macro calls with plain parameter substitution. Test one-token and
- multi-token args.
-4. Add recursive rescanning by pushing expansion streams. Test macro
- calling macro.
-5. Add paste compaction. Test valid paste, misplaced paste, bad
- operands, and pasted-parameter single-token validation.
-6. Add integer atom parsing and explicit-stack expression evaluation.
- Test arithmetic and comparison expressions without macros.
-7. Add `!@%$(`
-8. Add macro expansion inside expressions. This is required for
- `p1/aarch64.M1M`.
-9. Add `%select`.
-10. Add malformed-input smoke tests and coarse fatal labels.
-11. Run C-oracle parity for `tests/m1m/full-parity.M1M`, then combined
- `p1/aarch64.M1M + p1/P1.M1M`, then use the produced frontend on a
- small P1 program.
+The port is broken into phases. Each phase ends with a dedicated test
+under `tests/m1pp/` and a parity check (where applicable) against the C
+oracle in `m1pp/m1pp.c`. The target ISA is **P1v2** (registers
+`a0..a3`, `t0..t2`, `s0..s3`; `enter`/`leave`; `la_br`); the DEFINE
+table is `build/p1v2/aarch64/p1_aarch64.M1`. Aarch64 is the staging
+arch (matches the macOS host so podman runs natively).
+
+- [x] **Phase 0 — Build/run/diff infra under `m1pp/`.**
+ `m1pp/build.sh <source.M1> <out>` lints against the P1v2 DEFINE
+ table, prunes unused DEFINEs, runs M0 + hex2-0 with the aarch64
+ ELF header inside the alpine container, and deposits a runnable
+ binary. `m1pp/test.sh` walks fixtures in `tests/m1pp/`, builds
+ each, runs it in the alpine container, and diffs `<name>.expected`
+ against its captured output (or — for fixtures that write to
+ `argv[2]` — against that file). Wired into `make test-m1pp`. Phase 0
+ fixture: `tests/m1pp/00-hello.M1` — a P1v2 hello-world that
+ proves the pipeline without depending on `m1pp/m1pp.M1`'s current
+ state.
+
+- [ ] **Phase 1 — Port lexer + pass-through skeleton to P1v2.**
+ Rewrite the existing `_start` / read / write / lex_source /
+ emit_token / emit_newline / process_tokens / skip_macro_def
+ routines using P1v2 conventions (`a*`, `t*`, `s*` registers,
+ `enter SIZE` / `leave`, `la_br %label`). Verify byte-for-byte
+ against the C oracle on a definition-only fixture.
+
+- [ ] **Phase 2 — Macro definition storage.**
+ Replace structural `%macro` skipping with `define_macro`: parse
+ header, params, body tokens, body limits, line-start `%endm`.
+
+- [ ] **Phase 3 — Stream stack + expansion-pool lifetime.**
+ Stream stack push/pop for recursive rescanning;
+ expansion-pool mark/restore on stream pop.
+
+- [ ] **Phase 4 — Argument parsing.**
+ `parse_args` with nested-paren depth tracking, comma split at
+ depth 1, argument-count validation.
+
+- [ ] **Phase 5 — Plain parameter substitution.**
+ Substitute params in macro body via expand pool; enforce the
+ single-token requirement for parameters that participate in `##`.
+
+- [ ] **Phase 6 — `##` token paste compaction.**
+ In-place compactor over the expand pool; reject misplaced or
+ malformed paste sites.
+
+- [ ] **Phase 7 — Integer atoms + S-expression evaluator.**
+ `parse_int_token`, explicit expression-frame stack, all C operator
+ semantics, macro-in-expression composition (required for
+ `p1/aarch64.M1M`).
+
+- [ ] **Phase 8 — `!@%$(expr)` builtins.**
+ One-arg builtins on top of the evaluator; emit LE 1/2/4/8-byte hex
+ tokens.
+
+- [ ] **Phase 9 — `%select(cond, then, else)`.**
+ Eager `cond` eval; copy chosen branch to expand pool, push as
+ stream; never evaluate the unchosen branch.
+
+- [ ] **Phase 10 — Full-parity + malformed-input smoke tests.**
+ Run `tests/m1pp/full-parity.M1M` against the M1 implementation;
+ add malformed fixtures (unterminated macro, wrong arg count, bad
+ paste, bad expression, bad builtin arity) requiring non-zero exit.
+ Then run combined `p1/aarch64.M1M + p1/P1.M1M`, then use the
+ produced frontend on a small P1 program.
diff --git a/docs/P1v2.md b/docs/P1v2.md
@@ -261,6 +261,12 @@ P1 v2 also uses two structured assembly-time operands:
The backend may realize `LI` and `LA` using native immediates, literal pools,
multi-instruction sequences, or other backend-private mechanisms.
+Backends may assume labels fit in 32 bits when realizing `LA` and `LA_BR`.
+This reflects the stage0 image layout (`hex2-0` base `0x00600000`, programs
+well under 4 GB), not a portable-ISA-level guarantee. Backends that target
+images loaded above the 4 GB boundary must adjust their `LA` / `LA_BR`
+lowering. `LI` makes no such assumption — it materializes any one-word value.
+
## Control Flow
### Call / Return / Tail Call
diff --git a/m1pp/build.sh b/m1pp/build.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+## build.sh — build a P1v2 .M1 source into a runnable aarch64 ELF binary.
+##
+## Mirrors the Makefile's PROG=m1m pipeline but targets the P1v2 DEFINE
+## table at build/p1v2/aarch64/p1_aarch64.M1. Used during the m1pp
+## C-to-M1 port; once m1pp.M1 lands, this same script builds it too.
+##
+## Usage: m1pp/build.sh <source.M1> <output_binary>
+##
+## Pipeline (mirrors Makefile PROG=m1m):
+## 1. lint — assert every P1v2 op token in source.M1 is defined
+## 2. prune — strip DEFINEs the source doesn't reference
+## 3. catm — pruned defs ++ source.M1 -> combined.M1
+## 4. M0 — combined.M1 -> .hex2
+## 5. catm — ELF header ++ .hex2 -> linked.hex2
+## 6. hex2-0 — linked.hex2 -> raw ELF
+## 7. chmod 0700, deposit at <output_binary>
+##
+## Intermediates land in build/m1pp/<basename>.work/ for later inspection.
+## All M0/hex2-0 I/O stages through container /tmp (overlayfs) instead of
+## the bind-mounted /work to dodge per-byte virtiofs overhead — same trick
+## the Makefile uses for the existing PROG=m1m flow.
+
+set -eu
+
+if [ "$#" -ne 2 ]; then
+ echo "usage: $0 <source.M1> <output_binary>" >&2
+ exit 2
+fi
+
+SRC=$1
+OUT=$2
+
+REPO=$(cd "$(dirname "$0")/.." && pwd)
+ARCH=aarch64
+PLATFORM=linux/arm64
+RUNTIME_IMAGE='public.ecr.aws/docker/library/alpine@sha256:378c4c5418f7493bd500ad21ffb43818d0689daaad43e3261859fb417d1481a0'
+
+P1_DEFS=build/p1v2/$ARCH/p1_$ARCH.M1
+TOOLS=build/$ARCH/tools
+ELF_HDR=build/upstream/AArch64/ELF-aarch64.hex2
+
+cd "$REPO"
+
+for f in "$P1_DEFS" "$TOOLS/M0" "$TOOLS/hex2-0" "$TOOLS/catm" "$ELF_HDR" lint.sh "$SRC"; do
+ if [ ! -e "$f" ]; then
+ echo "build.sh: missing dependency: $f" >&2
+ exit 1
+ fi
+done
+
+NAME=$(basename "$SRC" .M1)
+WORK=build/m1pp/$NAME.work
+mkdir -p "$WORK" "$(dirname "$OUT")"
+
+PRUNED=$WORK/p1.pruned.M1
+
+## Step 1: lint.
+sh lint.sh "$P1_DEFS" "$SRC"
+
+## Step 2: prune. Same awk one-liner as Makefile — collect all whitespace-
+## separated tokens from the source, keep only DEFINEs whose name appears.
+awk 'NR==FNR{for(i=1;i<=NF;i++)u[$i]=1;next} /^DEFINE /{if($2 in u)print;next} {print}' \
+ "$SRC" "$P1_DEFS" > "$PRUNED"
+
+## Steps 3-7: run inside the alpine container so M0/hex2-0 are native arm64.
+## Stage I/O in container /tmp to dodge virtiofs overhead, then cp back. Paths
+## are passed as positional args so the inner shell never re-parses them.
+podman run --rm --pull=never --platform "$PLATFORM" \
+    -v "$REPO":/work \
+    -w /work \
+    "$RUNTIME_IMAGE" sh -euc '
+        pruned=$1 src=$2 tools=$3 elf_hdr=$4 work=$5
+        cp "$pruned" /tmp/p1.M1
+        cp "$src" /tmp/prog.M1
+        "$tools/catm" /tmp/combined.M1 /tmp/p1.M1 /tmp/prog.M1
+        "$tools/M0" /tmp/combined.M1 /tmp/prog.hex2
+
+        cp "$elf_hdr" /tmp/elf.hex2
+        "$tools/catm" /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2
+        "$tools/hex2-0" /tmp/linked.hex2 /tmp/prog
+        chmod 0700 /tmp/prog
+
+        cp /tmp/combined.M1 "$work/combined.M1"
+        cp /tmp/prog.hex2 "$work/prog.hex2"
+        cp /tmp/linked.hex2 "$work/linked.hex2"
+        cp /tmp/prog "$work/prog"
+    ' sh "$PRUNED" "$SRC" "$TOOLS" "$ELF_HDR" "$WORK"
+
+cp "$WORK/prog" "$OUT"
+chmod 0700 "$OUT"
diff --git a/m1pp/test.sh b/m1pp/test.sh
@@ -0,0 +1,106 @@
+#!/bin/sh
+## test.sh — run the m1pp test suite under tests/m1pp/.
+##
+## For each <name>.M1 fixture in tests/m1pp/:
+## 1. Build it via m1pp/build.sh into build/m1pp/<name>
+## 2. If <name>.in exists, pipe it as stdin to the binary
+## 3. If <name>.argv exists, read each line as an argv element
+## 4. Run the binary inside the alpine arm64 container
+## 5. Diff the captured output (stdout and stderr merged) against <name>.expected
+##
+## Some Phase >= 1 fixtures will exercise the m1pp expander itself —
+## those tests pass <name>.M1M as argv[1] and <name>.out as argv[2],
+## then diff the produced argv[2] file against <name>.expected.
+## See per-fixture <name>.argv to know which mode is in use.
+##
+## Filenames starting with `_` are skipped — used for ad-hoc debugging.
+##
+## Usage: m1pp/test.sh [fixture-name ...]
+## No args: run every non-`_` fixture under tests/m1pp/.
+
+set -eu
+
+REPO=$(cd "$(dirname "$0")/.." && pwd)
+ARCH=aarch64
+PLATFORM=linux/arm64
+RUNTIME_IMAGE='public.ecr.aws/docker/library/alpine@sha256:378c4c5418f7493bd500ad21ffb43818d0689daaad43e3261859fb417d1481a0'
+
+cd "$REPO"
+
+if [ "$#" -gt 0 ]; then
+ FIXTURES=""
+ for n in "$@"; do
+ FIXTURES="$FIXTURES tests/m1pp/$n.M1"
+ done
+else
+ FIXTURES=$(ls tests/m1pp/[!_]*.M1 2>/dev/null || true)
+fi
+
+if [ -z "$FIXTURES" ]; then
+ echo "no fixtures to run" >&2
+ exit 1
+fi
+
+pass=0
+fail=0
+for src in $FIXTURES; do
+ name=$(basename "$src" .M1)
+ expected=tests/m1pp/$name.expected
+ bin=build/m1pp/$name
+ in=tests/m1pp/$name.in
+ argv_file=tests/m1pp/$name.argv
+
+ if [ ! -e "$expected" ]; then
+ echo " SKIP $name (no .expected)"
+ continue
+ fi
+
+    build_log=$(sh m1pp/build.sh "$src" "$bin" 2>&1) || {
+        ## Replay the captured log instead of re-running the (slow) build.
+        echo " FAIL $name (build failed)"
+        printf '%s\n' "$build_log" | sed 's/^/ /'
+        fail=$((fail + 1)); continue
+    }
+
+    ## Build the run command. Argv lines (if any) are passed as args; stdin
+    ## (if any) is piped in. A leftover argv[2] output file from an earlier
+    ## run is removed first so it cannot be mistaken for this run's result.
+    set --
+    if [ -e "$argv_file" ]; then
+        while IFS= read -r line; do set -- "$@" "$line"; done < "$argv_file"
+        if [ "$#" -ge 2 ]; then rm -f -- "$2"; fi
+    fi
+
+ if [ -e "$in" ]; then
+ actual=$(podman run --rm --pull=never --platform "$PLATFORM" -i \
+ -v "$REPO":/work -w /work "$RUNTIME_IMAGE" \
+ "./$bin" "$@" < "$in" 2>&1 || true)
+ else
+ actual=$(podman run --rm --pull=never --platform "$PLATFORM" \
+ -v "$REPO":/work -w /work "$RUNTIME_IMAGE" \
+ "./$bin" "$@" 2>&1 || true)
+ fi
+
+ expected_content=$(cat "$expected")
+
+ ## Special-case argv-file fixtures whose binary writes to argv[2]
+ ## (the m1pp expander shape). When that file exists post-run, diff it.
+ if [ -e "$argv_file" ] && [ "$#" -ge 2 ] && [ -e "$2" ]; then
+ actual=$(cat "$2")
+ fi
+
+ if [ "$actual" = "$expected_content" ]; then
+ echo " PASS $name"
+ pass=$((pass + 1))
+ else
+ echo " FAIL $name"
+ echo " --- expected ---"
+ printf '%s\n' "$expected_content" | sed 's/^/ /'
+ echo " --- actual ---"
+ printf '%s\n' "$actual" | sed 's/^/ /'
+ fail=$((fail + 1))
+ fi
+done
+
+echo "$pass passed, $fail failed"
+[ "$fail" -eq 0 ]
diff --git a/p1/P1-aarch64.M1pp b/p1/P1-aarch64.M1pp
@@ -298,7 +298,18 @@
%endm
%macro aa64_lit64_prefix(rd)
+# 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
+# The 8 bytes that follow in source become the literal; b skips them.
%((| 0x58000040 %aa64_reg(rd)))
+%(0x14000003)
+%endm
+
+%macro aa64_lit32_prefix(rd)
+# 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
+# ldr w zero-extends into the full 64-bit register, so a 4-byte literal
+# is enough for any address in the stage0 layout. Lets source use
+# `&label` directly without padding to 8 bytes.
+%((| 0x18000040 %aa64_reg(rd)))
%(0x14000002)
%endm
@@ -350,11 +361,11 @@
%endm
%macro p1_la(rd)
-%aa64_lit64_prefix(rd)
+%aa64_lit32_prefix(rd)
%endm
%macro p1_labr()
-%aa64_lit64_prefix(br)
+%aa64_lit32_prefix(br)
%endm
%macro p1_mov(rd, rs)
diff --git a/p1/aarch64.py b/p1/aarch64.py
@@ -186,8 +186,22 @@ def aa_ret():
def aa_lit64_prefix(rd):
+ ## 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
+ ## The 8 bytes that follow in source become the literal; b skips them.
d = NAT[rd]
ldr_lit = 0x58000040 | d
+ b_plus12 = 0x14000003
+ return le32(ldr_lit) + le32(b_plus12)
+
+
+def aa_lit32_prefix(rd):
+ ## 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
+ ## ldr w zero-extends into the full 64-bit register, so a 4-byte literal
+ ## is enough for any address in the stage0 layout (base 0x00600000,
+ ## programs well under 4 GB). This lets source use `&label` directly
+ ## without padding to 8 bytes.
+ d = NAT[rd]
+ ldr_lit = 0x18000040 | d
b_plus8 = 0x14000002
return le32(ldr_lit) + le32(b_plus8)
@@ -197,11 +211,11 @@ def encode_li(_arch, row):
def encode_la(_arch, row):
- return aa_lit64_prefix(row.rd)
+ return aa_lit32_prefix(row.rd)
def encode_labr(_arch, _row):
- return aa_lit64_prefix('br')
+ return aa_lit32_prefix('br')
def encode_mov(_arch, row):
diff --git a/tests/m1pp/00-hello.M1 b/tests/m1pp/00-hello.M1
@@ -0,0 +1,29 @@
+## Phase 0 smoke fixture: P1v2 hello-world.
+##
+## Proves that the m1pp build pipeline (lint -> prune -> catm -> M0 -> ELF
+## link -> hex2-0) works against build/p1v2/aarch64/p1_aarch64.M1.
+## Independent of m1pp/m1pp.M1's current state so Phase 0 can land before
+## Phase 1.
+##
+## P1v2 syscall ABI:
+## a0 = syscall number on entry, return value on exit
+## a1, a2, a3, t0, s0, s1 = syscall arguments 0..5
+
+:_start
+ ## write(fd=1, buf=&msg, count=14)
+ li_a0 sys_write
+ li_a1 %1 %0
+ la_a2 &msg
+ li_a3 %14 %0
+ syscall
+
+ ## exit(0)
+ li_a0 sys_exit
+ li_a1 %0 %0
+ syscall
+
+:msg
+"Hello, World!
+"
+
+:ELF_end
diff --git a/tests/m1pp/00-hello.expected b/tests/m1pp/00-hello.expected
@@ -0,0 +1 @@
+Hello, World!
diff --git a/tests/m1m/full-parity.M1M b/tests/m1pp/full-parity.M1M
diff --git a/tests/m1m/full-parity.expected b/tests/m1pp/full-parity.expected