commit cddd94f916f41fad819f4905b6508be0378368d1
parent 049c1ea756097264ad88f7a22003d26259543ee3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 13:25:10 -0700
m1pp: alias container image as localhost/lispcc:aarch64; add port pointers
Tag the digest-pinned alpine arm64 image as localhost/lispcc:aarch64
on first run from m1pp/build.sh; both build.sh and test.sh now run
podman against the short tag. The Makefile keeps the digest as the
canonical pin; the tag is just an alias for ergonomics so the long
SHA doesn't have to appear in commands or docs.
Expand docs/M1M-IMPL.md with a "Working on the port" section: file
map, command cheatsheet (build, test, manual container run, regen
defs, C-oracle parity, lint), and a P1v2 quick-reference. Each phase
now lists the oracle entry points in m1pp/m1pp.c that the M1 port
should lift, so a session picking up mid-port can find the relevant
C routines by symbol name.
Diffstat:
3 files changed, 135 insertions(+), 28 deletions(-)
diff --git a/docs/M1M-IMPL.md b/docs/M1M-IMPL.md
@@ -4,6 +4,78 @@ This note is the implementation-oriented companion to
`docs/M1M-P1-PORT.md`. It describes a practical structure for the P1
macro expander.
+### Working on the port
+
+**Files**
+
+- `m1pp/m1pp.c` — C oracle. Behaviorally authoritative.
+ Each phase below names the oracle entry points to lift from.
+- `m1pp/m1pp.M1` — port target. Currently has only the lexer
+ skeleton and `%macro` structural skipping in the legacy P1 ISA.
+ Phase 1 rewrites it on P1v2; Phases 2–9 extend it.
+- `m1pp/build.sh`, `m1pp/test.sh` — build a P1v2 .M1 source into a
+ runnable aarch64 binary, then run it and diff its output. See Phase 0 below.
+- `tests/m1pp/` — per-phase fixtures. `<name>.M1` +
+ `<name>.expected` is the minimum; `<name>.argv` (one arg per line)
+ and `<name>.in` (stdin) are optional. When the fixture passes an
+ argv[2] output path, the runner diffs that file against
+ `<name>.expected` instead of stdout.
+- `build/p1v2/aarch64/p1_aarch64.M1` — P1v2 DEFINE table, generated
+ from `p1/aarch64.py` + `p1/p1_gen.py`. Regenerate after any
+ backend edit.
+- `docs/P1v2.md` — ISA spec. `docs/M1M-P1-PORT.md` — higher-level
+ port contract.
+
+**Commands**
+
+```sh
+# Build one .M1 source into a binary:
+sh m1pp/build.sh tests/m1pp/01-passthrough.M1 build/m1pp/01-passthrough
+
+# Run the whole suite (regenerates P1v2 defs if the generator changed):
+make test-m1pp
+
+# Run one fixture by name:
+sh m1pp/test.sh 01-passthrough
+
+# Run a built binary manually in the aarch64 container.
+# `localhost/lispcc:aarch64` is a local tag created by m1pp/build.sh on
+# first run, aliased to the digest-pinned alpine arm64 image; see the
+# Makefile for the canonical pin.
+podman run --rm --pull=never --platform linux/arm64 \
+ -v "$PWD":/work -w /work \
+ localhost/lispcc:aarch64 \
+ ./build/m1pp/<name> <argv...>
+
+# Regenerate P1v2 DEFINE tables after touching p1/*.py:
+python3 p1/p1_gen.py --arch aarch64 build/p1v2
+
+# Build the C oracle + compare its output to the M1 build:
+cc m1pp/m1pp.c -o build/m1pp/m1pp-oracle
+./build/m1pp/m1pp-oracle <input.M1M> build/m1pp/out-c
+./build/m1pp/<fixture> <input.M1M> build/m1pp/out-m1 # run via podman as above; write under /work, the container's /tmp is discarded
+diff build/m1pp/out-c build/m1pp/out-m1
+
+# Discover undefined P1 tokens without running M0 (catches typos that
+# would otherwise SIGILL silently — build.sh runs this automatically):
+sh lint.sh build/p1v2/aarch64/p1_aarch64.M1 m1pp/m1pp.M1
+```
+
+**P1v2 quick reference for this port**
+
+- Registers: `a0..a3` args + caller-saved, `t0..t2` caller-saved,
+ `s0..s3` callee-saved. `sp` is stack pointer; no raw writes.
+- Frame: `enter SIZE` / `leave`; no implicit `s*` save. Leaf
+ functions may skip frames.
+- Call: `la_br &target` then `call` / `tail` / `b` / `beq` / …
+ (the branch op consumes `br` — load it immediately before).
+- Materialize: `li_aN <8 bytes>` for any one-word integer
+ (`%lo %hi` or `'XXXXXXXXXXXXXXXX'`); `la_aN &label` for label
+ addresses — **no padding needed**, the 32-bit literal-pool
+ prefix zero-extends.
+- Syscall ABI: number in `a0`; args in `a1, a2, a3, t0, s0, s1`;
+ result in `a0`.
+
### Supported Features
The target expander supports the features required by `p1/*.M1M`:
@@ -383,6 +455,10 @@ oracle in `m1pp/m1pp.c`. The target ISA is **P1v2** (registers
table is `build/p1v2/aarch64/p1_aarch64.M1`. Aarch64 is the staging
arch (matches the macOS host so podman runs natively).
+Each phase below lists the oracle entry points in `m1pp/m1pp.c` that
+the M1 port lifts for that slice. Entry points are given by symbol name
+only; line numbers drift, so search for the symbol.
+
- [x] **Phase 0 — Build/run/diff infra under `m1pp/`.**
`m1pp/build.sh <source.M1> <out>` lints against the P1v2 DEFINE
table, prunes unused DEFINEs, runs M0 + hex2-0 with the aarch64
@@ -396,48 +472,70 @@ arch (matches the macOS host so podman runs natively).
state.
- [ ] **Phase 1 — Port lexer + pass-through skeleton to P1v2.**
- Rewrite the existing `_start` / read / write / lex_source /
- emit_token / emit_newline / process_tokens / skip_macro_def
- routines using P1v2 conventions (`a*`, `t*`, `s*` registers,
- `enter SIZE` / `leave`, `la_br %label`). Verify byte-for-byte
- against the C oracle on a definition-only fixture.
+ Rewrite `_start`, read/write, `lex_source`, `emit_token`,
+ `emit_newline`, `process_tokens`, and the structural %macro skip
+ in P1v2 conventions (`a*`/`t*`/`s*` registers, `enter SIZE` /
+ `leave`, `la_br &label`). Verify byte-for-byte parity against the
+ C oracle on a definition-only fixture (tokenizer pass-through).
+ Oracle entry points: `main`, `lex_source`, `emit_token`,
+ `emit_newline`, `process_tokens` (pass-through branches only),
+ plus `append_text_len`, `push_token`, `token_text_eq`,
+ `span_eq_token`.
- [ ] **Phase 2 — Macro definition storage.**
- Replace structural `%macro` skipping with `define_macro`: parse
- header, params, body tokens, body limits, line-start `%endm`.
+ Replace structural skipping with real storage: parse header,
+ params, body tokens, body limits, line-start `%endm`. Does not
+ yet call macros — adding defs-only input to an otherwise
+ pass-through run must still match the oracle.
+ Oracle: `define_macro`, `find_macro`, `find_param`.
- [ ] **Phase 3 — Stream stack + expansion-pool lifetime.**
- Stream stack push/pop for recursive rescanning;
- expansion-pool mark/restore on stream pop.
+ Stream stack push/pop for recursive rescanning; expansion-pool
+ mark/restore on stream pop. No semantic change until Phase 4
+ wires macro calls in, but isolates the lifecycle plumbing.
+ Oracle: `push_stream_span`, `current_stream`, `pop_stream`,
+ `copy_span_to_pool`, `push_pool_stream_from_mark`.
- [ ] **Phase 4 — Argument parsing.**
- `parse_args` with nested-paren depth tracking, comma split at
- depth 1, argument-count validation.
+ Nested-paren depth tracking, comma split at depth 1,
+ argument-count validation, `call_end_pos` output.
+ Oracle: `parse_args`.
- [ ] **Phase 5 — Plain parameter substitution.**
- Substitute params in macro body via expand pool; enforce the
- single-token requirement for parameters that participate in `##`.
+ Walk macro body; substitute params via the expand pool; push
+ resulting slice as a stream. Enforces the single-token-argument rule
+ for parameters adjacent to `##` (no actual pasting yet).
+ Oracle: `expand_macro_tokens` (parameter loop),
+ `copy_arg_tokens_to_pool`, `copy_paste_arg_to_pool`,
+ `expand_call`.
- [ ] **Phase 6 — `##` token paste compaction.**
- In-place compactor over the expand pool; reject misplaced or
+ In-place compactor over the expand pool. Rejects misplaced or
malformed paste sites.
+ Oracle: `paste_pool_range`, `append_pasted_token`.
- [ ] **Phase 7 — Integer atoms + S-expression evaluator.**
- `parse_int_token`, explicit expression-frame stack, all C operator
- semantics, macro-in-expression composition (required for
- `p1/aarch64.M1M`).
+ Integer-token parsing; explicit expression-frame stack; all
+ operators from the oracle; macro-in-expression composition (the
+ required path for `p1/aarch64.M1M`).
+ Oracle: `parse_int_token`, `expr_op_code`, `apply_expr_op`,
+ `eval_expr_atom`, `eval_expr_range`, `skip_expr_newlines`.
- [ ] **Phase 8 — `!@%$(expr)` builtins.**
- One-arg builtins on top of the evaluator; emit LE 1/2/4/8-byte hex
- tokens.
+ One-arg builtins on top of the evaluator; emit LE 1/2/4/8-byte
+ hex tokens.
+ Oracle: `expand_builtin_call` (the `!@%$` cases), `emit_hex_value`.
- [ ] **Phase 9 — `%select(cond, then, else)`.**
Eager `cond` eval; copy chosen branch to expand pool, push as
stream; never evaluate the unchosen branch.
+ Oracle: `expand_builtin_call` (the `%select` case).
- [ ] **Phase 10 — Full-parity + malformed-input smoke tests.**
- Run `tests/m1m/full-parity.M1M` against the M1 implementation;
+ Run `tests/m1pp/full-parity.M1M` against the M1 implementation;
add malformed fixtures (unterminated macro, wrong arg count, bad
- paste, bad expression, bad builtin arity) requiring non-zero exit.
- Then run combined `p1/aarch64.M1M + p1/P1.M1M`, then use the
- produced frontend on a small P1 program.
+ paste, bad expression, bad builtin arity) requiring non-zero
+ exit. Then run combined `p1/P1-aarch64.M1pp + p1/P1.M1pp` through
+ the M1 expander and diff against the Python-generated
+ `build/p1v2/aarch64/p1_aarch64.M1`. Finally use the produced
+ frontend on a small P1 program through the normal toolchain.
diff --git a/m1pp/build.sh b/m1pp/build.sh
@@ -34,7 +34,11 @@ OUT=$2
REPO=$(cd "$(dirname "$0")/.." && pwd)
ARCH=aarch64
PLATFORM=linux/arm64
-RUNTIME_IMAGE='public.ecr.aws/docker/library/alpine@sha256:378c4c5418f7493bd500ad21ffb43818d0689daaad43e3261859fb417d1481a0'
+IMAGE=localhost/lispcc:aarch64
+## Digest-pinned source for the local tag. Mirrors the Makefile pin so the
+## tag is created from the same image bytes even when build.sh runs
+## standalone without `make` having materialised the image stamp.
+IMAGE_DIGEST='public.ecr.aws/docker/library/alpine@sha256:378c4c5418f7493bd500ad21ffb43818d0689daaad43e3261859fb417d1481a0'
P1_DEFS=build/p1v2/$ARCH/p1_$ARCH.M1
TOOLS=build/$ARCH/tools
@@ -49,6 +53,10 @@ for f in "$P1_DEFS" "$TOOLS/M0" "$TOOLS/hex2-0" "$TOOLS/catm" "$ELF_HDR" lint.sh
fi
done
+if ! podman image exists "$IMAGE"; then
+ podman tag "$IMAGE_DIGEST" "$IMAGE"
+fi
+
NAME=$(basename "$SRC" .M1)
WORK=build/m1pp/$NAME.work
mkdir -p "$WORK" "$(dirname "$OUT")"
@@ -69,7 +77,7 @@ awk 'NR==FNR{for(i=1;i<=NF;i++)u[$i]=1;next} /^DEFINE /{if($2 in u)print;next} {
podman run --rm --pull=never --platform "$PLATFORM" \
-v "$REPO":/work \
-w /work \
- "$RUNTIME_IMAGE" sh -ec "
+ "$IMAGE" sh -ec "
set -eu
cp $PRUNED /tmp/p1.M1
cp $SRC /tmp/prog.M1
diff --git a/m1pp/test.sh b/m1pp/test.sh
@@ -23,7 +23,8 @@ set -eu
REPO=$(cd "$(dirname "$0")/.." && pwd)
ARCH=aarch64
PLATFORM=linux/arm64
-RUNTIME_IMAGE='public.ecr.aws/docker/library/alpine@sha256:378c4c5418f7493bd500ad21ffb43818d0689daaad43e3261859fb417d1481a0'
+## build.sh creates the localhost/lispcc:aarch64 tag on first run.
+IMAGE=localhost/lispcc:aarch64
cd "$REPO"
@@ -73,11 +74,11 @@ for src in $FIXTURES; do
if [ -e "$in" ]; then
actual=$(podman run --rm --pull=never --platform "$PLATFORM" -i \
- -v "$REPO":/work -w /work "$RUNTIME_IMAGE" \
+ -v "$REPO":/work -w /work "$IMAGE" \
"./$bin" "$@" < "$in" 2>&1 || true)
else
actual=$(podman run --rm --pull=never --platform "$PLATFORM" \
- -v "$REPO":/work -w /work "$RUNTIME_IMAGE" \
+ -v "$REPO":/work -w /work "$IMAGE" \
"./$bin" "$@" 2>&1 || true)
fi