commit b96f45aff0a222fd0290bd77309c8a4c812307e4
parent 2859e2697132d3f328b92a6dcaa646d2de336dd8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 20 Apr 2026 13:18:43 -0700
Add demo.M1 exercising broader P1 op set across three arches
demo.M1 computes (3+4)-2=5 in registers, prints "P1 = 5\n", exits 5.
Exercises LI + SYSCALL + MOV/ADD/SUB register tuples on aarch64, amd64,
riscv64 from a single source. Uses r6 (x19 / rbx / s1, callee-saved on
every arch) to carry values across syscalls.
Extends each p1_<arch>.M1 with the five tuple DEFINEs demo.M1 needs.
These are still hand-written; a generator replaces them once more
than a handful of tuples are in use.
Makefile: parameterize on PROG (default hello). `make PROG=demo run`
builds and runs just the demo. `run-all` iterates arches with `-`
prefix so make continues past demo's intentional exit 5.
Diffstat:
| M | Makefile | | | 43 | ++++++++++++++++++++++++------------------- |
| A | demo.M1 | | | 72 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | p1_aarch64.M1 | | | 22 | +++++++++++++++++++++- |
| M | p1_amd64.M1 | | | 24 | ++++++++++++++++++++++-- |
| M | p1_riscv64.M1 | | | 19 | +++++++++++++++++-- |
5 files changed, 156 insertions(+), 24 deletions(-)
diff --git a/Makefile b/Makefile
@@ -12,16 +12,19 @@
# binfmt + qemu-user path.
#
# Usage:
-# make image Build the builder image (one-time, idempotent)
-# make Build + assemble for ARCH (default aarch64)
-# make ARCH=amd64 Same, targeting amd64
-# make ARCH=riscv64 Same, targeting riscv64
-# make run Run the assembled hello for $(ARCH) under alpine
-# make run-all Build + run on all three arches
-# make clean Remove build/ artifacts
+# make image Build the builder image (one-time)
+# make Build hello for default ARCH=aarch64
+# make ARCH=amd64 Targeting amd64
+# make PROG=demo Build the broader-ISA demo program
+# make run Run $(PROG) for $(ARCH) under alpine
+# make run-all Build + run $(PROG) on all three arches
+# make PROG=demo run-all Run demo on all three arches
+# make clean Remove build/ artifacts
# --- Configuration ---------------------------------------------------------
+# PROG = basename of the <prog>.M1 source to assemble. hello / demo.
+PROG ?= hello
ARCH ?= aarch64
# Map P1 ARCH -> Linux-platform tag for the runtime container.
@@ -68,7 +71,7 @@ CFLAGS := -D_GNU_SOURCE -std=c99 -ggdb -fno-common -static
.PHONY: all image toolchain run run-all clean
-all: $(OUT_DIR)/hello
+all: $(OUT_DIR)/$(PROG)
image:
podman build -t $(BUILDER_IMAGE) .
@@ -93,28 +96,30 @@ build/hex2: | build
$(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \
-o build/hex2
-$(OUT_DIR)/hello.hex2: hello.M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR)
+$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR)
$(PODMAN_RUN_NATIVE) ./build/M1 \
-f p1_$(ARCH).M1 \
- -f hello.M1 \
+ -f $(PROG).M1 \
--little-endian --architecture $(ARCH) \
- -o $(OUT_DIR)/hello.hex2
+ -o $(OUT_DIR)/$(PROG).hex2
-$(OUT_DIR)/hello: $(OUT_DIR)/hello.hex2 ELF-$(ARCH).hex2 build/hex2
+$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 build/hex2
$(PODMAN_RUN_NATIVE) ./build/hex2 \
-f ELF-$(ARCH).hex2 \
- -f $(OUT_DIR)/hello.hex2 \
+ -f $(OUT_DIR)/$(PROG).hex2 \
--little-endian --architecture $(ARCH) \
--base-address 0x400000 \
- -o $(OUT_DIR)/hello
+ -o $(OUT_DIR)/$(PROG)
-run: $(OUT_DIR)/hello
- $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/hello
+run: $(OUT_DIR)/$(PROG)
+ $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/$(PROG)
+# `-` prefix: continue past a non-zero exit. demo.M1 exits with its computed
+# result (5) by design. Caveat: `-` also ignores real build/run failures.
run-all:
- $(MAKE) --no-print-directory ARCH=aarch64 run
- $(MAKE) --no-print-directory ARCH=amd64 run
- $(MAKE) --no-print-directory ARCH=riscv64 run
+ -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=aarch64 run
+ -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=amd64 run
+ -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=riscv64 run
clean:
rm -rf build/
diff --git a/demo.M1 b/demo.M1
@@ -0,0 +1,72 @@
+## P1 broader-ISA demo — portable across aarch64, amd64, riscv64.
+##
+## Computes (3 + 4) - 2 = 5 in registers, prints "P1 = 5\n", then
+## exits with status 5. Exercises LI, ADD, SUB, MOV, and SYSCALL.
+##
+## Run-and-verify (make treats the program's non-zero status as an error):
+## make PROG=demo ARCH=<arch> run; echo "make exit=$?"
+## expected stdout: "P1 = 5\n" expected: make reports "Error 5", exits 2
+
+:_start
+ ## Compute result = (3 + 4) - 2 = 5, stash in r6.
+ ## r6 maps to a callee-saved native reg on every arch (x19 / rbx /
+ ## s1), so it survives SYSCALL's argument shuffle.
+ P1_LI_R1
+ '03000000' # r1 = 3
+ P1_LI_R2
+ '04000000' # r2 = 4
+ P1_ADD_R3_R1_R2 # r3 = r1 + r2 (= 7)
+ P1_LI_R4
+ '02000000' # r4 = 2
+ P1_SUB_R3_R3_R4 # r3 = r3 - r4 (= 5)
+ P1_MOV_R6_R3 # r6 = r3 (save across syscalls)
+
+ ## write(1, &prefix, 5) — "P1 = "
+ P1_LI_R0
+ SYS_WRITE
+ P1_LI_R1
+ '01000000'
+ P1_LI_R2
+ &prefix
+ P1_LI_R3
+ '05000000'
+ P1_SYSCALL
+
+ ## write(1, &digits + r6, 1) — the computed digit ('5')
+ P1_LI_R0
+ SYS_WRITE
+ P1_LI_R1
+ '01000000'
+ P1_LI_R2
+ &digits
+ P1_ADD_R2_R2_R6 # r2 = &digits + 5
+ P1_LI_R3
+ '01000000'
+ P1_SYSCALL
+
+ ## write(1, &newline, 1)
+ P1_LI_R0
+ SYS_WRITE
+ P1_LI_R1
+ '01000000'
+ P1_LI_R2
+ &newline
+ P1_LI_R3
+ '01000000'
+ P1_SYSCALL
+
+ ## exit(r6) — exit status = computed result
+ P1_LI_R0
+ SYS_EXIT
+ P1_MOV_R1_R6
+ P1_SYSCALL
+
+:prefix
+"P1 = "
+:digits
+"0123456789"
+:newline
+"
+"
+
+:ELF_end
diff --git a/p1_aarch64.M1 b/p1_aarch64.M1
@@ -1,6 +1,9 @@
## P1 pseudo-ISA — aarch64 backing defs (v0.1 spike)
##
-## Implements the subset needed for the hello-world demo: LI, SYSCALL.
+## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL,
+## plus a handful of MOV/ADD/SUB register tuples. The full 1500-entry
+## table described in P1.md is generator-driven; what's here is the
+## spike's hand-written sliver.
## See ../P1.md for the full ISA and register mapping.
##
## Register mapping (P1 → aarch64):
@@ -59,3 +62,20 @@ DEFINE P1_SYSCALL e80300aae00301aae10302aae20303aae30304aae40305aae50313aa010000
## Emitted as 4-byte little-endian immediates, to be consumed by P1_LI_R*.
DEFINE SYS_WRITE 40000000
DEFINE SYS_EXIT 5D000000
+
+
+## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------
+## One native insn per op. Named after the P1-register tuple they realize.
+## Add entries here as new programs need them; a generator will replace the
+## hand-maintenance once more than a handful of tuples are in use.
+
+## MOV rD, rA -> orr xD, xzr, xA
+DEFINE P1_MOV_R6_R3 F30303AA ## mov x19, x3
+DEFINE P1_MOV_R1_R6 E10313AA ## mov x1, x19
+
+## ADD rD, rA, rB -> add xD, xA, xB
+DEFINE P1_ADD_R3_R1_R2 2300028B ## add x3, x1, x2
+DEFINE P1_ADD_R2_R2_R6 4200138B ## add x2, x2, x19
+
+## SUB rD, rA, rB -> sub xD, xA, xB
+DEFINE P1_SUB_R3_R3_R4 630004CB ## sub x3, x3, x4
diff --git a/p1_amd64.M1 b/p1_amd64.M1
@@ -1,7 +1,7 @@
## P1 pseudo-ISA — amd64 backing defs (v0.1 spike)
##
-## Implements the subset needed for the hello-world demo: LI, SYSCALL.
-## See ../P1.md for the full ISA and register mapping.
+## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL,
+## plus a handful of MOV/ADD/SUB register tuples. See ../P1.md.
##
## Register mapping (P1 → amd64):
## r0 → rax , r1 → rdi , r2 → rsi , r3 → rdx
@@ -49,3 +49,23 @@ DEFINE P1_SYSCALL 4989D90F05
## Emitted as 4-byte little-endian immediates, consumed by P1_LI_R*.
DEFINE SYS_WRITE 01000000
DEFINE SYS_EXIT 3C000000
+
+
+## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------
+## amd64 lacks a 3-operand arithmetic form, so every "rD = rA op rB" expands
+## unconditionally to: mov rD_native, rA_native ; <op> rD_native, rB_native
+## When rD == rA the leading mov is a no-op write, kept anyway — P1 is
+## deliberately unoptimized (P1.md §"Non-goals").
+## NOTE(review): when rD == rB but rD != rA the mov overwrites rB before <op>
+## reads it. No tuple below has that shape; one that does needs another form.
+
+## MOV rD, rA -> mov rD_native, rA_native (single insn)
+DEFINE P1_MOV_R6_R3 4889D3 ## mov rbx, rdx
+DEFINE P1_MOV_R1_R6 4889DF ## mov rdi, rbx
+
+## ADD rD, rA, rB -> mov rD,rA ; add rD,rB
+DEFINE P1_ADD_R3_R1_R2 4889FA4801F2 ## mov rdx,rdi ; add rdx,rsi
+DEFINE P1_ADD_R2_R2_R6 4889F64801DE ## mov rsi,rsi ; add rsi,rbx
+
+## SUB rD, rA, rB -> mov rD,rA ; sub rD,rB
+DEFINE P1_SUB_R3_R3_R4 4889D24C29D2 ## mov rdx,rdx ; sub rdx,r10
diff --git a/p1_riscv64.M1 b/p1_riscv64.M1
@@ -1,7 +1,7 @@
## P1 pseudo-ISA — riscv64 backing defs (v0.1 spike)
##
-## Implements the subset needed for the hello-world demo: LI, SYSCALL.
-## See ../P1.md for the full ISA and register mapping.
+## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL,
+## plus a handful of MOV/ADD/SUB register tuples. See ../P1.md.
##
## Register mapping (P1 → RISC-V):
## r0 → a0 (x10) , r1 → a1 (x11) , r2 → a2 (x12) , r3 → a3 (x13)
@@ -61,3 +61,18 @@ DEFINE P1_SYSCALL 93080500138505009305060013860600930607001387070093070400730000
## Linux syscall numbers (riscv64 uses the generic table — same as aarch64).
DEFINE SYS_WRITE 40000000
DEFINE SYS_EXIT 5D000000
+
+
+## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------
+## One native insn per op. NOTE(review): P1_SYSCALL's last mv before ecall (93070400) reads s0, but r6 is s1 here — confirm.
+
+## MOV rD, rA -> addi rD, rA, 0 (the `mv` pseudo)
+DEFINE P1_MOV_R6_R3 93840600 ## mv s1, a3
+DEFINE P1_MOV_R1_R6 93850400 ## mv a1, s1
+
+## ADD rD, rA, rB -> add rD, rA, rB
+DEFINE P1_ADD_R3_R1_R2 B386C500 ## add a3, a1, a2
+DEFINE P1_ADD_R2_R2_R6 33069600 ## add a2, a2, s1
+
+## SUB rD, rA, rB -> sub rD, rA, rB
+DEFINE P1_SUB_R3_R3_R4 B386E640 ## sub a3, a3, a4