boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit b96f45aff0a222fd0290bd77309c8a4c812307e4
parent 2859e2697132d3f328b92a6dcaa646d2de336dd8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 20 Apr 2026 13:18:43 -0700

Add demo.M1 exercising broader P1 op set across three arches

demo.M1 computes (3+4)-2=5 in registers, prints "P1 = 5\n", exits 5.
Exercises LI + SYSCALL + MOV/ADD/SUB register tuples on aarch64, amd64,
riscv64 from a single source. Uses r6 (x19 / rbx / s1, callee-saved on
every arch) to carry values across syscalls.

Extends each p1_<arch>.M1 with the five tuple DEFINEs demo.M1 needs.
These are still hand-written; a generator replaces them once more
than a handful of tuples are in use.

Makefile: parameterize on PROG (default hello). `make PROG=demo run`
builds and runs just the demo. `run-all` iterates arches with `-`
prefix so make continues past demo's intentional exit 5.

Diffstat:
M Makefile | 43 ++++++++++++++++++++++++-------------------
A demo.M1 | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M p1_aarch64.M1 | 22 +++++++++++++++++++++-
M p1_amd64.M1 | 24 ++++++++++++++++++++++--
M p1_riscv64.M1 | 19 +++++++++++++++++--
5 files changed, 156 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile @@ -12,16 +12,19 @@ # binfmt + qemu-user path. # # Usage: -# make image Build the builder image (one-time, idempotent) -# make Build + assemble for ARCH (default aarch64) -# make ARCH=amd64 Same, targeting amd64 -# make ARCH=riscv64 Same, targeting riscv64 -# make run Run the assembled hello for $(ARCH) under alpine -# make run-all Build + run on all three arches -# make clean Remove build/ artifacts +# make image Build the builder image (one-time) +# make Build hello for default ARCH=aarch64 +# make ARCH=amd64 Targeting amd64 +# make PROG=demo Build the broader-ISA demo program +# make run Run $(PROG) for $(ARCH) under alpine +# make run-all Build + run $(PROG) on all three arches +# make PROG=demo run-all Run demo on all three arches +# make clean Remove build/ artifacts # --- Configuration --------------------------------------------------------- +# PROG = basename of the <prog>.M1 source to assemble. hello / demo. +PROG ?= hello ARCH ?= aarch64 # Map P1 ARCH -> Linux-platform tag for the runtime container. @@ -68,7 +71,7 @@ CFLAGS := -D_GNU_SOURCE -std=c99 -ggdb -fno-common -static .PHONY: all image toolchain run run-all clean -all: $(OUT_DIR)/hello +all: $(OUT_DIR)/$(PROG) image: podman build -t $(BUILDER_IMAGE) . 
@@ -93,28 +96,30 @@ build/hex2: | build $(TOOLCHAIN_SRC)/M2libc/bootstrappable.c \ -o build/hex2 -$(OUT_DIR)/hello.hex2: hello.M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR) +$(OUT_DIR)/$(PROG).hex2: $(PROG).M1 p1_$(ARCH).M1 build/M1 | $(OUT_DIR) $(PODMAN_RUN_NATIVE) ./build/M1 \ -f p1_$(ARCH).M1 \ - -f hello.M1 \ + -f $(PROG).M1 \ --little-endian --architecture $(ARCH) \ - -o $(OUT_DIR)/hello.hex2 + -o $(OUT_DIR)/$(PROG).hex2 -$(OUT_DIR)/hello: $(OUT_DIR)/hello.hex2 ELF-$(ARCH).hex2 build/hex2 +$(OUT_DIR)/$(PROG): $(OUT_DIR)/$(PROG).hex2 ELF-$(ARCH).hex2 build/hex2 $(PODMAN_RUN_NATIVE) ./build/hex2 \ -f ELF-$(ARCH).hex2 \ - -f $(OUT_DIR)/hello.hex2 \ + -f $(OUT_DIR)/$(PROG).hex2 \ --little-endian --architecture $(ARCH) \ --base-address 0x400000 \ - -o $(OUT_DIR)/hello + -o $(OUT_DIR)/$(PROG) -run: $(OUT_DIR)/hello - $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/hello +run: $(OUT_DIR)/$(PROG) + $(PODMAN_RUN_TARGET) ./$(OUT_DIR)/$(PROG) +# `-` prefix: continue past non-zero exit. demo.M1 exits with the computed +# result (5), which is a legitimate program outcome, not a make failure. run-all: - $(MAKE) --no-print-directory ARCH=aarch64 run - $(MAKE) --no-print-directory ARCH=amd64 run - $(MAKE) --no-print-directory ARCH=riscv64 run + -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=aarch64 run + -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=amd64 run + -$(MAKE) --no-print-directory PROG=$(PROG) ARCH=riscv64 run clean: rm -rf build/ diff --git a/demo.M1 b/demo.M1 @@ -0,0 +1,72 @@ +## P1 broader-ISA demo — portable across aarch64, amd64, riscv64. +## +## Computes (3 + 4) - 2 = 5 in registers, prints "P1 = 5\n", then +## exits with status 5. Exercises LI, ADD, SUB, MOV, and SYSCALL. +## +## Run-and-verify: +## make PROG=demo ARCH=<arch> run && echo "exit=$?" +## expected stdout: "P1 = 5\n" expected exit: 5 + +:_start + ## Compute result = (3 + 4) - 2 = 5, stash in r6. + ## r6 maps to a callee-saved native reg on every arch (x19 / rbx / + ## s1), so it survives SYSCALL's argument shuffle. 
+ P1_LI_R1 + '03000000' # r1 = 3 + P1_LI_R2 + '04000000' # r2 = 4 + P1_ADD_R3_R1_R2 # r3 = r1 + r2 (= 7) + P1_LI_R4 + '02000000' # r4 = 2 + P1_SUB_R3_R3_R4 # r3 = r3 - r4 (= 5) + P1_MOV_R6_R3 # r6 = r3 (save across syscalls) + + ## write(1, &prefix, 5) — "P1 = " + P1_LI_R0 + SYS_WRITE + P1_LI_R1 + '01000000' + P1_LI_R2 + &prefix + P1_LI_R3 + '05000000' + P1_SYSCALL + + ## write(1, &digits + r6, 1) — the computed digit ('5') + P1_LI_R0 + SYS_WRITE + P1_LI_R1 + '01000000' + P1_LI_R2 + &digits + P1_ADD_R2_R2_R6 # r2 = &digits + 5 + P1_LI_R3 + '01000000' + P1_SYSCALL + + ## write(1, &newline, 1) + P1_LI_R0 + SYS_WRITE + P1_LI_R1 + '01000000' + P1_LI_R2 + &newline + P1_LI_R3 + '01000000' + P1_SYSCALL + + ## exit(r6) — exit status = computed result + P1_LI_R0 + SYS_EXIT + P1_MOV_R1_R6 + P1_SYSCALL + +:prefix +"P1 = " +:digits +"0123456789" +:newline +" +" + +:ELF_end diff --git a/p1_aarch64.M1 b/p1_aarch64.M1 @@ -1,6 +1,9 @@ ## P1 pseudo-ISA — aarch64 backing defs (v0.1 spike) ## -## Implements the subset needed for the hello-world demo: LI, SYSCALL. +## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL, +## plus a handful of MOV/ADD/SUB register tuples. The full 1500-entry +## table described in P1.md is generator-driven; what's here is the +## spike's hand-written sliver. ## See ../P1.md for the full ISA and register mapping. ## ## Register mapping (P1 → aarch64): @@ -59,3 +62,20 @@ DEFINE P1_SYSCALL e80300aae00301aae10302aae20303aae30304aae40305aae50313aa010000 ## Emitted as 4-byte little-endian immediates, to be consumed by P1_LI_R*. DEFINE SYS_WRITE 40000000 DEFINE SYS_EXIT 5D000000 + + +## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------ +## One native insn per op. Named after the P1-register tuple they realize. +## Add entries here as new programs need them; a generator will replace the +## hand-maintenance once more than a handful of tuples are in use. 
+ +## MOV rD, rA -> orr xD, xzr, xA +DEFINE P1_MOV_R6_R3 F30303AA ## mov x19, x3 +DEFINE P1_MOV_R1_R6 E10313AA ## mov x1, x19 + +## ADD rD, rA, rB -> add xD, xA, xB +DEFINE P1_ADD_R3_R1_R2 2300028B ## add x3, x1, x2 +DEFINE P1_ADD_R2_R2_R6 4200138B ## add x2, x2, x19 + +## SUB rD, rA, rB -> sub xD, xA, xB +DEFINE P1_SUB_R3_R3_R4 630004CB ## sub x3, x3, x4 diff --git a/p1_amd64.M1 b/p1_amd64.M1 @@ -1,7 +1,7 @@ ## P1 pseudo-ISA — amd64 backing defs (v0.1 spike) ## -## Implements the subset needed for the hello-world demo: LI, SYSCALL. -## See ../P1.md for the full ISA and register mapping. +## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL, +## plus a handful of MOV/ADD/SUB register tuples. See ../P1.md. ## ## Register mapping (P1 → amd64): ## r0 → rax , r1 → rdi , r2 → rsi , r3 → rdx @@ -49,3 +49,23 @@ DEFINE P1_SYSCALL 4989D90F05 ## Emitted as 4-byte little-endian immediates, consumed by P1_LI_R*. DEFINE SYS_WRITE 01000000 DEFINE SYS_EXIT 3C000000 + + +## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------ +## amd64 lacks a 3-operand arithmetic form, so every "rD = rA op rB" +## expands unconditionally to two native insns: +## mov rD_native, rA_native +## <op> rD_native, rB_native +## When rD == rA the leading mov is a no-op write, kept anyway — P1 is +## deliberately unoptimized (P1.md §"Non-goals"). + +## MOV rD, rA -> mov rD_native, rA_native (single insn) +DEFINE P1_MOV_R6_R3 4889D3 ## mov rbx, rdx +DEFINE P1_MOV_R1_R6 4889DF ## mov rdi, rbx + +## ADD rD, rA, rB -> mov rD,rA ; add rD,rB +DEFINE P1_ADD_R3_R1_R2 4889FA4801F2 ## mov rdx,rdi ; add rdx,rsi +DEFINE P1_ADD_R2_R2_R6 4889F64801DE ## mov rsi,rsi ; add rsi,rbx + +## SUB rD, rA, rB -> mov rD,rA ; sub rD,rB +DEFINE P1_SUB_R3_R3_R4 4889D24C29D2 ## mov rdx,rdx ; sub rdx,r10 diff --git a/p1_riscv64.M1 b/p1_riscv64.M1 @@ -1,7 +1,7 @@ ## P1 pseudo-ISA — riscv64 backing defs (v0.1 spike) ## -## Implements the subset needed for the hello-world demo: LI, SYSCALL. 
-## See ../P1.md for the full ISA and register mapping. +## Implements the subset needed by hello.M1 and demo.M1: LI, SYSCALL, +## plus a handful of MOV/ADD/SUB register tuples. See ../P1.md. ## ## Register mapping (P1 → RISC-V): ## r0 → a0 (x10) , r1 → a1 (x11) , r2 → a2 (x12) , r3 → a3 (x13) @@ -61,3 +61,18 @@ DEFINE P1_SYSCALL 93080500138505009305060013860600930607001387070093070400730000 ## Linux syscall numbers (riscv64 uses the generic table — same as aarch64). DEFINE SYS_WRITE 40000000 DEFINE SYS_EXIT 5D000000 + + +## ---- MOV / ADD / SUB (demo.M1 subset) ------------------------------------ +## One native insn per op. + +## MOV rD, rA -> addi rD, rA, 0 (the `mv` pseudo) +DEFINE P1_MOV_R6_R3 93840600 ## mv s1, a3 +DEFINE P1_MOV_R1_R6 93850400 ## mv a1, s1 + +## ADD rD, rA, rB -> add rD, rA, rB +DEFINE P1_ADD_R3_R1_R2 B386C500 ## add a3, a1, a2 +DEFINE P1_ADD_R2_R2_R6 33069600 ## add a2, a2, s1 + +## SUB rD, rA, rB -> sub rD, rA, rB +DEFINE P1_SUB_R3_R3_R4 B386E640 ## sub a3, a3, a4