Add scheme1 phase-1 interpreter spine and test runner - boot2

commit 925240dd23966009f0b742a6199d87f1661e79fa
parent 6ce02250c5f5982da088c191b4da1ac0ce6a4958
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 03:03:32 -0700

Add scheme1 phase-1 interpreter spine and test runner

Tag-dispatched eval/apply over a bump heap, linear-scan symbol intern
table, and a single sys-exit primitive. Adds a scheme1 Make target,
.P1pp build pipeline support in boot-build-p1pp.sh, and a scheme1
suite in run-tests.sh that diffs stdout and exit code against
.expected / .expected-exit fixtures.

Also bumps the m1pp INPUT/OUTPUT/TEXT caps so combined.M1pp inputs
that include libp1pp + scheme1 fit through the expander.

Diffstat:
M M1pp/M1pp.P1  | 6 +++---
M Makefile  | 44 ++++++++++++++++++++++++++++++++++----------
A scheme1/scheme1.P1pp  | 809 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/boot-build-p1pp.sh  | 20 ++++++++++++++------
M scripts/run-tests.sh  | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
A tests/scheme1/00-exit.expected-exit  | 1 +
A tests/scheme1/00-exit.scm  | 1 +

7 files changed, 953 insertions(+), 34 deletions(-)
diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1
@@ -27,9 +27,9 @@
 
 ## --- Constants & sizing ------------------------------------------------------
 
-DEFINE M1PP_INPUT_CAP 0000010000000000
-DEFINE M1PP_OUTPUT_CAP 0000010000000000
-DEFINE M1PP_TEXT_CAP 0080000000000000
+DEFINE M1PP_INPUT_CAP 0000040000000000
+DEFINE M1PP_OUTPUT_CAP 0000040000000000
+DEFINE M1PP_TEXT_CAP 0000040000000000
 DEFINE M1PP_TOKENS_END 0000c00000000000
 ## Macro record is 296 bytes: name (16) + param_count (8) + params[16]*16 (256)
 ## + body_start (8) + body_end (8). MACROS_CAP fits 512 records (151552 B).
diff --git a/Makefile b/Makefile
@@ -15,10 +15,12 @@
 #   make m1pp                       build the m1pp expander for ARCH
 #   make pokem                      build pokem for ARCH
 #   make hello                      build hello via the bootstrap chain
+#   make scheme1                    build the scheme1 interpreter for ARCH
 #   make run                        run hello in the container
 #   make test                       every suite, every arch
 #   make test SUITE=m1pp            m1pp suite, every arch
 #   make test SUITE=p1 ARCH=amd64   p1 suite, one arch
+#   make test SUITE=scheme1         scheme1 .scm fixtures, every arch
 #   make image                      build the per-arch container image
 #   make tools                      bootstrap M0/hex2-0/catm for ARCH
 #   make tables                     regen pre-pruned P1/P1-<arch>.M1 tables
@@ -58,7 +60,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
 
 # --- Targets --------------------------------------------------------------
 
-.PHONY: all m1pp pokem hello run test image tools tables \
+.PHONY: all m1pp pokem hello scheme1 run test image tools tables \
         tools-native clean help
 
 all: m1pp pokem
@@ -120,20 +122,29 @@ P1/P1-%.M1: build/p1/%.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS)
 
 # --- Programs (per arch) --------------------------------------------------
 
-M1PP_BINS  := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp)
-POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem)
-HELLO_SRC  := tests/M1pp/00-hello.M1
-HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello)
+M1PP_BINS    := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp)
+POKEM_BINS   := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem)
+HELLO_SRC    := tests/M1pp/00-hello.M1
+HELLO_BINS   := $(foreach a,$(ALL_ARCHES),build/$(a)/hello)
+SCHEME1_SRC  := scheme1/scheme1.P1pp
+SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1)
 
-m1pp:  $(OUT_DIR)/m1pp
-pokem: $(OUT_DIR)/pokem
-hello: $(OUT_DIR)/hello
+m1pp:    $(OUT_DIR)/m1pp
+pokem:   $(OUT_DIR)/pokem
+hello:   $(OUT_DIR)/hello
+scheme1: $(OUT_DIR)/scheme1
 
-# Per-arch deps shared by every .P1/.M1 build.
+# Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion).
 P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \
                 build/%/.image build/%/tools/M0 \
                 vendor/seed/%/ELF.hex2 P1/P1-%.M1
 
+# Per-arch deps for .P1pp builds (m1pp expansion + libp1pp).
+P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \
+                  build/%/.image build/%/tools/M0 build/%/m1pp \
+                  vendor/seed/%/ELF.hex2 \
+                  P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp
+
 $(M1PP_BINS): build/%/m1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS)
 	ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1
 	$(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@
@@ -146,6 +157,9 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS)
 	ARCH=$* sh scripts/lint.sh $(HELLO_SRC)
 	$(call PODMAN,$*) sh scripts/boot-build-p1.sh $(HELLO_SRC) $@
 
+$(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS)
+	$(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@
+
 run: $(OUT_DIR)/hello $(IMAGE_STAMP)
 	$(call PODMAN,$(ARCH)) ./$(OUT_DIR)/hello
 
@@ -190,16 +204,26 @@ TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \
 TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \
     build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp)
 
+# scheme1 suite per-arch deps: image, tools, expander, scheme1 binary.
+# (run-tests.sh runs the pre-built binary against each .scm fixture; it
+# does not rebuild the interpreter per fixture.)
+TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \
+    build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1)
+
 test:
 ifeq ($(SUITE),)
 	@$(MAKE) --no-print-directory test SUITE=m1pp
 	@$(MAKE) --no-print-directory test SUITE=p1
+	@$(MAKE) --no-print-directory test SUITE=scheme1
 else ifeq ($(SUITE),m1pp)
 	@$(MAKE) --no-print-directory $(TEST_M1PP_DEPS)
 	sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
 else ifeq ($(SUITE),p1)
 	@$(MAKE) --no-print-directory $(TEST_P1_DEPS)
 	sh scripts/run-tests.sh --suite=p1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
+else ifeq ($(SUITE),scheme1)
+	@$(MAKE) --no-print-directory $(TEST_SCHEME1_DEPS)
+	sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
 else
-	@echo "unknown SUITE='$(SUITE)' (expected m1pp | p1)" >&2; exit 2
+	@echo "unknown SUITE='$(SUITE)' (expected m1pp | p1 | scheme1)" >&2; exit 2
 endif
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -0,0 +1,809 @@
+# scheme1.P1pp -- Phase 1 minimal Scheme interpreter on P1.
+#
+# The full target is described in docs/LISP-C.md and docs/LISP.md. This
+# file is the spine: enough infrastructure to read a single source file,
+# parse one s-expression, and evaluate it via tag-dispatched eval/apply
+# with a single primitive (`sys-exit`). Every later piece (more
+# primitives, special forms, closures, pmatch, records, prelude, repl
+# loop) hooks onto these anchors without restructuring.
+#
+# What's wired up now:
+#   - Tag layout per LISP-C.md: fixnums, pairs, symbols, headered objects,
+#     immediate singletons. mkimm composes IMM constants at M1pp time.
+#   - Bump heap allocator (cons + alloc_hdr) over a BSS-past-ELF_end arena.
+#   - Linear-scan symbol intern table; entries store (name_ptr, name_len,
+#     global_val) borrowed directly from readbuf.
+#   - Reader: '(', ')', fixnums (decimal, optional leading `-`), bare
+#     symbols. No strings/hex/chars/quote/dotted yet, no `;` comments.
+#   - eval: tag dispatch -> self-eval / symbol lookup / pair application.
+#   - apply: HDR.PRIM dispatch only; closures/specials come later.
+#   - One primitive: sys-exit. Exits with the raw fixnum.
+#
+# Build chain (P1pp.P1pp = libp1pp must be in the catm sequence):
+#   catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp scheme1/scheme1.P1pp \
+#     | m1pp -> M0 -> hex2 -> ELF
+#
+# Memory model: the ELF's ph_memsz is 8 MB (boot2 default), so all
+# zero-initialized arenas live past :ELF_end and cost zero file bytes.
+# p1_main writes their absolute addresses into pointer slots once at
+# startup; every later access goes through one extra load.
+
+# =========================================================================
+# Constants
+# =========================================================================
+
+%enum TAG { FIXNUM PAIR SYM HEAP IMM }
+%enum IMM { FALSE TRUE NIL UNSPEC UNBOUND }
+%enum HDR { BV CLOSURE PRIM TD REC }
+
+# imm_val(idx) -> integer-expression for the tagged immediate at IMM index
+# `idx`. Used both at %li sites (loaded into a register) and at $() emission
+# sites (baked into a static word).
+%macro imm_val(idx)
+(| (<< idx 3) %TAG.IMM)
+%endm
+
+# Layout helpers. %struct stride is 8 bytes per field.
+%struct PAIR    { car cdr }                         # .SIZE = 16
+%struct SYMENT  { name_ptr name_len global_val pad } # .SIZE = 32
+%struct PRIM    { hdr entry_w }                      # .SIZE = 16
+
+# BSS sizing. Offsets are bytes from :ELF_end.
+DEFINE OFF_readbuf      0000000000000000
+DEFINE OFF_heap         0000010000000000
+DEFINE OFF_symtab       0000020000000000
+DEFINE READBUF_CAP      0000010000000000
+DEFINE HEAP_CAP         0000010000000000
+DEFINE SYMTAB_CAP_BYTES 0000010000000000
+
+# Number of symtab slots. Used as a runtime guard in intern. Must match
+# SYMTAB_CAP_BYTES / SYMENT.SIZE.
+%macro SYMTAB_CAP_SLOTS()
+1024
+%endm
+
+# Same as M1PP_INPUT_CAP, exposed for %li in load_source.
+%macro READBUF_CAP_BYTES()
+65536
+%endm
+
+# =========================================================================
+# Tag idioms
+# =========================================================================
+
+%macro tagof(rd, rs)
+%andi(rd, rs, 7)
+%endm
+
+%macro mkfix(rd, rs)
+%shli(rd, rs, 3)
+%endm
+
+%macro untag_fix(rd, rs)
+%sari(rd, rs, 3)
+%endm
+
+%macro untag_sym(rd, rs)
+%sari(rd, rs, 3)
+%endm
+
+%macro car(rd, rs)
+%ld(rd, rs, -1)
+%endm
+
+%macro cdr(rd, rs)
+%ld(rd, rs, 7)
+%endm
+
+%macro hdr_type(rd, rs)
+%lb(rd, rs, -3)
+%endm
+
+# =========================================================================
+# p1_main -- runtime spine
+# =========================================================================
+#
+# Frame layout (16 bytes):
+#   +0  saved argv
+
+%fn(p1_main, 16, {
+    %st(a1, sp, 0)
+
+    %li(t0, 2)
+    %bltu(a0, t0, &::usage)
+
+    # Initialize BSS pointer slots from ELF_end + OFF_*. Same idiom as
+    # M1pp.P1: a tiny init table walked once.
+    %la(t0, &ELF_end)
+    %la(t1, &bss_init_tbl)
+    %la(t2, &bss_init_tbl_end)
+    ::bss_loop
+    %beq(t1, t2, &::bss_done)
+    %ld(a0, t1, 0)
+    %ld(a2, t1, 8)
+    %add(a2, a2, t0)
+    %st(a2, a0, 0)
+    %addi(t1, t1, 16)
+    %b(&::bss_loop)
+    ::bss_done
+
+    # heap_next = &heap_buf, rounded up to 8-byte alignment. The BSS arena
+    # starts at &ELF_end + OFF_heap, but &ELF_end's alignment depends on
+    # the data section above it; cons assumes 8-byte-aligned heap_next so
+    # every pair pointer's low 3 bits are exactly the PAIR tag.
+    %la(t0, &heap_buf_ptr)
+    %ld(t0, t0, 0)
+    %addi(t0, t0, 7)
+    %li(t1, -8)
+    %and(t0, t0, t1)
+    %la(t1, &heap_next)
+    %st(t0, t1, 0)
+
+    # Bind built-in primitives.
+    %call(&register_primitives)
+
+    # argv[1] is the source path (NUL-terminated cstr from the kernel).
+    %ld(a1, sp, 0)
+    %ld(a0, a1, 8)
+    %call(&load_source)
+
+    # parse_one returns one s-expression in a0.
+    %call(&parse_one)
+
+    # eval(expr, NIL).
+    %li(a1, %imm_val(%IMM.NIL))
+    %call(&eval)
+
+    # If the form did not sys-exit (e.g. evaluated to a fixnum), drop
+    # cleanly with status 0. Useful while wiring up more of the runtime.
+    %li(a0, 0)
+    %eret
+
+    ::usage
+    %la(a0, &msg_usage)
+    %call(&print_cstr)
+    %li(a0, 2)
+})
+
+# =========================================================================
+# Source loading -- argv[1] -> readbuf, length stored in readbuf_len
+# =========================================================================
+#
+# Frame: 8 bytes
+#   +0  no slots; libp1pp's read_file does open+read+close in one call.
+
+%fn(load_source, 0, {
+    %la(t0, &readbuf_buf_ptr)
+    %ld(a1, t0, 0)
+    %li(a2, %READBUF_CAP_BYTES)
+    %call(&read_file)
+    %bltz(a0, &::fail)
+
+    %la(t0, &readbuf_len)
+    %st(a0, t0, 0)
+    %li(a0, 0)
+    %eret
+
+    ::fail
+    %la(a0, &msg_load_fail)
+    %call(&print_cstr)
+    %li(a0, 3)
+    %call(&sys_exit)
+    ::spin
+    %b(&::spin)
+})
+
+# =========================================================================
+# Heap: cons (leaf) and alloc_hdr (leaf)
+# =========================================================================
+#
+# Both are call-free leaves: bump heap_next, write fields, return tagged
+# pointer. Heap exhaustion crashes via a deliberate read past the arena
+# end -- runtime check arrives when error reporting does (LISP-C.md
+# milestone 3).
+
+# cons(car=a0, cdr=a1) -> tagged pair (a0)
+:cons
+    %la(t2, &heap_next)
+    %ld(t0, t2, 0)
+    %st(a0, t0, 0)
+    %st(a1, t0, 8)
+    %addi(t1, t0, 16)
+    %st(t1, t2, 0)
+    %addi(a0, t0, 1)
+    %ret
+
+# alloc_hdr(bytes=a0, hdr_word=a1) -> tagged heap obj (a0)
+# Rounds bytes up to a multiple of 8 and writes hdr_word at offset 0.
+:alloc_hdr
+    %addi(a0, a0, 7)
+    %li(t0, -8)
+    %and(a0, a0, t0)
+    %la(t2, &heap_next)
+    %ld(t0, t2, 0)
+    %add(t1, t0, a0)
+    %st(t1, t2, 0)
+    %st(a1, t0, 0)
+    %addi(a0, t0, 3)
+    %ret
+
+# =========================================================================
+# Symbol intern -- linear scan, append on miss
+# =========================================================================
+#
+# Frame: 32 bytes
+#   +0  name_ptr      (input)
+#   +8  name_len      (input)
+#   +16 idx           (loop counter / found index)
+#   +24 entry_ptr     (spilled across memcmp)
+
+%fn(intern, 32, {
+    %st(a0, sp, 0)
+    %st(a1, sp, 8)
+
+    %li(t0, 0)
+    %st(t0, sp, 16)
+
+    ::scan
+    # idx >= count? -> append
+    %ld(t0, sp, 16)
+    %la(t1, &symtab_count)
+    %ld(t1, t1, 0)
+    %bltu(t0, t1, &::probe)
+    %b(&::append)
+
+    ::probe
+    # entry_ptr = symtab_buf + idx * SYMENT.SIZE
+    %la(t1, &symtab_buf_ptr)
+    %ld(t1, t1, 0)
+    %shli(t2, t0, 5)
+    %add(t1, t1, t2)
+    %st(t1, sp, 24)
+
+    # entry.name_len == name_len ?
+    %ld(t2, t1, 8)
+    %ld(a2, sp, 8)
+    %bne(t2, a2, &::next)
+
+    # memcmp(entry.name_ptr, name_ptr, len)
+    %ld(a0, t1, 0)
+    %ld(a1, sp, 0)
+    %ld(a2, sp, 8)
+    %call(&memcmp)
+    %beqz(a0, &::found)
+
+    ::next
+    %ld(t0, sp, 16)
+    %addi(t0, t0, 1)
+    %st(t0, sp, 16)
+    %b(&::scan)
+
+    ::append
+    # Bounds check; on overflow exit 5 with a message.
+    %ld(t0, sp, 16)
+    %li(t1, %SYMTAB_CAP_SLOTS)
+    %bltu(t0, t1, &::append_ok)
+    %la(a0, &msg_symtab_full)
+    %call(&print_cstr)
+    %li(a0, 5)
+    %call(&sys_exit)
+
+    ::append_ok
+    %la(t1, &symtab_buf_ptr)
+    %ld(t1, t1, 0)
+    %shli(t2, t0, 5)
+    %add(t1, t1, t2)
+    %ld(a0, sp, 0)
+    %st(a0, t1, 0)
+    %ld(a0, sp, 8)
+    %st(a0, t1, 8)
+    %li(a0, %imm_val(%IMM.UNBOUND))
+    %st(a0, t1, 16)
+    %li(a0, 0)
+    %st(a0, t1, 24)
+
+    # symtab_count = idx + 1
+    %addi(a0, t0, 1)
+    %la(t2, &symtab_count)
+    %st(a0, t2, 0)
+
+    # fall through with idx in t0 = sp[16]
+
+    ::found
+    %ld(t0, sp, 16)
+    %shli(a0, t0, 3)
+    %ori(a0, a0, %TAG.SYM)
+})
+
+# Lookup by sym_idx (untagged, in a0). Returns symtab[idx].global_val in a0.
+# Leaf.
+:sym_global
+    %la(t0, &symtab_buf_ptr)
+    %ld(t0, t0, 0)
+    %shli(t1, a0, 5)
+    %add(t0, t0, t1)
+    %ld(a0, t0, 16)
+    %ret
+
+# sym_set_global(idx=a0, val=a1). Leaf.
+:sym_set_global
+    %la(t0, &symtab_buf_ptr)
+    %ld(t0, t0, 0)
+    %shli(t1, a0, 5)
+    %add(t0, t0, t1)
+    %st(a1, t0, 16)
+    %ret
+
+# =========================================================================
+# Reader -- parse_one over readbuf with a single byte cursor
+# =========================================================================
+#
+# Cursor lives in &readbuf_pos; readbuf_len holds the slurped byte count.
+# The reader is called recursively from parse_list, so every state goes
+# through frame slots, not s-registers.
+
+# Skip whitespace (ASCII 32, 9, 10, 13). Leaf.
+:skip_ws
+%scope skip_ws
+    %la(t2, &readbuf_pos)
+    %ld(t0, t2, 0)
+    %la(t1, &readbuf_len)
+    %ld(t1, t1, 0)
+    ::loop
+    %beq(t0, t1, &::done)
+    %la(a0, &readbuf_buf_ptr)
+    %ld(a0, a0, 0)
+    %add(a0, a0, t0)
+    %lb(a0, a0, 0)
+    %addi(a1, a0, -32)
+    %beqz(a1, &::step)
+    %addi(a1, a0, -9)
+    %beqz(a1, &::step)
+    %addi(a1, a0, -10)
+    %beqz(a1, &::step)
+    %addi(a1, a0, -13)
+    %beqz(a1, &::step)
+    %b(&::done)
+    ::step
+    %addi(t0, t0, 1)
+    %b(&::loop)
+    ::done
+    %st(t0, t2, 0)
+    %ret
+%endscope
+
+# parse_one() -> tagged value in a0
+%fn(parse_one, 16, {
+    %call(&skip_ws)
+
+    %la(t0, &readbuf_pos)
+    %ld(t0, t0, 0)
+    %la(t1, &readbuf_len)
+    %ld(t1, t1, 0)
+    %beq(t0, t1, &::eof)
+
+    %la(a0, &readbuf_buf_ptr)
+    %ld(a0, a0, 0)
+    %add(a0, a0, t0)
+    %lb(a0, a0, 0)
+
+    %addi(a1, a0, -40)
+    %beqz(a1, &::lparen)
+    %addi(a1, a0, -41)
+    %beqz(a1, &::rparen)
+
+    %call(&parse_atom)
+    %eret
+
+    ::lparen
+    # Consume '(' and read items until ')'.
+    %la(t0, &readbuf_pos)
+    %ld(t1, t0, 0)
+    %addi(t1, t1, 1)
+    %st(t1, t0, 0)
+    %call(&parse_list)
+    %eret
+
+    ::rparen
+    %la(a0, &msg_unexp_rparen)
+    %call(&print_cstr)
+    %li(a0, 6)
+    %call(&sys_exit)
+
+    ::eof
+    %la(a0, &msg_unexp_eof)
+    %call(&print_cstr)
+    %li(a0, 6)
+    %call(&sys_exit)
+    ::spin
+    %b(&::spin)
+})
+
+# parse_list() -> tagged list value in a0. Cursor sits past '(' on entry;
+# returns once ')' is consumed.
+#
+# Frame: 16 bytes
+#   +0  head (NIL until first item)
+#   +8  tail (most recent cons; set-cdr! target)
+%fn(parse_list, 16, {
+    %li(t0, %imm_val(%IMM.NIL))
+    %st(t0, sp, 0)
+    %st(t0, sp, 8)
+
+    ::loop
+    %call(&skip_ws)
+    %la(t0, &readbuf_pos)
+    %ld(t0, t0, 0)
+    %la(t1, &readbuf_len)
+    %ld(t1, t1, 0)
+    %beq(t0, t1, &::eof)
+
+    %la(a0, &readbuf_buf_ptr)
+    %ld(a0, a0, 0)
+    %add(a0, a0, t0)
+    %lb(a0, a0, 0)
+    %addi(a1, a0, -41)
+    %beqz(a1, &::close)
+
+    # Not ')': parse one item, append.
+    %call(&parse_one)
+    %li(a1, %imm_val(%IMM.NIL))
+    %call(&cons)
+
+    # If head is NIL, both head and tail = new cons; else set-cdr! tail = new.
+    %ld(t0, sp, 0)
+    %li(t1, %imm_val(%IMM.NIL))
+    %bne(t0, t1, &::link)
+    %st(a0, sp, 0)
+    %st(a0, sp, 8)
+    %b(&::loop)
+
+    ::link
+    %ld(t0, sp, 8)
+    # set-cdr! tail = a0  -> store a0 at [tail + 7] (raw + 8)
+    %st(a0, t0, 7)
+    %st(a0, sp, 8)
+    %b(&::loop)
+
+    ::close
+    # Consume ')' and return head.
+    %la(t0, &readbuf_pos)
+    %ld(t1, t0, 0)
+    %addi(t1, t1, 1)
+    %st(t1, t0, 0)
+    %ld(a0, sp, 0)
+    %eret
+
+    ::eof
+    %la(a0, &msg_unterm_list)
+    %call(&print_cstr)
+    %li(a0, 6)
+    %call(&sys_exit)
+    ::spin
+    %b(&::spin)
+})
+
+# parse_atom() -> tagged value (fixnum or symbol) in a0.
+# Reads until whitespace or paren or EOF, then dispatches by first byte.
+#
+# Frame: 16 bytes
+#   +0  start cursor (byte offset)
+#   +8  end cursor   (byte offset)
+%fn(parse_atom, 16, {
+    %la(t0, &readbuf_pos)
+    %ld(t1, t0, 0)
+    %st(t1, sp, 0)
+
+    %la(t2, &readbuf_len)
+    %ld(t2, t2, 0)
+
+    ::scan
+    %beq(t1, t2, &::end)
+    %la(a0, &readbuf_buf_ptr)
+    %ld(a0, a0, 0)
+    %add(a0, a0, t1)
+    %lb(a0, a0, 0)
+
+    # whitespace?
+    %addi(a1, a0, -32)
+    %beqz(a1, &::end)
+    %addi(a1, a0, -9)
+    %beqz(a1, &::end)
+    %addi(a1, a0, -10)
+    %beqz(a1, &::end)
+    %addi(a1, a0, -13)
+    %beqz(a1, &::end)
+    # paren?
+    %addi(a1, a0, -40)
+    %beqz(a1, &::end)
+    %addi(a1, a0, -41)
+    %beqz(a1, &::end)
+
+    %addi(t1, t1, 1)
+    %b(&::scan)
+
+    ::end
+    %st(t1, sp, 8)
+    %la(t0, &readbuf_pos)
+    %st(t1, t0, 0)
+
+    # Dispatch on the first byte.
+    %ld(t0, sp, 0)
+    %la(a0, &readbuf_buf_ptr)
+    %ld(a0, a0, 0)
+    %add(a0, a0, t0)
+    %lb(t1, a0, 0)
+
+    # '0'..'9' -> int
+    %addi(a1, t1, -48)
+    %li(a2, 10)
+    %bltu(a1, a2, &::is_int)
+    # '-' followed by digit -> int
+    %addi(a1, t1, -45)
+    %bnez(a1, &::is_sym)
+    # there must be at least one more byte for it to be a number
+    %ld(t2, sp, 8)
+    %addi(t0, t0, 1)
+    %beq(t0, t2, &::is_sym)
+    %addi(a0, a0, 1)
+    %lb(a0, a0, 0)
+    %addi(a1, a0, -48)
+    %bltu(a1, a2, &::is_int)
+    # fall through to is_sym
+
+    ::is_sym
+    %ld(a0, sp, 0)
+    %la(t0, &readbuf_buf_ptr)
+    %ld(t0, t0, 0)
+    %add(a0, t0, a0)
+    %ld(t1, sp, 8)
+    %ld(t2, sp, 0)
+    %sub(a1, t1, t2)
+    %call(&intern)
+    %eret
+
+    ::is_int
+    %ld(a0, sp, 0)
+    %ld(a1, sp, 8)
+    %call(&parse_int)
+})
+
+# parse_int(start_off=a0, end_off=a1) -> tagged fixnum in a0. Leaf.
+:parse_int
+%scope parse_int
+    %la(t0, &readbuf_buf_ptr)
+    %ld(t0, t0, 0)
+    %add(t1, t0, a1)        ; t1 = end pointer = base + end_off
+    %add(t0, t0, a0)        ; t0 = start pointer = base + start_off
+
+    %li(a2, 0)              ; a2 = "is negative" flag (0 = positive)
+    %lb(a3, t0, 0)
+    %addi(a3, a3, -45)
+    %bnez(a3, &::loop)
+    %li(a2, 1)
+    %addi(t0, t0, 1)
+
+    ::loop
+    %li(a0, 0)
+    %li(t2, 10)
+    ::step
+    %beq(t0, t1, &::done)
+    %lb(a3, t0, 0)
+    %addi(a3, a3, -48)
+    %mul(a0, a0, t2)
+    %add(a0, a0, a3)
+    %addi(t0, t0, 1)
+    %b(&::step)
+
+    ::done
+    %beqz(a2, &::tag)
+    %li(t2, 0)
+    %sub(a0, t2, a0)
+    ::tag
+    %shli(a0, a0, 3)
+    %ret
+%endscope
+
+# =========================================================================
+# eval / apply
+# =========================================================================
+#
+# eval is the only place that touches tag bits at runtime; the table
+# below is a flat compare cascade for now (5 tags). When special-form
+# dispatch is wired up the SYM/PAIR paths split further per LISP-C.md
+# §Eval.
+
+# eval(expr=a0, env=a1) -> value (a0)
+#
+# Frame: 32 bytes
+#   +0  expr
+#   +8  env
+#   +16 fn (head value, while args are being evaluated)
+%fn(eval, 32, {
+    %st(a0, sp, 0)
+    %st(a1, sp, 8)
+
+    %andi(t0, a0, 7)
+    %li(t1, %TAG.PAIR)
+    %beq(t0, t1, &::pair)
+    %li(t1, %TAG.SYM)
+    %beq(t0, t1, &::sym)
+    # FIXNUM, HEAP, IMM all self-evaluate.
+    %eret
+
+    ::sym
+    %sari(a0, a0, 3)
+    %call(&sym_global)
+    %li(t0, %imm_val(%IMM.UNBOUND))
+    %beq(a0, t0, &::unbound)
+    %eret
+
+    ::unbound
+    %la(a0, &msg_unbound)
+    %call(&print_cstr)
+    %li(a0, 7)
+    %call(&sys_exit)
+
+    ::pair
+    # head = eval(car(expr), env)
+    %ld(a0, sp, 0)
+    %ld(a0, a0, -1)
+    %ld(a1, sp, 8)
+    %call(&eval)
+    %st(a0, sp, 16)
+
+    # args = eval_args(cdr(expr), env)
+    %ld(a0, sp, 0)
+    %ld(a0, a0, 7)
+    %ld(a1, sp, 8)
+    %call(&eval_args)
+
+    # apply(fn, args)
+    %mov(a1, a0)
+    %ld(a0, sp, 16)
+    %call(&apply)
+})
+
+# eval_args(args=a0, env=a1) -> evaluated args list (cons-built).
+# Recursion depth = arg count, so very long arg lists could blow the
+# stack. Iterative tail-build is a future tightening.
+%fn(eval_args, 24, {
+    %li(t0, %imm_val(%IMM.NIL))
+    %beq(a0, t0, &::nil)
+
+    %st(a0, sp, 0)
+    %st(a1, sp, 8)
+
+    # val = eval(car(args), env)
+    %ld(a0, a0, -1)
+    %call(&eval)
+    %st(a0, sp, 16)
+
+    # rest = eval_args(cdr(args), env)
+    %ld(a0, sp, 0)
+    %ld(a0, a0, 7)
+    %ld(a1, sp, 8)
+    %call(&eval_args)
+
+    # cons(val, rest)
+    %mov(a1, a0)
+    %ld(a0, sp, 16)
+    %call(&cons)
+    %eret
+
+    ::nil
+    %li(a0, %imm_val(%IMM.NIL))
+})
+
+# apply(fn=a0, args=a1) -> result (a0)
+#
+# Frame: 16 bytes
+#   +0  args
+%fn(apply, 16, {
+    %st(a1, sp, 0)
+
+    # Only HEAP-tagged values can be applicable. PRIM is the only header
+    # type wired up here.
+    %andi(t0, a0, 7)
+    %li(t1, %TAG.HEAP)
+    %bne(t0, t1, &::not_proc)
+
+    %lb(t0, a0, -3)
+    %li(t1, %HDR.PRIM)
+    %beq(t0, t1, &::prim)
+
+    ::not_proc
+    %la(a0, &msg_not_proc)
+    %call(&print_cstr)
+    %li(a0, 8)
+    %call(&sys_exit)
+
+    ::prim
+    %ld(t0, a0, 5)         ; t0 = entry word (offset = -3 + 8)
+    %ld(a0, sp, 0)         ; args list -> a0
+    %callr(t0)
+})
+
+# =========================================================================
+# Primitives
+# =========================================================================
+#
+# Each primitive sits behind a 16-byte heap object literal in the data
+# section: [hdr_word, entry_word]. The tagged value is &obj + 3.
+# register_primitives interns the surface name and writes the tagged
+# pointer into the symbol's global slot.
+
+%fn(register_primitives, 0, {
+    %la(a0, &name_sys_exit)
+    %li(a1, 8)
+    %call(&intern)
+    %sari(a0, a0, 3)            ; idx
+    %la(a1, &prim_sys_exit)
+    %addi(a1, a1, 3)            ; tag HEAP
+    %call(&sym_set_global)
+    # No stack frame needed -- pure register code with one nested call.
+})
+
+# prim_sys_exit_entry(args=a0). Args is a one-element list whose car is
+# the exit code as a tagged fixnum. Untag and tail-jump to libp1pp's
+# sys_exit (a %b, not a %call -- this is a leaf with no frame, and
+# sys_exit doesn't return anyway).
+:prim_sys_exit_entry
+    %ld(a0, a0, -1)         ; car = fixnum
+    %sari(a0, a0, 3)
+    %b(&sys_exit)
+
+# =========================================================================
+# Read-only data
+# =========================================================================
+
+# Primitive object literals (16 bytes each).
+:prim_sys_exit
+$(%HDR.PRIM) &prim_sys_exit_entry %(0)
+
+# Surface names. Length is hard-coded at the call site; no NUL needed
+# because intern takes (ptr, len). Aligned padding via "\0" bytes is
+# fine -- M0 emits ASCII verbatim.
+:name_sys_exit "sys-exit"
+
+:msg_usage          "scheme1: usage: scheme1 SOURCE.scm" '0a' '00'
+:msg_load_fail      "scheme1: failed to read source" '0a' '00'
+:msg_symtab_full    "scheme1: symbol table full" '0a' '00'
+:msg_unexp_rparen   "scheme1: unexpected ')'" '0a' '00'
+:msg_unexp_eof      "scheme1: unexpected EOF in form" '0a' '00'
+:msg_unterm_list    "scheme1: unterminated list" '0a' '00'
+:msg_unbound        "scheme1: unbound variable" '0a' '00'
+:msg_not_proc       "scheme1: not a procedure" '0a' '00'
+
+# =========================================================================
+# BSS pointer-init table
+# =========================================================================
+#
+# Each entry: 8-byte slot pointer (4-byte label ref + 4 bytes pad) +
+# 8-byte offset constant. p1_main walks this once at startup.
+:bss_init_tbl
+&readbuf_buf_ptr %(0) OFF_readbuf
+&heap_buf_ptr    %(0) OFF_heap
+&symtab_buf_ptr  %(0) OFF_symtab
+:bss_init_tbl_end
+
+# =========================================================================
+# Scalar BSS (file-resident, zero-initialized)
+# =========================================================================
+
+# heap_next: bump pointer; written once by p1_main, then by cons/alloc_hdr.
+:heap_next        $(0)
+
+# Source-buffer cursor and slurped length.
+:readbuf_pos      $(0)
+:readbuf_len      $(0)
+
+# Symbol table count (number of entries used).
+:symtab_count     $(0)
+
+# Pointer slots for the past-:ELF_end arenas.
+:readbuf_buf_ptr  $(0)
+:heap_buf_ptr     $(0)
+:symtab_buf_ptr   $(0)
+
+:ELF_end
diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh
@@ -6,11 +6,18 @@
 ## ELF binary (build/$ARCH/m1pp, built by boot2.sh / boot-build-p1.sh).
 ##
 ## Pipeline:
-##   cat <P1/P1-$ARCH.M1pp> <P1/P1.M1pp> <src> -> /tmp/combined.M1pp
-##   m1pp /tmp/combined.M1pp                    -> /tmp/expanded.M1
-##   M0   /tmp/expanded.M1                      -> /tmp/prog.hex2
-##   catm /tmp/elf.hex2 /tmp/prog.hex2          -> /tmp/linked.hex2
-##   hex2-0 /tmp/linked.hex2                    -> $OUT
+##   cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <src> -> /tmp/combined.M1pp
+##   m1pp /tmp/combined.M1pp                          -> /tmp/expanded.M1
+##   M0   /tmp/expanded.M1                            -> /tmp/prog.hex2
+##   catm /tmp/elf.hex2 /tmp/prog.hex2                -> /tmp/linked.hex2
+##   hex2-0 /tmp/linked.hex2                          -> $OUT
+##
+## libp1pp (P1/P1pp.P1pp) is concatenated unconditionally so portable
+## sources can use %fn, the control-flow macros, and libp1pp routines
+## (sys_*, print*, parse_*, fmt_*, memcpy/memcmp, bump allocator, panic,
+## %assert_*) without per-program plumbing. M0 has no link-time DCE, so
+## programs that don't reference any libp1pp routine still pay a fixed
+## code-size tax (~a few KB).
 ##
 ## Env: ARCH=aarch64|amd64|riscv64
 ## Usage: boot-build-p1pp.sh <src> <out>
@@ -25,6 +32,7 @@ OUT=$2
 
 BACKEND=P1/P1-$ARCH.M1pp
 FRONTEND=P1/P1.M1pp
+LIBP1PP=P1/P1pp.P1pp
 ELF_HDR=vendor/seed/$ARCH/ELF.hex2
 TOOLS=build/$ARCH/tools
 M1PP_BIN=build/$ARCH/m1pp
@@ -32,7 +40,7 @@ NAME=$(basename "$SRC" .P1pp)
 WORK=build/$ARCH/.work/$NAME
 mkdir -p "$WORK" "$(dirname "$OUT")"
 
-cat "$BACKEND" "$FRONTEND" "$SRC" > /tmp/combined.M1pp
+cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$SRC" > /tmp/combined.M1pp
 "$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1
 "$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2
 
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-## run-tests.sh — unified test runner for the m1pp and p1 fixture suites.
+## run-tests.sh — unified test runner for the m1pp, p1, and scheme1 suites.
 ##
 ## Each suite is a directory of `<name>.<ext>` fixtures with sibling
 ## `<name>.expected` files. The runner builds each fixture, runs it inside
@@ -8,19 +8,24 @@
 ## (parked, ad-hoc debugging).
 ##
 ## Suites:
-##   m1pp   tests/M1pp/<name>.M1     — P1 program built via build-p1.sh
-##                                     for each requested arch, run in
-##                                     container, stdout diffed.
-##          tests/M1pp/<name>.M1pp   — m1pp expander parity test: per-arch
-##                                     m1pp binary consumes the fixture
-##                                     and writes <out>; diffed.
-##   p1     tests/P1/<name>.P1pp     — P1pp program built via build-p1pp.sh
-##                                     for each requested arch, run in
-##                                     container, stdout diffed.
+##   m1pp     tests/M1pp/<name>.M1     — P1 program built via build-p1.sh
+##                                       for each requested arch, run in
+##                                       container, stdout diffed.
+##            tests/M1pp/<name>.M1pp   — m1pp expander parity test: per-arch
+##                                       m1pp binary consumes the fixture
+##                                       and writes <out>; diffed.
+##   p1       tests/P1/<name>.P1pp     — P1pp program built via build-p1pp.sh
+##                                       for each requested arch, run in
+##                                       container, stdout diffed.
+##   scheme1  tests/scheme1/<name>.scm — Scheme source run by the per-arch
+##                                       scheme1 binary. stdout diffed
+##                                       against <name>.expected (default
+##                                       empty); exit code diffed against
+##                                       <name>.expected-exit (default 0).
 ##
 ## All three arches by default; --arch restricts to one.
 ##
-## Usage: scripts/run-tests.sh --suite <m1pp|p1> [--arch ARCH] [name ...]
+## Usage: scripts/run-tests.sh --suite <m1pp|p1|scheme1> [--arch ARCH] [name ...]
 
 set -eu
 
@@ -42,8 +47,8 @@ while [ "$#" -gt 0 ]; do
 done
 
 case "$SUITE" in
-    m1pp|p1) ;;
-    "") echo "$0: --suite required (m1pp | p1)" >&2; exit 2 ;;
+    m1pp|p1|scheme1) ;;
+    "") echo "$0: --suite required (m1pp | p1 | scheme1)" >&2; exit 2 ;;
     *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;;
 esac
 
@@ -203,9 +208,80 @@ run_p1_suite() {
     done
 }
 
+## --- scheme1 suite ------------------------------------------------------
+##
+## Caller (Make) ensures build/<arch>/scheme1 already exists. The runner
+## just invokes that binary against each .scm fixture, capturing stdout
+## and the exit status. stdout is diffed against <name>.expected (defaults
+## to empty if absent); the exit status is diffed against
+## <name>.expected-exit (defaults to 0 if absent).
+
+run_scheme1_suite() {
+    if [ -z "$ARCH" ]; then
+        ARCHES="aarch64 amd64 riscv64"
+    else
+        ARCHES=$ARCH
+    fi
+    if [ -z "$NAMES" ]; then
+        NAMES=$(discover tests/scheme1 scm)
+    fi
+    for name in $NAMES; do
+        fixture=tests/scheme1/$name.scm
+        expected_stdout_file=tests/scheme1/$name.expected
+        expected_exit_file=tests/scheme1/$name.expected-exit
+
+        if [ ! -e "$fixture" ]; then
+            echo "  SKIP $name (no .scm)"; continue
+        fi
+        if [ -e "$expected_stdout_file" ]; then
+            expected_stdout=$(cat "$expected_stdout_file")
+        else
+            expected_stdout=
+        fi
+        if [ -e "$expected_exit_file" ]; then
+            expected_exit=$(cat "$expected_exit_file")
+        else
+            expected_exit=0
+        fi
+
+        for arch in $ARCHES; do
+            label="[$arch] $name"
+            bin=build/$arch/scheme1
+            if [ ! -x "$bin" ]; then
+                report "$label" FAIL
+                echo "    (missing $bin -- run 'make scheme1 ARCH=$arch')" >&2
+                continue
+            fi
+
+            tmp_stdout=$(mktemp)
+            if run_in_container "$arch" "./$bin" "$fixture" >"$tmp_stdout" 2>&1; then
+                actual_exit=0
+            else
+                actual_exit=$?
+            fi
+            actual_stdout=$(cat "$tmp_stdout")
+            rm -f "$tmp_stdout"
+
+            if [ "$actual_stdout" = "$expected_stdout" ] \
+               && [ "$actual_exit" = "$expected_exit" ]; then
+                report "$label" PASS
+            else
+                report "$label" FAIL
+                if [ "$actual_stdout" != "$expected_stdout" ]; then
+                    show_diff "$expected_stdout" "$actual_stdout"
+                fi
+                if [ "$actual_exit" != "$expected_exit" ]; then
+                    echo "    exit: expected $expected_exit, got $actual_exit"
+                fi
+            fi
+        done
+    done
+}
+
 case "$SUITE" in
-    m1pp) run_m1pp_suite ;;
-    p1)   run_p1_suite ;;
+    m1pp)    run_m1pp_suite ;;
+    p1)      run_p1_suite ;;
+    scheme1) run_scheme1_suite ;;
 esac
 
 echo "$PASS passed, $FAIL failed"
diff --git a/tests/scheme1/00-exit.expected-exit b/tests/scheme1/00-exit.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/00-exit.scm b/tests/scheme1/00-exit.scm
@@ -0,0 +1 @@
+(sys-exit 42)

	boot2 Playing with the boostrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README

M	M1pp/M1pp.P1	\|	6	+++---
M	Makefile	\|	44	++++++++++++++++++++++++++++++++++----------
A	scheme1/scheme1.P1pp	\|	809	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	scripts/boot-build-p1pp.sh	\|	20	++++++++++++++------
M	scripts/run-tests.sh	\|	106	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
A	tests/scheme1/00-exit.expected-exit	\|	1	+
A	tests/scheme1/00-exit.scm	\|	1	+