commit 925240dd23966009f0b742a6199d87f1661e79fa
parent 6ce02250c5f5982da088c191b4da1ac0ce6a4958
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 25 Apr 2026 03:03:32 -0700
Add scheme1 phase-1 interpreter spine and test runner
Tag-dispatched eval/apply over a bump heap, linear-scan symbol intern
table, and a single sys-exit primitive. Adds a scheme1 Make target,
.P1pp build pipeline support in boot-build-p1pp.sh, and a scheme1
suite in run-tests.sh that diffs stdout and exit code against
.expected / .expected-exit fixtures.
Also bumps the m1pp INPUT/OUTPUT/TEXT caps so combined.M1pp inputs
that include libp1pp + scheme1 fit through the expander.
Diffstat:
7 files changed, 953 insertions(+), 34 deletions(-)
diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1
@@ -27,9 +27,9 @@
## --- Constants & sizing ------------------------------------------------------
-DEFINE M1PP_INPUT_CAP 0000010000000000
-DEFINE M1PP_OUTPUT_CAP 0000010000000000
-DEFINE M1PP_TEXT_CAP 0080000000000000
+DEFINE M1PP_INPUT_CAP 0000040000000000
+DEFINE M1PP_OUTPUT_CAP 0000040000000000
+DEFINE M1PP_TEXT_CAP 0000040000000000
DEFINE M1PP_TOKENS_END 0000c00000000000
## Macro record is 296 bytes: name (16) + param_count (8) + params[16]*16 (256)
## + body_start (8) + body_end (8). MACROS_CAP fits 512 records (151552 B).
diff --git a/Makefile b/Makefile
@@ -15,10 +15,12 @@
# make m1pp build the m1pp expander for ARCH
# make pokem build pokem for ARCH
# make hello build hello via the bootstrap chain
+# make scheme1 build the scheme1 interpreter for ARCH
# make run run hello in the container
# make test every suite, every arch
# make test SUITE=m1pp m1pp suite, every arch
# make test SUITE=p1 ARCH=amd64 p1 suite, one arch
+# make test SUITE=scheme1 scheme1 .scm fixtures, every arch
# make image build the per-arch container image
# make tools bootstrap M0/hex2-0/catm for ARCH
# make tables regen pre-pruned P1/P1-<arch>.M1 tables
@@ -58,7 +60,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
# --- Targets --------------------------------------------------------------
-.PHONY: all m1pp pokem hello run test image tools tables \
+.PHONY: all m1pp pokem hello scheme1 run test image tools tables \
tools-native clean help
all: m1pp pokem
@@ -120,20 +122,29 @@ P1/P1-%.M1: build/p1/%.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS)
# --- Programs (per arch) --------------------------------------------------
-M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp)
-POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem)
-HELLO_SRC := tests/M1pp/00-hello.M1
-HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello)
+M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp)
+POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem)
+HELLO_SRC := tests/M1pp/00-hello.M1
+HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello)
+SCHEME1_SRC := scheme1/scheme1.P1pp
+SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1)
-m1pp: $(OUT_DIR)/m1pp
-pokem: $(OUT_DIR)/pokem
-hello: $(OUT_DIR)/hello
+m1pp: $(OUT_DIR)/m1pp
+pokem: $(OUT_DIR)/pokem
+hello: $(OUT_DIR)/hello
+scheme1: $(OUT_DIR)/scheme1
-# Per-arch deps shared by every .P1/.M1 build.
+# Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion).
P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \
build/%/.image build/%/tools/M0 \
vendor/seed/%/ELF.hex2 P1/P1-%.M1
+# Per-arch deps for .P1pp builds (m1pp expansion + libp1pp).
+P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \
+ build/%/.image build/%/tools/M0 build/%/m1pp \
+ vendor/seed/%/ELF.hex2 \
+ P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp
+
$(M1PP_BINS): build/%/m1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS)
ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1
$(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@
@@ -146,6 +157,9 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS)
ARCH=$* sh scripts/lint.sh $(HELLO_SRC)
$(call PODMAN,$*) sh scripts/boot-build-p1.sh $(HELLO_SRC) $@
+$(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS)
+ $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@
+
run: $(OUT_DIR)/hello $(IMAGE_STAMP)
$(call PODMAN,$(ARCH)) ./$(OUT_DIR)/hello
@@ -190,16 +204,26 @@ TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \
TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \
build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp)
+# scheme1 suite per-arch deps: image, tools, expander, scheme1 binary.
+# (run-tests.sh runs the pre-built binary against each .scm fixture; it
+# does not rebuild the interpreter per fixture.)
+TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \
+ build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1)
+
test:
ifeq ($(SUITE),)
@$(MAKE) --no-print-directory test SUITE=m1pp
@$(MAKE) --no-print-directory test SUITE=p1
+ @$(MAKE) --no-print-directory test SUITE=scheme1
else ifeq ($(SUITE),m1pp)
@$(MAKE) --no-print-directory $(TEST_M1PP_DEPS)
sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
else ifeq ($(SUITE),p1)
@$(MAKE) --no-print-directory $(TEST_P1_DEPS)
sh scripts/run-tests.sh --suite=p1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
+else ifeq ($(SUITE),scheme1)
+ @$(MAKE) --no-print-directory $(TEST_SCHEME1_DEPS)
+ sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER))
else
- @echo "unknown SUITE='$(SUITE)' (expected m1pp | p1)" >&2; exit 2
+ @echo "unknown SUITE='$(SUITE)' (expected m1pp | p1 | scheme1)" >&2; exit 2
endif
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -0,0 +1,809 @@
+# scheme1.P1pp -- Phase 1 minimal Scheme interpreter on P1.
+#
+# The full target is described in docs/LISP-C.md and docs/LISP.md. This
+# file is the spine: enough infrastructure to read a single source file,
+# parse one s-expression, and evaluate it via tag-dispatched eval/apply
+# with a single primitive (`sys-exit`). Every later piece (more
+# primitives, special forms, closures, pmatch, records, prelude, repl
+# loop) hooks onto these anchors without restructuring.
+#
+# What's wired up now:
+# - Tag layout per LISP-C.md: fixnums, pairs, symbols, headered objects,
+# immediate singletons. mkimm composes IMM constants at M1pp time.
+# - Bump heap allocator (cons + alloc_hdr) over a BSS-past-ELF_end arena.
+# - Linear-scan symbol intern table; entries store (name_ptr, name_len,
+# global_val) borrowed directly from readbuf.
+# - Reader: '(', ')', fixnums (decimal, optional leading `-`), bare
+# symbols. No strings/hex/chars/quote/dotted yet, no `;` comments.
+# - eval: tag dispatch -> self-eval / symbol lookup / pair application.
+# - apply: HDR.PRIM dispatch only; closures/specials come later.
+# - One primitive: sys-exit. Exits with the raw fixnum.
+#
+# Build chain (P1pp.P1pp = libp1pp must be in the catm sequence):
+# catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp scheme1/scheme1.P1pp \
+# | m1pp -> M0 -> hex2 -> ELF
+#
+# Memory model: the ELF's ph_memsz is 8 MB (boot2 default), so all
+# zero-initialized arenas live past :ELF_end and cost zero file bytes.
+# p1_main writes their absolute addresses into pointer slots once at
+# startup; every later access goes through one extra load.
+
+# =========================================================================
+# Constants
+# =========================================================================
+
+%enum TAG { FIXNUM PAIR SYM HEAP IMM }
+%enum IMM { FALSE TRUE NIL UNSPEC UNBOUND }
+%enum HDR { BV CLOSURE PRIM TD REC }
+
+# imm_val(idx) -> integer-expression for the tagged immediate at IMM index
+# `idx`. Used both at %li sites (loaded into a register) and at $() emission
+# sites (baked into a static word).
+%macro imm_val(idx)
+(| (<< idx 3) %TAG.IMM)
+%endm
+
+# Layout helpers. %struct stride is 8 bytes per field.
+%struct PAIR { car cdr } # .SIZE = 16
+%struct SYMENT { name_ptr name_len global_val pad } # .SIZE = 32
+%struct PRIM { hdr entry_w } # .SIZE = 16
+
+# BSS sizing. Offsets are bytes from :ELF_end.
+DEFINE OFF_readbuf 0000000000000000
+DEFINE OFF_heap 0000010000000000
+DEFINE OFF_symtab 0000020000000000
+DEFINE READBUF_CAP 0000010000000000
+DEFINE HEAP_CAP 0000010000000000
+DEFINE SYMTAB_CAP_BYTES 0000010000000000
+
+# Number of symtab slots. Used as a runtime guard in intern. Must match
+# SYMTAB_CAP_BYTES / SYMENT.SIZE.
+%macro SYMTAB_CAP_SLOTS()
+1024
+%endm
+
+# Same as M1PP_INPUT_CAP, exposed for %li in load_source.
+%macro READBUF_CAP_BYTES()
+65536
+%endm
+
+# =========================================================================
+# Tag idioms
+# =========================================================================
+
+%macro tagof(rd, rs)
+%andi(rd, rs, 7)
+%endm
+
+%macro mkfix(rd, rs)
+%shli(rd, rs, 3)
+%endm
+
+%macro untag_fix(rd, rs)
+%sari(rd, rs, 3)
+%endm
+
+%macro untag_sym(rd, rs)
+%sari(rd, rs, 3)
+%endm
+
+%macro car(rd, rs)
+%ld(rd, rs, -1)
+%endm
+
+%macro cdr(rd, rs)
+%ld(rd, rs, 7)
+%endm
+
+%macro hdr_type(rd, rs)
+%lb(rd, rs, -3)
+%endm
+
+# =========================================================================
+# p1_main -- runtime spine
+# =========================================================================
+#
+# Frame layout (16 bytes):
+# +0 saved argv
+
+%fn(p1_main, 16, {
+ %st(a1, sp, 0)
+
+ %li(t0, 2)
+ %bltu(a0, t0, &::usage)
+
+ # Initialize BSS pointer slots from ELF_end + OFF_*. Same idiom as
+ # M1pp.P1: a tiny init table walked once.
+ %la(t0, &ELF_end)
+ %la(t1, &bss_init_tbl)
+ %la(t2, &bss_init_tbl_end)
+ ::bss_loop
+ %beq(t1, t2, &::bss_done)
+ %ld(a0, t1, 0)
+ %ld(a2, t1, 8)
+ %add(a2, a2, t0)
+ %st(a2, a0, 0)
+ %addi(t1, t1, 16)
+ %b(&::bss_loop)
+ ::bss_done
+
+ # heap_next = &heap_buf, rounded up to 8-byte alignment. The BSS arena
+ # starts at &ELF_end + OFF_heap, but &ELF_end's alignment depends on
+ # the data section above it; cons assumes 8-byte-aligned heap_next so
+ # every pair pointer's low 3 bits are exactly the PAIR tag.
+ %la(t0, &heap_buf_ptr)
+ %ld(t0, t0, 0)
+ %addi(t0, t0, 7)
+ %li(t1, -8)
+ %and(t0, t0, t1)
+ %la(t1, &heap_next)
+ %st(t0, t1, 0)
+
+ # Bind built-in primitives.
+ %call(®ister_primitives)
+
+ # argv[1] is the source path (NUL-terminated cstr from the kernel).
+ %ld(a1, sp, 0)
+ %ld(a0, a1, 8)
+ %call(&load_source)
+
+ # parse_one returns one s-expression in a0.
+ %call(&parse_one)
+
+ # eval(expr, NIL).
+ %li(a1, %imm_val(%IMM.NIL))
+ %call(&eval)
+
+ # If the form did not sys-exit (e.g. evaluated to a fixnum), drop
+ # cleanly with status 0. Useful while wiring up more of the runtime.
+ %li(a0, 0)
+ %eret
+
+ ::usage
+ %la(a0, &msg_usage)
+ %call(&print_cstr)
+ %li(a0, 2)
+})
+
+# =========================================================================
+# Source loading -- argv[1] -> readbuf, length stored in readbuf_len
+# =========================================================================
+#
+# Frame: 8 bytes
+# +0 no slots; libp1pp's read_file does open+read+close in one call.
+
+%fn(load_source, 0, {
+ %la(t0, &readbuf_buf_ptr)
+ %ld(a1, t0, 0)
+ %li(a2, %READBUF_CAP_BYTES)
+ %call(&read_file)
+ %bltz(a0, &::fail)
+
+ %la(t0, &readbuf_len)
+ %st(a0, t0, 0)
+ %li(a0, 0)
+ %eret
+
+ ::fail
+ %la(a0, &msg_load_fail)
+ %call(&print_cstr)
+ %li(a0, 3)
+ %call(&sys_exit)
+ ::spin
+ %b(&::spin)
+})
+
+# =========================================================================
+# Heap: cons (leaf) and alloc_hdr (leaf)
+# =========================================================================
+#
+# Both are call-free leaves: bump heap_next, write fields, return tagged
+# pointer. Heap exhaustion crashes via a deliberate read past the arena
+# end -- runtime check arrives when error reporting does (LISP-C.md
+# milestone 3).
+
+# cons(car=a0, cdr=a1) -> tagged pair (a0)
+:cons
+ %la(t2, &heap_next)
+ %ld(t0, t2, 0)
+ %st(a0, t0, 0)
+ %st(a1, t0, 8)
+ %addi(t1, t0, 16)
+ %st(t1, t2, 0)
+ %addi(a0, t0, 1)
+ %ret
+
+# alloc_hdr(bytes=a0, hdr_word=a1) -> tagged heap obj (a0)
+# Rounds bytes up to a multiple of 8 and writes hdr_word at offset 0.
+:alloc_hdr
+ %addi(a0, a0, 7)
+ %li(t0, -8)
+ %and(a0, a0, t0)
+ %la(t2, &heap_next)
+ %ld(t0, t2, 0)
+ %add(t1, t0, a0)
+ %st(t1, t2, 0)
+ %st(a1, t0, 0)
+ %addi(a0, t0, 3)
+ %ret
+
+# =========================================================================
+# Symbol intern -- linear scan, append on miss
+# =========================================================================
+#
+# Frame: 32 bytes
+# +0 name_ptr (input)
+# +8 name_len (input)
+# +16 idx (loop counter / found index)
+# +24 entry_ptr (spilled across memcmp)
+
+%fn(intern, 32, {
+ %st(a0, sp, 0)
+ %st(a1, sp, 8)
+
+ %li(t0, 0)
+ %st(t0, sp, 16)
+
+ ::scan
+ # idx >= count? -> append
+ %ld(t0, sp, 16)
+ %la(t1, &symtab_count)
+ %ld(t1, t1, 0)
+ %bltu(t0, t1, &::probe)
+ %b(&::append)
+
+ ::probe
+ # entry_ptr = symtab_buf + idx * SYMENT.SIZE
+ %la(t1, &symtab_buf_ptr)
+ %ld(t1, t1, 0)
+ %shli(t2, t0, 5)
+ %add(t1, t1, t2)
+ %st(t1, sp, 24)
+
+ # entry.name_len == name_len ?
+ %ld(t2, t1, 8)
+ %ld(a2, sp, 8)
+ %bne(t2, a2, &::next)
+
+ # memcmp(entry.name_ptr, name_ptr, len)
+ %ld(a0, t1, 0)
+ %ld(a1, sp, 0)
+ %ld(a2, sp, 8)
+ %call(&memcmp)
+ %beqz(a0, &::found)
+
+ ::next
+ %ld(t0, sp, 16)
+ %addi(t0, t0, 1)
+ %st(t0, sp, 16)
+ %b(&::scan)
+
+ ::append
+ # Bounds check; on overflow exit 5 with a message.
+ %ld(t0, sp, 16)
+ %li(t1, %SYMTAB_CAP_SLOTS)
+ %bltu(t0, t1, &::append_ok)
+ %la(a0, &msg_symtab_full)
+ %call(&print_cstr)
+ %li(a0, 5)
+ %call(&sys_exit)
+
+ ::append_ok
+ %la(t1, &symtab_buf_ptr)
+ %ld(t1, t1, 0)
+ %shli(t2, t0, 5)
+ %add(t1, t1, t2)
+ %ld(a0, sp, 0)
+ %st(a0, t1, 0)
+ %ld(a0, sp, 8)
+ %st(a0, t1, 8)
+ %li(a0, %imm_val(%IMM.UNBOUND))
+ %st(a0, t1, 16)
+ %li(a0, 0)
+ %st(a0, t1, 24)
+
+ # symtab_count = idx + 1
+ %addi(a0, t0, 1)
+ %la(t2, &symtab_count)
+ %st(a0, t2, 0)
+
+ # fall through with idx in t0 = sp[16]
+
+ ::found
+ %ld(t0, sp, 16)
+ %shli(a0, t0, 3)
+ %ori(a0, a0, %TAG.SYM)
+})
+
+# Lookup by sym_idx (untagged, in a0). Returns symtab[idx].global_val in a0.
+# Leaf.
+:sym_global
+ %la(t0, &symtab_buf_ptr)
+ %ld(t0, t0, 0)
+ %shli(t1, a0, 5)
+ %add(t0, t0, t1)
+ %ld(a0, t0, 16)
+ %ret
+
+# sym_set_global(idx=a0, val=a1). Leaf.
+:sym_set_global
+ %la(t0, &symtab_buf_ptr)
+ %ld(t0, t0, 0)
+ %shli(t1, a0, 5)
+ %add(t0, t0, t1)
+ %st(a1, t0, 16)
+ %ret
+
+# =========================================================================
+# Reader -- parse_one over readbuf with a single byte cursor
+# =========================================================================
+#
+# Cursor lives in &readbuf_pos; readbuf_len holds the slurped byte count.
+# The reader is called recursively from parse_list, so every state goes
+# through frame slots, not s-registers.
+
+# Skip whitespace (ASCII 32, 9, 10, 13). Leaf.
+:skip_ws
+%scope skip_ws
+ %la(t2, &readbuf_pos)
+ %ld(t0, t2, 0)
+ %la(t1, &readbuf_len)
+ %ld(t1, t1, 0)
+ ::loop
+ %beq(t0, t1, &::done)
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, t0)
+ %lb(a0, a0, 0)
+ %addi(a1, a0, -32)
+ %beqz(a1, &::step)
+ %addi(a1, a0, -9)
+ %beqz(a1, &::step)
+ %addi(a1, a0, -10)
+ %beqz(a1, &::step)
+ %addi(a1, a0, -13)
+ %beqz(a1, &::step)
+ %b(&::done)
+ ::step
+ %addi(t0, t0, 1)
+ %b(&::loop)
+ ::done
+ %st(t0, t2, 0)
+ %ret
+%endscope
+
+# parse_one() -> tagged value in a0
+%fn(parse_one, 16, {
+ %call(&skip_ws)
+
+ %la(t0, &readbuf_pos)
+ %ld(t0, t0, 0)
+ %la(t1, &readbuf_len)
+ %ld(t1, t1, 0)
+ %beq(t0, t1, &::eof)
+
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, t0)
+ %lb(a0, a0, 0)
+
+ %addi(a1, a0, -40)
+ %beqz(a1, &::lparen)
+ %addi(a1, a0, -41)
+ %beqz(a1, &::rparen)
+
+ %call(&parse_atom)
+ %eret
+
+ ::lparen
+ # Consume '(' and read items until ')'.
+ %la(t0, &readbuf_pos)
+ %ld(t1, t0, 0)
+ %addi(t1, t1, 1)
+ %st(t1, t0, 0)
+ %call(&parse_list)
+ %eret
+
+ ::rparen
+ %la(a0, &msg_unexp_rparen)
+ %call(&print_cstr)
+ %li(a0, 6)
+ %call(&sys_exit)
+
+ ::eof
+ %la(a0, &msg_unexp_eof)
+ %call(&print_cstr)
+ %li(a0, 6)
+ %call(&sys_exit)
+ ::spin
+ %b(&::spin)
+})
+
+# parse_list() -> tagged list value in a0. Cursor sits past '(' on entry;
+# returns once ')' is consumed.
+#
+# Frame: 16 bytes
+# +0 head (NIL until first item)
+# +8 tail (most recent cons; set-cdr! target)
+%fn(parse_list, 16, {
+ %li(t0, %imm_val(%IMM.NIL))
+ %st(t0, sp, 0)
+ %st(t0, sp, 8)
+
+ ::loop
+ %call(&skip_ws)
+ %la(t0, &readbuf_pos)
+ %ld(t0, t0, 0)
+ %la(t1, &readbuf_len)
+ %ld(t1, t1, 0)
+ %beq(t0, t1, &::eof)
+
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, t0)
+ %lb(a0, a0, 0)
+ %addi(a1, a0, -41)
+ %beqz(a1, &::close)
+
+ # Not ')': parse one item, append.
+ %call(&parse_one)
+ %li(a1, %imm_val(%IMM.NIL))
+ %call(&cons)
+
+ # If head is NIL, both head and tail = new cons; else set-cdr! tail = new.
+ %ld(t0, sp, 0)
+ %li(t1, %imm_val(%IMM.NIL))
+ %bne(t0, t1, &::link)
+ %st(a0, sp, 0)
+ %st(a0, sp, 8)
+ %b(&::loop)
+
+ ::link
+ %ld(t0, sp, 8)
+ # set-cdr! tail = a0 -> store a0 at [tail + 7] (raw + 8)
+ %st(a0, t0, 7)
+ %st(a0, sp, 8)
+ %b(&::loop)
+
+ ::close
+ # Consume ')' and return head.
+ %la(t0, &readbuf_pos)
+ %ld(t1, t0, 0)
+ %addi(t1, t1, 1)
+ %st(t1, t0, 0)
+ %ld(a0, sp, 0)
+ %eret
+
+ ::eof
+ %la(a0, &msg_unterm_list)
+ %call(&print_cstr)
+ %li(a0, 6)
+ %call(&sys_exit)
+ ::spin
+ %b(&::spin)
+})
+
+# parse_atom() -> tagged value (fixnum or symbol) in a0.
+# Reads until whitespace or paren or EOF, then dispatches by first byte.
+#
+# Frame: 16 bytes
+# +0 start cursor (byte offset)
+# +8 end cursor (byte offset)
+%fn(parse_atom, 16, {
+ %la(t0, &readbuf_pos)
+ %ld(t1, t0, 0)
+ %st(t1, sp, 0)
+
+ %la(t2, &readbuf_len)
+ %ld(t2, t2, 0)
+
+ ::scan
+ %beq(t1, t2, &::end)
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, t1)
+ %lb(a0, a0, 0)
+
+ # whitespace?
+ %addi(a1, a0, -32)
+ %beqz(a1, &::end)
+ %addi(a1, a0, -9)
+ %beqz(a1, &::end)
+ %addi(a1, a0, -10)
+ %beqz(a1, &::end)
+ %addi(a1, a0, -13)
+ %beqz(a1, &::end)
+ # paren?
+ %addi(a1, a0, -40)
+ %beqz(a1, &::end)
+ %addi(a1, a0, -41)
+ %beqz(a1, &::end)
+
+ %addi(t1, t1, 1)
+ %b(&::scan)
+
+ ::end
+ %st(t1, sp, 8)
+ %la(t0, &readbuf_pos)
+ %st(t1, t0, 0)
+
+ # Dispatch on the first byte.
+ %ld(t0, sp, 0)
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, t0)
+ %lb(t1, a0, 0)
+
+ # '0'..'9' -> int
+ %addi(a1, t1, -48)
+ %li(a2, 10)
+ %bltu(a1, a2, &::is_int)
+ # '-' followed by digit -> int
+ %addi(a1, t1, -45)
+ %bnez(a1, &::is_sym)
+ # there must be at least one more byte for it to be a number
+ %ld(t2, sp, 8)
+ %addi(t0, t0, 1)
+ %beq(t0, t2, &::is_sym)
+ %addi(a0, a0, 1)
+ %lb(a0, a0, 0)
+ %addi(a1, a0, -48)
+ %bltu(a1, a2, &::is_int)
+ # fall through to is_sym
+
+ ::is_sym
+ %ld(a0, sp, 0)
+ %la(t0, &readbuf_buf_ptr)
+ %ld(t0, t0, 0)
+ %add(a0, t0, a0)
+ %ld(t1, sp, 8)
+ %ld(t2, sp, 0)
+ %sub(a1, t1, t2)
+ %call(&intern)
+ %eret
+
+ ::is_int
+ %ld(a0, sp, 0)
+ %ld(a1, sp, 8)
+ %call(&parse_int)
+})
+
+# parse_int(start_off=a0, end_off=a1) -> tagged fixnum in a0. Leaf.
+:parse_int
+%scope parse_int
+ %la(t0, &readbuf_buf_ptr)
+ %ld(t0, t0, 0)
+ %add(t1, t0, a1) ; t1 = end pointer = base + end_off
+ %add(t0, t0, a0) ; t0 = start pointer = base + start_off
+
+ %li(a2, 0) ; a2 = "is negative" flag (0 = positive)
+ %lb(a3, t0, 0)
+ %addi(a3, a3, -45)
+ %bnez(a3, &::loop)
+ %li(a2, 1)
+ %addi(t0, t0, 1)
+
+ ::loop
+ %li(a0, 0)
+ %li(t2, 10)
+ ::step
+ %beq(t0, t1, &::done)
+ %lb(a3, t0, 0)
+ %addi(a3, a3, -48)
+ %mul(a0, a0, t2)
+ %add(a0, a0, a3)
+ %addi(t0, t0, 1)
+ %b(&::step)
+
+ ::done
+ %beqz(a2, &::tag)
+ %li(t2, 0)
+ %sub(a0, t2, a0)
+ ::tag
+ %shli(a0, a0, 3)
+ %ret
+%endscope
+
+# =========================================================================
+# eval / apply
+# =========================================================================
+#
+# eval is the only place that touches tag bits at runtime; the table
+# below is a flat compare cascade for now (5 tags). When special-form
+# dispatch is wired up the SYM/PAIR paths split further per LISP-C.md
+# §Eval.
+
+# eval(expr=a0, env=a1) -> value (a0)
+#
+# Frame: 32 bytes
+# +0 expr
+# +8 env
+# +16 fn (head value, while args are being evaluated)
+%fn(eval, 32, {
+ %st(a0, sp, 0)
+ %st(a1, sp, 8)
+
+ %andi(t0, a0, 7)
+ %li(t1, %TAG.PAIR)
+ %beq(t0, t1, &::pair)
+ %li(t1, %TAG.SYM)
+ %beq(t0, t1, &::sym)
+ # FIXNUM, HEAP, IMM all self-evaluate.
+ %eret
+
+ ::sym
+ %sari(a0, a0, 3)
+ %call(&sym_global)
+ %li(t0, %imm_val(%IMM.UNBOUND))
+ %beq(a0, t0, &::unbound)
+ %eret
+
+ ::unbound
+ %la(a0, &msg_unbound)
+ %call(&print_cstr)
+ %li(a0, 7)
+ %call(&sys_exit)
+
+ ::pair
+ # head = eval(car(expr), env)
+ %ld(a0, sp, 0)
+ %ld(a0, a0, -1)
+ %ld(a1, sp, 8)
+ %call(&eval)
+ %st(a0, sp, 16)
+
+ # args = eval_args(cdr(expr), env)
+ %ld(a0, sp, 0)
+ %ld(a0, a0, 7)
+ %ld(a1, sp, 8)
+ %call(&eval_args)
+
+ # apply(fn, args)
+ %mov(a1, a0)
+ %ld(a0, sp, 16)
+ %call(&apply)
+})
+
+# eval_args(args=a0, env=a1) -> evaluated args list (cons-built).
+# Recursion depth = arg count, so very long arg lists could blow the
+# stack. Iterative tail-build is a future tightening.
+%fn(eval_args, 24, {
+ %li(t0, %imm_val(%IMM.NIL))
+ %beq(a0, t0, &::nil)
+
+ %st(a0, sp, 0)
+ %st(a1, sp, 8)
+
+ # val = eval(car(args), env)
+ %ld(a0, a0, -1)
+ %call(&eval)
+ %st(a0, sp, 16)
+
+ # rest = eval_args(cdr(args), env)
+ %ld(a0, sp, 0)
+ %ld(a0, a0, 7)
+ %ld(a1, sp, 8)
+ %call(&eval_args)
+
+ # cons(val, rest)
+ %mov(a1, a0)
+ %ld(a0, sp, 16)
+ %call(&cons)
+ %eret
+
+ ::nil
+ %li(a0, %imm_val(%IMM.NIL))
+})
+
+# apply(fn=a0, args=a1) -> result (a0)
+#
+# Frame: 16 bytes
+# +0 args
+%fn(apply, 16, {
+ %st(a1, sp, 0)
+
+ # Only HEAP-tagged values can be applicable. PRIM is the only header
+ # type wired up here.
+ %andi(t0, a0, 7)
+ %li(t1, %TAG.HEAP)
+ %bne(t0, t1, &::not_proc)
+
+ %lb(t0, a0, -3)
+ %li(t1, %HDR.PRIM)
+ %beq(t0, t1, &::prim)
+
+ ::not_proc
+ %la(a0, &msg_not_proc)
+ %call(&print_cstr)
+ %li(a0, 8)
+ %call(&sys_exit)
+
+ ::prim
+ %ld(t0, a0, 5) ; t0 = entry word (offset = -3 + 8)
+ %ld(a0, sp, 0) ; args list -> a0
+ %callr(t0)
+})
+
+# =========================================================================
+# Primitives
+# =========================================================================
+#
+# Each primitive sits behind a 16-byte heap object literal in the data
+# section: [hdr_word, entry_word]. The tagged value is &obj + 3.
+# register_primitives interns the surface name and writes the tagged
+# pointer into the symbol's global slot.
+
+%fn(register_primitives, 0, {
+ %la(a0, &name_sys_exit)
+ %li(a1, 8)
+ %call(&intern)
+ %sari(a0, a0, 3) ; idx
+ %la(a1, &prim_sys_exit)
+ %addi(a1, a1, 3) ; tag HEAP
+ %call(&sym_set_global)
+ # No stack frame needed -- pure register code with one nested call.
+})
+
+# prim_sys_exit_entry(args=a0). Args is a one-element list whose car is
+# the exit code as a tagged fixnum. Untag and tail-jump to libp1pp's
+# sys_exit (a %b, not a %call -- this is a leaf with no frame, and
+# sys_exit doesn't return anyway).
+:prim_sys_exit_entry
+ %ld(a0, a0, -1) ; car = fixnum
+ %sari(a0, a0, 3)
+ %b(&sys_exit)
+
+# =========================================================================
+# Read-only data
+# =========================================================================
+
+# Primitive object literals (16 bytes each).
+:prim_sys_exit
+$(%HDR.PRIM) &prim_sys_exit_entry %(0)
+
+# Surface names. Length is hard-coded at the call site; no NUL needed
+# because intern takes (ptr, len). Aligned padding via "\0" bytes is
+# fine -- M0 emits ASCII verbatim.
+:name_sys_exit "sys-exit"
+
+:msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00'
+:msg_load_fail "scheme1: failed to read source" '0a' '00'
+:msg_symtab_full "scheme1: symbol table full" '0a' '00'
+:msg_unexp_rparen "scheme1: unexpected ')'" '0a' '00'
+:msg_unexp_eof "scheme1: unexpected EOF in form" '0a' '00'
+:msg_unterm_list "scheme1: unterminated list" '0a' '00'
+:msg_unbound "scheme1: unbound variable" '0a' '00'
+:msg_not_proc "scheme1: not a procedure" '0a' '00'
+
+# =========================================================================
+# BSS pointer-init table
+# =========================================================================
+#
+# Each entry: 8-byte slot pointer (4-byte label ref + 4 bytes pad) +
+# 8-byte offset constant. p1_main walks this once at startup.
+:bss_init_tbl
+&readbuf_buf_ptr %(0) OFF_readbuf
+&heap_buf_ptr %(0) OFF_heap
+&symtab_buf_ptr %(0) OFF_symtab
+:bss_init_tbl_end
+
+# =========================================================================
+# Scalar BSS (file-resident, zero-initialized)
+# =========================================================================
+
+# heap_next: bump pointer; written once by p1_main, then by cons/alloc_hdr.
+:heap_next $(0)
+
+# Source-buffer cursor and slurped length.
+:readbuf_pos $(0)
+:readbuf_len $(0)
+
+# Symbol table count (number of entries used).
+:symtab_count $(0)
+
+# Pointer slots for the past-:ELF_end arenas.
+:readbuf_buf_ptr $(0)
+:heap_buf_ptr $(0)
+:symtab_buf_ptr $(0)
+
+:ELF_end
diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh
@@ -6,11 +6,18 @@
## ELF binary (build/$ARCH/m1pp, built by boot2.sh / boot-build-p1.sh).
##
## Pipeline:
-## cat <P1/P1-$ARCH.M1pp> <P1/P1.M1pp> <src> -> /tmp/combined.M1pp
-## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1
-## M0 /tmp/expanded.M1 -> /tmp/prog.hex2
-## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2
-## hex2-0 /tmp/linked.hex2 -> $OUT
+## cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <src> -> /tmp/combined.M1pp
+## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1
+## M0 /tmp/expanded.M1 -> /tmp/prog.hex2
+## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2
+## hex2-0 /tmp/linked.hex2 -> $OUT
+##
+## libp1pp (P1/P1pp.P1pp) is concatenated unconditionally so portable
+## sources can use %fn, the control-flow macros, and libp1pp routines
+## (sys_*, print*, parse_*, fmt_*, memcpy/memcmp, bump allocator, panic,
+## %assert_*) without per-program plumbing. M0 has no link-time DCE, so
+## programs that don't reference any libp1pp routine still pay a fixed
+## code-size tax (~a few KB).
##
## Env: ARCH=aarch64|amd64|riscv64
## Usage: boot-build-p1pp.sh <src> <out>
@@ -25,6 +32,7 @@ OUT=$2
BACKEND=P1/P1-$ARCH.M1pp
FRONTEND=P1/P1.M1pp
+LIBP1PP=P1/P1pp.P1pp
ELF_HDR=vendor/seed/$ARCH/ELF.hex2
TOOLS=build/$ARCH/tools
M1PP_BIN=build/$ARCH/m1pp
@@ -32,7 +40,7 @@ NAME=$(basename "$SRC" .P1pp)
WORK=build/$ARCH/.work/$NAME
mkdir -p "$WORK" "$(dirname "$OUT")"
-cat "$BACKEND" "$FRONTEND" "$SRC" > /tmp/combined.M1pp
+cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$SRC" > /tmp/combined.M1pp
"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1
"$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-## run-tests.sh — unified test runner for the m1pp and p1 fixture suites.
+## run-tests.sh — unified test runner for the m1pp, p1, and scheme1 suites.
##
## Each suite is a directory of `<name>.<ext>` fixtures with sibling
## `<name>.expected` files. The runner builds each fixture, runs it inside
@@ -8,19 +8,24 @@
## (parked, ad-hoc debugging).
##
## Suites:
-## m1pp tests/M1pp/<name>.M1 — P1 program built via build-p1.sh
-## for each requested arch, run in
-## container, stdout diffed.
-## tests/M1pp/<name>.M1pp — m1pp expander parity test: per-arch
-## m1pp binary consumes the fixture
-## and writes <out>; diffed.
-## p1 tests/P1/<name>.P1pp — P1pp program built via build-p1pp.sh
-## for each requested arch, run in
-## container, stdout diffed.
+## m1pp tests/M1pp/<name>.M1 — P1 program built via build-p1.sh
+## for each requested arch, run in
+## container, stdout diffed.
+## tests/M1pp/<name>.M1pp — m1pp expander parity test: per-arch
+## m1pp binary consumes the fixture
+## and writes <out>; diffed.
+## p1 tests/P1/<name>.P1pp — P1pp program built via build-p1pp.sh
+## for each requested arch, run in
+## container, stdout diffed.
+## scheme1 tests/scheme1/<name>.scm — Scheme source run by the per-arch
+## scheme1 binary. stdout diffed
+## against <name>.expected (default
+## empty); exit code diffed against
+## <name>.expected-exit (default 0).
##
## All three arches by default; --arch restricts to one.
##
-## Usage: scripts/run-tests.sh --suite <m1pp|p1> [--arch ARCH] [name ...]
+## Usage: scripts/run-tests.sh --suite <m1pp|p1|scheme1> [--arch ARCH] [name ...]
set -eu
@@ -42,8 +47,8 @@ while [ "$#" -gt 0 ]; do
done
case "$SUITE" in
- m1pp|p1) ;;
- "") echo "$0: --suite required (m1pp | p1)" >&2; exit 2 ;;
+ m1pp|p1|scheme1) ;;
+ "") echo "$0: --suite required (m1pp | p1 | scheme1)" >&2; exit 2 ;;
*) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;;
esac
@@ -203,9 +208,80 @@ run_p1_suite() {
done
}
+## --- scheme1 suite ------------------------------------------------------
+##
+## Caller (Make) ensures build/<arch>/scheme1 already exists. The runner
+## just invokes that binary against each .scm fixture, capturing stdout
+## and the exit status. stdout is diffed against <name>.expected (defaults
+## to empty if absent); the exit status is diffed against
+## <name>.expected-exit (defaults to 0 if absent).
+
+run_scheme1_suite() {
+ if [ -z "$ARCH" ]; then
+ ARCHES="aarch64 amd64 riscv64"
+ else
+ ARCHES=$ARCH
+ fi
+ if [ -z "$NAMES" ]; then
+ NAMES=$(discover tests/scheme1 scm)
+ fi
+ for name in $NAMES; do
+ fixture=tests/scheme1/$name.scm
+ expected_stdout_file=tests/scheme1/$name.expected
+ expected_exit_file=tests/scheme1/$name.expected-exit
+
+ if [ ! -e "$fixture" ]; then
+ echo " SKIP $name (no .scm)"; continue
+ fi
+ if [ -e "$expected_stdout_file" ]; then
+ expected_stdout=$(cat "$expected_stdout_file")
+ else
+ expected_stdout=
+ fi
+ if [ -e "$expected_exit_file" ]; then
+ expected_exit=$(cat "$expected_exit_file")
+ else
+ expected_exit=0
+ fi
+
+ for arch in $ARCHES; do
+ label="[$arch] $name"
+ bin=build/$arch/scheme1
+ if [ ! -x "$bin" ]; then
+ report "$label" FAIL
+ echo " (missing $bin -- run 'make scheme1 ARCH=$arch')" >&2
+ continue
+ fi
+
+ tmp_stdout=$(mktemp)
+ if run_in_container "$arch" "./$bin" "$fixture" >"$tmp_stdout" 2>&1; then
+ actual_exit=0
+ else
+ actual_exit=$?
+ fi
+ actual_stdout=$(cat "$tmp_stdout")
+ rm -f "$tmp_stdout"
+
+ if [ "$actual_stdout" = "$expected_stdout" ] \
+ && [ "$actual_exit" = "$expected_exit" ]; then
+ report "$label" PASS
+ else
+ report "$label" FAIL
+ if [ "$actual_stdout" != "$expected_stdout" ]; then
+ show_diff "$expected_stdout" "$actual_stdout"
+ fi
+ if [ "$actual_exit" != "$expected_exit" ]; then
+ echo " exit: expected $expected_exit, got $actual_exit"
+ fi
+ fi
+ done
+ done
+}
+
case "$SUITE" in
- m1pp) run_m1pp_suite ;;
- p1) run_p1_suite ;;
+ m1pp) run_m1pp_suite ;;
+ p1) run_p1_suite ;;
+ scheme1) run_scheme1_suite ;;
esac
echo "$PASS passed, $FAIL failed"
diff --git a/tests/scheme1/00-exit.expected-exit b/tests/scheme1/00-exit.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/00-exit.scm b/tests/scheme1/00-exit.scm
@@ -0,0 +1 @@
+(sys-exit 42)