boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 925240dd23966009f0b742a6199d87f1661e79fa
parent 6ce02250c5f5982da088c191b4da1ac0ce6a4958
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 03:03:32 -0700

Add scheme1 phase-1 interpreter spine and test runner

Tag-dispatched eval/apply over a bump heap, linear-scan symbol intern
table, and a single sys-exit primitive. Adds a scheme1 Make target,
.P1pp build pipeline support in boot-build-p1pp.sh, and a scheme1
suite in run-tests.sh that diffs stdout and exit code against
.expected / .expected-exit fixtures.

Also bumps the m1pp INPUT/OUTPUT/TEXT caps so combined.M1pp inputs
that include libp1pp + scheme1 fit through the expander.

Diffstat:
MM1pp/M1pp.P1 | 6+++---
MMakefile | 44++++++++++++++++++++++++++++++++++----------
Ascheme1/scheme1.P1pp | 809+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mscripts/boot-build-p1pp.sh | 20++++++++++++++------
Mscripts/run-tests.sh | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Atests/scheme1/00-exit.expected-exit | 1+
Atests/scheme1/00-exit.scm | 1+
7 files changed, 953 insertions(+), 34 deletions(-)

diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1 @@ -27,9 +27,9 @@ ## --- Constants & sizing ------------------------------------------------------ -DEFINE M1PP_INPUT_CAP 0000010000000000 -DEFINE M1PP_OUTPUT_CAP 0000010000000000 -DEFINE M1PP_TEXT_CAP 0080000000000000 +DEFINE M1PP_INPUT_CAP 0000040000000000 +DEFINE M1PP_OUTPUT_CAP 0000040000000000 +DEFINE M1PP_TEXT_CAP 0000040000000000 DEFINE M1PP_TOKENS_END 0000c00000000000 ## Macro record is 296 bytes: name (16) + param_count (8) + params[16]*16 (256) ## + body_start (8) + body_end (8). MACROS_CAP fits 512 records (151552 B). diff --git a/Makefile b/Makefile @@ -15,10 +15,12 @@ # make m1pp build the m1pp expander for ARCH # make pokem build pokem for ARCH # make hello build hello via the bootstrap chain +# make scheme1 build the scheme1 interpreter for ARCH # make run run hello in the container # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch # make test SUITE=p1 ARCH=amd64 p1 suite, one arch +# make test SUITE=scheme1 scheme1 .scm fixtures, every arch # make image build the per-arch container image # make tools bootstrap M0/hex2-0/catm for ARCH # make tables regen pre-pruned P1/P1-<arch>.M1 tables @@ -58,7 +60,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- -.PHONY: all m1pp pokem hello run test image tools tables \ +.PHONY: all m1pp pokem hello scheme1 run test image tools tables \ tools-native clean help all: m1pp pokem @@ -120,20 +122,29 @@ P1/P1-%.M1: build/p1/%.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS) # --- Programs (per arch) -------------------------------------------------- -M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp) -POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem) -HELLO_SRC := tests/M1pp/00-hello.M1 -HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello) +M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp) +POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem) +HELLO_SRC := tests/M1pp/00-hello.M1 +HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello) +SCHEME1_SRC := scheme1/scheme1.P1pp +SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1) -m1pp: $(OUT_DIR)/m1pp -pokem: $(OUT_DIR)/pokem -hello: $(OUT_DIR)/hello +m1pp: $(OUT_DIR)/m1pp +pokem: $(OUT_DIR)/pokem +hello: $(OUT_DIR)/hello +scheme1: $(OUT_DIR)/scheme1 -# Per-arch deps shared by every .P1/.M1 build. +# Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion). P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \ build/%/.image build/%/tools/M0 \ vendor/seed/%/ELF.hex2 P1/P1-%.M1 +# Per-arch deps for .P1pp builds (m1pp expansion + libp1pp). +P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \ + build/%/.image build/%/tools/M0 build/%/m1pp \ + vendor/seed/%/ELF.hex2 \ + P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp + $(M1PP_BINS): build/%/m1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS) ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1 $(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@ @@ -146,6 +157,9 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS) ARCH=$* sh scripts/lint.sh $(HELLO_SRC) $(call PODMAN,$*) sh scripts/boot-build-p1.sh $(HELLO_SRC) $@ +$(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) + $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@ + run: $(OUT_DIR)/hello $(IMAGE_STAMP) $(call PODMAN,$(ARCH)) ./$(OUT_DIR)/hello @@ -190,16 +204,26 @@ TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \ TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \ build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp) +# scheme1 suite per-arch deps: image, tools, expander, scheme1 binary. +# (run-tests.sh runs the pre-built binary against each .scm fixture; it +# does not rebuild the interpreter per fixture.) +TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \ + build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1) + test: ifeq ($(SUITE),) @$(MAKE) --no-print-directory test SUITE=m1pp @$(MAKE) --no-print-directory test SUITE=p1 + @$(MAKE) --no-print-directory test SUITE=scheme1 else ifeq ($(SUITE),m1pp) @$(MAKE) --no-print-directory $(TEST_M1PP_DEPS) sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) else ifeq ($(SUITE),p1) @$(MAKE) --no-print-directory $(TEST_P1_DEPS) sh scripts/run-tests.sh --suite=p1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) +else ifeq ($(SUITE),scheme1) + @$(MAKE) --no-print-directory $(TEST_SCHEME1_DEPS) + sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) else - @echo "unknown SUITE='$(SUITE)' (expected m1pp | p1)" >&2; exit 2 + @echo "unknown SUITE='$(SUITE)' (expected m1pp | p1 | scheme1)" >&2; exit 2 endif diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -0,0 +1,809 @@ +# scheme1.P1pp -- Phase 1 minimal Scheme interpreter on P1. +# +# The full target is described in docs/LISP-C.md and docs/LISP.md. This +# file is the spine: enough infrastructure to read a single source file, +# parse one s-expression, and evaluate it via tag-dispatched eval/apply +# with a single primitive (`sys-exit`). Every later piece (more +# primitives, special forms, closures, pmatch, records, prelude, repl +# loop) hooks onto these anchors without restructuring. +# +# What's wired up now: +# - Tag layout per LISP-C.md: fixnums, pairs, symbols, headered objects, +# immediate singletons. mkimm composes IMM constants at M1pp time. +# - Bump heap allocator (cons + alloc_hdr) over a BSS-past-ELF_end arena. +# - Linear-scan symbol intern table; entries store (name_ptr, name_len, +# global_val) borrowed directly from readbuf. +# - Reader: '(', ')', fixnums (decimal, optional leading `-`), bare +# symbols. No strings/hex/chars/quote/dotted yet, no `;` comments. +# - eval: tag dispatch -> self-eval / symbol lookup / pair application. +# - apply: HDR.PRIM dispatch only; closures/specials come later. +# - One primitive: sys-exit. Exits with the raw fixnum. +# +# Build chain (P1pp.P1pp = libp1pp must be in the catm sequence): +# catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp scheme1/scheme1.P1pp \ +# | m1pp -> M0 -> hex2 -> ELF +# +# Memory model: the ELF's ph_memsz is 8 MB (boot2 default), so all +# zero-initialized arenas live past :ELF_end and cost zero file bytes. +# p1_main writes their absolute addresses into pointer slots once at +# startup; every later access goes through one extra load. + +# ========================================================================= +# Constants +# ========================================================================= + +%enum TAG { FIXNUM PAIR SYM HEAP IMM } +%enum IMM { FALSE TRUE NIL UNSPEC UNBOUND } +%enum HDR { BV CLOSURE PRIM TD REC } + +# imm_val(idx) -> integer-expression for the tagged immediate at IMM index +# `idx`. Used both at %li sites (loaded into a register) and at $() emission +# sites (baked into a static word). +%macro imm_val(idx) +(| (<< idx 3) %TAG.IMM) +%endm + +# Layout helpers. %struct stride is 8 bytes per field. +%struct PAIR { car cdr } # .SIZE = 16 +%struct SYMENT { name_ptr name_len global_val pad } # .SIZE = 32 +%struct PRIM { hdr entry_w } # .SIZE = 16 + +# BSS sizing. Offsets are bytes from :ELF_end. +DEFINE OFF_readbuf 0000000000000000 +DEFINE OFF_heap 0000010000000000 +DEFINE OFF_symtab 0000020000000000 +DEFINE READBUF_CAP 0000010000000000 +DEFINE HEAP_CAP 0000010000000000 +DEFINE SYMTAB_CAP_BYTES 0000010000000000 + +# Number of symtab slots. Used as a runtime guard in intern. Must match +# SYMTAB_CAP_BYTES / SYMENT.SIZE. +%macro SYMTAB_CAP_SLOTS() +1024 +%endm + +# Same as M1PP_INPUT_CAP, exposed for %li in load_source. +%macro READBUF_CAP_BYTES() +65536 +%endm + +# ========================================================================= +# Tag idioms +# ========================================================================= + +%macro tagof(rd, rs) +%andi(rd, rs, 7) +%endm + +%macro mkfix(rd, rs) +%shli(rd, rs, 3) +%endm + +%macro untag_fix(rd, rs) +%sari(rd, rs, 3) +%endm + +%macro untag_sym(rd, rs) +%sari(rd, rs, 3) +%endm + +%macro car(rd, rs) +%ld(rd, rs, -1) +%endm + +%macro cdr(rd, rs) +%ld(rd, rs, 7) +%endm + +%macro hdr_type(rd, rs) +%lb(rd, rs, -3) +%endm + +# ========================================================================= +# p1_main -- runtime spine +# ========================================================================= +# +# Frame layout (16 bytes): +# +0 saved argv + +%fn(p1_main, 16, { + %st(a1, sp, 0) + + %li(t0, 2) + %bltu(a0, t0, &::usage) + + # Initialize BSS pointer slots from ELF_end + OFF_*. Same idiom as + # M1pp.P1: a tiny init table walked once. + %la(t0, &ELF_end) + %la(t1, &bss_init_tbl) + %la(t2, &bss_init_tbl_end) + ::bss_loop + %beq(t1, t2, &::bss_done) + %ld(a0, t1, 0) + %ld(a2, t1, 8) + %add(a2, a2, t0) + %st(a2, a0, 0) + %addi(t1, t1, 16) + %b(&::bss_loop) + ::bss_done + + # heap_next = &heap_buf, rounded up to 8-byte alignment. The BSS arena + # starts at &ELF_end + OFF_heap, but &ELF_end's alignment depends on + # the data section above it; cons assumes 8-byte-aligned heap_next so + # every pair pointer's low 3 bits are exactly the PAIR tag. + %la(t0, &heap_buf_ptr) + %ld(t0, t0, 0) + %addi(t0, t0, 7) + %li(t1, -8) + %and(t0, t0, t1) + %la(t1, &heap_next) + %st(t0, t1, 0) + + # Bind built-in primitives. + %call(&register_primitives) + + # argv[1] is the source path (NUL-terminated cstr from the kernel). + %ld(a1, sp, 0) + %ld(a0, a1, 8) + %call(&load_source) + + # parse_one returns one s-expression in a0. + %call(&parse_one) + + # eval(expr, NIL). + %li(a1, %imm_val(%IMM.NIL)) + %call(&eval) + + # If the form did not sys-exit (e.g. evaluated to a fixnum), drop + # cleanly with status 0. Useful while wiring up more of the runtime. + %li(a0, 0) + %eret + + ::usage + %la(a0, &msg_usage) + %call(&print_cstr) + %li(a0, 2) +}) + +# ========================================================================= +# Source loading -- argv[1] -> readbuf, length stored in readbuf_len +# ========================================================================= +# +# Frame: 8 bytes +# +0 no slots; libp1pp's read_file does open+read+close in one call. + +%fn(load_source, 0, { + %la(t0, &readbuf_buf_ptr) + %ld(a1, t0, 0) + %li(a2, %READBUF_CAP_BYTES) + %call(&read_file) + %bltz(a0, &::fail) + + %la(t0, &readbuf_len) + %st(a0, t0, 0) + %li(a0, 0) + %eret + + ::fail + %la(a0, &msg_load_fail) + %call(&print_cstr) + %li(a0, 3) + %call(&sys_exit) + ::spin + %b(&::spin) +}) + +# ========================================================================= +# Heap: cons (leaf) and alloc_hdr (leaf) +# ========================================================================= +# +# Both are call-free leaves: bump heap_next, write fields, return tagged +# pointer. Heap exhaustion crashes via a deliberate read past the arena +# end -- runtime check arrives when error reporting does (LISP-C.md +# milestone 3). + +# cons(car=a0, cdr=a1) -> tagged pair (a0) +:cons + %la(t2, &heap_next) + %ld(t0, t2, 0) + %st(a0, t0, 0) + %st(a1, t0, 8) + %addi(t1, t0, 16) + %st(t1, t2, 0) + %addi(a0, t0, 1) + %ret + +# alloc_hdr(bytes=a0, hdr_word=a1) -> tagged heap obj (a0) +# Rounds bytes up to a multiple of 8 and writes hdr_word at offset 0. +:alloc_hdr + %addi(a0, a0, 7) + %li(t0, -8) + %and(a0, a0, t0) + %la(t2, &heap_next) + %ld(t0, t2, 0) + %add(t1, t0, a0) + %st(t1, t2, 0) + %st(a1, t0, 0) + %addi(a0, t0, 3) + %ret + +# ========================================================================= +# Symbol intern -- linear scan, append on miss +# ========================================================================= +# +# Frame: 32 bytes +# +0 name_ptr (input) +# +8 name_len (input) +# +16 idx (loop counter / found index) +# +24 entry_ptr (spilled across memcmp) + +%fn(intern, 32, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + %li(t0, 0) + %st(t0, sp, 16) + + ::scan + # idx >= count? -> append + %ld(t0, sp, 16) + %la(t1, &symtab_count) + %ld(t1, t1, 0) + %bltu(t0, t1, &::probe) + %b(&::append) + + ::probe + # entry_ptr = symtab_buf + idx * SYMENT.SIZE + %la(t1, &symtab_buf_ptr) + %ld(t1, t1, 0) + %shli(t2, t0, 5) + %add(t1, t1, t2) + %st(t1, sp, 24) + + # entry.name_len == name_len ? + %ld(t2, t1, 8) + %ld(a2, sp, 8) + %bne(t2, a2, &::next) + + # memcmp(entry.name_ptr, name_ptr, len) + %ld(a0, t1, 0) + %ld(a1, sp, 0) + %ld(a2, sp, 8) + %call(&memcmp) + %beqz(a0, &::found) + + ::next + %ld(t0, sp, 16) + %addi(t0, t0, 1) + %st(t0, sp, 16) + %b(&::scan) + + ::append + # Bounds check; on overflow exit 5 with a message. + %ld(t0, sp, 16) + %li(t1, %SYMTAB_CAP_SLOTS) + %bltu(t0, t1, &::append_ok) + %la(a0, &msg_symtab_full) + %call(&print_cstr) + %li(a0, 5) + %call(&sys_exit) + + ::append_ok + %la(t1, &symtab_buf_ptr) + %ld(t1, t1, 0) + %shli(t2, t0, 5) + %add(t1, t1, t2) + %ld(a0, sp, 0) + %st(a0, t1, 0) + %ld(a0, sp, 8) + %st(a0, t1, 8) + %li(a0, %imm_val(%IMM.UNBOUND)) + %st(a0, t1, 16) + %li(a0, 0) + %st(a0, t1, 24) + + # symtab_count = idx + 1 + %addi(a0, t0, 1) + %la(t2, &symtab_count) + %st(a0, t2, 0) + + # fall through with idx in t0 = sp[16] + + ::found + %ld(t0, sp, 16) + %shli(a0, t0, 3) + %ori(a0, a0, %TAG.SYM) +}) + +# Lookup by sym_idx (untagged, in a0). Returns symtab[idx].global_val in a0. +# Leaf. +:sym_global + %la(t0, &symtab_buf_ptr) + %ld(t0, t0, 0) + %shli(t1, a0, 5) + %add(t0, t0, t1) + %ld(a0, t0, 16) + %ret + +# sym_set_global(idx=a0, val=a1). Leaf. +:sym_set_global + %la(t0, &symtab_buf_ptr) + %ld(t0, t0, 0) + %shli(t1, a0, 5) + %add(t0, t0, t1) + %st(a1, t0, 16) + %ret + +# ========================================================================= +# Reader -- parse_one over readbuf with a single byte cursor +# ========================================================================= +# +# Cursor lives in &readbuf_pos; readbuf_len holds the slurped byte count. +# The reader is called recursively from parse_list, so every state goes +# through frame slots, not s-registers. + +# Skip whitespace (ASCII 32, 9, 10, 13). Leaf. +:skip_ws +%scope skip_ws + %la(t2, &readbuf_pos) + %ld(t0, t2, 0) + %la(t1, &readbuf_len) + %ld(t1, t1, 0) + ::loop + %beq(t0, t1, &::done) + %la(a0, &readbuf_buf_ptr) + %ld(a0, a0, 0) + %add(a0, a0, t0) + %lb(a0, a0, 0) + %addi(a1, a0, -32) + %beqz(a1, &::step) + %addi(a1, a0, -9) + %beqz(a1, &::step) + %addi(a1, a0, -10) + %beqz(a1, &::step) + %addi(a1, a0, -13) + %beqz(a1, &::step) + %b(&::done) + ::step + %addi(t0, t0, 1) + %b(&::loop) + ::done + %st(t0, t2, 0) + %ret +%endscope + +# parse_one() -> tagged value in a0 +%fn(parse_one, 16, { + %call(&skip_ws) + + %la(t0, &readbuf_pos) + %ld(t0, t0, 0) + %la(t1, &readbuf_len) + %ld(t1, t1, 0) + %beq(t0, t1, &::eof) + + %la(a0, &readbuf_buf_ptr) + %ld(a0, a0, 0) + %add(a0, a0, t0) + %lb(a0, a0, 0) + + %addi(a1, a0, -40) + %beqz(a1, &::lparen) + %addi(a1, a0, -41) + %beqz(a1, &::rparen) + + %call(&parse_atom) + %eret + + ::lparen + # Consume '(' and read items until ')'. + %la(t0, &readbuf_pos) + %ld(t1, t0, 0) + %addi(t1, t1, 1) + %st(t1, t0, 0) + %call(&parse_list) + %eret + + ::rparen + %la(a0, &msg_unexp_rparen) + %call(&print_cstr) + %li(a0, 6) + %call(&sys_exit) + + ::eof + %la(a0, &msg_unexp_eof) + %call(&print_cstr) + %li(a0, 6) + %call(&sys_exit) + ::spin + %b(&::spin) +}) + +# parse_list() -> tagged list value in a0. Cursor sits past '(' on entry; +# returns once ')' is consumed. +# +# Frame: 16 bytes +# +0 head (NIL until first item) +# +8 tail (most recent cons; set-cdr! target) +%fn(parse_list, 16, { + %li(t0, %imm_val(%IMM.NIL)) + %st(t0, sp, 0) + %st(t0, sp, 8) + + ::loop + %call(&skip_ws) + %la(t0, &readbuf_pos) + %ld(t0, t0, 0) + %la(t1, &readbuf_len) + %ld(t1, t1, 0) + %beq(t0, t1, &::eof) + + %la(a0, &readbuf_buf_ptr) + %ld(a0, a0, 0) + %add(a0, a0, t0) + %lb(a0, a0, 0) + %addi(a1, a0, -41) + %beqz(a1, &::close) + + # Not ')': parse one item, append. + %call(&parse_one) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + + # If head is NIL, both head and tail = new cons; else set-cdr! tail = new. + %ld(t0, sp, 0) + %li(t1, %imm_val(%IMM.NIL)) + %bne(t0, t1, &::link) + %st(a0, sp, 0) + %st(a0, sp, 8) + %b(&::loop) + + ::link + %ld(t0, sp, 8) + # set-cdr! tail = a0 -> store a0 at [tail + 7] (raw + 8) + %st(a0, t0, 7) + %st(a0, sp, 8) + %b(&::loop) + + ::close + # Consume ')' and return head. + %la(t0, &readbuf_pos) + %ld(t1, t0, 0) + %addi(t1, t1, 1) + %st(t1, t0, 0) + %ld(a0, sp, 0) + %eret + + ::eof + %la(a0, &msg_unterm_list) + %call(&print_cstr) + %li(a0, 6) + %call(&sys_exit) + ::spin + %b(&::spin) +}) + +# parse_atom() -> tagged value (fixnum or symbol) in a0. +# Reads until whitespace or paren or EOF, then dispatches by first byte. +# +# Frame: 16 bytes +# +0 start cursor (byte offset) +# +8 end cursor (byte offset) +%fn(parse_atom, 16, { + %la(t0, &readbuf_pos) + %ld(t1, t0, 0) + %st(t1, sp, 0) + + %la(t2, &readbuf_len) + %ld(t2, t2, 0) + + ::scan + %beq(t1, t2, &::end) + %la(a0, &readbuf_buf_ptr) + %ld(a0, a0, 0) + %add(a0, a0, t1) + %lb(a0, a0, 0) + + # whitespace? + %addi(a1, a0, -32) + %beqz(a1, &::end) + %addi(a1, a0, -9) + %beqz(a1, &::end) + %addi(a1, a0, -10) + %beqz(a1, &::end) + %addi(a1, a0, -13) + %beqz(a1, &::end) + # paren? + %addi(a1, a0, -40) + %beqz(a1, &::end) + %addi(a1, a0, -41) + %beqz(a1, &::end) + + %addi(t1, t1, 1) + %b(&::scan) + + ::end + %st(t1, sp, 8) + %la(t0, &readbuf_pos) + %st(t1, t0, 0) + + # Dispatch on the first byte. + %ld(t0, sp, 0) + %la(a0, &readbuf_buf_ptr) + %ld(a0, a0, 0) + %add(a0, a0, t0) + %lb(t1, a0, 0) + + # '0'..'9' -> int + %addi(a1, t1, -48) + %li(a2, 10) + %bltu(a1, a2, &::is_int) + # '-' followed by digit -> int + %addi(a1, t1, -45) + %bnez(a1, &::is_sym) + # there must be at least one more byte for it to be a number + %ld(t2, sp, 8) + %addi(t0, t0, 1) + %beq(t0, t2, &::is_sym) + %addi(a0, a0, 1) + %lb(a0, a0, 0) + %addi(a1, a0, -48) + %bltu(a1, a2, &::is_int) + # fall through to is_sym + + ::is_sym + %ld(a0, sp, 0) + %la(t0, &readbuf_buf_ptr) + %ld(t0, t0, 0) + %add(a0, t0, a0) + %ld(t1, sp, 8) + %ld(t2, sp, 0) + %sub(a1, t1, t2) + %call(&intern) + %eret + + ::is_int + %ld(a0, sp, 0) + %ld(a1, sp, 8) + %call(&parse_int) +}) + +# parse_int(start_off=a0, end_off=a1) -> tagged fixnum in a0. Leaf. +:parse_int +%scope parse_int + %la(t0, &readbuf_buf_ptr) + %ld(t0, t0, 0) + %add(t1, t0, a1) ; t1 = end pointer = base + end_off + %add(t0, t0, a0) ; t0 = start pointer = base + start_off + + %li(a2, 0) ; a2 = "is negative" flag (0 = positive) + %lb(a3, t0, 0) + %addi(a3, a3, -45) + %bnez(a3, &::loop) + %li(a2, 1) + %addi(t0, t0, 1) + + ::loop + %li(a0, 0) + %li(t2, 10) + ::step + %beq(t0, t1, &::done) + %lb(a3, t0, 0) + %addi(a3, a3, -48) + %mul(a0, a0, t2) + %add(a0, a0, a3) + %addi(t0, t0, 1) + %b(&::step) + + ::done + %beqz(a2, &::tag) + %li(t2, 0) + %sub(a0, t2, a0) + ::tag + %shli(a0, a0, 3) + %ret +%endscope + +# ========================================================================= +# eval / apply +# ========================================================================= +# +# eval is the only place that touches tag bits at runtime; the table +# below is a flat compare cascade for now (5 tags). When special-form +# dispatch is wired up the SYM/PAIR paths split further per LISP-C.md +# §Eval. + +# eval(expr=a0, env=a1) -> value (a0) +# +# Frame: 32 bytes +# +0 expr +# +8 env +# +16 fn (head value, while args are being evaluated) +%fn(eval, 32, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + %andi(t0, a0, 7) + %li(t1, %TAG.PAIR) + %beq(t0, t1, &::pair) + %li(t1, %TAG.SYM) + %beq(t0, t1, &::sym) + # FIXNUM, HEAP, IMM all self-evaluate. + %eret + + ::sym + %sari(a0, a0, 3) + %call(&sym_global) + %li(t0, %imm_val(%IMM.UNBOUND)) + %beq(a0, t0, &::unbound) + %eret + + ::unbound + %la(a0, &msg_unbound) + %call(&print_cstr) + %li(a0, 7) + %call(&sys_exit) + + ::pair + # head = eval(car(expr), env) + %ld(a0, sp, 0) + %ld(a0, a0, -1) + %ld(a1, sp, 8) + %call(&eval) + %st(a0, sp, 16) + + # args = eval_args(cdr(expr), env) + %ld(a0, sp, 0) + %ld(a0, a0, 7) + %ld(a1, sp, 8) + %call(&eval_args) + + # apply(fn, args) + %mov(a1, a0) + %ld(a0, sp, 16) + %call(&apply) +}) + +# eval_args(args=a0, env=a1) -> evaluated args list (cons-built). +# Recursion depth = arg count, so very long arg lists could blow the +# stack. Iterative tail-build is a future tightening. +%fn(eval_args, 24, { + %li(t0, %imm_val(%IMM.NIL)) + %beq(a0, t0, &::nil) + + %st(a0, sp, 0) + %st(a1, sp, 8) + + # val = eval(car(args), env) + %ld(a0, a0, -1) + %call(&eval) + %st(a0, sp, 16) + + # rest = eval_args(cdr(args), env) + %ld(a0, sp, 0) + %ld(a0, a0, 7) + %ld(a1, sp, 8) + %call(&eval_args) + + # cons(val, rest) + %mov(a1, a0) + %ld(a0, sp, 16) + %call(&cons) + %eret + + ::nil + %li(a0, %imm_val(%IMM.NIL)) +}) + +# apply(fn=a0, args=a1) -> result (a0) +# +# Frame: 16 bytes +# +0 args +%fn(apply, 16, { + %st(a1, sp, 0) + + # Only HEAP-tagged values can be applicable. PRIM is the only header + # type wired up here. + %andi(t0, a0, 7) + %li(t1, %TAG.HEAP) + %bne(t0, t1, &::not_proc) + + %lb(t0, a0, -3) + %li(t1, %HDR.PRIM) + %beq(t0, t1, &::prim) + + ::not_proc + %la(a0, &msg_not_proc) + %call(&print_cstr) + %li(a0, 8) + %call(&sys_exit) + + ::prim + %ld(t0, a0, 5) ; t0 = entry word (offset = -3 + 8) + %ld(a0, sp, 0) ; args list -> a0 + %callr(t0) +}) + +# ========================================================================= +# Primitives +# ========================================================================= +# +# Each primitive sits behind a 16-byte heap object literal in the data +# section: [hdr_word, entry_word]. The tagged value is &obj + 3. +# register_primitives interns the surface name and writes the tagged +# pointer into the symbol's global slot. + +%fn(register_primitives, 0, { + %la(a0, &name_sys_exit) + %li(a1, 8) + %call(&intern) + %sari(a0, a0, 3) ; idx + %la(a1, &prim_sys_exit) + %addi(a1, a1, 3) ; tag HEAP + %call(&sym_set_global) + # No stack frame needed -- pure register code with one nested call. +}) + +# prim_sys_exit_entry(args=a0). Args is a one-element list whose car is +# the exit code as a tagged fixnum. Untag and tail-jump to libp1pp's +# sys_exit (a %b, not a %call -- this is a leaf with no frame, and +# sys_exit doesn't return anyway). +:prim_sys_exit_entry + %ld(a0, a0, -1) ; car = fixnum + %sari(a0, a0, 3) + %b(&sys_exit) + +# ========================================================================= +# Read-only data +# ========================================================================= + +# Primitive object literals (16 bytes each). +:prim_sys_exit +$(%HDR.PRIM) &prim_sys_exit_entry %(0) + +# Surface names. Length is hard-coded at the call site; no NUL needed +# because intern takes (ptr, len). Aligned padding via "\0" bytes is +# fine -- M0 emits ASCII verbatim. +:name_sys_exit "sys-exit" + +:msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00' +:msg_load_fail "scheme1: failed to read source" '0a' '00' +:msg_symtab_full "scheme1: symbol table full" '0a' '00' +:msg_unexp_rparen "scheme1: unexpected ')'" '0a' '00' +:msg_unexp_eof "scheme1: unexpected EOF in form" '0a' '00' +:msg_unterm_list "scheme1: unterminated list" '0a' '00' +:msg_unbound "scheme1: unbound variable" '0a' '00' +:msg_not_proc "scheme1: not a procedure" '0a' '00' + +# ========================================================================= +# BSS pointer-init table +# ========================================================================= +# +# Each entry: 8-byte slot pointer (4-byte label ref + 4 bytes pad) + +# 8-byte offset constant. p1_main walks this once at startup. +:bss_init_tbl +&readbuf_buf_ptr %(0) OFF_readbuf +&heap_buf_ptr %(0) OFF_heap +&symtab_buf_ptr %(0) OFF_symtab +:bss_init_tbl_end + +# ========================================================================= +# Scalar BSS (file-resident, zero-initialized) +# ========================================================================= + +# heap_next: bump pointer; written once by p1_main, then by cons/alloc_hdr. +:heap_next $(0) + +# Source-buffer cursor and slurped length. +:readbuf_pos $(0) +:readbuf_len $(0) + +# Symbol table count (number of entries used). +:symtab_count $(0) + +# Pointer slots for the past-:ELF_end arenas. +:readbuf_buf_ptr $(0) +:heap_buf_ptr $(0) +:symtab_buf_ptr $(0) + +:ELF_end diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh @@ -6,11 +6,18 @@ ## ELF binary (build/$ARCH/m1pp, built by boot2.sh / boot-build-p1.sh). ## ## Pipeline: -## cat <P1/P1-$ARCH.M1pp> <P1/P1.M1pp> <src> -> /tmp/combined.M1pp -## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1 -## M0 /tmp/expanded.M1 -> /tmp/prog.hex2 -## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2 -## hex2-0 /tmp/linked.hex2 -> $OUT +## cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <src> -> /tmp/combined.M1pp +## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1 +## M0 /tmp/expanded.M1 -> /tmp/prog.hex2 +## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2 +## hex2-0 /tmp/linked.hex2 -> $OUT +## +## libp1pp (P1/P1pp.P1pp) is concatenated unconditionally so portable +## sources can use %fn, the control-flow macros, and libp1pp routines +## (sys_*, print*, parse_*, fmt_*, memcpy/memcmp, bump allocator, panic, +## %assert_*) without per-program plumbing. M0 has no link-time DCE, so +## programs that don't reference any libp1pp routine still pay a fixed +## code-size tax (~a few KB). ## ## Env: ARCH=aarch64|amd64|riscv64 ## Usage: boot-build-p1pp.sh <src> <out> @@ -25,6 +32,7 @@ OUT=$2 BACKEND=P1/P1-$ARCH.M1pp FRONTEND=P1/P1.M1pp +LIBP1PP=P1/P1pp.P1pp ELF_HDR=vendor/seed/$ARCH/ELF.hex2 TOOLS=build/$ARCH/tools M1PP_BIN=build/$ARCH/m1pp @@ -32,7 +40,7 @@ NAME=$(basename "$SRC" .P1pp) WORK=build/$ARCH/.work/$NAME mkdir -p "$WORK" "$(dirname "$OUT")" -cat "$BACKEND" "$FRONTEND" "$SRC" > /tmp/combined.M1pp +cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$SRC" > /tmp/combined.M1pp "$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1 "$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2 diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -1,5 +1,5 @@ #!/bin/sh -## run-tests.sh — unified test runner for the m1pp and p1 fixture suites. +## run-tests.sh — unified test runner for the m1pp, p1, and scheme1 suites. ## ## Each suite is a directory of `<name>.<ext>` fixtures with sibling ## `<name>.expected` files. The runner builds each fixture, runs it inside @@ -8,19 +8,24 @@ ## (parked, ad-hoc debugging). ## ## Suites: -## m1pp tests/M1pp/<name>.M1 — P1 program built via build-p1.sh -## for each requested arch, run in -## container, stdout diffed. -## tests/M1pp/<name>.M1pp — m1pp expander parity test: per-arch -## m1pp binary consumes the fixture -## and writes <out>; diffed. -## p1 tests/P1/<name>.P1pp — P1pp program built via build-p1pp.sh -## for each requested arch, run in -## container, stdout diffed. +## m1pp tests/M1pp/<name>.M1 — P1 program built via build-p1.sh +## for each requested arch, run in +## container, stdout diffed. +## tests/M1pp/<name>.M1pp — m1pp expander parity test: per-arch +## m1pp binary consumes the fixture +## and writes <out>; diffed. +## p1 tests/P1/<name>.P1pp — P1pp program built via build-p1pp.sh +## for each requested arch, run in +## container, stdout diffed. +## scheme1 tests/scheme1/<name>.scm — Scheme source run by the per-arch +## scheme1 binary. stdout diffed +## against <name>.expected (default +## empty); exit code diffed against +## <name>.expected-exit (default 0). ## ## All three arches by default; --arch restricts to one. ## -## Usage: scripts/run-tests.sh --suite <m1pp|p1> [--arch ARCH] [name ...] +## Usage: scripts/run-tests.sh --suite <m1pp|p1|scheme1> [--arch ARCH] [name ...] set -eu @@ -42,8 +47,8 @@ while [ "$#" -gt 0 ]; do done case "$SUITE" in - m1pp|p1) ;; - "") echo "$0: --suite required (m1pp | p1)" >&2; exit 2 ;; + m1pp|p1|scheme1) ;; + "") echo "$0: --suite required (m1pp | p1 | scheme1)" >&2; exit 2 ;; *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;; esac @@ -203,9 +208,80 @@ run_p1_suite() { done } +## --- scheme1 suite ------------------------------------------------------ +## +## Caller (Make) ensures build/<arch>/scheme1 already exists. The runner +## just invokes that binary against each .scm fixture, capturing stdout +## and the exit status. stdout is diffed against <name>.expected (defaults +## to empty if absent); the exit status is diffed against +## <name>.expected-exit (defaults to 0 if absent). + +run_scheme1_suite() { + if [ -z "$ARCH" ]; then + ARCHES="aarch64 amd64 riscv64" + else + ARCHES=$ARCH + fi + if [ -z "$NAMES" ]; then + NAMES=$(discover tests/scheme1 scm) + fi + for name in $NAMES; do + fixture=tests/scheme1/$name.scm + expected_stdout_file=tests/scheme1/$name.expected + expected_exit_file=tests/scheme1/$name.expected-exit + + if [ ! -e "$fixture" ]; then + echo " SKIP $name (no .scm)"; continue + fi + if [ -e "$expected_stdout_file" ]; then + expected_stdout=$(cat "$expected_stdout_file") + else + expected_stdout= + fi + if [ -e "$expected_exit_file" ]; then + expected_exit=$(cat "$expected_exit_file") + else + expected_exit=0 + fi + + for arch in $ARCHES; do + label="[$arch] $name" + bin=build/$arch/scheme1 + if [ ! -x "$bin" ]; then + report "$label" FAIL + echo " (missing $bin -- run 'make scheme1 ARCH=$arch')" >&2 + continue + fi + + tmp_stdout=$(mktemp) + if run_in_container "$arch" "./$bin" "$fixture" >"$tmp_stdout" 2>&1; then + actual_exit=0 + else + actual_exit=$? + fi + actual_stdout=$(cat "$tmp_stdout") + rm -f "$tmp_stdout" + + if [ "$actual_stdout" = "$expected_stdout" ] \ + && [ "$actual_exit" = "$expected_exit" ]; then + report "$label" PASS + else + report "$label" FAIL + if [ "$actual_stdout" != "$expected_stdout" ]; then + show_diff "$expected_stdout" "$actual_stdout" + fi + if [ "$actual_exit" != "$expected_exit" ]; then + echo " exit: expected $expected_exit, got $actual_exit" + fi + fi + done + done +} + case "$SUITE" in - m1pp) run_m1pp_suite ;; - p1) run_p1_suite ;; + m1pp) run_m1pp_suite ;; + p1) run_p1_suite ;; + scheme1) run_scheme1_suite ;; esac echo "$PASS passed, $FAIL failed" diff --git a/tests/scheme1/00-exit.expected-exit b/tests/scheme1/00-exit.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/00-exit.scm b/tests/scheme1/00-exit.scm @@ -0,0 +1 @@ +(sys-exit 42)