boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 7d47e3a0a957c251ad510bd88c30d306b75f2e26
parent b0dd9c3418d3dc4e783b4f960384ea2a0de6980a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 08:32:45 -0700

Add scheme1 cond, begin, and the let family

intern_special_forms gains quote/begin/cond/else/let/let*/letrec via a
new %intern_form macro that compresses each entry to one line. The eval
pair-branch dispatch and per-form tail-call stubs are similarly factored
into %dispatch_form / %tail_to_handler.

eval_cond walks clauses; the literal `else` symbol is matched by pointer
equality with sym_else. eval_let / eval_letstar share a layout and
differ only in which env each init evaluates against. eval_letrec
pre-binds every name to UNSPEC then patches via set-cdr! after each init
runs in the new env. Named let dispatches to eval_let_named which builds
a closure capturing a self-binding, then applies it -- equivalent to a
letrec + call without going through s-expression desugaring.

Diffstat:
Mscheme1/scheme1.P1pp | 514+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Atests/scheme1/16-cond.expected-exit | 1+
Atests/scheme1/16-cond.scm | 2++
Atests/scheme1/17-cond-else.expected-exit | 1+
Atests/scheme1/17-cond-else.scm | 2++
Atests/scheme1/18-let.expected-exit | 1+
Atests/scheme1/18-let.scm | 2++
Atests/scheme1/19-letstar.expected-exit | 1+
Atests/scheme1/19-letstar.scm | 3+++
Atests/scheme1/20-letrec.expected-exit | 1+
Atests/scheme1/20-letrec.scm | 4++++
Atests/scheme1/21-letrec-recursion.expected-exit | 1+
Atests/scheme1/21-letrec-recursion.scm | 3+++
Atests/scheme1/22-named-let.expected-exit | 1+
Atests/scheme1/22-named-let.scm | 5+++++
15 files changed, 493 insertions(+), 49 deletions(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -138,6 +138,36 @@ %call(&sys_exit) %endm +# Intern a special-form name and stash the tagged-symbol value in a +# labeled slot. `name` and `slot` are written as full label refs +# (`&foo`) so the macro can substitute them verbatim into %la sites. +%macro intern_form(name, len, slot) +%la(a0, name) +%li(a1, len) +%call(&intern) +%la(t0, slot) +%st(a0, t0, 0) +%endm + +# Special-form dispatch: pointer-compare the head symbol against `slot`'s +# cached value (in t0) and branch to `target` on hit. Caller has already +# loaded head into t0. +%macro dispatch_form(slot, target) +%la(t1, slot) +%ld(t1, t1, 0) +%beq(t0, t1, target) +%endm + +# Tail-jump from a special-form dispatch label to its handler. Handlers +# uniformly take (rest=cdr(expr), env) -> value; expr lives at sp[0], +# env at sp[8] in eval's frame. +%macro tail_to_handler(handler) +%ld(a0, sp, 0) +%cdr(a0, a0) +%ld(a1, sp, 8) +%tail(handler) +%endm + # ========================================================================= # p1_main -- runtime spine # ========================================================================= @@ -780,18 +810,15 @@ # isn't a symbol cannot collide with any sym_* slot. %ld(t0, sp, 0) %car(t0, t0) ; t0 = head - %la(t1, &sym_quote) - %ld(t1, t1, 0) - %beq(t0, t1, &::do_quote) - %la(t1, &sym_if) - %ld(t1, t1, 0) - %beq(t0, t1, &::do_if) - %la(t1, &sym_lambda) - %ld(t1, t1, 0) - %beq(t0, t1, &::do_lambda) - %la(t1, &sym_define) - %ld(t1, t1, 0) - %beq(t0, t1, &::do_define) + %dispatch_form(&sym_quote, &::do_quote) + %dispatch_form(&sym_if, &::do_if) + %dispatch_form(&sym_lambda, &::do_lambda) + %dispatch_form(&sym_define, &::do_define) + %dispatch_form(&sym_begin, &::do_begin) + %dispatch_form(&sym_cond, &::do_cond) + %dispatch_form(&sym_let, &::do_let) + %dispatch_form(&sym_letstar, &::do_letstar) + %dispatch_form(&sym_letrec, &::do_letrec) # head = eval(car(expr), env) %ld(a0, sp, 0) @@ -819,22 +846,21 @@ %eret ::do_if - %ld(a0, sp, 0) - %cdr(a0, a0) - %ld(a1, sp, 8) - %tail(&eval_if) - + %tail_to_handler(&eval_if) ::do_lambda - %ld(a0, sp, 0) - %cdr(a0, a0) - %ld(a1, sp, 8) - %tail(&eval_lambda) - + %tail_to_handler(&eval_lambda) ::do_define - %ld(a0, sp, 0) - %cdr(a0, a0) - %ld(a1, sp, 8) - %tail(&eval_define) + %tail_to_handler(&eval_define) + ::do_begin + %tail_to_handler(&eval_body) + ::do_cond + %tail_to_handler(&eval_cond) + ::do_let + %tail_to_handler(&eval_let) + ::do_letstar + %tail_to_handler(&eval_letstar) + ::do_letrec + %tail_to_handler(&eval_letrec) }) # eval_args(args=a0, env=a1) -> evaluated args list (cons-built). @@ -924,29 +950,16 @@ # these slots before falling through to ordinary application. %fn(intern_special_forms, 0, { - %la(a0, &name_quote) - %li(a1, 5) - %call(&intern) - %la(t0, &sym_quote) - %st(a0, t0, 0) - - %la(a0, &name_if) - %li(a1, 2) - %call(&intern) - %la(t0, &sym_if) - %st(a0, t0, 0) - - %la(a0, &name_lambda) - %li(a1, 6) - %call(&intern) - %la(t0, &sym_lambda) - %st(a0, t0, 0) - - %la(a0, &name_define) - %li(a1, 6) - %call(&intern) - %la(t0, &sym_define) - %st(a0, t0, 0) + %intern_form(&name_quote, 5, &sym_quote) + %intern_form(&name_if, 2, &sym_if) + %intern_form(&name_lambda, 6, &sym_lambda) + %intern_form(&name_define, 6, &sym_define) + %intern_form(&name_begin, 5, &sym_begin) + %intern_form(&name_cond, 4, &sym_cond) + %intern_form(&name_else, 4, &sym_else) + %intern_form(&name_let, 3, &sym_let) + %intern_form(&name_letstar, 4, &sym_letstar) + %intern_form(&name_letrec, 6, &sym_letrec) }) # eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else). @@ -1075,6 +1088,397 @@ %li(a0, %imm_val(%IMM.UNSPEC)) }) +# eval_cond(clauses=a0, env=a1) -> value (a0). +# Each clause is (test body...) or (else body...). The first clause whose +# test isn't #f wins; its body is tail-evaluated. `else` is a literal +# symbol matched by pointer equality with sym_else's cached value. +# +# Frame: 16 bytes +# +0 clauses (advances) +# +8 env +%fn(eval_cond, 16, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + ::loop + %ld(t0, sp, 0) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::no_match) + + %car(t1, t0) ; clause = (test body...) + %car(t2, t1) ; test_expr + + # else? + %la(a0, &sym_else) + %ld(a0, a0, 0) + %beq(t2, a0, &::do_match) + + # Eval test in env. + %mov(a0, t2) + %ld(a1, sp, 8) + %call(&eval) + %li(t0, %imm_val(%IMM.FALSE)) + %beq(a0, t0, &::next) + + ::do_match + %ld(t0, sp, 0) + %car(t0, t0) + %cdr(a0, t0) ; body + %ld(a1, sp, 8) + %tail(&eval_body) + + ::next + %ld(t0, sp, 0) + %cdr(t0, t0) + %st(t0, sp, 0) + %b(&::loop) + + ::no_match + %li(a0, %imm_val(%IMM.UNSPEC)) +}) + +# eval_let(rest=a0, env=a1) -> value (a0). +# Two surface forms: +# (let ((p v) ...) body...) +# (let name ((p v) ...) body...) ; named let, dispatches to eval_let_named +# Standard `let` evaluates every init in `env`, then extends env with all +# bindings simultaneously and tail-evaluates the body. +# +# Frame: 32 bytes +# +0 rest +# +8 env (original) +# +16 walk (bindings, advances) +# +24 new_env (built up) +%fn(eval_let, 32, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + # Named let? + %car(t0, a0) + %tagof(t1, t0) + %li(t2, %TAG.SYM) + %beq(t1, t2, &::named) + + %ld(t0, sp, 0) + %car(t0, t0) ; bindings + %st(t0, sp, 16) + %ld(t0, sp, 8) + %st(t0, sp, 24) ; new_env = env + + ::loop + %ld(t0, sp, 16) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::done) + + %car(t1, t0) ; pair = (name init) + %cdr(t2, t1) + %car(t2, t2) ; init + + # val = eval(init, env_orig) + %mov(a0, t2) + %ld(a1, sp, 8) + %call(&eval) + + # binding = cons(name, val) + %ld(t0, sp, 16) + %car(t1, t0) + %car(t2, t1) + %mov(a1, a0) + %mov(a0, t2) + %call(&cons) + + # new_env = cons(binding, new_env) + %ld(a1, sp, 24) + %call(&cons) + %st(a0, sp, 24) + + %ld(t0, sp, 16) + %cdr(t0, t0) + %st(t0, sp, 16) + %b(&::loop) + + ::done + %ld(a0, sp, 0) + %cdr(a0, a0) ; body + %ld(a1, sp, 24) + %tail(&eval_body) + + ::named + %ld(a0, sp, 0) + %ld(a1, sp, 8) + %tail(&eval_let_named) +}) + +# eval_letstar(rest=a0, env=a1) -> value (a0). +# Like let, but each init is evaluated in the env extended by all prior +# bindings of the same let* form (left-to-right shadowing). +# +# Frame: 32 bytes (same layout as eval_let) +%fn(eval_letstar, 32, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + %ld(t0, sp, 0) + %car(t0, t0) + %st(t0, sp, 16) + %ld(t0, sp, 8) + %st(t0, sp, 24) + + ::loop + %ld(t0, sp, 16) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::done) + + %car(t1, t0) + %cdr(t2, t1) + %car(t2, t2) + + # val = eval(init, new_env) + %mov(a0, t2) + %ld(a1, sp, 24) + %call(&eval) + + %ld(t0, sp, 16) + %car(t1, t0) + %car(t2, t1) + %mov(a1, a0) + %mov(a0, t2) + %call(&cons) + + %ld(a1, sp, 24) + %call(&cons) + %st(a0, sp, 24) + + %ld(t0, sp, 16) + %cdr(t0, t0) + %st(t0, sp, 16) + %b(&::loop) + + ::done + %ld(a0, sp, 0) + %cdr(a0, a0) + %ld(a1, sp, 24) + %tail(&eval_body) +}) + +# eval_letrec(rest=a0, env=a1) -> value (a0). +# Two-phase. Phase 1: pre-bind every name to UNSPEC in the new env so +# inits can reference each other. Phase 2: evaluate each init in the new +# env and patch the matching binding's cdr (linear-scan lookup is fine +# here -- bindings are typically ≤ a handful). +# +# Frame: 32 bytes +# +0 rest +# +8 env_orig +# +16 walk +# +24 new_env +%fn(eval_letrec, 32, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + %ld(t0, sp, 0) + %car(t0, t0) + %st(t0, sp, 16) + %ld(t0, sp, 8) + %st(t0, sp, 24) + + ::phase1 + %ld(t0, sp, 16) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p1_done) + + %car(t1, t0) + %car(t2, t1) ; name + %mov(a0, t2) + %li(a1, %imm_val(%IMM.UNSPEC)) + %call(&cons) + %ld(a1, sp, 24) + %call(&cons) + %st(a0, sp, 24) + + %ld(t0, sp, 16) + %cdr(t0, t0) + %st(t0, sp, 16) + %b(&::phase1) + + ::p1_done + %ld(t0, sp, 0) + %car(t0, t0) + %st(t0, sp, 16) ; reset walk + + ::phase2 + %ld(t0, sp, 16) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p2_done) + + %car(t1, t0) + %cdr(t2, t1) + %car(t2, t2) ; init + %mov(a0, t2) + %ld(a1, sp, 24) + %call(&eval) ; val in a0 + + %ld(t0, sp, 16) + %car(t1, t0) + %car(t2, t1) ; name (in t2) + + %ld(t1, sp, 24) + ::scan + %car(a1, t1) + %car(a2, a1) + %beq(a2, t2, &::found) + %cdr(t1, t1) + %b(&::scan) + + ::found + %car(a1, t1) + %st(a0, a1, 7) ; set-cdr! binding val + + %ld(t0, sp, 16) + %cdr(t0, t0) + %st(t0, sp, 16) + %b(&::phase2) + + ::p2_done + %ld(a0, sp, 0) + %cdr(a0, a0) + %ld(a1, sp, 24) + %tail(&eval_body) +}) + +# eval_let_named(rest=a0, env=a1) -> value (a0). +# rest = (name bindings . body). Builds a closure whose captured env +# contains a self-binding that resolves `name` to the closure itself +# (set after the closure is allocated, via set-cdr! on the placeholder +# pair). Inits are evaluated in the *original* env (matches let +# semantics), then we apply the closure. +# +# Frame: 64 bytes +# +0 rest +# +8 env_orig +# +16 self_binding (the (name . UNSPEC) placeholder, patched at the end) +# +24 self_env (cons(self_binding, env_orig)) +# +32 walk (advances; reset between passes) +# +40 head (current pass's list head — params, then args) +# +48 tail (current pass's list tail) +# +56 params (saved between passes) +%fn(eval_let_named, 64, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + # 1. self_binding = (name . UNSPEC); self_env = cons(self_binding, env) + %car(t0, a0) + %mov(a0, t0) + %li(a1, %imm_val(%IMM.UNSPEC)) + %call(&cons) + %st(a0, sp, 16) + %ld(a1, sp, 8) + %call(&cons) + %st(a0, sp, 24) + + # 2. Pass 1: build params list (cdr-tail trick) by walking bindings. + %li(t0, %imm_val(%IMM.NIL)) + %st(t0, sp, 40) + %st(t0, sp, 48) + %ld(t0, sp, 0) + %cdr(t0, t0) + %car(t0, t0) ; bindings + %st(t0, sp, 32) + + ::p1_loop + %ld(t0, sp, 32) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p1_done) + + %car(t1, t0) + %car(t2, t1) ; name + %mov(a0, t2) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) ; cell = (name . NIL) + + %ld(t0, sp, 40) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p1_first) + %ld(t0, sp, 48) + %st(a0, t0, 7) + %st(a0, sp, 48) + %b(&::p1_advance) + + ::p1_first + %st(a0, sp, 40) + %st(a0, sp, 48) + + ::p1_advance + %ld(t0, sp, 32) + %cdr(t0, t0) + %st(t0, sp, 32) + %b(&::p1_loop) + + ::p1_done + %ld(t0, sp, 40) + %st(t0, sp, 56) ; save params + + # 3. Pass 2: build args list (eval inits in env_orig). + %li(t0, %imm_val(%IMM.NIL)) + %st(t0, sp, 40) + %st(t0, sp, 48) + %ld(t0, sp, 0) + %cdr(t0, t0) + %car(t0, t0) + %st(t0, sp, 32) + + ::p2_loop + %ld(t0, sp, 32) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p2_done) + + %car(t1, t0) + %cdr(t2, t1) + %car(t2, t2) ; init + %mov(a0, t2) + %ld(a1, sp, 8) + %call(&eval) ; val + + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) ; cell = (val . NIL) + + %ld(t0, sp, 40) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::p2_first) + %ld(t0, sp, 48) + %st(a0, t0, 7) + %st(a0, sp, 48) + %b(&::p2_advance) + + ::p2_first + %st(a0, sp, 40) + %st(a0, sp, 48) + + ::p2_advance + %ld(t0, sp, 32) + %cdr(t0, t0) + %st(t0, sp, 32) + %b(&::p2_loop) + + ::p2_done + # 4. Closure: eval_lambda((params . body), self_env). + %ld(a0, sp, 56) ; params + %ld(t0, sp, 0) + %cdr(t0, t0) + %cdr(a1, t0) ; body + %call(&cons) + %ld(a1, sp, 24) + %call(&eval_lambda) + + # 5. Patch self_binding cdr to closure. + %ld(t0, sp, 16) + %st(a0, t0, 7) + + # 6. apply(closure, args). + %ld(a1, sp, 40) + %tail(&apply) +}) + # bind_params(params=a0, args=a1, env=a2) -> extended env (a0). # Walks params and args in lockstep, prepending (param . arg) to env. # Variadic `.`-tail: when params terminates with a SYM (rather than NIL), @@ -1222,6 +1626,12 @@ :name_if "if" :name_lambda "lambda" :name_define "define" +:name_begin "begin" +:name_cond "cond" +:name_else "else" +:name_let "let" +:name_letstar "let*" +:name_letrec "letrec" :name_sys_exit "sys-exit" :msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00' @@ -1266,6 +1676,12 @@ :sym_if $(0) :sym_lambda $(0) :sym_define $(0) +:sym_begin $(0) +:sym_cond $(0) +:sym_else $(0) +:sym_let $(0) +:sym_letstar $(0) +:sym_letrec $(0) # Pointer slots for the past-:ELF_end arenas. :readbuf_buf_ptr $(0) diff --git a/tests/scheme1/16-cond.expected-exit b/tests/scheme1/16-cond.expected-exit @@ -0,0 +1 @@ +33 diff --git a/tests/scheme1/16-cond.scm b/tests/scheme1/16-cond.scm @@ -0,0 +1,2 @@ +; First-truthy clause wins; subsequent clauses are not evaluated. +(sys-exit (cond (#f 1) (#t 33) (#f 2))) diff --git a/tests/scheme1/17-cond-else.expected-exit b/tests/scheme1/17-cond-else.expected-exit @@ -0,0 +1 @@ +99 diff --git a/tests/scheme1/17-cond-else.scm b/tests/scheme1/17-cond-else.scm @@ -0,0 +1,2 @@ +; `else` is the catch-all; only fires after every prior test was #f. +(sys-exit (cond (#f 1) (#f 2) (else 99))) diff --git a/tests/scheme1/18-let.expected-exit b/tests/scheme1/18-let.expected-exit @@ -0,0 +1 @@ +18 diff --git a/tests/scheme1/18-let.scm b/tests/scheme1/18-let.scm @@ -0,0 +1,2 @@ +; Basic let: parallel binding, body in extended env. +(sys-exit (let ((x 7) (y 18)) y)) diff --git a/tests/scheme1/19-letstar.expected-exit b/tests/scheme1/19-letstar.expected-exit @@ -0,0 +1 @@ +5 diff --git a/tests/scheme1/19-letstar.scm b/tests/scheme1/19-letstar.scm @@ -0,0 +1,3 @@ +; let* binds sequentially: y's init may reference the new x. +(define x 1) ; outer x; let*'s x must shadow inside the body. +(sys-exit (let* ((x 5) (y x)) y)) diff --git a/tests/scheme1/20-letrec.expected-exit b/tests/scheme1/20-letrec.expected-exit @@ -0,0 +1 @@ +44 diff --git a/tests/scheme1/20-letrec.scm b/tests/scheme1/20-letrec.scm @@ -0,0 +1,4 @@ +; letrec: a local helper that calls itself. The closure must see its own +; binding via the same env it captured. Without numeric primitives we +; terminate by passing #t at the recursive call. +(sys-exit (letrec ((f (lambda (n) (if n n (f #t))))) (f 44))) diff --git a/tests/scheme1/21-letrec-recursion.expected-exit b/tests/scheme1/21-letrec-recursion.expected-exit @@ -0,0 +1 @@ +7 diff --git a/tests/scheme1/21-letrec-recursion.scm b/tests/scheme1/21-letrec-recursion.scm @@ -0,0 +1,3 @@ +; letrec actually exercises recursion: f(2) recurses to f(#f), which +; takes the else branch and exits 7. +(sys-exit (letrec ((f (lambda (n) (if n (f #f) 7)))) (f 2))) diff --git a/tests/scheme1/22-named-let.expected-exit b/tests/scheme1/22-named-let.expected-exit @@ -0,0 +1 @@ +71 diff --git a/tests/scheme1/22-named-let.scm b/tests/scheme1/22-named-let.scm @@ -0,0 +1,5 @@ +; Named let: `go` is bound to a closure that re-enters the loop body. +; First iteration: first=#t → recurse with first=#f and the same n=71. +; Second: first=#f → return n. +(sys-exit (let go ((n 71) (first #t)) + (if first (go n #f) n)))