boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 086f5f0fbe01a0a2d59d4158da12057d37cfe4e6
parent 8a3e64ac5a9f5c9857b856242a5def558f1411ec
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 13:12:11 -0700

scheme1: single-arm if, do, case-lambda, letrec*

Single-arm `if` returns UNSPEC when the test is #f instead of
dereferencing the missing else.

`do` is implemented as a special form: build new_env once with binding
pairs, then mutate cdrs each iteration. Step values collect into a
preallocated parallel list before update so steps see pre-update values
(parallel semantics). Specs without a step keep their current value.

`case-lambda` allocates a new HDR.CASELAMBDA heap object holding the
clauses list and captured env. apply() dispatches to apply_case_lambda,
which walks clauses and selects the first whose formals shape admits
the argument count (proper requires equality; improper / symbol formals
match >=). procedure? and the writer recognize the new header.

`letrec*` is a dispatch alias of letrec — eval_letrec already initializes
left-to-right.

Also fix the case-lambda fixture: scheme1's `+` is 2-arg, so the
clause body uses pairwise sums.

Diffstat:
Mdocs/SCHEME1-R7RS-TODO.md | 6------
Mscheme1/scheme1.P1pp | 409+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtests/scheme1/020-letrec.expected-exit | 2+-
Mtests/scheme1/020-letrec.scm | 32+++++++++++++++++++++++++-------
Mtests/scheme1/106-case-lambda.scm | 2+-
Dtests/scheme1/20-letrec.expected-exit | 1-
Dtests/scheme1/20-letrec.scm | 25-------------------------
7 files changed, 428 insertions(+), 49 deletions(-)

diff --git a/docs/SCHEME1-R7RS-TODO.md b/docs/SCHEME1-R7RS-TODO.md @@ -19,12 +19,6 @@ and rough implementation notes. ### Special forms / form fixes -- [ ] **Single-arm `if`.** `(if t c)` returns unspec when test is `#f` - instead of segfaulting (eval_if path). -- [ ] **`do`.** Iteration with init/step/test. -- [ ] **`case-lambda`.** Pick first matching arity at call time. - Stored as a list of (formals body) clauses inside the closure - payload. - [ ] **`let-values`, `let*-values`, `define-values`.** Depend on the MV protocol. - [ ] **Flat (one-level) quasiquote.** `(quasiquote ...)`, diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -13,7 +13,7 @@ %enum TAG { FIXNUM PAIR SYM HEAP IMM } %enum IMM { FALSE TRUE NIL UNSPEC UNBOUND EOF } -%enum HDR { BV CLOSURE PRIM TD REC } +%enum HDR { BV CLOSURE PRIM TD REC CASELAMBDA } # imm_val(idx) -> integer-expression for the tagged immediate at IMM index # `idx`. Used both at %li sites (loaded into a register) and at $() emission @@ -25,6 +25,7 @@ %struct SYMENT { name_ptr name_len global_val pad } # .SIZE = 32 %struct PRIM { hdr entry_w data } # .SIZE = 24 %struct CLOSURE { hdr params body env } # .SIZE = 32 +%struct CASELAMBDA { hdr clauses env pad } # .SIZE = 32 %struct TD { hdr name nfields fields } # .SIZE = 32 %struct BV { hdr data } # .SIZE = 16 %struct REC { hdr td } # .SIZE = 16 (header) @@ -1161,6 +1162,7 @@ %dispatch_form(&sym_let, &::do_let) %dispatch_form(&sym_letstar, &::do_letstar) %dispatch_form(&sym_letrec, &::do_letrec) + %dispatch_form(&sym_letrecstar, &::do_letrec) %dispatch_form(&sym_and, &::do_and) %dispatch_form(&sym_or, &::do_or) %dispatch_form(&sym_when, &::do_when) @@ -1169,6 +1171,8 @@ %dispatch_form(&sym_setbang, &::do_setbang) %dispatch_form(&sym_define_record_type, &::do_define_record_type) %dispatch_form(&sym_pmatch, &::do_pmatch) + %dispatch_form(&sym_do, &::do_do) + %dispatch_form(&sym_case_lambda, &::do_case_lambda) # head = eval(car(expr), env) %ldl(a0, expr) @@ -1227,6 +1231,10 @@ %tail_to_handler(&eval_define_record_type) ::do_pmatch %tail_to_handler(&eval_pmatch) + ::do_do + %tail_to_handler(&eval_do) + ::do_case_lambda + %tail_to_handler(&eval_case_lambda) }) # eval_args(args=a0, env=a1) -> evaluated args list (cons-built). @@ -1297,6 +1305,8 @@ %beq(t0, t1, &::prim) %li(t1, %HDR.CLOSURE) %beq(t0, t1, &::closure) + %li(t1, %HDR.CASELAMBDA) + %beq(t0, t1, &::case_lambda) ::not_proc %die(msg_not_proc) @@ -1334,6 +1344,13 @@ %mov(a1, a0) %ldl(a0, body) %tail(&eval_body) + + ::case_lambda + # apply_case_lambda(args, obj). args already in `args` slot; the obj + # is still in a0. + %mov(a1, a0) + %ldl(a0, args) + %tail(&apply_case_lambda) }) # ========================================================================= @@ -1358,6 +1375,7 @@ %intern_form(&name_let, 3, &sym_let) %intern_form(&name_letstar, 4, &sym_letstar) %intern_form(&name_letrec, 6, &sym_letrec) + %intern_form(&name_letrecstar, 7, &sym_letrecstar) %intern_form(&name_and, 3, &sym_and) %intern_form(&name_or, 2, &sym_or) %intern_form(&name_when, 4, &sym_when) @@ -1366,14 +1384,18 @@ %intern_form(&name_setbang, 4, &sym_setbang) %intern_form(&name_define_record_type, 18, &sym_define_record_type) %intern_form(&name_pmatch, 6, &sym_pmatch) + %intern_form(&name_do, 2, &sym_do) + %intern_form(&name_case_lambda, 11, &sym_case_lambda) %intern_form(&name_unquote, 7, &sym_unquote) %intern_form(&name_guard, 5, &sym_guard) %intern_form(&name_underscore, 1, &sym_underscore) %intern_form(&name_dollar, 1, &sym_dollar) }) -# eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else). -# No arity check here -- spec policy: malformed special forms are UB. +# eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then) or +# (test then else). Single-arm form returns UNSPEC when test is #f. +# No arity check beyond that -- spec policy: malformed special forms +# are UB. # # Locals: # rest @@ -1397,13 +1419,19 @@ %tail(&eval) ::else_branch - # else-branch: tail-eval(caddr(rest), env) - %ldl(a0, rest) - %cdr(a0, a0) - %cdr(a0, a0) - %car(a0, a0) + # If cddr(rest) is NIL, this is single-arm `if` -> UNSPEC. + %ldl(t0, rest) + %cdr(t0, t0) + %cdr(t0, t0) + %if_nil(t1, t0, &::no_else) + + # else-branch: tail-eval(car(cddr(rest)), env) + %car(a0, t0) %ldl(a1, env) %tail(&eval) + + ::no_else + %li(a0, %imm_val(%IMM.UNSPEC)) }) # eval_lambda(rest=a0, env=a1) -> closure (a0). @@ -2151,6 +2179,360 @@ %die(msg_pmatch_no_match) }) +# eval_do(rest=a0, env=a1) -> value (a0). +# rest = (((var init step?) ...) (test result?...) body...). +# +# Phase 1 (init): walk binding-specs in order, eval each `init` in the +# outer env, build new_env by consing (var . val) pairs onto it. A +# parallel list `pairs_head` records the binding pairs in spec order so +# the iteration can mutate them by set-cdr!. A second parallel list +# `vals_head` is preallocated (one cell per spec) to hold each iteration's +# computed step values without per-iteration cell allocation. +# +# Phase 2 (loop): eval test in new_env. Truthy -> tail-eval result body +# (UNSPEC if no result forms). Falsy -> eval each command form in +# new_env (discard), collect new step values into vals_head (parallel +# semantics: every step is evaluated against the iteration's pre-update +# bindings; specs without a step keep their current value), then walk +# pairs_head/vals_head together and set-cdr! each binding pair to its +# new value. Loop. +# +# Locals: +# rest original rest pointer +# env outer env +# new_env env extended with binding pairs (mutated each iter) +# walk generic cdr-cursor (binding-specs / commands / steps) +# pairs_head list of binding-pair refs in spec order +# pairs_tail append point during init +# vals_head parallel list of cells holding each iteration's step vals +# vals_tail append point during init +# body body command-forms (cddr of rest) +# pair_walk cdr-cursor over pairs_head during step/update +# val_walk cdr-cursor over vals_head during step/update +%fn2(eval_do, {rest env new_env walk pairs_head pairs_tail vals_head vals_tail body pair_walk val_walk}, { + %stl(a0, rest) + %stl(a1, env) + + %ldl(t0, rest) + %car(t0, t0) + %stl(t0, walk) + %ldl(t0, env) + %stl(t0, new_env) + %li(t0, %imm_val(%IMM.NIL)) + %stl(t0, pairs_head) + %stl(t0, pairs_tail) + %stl(t0, vals_head) + %stl(t0, vals_tail) + + ::init_loop + %ldl(t0, walk) + %if_nil(t1, t0, &::init_done) + + # spec = car(walk); init-expr = car(cdr(spec)). + %car(t1, t0) + %cdr(a0, t1) + %car(a0, a0) ; init expression + + # val = eval(init, env) + %ldl(a1, env) + %call(&eval) + + # binding pair = cons(var, val); var = car(car(walk)). + %ldl(t0, walk) + %car(t1, t0) + %car(t2, t1) ; var + %mov(a1, a0) + %mov(a0, t2) + %call(&cons) ; a0 = binding pair + + # new_env = cons(pair, new_env). cons clobbers t0/t1/t2 so we don't + # spill pair into a t-reg; recover it as car(new_env) afterwards. + %ldl(a1, new_env) + %call(&cons) + %stl(a0, new_env) + + # pcell = cons(pair, NIL). pair = car(new_env), and a0 still holds + # the new_env list pointer from the cons above. + %car(a0, a0) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + + %ldl(t0, pairs_head) + %if_nil(t1, t0, &::pairs_first) + %ldl(t0, pairs_tail) + %set_cdr(a0, t0) + %stl(a0, pairs_tail) + %b(&::vals_alloc) + + ::pairs_first + %stl(a0, pairs_head) + %stl(a0, pairs_tail) + + ::vals_alloc + # vcell = cons(NIL, NIL). Append onto vals list. + %li(a0, %imm_val(%IMM.NIL)) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + + %ldl(t0, vals_head) + %if_nil(t1, t0, &::vals_first) + %ldl(t0, vals_tail) + %set_cdr(a0, t0) + %stl(a0, vals_tail) + %b(&::init_advance) + + ::vals_first + %stl(a0, vals_head) + %stl(a0, vals_tail) + + ::init_advance + %advance_walk(24) + %b(&::init_loop) + + ::init_done + # body = cddr(rest). + %ldl(t0, rest) + %cdr(t0, t0) + %cdr(t0, t0) + %stl(t0, body) + + ::iter_loop + # test = car(car(cdr(rest))). Eval in new_env. + %ldl(t0, rest) + %cdr(t0, t0) + %car(t0, t0) ; (test result?...) + %car(a0, t0) ; test + %ldl(a1, new_env) + %call(&eval) + + %li(t0, %imm_val(%IMM.FALSE)) + %beq(a0, t0, &::commands) + + # Truthy: results = cdr(car(cdr(rest))). + %ldl(t0, rest) + %cdr(t0, t0) + %car(t0, t0) + %cdr(t0, t0) ; results + %if_nil(t1, t0, &::no_results) + %mov(a0, t0) + %ldl(a1, new_env) + %tail(&eval_body) + + ::no_results + %li(a0, %imm_val(%IMM.UNSPEC)) + %eret + + ::commands + %ldl(t0, body) + %stl(t0, walk) + + ::cmd_loop + %ldl(t0, walk) + %if_nil(t1, t0, &::step_phase) + %car(a0, t0) + %ldl(a1, new_env) + %call(&eval) + %advance_walk(24) + %b(&::cmd_loop) + + ::step_phase + # Compute new step values. walk = specs, pair_walk = pairs_head, + # val_walk = vals_head. For each spec: if spec has step (cddr non-NIL), + # val = eval(step, new_env); else val = cdr(binding_pair) (current). + # Store val into car(val_walk). + %ldl(t0, rest) + %car(t0, t0) + %stl(t0, walk) + %ldl(t0, pairs_head) + %stl(t0, pair_walk) + %ldl(t0, vals_head) + %stl(t0, val_walk) + + ::step_loop + %ldl(t0, walk) + %if_nil(t1, t0, &::update_phase) + + %car(t1, t0) ; spec + %cdr(t2, t1) + %cdr(t2, t2) ; (step?) or NIL + %if_nil(t1, t2, &::no_step) + + %car(a0, t2) ; step + %ldl(a1, new_env) + %call(&eval) + %b(&::store_val) + + ::no_step + %ldl(t0, pair_walk) + %car(t0, t0) ; binding pair + %cdr(a0, t0) ; current val + + ::store_val + %ldl(t0, val_walk) + %set_car(a0, t0) + + %advance_walk(24) + %advance_walk(72) + %advance_walk(80) + %b(&::step_loop) + + ::update_phase + # Walk pairs_head and vals_head; set-cdr!(pair, val) for each. + %ldl(t0, pairs_head) + %stl(t0, pair_walk) + %ldl(t0, vals_head) + %stl(t0, val_walk) + + ::update_loop + %ldl(t0, pair_walk) + %if_nil(t1, t0, &::iter_loop) + %car(t1, t0) ; binding pair + %ldl(t0, val_walk) + %car(t2, t0) ; new val + %set_cdr(t2, t1) + %advance_walk(72) + %advance_walk(80) + %b(&::update_loop) +}) + +# eval_case_lambda(rest=a0, env=a1) -> case-lambda object (a0). +# rest = ((formals body...) ...). Allocates a 32-byte HDR.CASELAMBDA +# heap object holding the clauses list and the captured env. apply() +# dispatches by walking clauses and matching the first whose formals +# admit the argument count. +# +# Locals: +# rest +# env +# obj +%fn2(eval_case_lambda, {rest env obj}, { + %stl(a0, rest) + %stl(a1, env) + + %li(a0, 32) + %li(a1, %HDR.CASELAMBDA) + %call(&alloc_hdr) + %stl(a0, obj) + + # obj.clauses = rest. + %ldl(t1, rest) + %ldl(t0, obj) + %heap_st(t1, t0, %CASELAMBDA.clauses) + + # obj.env = env. + %ldl(t1, env) + %heap_st(t1, t0, %CASELAMBDA.env) + + %ldl(a0, obj) +}) + +# apply_case_lambda(args=a0, obj=a1) -> result. +# Walks obj's clauses; the first whose formals shape admits the given +# argument count is selected. Match rules: +# formals = NIL -> arglen == 0 +# formals = SYM (rest) -> always matches +# formals = (a b ...) -> arglen == count(formals) +# formals = (a b ... . rest) -> arglen >= count(proper-prefix) +# On match: bind_params(formals, args, captured_env) -> new_env; +# tail-eval body in new_env. No-match dies with msg_case_lambda_no_match. +# +# Locals: +# args +# obj +# arglen +# clauses (advances) +# env_capt (captured env from obj) +# formals (current clause's formals; saved across bind_params) +# body (current clause's body; saved across bind_params) +%fn2(apply_case_lambda, {args obj arglen clauses env_capt formals body}, { + %stl(a0, args) + %stl(a1, obj) + + %ldl(a0, args) + %call(&list_length) + %stl(a0, arglen) + + %ldl(t0, obj) + %heap_ld(t1, t0, %CASELAMBDA.clauses) + %stl(t1, clauses) + %heap_ld(t1, t0, %CASELAMBDA.env) + %stl(t1, env_capt) + + ::loop + %ldl(t0, clauses) + %if_nil(t1, t0, &::no_match) + + %car(t1, t0) ; clause + %car(t2, t1) ; formals + %stl(t2, formals) + %cdr(t2, t1) ; body + %stl(t2, body) + + %ldl(t0, formals) + %tagof(t1, t0) + + # SYM formals: unconditional rest, always matches. + %li(t2, %TAG.SYM) + %beq(t1, t2, &::matched) + + # PAIR formals: count proper prefix, inspect terminator. + %li(t2, %TAG.PAIR) + %beq(t1, t2, &::pair_walk) + + # Otherwise (NIL or other immediate): match iff arglen == 0. + %ldl(t0, arglen) + %beqz(t0, &::matched) + %b(&::next) + + ::pair_walk + %li(t2, 0) ; required count + %ldl(t0, formals) + ::pcount + %tagof(t1, t0) + %li(a3, %TAG.PAIR) + %bne(t1, a3, &::pcount_done) + %addi(t2, t2, 1) + %cdr(t0, t0) + %b(&::pcount) + + ::pcount_done + # cursor (t0) is NIL (proper) or SYM (improper). + %tagof(t1, t0) + %li(a3, %TAG.SYM) + %beq(t1, a3, &::improper) + + # Proper formals: arglen must equal required. + %ldl(a3, arglen) + %beq(a3, t2, &::matched) + %b(&::next) + + ::improper + # Improper formals: arglen must be >= required. + # `arglen >= required` is `!(arglen < required)`. + %ldl(a3, arglen) + %blt(a3, t2, &::next) + %b(&::matched) + + ::next + %ldl(t0, clauses) + %cdr(t0, t0) + %stl(t0, clauses) + %b(&::loop) + + ::matched + %ldl(a0, formals) + %ldl(a1, args) + %ldl(a2, env_capt) + %call(&bind_params) + + %mov(a1, a0) + %ldl(a0, body) + %tail(&eval_body) + + ::no_match + %die(msg_case_lambda_no_match) +}) + # pmatch_match(pat=a0, subj=a1, env=a2) -> (env=a0, ok=a1) # # Walks pat and subj structurally. On success returns the (possibly @@ -3188,6 +3570,8 @@ %beq(t1, t2, &::yes) %li(t2, %HDR.PRIM) %beq(t1, t2, &::yes) + %li(t2, %HDR.CASELAMBDA) + %beq(t1, t2, &::yes) %b(&::end) ::yes %li(a0, %imm_val(%IMM.TRUE)) @@ -4515,6 +4899,8 @@ %beq(t0, t1, &::heap_bv) %li(t1, %HDR.CLOSURE) %beq(t0, t1, &::heap_closure) + %li(t1, %HDR.CASELAMBDA) + %beq(t0, t1, &::heap_closure) %li(t1, %HDR.PRIM) %beq(t0, t1, &::heap_prim) %li(t1, %HDR.TD) @@ -5284,6 +5670,7 @@ :name_let "let" 00 00 00 00 :name_letstar "let*" 00 00 00 :name_letrec "letrec" 00 +:name_letrecstar "letrec*" :name_and "and" 00 00 00 00 :name_or "or" 00 00 00 00 00 :name_when "when" 00 00 00 @@ -5292,6 +5679,8 @@ :name_setbang "set!" 00 00 00 :name_define_record_type "define-record-type" 00 00 00 00 00 :name_pmatch "pmatch" 00 +:name_do "do" 00 00 00 00 00 +:name_case_lambda "case-lambda" 00 00 00 00 :name_unquote "unquote" :name_guard "guard" 00 00 :name_underscore "_" 00 00 00 00 00 00 @@ -5476,6 +5865,7 @@ :msg_internal_define "scheme1: internal define is not supported (use letrec)" '0a' 00 :msg_pmatch_no_match "scheme1: pmatch: no clause matched" '0a' 00 :msg_bad_unquote_pattern "scheme1: pmatch: malformed ,-pattern" '0a' 00 +:msg_case_lambda_no_match "scheme1: case-lambda: no clause matched arity" '0a' 00 :name_ch_tab "tab" :name_ch_null "null" @@ -5549,6 +5939,7 @@ :sym_let $(0) :sym_letstar $(0) :sym_letrec $(0) +:sym_letrecstar $(0) :sym_and $(0) :sym_or $(0) :sym_when $(0) @@ -5557,6 +5948,8 @@ :sym_setbang $(0) :sym_define_record_type $(0) :sym_pmatch $(0) +:sym_do $(0) +:sym_case_lambda $(0) :sym_unquote $(0) :sym_guard $(0) :sym_underscore $(0) diff --git a/tests/scheme1/020-letrec.expected-exit b/tests/scheme1/020-letrec.expected-exit @@ -1 +1 @@ -120 +4 diff --git a/tests/scheme1/020-letrec.scm b/tests/scheme1/020-letrec.scm @@ -1,7 +1,25 @@ -; letrec: a local helper that calls itself. The closure must see its own -; binding via the captured env. Real recursion -- factorial walks the -; stack down to the base case and back, so a one-step bug would not -; produce 120. -(sys-exit (letrec ((fact (lambda (n) - (if (= n 0) 1 (* n (fact (- n 1))))))) - (fact 5))) ; 5! = 120 +; letrec / letrec*: local recursive bindings. letrec* is an alias of +; letrec in scheme1 (eval_letrec already evaluates inits left-to-right); +; the R7RS guarantee letrec* adds — later inits may reference earlier +; ones — is exercised below. + +;; --- letrec: classic recursive factorial --------------------------- +(if (= 120 (letrec ((fact (lambda (n) + (if (= n 0) 1 (* n (fact (- n 1))))))) + (fact 5))) + 0 (sys-exit 1)) + +;; --- letrec* accepts the same shape and value --------------------- +(if (= 120 (letrec* ((fact (lambda (n) + (if (= n 0) 1 (* n (fact (- n 1))))))) + (fact 5))) + 0 (sys-exit 2)) + +;; --- letrec*: later inits reference earlier ones ------------------- +;; y depends on x; z depends on both. + is 2-arg in scheme1, so each +;; init uses pairwise sums. +(if (= 6 (letrec* ((x 1) (y (+ x 2)) (z (+ y x))) + (+ z 2))) + 0 (sys-exit 3)) + +(sys-exit 4) diff --git a/tests/scheme1/106-case-lambda.scm b/tests/scheme1/106-case-lambda.scm @@ -37,7 +37,7 @@ (define base 10) (define add (case-lambda ((x) (+ base x)) - ((x y) (+ base x y)))) + ((x y) (+ (+ base x) y)))) (if (= 11 (add 1)) 0 (sys-exit 10)) (if (= 13 (add 1 2)) 0 (sys-exit 11)) diff --git a/tests/scheme1/20-letrec.expected-exit b/tests/scheme1/20-letrec.expected-exit @@ -1 +0,0 @@ -4 diff --git a/tests/scheme1/20-letrec.scm b/tests/scheme1/20-letrec.scm @@ -1,25 +0,0 @@ -; letrec / letrec*: local recursive bindings. letrec* is an alias of -; letrec in scheme1 (eval_letrec already evaluates inits left-to-right); -; the R7RS guarantee letrec* adds — later inits may reference earlier -; ones — is exercised below. - -;; --- letrec: classic recursive factorial --------------------------- -(if (= 120 (letrec ((fact (lambda (n) - (if (= n 0) 1 (* n (fact (- n 1))))))) - (fact 5))) - 0 (sys-exit 1)) - -;; --- letrec* accepts the same shape and value --------------------- -(if (= 120 (letrec* ((fact (lambda (n) - (if (= n 0) 1 (* n (fact (- n 1))))))) - (fact 5))) - 0 (sys-exit 2)) - -;; --- letrec*: later inits reference earlier ones ------------------- -;; y depends on x; z depends on both. + is 2-arg in scheme1, so each -;; init uses pairwise sums. -(if (= 6 (letrec* ((x 1) (y (+ x 2)) (z (+ y x))) - (+ z 2))) - 0 (sys-exit 3)) - -(sys-exit 4)