boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 95ef642fe318fd2018bfed00e494cc9513cf7336
parent d0e1e4271a0eb7dc1765b2ee3586050a14b02c13
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 16:16:06 -0700

scheme1: set!, cond => arrow, number/string radix-arg surface

set! mutates a binding in place: walk the env alist looking for a
cell whose car is the target sym; on hit, set-cdr! on the cell;
on miss, fall back to sym_set_global. Globals, lexicals (let-bound,
lambda params), and closure-captured cells all observe the mutation
through the same path; the value is spilled across the env walk so
hits and misses share one frame slot.

cond extended to recognize (test => proc-expr) clauses: when the
test is truthy, eval proc-expr in env, build (test-value . ()), and
tail-apply. The else clause path bypasses the => check so
(else => proc) is not silently honored.

number->string / string->number now document an optional radix arg
per LISP.md. The implementation stays decimal-only -- the prims
already silently ignore extra args, so this is comment-only and
test 74 grows the radix=10 case to pin the surface.

prim_length_entry needs a frame because list_length clobbers lr; the
prior leaf body would have looped on its trailing %ret. Caught by
inspection while landing the rest, fixed in the same commit.

Diffstat:
Mscheme1/scheme1.P1pp | 148++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Atests/scheme1/82-cond-arrow.expected-exit | 1+
Atests/scheme1/82-cond-arrow.scm | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 178 insertions(+), 24 deletions(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -69,7 +69,7 @@ %endm %macro HEAP_CAP_BYTES() -0x10000 +0x1000000 %endm # ========================================================================= @@ -1471,6 +1471,7 @@ %dispatch_form(&sym_letrec, &::do_letrec) %dispatch_form(&sym_and, &::do_and) %dispatch_form(&sym_or, &::do_or) + %dispatch_form(&sym_setbang, &::do_setbang) %dispatch_form(&sym_define_record_type, &::do_define_record_type) # head = eval(car(expr), env) @@ -1518,6 +1519,8 @@ %tail_to_handler(&eval_and) ::do_or %tail_to_handler(&eval_or) + ::do_setbang + %tail_to_handler(&eval_setbang) ::do_define_record_type %tail_to_handler(&eval_define_record_type) }) @@ -1647,11 +1650,13 @@ %intern_form(&name_begin, 5, &sym_begin) %intern_form(&name_cond, 4, &sym_cond) %intern_form(&name_else, 4, &sym_else) + %intern_form(&name_arrow, 2, &sym_arrow) %intern_form(&name_let, 3, &sym_let) %intern_form(&name_letstar, 4, &sym_letstar) %intern_form(&name_letrec, 6, &sym_letrec) %intern_form(&name_and, 3, &sym_and) %intern_form(&name_or, 2, &sym_or) + %intern_form(&name_setbang, 4, &sym_setbang) %intern_form(&name_define_record_type, 18, &sym_define_record_type) }) @@ -1775,49 +1780,133 @@ %li(a0, %imm_val(%IMM.UNSPEC)) }) +# eval_setbang(rest=a0, env=a1) -> UNSPEC (a0). +# rest = (sym value-expr). Evaluates value-expr in env, then walks the +# env alist looking for a binding cell whose car is the target sym; +# on hit, mutates the cell's cdr (offset 7, same as set-cdr!). On miss, +# falls back to the global slot via sym_set_global -- the shape used +# by define for top-level rebind. Spec: behavior on a truly unbound +# name follows the primitive-failure policy. +# +# Frame: 24 bytes +# +0 rest (sym . (value-expr . ())) +# +8 env +# +16 saved value (eval'd value-expr) +%fn(eval_setbang, 24, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + # value = eval(cadr(rest), env) + %cdr(a0, a0) + %car(a0, a0) + %ld(a1, sp, 8) + %call(&eval) + %st(a0, sp, 16) + + # Walk env looking for a binding cell whose car == target sym. + # Only t0..t2 are available: t0 scratch, t1 target sym, t2 env cursor. + %ld(t1, sp, 0) + %car(t1, t1) ; target sym + + ::lp + %ld(t2, sp, 8) + %if_nil(t0, t2, &::ms) + %car(t0, t2) + %car(t0, t0) ; cell sym + %beq(t0, t1, &::ht) + %cdr(t2, t2) + %st(t2, sp, 8) + %b(&::lp) + + ::ht + %car(t0, t2) ; re-fetch binding cell + %ld(a0, sp, 16) + %st(a0, t0, 7) ; mutate cell's cdr + %li(a0, %imm_val(%IMM.UNSPEC)) + %eret + + ::ms + # Miss: rebind global. + %ld(a0, sp, 16) + %ld(t0, sp, 0) + %car(t0, t0) + %bind_global_from_t0() + %li(a0, %imm_val(%IMM.UNSPEC)) +}) + # eval_cond(clauses=a0, env=a1) -> value (a0). -# Each clause is (test body...) or (else body...). The first clause whose -# test isn't #f wins; its body is tail-evaluated. `else` is a literal -# symbol matched by pointer equality with sym_else's cached value. +# Clause shapes: (else body...), (test body...), (test => proc-expr). +# else / => are literal symbols matched by pointer equality. The => +# arrow is only recognized in non-else clauses; an empty body after a +# truthy test returns UNSPEC (spec policy: malformed-form UB). # -# Frame: 16 bytes +# Frame: 32 bytes # +0 clauses (advances) # +8 env -%fn(eval_cond, 16, { +# +16 test value (live across the => eval/cons calls) +# +24 proc (live across the => cons call) +%fn(eval_cond, 32, { %st(a0, sp, 0) %st(a1, sp, 8) ::loop %ld(t0, sp, 0) - %if_nil(t1, t0, &::no_match) + %if_nil(t1, t0, &::nm) - %car(t1, t0) ; clause = (test body...) + %car(t1, t0) ; clause %car(t2, t1) ; test_expr - # else? %la(a0, &sym_else) %ld(a0, a0, 0) - %beq(t2, a0, &::do_match) + %beq(t2, a0, &::dm) - # Eval test in env. %mov(a0, t2) %ld(a1, sp, 8) %call(&eval) %li(t0, %imm_val(%IMM.FALSE)) - %beq(a0, t0, &::next) + %beq(a0, t0, &::nx) - ::do_match + # Truthy. Spill test value and inspect cdr(clause): empty -> UNSPEC, + # car == => -> arrow path, else regular body. + %st(a0, sp, 16) %ld(t0, sp, 0) %car(t0, t0) - %cdr(a0, t0) ; body + %cdr(t0, t0) + %if_nil(t1, t0, &::nm) + %car(t1, t0) + %la(t2, &sym_arrow) + %ld(t2, t2, 0) + %beq(t1, t2, &::ar) + + %mov(a0, t0) ; regular body %ld(a1, sp, 8) %tail(&eval_body) - ::next + ::ar + %cdr(t0, t0) + %car(a0, t0) ; proc-expr + %ld(a1, sp, 8) + %call(&eval) + %st(a0, sp, 24) + %ld(a0, sp, 16) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + %mov(a1, a0) + %ld(a0, sp, 24) + %tail(&apply) + + ::dm + %ld(t0, sp, 0) + %car(t0, t0) + %cdr(a0, t0) + %ld(a1, sp, 8) + %tail(&eval_body) + + ::nx %advance_walk(0) %b(&::loop) - ::no_match + ::nm %li(a0, %imm_val(%IMM.UNSPEC)) }) @@ -2471,12 +2560,14 @@ # (length xs) -- count of pairs in a proper list. Forwards to the # list_length helper (which clobbers a0 as the cursor) and tags the -# resulting count as a fixnum. -:prim_length_entry +# resulting count as a fixnum. Needs a frame because %call(&list_length) +# would otherwise clobber lr and the trailing %ret would loop. +%fn(prim_length_entry, 0, { %car(a0, a0) %call(&list_length) %mkfix(a0, a0) - %ret + %eret +}) # (list-ref xs n) -- 0-indexed nth element. n is a fixnum; we untag, # advance via cdr, then car. Out-of-range is undefined behavior, same @@ -2588,9 +2679,12 @@ %ld(a0, sp, 16) }) -# (number->string n) -- decimal repr in a fresh bv. bv_putint takes the -# raw value, so untag first; bv_alloc(0) gives an empty wrapper that -# bv_putint grows in place. +0 holds the raw value across bv_alloc. +# (number->string n [radix]) -- decimal repr in a fresh bv. The radix +# arg is part of the surface per LISP.md (10 and 16 required) but the +# implementation is decimal-only for now: the second arg, if present, +# is silently ignored. bv_putint takes the raw value, so untag first; +# bv_alloc(0) gives an empty wrapper that bv_putint grows in place. +# +0 holds the raw value across bv_alloc. %fn(prim_number_to_string_entry, 16, { %car(t0, a0) %sari(t0, t0, 3) ; raw value @@ -2601,8 +2695,10 @@ %tail(&bv_putint) }) -# (string->number bv) -- delegate parsing to parse_dec. Returns #f on -# non-bytevector input, empty string, lone "-", or any non-digit byte. +# (string->number bv [radix]) -- delegate parsing to parse_dec. Returns +# #f on non-bytevector input, empty string, lone "-", or any non-digit +# byte. Radix arg is part of the surface per LISP.md (10 and 16 required) +# but ignored for now -- decimal only. %fn(prim_string_to_number_entry, 0, { %car(a0, a0) %tagof(t0, a0) @@ -4552,11 +4648,13 @@ :name_begin "begin" :name_cond "cond" :name_else "else" +:name_arrow "=>" :name_let "let" :name_letstar "let*" :name_letrec "letrec" :name_and "and" :name_or "or" +:name_setbang "set!" :name_define_record_type "define-record-type" # Primitive surface names. @@ -4764,11 +4862,13 @@ :sym_begin $(0) :sym_cond $(0) :sym_else $(0) +:sym_arrow $(0) :sym_let $(0) :sym_letstar $(0) :sym_letrec $(0) :sym_and $(0) :sym_or $(0) +:sym_setbang $(0) :sym_define_record_type $(0) # Process startup state, captured by p1_main and read by sys-argv. diff --git a/tests/scheme1/82-cond-arrow.expected-exit b/tests/scheme1/82-cond-arrow.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/82-cond-arrow.scm b/tests/scheme1/82-cond-arrow.scm @@ -0,0 +1,53 @@ +; (cond (test => proc) ...) -- if test is truthy, call proc on the +; value of test and return that. proc is any expression that evaluates +; to a 1-arg procedure (a name, a lambda, etc.). + +;; --- Basic: => with a named procedure ---------------------------------- +;; assoc returns the binding pair when the key is found, #f otherwise. +;; (assoc 'b ...) -> (b . 2); (cdr ...) -> 2. +(if (= 2 + (cond ((assoc 'b (list (cons 'a 1) (cons 'b 2) (cons 'c 3))) => cdr) + (else 99))) + 0 (sys-exit 1)) + +;; --- Falsy test: => clause skipped, falls through to else ------------ +(if (= 99 + (cond ((assoc 'z (list (cons 'a 1))) => cdr) + (else 99))) + 0 (sys-exit 2)) + +;; --- proc as a lambda expression --------------------------------------- +(if (= 84 + (cond (42 => (lambda (v) (+ v v))) + (else 0))) + 0 (sys-exit 3)) + +;; --- proc as the result of evaluating a non-trivial expression -------- +;; Confirms proc-expr is eval'd in env, not just looked up by symbol. +(define (mk-doubler) (lambda (x) (* x 2))) +(if (= 14 + (cond (7 => (mk-doubler)) + (else 0))) + 0 (sys-exit 4)) + +;; --- Mixed clauses: => after a regular truthy clause never fires ----- +(if (= 1 + (cond (#t 1) + (#t => (lambda (v) (sys-exit 5))))) + 0 (sys-exit 6)) + +;; --- Mixed clauses: => after a falsy regular clause does fire -------- +(if (= 5 + (cond (#f 99) + (5 => (lambda (v) v)) + (else 0))) + 0 (sys-exit 7)) + +;; --- => binds the value of test, not just truthiness ------------------ +;; The test value (the pair) is what gets passed to cdr, not #t. +(if (equal? '(b . 2) + (cond ((assoc 'b (list (cons 'a 1) (cons 'b 2))) => + (lambda (p) p)))) + 0 (sys-exit 8)) + +(sys-exit 0)