boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit b186d29846b5a79aecd11a685b1517b3a2360b10
parent 1345a2a22f182156e9c40e5eb4c3cb63cd61ab35
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 06:32:49 -0700

Add scheme1 `if` special form and #t/#f reader literals

Wire eval's pair branch through a cached sym_if slot before the apply
fall-through; intern_special_forms runs at startup to reserve the
special-form symbol indices. parse_one's first-byte cascade gains a `#`
arm that maps `#t` and `#f` to the TRUE/FALSE immediates, leaving room
for `#x.../#\\...` later. Tests cover both branches of `if`.

Diffstat:
Mscheme1/scheme1.P1pp | 106++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Atests/scheme1/01-if.expected-exit | 1+
Atests/scheme1/01-if.scm | 1+
Atests/scheme1/02-if-false.expected-exit | 1+
Atests/scheme1/02-if-false.scm | 1+
5 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -175,7 +175,8 @@ %la(t1, &heap_next) %st(t0, t1, 0) - # Bind built-in primitives. + # Reserve special-form symbol indices, then bind built-in primitives. + %call(&intern_special_forms) %call(&register_primitives) # argv[1] is the source path (NUL-terminated cstr from the kernel). @@ -394,6 +395,8 @@ %beqz(a1, &::lparen) %addi(a1, a0, -41) %beqz(a1, &::rparen) + %addi(a1, a0, -35) + %beqz(a1, &::hash) %call(&parse_atom) %eret @@ -410,6 +413,32 @@ ::rparen %die(msg_unexp_rparen, 6) + ::hash + # Consume '#' plus its type byte; dispatch on the type byte. + # Other #-prefixed forms (#xHEX, #\char) come later. + %la(t2, &readbuf_pos) + %ld(t0, t2, 0) + %addi(t0, t0, 1) + %la(t1, &readbuf_len) + %ld(t1, t1, 0) + %beq(t0, t1, &::eof) + %readbuf_byte(a0, t0) + %addi(t0, t0, 1) + %st(t0, t2, 0) + %addi(a1, a0, -116) ; 't' + %beqz(a1, &::true_lit) + %addi(a1, a0, -102) ; 'f' + %beqz(a1, &::false_lit) + %die(msg_bad_hash, 6) + + ::true_lit + %li(a0, %imm_val(%IMM.TRUE)) + %eret + + ::false_lit + %li(a0, %imm_val(%IMM.FALSE)) + %eret + ::eof %die(msg_unexp_eof, 6) }) @@ -617,6 +646,15 @@ %die(msg_unbound, 7) ::pair + # Special-form dispatch: pointer-compare head against the cached + # special-form symbol values. SYM is a distinct tag, so a head that + # isn't a symbol cannot collide with any sym_* slot. + %ld(t0, sp, 0) + %car(t0, t0) ; t0 = head + %la(t1, &sym_if) + %ld(t1, t1, 0) + %beq(t0, t1, &::do_if) + # head = eval(car(expr), env) %ld(a0, sp, 0) %car(a0, a0) @@ -634,6 +672,13 @@ %mov(a1, a0) %ld(a0, sp, 16) %call(&apply) + %eret + + ::do_if + %ld(a0, sp, 0) + %cdr(a0, a0) + %ld(a1, sp, 8) + %call(&eval_if) }) # eval_args(args=a0, env=a1) -> evaluated args list (cons-built). @@ -694,6 +739,59 @@ }) # ========================================================================= +# Special forms +# ========================================================================= +# +# intern_special_forms runs at startup, before register_primitives, so +# the symbols `if`, ... occupy the first sym_idx slots (per LISP-C.md +# §Reservation convention). For now we just cache each one's tagged +# value in a labeled slot; eval's pair branch compares head against +# these slots before falling through to ordinary application. + +%fn(intern_special_forms, 0, { + %la(a0, &name_if) + %li(a1, 2) + %call(&intern) + %la(t0, &sym_if) + %st(a0, t0, 0) +}) + +# eval_if(rest=a0, env=a1) -> value (a0). `rest` is (test then else). +# No arity check here -- spec policy: malformed special forms are UB. +# +# Frame: 16 bytes +# +0 rest +# +8 env +%fn(eval_if, 16, { + %st(a0, sp, 0) + %st(a1, sp, 8) + + # val = eval(car(rest), env) + %car(a0, a0) + %call(&eval) + + %li(t0, %imm_val(%IMM.FALSE)) + %beq(a0, t0, &::else_branch) + + # then-branch: eval(cadr(rest), env) + %ld(a0, sp, 0) + %cdr(a0, a0) + %car(a0, a0) + %ld(a1, sp, 8) + %call(&eval) + %eret + + ::else_branch + # else-branch: eval(caddr(rest), env) + %ld(a0, sp, 0) + %cdr(a0, a0) + %cdr(a0, a0) + %car(a0, a0) + %ld(a1, sp, 8) + %call(&eval) +}) + +# ========================================================================= # Primitives # ========================================================================= # @@ -746,12 +844,14 @@ # Surface names. Length is hard-coded at the call site; no NUL needed # because intern takes (ptr, len). Aligned padding via "\0" bytes is # fine -- M0 emits ASCII verbatim. +:name_if "if" :name_sys_exit "sys-exit" :msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00' :msg_load_fail "scheme1: failed to read source" '0a' '00' :msg_symtab_full "scheme1: symbol table full" '0a' '00' :msg_unexp_rparen "scheme1: unexpected ')'" '0a' '00' +:msg_bad_hash "scheme1: bad #-syntax" '0a' '00' :msg_unexp_eof "scheme1: unexpected EOF in form" '0a' '00' :msg_unterm_list "scheme1: unterminated list" '0a' '00' :msg_unbound "scheme1: unbound variable" '0a' '00' @@ -783,6 +883,10 @@ # Symbol table count (number of entries used). :symtab_count $(0) +# Cached tagged-symbol values for special forms (filled by +# intern_special_forms at startup). +:sym_if $(0) + # Pointer slots for the past-:ELF_end arenas. :readbuf_buf_ptr $(0) :heap_buf_ptr $(0) diff --git a/tests/scheme1/01-if.expected-exit b/tests/scheme1/01-if.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/01-if.scm b/tests/scheme1/01-if.scm @@ -0,0 +1 @@ +(if 1 (sys-exit 42) (sys-exit 99)) diff --git a/tests/scheme1/02-if-false.expected-exit b/tests/scheme1/02-if-false.expected-exit @@ -0,0 +1 @@ +42 diff --git a/tests/scheme1/02-if-false.scm b/tests/scheme1/02-if-false.scm @@ -0,0 +1 @@ +(if #f (sys-exit 99) (sys-exit 42))