scheme1: and/or special forms; drop embedded prelude - boot2

commit f60a666570b19da819acf341d4a6c70ebd094df1
parent efbfccaedff611ddde6f3d4c6d3cf2f68e9f09b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 13:28:01 -0700

scheme1: and/or special forms; drop embedded prelude

Diffstat:
M docs/scheme-shell-todo.md  | 31 ++++++++++---------------------
M scheme1/scheme1.P1pp  | 227 ++++++++++++++++++++++++++++++++++++-------------------------------------------
A tests/scheme1/46-and.expected-exit  | 1 +
A tests/scheme1/46-and.scm  | 8 ++++++++
A tests/scheme1/47-or.expected-exit  | 1 +
A tests/scheme1/47-or.scm  | 7 +++++++

6 files changed, 130 insertions(+), 145 deletions(-)
diff --git a/docs/scheme-shell-todo.md b/docs/scheme-shell-todo.md
@@ -55,7 +55,7 @@ Per LISP.md and LISP-C.md, but not implemented:
   - `"…"` string / bytevector literals with `\n \t \r \\ \"` escapes
   - `#\char` literals (printable ASCII, named forms, `#\xNN`)
   - Source-location side table (line:col → pair) used by `error`
-- [ ] **Special forms missing**: `set!`, `and`, `or`, `pmatch`, `cond`'s
+- [ ] **Special forms missing**: `set!`, `pmatch`, `cond`'s
   `=>` arrow form. `pmatch` is called out by LISP-C.md as a built-in
   special form needed by the self-hosted compiler.
 - [ ] **Primitives missing** (LISP.md lists them as required):
@@ -83,13 +83,14 @@ Per LISP.md and LISP-C.md, but not implemented:
   `read-all` / `bv-concat-reverse` / `write-bytes` / `write-line` are
   NOT in the prelude.** Only the process-management half of shell.scm
   is ported.
-- [ ] **Prelude is a strict subset of `lisp/prelude.scm`.** Missing:
-  `<= >= zero? positive? negative? abs caar cadr cdar cddr caddr
-  list? assoc member filter fold equal?` and the vector helpers
-  (`vector->list`, `list->vector`, `equal?-vector`, etc.). The
-  vector helpers reference `vector-ref` / `vector-length` /
-  `make-vector` which we don't have, so they cannot be embedded
-  unchanged.
+- [ ] **`scheme1/prelude.scm` is a strict subset of `lisp/prelude.scm`.**
+  Active set: `<=`, `>=`, `negative?`, `abs`, `caar/cadr/cdar/cddr/caddr`,
+  `list?`, `assoc`, `member`, `filter`, `fold`, plus the inherited
+  list/shell helpers. Commented-out placeholders for `positive?`
+  (needs `>`), `vector->list` / `list->vector` (need `make-vector` /
+  `vector-ref` / `vector-set!` / `vector-length`), and `equal?` (needs
+  `string?` / `vector?` plus their ref/length) wait on the
+  corresponding primitives.
 - [ ] **`error` primitive** — exits with no formatted message; no
   "at file:line:col:" prefix.
 
@@ -97,18 +98,6 @@ Per LISP.md and LISP-C.md, but not implemented:
 
 These work today but are easy to break.
 
-- [ ] **`skip_ws` treats NUL (`\0`) as whitespace.** The embedded
-  prelude is a chain of `"…"` literals separated by `'0a'` (LF), and
-  M0 auto-NUL-terminates each `"…"`. Without the NUL skip the parser
-  treats those NULs as the start of a new symbol. Side effects:
-  - User source files that contain literal NULs are silently skipped,
-    which is non-standard.
-  - `parse_atom` does **not** skip NULs — an atom that ends exactly at
-    a M0 string-literal boundary in the prelude would absorb the
-    trailing NUL into the token. The current prelude doesn't trigger
-    this because every line ends on a paren or whitespace, but
-    re-flowing the prelude can break it silently.
-
 - [ ] **Bytevector NUL-termination via headroom.** `bv_capacity_for`
   returns the smallest power of two strictly greater than `n`. The
   byte at index `length` is the zero-init NUL terminator and we hand
@@ -262,7 +251,7 @@ In rough priority order:
 4. Fill in the spec-required primitives (`equal?`, `eqv?`, `set-car!`,
    `set-cdr!`, the comparison family, the bytevector family, the
    number/string converters).
-5. `set!`, `and`, `or`, `pmatch`.
+5. `set!`, `pmatch`.
 6. Reader: `"…"` strings, `#\char`, source locations.
 7. Port shell.scm's port record + I/O wrappers.
 8. Replace the 1024-slot linear-scan symtab with an open-addressing
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -23,6 +23,10 @@
 #   catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp scheme1/scheme1.P1pp \
 #     | m1pp -> M0 -> hex2 -> ELF
 #
+# At run time, scripts/boot-run-scheme1.sh catm's scheme1/prelude.scm
+# in front of the user .scm and passes the combined file to the binary,
+# so the interpreter itself has no embedded prelude.
+#
 # Memory model: the ELF's ph_memsz is 8 MB (boot2 default), so all
 # zero-initialized arenas live past :ELF_end and cost zero file bytes.
 # p1_main writes their absolute addresses into pointer slots once at
@@ -294,12 +298,11 @@
     %st(t0, t1, 0)
 
     # Reserve special-form symbol indices, then bind built-in primitives.
+    # The Scheme prelude is catm'd into argv[1] by scripts/run-scheme1.sh
+    # before scheme1 starts, so there's no separate prelude eval pass.
     %call(&intern_special_forms)
     %call(&register_primitives)
 
-    # Evaluate the embedded prelude before the user source.
-    %call(&eval_prelude)
-
     # argv[1] is the source path (NUL-terminated cstr from the kernel).
     %ld(a1, sp, 0)
     %ld(a0, a1, 8)
@@ -372,52 +375,6 @@
     %die(msg_load_fail)
 })
 
-# eval_prelude -- copy the embedded prelude bytes into readbuf, run the
-# read-eval loop until the prelude is exhausted, then return so
-# load_source can later refill readbuf with the user file.
-%fn(eval_prelude, 0, {
-    %la(a1, &prelude_src)
-    %la(a2, &prelude_src_end)
-    %sub(a2, a2, a1)
-
-    # Defensive: prelude is fixed at compile time, but the readbuf cap
-    # isn't. If somebody trims the readbuf below the prelude size, we
-    # need to surface it instead of overrunning into the heap arena.
-    %li(t0, %READBUF_CAP_BYTES)
-    %bltu(t0, a2, &::too_big)
-
-    %la(t0, &readbuf_buf_ptr)
-    %ld(a0, t0, 0)
-    %call(&memcpy)
-
-    %la(t0, &prelude_src)
-    %la(t1, &prelude_src_end)
-    %sub(t0, t1, t0)
-    %la(t1, &readbuf_len)
-    %st(t0, t1, 0)
-    %li(t0, 0)
-    %la(t1, &readbuf_pos)
-    %st(t0, t1, 0)
-
-    ::loop
-    %call(&skip_ws)
-    %la(t0, &readbuf_pos)
-    %ld(t0, t0, 0)
-    %la(t1, &readbuf_len)
-    %ld(t1, t1, 0)
-    %beq(t0, t1, &::done)
-    %call(&parse_one)
-    %li(a1, %imm_val(%IMM.NIL))
-    %call(&eval)
-    %b(&::loop)
-
-    ::done
-    %eret
-
-    ::too_big
-    %die(msg_readbuf_full)
-})
-
 # =========================================================================
 # Heap: cons (leaf) and alloc_hdr (leaf)
 # =========================================================================
@@ -595,9 +552,7 @@
 # The reader is called recursively from parse_list, so every state goes
 # through frame slots, not s-registers.
 
-# Skip whitespace (ASCII 32, 9, 10, 13), NUL bytes (M0 auto-terminates
-# string literals, so the embedded prelude has NULs we want to ignore),
-# and `;`-to-LF comments. Leaf.
+# Skip whitespace (ASCII 32, 9, 10, 13) and `;`-to-LF comments. Leaf.
 :skip_ws
 %scope skip_ws
     %la(t2, &readbuf_pos)
@@ -608,7 +563,6 @@
     %beq(t0, t1, &::done)
     %readbuf_byte(a0, t0)
     %is_ws_branch(a1, a0, &::step)
-    %beqz(a0, &::step)            ; NUL
     %addi(a1, a0, -59)            ; ';'
     %beqz(a1, &::comment)
     %b(&::done)
@@ -1020,6 +974,8 @@
     %dispatch_form(&sym_let,     &::do_let)
     %dispatch_form(&sym_letstar, &::do_letstar)
     %dispatch_form(&sym_letrec,  &::do_letrec)
+    %dispatch_form(&sym_and,     &::do_and)
+    %dispatch_form(&sym_or,      &::do_or)
     %dispatch_form(&sym_define_record_type, &::do_define_record_type)
 
     # head = eval(car(expr), env)
@@ -1063,6 +1019,10 @@
     %tail_to_handler(&eval_letstar)
     ::do_letrec
     %tail_to_handler(&eval_letrec)
+    ::do_and
+    %tail_to_handler(&eval_and)
+    ::do_or
+    %tail_to_handler(&eval_or)
     ::do_define_record_type
     %tail_to_handler(&eval_define_record_type)
 })
@@ -1188,6 +1148,8 @@
     %intern_form(&name_let,     3, &sym_let)
     %intern_form(&name_letstar, 4, &sym_letstar)
     %intern_form(&name_letrec,  6, &sym_letrec)
+    %intern_form(&name_and,     3, &sym_and)
+    %intern_form(&name_or,      2, &sym_or)
     %intern_form(&name_define_record_type, 18, &sym_define_record_type)
 })
 
@@ -1554,6 +1516,90 @@
     %tail(&eval_body)
 })
 
+# eval_and(rest=a0, env=a1) -> value (a0).
+# (and) is #t. Otherwise eval forms left-to-right, short-circuiting to #f
+# the moment one yields #f. The last form is tail-evaluated so a tail call
+# inside `and` doesn't grow the host stack.
+#
+# Frame: 16 bytes
+#   +0  rest
+#   +8  env
+%fn(eval_and, 16, {
+    %li(t0, %imm_val(%IMM.TRUE))
+    %if_nil(t1, a0, &::done_imm)
+
+    ::loop
+    %st(a0, sp, 0)
+    %st(a1, sp, 8)
+
+    # If cdr(rest) is NIL, the head is the last form -> tail-eval.
+    %cdr(t0, a0)
+    %if_nil(t1, t0, &::last)
+
+    # Non-last: eval, short-circuit on #f, otherwise advance.
+    %car(a0, a0)
+    %call(&eval)
+    %li(t0, %imm_val(%IMM.FALSE))
+    %beq(a0, t0, &::done)
+    %ld(a0, sp, 0)
+    %cdr(a0, a0)
+    %ld(a1, sp, 8)
+    %b(&::loop)
+
+    ::last
+    %ld(a0, sp, 0)
+    %car(a0, a0)
+    %ld(a1, sp, 8)
+    %tail(&eval)
+
+    ::done
+    %eret
+
+    ::done_imm
+    %mov(a0, t0)
+})
+
+# eval_or(rest=a0, env=a1) -> value (a0).
+# (or) is #f. Otherwise eval forms left-to-right and return the first
+# non-#f value; if every form was #f, return #f. The last form is
+# tail-evaluated.
+#
+# Frame: 16 bytes
+#   +0  rest
+#   +8  env
+%fn(eval_or, 16, {
+    %li(t0, %imm_val(%IMM.FALSE))
+    %if_nil(t1, a0, &::done_imm)
+
+    ::loop
+    %st(a0, sp, 0)
+    %st(a1, sp, 8)
+
+    %cdr(t0, a0)
+    %if_nil(t1, t0, &::last)
+
+    %car(a0, a0)
+    %call(&eval)
+    %li(t0, %imm_val(%IMM.FALSE))
+    %bne(a0, t0, &::done)
+    %ld(a0, sp, 0)
+    %cdr(a0, a0)
+    %ld(a1, sp, 8)
+    %b(&::loop)
+
+    ::last
+    %ld(a0, sp, 0)
+    %car(a0, a0)
+    %ld(a1, sp, 8)
+    %tail(&eval)
+
+    ::done
+    %eret
+
+    ::done_imm
+    %mov(a0, t0)
+})
+
 # eval_let_named(rest=a0, env=a1) -> value (a0).
 # rest = (name bindings . body). Builds a closure whose captured env
 # contains a self-binding that resolves `name` to the closure itself
@@ -3048,6 +3094,8 @@
 :name_let         "let"
 :name_letstar     "let*"
 :name_letrec      "letrec"
+:name_and         "and"
+:name_or          "or"
 :name_define_record_type "define-record-type"
 
 # Primitive surface names.
@@ -3140,77 +3188,6 @@
 &name_eof_objectq %(0)  $(11)  &prim_eof_objectq_entry  %(0)
 :prim_table_end
 
-# Embedded Scheme prelude. Length = prelude_src_end - prelude_src,
-# computed at startup. Each form is parsed and evaluated under the
-# global env before the user file runs, so the user's source can
-# rely on these helpers without re-defining them.
-:prelude_src
-"(define (list . xs) xs)" '0a'
-"(define (length xs)" '0a'
-"  (let loop ((xs xs) (n 0))" '0a'
-"    (if (null? xs) n (loop (cdr xs) (+ n 1)))))" '0a'
-"(define (reverse xs)" '0a'
-"  (let loop ((xs xs) (acc (quote ())))" '0a'
-"    (if (null? xs) acc (loop (cdr xs) (cons (car xs) acc)))))" '0a'
-"(define (append-pair a b)" '0a'
-"  (if (null? a) b (cons (car a) (append-pair (cdr a) b))))" '0a'
-"(define (append . lists)" '0a'
-"  (cond ((null? lists) (quote ()))" '0a'
-"        ((null? (cdr lists)) (car lists))" '0a'
-"        (else (append-pair (car lists) (apply append (cdr lists))))))" '0a'
-"(define (list-ref xs n)" '0a'
-"  (if (= n 0) (car xs) (list-ref (cdr xs) (- n 1))))" '0a'
-"(define (map f xs)" '0a'
-"  (if (null? xs) (quote ()) (cons (f (car xs)) (map f (cdr xs)))))" '0a'
-"(define (for-each f xs)" '0a'
-"  (if (null? xs) (quote ()) (begin (f (car xs)) (for-each f (cdr xs)))))" '0a'
-
-# shell.scm port: process-management wrappers built on top of the syscall
-# primitives. sys-wait is a Scheme adapter over sys-waitid that returns a
-# wait4-style raw wstatus so decode-wait-status can stay unchanged.
-
-"(define (sys-wait pid)" '0a'
-"  (let ((info (make-bytevector 128 0)))" '0a'
-"    (let ((r (sys-waitid 1 pid info 4)))" '0a'
-"      (if (car r)" '0a'
-"          (let ((code (bytevector-u8-ref info 8))" '0a'
-"                (status (bytevector-u8-ref info 24)))" '0a'
-"            (if (= code 1)" '0a'
-"                (cons #t (arithmetic-shift status 8))" '0a'
-"                (cons #t (bit-and status #x7f))))" '0a'
-"          r))))" '0a'
-
-"(define (decode-wait-status s)" '0a'
-"  (let ((termsig (bit-and s #x7f)))" '0a'
-"    (if (zero? termsig)" '0a'
-"        (bit-and (arithmetic-shift s -8) #xff)" '0a'
-"        (+ 128 termsig))))" '0a'
-
-"(define (wait pid)" '0a'
-"  (let ((r (sys-wait pid)))" '0a'
-"    (if (car r)" '0a'
-"        (cons #t (decode-wait-status (cdr r)))" '0a'
-"        r)))" '0a'
-
-"(define (exit . rest)" '0a'
-"  (sys-exit (if (null? rest) 0 (car rest))))" '0a'
-
-"(define (argv) (sys-argv))" '0a'
-
-"(define (spawn prog . args)" '0a'
-"  (let ((r (sys-clone)))" '0a'
-"    (cond" '0a'
-"      ((not (car r)) r)" '0a'
-"      ((zero? (cdr r))" '0a'
-"       (sys-execve prog (cons prog args))" '0a'
-"       (sys-exit 127))" '0a'
-"      (else r))))" '0a'
-
-"(define (run prog . args)" '0a'
-"  (let ((r (apply spawn prog args)))" '0a'
-"    (if (car r) (wait (cdr r)) r)))" '0a'
-:prelude_src_end
-
 :msg_usage          "scheme1: usage: scheme1 SOURCE.scm" '0a' '00'
 :msg_load_fail      "scheme1: failed to read source" '0a' '00'
 :msg_symtab_full    "scheme1: symbol table full" '0a' '00'
@@ -3266,6 +3243,8 @@
 :sym_let          $(0)
 :sym_letstar      $(0)
 :sym_letrec       $(0)
+:sym_and          $(0)
+:sym_or           $(0)
 :sym_define_record_type $(0)
 
 # Process startup state, captured by p1_main and read by sys-argv.
diff --git a/tests/scheme1/46-and.expected-exit b/tests/scheme1/46-and.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/46-and.scm b/tests/scheme1/46-and.scm
@@ -0,0 +1,8 @@
+; (and) is #t; (and a b c) returns last when all truthy; (and a #f c) is #f.
+; Short-circuits left-to-right.
+(if (and) 0 (sys-exit 1))                   ; (and) -> #t
+(if (and #f) (sys-exit 2) 0)                ; (and #f) -> #f
+(if (and 5) 0 (sys-exit 3))                 ; single arg returns its value
+(if (and 1 #f 3) (sys-exit 4) 0)            ; falsy middle yields #f
+(if (and #f (sys-exit 99)) (sys-exit 5) 0)  ; rest unevaluated after #f
+(sys-exit (and 1 2 42))                     ; returns last value when all truthy
diff --git a/tests/scheme1/47-or.expected-exit b/tests/scheme1/47-or.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/47-or.scm b/tests/scheme1/47-or.scm
@@ -0,0 +1,7 @@
+; (or) is #f; (or a b c) returns first non-#f or the last; short-circuits.
+(if (or) (sys-exit 1) 0)                    ; (or) -> #f
+(if (or 5) 0 (sys-exit 2))                  ; single arg returns its value
+(if (or #f #f 7) 0 (sys-exit 3))            ; falls through to the truthy tail
+(if (or #f #f #f) (sys-exit 4) 0)           ; all #f -> #f
+(if (or 5 (sys-exit 99)) 0 (sys-exit 5))    ; truthy short-circuits the rest
+(sys-exit (or #f #f 42))                    ; returns last value when no truthy earlier

	boot2 Playing with the boostrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README

M	docs/scheme-shell-todo.md	\|	31	++++++++++---------------------
M	scheme1/scheme1.P1pp	\|	227	++++++++++++++++++++++++++++++++++++-------------------------------------------
A	tests/scheme1/46-and.expected-exit	\|	1	+
A	tests/scheme1/46-and.scm	\|	8	++++++++
A	tests/scheme1/47-or.expected-exit	\|	1	+
A	tests/scheme1/47-or.scm	\|	7	+++++++