commit f60a666570b19da819acf341d4a6c70ebd094df1
parent efbfccaedff611ddde6f3d4c6d3cf2f68e9f09b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 25 Apr 2026 13:28:01 -0700
scheme1: and/or special forms; drop embedded prelude
Diffstat:
6 files changed, 130 insertions(+), 145 deletions(-)
diff --git a/docs/scheme-shell-todo.md b/docs/scheme-shell-todo.md
@@ -55,7 +55,7 @@ Per LISP.md and LISP-C.md, but not implemented:
- `"…"` string / bytevector literals with `\n \t \r \\ \"` escapes
- `#\char` literals (printable ASCII, named forms, `#\xNN`)
- Source-location side table (line:col → pair) used by `error`
-- [ ] **Special forms missing**: `set!`, `and`, `or`, `pmatch`, `cond`'s
+- [ ] **Special forms missing**: `set!`, `pmatch`, `cond`'s
`=>` arrow form. `pmatch` is called out by LISP-C.md as a built-in
special form needed by the self-hosted compiler.
- [ ] **Primitives missing** (LISP.md lists them as required):
@@ -83,13 +83,14 @@ Per LISP.md and LISP-C.md, but not implemented:
`read-all` / `bv-concat-reverse` / `write-bytes` / `write-line` are
NOT in the prelude.** Only the process-management half of shell.scm
is ported.
-- [ ] **Prelude is a strict subset of `lisp/prelude.scm`.** Missing:
- `<= >= zero? positive? negative? abs caar cadr cdar cddr caddr
- list? assoc member filter fold equal?` and the vector helpers
- (`vector->list`, `list->vector`, `equal?-vector`, etc.). The
- vector helpers reference `vector-ref` / `vector-length` /
- `make-vector` which we don't have, so they cannot be embedded
- unchanged.
+- [ ] **`scheme1/prelude.scm` is a strict subset of `lisp/prelude.scm`.**
+ Active set: `<=`, `>=`, `negative?`, `abs`, `caar/cadr/cdar/cddr/caddr`,
+ `list?`, `assoc`, `member`, `filter`, `fold`, plus the inherited
+ list/shell helpers. Commented-out placeholders for `positive?`
+ (needs `>`), `vector->list` / `list->vector` (need `make-vector` /
+ `vector-ref` / `vector-set!` / `vector-length`), and `equal?` (needs
+ `string?` / `vector?` plus their ref/length) wait on the
+ corresponding primitives.
- [ ] **`error` primitive** — exits with no formatted message; no
"at file:line:col:" prefix.
@@ -97,18 +98,6 @@ Per LISP.md and LISP-C.md, but not implemented:
These work today but are easy to break.
-- [ ] **`skip_ws` treats NUL (`\0`) as whitespace.** The embedded
- prelude is a chain of `"…"` literals separated by `'0a'` (LF), and
- M0 auto-NUL-terminates each `"…"`. Without the NUL skip the parser
- treats those NULs as the start of a new symbol. Side effects:
- - User source files that contain literal NULs are silently skipped,
- which is non-standard.
- - `parse_atom` does **not** skip NULs — an atom that ends exactly at
- a M0 string-literal boundary in the prelude would absorb the
- trailing NUL into the token. The current prelude doesn't trigger
- this because every line ends on a paren or whitespace, but
- re-flowing the prelude can break it silently.
-
- [ ] **Bytevector NUL-termination via headroom.** `bv_capacity_for`
returns the smallest power of two strictly greater than `n`. The
byte at index `length` is the zero-init NUL terminator and we hand
@@ -262,7 +251,7 @@ In rough priority order:
4. Fill in the spec-required primitives (`equal?`, `eqv?`, `set-car!`,
`set-cdr!`, the comparison family, the bytevector family, the
number/string converters).
-5. `set!`, `and`, `or`, `pmatch`.
+5. `set!`, `pmatch`.
6. Reader: `"…"` strings, `#\char`, source locations.
7. Port shell.scm's port record + I/O wrappers.
8. Replace the 1024-slot linear-scan symtab with an open-addressing
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -23,6 +23,10 @@
# catm P1-<arch>.M1pp P1.M1pp P1pp.P1pp scheme1/scheme1.P1pp \
# | m1pp -> M0 -> hex2 -> ELF
#
+# At run time, scripts/boot-run-scheme1.sh catm's scheme1/prelude.scm
+# in front of the user .scm and passes the combined file to the binary,
+# so the interpreter itself has no embedded prelude.
+#
# Memory model: the ELF's ph_memsz is 8 MB (boot2 default), so all
# zero-initialized arenas live past :ELF_end and cost zero file bytes.
# p1_main writes their absolute addresses into pointer slots once at
@@ -294,12 +298,11 @@
%st(t0, t1, 0)
# Reserve special-form symbol indices, then bind built-in primitives.
+ # The Scheme prelude is catm'd into argv[1] by scripts/run-scheme1.sh
+ # before scheme1 starts, so there's no separate prelude eval pass.
%call(&intern_special_forms)
%call(®ister_primitives)
- # Evaluate the embedded prelude before the user source.
- %call(&eval_prelude)
-
# argv[1] is the source path (NUL-terminated cstr from the kernel).
%ld(a1, sp, 0)
%ld(a0, a1, 8)
@@ -372,52 +375,6 @@
%die(msg_load_fail)
})
-# eval_prelude -- copy the embedded prelude bytes into readbuf, run the
-# read-eval loop until the prelude is exhausted, then return so
-# load_source can later refill readbuf with the user file.
-%fn(eval_prelude, 0, {
- %la(a1, &prelude_src)
- %la(a2, &prelude_src_end)
- %sub(a2, a2, a1)
-
- # Defensive: prelude is fixed at compile time, but the readbuf cap
- # isn't. If somebody trims the readbuf below the prelude size, we
- # need to surface it instead of overrunning into the heap arena.
- %li(t0, %READBUF_CAP_BYTES)
- %bltu(t0, a2, &::too_big)
-
- %la(t0, &readbuf_buf_ptr)
- %ld(a0, t0, 0)
- %call(&memcpy)
-
- %la(t0, &prelude_src)
- %la(t1, &prelude_src_end)
- %sub(t0, t1, t0)
- %la(t1, &readbuf_len)
- %st(t0, t1, 0)
- %li(t0, 0)
- %la(t1, &readbuf_pos)
- %st(t0, t1, 0)
-
- ::loop
- %call(&skip_ws)
- %la(t0, &readbuf_pos)
- %ld(t0, t0, 0)
- %la(t1, &readbuf_len)
- %ld(t1, t1, 0)
- %beq(t0, t1, &::done)
- %call(&parse_one)
- %li(a1, %imm_val(%IMM.NIL))
- %call(&eval)
- %b(&::loop)
-
- ::done
- %eret
-
- ::too_big
- %die(msg_readbuf_full)
-})
-
# =========================================================================
# Heap: cons (leaf) and alloc_hdr (leaf)
# =========================================================================
@@ -595,9 +552,7 @@
# The reader is called recursively from parse_list, so every state goes
# through frame slots, not s-registers.
-# Skip whitespace (ASCII 32, 9, 10, 13), NUL bytes (M0 auto-terminates
-# string literals, so the embedded prelude has NULs we want to ignore),
-# and `;`-to-LF comments. Leaf.
+# Skip whitespace (ASCII 32, 9, 10, 13) and `;`-to-LF comments. Leaf.
:skip_ws
%scope skip_ws
%la(t2, &readbuf_pos)
@@ -608,7 +563,6 @@
%beq(t0, t1, &::done)
%readbuf_byte(a0, t0)
%is_ws_branch(a1, a0, &::step)
- %beqz(a0, &::step) ; NUL
%addi(a1, a0, -59) ; ';'
%beqz(a1, &::comment)
%b(&::done)
@@ -1020,6 +974,8 @@
%dispatch_form(&sym_let, &::do_let)
%dispatch_form(&sym_letstar, &::do_letstar)
%dispatch_form(&sym_letrec, &::do_letrec)
+ %dispatch_form(&sym_and, &::do_and)
+ %dispatch_form(&sym_or, &::do_or)
%dispatch_form(&sym_define_record_type, &::do_define_record_type)
# head = eval(car(expr), env)
@@ -1063,6 +1019,10 @@
%tail_to_handler(&eval_letstar)
::do_letrec
%tail_to_handler(&eval_letrec)
+ ::do_and
+ %tail_to_handler(&eval_and)
+ ::do_or
+ %tail_to_handler(&eval_or)
::do_define_record_type
%tail_to_handler(&eval_define_record_type)
})
@@ -1188,6 +1148,8 @@
%intern_form(&name_let, 3, &sym_let)
%intern_form(&name_letstar, 4, &sym_letstar)
%intern_form(&name_letrec, 6, &sym_letrec)
+ %intern_form(&name_and, 3, &sym_and)
+ %intern_form(&name_or, 2, &sym_or)
%intern_form(&name_define_record_type, 18, &sym_define_record_type)
})
@@ -1554,6 +1516,90 @@
%tail(&eval_body)
})
+# eval_and(rest=a0, env=a1) -> value (a0).
+# (and) is #t. Otherwise eval forms left-to-right, short-circuiting to #f
+# the moment one yields #f. The last form is tail-evaluated so a tail call
+# inside `and` doesn't grow the host stack.
+#
+# Frame: 16 bytes
+# +0 rest
+# +8 env
+%fn(eval_and, 16, {
+ %li(t0, %imm_val(%IMM.TRUE))
+ %if_nil(t1, a0, &::done_imm)
+
+ ::loop
+ %st(a0, sp, 0)
+ %st(a1, sp, 8)
+
+ # If cdr(rest) is NIL, the head is the last form -> tail-eval.
+ %cdr(t0, a0)
+ %if_nil(t1, t0, &::last)
+
+ # Non-last: eval, short-circuit on #f, otherwise advance.
+ %car(a0, a0)
+ %call(&eval)
+ %li(t0, %imm_val(%IMM.FALSE))
+ %beq(a0, t0, &::done)
+ %ld(a0, sp, 0)
+ %cdr(a0, a0)
+ %ld(a1, sp, 8)
+ %b(&::loop)
+
+ ::last
+ %ld(a0, sp, 0)
+ %car(a0, a0)
+ %ld(a1, sp, 8)
+ %tail(&eval)
+
+ ::done
+ %eret
+
+ ::done_imm
+ %mov(a0, t0)
+})
+
+# eval_or(rest=a0, env=a1) -> value (a0).
+# (or) is #f. Otherwise eval forms left-to-right and return the first
+# non-#f value; if every form was #f, return #f. The last form is
+# tail-evaluated.
+#
+# Frame: 16 bytes
+# +0 rest
+# +8 env
+%fn(eval_or, 16, {
+ %li(t0, %imm_val(%IMM.FALSE))
+ %if_nil(t1, a0, &::done_imm)
+
+ ::loop
+ %st(a0, sp, 0)
+ %st(a1, sp, 8)
+
+ %cdr(t0, a0)
+ %if_nil(t1, t0, &::last)
+
+ %car(a0, a0)
+ %call(&eval)
+ %li(t0, %imm_val(%IMM.FALSE))
+ %bne(a0, t0, &::done)
+ %ld(a0, sp, 0)
+ %cdr(a0, a0)
+ %ld(a1, sp, 8)
+ %b(&::loop)
+
+ ::last
+ %ld(a0, sp, 0)
+ %car(a0, a0)
+ %ld(a1, sp, 8)
+ %tail(&eval)
+
+ ::done
+ %eret
+
+ ::done_imm
+ %mov(a0, t0)
+})
+
# eval_let_named(rest=a0, env=a1) -> value (a0).
# rest = (name bindings . body). Builds a closure whose captured env
# contains a self-binding that resolves `name` to the closure itself
@@ -3048,6 +3094,8 @@
:name_let "let"
:name_letstar "let*"
:name_letrec "letrec"
+:name_and "and"
+:name_or "or"
:name_define_record_type "define-record-type"
# Primitive surface names.
@@ -3140,77 +3188,6 @@
&name_eof_objectq %(0) $(11) &prim_eof_objectq_entry %(0)
:prim_table_end
-# Embedded Scheme prelude. Length = prelude_src_end - prelude_src,
-# computed at startup. Each form is parsed and evaluated under the
-# global env before the user file runs, so the user's source can
-# rely on these helpers without re-defining them.
-:prelude_src
-"(define (list . xs) xs)" '0a'
-"(define (length xs)" '0a'
-" (let loop ((xs xs) (n 0))" '0a'
-" (if (null? xs) n (loop (cdr xs) (+ n 1)))))" '0a'
-"(define (reverse xs)" '0a'
-" (let loop ((xs xs) (acc (quote ())))" '0a'
-" (if (null? xs) acc (loop (cdr xs) (cons (car xs) acc)))))" '0a'
-"(define (append-pair a b)" '0a'
-" (if (null? a) b (cons (car a) (append-pair (cdr a) b))))" '0a'
-"(define (append . lists)" '0a'
-" (cond ((null? lists) (quote ()))" '0a'
-" ((null? (cdr lists)) (car lists))" '0a'
-" (else (append-pair (car lists) (apply append (cdr lists))))))" '0a'
-"(define (list-ref xs n)" '0a'
-" (if (= n 0) (car xs) (list-ref (cdr xs) (- n 1))))" '0a'
-"(define (map f xs)" '0a'
-" (if (null? xs) (quote ()) (cons (f (car xs)) (map f (cdr xs)))))" '0a'
-"(define (for-each f xs)" '0a'
-" (if (null? xs) (quote ()) (begin (f (car xs)) (for-each f (cdr xs)))))" '0a'
-
-# shell.scm port: process-management wrappers built on top of the syscall
-# primitives. sys-wait is a Scheme adapter over sys-waitid that returns a
-# wait4-style raw wstatus so decode-wait-status can stay unchanged.
-
-"(define (sys-wait pid)" '0a'
-" (let ((info (make-bytevector 128 0)))" '0a'
-" (let ((r (sys-waitid 1 pid info 4)))" '0a'
-" (if (car r)" '0a'
-" (let ((code (bytevector-u8-ref info 8))" '0a'
-" (status (bytevector-u8-ref info 24)))" '0a'
-" (if (= code 1)" '0a'
-" (cons #t (arithmetic-shift status 8))" '0a'
-" (cons #t (bit-and status #x7f))))" '0a'
-" r))))" '0a'
-
-"(define (decode-wait-status s)" '0a'
-" (let ((termsig (bit-and s #x7f)))" '0a'
-" (if (zero? termsig)" '0a'
-" (bit-and (arithmetic-shift s -8) #xff)" '0a'
-" (+ 128 termsig))))" '0a'
-
-"(define (wait pid)" '0a'
-" (let ((r (sys-wait pid)))" '0a'
-" (if (car r)" '0a'
-" (cons #t (decode-wait-status (cdr r)))" '0a'
-" r)))" '0a'
-
-"(define (exit . rest)" '0a'
-" (sys-exit (if (null? rest) 0 (car rest))))" '0a'
-
-"(define (argv) (sys-argv))" '0a'
-
-"(define (spawn prog . args)" '0a'
-" (let ((r (sys-clone)))" '0a'
-" (cond" '0a'
-" ((not (car r)) r)" '0a'
-" ((zero? (cdr r))" '0a'
-" (sys-execve prog (cons prog args))" '0a'
-" (sys-exit 127))" '0a'
-" (else r))))" '0a'
-
-"(define (run prog . args)" '0a'
-" (let ((r (apply spawn prog args)))" '0a'
-" (if (car r) (wait (cdr r)) r)))" '0a'
-:prelude_src_end
-
:msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00'
:msg_load_fail "scheme1: failed to read source" '0a' '00'
:msg_symtab_full "scheme1: symbol table full" '0a' '00'
@@ -3266,6 +3243,8 @@
:sym_let $(0)
:sym_letstar $(0)
:sym_letrec $(0)
+:sym_and $(0)
+:sym_or $(0)
:sym_define_record_type $(0)
# Process startup state, captured by p1_main and read by sys-argv.
diff --git a/tests/scheme1/46-and.expected-exit b/tests/scheme1/46-and.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/46-and.scm b/tests/scheme1/46-and.scm
@@ -0,0 +1,8 @@
+; (and) is #t; (and a b c) returns last when all truthy; (and a #f c) is #f.
+; Short-circuits left-to-right.
+(if (and) 0 (sys-exit 1)) ; (and) -> #t
+(if (and #f) (sys-exit 2) 0) ; (and #f) -> #f
+(if (and 5) 0 (sys-exit 3)) ; single arg returns its value
+(if (and 1 #f 3) (sys-exit 4) 0) ; falsy middle yields #f
+(if (and #f (sys-exit 99)) (sys-exit 5) 0) ; rest unevaluated after #f
+(sys-exit (and 1 2 42)) ; returns last value when all truthy
diff --git a/tests/scheme1/47-or.expected-exit b/tests/scheme1/47-or.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/47-or.scm b/tests/scheme1/47-or.scm
@@ -0,0 +1,7 @@
+; (or) is #f; (or a b c) returns first non-#f or the last; short-circuits.
+(if (or) (sys-exit 1) 0) ; (or) -> #f
+(if (or 5) 0 (sys-exit 2)) ; single arg returns its value
+(if (or #f #f 7) 0 (sys-exit 3)) ; falls through to the truthy tail
+(if (or #f #f #f) (sys-exit 4) 0) ; all #f -> #f
+(if (or 5 (sys-exit 99)) 0 (sys-exit 5)) ; truthy short-circuits the rest
+(sys-exit (or #f #f 42)) ; returns last value when no truthy earlier