commit 0da31f6ffbb178470961fc0de5a9cfe634c508e3
parent 60313fcd116e77f3f0016bb3cafe27c8c5c9cdb9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 25 Apr 2026 07:12:30 -0700
Add scheme1 reader: `;` comments, `'datum` quote, `#xNN` hex literals
skip_ws now consumes `;`-to-LF as whitespace. parse_one dispatches `'`
to a recursive read that conses (quote <datum>); intern_special_forms
reserves `quote` and eval's pair branch handles it inline. The `#`
cascade gains `#x`/`#X` slices that scan to ws/paren and feed
libp1pp's parse_hex, with optional leading `-`.
Diffstat:
8 files changed, 139 insertions(+), 1 deletion(-)
diff --git a/docs/scheme-shell-todo.md b/docs/scheme-shell-todo.md
@@ -0,0 +1,37 @@
+# scheme1 → shell.scm TODO
+
+Checklist for getting `lisp/shell.scm` running under scheme1.
+
+**Workflow:** every item is red-green TDD. Add a failing `tests/scheme1/NN-*.scm` (with `.expected-exit` and/or `.expected`) first, run the suite to confirm it fails for the expected reason, then implement until green. Multi-arch suite (`make test SUITE=scheme1`) must stay clean before moving on.
+
+## Checklist
+
+- [ ] **1. Reader: `;` comments, `'datum`, `'()`, `#xNN` hex literals.**
+ Hook `;` into `skip_ws` (skip until LF/EOF). Add `'` to `parse_one` as a prefix that reads the next datum and wraps in `(quote …)`. Intern `quote` at startup. Extend the `#`-cascade with `x`/`X` → hex parse.
+
+- [ ] **2. Top-level `define`.**
+ `eval_define` → `sym_set_global`. Recognize `(define (f a b . rest) body…)` sugar by rewriting to `(define f (lambda (a b . rest) body…))`. Returns UNSPEC.
+
+- [ ] **3. Variadic `.`-tail.**
+ Reader: `parse_list` recognizes `.` followed by ws inside a list as the dotted-pair separator; `parse_atom` rejects a bare `.`. `bind_params`: when `params` is a non-pair, bind `params` to the remaining args list and stop.
+
+- [ ] **4. `cond` and the `let` family.**
+ `cond` driver walks clauses; intern `else` at startup and pointer-compare it. `let` / `let*` / `letrec` / named `let`. `letrec` pre-binds names to UNSPEC, evaluates inits in the new env, then writes each value via `%st(val, pair, 7)` on the binding cell — no new primitive required.
+
+- [ ] **5. Arith + list primitives.**
+ Batch in: `cons`, `car`, `cdr`, `null?`, `pair?`, `zero?`, `not`, `eq?`, `+`, `-`, `*`, `=`, `<`, `bit-and`, `bit-or`, `arithmetic-shift`, `apply`. Each is a leaf with the `prim_sys_exit_entry` shape; `register_primitives` grows a table-style init.
+
+- [ ] **6. Bytevectors.**
+ Encode length in the upper 56 bits of the header (`hdr = (len << 8) | HDR.BV`). Allocator + `make-bytevector` (1- and 2-arg), `bytevector-length`, `bytevector-u8-ref`, `bytevector-u8-set!`, `bytevector-copy` (3-arg), `bytevector-copy!` (5-arg).
+
+- [ ] **7. `define-record-type`.**
+ Desugar at eval time by constructing the equivalent s-expression (a `begin` of `define`s for the td, ctor, predicate, and accessors/mutators) and recursing into `eval`. Cleaner than hand-rolling closures and reuses every existing path. Add the internal `%record-*` primitives. Depends on (1) for the quoted type-name symbol.
+
+- [ ] **8. Syscall primitives.**
+ Wrap libp1pp's `sys_read/write/close/openat/exit/clone/execve/waitid` macros as scheme primitives returning the `(#t . val)` / `(#f . errno)` convention. Promote argv to a BSS slot at startup so `sys-argv` can rebuild a list of bytevectors on demand. Add `EOF` as a sixth `IMM` and expose `eof-object` / `eof-object?`.
+
+- [ ] **9. Prelude.**
+ Embed `lisp/prelude.scm` as a bytevector literal in `scheme1.P1pp`. Parse-and-eval it before the user file at startup. Contents per LISP-C.md §Prelude: `list`, `length`, `reverse`, `append`, `list-ref`, `map`, `for-each`.
+
+- [ ] **10. Port `shell.scm` into the prelude.**
+ Concatenate `lisp/shell.scm` (or a shell-prelude variant) into the embedded source after the base prelude. Add an end-to-end test that uses `spawn` / `run` / a file-I/O round-trip.
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -360,7 +360,7 @@
# The reader is called recursively from parse_list, so every state goes
# through frame slots, not s-registers.
-# Skip whitespace (ASCII 32, 9, 10, 13). Leaf.
+# Skip whitespace (ASCII 32, 9, 10, 13) and `;`-to-LF comments. Leaf.
:skip_ws
%scope skip_ws
%la(t2, &readbuf_pos)
@@ -371,7 +371,18 @@
%beq(t0, t1, &::done)
%readbuf_byte(a0, t0)
%is_ws_branch(a1, a0, &::step)
+ %addi(a1, a0, -59) ; ';'
+ %beqz(a1, &::comment)
%b(&::done)
+ ::comment
+ # Consume up to and including the next LF, or to EOF.
+ %addi(t0, t0, 1)
+ %beq(t0, t1, &::done)
+ %readbuf_byte(a0, t0)
+ %addi(a1, a0, -10)
+ %bnez(a1, &::comment)
+ %addi(t0, t0, 1)
+ %b(&::loop)
::step
%addi(t0, t0, 1)
%b(&::loop)
@@ -398,6 +409,8 @@
%beqz(a1, &::rparen)
%addi(a1, a0, -35)
%beqz(a1, &::hash)
+ %addi(a1, a0, -39) ; '\''
+ %beqz(a1, &::quote)
%tail(&parse_atom)
@@ -428,6 +441,10 @@
%beqz(a1, &::true_lit)
%addi(a1, a0, -102) ; 'f'
%beqz(a1, &::false_lit)
+ %addi(a1, a0, -120) ; 'x'
+ %beqz(a1, &::hex_lit)
+ %addi(a1, a0, -88) ; 'X'
+ %beqz(a1, &::hex_lit)
%die(msg_bad_hash, 6)
::true_lit
@@ -438,6 +455,58 @@
%li(a0, %imm_val(%IMM.FALSE))
%eret
+ ::hex_lit
+ # t0 sits at the first hex digit; t1 = readbuf_len. Scan to ws/paren/EOF,
+ # then parse_hex over the slice (with optional leading '-').
+ %mov(a3, t0)
+ ::hex_scan
+ %beq(t0, t1, &::hex_end)
+ %readbuf_byte(a0, t0)
+ %is_ws_branch(a1, a0, &::hex_end)
+ %addi(a1, a0, -40)
+ %beqz(a1, &::hex_end)
+ %addi(a1, a0, -41)
+ %beqz(a1, &::hex_end)
+ %addi(t0, t0, 1)
+ %b(&::hex_scan)
+ ::hex_end
+ %la(t2, &readbuf_pos)
+ %st(t0, t2, 0)
+ %la(a0, &readbuf_buf_ptr)
+ %ld(a0, a0, 0)
+ %add(a0, a0, a3)
+ %sub(a1, t0, a3)
+ %lb(t2, a0, 0)
+ %addi(t2, t2, -45) ; '-'
+ %beqz(t2, &::hex_neg)
+ %call(&parse_hex)
+ %mkfix(a0, a0)
+ %eret
+ ::hex_neg
+ %addi(a0, a0, 1)
+ %addi(a1, a1, -1)
+ %call(&parse_hex)
+ %li(t0, 0)
+ %sub(a0, t0, a0)
+ %mkfix(a0, a0)
+ %eret
+
+ ::quote
+ # Consume the leading '\''; recurse into parse_one for the datum;
+ # then build (quote <datum>).
+ %la(t2, &readbuf_pos)
+ %ld(t0, t2, 0)
+ %addi(t0, t0, 1)
+ %st(t0, t2, 0)
+ %call(&parse_one)
+ %li(a1, %imm_val(%IMM.NIL))
+ %call(&cons)
+ %la(t0, &sym_quote)
+ %ld(t0, t0, 0)
+ %mov(a1, a0)
+ %mov(a0, t0)
+ %tail(&cons)
+
::eof
%die(msg_unexp_eof, 6)
})
@@ -666,6 +735,9 @@
# isn't a symbol cannot collide with any sym_* slot.
%ld(t0, sp, 0)
%car(t0, t0) ; t0 = head
+ %la(t1, &sym_quote)
+ %ld(t1, t1, 0)
+ %beq(t0, t1, &::do_quote)
%la(t1, &sym_if)
%ld(t1, t1, 0)
%beq(t0, t1, &::do_if)
@@ -691,6 +763,13 @@
%ld(a0, sp, 16)
%tail(&apply)
+ ::do_quote
+ # (quote datum) -> car(cdr(expr))
+ %ld(a0, sp, 0)
+ %cdr(a0, a0)
+ %car(a0, a0)
+ %eret
+
::do_if
%ld(a0, sp, 0)
%cdr(a0, a0)
@@ -791,6 +870,12 @@
# these slots before falling through to ordinary application.
%fn(intern_special_forms, 0, {
+ %la(a0, &name_quote)
+ %li(a1, 5)
+ %call(&intern)
+ %la(t0, &sym_quote)
+ %st(a0, t0, 0)
+
%la(a0, &name_if)
%li(a1, 2)
%call(&intern)
@@ -1001,6 +1086,7 @@
# Surface names. Length is hard-coded at the call site; no NUL needed
# because intern takes (ptr, len). Aligned padding via "\0" bytes is
# fine -- M0 emits ASCII verbatim.
+:name_quote "quote"
:name_if "if"
:name_lambda "lambda"
:name_sys_exit "sys-exit"
@@ -1043,6 +1129,7 @@
# Cached tagged-symbol values for special forms (filled by
# intern_special_forms at startup).
+:sym_quote $(0)
:sym_if $(0)
:sym_lambda $(0)
diff --git a/tests/scheme1/06-comment.expected-exit b/tests/scheme1/06-comment.expected-exit
@@ -0,0 +1 @@
+17
diff --git a/tests/scheme1/06-comment.scm b/tests/scheme1/06-comment.scm
@@ -0,0 +1,5 @@
+; A comment at top of file.
+;; double-semicolon, ignored.
+(sys-exit ; trailing comment after head
+ ; another full-line comment between args
+ 17) ; trailing tail
diff --git a/tests/scheme1/07-hex.expected-exit b/tests/scheme1/07-hex.expected-exit
@@ -0,0 +1 @@
+42
diff --git a/tests/scheme1/07-hex.scm b/tests/scheme1/07-hex.scm
@@ -0,0 +1 @@
+(sys-exit #x2a)
diff --git a/tests/scheme1/08-quote.expected-exit b/tests/scheme1/08-quote.expected-exit
@@ -0,0 +1 @@
+23
diff --git a/tests/scheme1/08-quote.scm b/tests/scheme1/08-quote.scm
@@ -0,0 +1,5 @@
+; '() should evaluate to nil; we need a primitive that turns nil into 0
+; status. Use if + lambda we already have. (if (null? '()) 0 1) requires
+; null?. For now, just verify '() reads + evals without crashing by:
+; ((lambda (x) (sys-exit 23)) '())
+((lambda (x) (sys-exit 23)) '())