boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit d1669e299cb2cdd93ca6cb11bc70b22e29b9c915
parent 6ff8a39efb43086b165a90851556c3e44e185295
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 09:22:22 -0700

Add scheme1 syscall primitives, sys-argv, and EOF immediate

New primitives: sys-read, sys-write, sys-close, sys-openat, sys-clone,
sys-execve, sys-waitid, sys-argv, eof-object, eof-object?. Each thin
syscall wrapper unpacks args from the list, calls a libp1pp-style sys_*
function, and routes the raw return through wrap_syscall_result --
nonneg becomes (#t . r), negative becomes (#f . -r) per the shell.scm
contract.

Process management uses %p1_sys_clone / %p1_sys_execve / %p1_sys_waitid
directly (libp1pp doesn't ship wrappers for these). sys_clone saves and
restores s0 around the syscall because %p1_syscall reads s0 as the OS
syscall's 5th argument; sys_execve and sys_waitid are leaf reg-shuffles.
build_execve_argv walks a list of bytevectors, allocates a NULL-
terminated raw pointer array, and writes each bv's data_ptr into it.

p1_main now stashes argc/argv into saved_argc / saved_argv at startup so
sys-argv can rebuild a list of bytevectors on demand by strlen-ing each
argv[i] and memcpy-ing into a fresh bytevector via bv_alloc.

bv_capacity_for now returns the smallest power of two strictly greater
than the requested length (minimum 16). Capacity always exceeds length
by at least one byte, so the byte at index length is the zero-init
NUL terminator -- safe to pass a bytevector's data_ptr to syscalls
expecting a C string.

EOF joins the IMM enum, exposed via eof-object / eof-object? primitives.

Diffstat:
Mscheme1/scheme1.P1pp | 367+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Atests/scheme1/39-eof.expected-exit | 1+
Atests/scheme1/39-eof.scm | 4++++
Atests/scheme1/40-sys-argv.expected-exit | 1+
Atests/scheme1/40-sys-argv.scm | 6++++++
Atests/scheme1/41-fileio.expected-exit | 1+
Atests/scheme1/41-fileio.scm | 16++++++++++++++++
Atests/scheme1/42-clone-wait.expected-exit | 1+
Atests/scheme1/42-clone-wait.scm | 14++++++++++++++
9 files changed, 404 insertions(+), 7 deletions(-)

diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -33,7 +33,7 @@ # ========================================================================= %enum TAG { FIXNUM PAIR SYM HEAP IMM } -%enum IMM { FALSE TRUE NIL UNSPEC UNBOUND } +%enum IMM { FALSE TRUE NIL UNSPEC UNBOUND EOF } %enum HDR { BV CLOSURE PRIM TD REC } # imm_val(idx) -> integer-expression for the tagged immediate at IMM index @@ -180,6 +180,14 @@ %fn(p1_main, 16, { %st(a1, sp, 0) + # Stash argc/argv globally so the sys-argv primitive can rebuild a + # list of bytevectors on demand without re-plumbing them through + # every call frame. + %la(t0, &saved_argc) + %st(a0, t0, 0) + %la(t0, &saved_argv) + %st(a1, t0, 0) + %li(t0, 2) %bltu(a0, t0, &::usage) @@ -1850,17 +1858,20 @@ %mov(a0, t1) %ret -# bv_capacity_for(n=a0) -> next pow2 ≥ max(n, 16) (a0). +# bv_capacity_for(n=a0) -> smallest power-of-two strictly greater than n, +# minimum 16 (so capacity > length and the byte at index `length` is the +# zero-init NUL terminator -- syscalls that take C strings can pass a +# bytevector's data_ptr directly). :bv_capacity_for %scope bv_capacity_for %li(t0, 16) ::loop - %bltu(t0, a0, &::shift) - %mov(a0, t0) - %ret - ::shift + %bltu(a0, t0, &::done) %shli(t0, t0, 1) %b(&::loop) + ::done + %mov(a0, t0) + %ret %endscope # bv_alloc(raw_len=a0) -> tagged bv (a0). Length = raw_len, capacity from @@ -2500,8 +2511,325 @@ }) # ========================================================================= -# Read-only data +# Syscall primitives # ========================================================================= +# +# Each syscall primitive untags the args list, calls a thin libp1pp- or +# scheme1-local syscall wrapper, and routes the raw return through +# wrap_syscall_result: r >= 0 -> (#t . r), r < 0 -> (#f . -r). +# +# Bytevector args (paths, buffers) are passed by their raw data_ptr (slot +# +5 from the tagged wrapper). bv_capacity_for guarantees capacity > length +# so the byte at [length] is the zero-init NUL terminator -- safe to hand +# to syscalls expecting a C string. + +# wrap_syscall_result(raw=a0) -> (#t . r) or (#f . errno). +%fn(wrap_syscall_result, 16, { + %st(a0, sp, 0) + %bltz(a0, &::err) + %shli(a1, a0, 3) + %li(a0, %imm_val(%IMM.TRUE)) + %tail(&cons) + + ::err + %ld(t0, sp, 0) + %li(t1, 0) + %sub(t0, t1, t0) + %shli(a1, t0, 3) + %li(a0, %imm_val(%IMM.FALSE)) + %tail(&cons) +}) + +# sys_openat(dirfd=a0, path=a1, flags=a2, mode=a3) -> r (a0). Leaf. +:sys_openat + %mov(t0, a3) + %mov(a3, a2) + %mov(a2, a1) + %mov(a1, a0) + %li(a0, %p1_sys_openat) + %syscall + %ret + +# sys_clone() -> r (a0). Linux clone(SIGCHLD, 0, 0, 0, 0) -- fork-style. +# Saves and restores s0 around the syscall because %p1_syscall reads s0 +# as the 5th OS-syscall argument. +%fn(sys_clone, 16, { + %st(s0, sp, 0) + %li(s0, 0) + + %li(a1, 17) + %li(a2, 0) + %li(a3, 0) + %li(t0, 0) + %li(a0, %p1_sys_clone) + %syscall + + %ld(s0, sp, 0) +}) + +# sys_execve(path=a0, argv=a1, envp=a2) -> -errno (a0). Only returns on +# failure; on success the new image takes over. +:sys_execve + %mov(a3, a2) + %mov(a2, a1) + %mov(a1, a0) + %li(a0, %p1_sys_execve) + %syscall + %ret + +# sys_waitid(idtype=a0, id=a1, infop=a2, options=a3) -> r (a0). Leaf. +:sys_waitid + %mov(t0, a3) + %mov(a3, a2) + %mov(a2, a1) + %mov(a1, a0) + %li(a0, %p1_sys_waitid) + %syscall + %ret + +# build_execve_argv(list=a0) -> raw NULL-terminated array (a0). +# Walks `list` (cons-list of bytevectors), allocates (count+1)*8 bytes, +# writes each bv's data_ptr, terminates with NULL. +# +# Frame: 24 bytes +# +0 list +# +8 count +# +16 array ptr (raw) +%fn(build_execve_argv, 24, { + %st(a0, sp, 0) + + %mov(t0, a0) + %li(t1, 0) + ::count_loop + %li(t2, %imm_val(%IMM.NIL)) + %beq(t0, t2, &::count_done) + %addi(t1, t1, 1) + %cdr(t0, t0) + %b(&::count_loop) + ::count_done + %st(t1, sp, 8) + + %addi(a0, t1, 1) + %shli(a0, a0, 3) + %call(&alloc_bytes) + %st(a0, sp, 16) + + %ld(t0, sp, 0) + %ld(t1, sp, 16) + + ::fill_loop + %li(t2, %imm_val(%IMM.NIL)) + %beq(t0, t2, &::fill_done) + %car(a3, t0) + %ld(a2, a3, 5) + %st(a2, t1, 0) + %addi(t1, t1, 8) + %cdr(t0, t0) + %b(&::fill_loop) + + ::fill_done + %li(t2, 0) + %st(t2, t1, 0) + + %ld(a0, sp, 16) +}) + +# (sys-read fd buf count) +%fn(prim_sys_read_entry, 0, { + %car(t0, a0) + %sari(t0, t0, 3) + %cdr(t1, a0) + %car(t1, t1) + %ld(t1, t1, 5) + %cdr(t2, a0) + %cdr(t2, t2) + %car(t2, t2) + %sari(t2, t2, 3) + %mov(a0, t0) + %mov(a1, t1) + %mov(a2, t2) + %call(&sys_read) + %tail(&wrap_syscall_result) +}) + +# (sys-write fd buf count) +%fn(prim_sys_write_entry, 0, { + %car(t0, a0) + %sari(t0, t0, 3) + %cdr(t1, a0) + %car(t1, t1) + %ld(t1, t1, 5) + %cdr(t2, a0) + %cdr(t2, t2) + %car(t2, t2) + %sari(t2, t2, 3) + %mov(a0, t0) + %mov(a1, t1) + %mov(a2, t2) + %call(&sys_write) + %tail(&wrap_syscall_result) +}) + +# (sys-close fd) +%fn(prim_sys_close_entry, 0, { + %car(a0, a0) + %sari(a0, a0, 3) + %call(&sys_close) + %tail(&wrap_syscall_result) +}) + +# (sys-openat dirfd path flags mode) +%fn(prim_sys_openat_entry, 16, { + # +0 flags raw (parked across the path/mode extraction) + %car(t0, a0) + %sari(t0, t0, 3) ; dirfd raw + %cdr(a0, a0) + %car(t1, a0) + %ld(t1, t1, 5) ; path data_ptr + %cdr(a0, a0) + %car(t2, a0) + %sari(t2, t2, 3) + %st(t2, sp, 0) ; flags raw + %cdr(a0, a0) + %car(a3, a0) + %sari(a3, a3, 3) ; mode raw + %mov(a0, t0) + %mov(a1, t1) + %ld(a2, sp, 0) + %call(&sys_openat) + %tail(&wrap_syscall_result) +}) + +# (sys-clone) +%fn(prim_sys_clone_entry, 0, { + %call(&sys_clone) + %tail(&wrap_syscall_result) +}) + +# (sys-execve path argv-list) +%fn(prim_sys_execve_entry, 16, { + %st(a0, sp, 0) + %cdr(a0, a0) + %car(a0, a0) + %call(&build_execve_argv) + %mov(t0, a0) + %ld(a0, sp, 0) + %car(a0, a0) + %ld(a0, a0, 5) + %mov(a1, t0) + %li(a2, 0) + %call(&sys_execve) + %tail(&wrap_syscall_result) +}) + +# (sys-waitid idtype id infop options) +%fn(prim_sys_waitid_entry, 0, { + %car(t0, a0) + %sari(t0, t0, 3) + %cdr(a0, a0) + %car(t1, a0) + %sari(t1, t1, 3) + %cdr(a0, a0) + %car(t2, a0) + %ld(t2, t2, 5) + %cdr(a0, a0) + %car(a3, a0) + %sari(a3, a3, 3) + %mov(a0, t0) + %mov(a1, t1) + %mov(a2, t2) + %call(&sys_waitid) + %tail(&wrap_syscall_result) +}) + +# (sys-argv) -> list of bytevectors. Walks saved_argv, strlen-ing each +# NUL-terminated entry into a fresh bytevector and consing them in order +# via the head/tail trick. +# +# Frame: 40 bytes +# +0 argv ptr (advancing 8 bytes per iteration) +# +8 count remaining (decrementing from saved_argc) +# +16 list head +# +24 list tail +# +32 current bv (across memcpy) +%fn(prim_sys_argv_entry, 40, { + %la(t0, &saved_argv) + %ld(t0, t0, 0) + %st(t0, sp, 0) + %la(t0, &saved_argc) + %ld(t0, t0, 0) + %st(t0, sp, 8) + %li(t0, %imm_val(%IMM.NIL)) + %st(t0, sp, 16) + %st(t0, sp, 24) + + ::loop + %ld(t0, sp, 8) + %beqz(t0, &::done) + + # len = strlen(*argv) + %ld(t0, sp, 0) + %ld(a0, t0, 0) + %call(&strlen) + + # bv = bv_alloc(len) + %call(&bv_alloc) + %st(a0, sp, 32) + + # memcpy(bv.data_ptr, *argv, len-from-bv-hdr). + %ld(t0, sp, 32) + %ld(a0, t0, 5) + %ld(t1, sp, 0) + %ld(a1, t1, 0) + %ld(t1, t0, -3) + %shri(a2, t1, 8) + %call(&memcpy) + + # cell = cons(bv, NIL); append to list head/tail. + %ld(a0, sp, 32) + %li(a1, %imm_val(%IMM.NIL)) + %call(&cons) + + %ld(t0, sp, 16) + %li(t1, %imm_val(%IMM.NIL)) + %beq(t0, t1, &::first) + %ld(t0, sp, 24) + %st(a0, t0, 7) + %st(a0, sp, 24) + %b(&::advance) + + ::first + %st(a0, sp, 16) + %st(a0, sp, 24) + + ::advance + %ld(t0, sp, 0) + %addi(t0, t0, 8) + %st(t0, sp, 0) + %ld(t0, sp, 8) + %addi(t0, t0, -1) + %st(t0, sp, 8) + %b(&::loop) + + ::done + %ld(a0, sp, 16) +}) + +# (eof-object) and (eof-object? x). +:prim_eof_object_entry + %li(a0, %imm_val(%IMM.EOF)) + %ret + +:prim_eof_objectq_entry +%scope prim_eof_objectq + %car(t0, a0) + %li(t1, %imm_val(%IMM.EOF)) + %li(a0, %imm_val(%IMM.FALSE)) + %bne(t0, t1, &::end) + %li(a0, %imm_val(%IMM.TRUE)) + ::end + %ret +%endscope # Surface names. Length is hard-coded at the call site; no NUL needed # because intern takes (ptr, len). Aligned padding via "\0" bytes is @@ -2551,6 +2879,17 @@ :name_record_isa "%record-is-a?" :name_record_td "%record-td" +:name_sys_read "sys-read" +:name_sys_write "sys-write" +:name_sys_close "sys-close" +:name_sys_openat "sys-openat" +:name_sys_clone "sys-clone" +:name_sys_execve "sys-execve" +:name_sys_waitid "sys-waitid" +:name_sys_argv "sys-argv" +:name_eof_object "eof-object" +:name_eof_objectq "eof-object?" + # Primitive registration table. Each entry: 8-byte name_ptr (4-byte label # ref + 4 pad), 8-byte name_len, 8-byte entry_label (4 ref + 4 pad). :prim_table @@ -2585,6 +2924,16 @@ &name_record_set %(0) $(12) &prim_record_set_entry %(0) &name_record_isa %(0) $(13) &prim_record_is_a_entry %(0) &name_record_td %(0) $(10) &prim_record_td_entry %(0) +&name_sys_read %(0) $(8) &prim_sys_read_entry %(0) +&name_sys_write %(0) $(9) &prim_sys_write_entry %(0) +&name_sys_close %(0) $(9) &prim_sys_close_entry %(0) +&name_sys_openat %(0) $(10) &prim_sys_openat_entry %(0) +&name_sys_clone %(0) $(9) &prim_sys_clone_entry %(0) +&name_sys_execve %(0) $(10) &prim_sys_execve_entry %(0) +&name_sys_waitid %(0) $(10) &prim_sys_waitid_entry %(0) +&name_sys_argv %(0) $(8) &prim_sys_argv_entry %(0) +&name_eof_object %(0) $(10) &prim_eof_object_entry %(0) +&name_eof_objectq %(0) $(11) &prim_eof_objectq_entry %(0) :prim_table_end :msg_usage "scheme1: usage: scheme1 SOURCE.scm" '0a' '00' @@ -2637,6 +2986,10 @@ :sym_letrec $(0) :sym_define_record_type $(0) +# Process startup state, captured by p1_main and read by sys-argv. +:saved_argc $(0) +:saved_argv $(0) + # Pointer slots for the past-:ELF_end arenas. :readbuf_buf_ptr $(0) :heap_buf_ptr $(0) diff --git a/tests/scheme1/39-eof.expected-exit b/tests/scheme1/39-eof.expected-exit @@ -0,0 +1 @@ +22 diff --git a/tests/scheme1/39-eof.scm b/tests/scheme1/39-eof.scm @@ -0,0 +1,4 @@ +; eof-object returns the EOF singleton; eof-object? distinguishes it. +(sys-exit (if (eof-object? (eof-object)) + (if (eof-object? '()) 1 22) + 2)) diff --git a/tests/scheme1/40-sys-argv.expected-exit b/tests/scheme1/40-sys-argv.expected-exit @@ -0,0 +1 @@ +2 diff --git a/tests/scheme1/40-sys-argv.scm b/tests/scheme1/40-sys-argv.scm @@ -0,0 +1,6 @@ +; sys-argv returns a list of bytevectors. argv[0] is the program name, +; argv[1] is this script's path. We just count the entries via a list +; recursion and return that. +(define (count xs) + (if (null? xs) 0 (+ 1 (count (cdr xs))))) +(sys-exit (count (sys-argv))) diff --git a/tests/scheme1/41-fileio.expected-exit b/tests/scheme1/41-fileio.expected-exit @@ -0,0 +1 @@ +59 diff --git a/tests/scheme1/41-fileio.scm b/tests/scheme1/41-fileio.scm @@ -0,0 +1,16 @@ +; Round-trip sys-openat / sys-read / sys-close on this very file. The +; first byte is `;` (ASCII 59) -- we exit with that, proving open and +; read both succeeded and the data made it into a Scheme bytevector. +(define args (sys-argv)) +(define path (car (cdr args))) +(define r (sys-openat -100 path 0 0)) +(if (not (car r)) + (sys-exit 1) + (let ((fd (cdr r)) + (buf (make-bytevector 1))) + (let ((rr (sys-read fd buf 1))) + (if (not (car rr)) + (sys-exit 2) + (let ((b (bytevector-u8-ref buf 0))) + (sys-close fd) + (sys-exit b)))))) diff --git a/tests/scheme1/42-clone-wait.expected-exit b/tests/scheme1/42-clone-wait.expected-exit @@ -0,0 +1 @@ +17 diff --git a/tests/scheme1/42-clone-wait.scm b/tests/scheme1/42-clone-wait.scm @@ -0,0 +1,14 @@ +; sys-clone forks; the child exits with 17. The parent calls sys-waitid +; (idtype=P_PID=1, id=pid, infop=128-byte buffer, options=WEXITED=4) and +; reads the child's exit code from siginfo_t.si_status (offset 24 on +; 64-bit Linux). Final exit reflects the child's status. +(define r (sys-clone)) +(if (not (car r)) + (sys-exit 1) + (if (zero? (cdr r)) + (sys-exit 17) + (let ((info (make-bytevector 128 0))) + (let ((wr (sys-waitid 1 (cdr r) info 4))) + (if (car wr) + (sys-exit (bytevector-u8-ref info 24)) + (sys-exit 2))))))