boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 32f98055288f0ef2591cf9cd95f03eab81f761cc
parent 0e0688006486288d9b30757cc807f016a71b2e6f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 10:56:59 -0700

scheme1: hex number<->string and format, rename eof-object to eof

Diffstat:
Mdocs/SCHEME1.md | 29++++++++++++++++-------------
Mscheme1/prelude.scm | 4++--
Mscheme1/scheme1.P1pp | 189++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mtests/scheme1/039-eof.scm | 6+++---
Mtests/scheme1/067-read-line.scm | 2+-
Atests/scheme1/116-hex-conv.expected-exit | 1+
Atests/scheme1/116-hex-conv.scm | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 251 insertions(+), 53 deletions(-)

diff --git a/docs/SCHEME1.md b/docs/SCHEME1.md @@ -55,7 +55,7 @@ The runtime knows exactly: | empty list | `'()`, disjoint from pair | | procedure | closure or primitive | | record | via `define-record-type` | -| eof-object | singleton; produced by `(eof-object)` and EOF reads | +| eof-object | singleton; bound at top level as `eof`; also returned on EOF reads | | unspecified | singleton; result of `set!`, `define`, `(if #f x)`, etc. | Multiple-values packs flow through `values` / `call-with-values` / `let-values` @@ -140,8 +140,7 @@ these. **Equality / predicates** `eq?`, `equal?`, `not`, `null?`, `pair?`, `boolean?`, `integer?`, -`symbol?`, `string?` (≡ `bytevector?`), `procedure?`, `zero?`, -`eof-object?`. +`symbol?`, `string?` (≡ `bytevector?`), `procedure?`, `zero?`, `eof?`. **Pairs** `cons`, `car`, `cdr`, `set-car!`, `set-cdr!`, `length`, `list-ref`. @@ -163,19 +162,23 @@ src-end`), `bytevector-append` (variadic), `bytevector=?`, `string-length` (strlen of the data buffer up to the first NUL). **Symbols / numbers as text** -`string->symbol`, `symbol->string`, `number->string` (decimal only; -optional radix arg accepted but ignored), `string->number` (decimal -only; returns `#f` on parse failure). +`string->symbol`, `symbol->string`, `number->string` (decimal by +default; lowercase hex when the optional radix arg is `16`, with a +leading `-` for negatives; any other radix value falls back to +decimal), `string->number` (decimal by default; hex when radix is +`16`, accepting upper- or lowercase digits and an optional leading +`+`/`-`; returns `#f` on parse failure). **I/O and error** `display`, `write`, `format`, `error`. `format` understands `~a` -(display), `~s` (write), `~d` (decimal fixnum), `~%` (newline), -`~~` (literal tilde); unknown directives pass through verbatim. -`error` writes `scheme1: error: <msg> <irritants…>` to stderr and -exits with status 1. +(display), `~s` (write), `~d` (decimal fixnum), `~x` (lowercase hex +fixnum, signed: leading `-` for negatives), `~%` (newline), `~~` +(literal tilde); unknown directives pass through verbatim. `error` +writes `scheme1: error: <msg> <irritants…>` to stderr and exits with +status 1. **EOF** -`eof-object`, `eof-object?`. +`eof` (the singleton, bound at startup), `eof?`. **Multiple values** `values`, `call-with-values`. `(values x)` is identical to `x` in @@ -253,8 +256,8 @@ convention) when failure needs to be observable. - `open-input`, `open-output`, `open-append`, `close`, `file-exists?`. - Buffered reads: `read-bytes`, `read-line`, `read-all`. Each - returns either `(#t . value)` (where `value` may be the - eof-object) or `(#f . errno)` from the underlying syscall. + returns either `(#t . value)` (where `value` may be `eof`) or + `(#f . errno)` from the underlying syscall. - Unbuffered writes: `write-bytes`, `write-string`, `write-line`. Writes loop until the requested length is delivered or a syscall error surfaces. diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm @@ -511,7 +511,7 @@ (cond ((not (car r)) r) ((zero? (cdr r)) - (cons #t (if (zero? i) (eof-object) (bytevector-copy out 0 i)))) + (cons #t (if (zero? i) eof (bytevector-copy out 0 i)))) (else (loop i))))))))) (define (read-line p) @@ -536,7 +536,7 @@ (cond ((not (car r)) r) ((zero? (cdr r)) - (cons #t (if (null? acc) (eof-object) (bv-concat-reverse acc)))) + (cons #t (if (null? acc) eof (bv-concat-reverse acc)))) (else (loop acc)))))))) (define (read-all p) diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -44,7 +44,7 @@ %macro SYMTAB_CAP_SLOTS() 8192 %endm %macro READBUF_CAP_BYTES() 1048576 %endm %macro HEAP_CAP_BYTES() 0x10000000 %endm -%macro SCRATCH_CAP_BYTES() 0x1000000 %endm +%macro SCRATCH_CAP_BYTES() 0x8000000 %endm # ========================================================================= # Tag idioms @@ -221,6 +221,15 @@ %call(&intern_special_forms) %call(&register_primitives) + # Bind `eof` as a direct global -> IMM.EOF value. (Predicate is `eof?`, + # registered via prim_table.) Cheaper and shorter than a 0-arg thunk. + %la(a0, &name_eof) + %li(a1, 3) + %call(&intern) + %untag_sym(a0, a0) + %li(a1, %imm_val(%IMM.EOF)) + %call(&sym_set_global) + # load_source(argv[1]) %ld_global(a0, &saved_argv) %ld(a0, a0, 8) @@ -3448,43 +3457,115 @@ %ldl(a0, bv) }) -# (number->string n [radix]) -- decimal repr in a fresh bv. The radix -# arg is part of the surface per LISP.md (10 and 16 required) but the -# implementation is decimal-only for now: the second arg, if present, -# is silently ignored. str_putint takes the raw value, so untag first; -# str_alloc(0) gives an empty NUL-terminated wrapper that str_putint -# grows in place. +0 holds the raw value across str_alloc. +# (number->string n [radix]) -- fresh bv with the integer's text form. +# Radix 16 selects str_puthex (lowercase, leading '-' for negatives); +# any other radix (or omitted) selects decimal. str_alloc(0) gives an +# empty NUL-terminated wrapper that the str_put* helper grows in place. -%fn2(prim_number_to_string_entry, {value pad}, { +%fn2(prim_number_to_string_entry, {value radix}, { %car(t0, a0) %sari(t0, t0, 3) ; raw value %stl(t0, value) + + # Default radix = 10. If a second arg is present, untag it. + %li(t0, 10) + %stl(t0, radix) + %cdr(t1, a0) + %if_nil(t0, t1, &::have_radix) + %car(t0, t1) + %sari(t0, t0, 3) + %stl(t0, radix) + ::have_radix + %li(a0, 0) %call(&str_alloc) %ldl(a1, value) + %ldl(t0, radix) + %li(t1, 16) + %beq(t0, t1, &::hex) %tail(&str_putint) + ::hex + %tail(&str_puthex) }) -# (string->number bv [radix]) -- delegate parsing to parse_dec. Returns -# #f on non-bytevector input, empty string, lone "-", or any non-digit -# byte. Radix arg is part of the surface per LISP.md (10 and 16 required) -# but ignored for now -- decimal only. -%fn(prim_string_to_number_entry, 0, { - %car(a0, a0) - %tagof(t0, a0) +# (string->number bv [radix]) -- decimal goes through parse_dec; radix +# 16 strips an optional leading '-' and calls parse_hex over the +# remainder, demanding it consume every byte. Returns #f on +# non-bytevector input, empty string, lone "-", or any non-recognized +# byte. Other radices are not pinned by LISP.md and currently fall +# through to the decimal path. +%fn2(prim_string_to_number_entry, {args ptr len sign}, { + %stl(a0, args) + + %car(t2, a0) + %tagof(t0, t2) %li(t1, %TAG.HEAP) %bne(t0, t1, &::fail) - %hdr_type(t0, a0) + %hdr_type(t0, t2) %li(t1, %HDR.BV) %bne(t0, t1, &::fail) - %heap_ld(t0, a0, %BV.data) - %heap_ld(t1, a0, %BV.hdr) + %heap_ld(t0, t2, %BV.data) + %heap_ld(t1, t2, %BV.hdr) %shri(t1, t1, 8) ; length - %mov(a0, t0) - %mov(a1, t1) + %stl(t0, ptr) + %stl(t1, len) + + # Inspect the optional radix arg. + %ldl(t0, args) + %cdr(t0, t0) + %if_nil(t1, t0, &::dec) + %car(t1, t0) + %sari(t1, t1, 3) + %li(t2, 16) + %beq(t1, t2, &::hex) + + ::dec + %ldl(a0, ptr) + %ldl(a1, len) %call(&parse_dec) ; -> (a0=value, a1=ok) %bnez(a1, &::end) + %b(&::fail) + + ::hex + # Strip optional leading '+' / '-'. + %li(t0, 0) + %stl(t0, sign) + %ldl(t0, len) + %beqz(t0, &::fail) + %ldl(t1, ptr) + %lb(t2, t1, 0) + %addi(t0, t2, -45) ; '-' + %beqz(t0, &::hex_neg) + %addi(t0, t2, -43) ; '+' + %beqz(t0, &::hex_skip_sign) + %b(&::hex_parse) + ::hex_neg + %li(t0, 1) + %stl(t0, sign) + ::hex_skip_sign + %ldl(t0, ptr) + %addi(t0, t0, 1) + %stl(t0, ptr) + %ldl(t0, len) + %addi(t0, t0, -1) + %stl(t0, len) + %beqz(t0, &::fail) + + ::hex_parse + %ldl(a0, ptr) + %ldl(a1, len) + %call(&parse_hex) ; -> (a0=value, a1=consumed) + %ldl(t0, len) + %bne(a1, t0, &::fail) ; demand full consumption + %ldl(t0, sign) + %beqz(t0, &::hex_pos) + %li(t1, 0) + %sub(a0, t1, a0) + ::hex_pos + %shli(a0, a0, 3) ; mkfix + %b(&::end) + ::fail %li(a0, %imm_val(%IMM.FALSE)) ::end @@ -4890,6 +4971,37 @@ %tail(&str_putn) }) +# str_puthex(bv=a0, value=a1) -> bv (a0). Signed hex: emits a leading +# '-' for negatives, then unsigned hex of |value| via fmt_hex. The bv +# wrapper pointer is stable across str_putc / str_putn (only the +# internal data buffer can move), so we reload it from the local. +%fn2(str_puthex, {bv value}, { + %stl(a0, bv) + %stl(a1, value) + + %bltz(a1, &::neg) + %b(&::pos) + + ::neg + %ldl(a0, bv) + %li(a1, 45) ; '-' + %call(&str_putc) + %ldl(t0, value) + %li(t1, 0) + %sub(t0, t1, t0) + %stl(t0, value) + + ::pos + %la(a0, &writer_num_buf) + %ldl(a1, value) + %call(&fmt_hex) ; n_bytes (a0) + + %mov(a2, a0) + %la(a1, &writer_num_buf) + %ldl(a0, bv) + %tail(&str_putn) +}) + # sym_name(idx=a0) -> (ptr=a0, len=a1). Leaf. idx is the untagged sym # slot index; both fields come straight out of the symtab entry. :sym_name @@ -5229,9 +5341,10 @@ # (format template-bv arg ...). Walks the template bv byte by byte; # `~X` consumes the next byte as a directive: a (display), s (write), -# d (decimal fixnum), % (newline), ~ (literal tilde). Unknown specs -# pass through verbatim. Returns the assembled bv; the caller decides -# how to consume it (e.g. (display (format ...))). +# d (decimal fixnum), x (lowercase hex fixnum, signed), % (newline), +# ~ (literal tilde). Unknown specs pass through verbatim. Returns the +# assembled bv; the caller decides how to consume it (e.g. +# (display (format ...))). # # Locals: # out bv @@ -5298,6 +5411,8 @@ %beqz(t1, &::spec_s) %addi(t1, a3, -100) ; 'd' %beqz(t1, &::spec_d) + %addi(t1, a3, -120) ; 'x' + %beqz(t1, &::spec_x) %addi(t1, a3, -37) ; '%' %beqz(t1, &::spec_pct) %addi(t1, a3, -126) ; '~' @@ -5358,6 +5473,16 @@ %call(&str_putint) %b(&::loop) + ::spec_x + %ldl(t0, args) + %car(t1, t0) + %cdr(t0, t0) + %stl(t0, args) + %sari(a1, t1, 3) + %ldl(a0, out) + %call(&str_puthex) + %b(&::loop) + ::spec_pct %ldl(a0, out) %li(a1, 10) @@ -5653,13 +5778,10 @@ %ldl(a0, head) }) -# (eof-object) and (eof-object? x). -:prim_eof_object_entry - %li(a0, %imm_val(%IMM.EOF)) - %ret - -:prim_eof_objectq_entry -%scope prim_eof_objectq +# (eof? x). The `eof` value itself is bound at startup in p1_main as a +# direct global -> IMM.EOF, not via a primitive thunk. +:prim_eofq_entry +%scope prim_eofq %car(t0, a0) %li(t1, %imm_val(%IMM.EOF)) %li(a0, %imm_val(%IMM.FALSE)) @@ -5897,8 +6019,8 @@ :name_sys_execve "sys-execve" '0000000000' :name_sys_waitid "sys-waitid" '0000000000' :name_sys_argv "sys-argv" '00000000000000' -:name_eof_object "eof-object" '0000000000' -:name_eof_objectq "eof-object?" '00000000' +:name_eof "eof" '00000000' +:name_eofq "eof?" '000000' :name_values "values" '00' :name_call_with_values "call-with-values" '00000000000000' :name_display "display" @@ -5986,8 +6108,7 @@ &name_sys_execve %(0) $(10) &prim_sys_execve_entry %(0) &name_sys_waitid %(0) $(10) &prim_sys_waitid_entry %(0) &name_sys_argv %(0) $(8) &prim_sys_argv_entry %(0) -&name_eof_object %(0) $(10) &prim_eof_object_entry %(0) -&name_eof_objectq %(0) $(11) &prim_eof_objectq_entry %(0) +&name_eofq %(0) $(4) &prim_eofq_entry %(0) &name_display %(0) $(7) &prim_display_entry %(0) &name_write %(0) $(5) &prim_write_entry %(0) &name_error %(0) $(5) &prim_error_entry %(0) diff --git a/tests/scheme1/039-eof.scm b/tests/scheme1/039-eof.scm @@ -1,4 +1,4 @@ -; eof-object returns the EOF singleton; eof-object? distinguishes it. -(sys-exit (if (eof-object? (eof-object)) - (if (eof-object? '()) 1 22) +; eof is the EOF singleton; eof? distinguishes it. +(sys-exit (if (eof? eof) + (if (eof? '()) 1 22) 2)) diff --git a/tests/scheme1/067-read-line.scm b/tests/scheme1/067-read-line.scm @@ -19,6 +19,6 @@ (if (and (car l1) (bytevector=? (cdr l1) "alpha") (car l2) (bytevector=? (cdr l2) "beta") (car l3) (bytevector=? (cdr l3) "gamma") - (car l4) (eof-object? (cdr l4))) + (car l4) (eof? (cdr l4))) (exit 0) (exit 1)) diff --git a/tests/scheme1/116-hex-conv.expected-exit b/tests/scheme1/116-hex-conv.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/scheme1/116-hex-conv.scm b/tests/scheme1/116-hex-conv.scm @@ -0,0 +1,73 @@ +; Hex support for number->string, string->number, and format ~x. +; Hex digits a-f are lowercase. + +(define (eq-bv a b) (bytevector=? a b)) + +; ---- number->string with radix 16 ---- + +(if (eq-bv "0" (number->string 0 16)) 0 (sys-exit 1)) +(if (eq-bv "1" (number->string 1 16)) 0 (sys-exit 2)) +(if (eq-bv "a" (number->string 10 16)) 0 (sys-exit 3)) +(if (eq-bv "f" (number->string 15 16)) 0 (sys-exit 4)) +(if (eq-bv "10" (number->string 16 16)) 0 (sys-exit 5)) +(if (eq-bv "ff" (number->string 255 16)) 0 (sys-exit 6)) +(if (eq-bv "100" (number->string 256 16)) 0 (sys-exit 7)) +(if (eq-bv "deadbeef" (number->string #xdeadbeef 16)) 0 (sys-exit 8)) +(if (eq-bv "-1" (number->string -1 16)) 0 (sys-exit 9)) +(if (eq-bv "-ff" (number->string -255 16)) 0 (sys-exit 10)) + +; Radix 10 still works (regression). +(if (eq-bv "42" (number->string 42 10)) 0 (sys-exit 11)) +(if (eq-bv "-7" (number->string -7 10)) 0 (sys-exit 12)) + +; ---- string->number with radix 16 ---- + +(if (= 0 (string->number "0" 16)) 0 (sys-exit 20)) +(if (= 1 (string->number "1" 16)) 0 (sys-exit 21)) +(if (= 15 (string->number "f" 16)) 0 (sys-exit 22)) +(if (= 15 (string->number "F" 16)) 0 (sys-exit 23)) +(if (= 16 (string->number "10" 16)) 0 (sys-exit 24)) +(if (= 255 (string->number "ff" 16)) 0 (sys-exit 25)) +(if (= 255 (string->number "FF" 16)) 0 (sys-exit 26)) +(if (= #xdeadbeef (string->number "deadbeef" 16)) 0 (sys-exit 27)) +(if (= -1 (string->number "-1" 16)) 0 (sys-exit 28)) +(if (= -255 (string->number "-ff" 16)) 0 (sys-exit 29)) + +; Failure cases for hex parse. +(if (not (string->number "" 16)) 0 (sys-exit 30)) +(if (not (string->number "-" 16)) 0 (sys-exit 31)) +(if (not (string->number "g" 16)) 0 (sys-exit 32)) +(if (not (string->number "1g" 16)) 0 (sys-exit 33)) +(if (not (string->number "1.5" 16)) 0 (sys-exit 34)) + +; Radix 10 still works (regression). +(if (= 42 (string->number "42" 10)) 0 (sys-exit 35)) +(if (= -7 (string->number "-7" 10)) 0 (sys-exit 36)) + +; ---- Round trip ---- + +(if (= #xabcd (string->number (number->string #xabcd 16) 16)) 0 (sys-exit 40)) +(if (= -255 (string->number (number->string -255 16) 16)) 0 (sys-exit 41)) + +; ---- format ~x ---- + +(define (bv-from bs) + (let ((bv (make-bytevector (length bs) 0))) + (let loop ((i 0) (xs bs)) + (if (null? xs) bv + (begin (bytevector-u8-set! bv i (car xs)) + (loop (+ i 1) (cdr xs))))))) + +; Template "~x" -> byte sequence (126 120) +(if (eq-bv "0" (format (bv-from '(126 120)) 0)) 0 (sys-exit 50)) +(if (eq-bv "ff" (format (bv-from '(126 120)) 255)) 0 (sys-exit 51)) +(if (eq-bv "-1" (format (bv-from '(126 120)) -1)) 0 (sys-exit 52)) +(if (eq-bv "deadbeef" (format (bv-from '(126 120)) #xdeadbeef)) 0 (sys-exit 53)) + +; Template "~x ~d ~x" -> mix dec and hex +; "~x ~d ~x" = (126 120 32 126 100 32 126 120) +(if (eq-bv "ff 16 10" + (format (bv-from '(126 120 32 126 100 32 126 120)) 255 16 16)) + 0 (sys-exit 54)) + +(sys-exit 0)