commit 32f98055288f0ef2591cf9cd95f03eab81f761cc
parent 0e0688006486288d9b30757cc807f016a71b2e6f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 10:56:59 -0700
scheme1: hex number<->string and format, rename eof-object to eof
Diffstat:
7 files changed, 251 insertions(+), 53 deletions(-)
diff --git a/docs/SCHEME1.md b/docs/SCHEME1.md
@@ -55,7 +55,7 @@ The runtime knows exactly:
| empty list | `'()`, disjoint from pair |
| procedure | closure or primitive |
| record | via `define-record-type` |
-| eof-object | singleton; produced by `(eof-object)` and EOF reads |
+| eof-object | singleton; bound at top level as `eof`; also returned on EOF reads |
| unspecified | singleton; result of `set!`, `define`, `(if #f x)`, etc. |
Multiple-values packs flow through `values` / `call-with-values` / `let-values`
@@ -140,8 +140,7 @@ these.
**Equality / predicates**
`eq?`, `equal?`, `not`, `null?`, `pair?`, `boolean?`, `integer?`,
-`symbol?`, `string?` (≡ `bytevector?`), `procedure?`, `zero?`,
-`eof-object?`.
+`symbol?`, `string?` (≡ `bytevector?`), `procedure?`, `zero?`, `eof?`.
**Pairs**
`cons`, `car`, `cdr`, `set-car!`, `set-cdr!`, `length`, `list-ref`.
@@ -163,19 +162,23 @@ src-end`), `bytevector-append` (variadic), `bytevector=?`,
`string-length` (strlen of the data buffer up to the first NUL).
**Symbols / numbers as text**
-`string->symbol`, `symbol->string`, `number->string` (decimal only;
-optional radix arg accepted but ignored), `string->number` (decimal
-only; returns `#f` on parse failure).
+`string->symbol`, `symbol->string`, `number->string` (decimal by
+default; lowercase hex when the optional radix arg is `16`, with a
+leading `-` for negatives; any other radix value falls back to
+decimal), `string->number` (decimal by default; hex when radix is
+`16`, accepting upper- or lowercase digits and an optional leading
+`+`/`-`; returns `#f` on parse failure).
**I/O and error**
`display`, `write`, `format`, `error`. `format` understands `~a`
-(display), `~s` (write), `~d` (decimal fixnum), `~%` (newline),
-`~~` (literal tilde); unknown directives pass through verbatim.
-`error` writes `scheme1: error: <msg> <irritants…>` to stderr and
-exits with status 1.
+(display), `~s` (write), `~d` (decimal fixnum), `~x` (lowercase hex
+fixnum, signed: leading `-` for negatives), `~%` (newline), `~~`
+(literal tilde); unknown directives pass through verbatim. `error`
+writes `scheme1: error: <msg> <irritants…>` to stderr and exits with
+status 1.
**EOF**
-`eof-object`, `eof-object?`.
+`eof` (the singleton, bound at startup), `eof?`.
**Multiple values**
`values`, `call-with-values`. `(values x)` is identical to `x` in
@@ -253,8 +256,8 @@ convention) when failure needs to be observable.
- `open-input`, `open-output`, `open-append`, `close`,
`file-exists?`.
- Buffered reads: `read-bytes`, `read-line`, `read-all`. Each
- returns either `(#t . value)` (where `value` may be the
- eof-object) or `(#f . errno)` from the underlying syscall.
+ returns either `(#t . value)` (where `value` may be `eof`) or
+ `(#f . errno)` from the underlying syscall.
- Unbuffered writes: `write-bytes`, `write-string`, `write-line`.
Writes loop until the requested length is delivered or a syscall
error surfaces.
diff --git a/scheme1/prelude.scm b/scheme1/prelude.scm
@@ -511,7 +511,7 @@
(cond
((not (car r)) r)
((zero? (cdr r))
- (cons #t (if (zero? i) (eof-object) (bytevector-copy out 0 i))))
+ (cons #t (if (zero? i) eof (bytevector-copy out 0 i))))
(else (loop i)))))))))
(define (read-line p)
@@ -536,7 +536,7 @@
(cond
((not (car r)) r)
((zero? (cdr r))
- (cons #t (if (null? acc) (eof-object) (bv-concat-reverse acc))))
+ (cons #t (if (null? acc) eof (bv-concat-reverse acc))))
(else (loop acc))))))))
(define (read-all p)
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -44,7 +44,7 @@
%macro SYMTAB_CAP_SLOTS() 8192 %endm
%macro READBUF_CAP_BYTES() 1048576 %endm
%macro HEAP_CAP_BYTES() 0x10000000 %endm
-%macro SCRATCH_CAP_BYTES() 0x1000000 %endm
+%macro SCRATCH_CAP_BYTES() 0x8000000 %endm
# =========================================================================
# Tag idioms
@@ -221,6 +221,15 @@
%call(&intern_special_forms)
%call(®ister_primitives)
+ # Bind `eof` as a direct global -> IMM.EOF value. (Predicate is `eof?`,
+ # registered via prim_table.) Cheaper and shorter than a 0-arg thunk.
+ %la(a0, &name_eof)
+ %li(a1, 3)
+ %call(&intern)
+ %untag_sym(a0, a0)
+ %li(a1, %imm_val(%IMM.EOF))
+ %call(&sym_set_global)
+
# load_source(argv[1])
%ld_global(a0, &saved_argv)
%ld(a0, a0, 8)
@@ -3448,43 +3457,115 @@
%ldl(a0, bv)
})
-# (number->string n [radix]) -- decimal repr in a fresh bv. The radix
-# arg is part of the surface per LISP.md (10 and 16 required) but the
-# implementation is decimal-only for now: the second arg, if present,
-# is silently ignored. str_putint takes the raw value, so untag first;
-# str_alloc(0) gives an empty NUL-terminated wrapper that str_putint
-# grows in place. +0 holds the raw value across str_alloc.
+# (number->string n [radix]) -- fresh bv with the integer's text form.
+# Radix 16 selects str_puthex (lowercase, leading '-' for negatives);
+# any other radix (or omitted) selects decimal. str_alloc(0) gives an
+# empty NUL-terminated wrapper that the str_put* helper grows in place.
-%fn2(prim_number_to_string_entry, {value pad}, {
+%fn2(prim_number_to_string_entry, {value radix}, {
%car(t0, a0)
%sari(t0, t0, 3) ; raw value
%stl(t0, value)
+
+ # Default radix = 10. If a second arg is present, untag it.
+ %li(t0, 10)
+ %stl(t0, radix)
+ %cdr(t1, a0)
+ %if_nil(t0, t1, &::have_radix)
+ %car(t0, t1)
+ %sari(t0, t0, 3)
+ %stl(t0, radix)
+ ::have_radix
+
%li(a0, 0)
%call(&str_alloc)
%ldl(a1, value)
+ %ldl(t0, radix)
+ %li(t1, 16)
+ %beq(t0, t1, &::hex)
%tail(&str_putint)
+ ::hex
+ %tail(&str_puthex)
})
-# (string->number bv [radix]) -- delegate parsing to parse_dec. Returns
-# #f on non-bytevector input, empty string, lone "-", or any non-digit
-# byte. Radix arg is part of the surface per LISP.md (10 and 16 required)
-# but ignored for now -- decimal only.
-%fn(prim_string_to_number_entry, 0, {
- %car(a0, a0)
- %tagof(t0, a0)
+# (string->number bv [radix]) -- decimal goes through parse_dec; radix
+# 16 strips an optional leading '-' and calls parse_hex over the
+# remainder, demanding it consume every byte. Returns #f on
+# non-bytevector input, empty string, lone "-", or any non-recognized
+# byte. Other radices are not pinned by LISP.md and currently fall
+# through to the decimal path.
+%fn2(prim_string_to_number_entry, {args ptr len sign}, {
+ %stl(a0, args)
+
+ %car(t2, a0)
+ %tagof(t0, t2)
%li(t1, %TAG.HEAP)
%bne(t0, t1, &::fail)
- %hdr_type(t0, a0)
+ %hdr_type(t0, t2)
%li(t1, %HDR.BV)
%bne(t0, t1, &::fail)
- %heap_ld(t0, a0, %BV.data)
- %heap_ld(t1, a0, %BV.hdr)
+ %heap_ld(t0, t2, %BV.data)
+ %heap_ld(t1, t2, %BV.hdr)
%shri(t1, t1, 8) ; length
- %mov(a0, t0)
- %mov(a1, t1)
+ %stl(t0, ptr)
+ %stl(t1, len)
+
+ # Inspect the optional radix arg.
+ %ldl(t0, args)
+ %cdr(t0, t0)
+ %if_nil(t1, t0, &::dec)
+ %car(t1, t0)
+ %sari(t1, t1, 3)
+ %li(t2, 16)
+ %beq(t1, t2, &::hex)
+
+ ::dec
+ %ldl(a0, ptr)
+ %ldl(a1, len)
%call(&parse_dec) ; -> (a0=value, a1=ok)
%bnez(a1, &::end)
+ %b(&::fail)
+
+ ::hex
+ # Strip optional leading '+' / '-'.
+ %li(t0, 0)
+ %stl(t0, sign)
+ %ldl(t0, len)
+ %beqz(t0, &::fail)
+ %ldl(t1, ptr)
+ %lb(t2, t1, 0)
+ %addi(t0, t2, -45) ; '-'
+ %beqz(t0, &::hex_neg)
+ %addi(t0, t2, -43) ; '+'
+ %beqz(t0, &::hex_skip_sign)
+ %b(&::hex_parse)
+ ::hex_neg
+ %li(t0, 1)
+ %stl(t0, sign)
+ ::hex_skip_sign
+ %ldl(t0, ptr)
+ %addi(t0, t0, 1)
+ %stl(t0, ptr)
+ %ldl(t0, len)
+ %addi(t0, t0, -1)
+ %stl(t0, len)
+ %beqz(t0, &::fail)
+
+ ::hex_parse
+ %ldl(a0, ptr)
+ %ldl(a1, len)
+ %call(&parse_hex) ; -> (a0=value, a1=consumed)
+ %ldl(t0, len)
+ %bne(a1, t0, &::fail) ; demand full consumption
+ %ldl(t0, sign)
+ %beqz(t0, &::hex_pos)
+ %li(t1, 0)
+ %sub(a0, t1, a0)
+ ::hex_pos
+ %shli(a0, a0, 3) ; mkfix
+ %b(&::end)
+
::fail
%li(a0, %imm_val(%IMM.FALSE))
::end
@@ -4890,6 +4971,37 @@
%tail(&str_putn)
})
+# str_puthex(bv=a0, value=a1) -> bv (a0). Signed hex: emits a leading
+# '-' for negatives, then unsigned hex of |value| via fmt_hex. The bv
+# wrapper pointer is stable across str_putc / str_putn (only the
+# internal data buffer can move), so we reload it from the local.
+%fn2(str_puthex, {bv value}, {
+ %stl(a0, bv)
+ %stl(a1, value)
+
+ %bltz(a1, &::neg)
+ %b(&::pos)
+
+ ::neg
+ %ldl(a0, bv)
+ %li(a1, 45) ; '-'
+ %call(&str_putc)
+ %ldl(t0, value)
+ %li(t1, 0)
+ %sub(t0, t1, t0)
+ %stl(t0, value)
+
+ ::pos
+ %la(a0, &writer_num_buf)
+ %ldl(a1, value)
+ %call(&fmt_hex) ; n_bytes (a0)
+
+ %mov(a2, a0)
+ %la(a1, &writer_num_buf)
+ %ldl(a0, bv)
+ %tail(&str_putn)
+})
+
# sym_name(idx=a0) -> (ptr=a0, len=a1). Leaf. idx is the untagged sym
# slot index; both fields come straight out of the symtab entry.
:sym_name
@@ -5229,9 +5341,10 @@
# (format template-bv arg ...). Walks the template bv byte by byte;
# `~X` consumes the next byte as a directive: a (display), s (write),
-# d (decimal fixnum), % (newline), ~ (literal tilde). Unknown specs
-# pass through verbatim. Returns the assembled bv; the caller decides
-# how to consume it (e.g. (display (format ...))).
+# d (decimal fixnum), x (lowercase hex fixnum, signed), % (newline),
+# ~ (literal tilde). Unknown specs pass through verbatim. Returns the
+# assembled bv; the caller decides how to consume it (e.g.
+# (display (format ...))).
#
# Locals:
# out bv
@@ -5298,6 +5411,8 @@
%beqz(t1, &::spec_s)
%addi(t1, a3, -100) ; 'd'
%beqz(t1, &::spec_d)
+ %addi(t1, a3, -120) ; 'x'
+ %beqz(t1, &::spec_x)
%addi(t1, a3, -37) ; '%'
%beqz(t1, &::spec_pct)
%addi(t1, a3, -126) ; '~'
@@ -5358,6 +5473,16 @@
%call(&str_putint)
%b(&::loop)
+ ::spec_x
+ %ldl(t0, args)
+ %car(t1, t0)
+ %cdr(t0, t0)
+ %stl(t0, args)
+ %sari(a1, t1, 3)
+ %ldl(a0, out)
+ %call(&str_puthex)
+ %b(&::loop)
+
::spec_pct
%ldl(a0, out)
%li(a1, 10)
@@ -5653,13 +5778,10 @@
%ldl(a0, head)
})
-# (eof-object) and (eof-object? x).
-:prim_eof_object_entry
- %li(a0, %imm_val(%IMM.EOF))
- %ret
-
-:prim_eof_objectq_entry
-%scope prim_eof_objectq
+# (eof? x). The `eof` value itself is bound at startup in p1_main as a
+# direct global -> IMM.EOF, not via a primitive thunk.
+:prim_eofq_entry
+%scope prim_eofq
%car(t0, a0)
%li(t1, %imm_val(%IMM.EOF))
%li(a0, %imm_val(%IMM.FALSE))
@@ -5897,8 +6019,8 @@
:name_sys_execve "sys-execve" '0000000000'
:name_sys_waitid "sys-waitid" '0000000000'
:name_sys_argv "sys-argv" '00000000000000'
-:name_eof_object "eof-object" '0000000000'
-:name_eof_objectq "eof-object?" '00000000'
+:name_eof "eof" '00000000'
+:name_eofq "eof?" '000000'
:name_values "values" '00'
:name_call_with_values "call-with-values" '00000000000000'
:name_display "display"
@@ -5986,8 +6108,7 @@
&name_sys_execve %(0) $(10) &prim_sys_execve_entry %(0)
&name_sys_waitid %(0) $(10) &prim_sys_waitid_entry %(0)
&name_sys_argv %(0) $(8) &prim_sys_argv_entry %(0)
-&name_eof_object %(0) $(10) &prim_eof_object_entry %(0)
-&name_eof_objectq %(0) $(11) &prim_eof_objectq_entry %(0)
+&name_eofq %(0) $(4) &prim_eofq_entry %(0)
&name_display %(0) $(7) &prim_display_entry %(0)
&name_write %(0) $(5) &prim_write_entry %(0)
&name_error %(0) $(5) &prim_error_entry %(0)
diff --git a/tests/scheme1/039-eof.scm b/tests/scheme1/039-eof.scm
@@ -1,4 +1,4 @@
-; eof-object returns the EOF singleton; eof-object? distinguishes it.
-(sys-exit (if (eof-object? (eof-object))
- (if (eof-object? '()) 1 22)
+; eof is the EOF singleton; eof? distinguishes it.
+(sys-exit (if (eof? eof)
+ (if (eof? '()) 1 22)
2))
diff --git a/tests/scheme1/067-read-line.scm b/tests/scheme1/067-read-line.scm
@@ -19,6 +19,6 @@
(if (and (car l1) (bytevector=? (cdr l1) "alpha")
(car l2) (bytevector=? (cdr l2) "beta")
(car l3) (bytevector=? (cdr l3) "gamma")
- (car l4) (eof-object? (cdr l4)))
+ (car l4) (eof? (cdr l4)))
(exit 0)
(exit 1))
diff --git a/tests/scheme1/116-hex-conv.expected-exit b/tests/scheme1/116-hex-conv.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/scheme1/116-hex-conv.scm b/tests/scheme1/116-hex-conv.scm
@@ -0,0 +1,73 @@
+; Hex support for number->string, string->number, and format ~x.
+; Hex digits a-f are lowercase.
+
+(define (eq-bv a b) (bytevector=? a b))
+
+; ---- number->string with radix 16 ----
+
+(if (eq-bv "0" (number->string 0 16)) 0 (sys-exit 1))
+(if (eq-bv "1" (number->string 1 16)) 0 (sys-exit 2))
+(if (eq-bv "a" (number->string 10 16)) 0 (sys-exit 3))
+(if (eq-bv "f" (number->string 15 16)) 0 (sys-exit 4))
+(if (eq-bv "10" (number->string 16 16)) 0 (sys-exit 5))
+(if (eq-bv "ff" (number->string 255 16)) 0 (sys-exit 6))
+(if (eq-bv "100" (number->string 256 16)) 0 (sys-exit 7))
+(if (eq-bv "deadbeef" (number->string #xdeadbeef 16)) 0 (sys-exit 8))
+(if (eq-bv "-1" (number->string -1 16)) 0 (sys-exit 9))
+(if (eq-bv "-ff" (number->string -255 16)) 0 (sys-exit 10))
+
+; Radix 10 still works (regression).
+(if (eq-bv "42" (number->string 42 10)) 0 (sys-exit 11))
+(if (eq-bv "-7" (number->string -7 10)) 0 (sys-exit 12))
+
+; ---- string->number with radix 16 ----
+
+(if (= 0 (string->number "0" 16)) 0 (sys-exit 20))
+(if (= 1 (string->number "1" 16)) 0 (sys-exit 21))
+(if (= 15 (string->number "f" 16)) 0 (sys-exit 22))
+(if (= 15 (string->number "F" 16)) 0 (sys-exit 23))
+(if (= 16 (string->number "10" 16)) 0 (sys-exit 24))
+(if (= 255 (string->number "ff" 16)) 0 (sys-exit 25))
+(if (= 255 (string->number "FF" 16)) 0 (sys-exit 26))
+(if (= #xdeadbeef (string->number "deadbeef" 16)) 0 (sys-exit 27))
+(if (= -1 (string->number "-1" 16)) 0 (sys-exit 28))
+(if (= -255 (string->number "-ff" 16)) 0 (sys-exit 29))
+
+; Failure cases for hex parse.
+(if (not (string->number "" 16)) 0 (sys-exit 30))
+(if (not (string->number "-" 16)) 0 (sys-exit 31))
+(if (not (string->number "g" 16)) 0 (sys-exit 32))
+(if (not (string->number "1g" 16)) 0 (sys-exit 33))
+(if (not (string->number "1.5" 16)) 0 (sys-exit 34))
+
+; Radix 10 still works (regression).
+(if (= 42 (string->number "42" 10)) 0 (sys-exit 35))
+(if (= -7 (string->number "-7" 10)) 0 (sys-exit 36))
+
+; ---- Round trip ----
+
+(if (= #xabcd (string->number (number->string #xabcd 16) 16)) 0 (sys-exit 40))
+(if (= -255 (string->number (number->string -255 16) 16)) 0 (sys-exit 41))
+
+; ---- format ~x ----
+
+(define (bv-from bs)
+ (let ((bv (make-bytevector (length bs) 0)))
+ (let loop ((i 0) (xs bs))
+ (if (null? xs) bv
+ (begin (bytevector-u8-set! bv i (car xs))
+ (loop (+ i 1) (cdr xs)))))))
+
+; Template "~x" -> byte sequence (126 120)
+(if (eq-bv "0" (format (bv-from '(126 120)) 0)) 0 (sys-exit 50))
+(if (eq-bv "ff" (format (bv-from '(126 120)) 255)) 0 (sys-exit 51))
+(if (eq-bv "-1" (format (bv-from '(126 120)) -1)) 0 (sys-exit 52))
+(if (eq-bv "deadbeef" (format (bv-from '(126 120)) #xdeadbeef)) 0 (sys-exit 53))
+
+; Template "~x ~d ~x" -> mix dec and hex
+; "~x ~d ~x" = (126 120 32 126 100 32 126 120)
+(if (eq-bv "ff 16 10"
+ (format (bv-from '(126 120 32 126 100 32 126 120)) 255 16 16))
+ 0 (sys-exit 54))
+
+(sys-exit 0)