boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8a0f5873757ecdbf47038906555141d5351145e4
parent 48cc13f8851f4264f2459311c53108e6b62fd5e0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 25 Apr 2026 15:34:11 -0700

scheme1: tests for hex string escapes, signed-int literals, and strict reader

53 grows inline-hex cases (\xNN; with mixed digits, max byte, leading
zeros). 75 covers +N/-N decimal literals and lone +/- as identifiers.
76 and 77 are red-path tests asserting a numeric-prefixed token with
trailing junk and a non-ident byte each abort with the new reader
diagnostics.

Diffstat:
M tests/scheme1/53-string-escapes.scm | 28 +++++++++++++++++++++++++++-
A tests/scheme1/75-signed-int.scm | 12 ++++++++++++
A tests/scheme1/76-bad-int-trailing.expected | 1 +
A tests/scheme1/76-bad-int-trailing.expected-exit | 1 +
A tests/scheme1/76-bad-int-trailing.scm | 5 +++++
A tests/scheme1/77-bad-ident-char.expected | 1 +
A tests/scheme1/77-bad-ident-char.expected-exit | 1 +
A tests/scheme1/77-bad-ident-char.scm | 6 ++++++
8 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/tests/scheme1/53-string-escapes.scm b/tests/scheme1/53-string-escapes.scm
@@ -1,4 +1,6 @@
-; All five string escapes: \n \t \r \\ \" — each yields one byte.
+; All five named string escapes plus inline-hex escapes \xNN;.
+;
+; Named escapes (\n \t \r \\ \") each yield one byte:
 ; "x\ny\tz\r\\\"" -> 'x', LF, 'y', TAB, 'z', CR, '\\', '"' (8 bytes)
 (define s "x\ny\tz\r\\\"")
 (if (= (bytevector-length s) 8) 0 (sys-exit 1))
@@ -10,4 +12,28 @@
 (if (= (bytevector-u8-ref s 5) 13) 0 (sys-exit 7)) ; \r
 (if (= (bytevector-u8-ref s 6) 92) 0 (sys-exit 8)) ; \\
 (if (= (bytevector-u8-ref s 7) 34) 0 (sys-exit 9)) ; \"
+
+; Inline-hex escape \xHEX;: 1+ hex digits, terminated by ';', value 0..255.
+; "\x41;" -> "A" (one byte, 0x41).
+(define h1 "\x41;")
+(if (= (bytevector-length h1) 1) 0 (sys-exit 10))
+(if (= (bytevector-u8-ref h1 0) 65) 0 (sys-exit 11))
+
+; Mixed: ordinary bytes interleaved with hex escapes; lower/upper-case
+; hex digits; leading-zero short form; max byte 0xFF.
+; "a\x42;c\xff;\x0;" -> 'a', 'B', 'c', 0xff, 0x00 (5 bytes)
+(define h2 "a\x42;c\xff;\x0;")
+(if (= (bytevector-length h2) 5) 0 (sys-exit 12))
+(if (= (bytevector-u8-ref h2 0) 97) 0 (sys-exit 13)) ; 'a'
+(if (= (bytevector-u8-ref h2 1) 66) 0 (sys-exit 14)) ; 'B'
+(if (= (bytevector-u8-ref h2 2) 99) 0 (sys-exit 15)) ; 'c'
+(if (= (bytevector-u8-ref h2 3) 255) 0 (sys-exit 16)) ; 0xff
+(if (= (bytevector-u8-ref h2 4) 0) 0 (sys-exit 17)) ; 0x00
+
+; Multi-digit form (still byte-sized) and uppercase X is NOT accepted —
+; only lowercase \x; this matches R7RS-Small literal syntax.
+(define h3 "\x0041;") ; leading zeros ok
+(if (= (bytevector-length h3) 1) 0 (sys-exit 18))
+(if (= (bytevector-u8-ref h3 0) 65) 0 (sys-exit 19))
+
 (sys-exit 0)
diff --git a/tests/scheme1/75-signed-int.scm b/tests/scheme1/75-signed-int.scm
@@ -0,0 +1,12 @@
+; Decimal int literals with explicit sign. The reader treats `+`/`-`
+; followed by a digit as a numeric prefix; lone `+` and `-` remain
+; valid identifiers (they are bound to the arithmetic primitives).
+(if (= +0 0) 0 (sys-exit 1))
+(if (= +7 7) 0 (sys-exit 2))
+(if (= -7 -7) 0 (sys-exit 3))
+(if (= +42 42) 0 (sys-exit 4))
+(if (= -42 (- 0 42)) 0 (sys-exit 5))
+; Lone `+` / `-` are still the arithmetic primitives.
+(if (= (+ 3 4) 7) 0 (sys-exit 6))
+(if (= (- 9 2) 7) 0 (sys-exit 7))
+(sys-exit 0)
diff --git a/tests/scheme1/76-bad-int-trailing.expected b/tests/scheme1/76-bad-int-trailing.expected
@@ -0,0 +1 @@
+scheme1: bad number literal
diff --git a/tests/scheme1/76-bad-int-trailing.expected-exit b/tests/scheme1/76-bad-int-trailing.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/scheme1/76-bad-int-trailing.scm b/tests/scheme1/76-bad-int-trailing.scm
@@ -0,0 +1,5 @@
+; A token whose first byte commits it to the integer parse path
+; (digit, or +/- followed by a digit) must consist of only digits
+; after the optional sign. Trailing junk like `+7abc` is a hard error.
+(define x +7abc)
+(sys-exit 0)
diff --git a/tests/scheme1/77-bad-ident-char.expected b/tests/scheme1/77-bad-ident-char.expected
@@ -0,0 +1 @@
+scheme1: bad identifier
diff --git a/tests/scheme1/77-bad-ident-char.expected-exit b/tests/scheme1/77-bad-ident-char.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/scheme1/77-bad-ident-char.scm b/tests/scheme1/77-bad-ident-char.scm
@@ -0,0 +1,6 @@
+; Identifiers may contain only letters, digits, and the R7RS-Small
+; extended chars (! $ % & * + - . / : < = > ? @ ^ _ ~). A backslash
+; inside a token (or any other byte outside that set) is rejected by
+; the reader rather than silently absorbed into the symbol's name.
+(define foo\bar 7)
+(sys-exit 0)