commit 8a0f5873757ecdbf47038906555141d5351145e4
parent 48cc13f8851f4264f2459311c53108e6b62fd5e0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 25 Apr 2026 15:34:11 -0700
scheme1: tests for hex string escapes, signed-int literals, and strict reader
53 grows inline-hex cases (\xNN; with mixed digits, max byte, leading
zeros). 75 covers +N/-N decimal literals and lone +/- as identifiers.
76 and 77 are red-path tests asserting a numeric-prefixed token with
trailing junk and a non-ident byte each abort with the new reader
diagnostics.
Diffstat:
8 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/tests/scheme1/53-string-escapes.scm b/tests/scheme1/53-string-escapes.scm
@@ -1,4 +1,6 @@
-; All five string escapes: \n \t \r \\ \" — each yields one byte.
+; All five named string escapes plus inline-hex escapes \xNN;.
+;
+; Named escapes (\n \t \r \\ \") each yield one byte:
; "x\ny\tz\r\\\"" -> 'x', LF, 'y', TAB, 'z', CR, '\\', '"' (8 bytes)
(define s "x\ny\tz\r\\\"")
(if (= (bytevector-length s) 8) 0 (sys-exit 1))
@@ -10,4 +12,28 @@
(if (= (bytevector-u8-ref s 5) 13) 0 (sys-exit 7)) ; \r
(if (= (bytevector-u8-ref s 6) 92) 0 (sys-exit 8)) ; \\
(if (= (bytevector-u8-ref s 7) 34) 0 (sys-exit 9)) ; \"
+
+; Inline-hex escape \xHEX;: 1+ hex digits, terminated by ';', value 0..255.
+; "\x41;" -> "A" (one byte, 0x41).
+(define h1 "\x41;")
+(if (= (bytevector-length h1) 1) 0 (sys-exit 10))
+(if (= (bytevector-u8-ref h1 0) 65) 0 (sys-exit 11))
+
+; Mixed: ordinary bytes interleaved with hex escapes; lowercase hex
+; digits (\xff;); single-digit short form (\x0;); max byte 0xFF.
+; "a\x42;c\xff;\x0;" -> 'a', 'B', 'c', 0xff, 0x00 (5 bytes)
+(define h2 "a\x42;c\xff;\x0;")
+(if (= (bytevector-length h2) 5) 0 (sys-exit 12))
+(if (= (bytevector-u8-ref h2 0) 97) 0 (sys-exit 13)) ; 'a'
+(if (= (bytevector-u8-ref h2 1) 66) 0 (sys-exit 14)) ; 'B'
+(if (= (bytevector-u8-ref h2 2) 99) 0 (sys-exit 15)) ; 'c'
+(if (= (bytevector-u8-ref h2 3) 255) 0 (sys-exit 16)) ; 0xff
+(if (= (bytevector-u8-ref h2 4) 0) 0 (sys-exit 17)) ; 0x00
+
+; Multi-digit form with leading zeros is accepted (still one byte).
+; Uppercase \X is NOT accepted, only \x as in R7RS-Small (untested here).
+(define h3 "\x0041;") ; leading zeros ok
+(if (= (bytevector-length h3) 1) 0 (sys-exit 18))
+(if (= (bytevector-u8-ref h3 0) 65) 0 (sys-exit 19))
+
(sys-exit 0)
diff --git a/tests/scheme1/75-signed-int.scm b/tests/scheme1/75-signed-int.scm
@@ -0,0 +1,12 @@
+; Decimal int literals with explicit sign. The reader treats `+`/`-`
+; followed by a digit as a numeric prefix; lone `+` and `-` remain
+; valid identifiers. Each check exits with its own status on failure.
+(if (= +0 0) 0 (sys-exit 1))
+(if (= +7 7) 0 (sys-exit 2))
+(if (= -7 (- 0 7)) 0 (sys-exit 3)) ; computed RHS: -7 vs -7 cannot fail
+(if (= +42 42) 0 (sys-exit 4))
+(if (= -42 (- 0 42)) 0 (sys-exit 5))
+; Lone `+` / `-` are still the arithmetic primitives.
+(if (= (+ 3 4) 7) 0 (sys-exit 6))
+(if (= (- 9 2) 7) 0 (sys-exit 7))
+(sys-exit 0)
diff --git a/tests/scheme1/76-bad-int-trailing.expected b/tests/scheme1/76-bad-int-trailing.expected
@@ -0,0 +1 @@
+scheme1: bad number literal
diff --git a/tests/scheme1/76-bad-int-trailing.expected-exit b/tests/scheme1/76-bad-int-trailing.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/scheme1/76-bad-int-trailing.scm b/tests/scheme1/76-bad-int-trailing.scm
@@ -0,0 +1,5 @@
+; A token that starts down the integer parse path (a digit, or +/-
+; followed by a digit) must be all digits after the optional sign, so
+; `+7abc` is a hard error: "scheme1: bad number literal", exit status 1.
+(define x +7abc)
+(sys-exit 0) ; reaching this exit 0 means the reader wrongly accepted +7abc
diff --git a/tests/scheme1/77-bad-ident-char.expected b/tests/scheme1/77-bad-ident-char.expected
@@ -0,0 +1 @@
+scheme1: bad identifier
diff --git a/tests/scheme1/77-bad-ident-char.expected-exit b/tests/scheme1/77-bad-ident-char.expected-exit
@@ -0,0 +1 @@
+1
diff --git a/tests/scheme1/77-bad-ident-char.scm b/tests/scheme1/77-bad-ident-char.scm
@@ -0,0 +1,6 @@
+; Identifiers may contain only letters, digits, and the R7RS-Small
+; extended chars (! $ % & * + - . / : < = > ? @ ^ _ ~). Any other byte
+; inside a token, such as the backslash below, must make the reader abort
+; with "scheme1: bad identifier" (exit 1) instead of joining the symbol.
+(define foo\bar 7)
+(sys-exit 0) ; reaching this exit 0 means the reader wrongly accepted the token