commit 3df97ae35fed2a45793173fd0735a647df41e514
parent 364cb5c1b8cdf9e70f6dfb81015cb5d8e22c0cee
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 08:32:47 -0700
cc: use mark+reset in lex+pp
Diffstat:
| M | cc/lex.scm | | | 540 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
| M | cc/pp.scm | | | 29 | +++++++++++++++++++---------- |
2 files changed, 318 insertions(+), 251 deletions(-)
diff --git a/cc/lex.scm b/cc/lex.scm
@@ -18,6 +18,20 @@
;; - Comments are stripped at the same level as whitespace.
;; - NL tokens are emitted at every physical newline so pp can use
;; them to terminate directives.
+;;
+;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm):
+;;
+;; - Token-producing helpers wrap their inner work in a heap-mark /
+;; heap-rewind! arena. The slots that must survive the rewind
+;; (start-loc and the integer holders for npos/nline/ncol) are bound
+;; *before* the (set! mark (heap-mark)) so the let's env extensions
+;; live below the mark. The byte-run scanners' tail-call env frames
+;; and any %lex-peek 4-lists are above the mark and get reclaimed.
+;; For helpers that produce a fresh bytevector (ident, string), the
+;; bv is allocated post-rewind so it persists into the parent arena.
+;; - Numeric digit runs accumulate their value inline via
+;; %accum-int-while; they no longer materialize a per-byte cons list
+;; and then a separate %digits-value walk.
;; --------------------------------------------------------------------
;; Byte-class predicates (raw u8 values, not chars).
@@ -114,23 +128,6 @@
(else #t)))
;; --------------------------------------------------------------------
-;; Build a fresh logical-byte snapshot of `src`. Used by lex-read-*
-;; helpers (which consume from a bv with translations already applied,
-;; rather than re-walking through %lex-peek for each character class
-;; check). The translated bv mirrors the original 1:1 for ASCII text
-;; that contains no trigraphs and no line splices, which is the common
-;; case; tests rarely exercise the slow path.
-;;
-;; Returns a fresh bv. Original line/col tracking is unaffected because
-;; the main tokenizer walks via %lex-peek directly; helpers only use
-;; the snapshot for value-extraction (number/string/char/ident bytes).
-;;
-;; We do not currently materialize a snapshot — every helper uses
-;; %lex-peek directly. The function is reserved for later if profiling
-;; shows it's worth it.
-;; --------------------------------------------------------------------
-
-;; --------------------------------------------------------------------
;; Whitespace + comment skipper. Returns (pos line col).
;; Handles spaces/tabs, // line comments, /* block */ comments. Does
;; *not* consume `\n` — newlines are tokens.
@@ -241,50 +238,150 @@
file start-line start-col)))))))))))
;; --------------------------------------------------------------------
-;; Identifier / keyword reader.
+;; Byte-run scanners.
;;
-;; Returns (tok npos nline ncol). Caller has already verified that the
-;; first byte at `pos` satisfies %ident-start?.
+;; Tail-recursive walkers used by ident/number/string readers. None
+;; allocate per scanned byte on the fast path (only %lex-peek 4-lists
+;; on trigraph/splice/newline); the per-iteration env frames allocated
+;; by tail recursion are reclaimed by the caller's heap-rewind!.
+;;
+;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol)
+;; - %fill-while-bv: write matching bytes into a pre-sized bv.
+;; - %accum-int-while: accumulate a base-N integer over digit bytes.
+;; (val count npos nline ncol)
+;; - %accum-octal-bounded: same, but stops after k digits.
;; --------------------------------------------------------------------
-(define (%collect-ident src pos line col)
- ;; Walk %ident-cont? bytes; return (bytes-fwd npos nline ncol) where
- ;; bytes-fwd is a forward list of byte ints. Caller calls %bv-of-bytes
- ;; once to materialize the name, avoiding a per-byte bv allocation.
+(define (%scan-while pred src pos line col)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (cnt 0))
+ (cond
+ ((>= pos n) (list cnt pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (if (pred b)
+ (loop (+ pos 1) line (+ col 1) (+ cnt 1))
+ (list cnt pos line col)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (if (and b2 (pred b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1))
+ (list cnt pos line col)))))))))))
+
+(define (%fill-while-bv pred src pos line col bv idx)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (idx idx))
+ (cond
+ ((>= pos n) idx)
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (cond
+ ((pred b)
+ (bytevector-u8-set! bv idx b)
+ (loop (+ pos 1) line (+ col 1) (+ idx 1)))
+ (else idx)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((and b2 (pred b2))
+ (bytevector-u8-set! bv idx b2)
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))
+ (else idx)))))))))))
+
+(define (%digit-val-byte b)
+ ;; ASCII digit byte → integer value. Caller guarantees b is a valid
+ ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F).
+ (cond ((%digit? b) (- b 48))
+ ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10))
+ ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10))
+ (else 0)))
+
+(define (%accum-int-while pred src pos line col base)
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0))
+ (cond
+ ((>= pos n) (list val cnt pos line col))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ((%fast-byte? b)
+ (if (pred b)
+ (loop (+ pos 1) line (+ col 1)
+ (+ (* val base) (%digit-val-byte b)) (+ cnt 1))
+ (list val cnt pos line col)))
+ (else
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (if (and b2 (pred b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p)
+ (+ (* val base) (%digit-val-byte b2)) (+ cnt 1))
+ (list val cnt pos line col)))))))))))
+
+(define (%accum-octal-bounded src pos line col k)
+ ;; Up to k octal digits. Returns (val count npos nline ncol).
(let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (acc '()))
+ (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0))
(cond
- ((>= pos n) (list (reverse acc) pos line col))
+ ((zero? k) (list val cnt pos line col))
+ ((>= pos n) (list val cnt pos line col))
(else
(let ((b (bytevector-u8-ref src pos)))
(cond
((%fast-byte? b)
- (if (%ident-cont? b)
- (loop (+ pos 1) line (+ col 1) (cons b acc))
- (list (reverse acc) pos line col)))
+ (if (%octal? b)
+ (loop (+ pos 1) line (+ col 1) (- k 1)
+ (+ (* val 8) (- b 48)) (+ cnt 1))
+ (list val cnt pos line col)))
(else
(let* ((p (%lex-peek src pos line col))
(b2 (%pk-byte p)))
- (if (and b2 (%ident-cont? b2))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (cons b2 acc))
- (list (reverse acc) pos line col)))))))))))
+ (if (and b2 (%octal? b2))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1)
+ (+ (* val 8) (- b2 48)) (+ cnt 1))
+ (list val cnt pos line col)))))))))))
+;; --------------------------------------------------------------------
+;; Identifier / keyword reader.
+;;
+;; Returns (tok npos nline ncol). Caller has already verified that the
+;; first byte at `pos` satisfies %ident-start?.
+;;
+;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run,
+;; then we rewind, allocate `name` bv post-rewind so it survives, then
+;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The
+;; integer slots count/npos/nline/ncol are bound *before* the mark so
+;; they survive both rewinds.
+;; --------------------------------------------------------------------
(define (lex-read-ident src pos file)
;; Public for tests. Threads line/col from a fresh start.
(%lex-read-ident src pos 1 (+ pos 1) file))
(define (%lex-read-ident src pos line col file)
- (let* ((start-loc (%loc file line col))
- (r (%collect-ident src pos line col))
- (bs (car r))
- (npos (car (cdr r)))
- (nline (car (cdr (cdr r))))
- (ncol (car (cdr (cdr (cdr r)))))
- (name (%bv-of-bytes bs))
- (kw (alist-ref name %keyword-alist)))
- (cons (if kw
- (make-tok 'KW kw start-loc)
- (make-tok 'IDENT name start-loc))
- (list npos nline ncol))))
+ (let ((start-loc (%loc file line col))
+ (count 0) (npos 0) (nline 0) (ncol 0)
+ (mark 0))
+ (set! mark (heap-mark))
+ (let ((sres (%scan-while %ident-cont? src pos line col)))
+ (set! count (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))
+ (heap-rewind! mark)
+ (let ((name (make-bytevector count 0))
+ (mark2 0))
+ (set! mark2 (heap-mark))
+ (%fill-while-bv %ident-cont? src pos line col name 0)
+ (heap-rewind! mark2)
+ (let ((kw (alist-ref name %keyword-alist)))
+ (cons (if kw
+ (make-tok 'KW kw start-loc)
+ (make-tok 'IDENT name start-loc))
+ (list npos nline ncol))))))
;; --------------------------------------------------------------------
;; Number reader.
@@ -295,54 +392,13 @@
;; Float: anything looking like 1.0, 1e3, .5 → die crisply.
;;
;; Returns (tok npos nline ncol) on success. Aborts via `die` on float.
+;;
+;; %accum-int-while folds digit collection and value computation into
+;; one walk — no per-byte cons cells, no separate digits-list pass.
;; --------------------------------------------------------------------
(define (lex-read-number src pos file)
(%lex-read-number src pos 1 (+ pos 1) file))
-(define (%collect-while pred src pos line col)
- ;; Generic byte collector. Returns (forward-byte-list npos nline ncol).
- (let ((n (bytevector-length src)))
- (let loop ((pos pos) (line line) (col col) (acc '()))
- (cond
- ((>= pos n) (list (reverse acc) pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (if (pred b)
- (loop (+ pos 1) line (+ col 1) (cons b acc))
- (list (reverse acc) pos line col)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (if (and b2 (pred b2))
- (loop (%pk-pos p) (%pk-line p) (%pk-col p) (cons b2 acc))
- (list (reverse acc) pos line col)))))))))))
-
-(define (%bv-of-bytes bs)
- ;; Build a fresh bv from a list of byte ints. Allocates one bv.
- (let* ((n (length bs))
- (out (make-bytevector n 0)))
- (let loop ((i 0) (xs bs))
- (cond ((null? xs) out)
- (else (bytevector-u8-set! out i (car xs))
- (loop (+ i 1) (cdr xs)))))))
-
-(define (%digits-value bs base)
- ;; Convert list of ASCII digit bytes (already validated) to fixnum
- ;; under the given base. Big-endian (leftmost = most significant).
- (let loop ((xs bs) (acc 0))
- (cond ((null? xs) acc)
- (else
- (let* ((b (car xs))
- (d (cond ((%digit? b) (- b 48))
- ((if (< b 65) #f (if (< 70 b) #f #t))
- (+ (- b 65) 10))
- ((if (< b 97) #f (if (< 102 b) #f #t))
- (+ (- b 97) 10))
- (else 0))))
- (loop (cdr xs) (+ (* acc base) d)))))))
-
(define (%lex-read-number src pos line col file)
(let* ((start-loc (%loc file line col))
(p (%lex-peek src pos line col))
@@ -354,26 +410,25 @@
(b2 (%pk-byte q)))
(and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X'
(let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (after-x-pos (%pk-pos q))
- (after-x-line (%pk-line q))
- (after-x-col (%pk-col q))
- (r (%collect-while %hex? src after-x-pos after-x-line after-x-col))
- (digs (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r))))))
- (if (null? digs)
+ (r (%accum-int-while %hex? src
+ (%pk-pos q) (%pk-line q) (%pk-col q) 16))
+ (val (car r))
+ (cnt (car (cdr r)))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ (if (zero? cnt)
(die start-loc "expected hex digits after 0x")
- (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
- (cons (make-tok 'INT (%digits-value digs 16) start-loc)
- after)))))
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
+ (cons (make-tok 'INT val start-loc) after)))))
;; '0' alone → octal sequence (could be just zero)
((= b 48)
- (let* ((r (%collect-while %octal? src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (digs (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r))))))
+ (let* ((r (%accum-int-while %octal? src
+ (%pk-pos p) (%pk-line p) (%pk-col p) 8))
+ (val (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
;; Reject '.' / 'e' / 'E' immediately after the octal run — float.
(%check-no-float src pos2 line2 col2 file start-loc)
;; Reject stray digits 8/9 in an octal context (e.g. 089).
@@ -381,19 +436,17 @@
(b3 (%pk-byte p3)))
(if (and b3 (%digit? b3))
(die start-loc "invalid octal digit" (bv-of-byte b3))
- (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file))
- (val (%digits-value digs 8)))
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
(cons (make-tok 'INT val start-loc) after))))))
;; '1'-'9' → decimal
((%digit? b)
- (let* ((r (%collect-while %digit? src pos line col))
- (digs (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r))))))
+ (let* ((r (%accum-int-while %digit? src pos line col 10))
+ (val (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
(%check-no-float src pos2 line2 col2 file start-loc)
- (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file))
- (val (%digits-value digs 10)))
+ (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file)))
(cons (make-tok 'INT val start-loc) after))))
;; '.' followed by a digit = float-style literal — reject.
((= b 46)
@@ -446,58 +499,146 @@
(else (list pos line col))))))
;; --------------------------------------------------------------------
+;; Escape sequence reader.
+;;
+;; %scan-or-fill-escape decodes one escape sequence starting at `pos`
+;; (which points one past the leading `\\`). When `bv` is a bytevector,
+;; the resulting byte is written to (bv idx); when it is #f, no write
+;; occurs (used during the string-pass scan phase). Returns the 4-list
+;; (val npos nline ncol).
+;; --------------------------------------------------------------------
+(define (%scan-or-fill-escape src pos line col file start-loc bv idx)
+ (let* ((p (%lex-peek src pos line col))
+ (b (%pk-byte p)))
+ (cond
+ ((not b) (die start-loc "unterminated escape sequence"))
+ ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms).
+ ((or (= b 120) (= b 88)) ; 'x' / 'X'
+ (let* ((r (%accum-int-while %hex? src
+ (%pk-pos p) (%pk-line p) (%pk-col p) 16))
+ (val0 (car r))
+ (cnt (car (cdr r)))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r)))))))
+ (cond
+ ((zero? cnt) (die start-loc "expected hex digits after \\x"))
+ (else
+ (let ((val (bit-and val0 255)))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val pos2 line2 col2))))))
+ ;; \NNN — 1..3 octal digits.
+ ((%octal? b)
+ (let* ((r (%accum-octal-bounded src pos line col 3))
+ (val0 (car r))
+ (pos2 (car (cdr (cdr r))))
+ (line2 (car (cdr (cdr (cdr r)))))
+ (col2 (car (cdr (cdr (cdr (cdr r))))))
+ (val (bit-and val0 255)))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val pos2 line2 col2)))
+ (else
+ (let ((val (cond ((= b 110) 10) ; n
+ ((= b 116) 9) ; t
+ ((= b 114) 13) ; r
+ ((= b 92) 92) ; \\
+ ((= b 39) 39) ; '
+ ((= b 34) 34) ; "
+ ((= b 48) 0) ; 0 (already handled by octal but be safe)
+ ((= b 97) 7) ; \a -> BEL
+ ((= b 98) 8) ; \b
+ ((= b 102) 12) ; \f
+ ((= b 118) 11) ; \v
+ ((= b 63) 63) ; \?
+ (else
+ (die start-loc "unknown escape" (bv-of-byte b))))))
+ (cond (bv (bytevector-u8-set! bv idx val))
+ (else #f))
+ (list val (%pk-pos p) (%pk-line p) (%pk-col p)))))))
+
+;; --------------------------------------------------------------------
;; String reader.
;;
;; Caller has verified src[pos] == '"' (raw byte 34). Returns
;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended).
+;;
+;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes
+;; collapse to 1 byte each); after rewind we allocate the final bv and
+;; rerun with bv set so the bytes are written directly into it.
;; --------------------------------------------------------------------
(define (lex-read-string src pos file)
(%lex-read-string src pos 1 (+ pos 1) file))
(define (%lex-read-string src pos line col file)
- (let* ((start-loc (%loc file line col))
- ;; pos points to the opening quote — step over it first.
- (p0 (%lex-peek src pos line col))
- (b0 (%pk-byte p0)))
- (if (not (and b0 (= b0 34)))
- (die start-loc "internal: string reader on non-quote")
- (%collect-string src (%pk-pos p0) (%pk-line p0) (%pk-col p0)
- file start-loc '()))))
-
-(define (%collect-string src pos line col file start-loc acc)
- (let ((n (bytevector-length src)))
+ (let ((start-loc (%loc file line col))
+ (cnt 0) (npos 0) (nline 0) (ncol 0)
+ (mark 0))
+ ;; '"' (34) is a fast-byte and never a trigraph result, so the
+ ;; physical byte at `pos` is exactly the opening quote.
(cond
- ((>= pos n) (die start-loc "unterminated string literal"))
+ ((or (>= pos (bytevector-length src))
+ (not (= (bytevector-u8-ref src pos) 34)))
+ (die start-loc "internal: string reader on non-quote"))
(else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ;; Closing quote — fast byte but special.
- ((= b 34)
- (let ((bv (%bv-of-bytes (reverse acc))))
- (cons (make-tok 'STR bv start-loc)
- (list (+ pos 1) line (+ col 1)))))
- ((%fast-byte? b)
- (%collect-string src (+ pos 1) line (+ col 1)
- file start-loc (cons b acc)))
- (else
- ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'.
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (cond
- ((not b2)
- (die start-loc "unterminated string literal"))
- ((= b2 34)
- (let ((bv (%bv-of-bytes (reverse acc))))
- (cons (make-tok 'STR bv start-loc)
- (list (%pk-pos p) (%pk-line p) (%pk-col p)))))
- ((%newline? b2)
- (die start-loc "newline in string literal"))
- ((= b2 92)
- (%read-escape src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc acc 'string))
- (else
- (%collect-string src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc (cons b2 acc))))))))))))
+ (set! mark (heap-mark))
+ (let ((sres (%string-pass src (+ pos 1) line (+ col 1)
+ file start-loc #f)))
+ (set! cnt (car sres))
+ (set! npos (car (cdr sres)))
+ (set! nline (car (cdr (cdr sres))))
+ (set! ncol (car (cdr (cdr (cdr sres))))))
+ (heap-rewind! mark)
+ (let ((bv (make-bytevector cnt 0))
+ (mark2 0))
+ (set! mark2 (heap-mark))
+ (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv)
+ (heap-rewind! mark2)
+ (cons (make-tok 'STR bv start-loc)
+ (list npos nline ncol)))))))
+
+(define (%string-pass src pos line col file start-loc bv)
+ ;; Walk the string body (after opening "). When `bv` is #f, count
+ ;; effective bytes; when it is a bytevector, write bytes into it at
+ ;; index 0..count-1. Returns (count npos nline ncol).
+ (let ((n (bytevector-length src)))
+ (let loop ((pos pos) (line line) (col col) (idx 0))
+ (cond
+ ((>= pos n) (die start-loc "unterminated string literal"))
+ (else
+ (let ((b (bytevector-u8-ref src pos)))
+ (cond
+ ;; Closing quote — fast byte but special.
+ ((= b 34)
+ (list idx (+ pos 1) line (+ col 1)))
+ ((%fast-byte? b)
+ (cond (bv (bytevector-u8-set! bv idx b))
+ (else #f))
+ (loop (+ pos 1) line (+ col 1) (+ idx 1)))
+ (else
+ ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'.
+ (let* ((p (%lex-peek src pos line col))
+ (b2 (%pk-byte p)))
+ (cond
+ ((not b2)
+ (die start-loc "unterminated string literal"))
+ ((= b2 34)
+ (list idx (%pk-pos p) (%pk-line p) (%pk-col p)))
+ ((%newline? b2)
+ (die start-loc "newline in string literal"))
+ ((= b2 92)
+ (let* ((er (%scan-or-fill-escape
+ src (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-loc bv idx))
+ (epos (car (cdr er)))
+ (eline (car (cdr (cdr er))))
+ (ecol (car (cdr (cdr (cdr er))))))
+ (loop epos eline ecol (+ idx 1))))
+ (else
+ (cond (bv (bytevector-u8-set! bv idx b2))
+ (else #f))
+ (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1)))))))))))))
;; --------------------------------------------------------------------
;; Char reader.
@@ -526,9 +667,9 @@
((= b 39) (die start-loc "empty char literal"))
((%newline? b) (die start-loc "newline in char literal"))
((= b 92) ; escape
- (let* ((r (%read-escape src (%pk-pos p) (%pk-line p) (%pk-col p)
- file start-loc '() 'char))
- ;; r is a list (val npos nline ncol) for char-mode escapes.
+ (let* ((r (%scan-or-fill-escape src
+ (%pk-pos p) (%pk-line p) (%pk-col p)
+ file start-loc #f 0))
(val (car r))
(pos2 (car (cdr r)))
(line2 (car (cdr (cdr r))))
@@ -550,89 +691,6 @@
(die start-loc "multi-character char constant not supported")))))
;; --------------------------------------------------------------------
-;; Escape sequence reader.
-;;
-;; mode = 'string : returns (%collect-string ...) tail (recurse)
-;; mode = 'char : returns (val npos nline ncol)
-;; --------------------------------------------------------------------
-(define (%read-escape src pos line col file start-loc acc mode)
- (let* ((p (%lex-peek src pos line col))
- (b (%pk-byte p)))
- (cond
- ((not b) (die start-loc "unterminated escape sequence"))
- ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms).
- ((or (= b 120) (= b 88)) ; 'x' / 'X'
- (let* ((r (%collect-while %hex? src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (digs (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r))))))
- (if (null? digs)
- (die start-loc "expected hex digits after \\x")
- (let ((val (bit-and (%digits-value digs 16) 255)))
- (%escape-result mode val pos2 line2 col2
- src file start-loc acc)))))
- ;; \NNN — 1..3 octal digits.
- ((%octal? b)
- (let* ((r (%collect-octals src pos line col 3 '()))
- (digs (car r))
- (pos2 (car (cdr r)))
- (line2 (car (cdr (cdr r))))
- (col2 (car (cdr (cdr (cdr r)))))
- (val (bit-and (%digits-value digs 8) 255)))
- (%escape-result mode val pos2 line2 col2 src file start-loc acc)))
- (else
- (let ((val (cond ((= b 110) 10) ; n
- ((= b 116) 9) ; t
- ((= b 114) 13) ; r
- ((= b 92) 92) ; \\
- ((= b 39) 39) ; '
- ((= b 34) 34) ; "
- ((= b 48) 0) ; 0 (already handled by octal but be safe)
- ((= b 97) 7) ; \a -> BEL
- ((= b 98) 8) ; \b
- ((= b 102) 12) ; \f
- ((= b 118) 11) ; \v
- ((= b 63) 63) ; \?
- (else
- (die start-loc "unknown escape" (bv-of-byte b))))))
- (%escape-result mode val (%pk-pos p) (%pk-line p) (%pk-col p)
- src file start-loc acc))))))
-
-(define (%collect-octals src pos line col k acc)
- ;; Collect up to k octal-digit bytes into acc (as a forward list of
- ;; ASCII bytes), returning (digits npos nline ncol).
- (cond
- ((zero? k) (list (reverse acc) pos line col))
- (else
- (let ((n (bytevector-length src)))
- (cond
- ((>= pos n) (list (reverse acc) pos line col))
- (else
- (let ((b (bytevector-u8-ref src pos)))
- (cond
- ((%fast-byte? b)
- (if (%octal? b)
- (%collect-octals src (+ pos 1) line (+ col 1)
- (- k 1) (cons b acc))
- (list (reverse acc) pos line col)))
- (else
- (let* ((p (%lex-peek src pos line col))
- (b2 (%pk-byte p)))
- (if (and b2 (%octal? b2))
- (%collect-octals src (%pk-pos p) (%pk-line p) (%pk-col p)
- (- k 1) (cons b2 acc))
- (list (reverse acc) pos line col))))))))))))
-
-(define (%escape-result mode val pos line col src file start-loc acc)
- (cond
- ((eq? mode 'string)
- (%collect-string src pos line col file start-loc (cons val acc)))
- ((eq? mode 'char)
- (list val pos line col))
- (else (die start-loc "internal: bad escape mode"))))
-
-;; --------------------------------------------------------------------
;; Punctuator reader.
;;
;; Greedy longest-match against %punct-alist (cc/data.scm). The alist
diff --git a/cc/pp.scm b/cc/pp.scm
@@ -643,17 +643,26 @@
;; --- pp-eval-cexpr: #if expression evaluator ---
;; Steps: resolve `defined NAME`, macro-expand the rest, treat any
;; remaining IDENT as 0, parse with recursive descent.
+;;
+;; Arena boundary (test_93 A→B→C pattern). Everything between the mark
+;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list,
+;; where `s2` runs the full macro-expansion engine), plus the recursive
+;; parser's (val . rest) cons cell at every level. The result is a
+;; fixnum, so no pre-allocated out cell is needed — `val` survives the
+;; rewind by virtue of being an immediate. The error path goes through
+;; `die` (which sys-exits), so no rewind there.
(define (pp-eval-cexpr toks macros)
- (let* ((state (%pp-state macros '() #f 0))
- (s1 (%pp-resolve-defined toks state))
- (s2 (%pp-expand-line s1 state))
- (s3 (%pp-idents-as-zero s2)))
- (let* ((p (%pp-cx-expr s3))
- (val (car p)) (rest (cdr p)))
- (cond
- ((null? rest) val)
- (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
- (tok-kind (car rest))))))))
+ (let ((mark (heap-mark)))
+ (let* ((state (%pp-state macros '() #f 0))
+ (s1 (%pp-resolve-defined toks state))
+ (s2 (%pp-expand-line s1 state))
+ (s3 (%pp-idents-as-zero s2)))
+ (let* ((p (%pp-cx-expr s3))
+ (val (car p)) (rest (cdr p)))
+ (cond
+ ((null? rest) (heap-rewind! mark) val)
+ (else (die (tok-loc (car rest)) "#if: garbage at end of expression"
+ (tok-kind (car rest)))))))))
(define (%pp-expand-line toks state)
(let ((out (make-buf-list)))