boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 3df97ae35fed2a45793173fd0735a647df41e514
parent 364cb5c1b8cdf9e70f6dfb81015cb5d8e22c0cee
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 08:32:47 -0700

cc: use mark+reset in lex+pp

Diffstat:
Mcc/lex.scm | 540++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mcc/pp.scm | 29+++++++++++++++++++----------
2 files changed, 318 insertions(+), 251 deletions(-)

diff --git a/cc/lex.scm b/cc/lex.scm @@ -18,6 +18,20 @@ ;; - Comments are stripped at the same level as whitespace. ;; - NL tokens are emitted at every physical newline so pp can use ;; them to terminate directives. +;; +;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm): +;; +;; - Token-producing helpers wrap their inner work in a heap-mark / +;; heap-rewind! arena. The slots that must survive the rewind +;; (start-loc and the integer holders for npos/nline/ncol) are bound +;; *before* the (set! mark (heap-mark)) so the let's env extensions +;; live below the mark. The byte-run scanners' tail-call env frames +;; and any %lex-peek 4-lists are above the mark and get reclaimed. +;; For helpers that produce a fresh bytevector (ident, string), the +;; bv is allocated post-rewind so it persists into the parent arena. +;; - Numeric digit runs accumulate their value inline via +;; %accum-int-while; they no longer materialize a per-byte cons list +;; and then a separate %digits-value walk. ;; -------------------------------------------------------------------- ;; Byte-class predicates (raw u8 values, not chars). @@ -114,23 +128,6 @@ (else #t))) ;; -------------------------------------------------------------------- -;; Build a fresh logical-byte snapshot of `src`. Used by lex-read-* -;; helpers (which consume from a bv with translations already applied, -;; rather than re-walking through %lex-peek for each character class -;; check). The translated bv mirrors the original 1:1 for ASCII text -;; that contains no trigraphs and no line splices, which is the common -;; case; tests rarely exercise the slow path. -;; -;; Returns a fresh bv. Original line/col tracking is unaffected because -;; the main tokenizer walks via %lex-peek directly; helpers only use -;; the snapshot for value-extraction (number/string/char/ident bytes). -;; -;; We do not currently materialize a snapshot — every helper uses -;; %lex-peek directly. The function is reserved for later if profiling -;; shows it's worth it. -;; -------------------------------------------------------------------- - -;; -------------------------------------------------------------------- ;; Whitespace + comment skipper. Returns (pos line col). ;; Handles spaces/tabs, // line comments, /* block */ comments. Does ;; *not* consume `\n` — newlines are tokens. @@ -241,50 +238,150 @@ file start-line start-col))))))))))) ;; -------------------------------------------------------------------- -;; Identifier / keyword reader. +;; Byte-run scanners. ;; -;; Returns (tok npos nline ncol). Caller has already verified that the -;; first byte at `pos` satisfies %ident-start?. +;; Tail-recursive walkers used by ident/number/string readers. None +;; allocate per scanned byte on the fast path (only %lex-peek 4-lists +;; on trigraph/splice/newline); the per-iteration env frames allocated +;; by tail recursion are reclaimed by the caller's heap-rewind!. +;; +;; - %scan-while: count bytes that satisfy pred. (count npos nline ncol) +;; - %fill-while-bv: write matching bytes into a pre-sized bv. +;; - %accum-int-while: accumulate a base-N integer over digit bytes. +;; (val count npos nline ncol) +;; - %accum-octal-bounded: same, but stops after k digits. ;; -------------------------------------------------------------------- -(define (%collect-ident src pos line col) - ;; Walk %ident-cont? bytes; return (bytes-fwd npos nline ncol) where - ;; bytes-fwd is a forward list of byte ints. Caller calls %bv-of-bytes - ;; once to materialize the name, avoiding a per-byte bv allocation. +(define (%scan-while pred src pos line col) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (cnt 0)) + (cond + ((>= pos n) (list cnt pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (if (pred b) + (loop (+ pos 1) line (+ col 1) (+ cnt 1)) + (list cnt pos line col))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (if (and b2 (pred b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ cnt 1)) + (list cnt pos line col))))))))))) + +(define (%fill-while-bv pred src pos line col bv idx) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (idx idx)) + (cond + ((>= pos n) idx) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (cond + ((pred b) + (bytevector-u8-set! bv idx b) + (loop (+ pos 1) line (+ col 1) (+ idx 1))) + (else idx))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((and b2 (pred b2)) + (bytevector-u8-set! bv idx b2) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))) + (else idx))))))))))) + +(define (%digit-val-byte b) + ;; ASCII digit byte → integer value. Caller guarantees b is a valid + ;; digit in the relevant base (0-9 / 0-7 / 0-9a-fA-F). + (cond ((%digit? b) (- b 48)) + ((if (< b 65) #f (if (< 70 b) #f #t)) (+ (- b 65) 10)) + ((if (< b 97) #f (if (< 102 b) #f #t)) (+ (- b 97) 10)) + (else 0))) + +(define (%accum-int-while pred src pos line col base) + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (val 0) (cnt 0)) + (cond + ((>= pos n) (list val cnt pos line col)) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ((%fast-byte? b) + (if (pred b) + (loop (+ pos 1) line (+ col 1) + (+ (* val base) (%digit-val-byte b)) (+ cnt 1)) + (list val cnt pos line col))) + (else + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (if (and b2 (pred b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) + (+ (* val base) (%digit-val-byte b2)) (+ cnt 1)) + (list val cnt pos line col))))))))))) + +(define (%accum-octal-bounded src pos line col k) + ;; Up to k octal digits. Returns (val count npos nline ncol). (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (acc '())) + (let loop ((pos pos) (line line) (col col) (k k) (val 0) (cnt 0)) (cond - ((>= pos n) (list (reverse acc) pos line col)) + ((zero? k) (list val cnt pos line col)) + ((>= pos n) (list val cnt pos line col)) (else (let ((b (bytevector-u8-ref src pos))) (cond ((%fast-byte? b) - (if (%ident-cont? b) - (loop (+ pos 1) line (+ col 1) (cons b acc)) - (list (reverse acc) pos line col))) + (if (%octal? b) + (loop (+ pos 1) line (+ col 1) (- k 1) + (+ (* val 8) (- b 48)) (+ cnt 1)) + (list val cnt pos line col))) (else (let* ((p (%lex-peek src pos line col)) (b2 (%pk-byte p))) - (if (and b2 (%ident-cont? b2)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (cons b2 acc)) - (list (reverse acc) pos line col))))))))))) + (if (and b2 (%octal? b2)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (- k 1) + (+ (* val 8) (- b2 48)) (+ cnt 1)) + (list val cnt pos line col))))))))))) +;; -------------------------------------------------------------------- +;; Identifier / keyword reader. +;; +;; Returns (tok npos nline ncol). Caller has already verified that the +;; first byte at `pos` satisfies %ident-start?. +;; +;; Two-pass with heap-mark/rewind: pass 1 (%scan-while) sizes the run, +;; then we rewind, allocate `name` bv post-rewind so it survives, then +;; pass 2 (%fill-while-bv) writes into it under a fresh mark. The +;; integer slots count/npos/nline/ncol are bound *before* the mark so +;; they survive both rewinds. +;; -------------------------------------------------------------------- (define (lex-read-ident src pos file) ;; Public for tests. Threads line/col from a fresh start. (%lex-read-ident src pos 1 (+ pos 1) file)) (define (%lex-read-ident src pos line col file) - (let* ((start-loc (%loc file line col)) - (r (%collect-ident src pos line col)) - (bs (car r)) - (npos (car (cdr r))) - (nline (car (cdr (cdr r)))) - (ncol (car (cdr (cdr (cdr r))))) - (name (%bv-of-bytes bs)) - (kw (alist-ref name %keyword-alist))) - (cons (if kw - (make-tok 'KW kw start-loc) - (make-tok 'IDENT name start-loc)) - (list npos nline ncol)))) + (let ((start-loc (%loc file line col)) + (count 0) (npos 0) (nline 0) (ncol 0) + (mark 0)) + (set! mark (heap-mark)) + (let ((sres (%scan-while %ident-cont? src pos line col))) + (set! count (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))) + (heap-rewind! mark) + (let ((name (make-bytevector count 0)) + (mark2 0)) + (set! mark2 (heap-mark)) + (%fill-while-bv %ident-cont? src pos line col name 0) + (heap-rewind! mark2) + (let ((kw (alist-ref name %keyword-alist))) + (cons (if kw + (make-tok 'KW kw start-loc) + (make-tok 'IDENT name start-loc)) + (list npos nline ncol)))))) ;; -------------------------------------------------------------------- ;; Number reader. @@ -295,54 +392,13 @@ ;; Float: anything looking like 1.0, 1e3, .5 → die crisply. ;; ;; Returns (tok npos nline ncol) on success. Aborts via `die` on float. +;; +;; %accum-int-while folds digit collection and value computation into +;; one walk — no per-byte cons cells, no separate digits-list pass. ;; -------------------------------------------------------------------- (define (lex-read-number src pos file) (%lex-read-number src pos 1 (+ pos 1) file)) -(define (%collect-while pred src pos line col) - ;; Generic byte collector. Returns (forward-byte-list npos nline ncol). - (let ((n (bytevector-length src))) - (let loop ((pos pos) (line line) (col col) (acc '())) - (cond - ((>= pos n) (list (reverse acc) pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (if (pred b) - (loop (+ pos 1) line (+ col 1) (cons b acc)) - (list (reverse acc) pos line col))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (if (and b2 (pred b2)) - (loop (%pk-pos p) (%pk-line p) (%pk-col p) (cons b2 acc)) - (list (reverse acc) pos line col))))))))))) - -(define (%bv-of-bytes bs) - ;; Build a fresh bv from a list of byte ints. Allocates one bv. - (let* ((n (length bs)) - (out (make-bytevector n 0))) - (let loop ((i 0) (xs bs)) - (cond ((null? xs) out) - (else (bytevector-u8-set! out i (car xs)) - (loop (+ i 1) (cdr xs))))))) - -(define (%digits-value bs base) - ;; Convert list of ASCII digit bytes (already validated) to fixnum - ;; under the given base. Big-endian (leftmost = most significant). - (let loop ((xs bs) (acc 0)) - (cond ((null? xs) acc) - (else - (let* ((b (car xs)) - (d (cond ((%digit? b) (- b 48)) - ((if (< b 65) #f (if (< 70 b) #f #t)) - (+ (- b 65) 10)) - ((if (< b 97) #f (if (< 102 b) #f #t)) - (+ (- b 97) 10)) - (else 0)))) - (loop (cdr xs) (+ (* acc base) d))))))) - (define (%lex-read-number src pos line col file) (let* ((start-loc (%loc file line col)) (p (%lex-peek src pos line col)) @@ -354,26 +410,25 @@ (b2 (%pk-byte q))) (and b2 (or (= b2 120) (= b2 88))))) ; 'x' or 'X' (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p))) - (after-x-pos (%pk-pos q)) - (after-x-line (%pk-line q)) - (after-x-col (%pk-col q)) - (r (%collect-while %hex? src after-x-pos after-x-line after-x-col)) - (digs (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r)))))) - (if (null? digs) + (r (%accum-int-while %hex? src + (%pk-pos q) (%pk-line q) (%pk-col q) 16)) + (val (car r)) + (cnt (car (cdr r))) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + (if (zero? cnt) (die start-loc "expected hex digits after 0x") - (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) - (cons (make-tok 'INT (%digits-value digs 16) start-loc) - after))))) + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) + (cons (make-tok 'INT val start-loc) after))))) ;; '0' alone → octal sequence (could be just zero) ((= b 48) - (let* ((r (%collect-while %octal? src (%pk-pos p) (%pk-line p) (%pk-col p))) - (digs (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r)))))) + (let* ((r (%accum-int-while %octal? src + (%pk-pos p) (%pk-line p) (%pk-col p) 8)) + (val (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) ;; Reject '.' / 'e' / 'E' immediately after the octal run — float. (%check-no-float src pos2 line2 col2 file start-loc) ;; Reject stray digits 8/9 in an octal context (e.g. 089). @@ -381,19 +436,17 @@ (b3 (%pk-byte p3))) (if (and b3 (%digit? b3)) (die start-loc "invalid octal digit" (bv-of-byte b3)) - (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file)) - (val (%digits-value digs 8))) + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) (cons (make-tok 'INT val start-loc) after)))))) ;; '1'-'9' → decimal ((%digit? b) - (let* ((r (%collect-while %digit? src pos line col)) - (digs (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r)))))) + (let* ((r (%accum-int-while %digit? src pos line col 10)) + (val (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) (%check-no-float src pos2 line2 col2 file start-loc) - (let* ((after (%lex-strip-int-suffix src pos2 line2 col2 file)) - (val (%digits-value digs 10))) + (let ((after (%lex-strip-int-suffix src pos2 line2 col2 file))) (cons (make-tok 'INT val start-loc) after)))) ;; '.' followed by a digit = float-style literal — reject. ((= b 46) @@ -446,58 +499,146 @@ (else (list pos line col)))))) ;; -------------------------------------------------------------------- +;; Escape sequence reader. +;; +;; %scan-or-fill-escape decodes one escape sequence starting at `pos` +;; (which points one past the leading `\\`). When `bv` is a bytevector, +;; the resulting byte is written to (bv idx); when it is #f, no write +;; occurs (used during the string-pass scan phase). Returns the 4-list +;; (val npos nline ncol). +;; -------------------------------------------------------------------- +(define (%scan-or-fill-escape src pos line col file start-loc bv idx) + (let* ((p (%lex-peek src pos line col)) + (b (%pk-byte p))) + (cond + ((not b) (die start-loc "unterminated escape sequence")) + ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms). + ((or (= b 120) (= b 88)) ; 'x' / 'X' + (let* ((r (%accum-int-while %hex? src + (%pk-pos p) (%pk-line p) (%pk-col p) 16)) + (val0 (car r)) + (cnt (car (cdr r))) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r))))))) + (cond + ((zero? cnt) (die start-loc "expected hex digits after \\x")) + (else + (let ((val (bit-and val0 255))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val pos2 line2 col2)))))) + ;; \NNN — 1..3 octal digits. + ((%octal? b) + (let* ((r (%accum-octal-bounded src pos line col 3)) + (val0 (car r)) + (pos2 (car (cdr (cdr r)))) + (line2 (car (cdr (cdr (cdr r))))) + (col2 (car (cdr (cdr (cdr (cdr r)))))) + (val (bit-and val0 255))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val pos2 line2 col2))) + (else + (let ((val (cond ((= b 110) 10) ; n + ((= b 116) 9) ; t + ((= b 114) 13) ; r + ((= b 92) 92) ; \\ + ((= b 39) 39) ; ' + ((= b 34) 34) ; " + ((= b 48) 0) ; 0 (already handled by octal but be safe) + ((= b 97) 7) ; \a -> BEL + ((= b 98) 8) ; \b + ((= b 102) 12) ; \f + ((= b 118) 11) ; \v + ((= b 63) 63) ; \? + (else + (die start-loc "unknown escape" (bv-of-byte b)))))) + (cond (bv (bytevector-u8-set! bv idx val)) + (else #f)) + (list val (%pk-pos p) (%pk-line p) (%pk-col p))))))) + +;; -------------------------------------------------------------------- ;; String reader. ;; ;; Caller has verified src[pos] == '"' (raw byte 34). Returns ;; (tok npos nline ncol) with the raw decoded bytes (no NUL appended). +;; +;; Two-pass: %string-pass with bv=#f counts effective bytes (escapes +;; collapse to 1 byte each); after rewind we allocate the final bv and +;; rerun with bv set so the bytes are written directly into it. ;; -------------------------------------------------------------------- (define (lex-read-string src pos file) (%lex-read-string src pos 1 (+ pos 1) file)) (define (%lex-read-string src pos line col file) - (let* ((start-loc (%loc file line col)) - ;; pos points to the opening quote — step over it first. - (p0 (%lex-peek src pos line col)) - (b0 (%pk-byte p0))) - (if (not (and b0 (= b0 34))) - (die start-loc "internal: string reader on non-quote") - (%collect-string src (%pk-pos p0) (%pk-line p0) (%pk-col p0) - file start-loc '())))) - -(define (%collect-string src pos line col file start-loc acc) - (let ((n (bytevector-length src))) + (let ((start-loc (%loc file line col)) + (cnt 0) (npos 0) (nline 0) (ncol 0) + (mark 0)) + ;; '"' (34) is a fast-byte and never a trigraph result, so the + ;; physical byte at `pos` is exactly the opening quote. (cond - ((>= pos n) (die start-loc "unterminated string literal")) + ((or (>= pos (bytevector-length src)) + (not (= (bytevector-u8-ref src pos) 34))) + (die start-loc "internal: string reader on non-quote")) (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ;; Closing quote — fast byte but special. - ((= b 34) - (let ((bv (%bv-of-bytes (reverse acc)))) - (cons (make-tok 'STR bv start-loc) - (list (+ pos 1) line (+ col 1))))) - ((%fast-byte? b) - (%collect-string src (+ pos 1) line (+ col 1) - file start-loc (cons b acc))) - (else - ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'. - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (cond - ((not b2) - (die start-loc "unterminated string literal")) - ((= b2 34) - (let ((bv (%bv-of-bytes (reverse acc)))) - (cons (make-tok 'STR bv start-loc) - (list (%pk-pos p) (%pk-line p) (%pk-col p))))) - ((%newline? b2) - (die start-loc "newline in string literal")) - ((= b2 92) - (%read-escape src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc acc 'string)) - (else - (%collect-string src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc (cons b2 acc)))))))))))) + (set! mark (heap-mark)) + (let ((sres (%string-pass src (+ pos 1) line (+ col 1) + file start-loc #f))) + (set! cnt (car sres)) + (set! npos (car (cdr sres))) + (set! nline (car (cdr (cdr sres)))) + (set! ncol (car (cdr (cdr (cdr sres)))))) + (heap-rewind! mark) + (let ((bv (make-bytevector cnt 0)) + (mark2 0)) + (set! mark2 (heap-mark)) + (%string-pass src (+ pos 1) line (+ col 1) file start-loc bv) + (heap-rewind! mark2) + (cons (make-tok 'STR bv start-loc) + (list npos nline ncol))))))) + +(define (%string-pass src pos line col file start-loc bv) + ;; Walk the string body (after opening "). When `bv` is #f, count + ;; effective bytes; when it is a bytevector, write bytes into it at + ;; index 0..count-1. Returns (count npos nline ncol). + (let ((n (bytevector-length src))) + (let loop ((pos pos) (line line) (col col) (idx 0)) + (cond + ((>= pos n) (die start-loc "unterminated string literal")) + (else + (let ((b (bytevector-u8-ref src pos))) + (cond + ;; Closing quote — fast byte but special. + ((= b 34) + (list idx (+ pos 1) line (+ col 1))) + ((%fast-byte? b) + (cond (bv (bytevector-u8-set! bv idx b)) + (else #f)) + (loop (+ pos 1) line (+ col 1) (+ idx 1))) + (else + ;; Slow path: ?/\ (trigraph/splice/escape) or '\n'. + (let* ((p (%lex-peek src pos line col)) + (b2 (%pk-byte p))) + (cond + ((not b2) + (die start-loc "unterminated string literal")) + ((= b2 34) + (list idx (%pk-pos p) (%pk-line p) (%pk-col p))) + ((%newline? b2) + (die start-loc "newline in string literal")) + ((= b2 92) + (let* ((er (%scan-or-fill-escape + src (%pk-pos p) (%pk-line p) (%pk-col p) + file start-loc bv idx)) + (epos (car (cdr er))) + (eline (car (cdr (cdr er)))) + (ecol (car (cdr (cdr (cdr er)))))) + (loop epos eline ecol (+ idx 1)))) + (else + (cond (bv (bytevector-u8-set! bv idx b2)) + (else #f)) + (loop (%pk-pos p) (%pk-line p) (%pk-col p) (+ idx 1))))))))))))) ;; -------------------------------------------------------------------- ;; Char reader. @@ -526,9 +667,9 @@ ((= b 39) (die start-loc "empty char literal")) ((%newline? b) (die start-loc "newline in char literal")) ((= b 92) ; escape - (let* ((r (%read-escape src (%pk-pos p) (%pk-line p) (%pk-col p) - file start-loc '() 'char)) - ;; r is a list (val npos nline ncol) for char-mode escapes. + (let* ((r (%scan-or-fill-escape src + (%pk-pos p) (%pk-line p) (%pk-col p) + file start-loc #f 0)) (val (car r)) (pos2 (car (cdr r))) (line2 (car (cdr (cdr r)))) @@ -550,89 +691,6 @@ (die start-loc "multi-character char constant not supported"))))) ;; -------------------------------------------------------------------- -;; Escape sequence reader. -;; -;; mode = 'string : returns (%collect-string ...) tail (recurse) -;; mode = 'char : returns (val npos nline ncol) -;; -------------------------------------------------------------------- -(define (%read-escape src pos line col file start-loc acc mode) - (let* ((p (%lex-peek src pos line col)) - (b (%pk-byte p))) - (cond - ((not b) (die start-loc "unterminated escape sequence")) - ;; \xNN — 1+ hex digits (tcc.c uses 1- and 2-digit forms). - ((or (= b 120) (= b 88)) ; 'x' / 'X' - (let* ((r (%collect-while %hex? src (%pk-pos p) (%pk-line p) (%pk-col p))) - (digs (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r)))))) - (if (null? digs) - (die start-loc "expected hex digits after \\x") - (let ((val (bit-and (%digits-value digs 16) 255))) - (%escape-result mode val pos2 line2 col2 - src file start-loc acc))))) - ;; \NNN — 1..3 octal digits. - ((%octal? b) - (let* ((r (%collect-octals src pos line col 3 '())) - (digs (car r)) - (pos2 (car (cdr r))) - (line2 (car (cdr (cdr r)))) - (col2 (car (cdr (cdr (cdr r))))) - (val (bit-and (%digits-value digs 8) 255))) - (%escape-result mode val pos2 line2 col2 src file start-loc acc))) - (else - (let ((val (cond ((= b 110) 10) ; n - ((= b 116) 9) ; t - ((= b 114) 13) ; r - ((= b 92) 92) ; \\ - ((= b 39) 39) ; ' - ((= b 34) 34) ; " - ((= b 48) 0) ; 0 (already handled by octal but be safe) - ((= b 97) 7) ; \a -> BEL - ((= b 98) 8) ; \b - ((= b 102) 12) ; \f - ((= b 118) 11) ; \v - ((= b 63) 63) ; \? - (else - (die start-loc "unknown escape" (bv-of-byte b)))))) - (%escape-result mode val (%pk-pos p) (%pk-line p) (%pk-col p) - src file start-loc acc)))))) - -(define (%collect-octals src pos line col k acc) - ;; Collect up to k octal-digit bytes into acc (as a forward list of - ;; ASCII bytes), returning (digits npos nline ncol). - (cond - ((zero? k) (list (reverse acc) pos line col)) - (else - (let ((n (bytevector-length src))) - (cond - ((>= pos n) (list (reverse acc) pos line col)) - (else - (let ((b (bytevector-u8-ref src pos))) - (cond - ((%fast-byte? b) - (if (%octal? b) - (%collect-octals src (+ pos 1) line (+ col 1) - (- k 1) (cons b acc)) - (list (reverse acc) pos line col))) - (else - (let* ((p (%lex-peek src pos line col)) - (b2 (%pk-byte p))) - (if (and b2 (%octal? b2)) - (%collect-octals src (%pk-pos p) (%pk-line p) (%pk-col p) - (- k 1) (cons b2 acc)) - (list (reverse acc) pos line col)))))))))))) - -(define (%escape-result mode val pos line col src file start-loc acc) - (cond - ((eq? mode 'string) - (%collect-string src pos line col file start-loc (cons val acc))) - ((eq? mode 'char) - (list val pos line col)) - (else (die start-loc "internal: bad escape mode")))) - -;; -------------------------------------------------------------------- ;; Punctuator reader. ;; ;; Greedy longest-match against %punct-alist (cc/data.scm). The alist diff --git a/cc/pp.scm b/cc/pp.scm @@ -643,17 +643,26 @@ ;; --- pp-eval-cexpr: #if expression evaluator --- ;; Steps: resolve `defined NAME`, macro-expand the rest, treat any ;; remaining IDENT as 0, parse with recursive descent. +;; +;; Arena boundary (test_93 A→B→C pattern). Everything between the mark +;; and the rewind is scratch: `s1`/`s2`/`s3` (each a fresh token list, +;; where `s2` runs the full macro-expansion engine), plus the recursive +;; parser's (val . rest) cons cell at every level. The result is a +;; fixnum, so no pre-allocated out cell is needed — `val` survives the +;; rewind by virtue of being an immediate. The error path goes through +;; `die` (which sys-exits), so no rewind there. (define (pp-eval-cexpr toks macros) - (let* ((state (%pp-state macros '() #f 0)) - (s1 (%pp-resolve-defined toks state)) - (s2 (%pp-expand-line s1 state)) - (s3 (%pp-idents-as-zero s2))) - (let* ((p (%pp-cx-expr s3)) - (val (car p)) (rest (cdr p))) - (cond - ((null? rest) val) - (else (die (tok-loc (car rest)) "#if: garbage at end of expression" - (tok-kind (car rest)))))))) + (let ((mark (heap-mark))) + (let* ((state (%pp-state macros '() #f 0)) + (s1 (%pp-resolve-defined toks state)) + (s2 (%pp-expand-line s1 state)) + (s3 (%pp-idents-as-zero s2))) + (let* ((p (%pp-cx-expr s3)) + (val (car p)) (rest (cdr p))) + (cond + ((null? rest) (heap-rewind! mark) val) + (else (die (tok-loc (car rest)) "#if: garbage at end of expression" + (tok-kind (car rest))))))))) (define (%pp-expand-line toks state) (let ((out (make-buf-list)))