commit 257fdd5b913fd29cea9637e958304a511bc7d84b
parent 67e43e7cea97347519640114712c419b7a4f9261
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 21:01:53 -0700
cc streaming architecture
Diffstat:
| M | cc/cc.scm | | | 638 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
1 file changed, 434 insertions(+), 204 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -414,10 +414,14 @@
;; --------------------------------------------------------------------
;; pstate — parser state. Owned by parse.scm; read-only to cg.
;; --------------------------------------------------------------------
+;; iter holds a tok-iter (typically a pp-iter chained over a lex-iter).
+;; peek / peek2 / advance go through iter-peek / iter-peek2 / iter-next
+;; so the parser pulls one token at a time, with no full materialized
+;; token list. See docs/CC-STREAM.md.
(define-record-type pstate
- (%pstate toks scope tags loops fn-ctx typedefs cg)
+ (%pstate iter scope tags loops fn-ctx typedefs cg)
pstate?
- (toks ps-toks ps-toks-set!)
+ (iter ps-iter ps-iter-set!)
(scope ps-scope ps-scope-set!)
(tags ps-tags ps-tags-set!)
(loops ps-loops ps-loops-set!)
@@ -1379,7 +1383,90 @@
(else #f))))))))))))
;; --------------------------------------------------------------------
-;; lex-tokenize src file -> list of tok ending in EOF.
+;; tok-iter — streaming token source.
+;; --------------------------------------------------------------------
+;; Each pipeline layer (lex, pp, parser) wraps the layer below as a
+;; tok-iter. iter-next pulls one token at a time. iter-peek/iter-peek2
+;; cache lookahead in `buf`. iter-unget! pushes back. Live-data bound is
+;; lookahead (≤2) + per-layer state, not source length. See
+;; docs/CC-STREAM.md.
+;;
+;; Pull-fns must keep yielding EOF after the first EOF (idempotent).
+(define-record-type tok-iter
+ (%tok-iter pull-fn state buf)
+ tok-iter?
+ (pull-fn tok-iter-pull-fn)
+ (state tok-iter-state)
+ (buf tok-iter-buf tok-iter-buf-set!))
+
+(define (iter-next it)
+ (let ((b (tok-iter-buf it)))
+ (cond
+ ((null? b) ((tok-iter-pull-fn it) (tok-iter-state it)))
+ (else
+ (tok-iter-buf-set! it (cdr b))
+ (car b)))))
+
+(define (iter-peek it)
+ (let ((b (tok-iter-buf it)))
+ (cond
+ ((null? b)
+ (let ((t ((tok-iter-pull-fn it) (tok-iter-state it))))
+ (tok-iter-buf-set! it (list t))
+ t))
+ (else (car b)))))
+
+(define (iter-peek2 it)
+ (let ((b (tok-iter-buf it)))
+ (cond
+ ((null? b)
+ (let* ((t1 ((tok-iter-pull-fn it) (tok-iter-state it)))
+ (t2 ((tok-iter-pull-fn it) (tok-iter-state it))))
+ (tok-iter-buf-set! it (list t1 t2))
+ t2))
+ ((null? (cdr b))
+ (let ((t2 ((tok-iter-pull-fn it) (tok-iter-state it))))
+ (tok-iter-buf-set! it (cons (car b) (list t2)))
+ t2))
+ (else (car (cdr b))))))
+
+(define (iter-unget! it t)
+ (tok-iter-buf-set! it (cons t (tok-iter-buf it))))
+
+;; Drain to a list ending in EOF. Used by back-compat wrappers
+;; (lex-tokenize, pp-expand) so the cc-lex / cc-pp test runners can
+;; inspect the materialized stream.
+(define (iter->list it)
+ (let loop ((acc '()))
+ (let ((t (iter-next it)))
+ (cond
+ ((eq? (tok-kind t) 'EOF) (reverse (cons t acc)))
+ (else (loop (cons t acc)))))))
+
+;; --------------------------------------------------------------------
+;; list-iter — wrap an existing token list as a tok-iter. Yields each
+;; tok in turn; once exhausted, keeps yielding EOF (idempotent). The
+;; wrapped list typically already ends in EOF.
+;; --------------------------------------------------------------------
+(define-record-type list-iter-state
+ (%list-iter-state toks)
+ list-iter-state?
+ (toks lis-toks lis-toks-set!))
+
+(define (make-list-iter toks)
+ (%tok-iter %list-iter-pull (%list-iter-state toks) '()))
+
+(define (%list-iter-pull st)
+ (let ((toks (lis-toks st)))
+ (cond
+ ((null? toks) (make-tok 'EOF #f #f))
+ (else
+ (lis-toks-set! st (cdr toks))
+ (car toks)))))
+
+;; --------------------------------------------------------------------
+;; lex-iter — streaming lexer. Steady state: pos/line/col + bol? in
+;; lex-state; per-token allocation reclaimed via heap-mark/rewind.
;; --------------------------------------------------------------------
;; bol? — `#t` when no token has been emitted on the current physical
;; line yet (start of file, or only NL + whitespace seen since the last
@@ -1387,155 +1474,179 @@
;; at line-start; we forward that decision into the token stream by
;; emitting `HASH` instead of `(PUNCT hash …)` for a line-leading `#`.
;;
-;; Heap discipline: each iteration of `loop` is wrapped in a
+;; Heap discipline: each call to %lex-iter-pull is wrapped in a
;; heap-mark / heap-rewind!. All scratch the helper allocates (the
;; helper's own tok/loc, the `(cons tok 4-list)` it returns, every
;; bind_params / let* / eval_args env-cons consumed getting in and out)
-;; lives above the mark and is reclaimed before recursing. The driver
-;; ferries scalars across the rewind via the outer let bindings (set!
-;; mutates the binding cdr in place — no fresh heap), and bv contents
-;; via the sticky %lex-scratch buffer + %lex-intern pool. The only
-;; survivors per token are the freshly-built tok (48 B), loc (40 B),
-;; cons-onto-acc (16 B) and, on first sight, an interned bv.
-(define (lex-tokenize src file)
+;; lives above the mark and is reclaimed before returning. Per-token
+;; scratch (kind/val/vlen/loc-line/loc-col/npos/nline/ncol/nbol?) is
+;; allocated in the outer `let` BEFORE the mark — set! mutates those
+;; cells in place across the rewind. Bv contents survive via the
+;; sticky %lex-scratch buffer + %lex-scratch->bv (allocated post-rewind).
+;; The survivors per token are tok (48 B) + loc (40 B) + bv if any.
+(define-record-type lex-state
+ (%lex-state src file pos line col bol? done?)
+ lex-state?
+ (src lex-state-src)
+ (file lex-state-file)
+ (pos lex-state-pos lex-state-pos-set!)
+ (line lex-state-line lex-state-line-set!)
+ (col lex-state-col lex-state-col-set!)
+ (bol? lex-state-bol? lex-state-bol?-set!)
+ (done? lex-state-done? lex-state-done?-set!))
+
+(define (make-lex-iter src file)
(%lex-init!)
- (let ((pos 0) (line 1) (col 1) (acc '()) (bol? #t) (done? #f)
- (mark 0)
- ;; per-iteration scratch: kind + value (+ vlen for bv-typed),
- ;; loc-line/col (the start of the token), and npos/nline/ncol
- ;; (where the lexer stopped). All scalar — set! is safe across
- ;; the rewind because the binding cons lives in the outer let,
- ;; below the mark.
+ (%tok-iter %lex-iter-pull
+ (%lex-state src file 0 1 1 #t #f)
+ '()))
+
+(define (%lex-iter-pull st)
+ (cond
+ ((lex-state-done? st)
+ ;; Idempotent EOF: keep yielding EOF after the first one.
+ (make-tok 'EOF #f (%loc (lex-state-file st)
+ (lex-state-line st)
+ (lex-state-col st))))
+ (else (%lex-iter-step st))))
+
+(define (%lex-iter-step st)
+ (let ((src (lex-state-src st))
+ (file (lex-state-file st))
+ (pos (lex-state-pos st))
+ (line (lex-state-line st))
+ (col (lex-state-col st))
+ (bol? (lex-state-bol? st))
+ ;; Per-iteration scratch — must be allocated BEFORE the mark
+ ;; so that set!s after heap-rewind! still find live cells.
(kind #f) (val #f) (vlen 0)
(loc-line 1) (loc-col 1)
- (npos 0) (nline 1) (ncol 1) (nbol? #t))
- (let loop ()
+ (npos 0) (nline 1) (ncol 1) (nbol? #f)
+ (mark 0))
+ (set! mark (heap-mark))
+ (let* ((sw (%skip-ws-and-comments src pos line col file))
+ (pos1 (car sw))
+ (line1 (car (cdr sw)))
+ (col1 (car (cdr (cdr sw))))
+ (p (%lex-peek src pos1 line1 col1))
+ (b (%pk-byte p)))
+ (set! loc-line line1)
+ (set! loc-col col1)
+ (set! val #f) (set! vlen 0) (set! nbol? #f)
(cond
- (done? (reverse acc))
- (else
- (set! mark (heap-mark))
- (let* ((sw (%skip-ws-and-comments src pos line col file))
- (pos1 (car sw))
- (line1 (car (cdr sw)))
- (col1 (car (cdr (cdr sw))))
- (p (%lex-peek src pos1 line1 col1))
- (b (%pk-byte p)))
- (set! loc-line line1)
- (set! loc-col col1)
- (set! val #f) (set! vlen 0) (set! nbol? #f)
+ ;; EOF
+ ((not b)
+ (set! kind 'EOF)
+ (set! npos pos1) (set! nline line1) (set! ncol col1))
+ ;; Newline → NL token; next call starts at bol.
+ ((%newline? b)
+ (set! kind 'NL)
+ (set! npos (%pk-pos p))
+ (set! nline (%pk-line p))
+ (set! ncol (%pk-col p))
+ (set! nbol? #t))
+ ;; Line-leading `#`: bare `#` becomes HASH; `##` falls
+ ;; through to punctuator (lexes as `paste`).
+ ((and bol? (= b 35))
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
(cond
- ;; EOF
- ((not b)
- (set! kind 'EOF)
- (set! npos pos1) (set! nline line1) (set! ncol col1))
- ;; Newline → NL token; next iter starts at bol.
- ((%newline? b)
- (set! kind 'NL)
- (set! npos (%pk-pos p))
- (set! nline (%pk-line p))
- (set! ncol (%pk-col p))
- (set! nbol? #t))
- ;; Line-leading `#`: bare `#` becomes HASH; `##` falls
- ;; through to punctuator (lexes as `paste`).
- ((and bol? (= b 35))
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (= b2 35))
- (let* ((r (%lex-read-punct src pos1 line1 col1 file))
- (tok (car r)) (rest (cdr r)))
- (set! kind 'PUNCT) (set! val (tok-value tok))
- (set! npos (car rest))
- (set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))
- (else
- (set! kind 'HASH)
- (set! npos (%pk-pos p))
- (set! nline (%pk-line p))
- (set! ncol (%pk-col p))))))
- ;; Identifier / keyword
- ((%ident-start? b)
- (let* ((r (%lex-read-ident src pos1 line1 col1 file))
- (tok (car r)) (rest (cdr r)))
- (set! kind (tok-kind tok))
- (cond ((eq? (tok-kind tok) 'KW)
- (set! val (tok-value tok)))
- (else
- (let ((bv (tok-value tok)))
- (set! vlen (bytevector-length bv))
- (%lex-scratch<- bv vlen))))
- (set! npos (car rest))
- (set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))
- ;; Number (digit start)
- ((%digit? b)
- (let* ((r (%lex-read-number src pos1 line1 col1 file))
- (tok (car r)) (rest (cdr r)))
- (set! kind 'INT) (set! val (tok-value tok))
- (set! npos (car rest))
- (set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))
- ;; '.' is a punctuator unless followed by a digit (float).
- ((= b 46)
- (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
- (b2 (%pk-byte q)))
- (cond
- ((and b2 (%digit? b2))
- (die (%loc file line1 col1)
- "floating-point literal not supported"))
- (else
- (let* ((r (%lex-read-punct src pos1 line1 col1 file))
- (tok (car r)) (rest (cdr r)))
- (set! kind 'PUNCT) (set! val (tok-value tok))
- (set! npos (car rest))
- (set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest)))))))))
- ;; String
- ((= b 34)
- (let* ((r (%lex-read-string src pos1 line1 col1 file))
- (tok (car r)) (rest (cdr r))
- (bv (tok-value tok)))
- (set! kind 'STR)
- (set! vlen (bytevector-length bv))
- (%lex-scratch<- bv vlen)
- (set! npos (car rest))
- (set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))
- ;; Char
- ((= b 39)
- (let* ((r (%lex-read-char src pos1 line1 col1 file))
+ ((and b2 (= b2 35))
+ (let* ((r (%lex-read-punct src pos1 line1 col1 file))
(tok (car r)) (rest (cdr r)))
- (set! kind 'CHAR) (set! val (tok-value tok))
+ (set! kind 'PUNCT) (set! val (tok-value tok))
(set! npos (car rest))
(set! nline (car (cdr rest)))
(set! ncol (car (cdr (cdr rest))))))
- ;; Punctuator (default)
+ (else
+ (set! kind 'HASH)
+ (set! npos (%pk-pos p))
+ (set! nline (%pk-line p))
+ (set! ncol (%pk-col p))))))
+ ;; Identifier / keyword
+ ((%ident-start? b)
+ (let* ((r (%lex-read-ident src pos1 line1 col1 file))
+ (tok (car r)) (rest (cdr r)))
+ (set! kind (tok-kind tok))
+ (cond ((eq? (tok-kind tok) 'KW)
+ (set! val (tok-value tok)))
+ (else
+ (let ((bv (tok-value tok)))
+ (set! vlen (bytevector-length bv))
+ (%lex-scratch<- bv vlen))))
+ (set! npos (car rest))
+ (set! nline (car (cdr rest)))
+ (set! ncol (car (cdr (cdr rest))))))
+ ;; Number (digit start)
+ ((%digit? b)
+ (let* ((r (%lex-read-number src pos1 line1 col1 file))
+ (tok (car r)) (rest (cdr r)))
+ (set! kind 'INT) (set! val (tok-value tok))
+ (set! npos (car rest))
+ (set! nline (car (cdr rest)))
+ (set! ncol (car (cdr (cdr rest))))))
+ ;; '.' is a punctuator unless followed by a digit (float).
+ ((= b 46)
+ (let* ((q (%lex-peek src (%pk-pos p) (%pk-line p) (%pk-col p)))
+ (b2 (%pk-byte q)))
+ (cond
+ ((and b2 (%digit? b2))
+ (die (%loc file line1 col1)
+ "floating-point literal not supported"))
(else
(let* ((r (%lex-read-punct src pos1 line1 col1 file))
(tok (car r)) (rest (cdr r)))
(set! kind 'PUNCT) (set! val (tok-value tok))
(set! npos (car rest))
(set! nline (car (cdr rest)))
- (set! ncol (car (cdr (cdr rest))))))))
- (heap-rewind! mark)
- ;; Reconstruct survivors post-rewind. Bv-typed values go
- ;; through the intern pool so repeats share storage.
- (cond
- ((eq? kind 'EOF)
- (set! acc (cons (make-tok 'EOF #f (%loc file loc-line loc-col))
- acc))
- (set! done? #t))
- (else
- (let ((tok-val (cond ((eq? kind 'IDENT) (%lex-scratch->bv vlen))
- ((eq? kind 'STR) (%lex-scratch->bv vlen))
- (else val))))
- (set! acc (cons (make-tok kind tok-val
- (%loc file loc-line loc-col))
- acc))
- (set! pos npos) (set! line nline) (set! col ncol)
- (set! bol? nbol?)))
- )
- (loop))))))
+ (set! ncol (car (cdr (cdr rest)))))))))
+ ;; String
+ ((= b 34)
+ (let* ((r (%lex-read-string src pos1 line1 col1 file))
+ (tok (car r)) (rest (cdr r))
+ (bv (tok-value tok)))
+ (set! kind 'STR)
+ (set! vlen (bytevector-length bv))
+ (%lex-scratch<- bv vlen)
+ (set! npos (car rest))
+ (set! nline (car (cdr rest)))
+ (set! ncol (car (cdr (cdr rest))))))
+ ;; Char
+ ((= b 39)
+ (let* ((r (%lex-read-char src pos1 line1 col1 file))
+ (tok (car r)) (rest (cdr r)))
+ (set! kind 'CHAR) (set! val (tok-value tok))
+ (set! npos (car rest))
+ (set! nline (car (cdr rest)))
+ (set! ncol (car (cdr (cdr rest))))))
+ ;; Punctuator (default)
+ (else
+ (let* ((r (%lex-read-punct src pos1 line1 col1 file))
+ (tok (car r)) (rest (cdr r)))
+ (set! kind 'PUNCT) (set! val (tok-value tok))
+ (set! npos (car rest))
+ (set! nline (car (cdr rest)))
+ (set! ncol (car (cdr (cdr rest))))))))
+ (heap-rewind! mark)
+ ;; Reconstruct the survivor below the mark and advance state.
+ (cond
+ ((eq? kind 'EOF)
+ (lex-state-done?-set! st #t)
+ (make-tok 'EOF #f (%loc file loc-line loc-col)))
+ (else
+ (let ((tok-val (cond ((eq? kind 'IDENT) (%lex-scratch->bv vlen))
+ ((eq? kind 'STR) (%lex-scratch->bv vlen))
+ (else val))))
+ (lex-state-pos-set! st npos)
+ (lex-state-line-set! st nline)
+ (lex-state-col-set! st ncol)
+ (lex-state-bol?-set! st nbol?)
+ (make-tok kind tok-val (%loc file loc-line loc-col)))))))
+
+;; Back-compat wrapper — drains lex-iter into a list ending in EOF.
+;; Used by the cc-lex test runner.
+(define (lex-tokenize src file)
+ (iter->list (make-lex-iter src file)))
;; cc/pp.scm — token list -> expanded token list.
;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4.
;; #include rejected (CC.md §Toolchain envelope).
@@ -1559,15 +1670,27 @@
;; --- pp-state (private record) ---
;; cond-stack: list of (active? . has-taken?). Outer-active gating is
;; computed by walking the stack rather than encoding it in frames.
+;;
+;; Streaming fields (used only by make-pp-iter; #f for the legacy
+;; bounded-buffer path used by pp-eval-cexpr):
+;; lex-iter — upstream tok-iter, or #f
+;; up-pending — toks unshifted upstream (macro-expansion bodies that
+;; must be re-scanned for further expansion)
+;; out-buf — toks already dispatched but stashed for the next pull
+;; (peek-and-fuse for adjacent STRs lookahead overshoots
+;; by one tok, which lands here)
(define-record-type pp-state
- (%pp-state macros cond-stack cur-file line-delta)
+ (%pp-state macros cond-stack cur-file line-delta lex-iter up-pending out-buf)
pp-state?
(macros pps-macros pps-macros-set!)
(cond-stack pps-cond-stack pps-cond-stack-set!)
(cur-file pps-cur-file pps-cur-file-set!)
- (line-delta pps-line-delta pps-line-delta-set!))
+ (line-delta pps-line-delta pps-line-delta-set!)
+ (lex-iter pps-lex-iter)
+ (up-pending pps-up-pending pps-up-pending-set!)
+ (out-buf pps-out-buf pps-out-buf-set!))
-(define (%pp-make-state defs) (%pp-state defs '() #f 0))
+(define (%pp-make-state defs) (%pp-state defs '() #f 0 #f '() '()))
(define (%pp-active? state)
(let loop ((xs (pps-cond-stack state)))
@@ -1648,59 +1771,162 @@
(else (buf-list-push! b (car ts)) (loop (cdr ts))))))
(define (buf-list-flush b) (reverse (buf-list-xs b)))
-;; Translation phase 6: concatenate adjacent string literals. The merged
-;; token keeps the loc and hide-set of the first; values are byte-appended.
-(define (%pp-merge-adjacent-strs toks)
- (let loop ((toks toks) (acc '()))
+;; --- make-pp-iter: streaming preprocessor ---
+;; Wraps a lex-iter (or any tok-iter). Returns a tok-iter. Live data
+;; bounded by parser state + lookahead, not source length. Adjacent-STR
+;; fusion happens inline via peek-and-stash.
+(define (make-pp-iter src-iter initial-defines)
+ (let ((st (%pp-state initial-defines '() #f 0 src-iter '() '())))
+ (%tok-iter %pp-iter-pull st '())))
+
+(define (%pp-iter-pull st)
+ (let ((ob (pps-out-buf st)))
(cond
- ((null? toks) (reverse acc))
- ((and (not (null? acc))
- (eq? (tok-kind (car toks)) 'STR)
- (eq? (tok-kind (car acc)) 'STR))
- (let* ((prev (car acc))
- (cur (car toks))
- (merged (%tok 'STR
- (bytevector-append (tok-value prev) (tok-value cur))
- (tok-loc prev)
- (tok-hide prev))))
- (loop (cdr toks) (cons merged (cdr acc)))))
- (else (loop (cdr toks) (cons (car toks) acc))))))
+ ((not (null? ob))
+ (pps-out-buf-set! st (cdr ob))
+ (car ob))
+ (else (%pp-maybe-fuse-str st (%pp-dispatch-step st))))))
+
+;; --- upstream helpers ---
+;; Upstream tokens come either from up-pending (macro-expansion bodies
+;; that need re-scanning) or from the wrapped lex-iter.
+(define (%pp-pull-upstream st)
+ (let ((up (pps-up-pending st)))
+ (cond
+ ((not (null? up))
+ (pps-up-pending-set! st (cdr up))
+ (car up))
+ (else (iter-next (pps-lex-iter st))))))
-;; --- pp-expand: top-level driver ---
-(define (pp-expand toks initial-defines)
- (let ((state (%pp-make-state initial-defines))
- (out (make-buf-list)))
- (let loop ((toks toks))
+(define (%pp-peek-upstream st)
+ (let ((up (pps-up-pending st)))
+ (cond
+ ((not (null? up)) (car up))
+ (else (iter-peek (pps-lex-iter st))))))
+
+;; Push toks to the front of upstream so (car toks) is yielded next.
+(define (%pp-unshift-upstream! st toks)
+ (pps-up-pending-set! st (append toks (pps-up-pending st))))
+
+;; Collect tokens up to (not including) NL or EOF. NL is consumed; EOF
+;; is unshifted back so the main loop sees it.
+(define (%pp-collect-line-stream st)
+ (let loop ((acc '()))
+ (let ((t (%pp-pull-upstream st)))
(cond
- ((null? toks) (die #f "pp-expand: missing EOF token"))
- ((%pp-eof? (car toks))
- (cond ((not (null? (pps-cond-stack state)))
- (die (tok-loc (car toks)) "unterminated #if/#ifdef/#ifndef"))
- (else
- (buf-list-push! out (car toks))
- (%pp-merge-adjacent-strs (buf-list-flush out)))))
- ((%pp-nl? (car toks)) (loop (cdr toks)))
- ((%pp-hash? (car toks))
- (let-values (((line rest) (%pp-take-line (cdr toks))))
- (%pp-dispatch-directive (car toks) line state out)
- (loop rest)))
- (else
- (let-values (((line rest) (%pp-take-line toks)))
- (cond ((%pp-active? state)
- (%pp-emit-expanded line state out))
- (else #t))
- (loop rest)))))))
-
-;; Take tokens up to (not including) the next NL or EOF. NL is consumed;
-;; EOF is left in the stream so the driver sees it next.
-;; Returns (values line rest).
-(define (%pp-take-line toks)
- (let loop ((toks toks) (acc '()))
+ ((%pp-eof? t)
+ (%pp-unshift-upstream! st (list t))
+ (reverse acc))
+ ((%pp-nl? t) (reverse acc))
+ (else (loop (cons t acc)))))))
+
+;; Streaming arg collection for fn-like macro calls. Position is just
+;; AFTER the opening `(`. Returns the list of arg-tokenlists.
+(define (%pp-collect-args-stream st call-loc)
+ (let loop ((depth 0) (cur '()) (args '()))
+ (let ((t (%pp-pull-upstream st)))
+ (cond
+ ((%pp-eof? t)
+ (die call-loc "macro call: unterminated argument list"))
+ ((and (= depth 0) (%pp-punct? t 'rparen))
+ (cond
+ ;; Empty parens count as one empty argument; bind-args
+ ;; degenerates this back to "no args" for 0-param macros.
+ ((and (null? args) (null? cur)) (list '()))
+ (else (reverse (cons (reverse cur) args)))))
+ ((and (= depth 0) (%pp-punct? t 'comma))
+ (loop 0 '() (cons (reverse cur) args)))
+ ((%pp-punct? t 'lparen)
+ (loop (+ depth 1) (cons t cur) args))
+ ((%pp-punct? t 'rparen)
+ (loop (- depth 1) (cons t cur) args))
+ (else (loop depth (cons t cur) args))))))
+
+;; Single dispatch step. Returns one post-pp tok (skipping NLs,
+;; processing directives, expanding macros). Does NOT apply STR-fusion
+;; — that happens one layer up in %pp-iter-pull, otherwise the
+;; recursive lookahead during fusion would itself fuse further STRs
+;; and drag tokens past the run into out-buf.
+(define (%pp-dispatch-step st)
+ (let ((t (%pp-pull-upstream st)))
(cond
- ((null? toks) (values (reverse acc) toks))
- ((%pp-eof? (car toks)) (values (reverse acc) toks))
- ((%pp-nl? (car toks)) (values (reverse acc) (cdr toks)))
- (else (loop (cdr toks) (cons (car toks) acc))))))
+ ((%pp-eof? t)
+ (cond ((not (null? (pps-cond-stack st)))
+ (die (tok-loc t) "unterminated #if/#ifdef/#ifndef"))
+ (else t)))
+ ((%pp-nl? t) (%pp-dispatch-step st))
+ ((%pp-hash? t)
+ (let ((line (%pp-collect-line-stream st)))
+ (%pp-dispatch-directive t line st #f)
+ (%pp-dispatch-step st)))
+ ((not (%pp-active? st))
+ (%pp-dispatch-step st))
+ ((%pp-ident? t)
+ (let ((name (tok-value t)))
+ (cond
+ ((%pp-bv-mem? name (tok-hide t))
+ (%pp-relocate t st))
+ ((%pp-builtin? name)
+ (let ((toks (%pp-expand-builtin name (tok-loc t) st)))
+ (%pp-unshift-upstream! st toks)
+ (%pp-dispatch-step st)))
+ (else
+ (let ((m (alist-ref name (pps-macros st))))
+ (cond
+ ((not m) (%pp-relocate t st))
+ ((eq? (macro-kind m) 'obj)
+ (let ((body (%pp-prepare-body (macro-body m)
+ (cons name (tok-hide t)))))
+ (%pp-unshift-upstream! st body)
+ (%pp-dispatch-step st)))
+ (else
+ ;; fn-like or fn-vararg: peek upstream for `(`. If
+ ;; not present, pass IDENT through unchanged (no
+ ;; consumption); the next iter call will process the
+ ;; following tok normally.
+ (let ((next (%pp-peek-upstream st)))
+ (cond
+ ((not (%pp-punct? next 'lparen))
+ (%pp-relocate t st))
+ (else
+ (%pp-pull-upstream st) ; consume `(`
+ (let* ((args (%pp-collect-args-stream st (tok-loc t)))
+ (params (macro-params m))
+ (variadic? (eq? (macro-kind m) 'fn-vararg))
+ (env (%pp-bind-args params args variadic? (tok-loc t)))
+ (sub (%pp-substitute (macro-body m) env (tok-loc t)))
+ (body (%pp-prepare-body sub
+ (cons name (tok-hide t)))))
+ (%pp-unshift-upstream! st body)
+ (%pp-dispatch-step st))))))))))))
+ (else (%pp-relocate t st)))))
+
+;; Translation phase 6 (peek-and-fuse). If `cur` is STR, look at the
+;; next post-pp tok; if it's STR, fuse and repeat. Anything else gets
+;; stashed in out-buf for the next iter-next call. Lookahead goes
+;; through %pp-dispatch-step (no fusion), so a non-STR neighbor
+;; correctly terminates the run.
+(define (%pp-maybe-fuse-str st cur)
+ (cond
+ ((not (eq? (tok-kind cur) 'STR)) cur)
+ (else
+ (let loop ((cur cur))
+ (let ((next (%pp-dispatch-step st)))
+ (cond
+ ((eq? (tok-kind next) 'STR)
+ (loop (%tok 'STR
+ (bytevector-append (tok-value cur) (tok-value next))
+ (tok-loc cur)
+ (tok-hide cur))))
+ (else
+ (pps-out-buf-set! st (cons next (pps-out-buf st)))
+ cur)))))))
+
+;; Back-compat wrapper — drains pp-iter into a list ending in EOF.
+;; Used by the cc-pp test runner. Wraps the input list in a list-iter
+;; so the streaming engine sees a normal upstream.
+(define (pp-expand toks initial-defines)
+ (iter->list (make-pp-iter (make-list-iter toks) initial-defines)))
;; --- directive dispatch ---
;; pmatch-based on the directive name bv. bv literals match by equal?.
@@ -2190,7 +2416,7 @@
;; `die` (which sys-exits), so no rewind there.
(define (pp-eval-cexpr toks macros)
(let ((mark (heap-mark)))
- (let* ((state (%pp-state macros '() #f 0))
+ (let* ((state (%pp-state macros '() #f 0 #f '() '()))
(s1 (%pp-resolve-defined toks state))
(s2 (%pp-expand-line s1 state))
(s3 (%pp-idents-as-zero s2)))
@@ -3755,15 +3981,12 @@
aligned))
;; cc/parse.scm — recursive-descent + Pratt parser. Minimal scheme1.
-(define (make-pstate toks cg)
- (%pstate toks (list '()) (list '()) '() #f '() cg))
+(define (make-pstate iter cg)
+ (%pstate iter (list '()) (list '()) '() #f '() cg))
-(define (peek ps) (car (ps-toks ps)))
-(define (peek2 ps)
- (let ((r (cdr (ps-toks ps))))
- (if (null? r) (car (ps-toks ps)) (car r))))
-(define (advance ps)
- (let ((t (peek ps))) (ps-toks-set! ps (cdr (ps-toks ps))) t))
+(define (peek ps) (iter-peek (ps-iter ps)))
+(define (peek2 ps) (iter-peek2 (ps-iter ps)))
+(define (advance ps) (iter-next (ps-iter ps)))
(define (at-kw? ps s)
(pmatch (peek ps)
(($ tok? (kind KW) (value ,v)) (eq? v s))
@@ -5255,11 +5478,16 @@
(cond
((null? stk) #t)
(else
- (let ((sv (ps-toks ps)))
- (ps-toks-set! ps
- (append stk (list (make-tok 'EOF #f #f))))
+ ;; Step expr is parsed AFTER the body. Swap the
+ ;; parser's iter for a list-iter wrapping the saved
+ ;; tokens (followed by EOF so parse-expr stops);
+ ;; restore on exit.
+ (let ((sv (ps-iter ps)))
+ (ps-iter-set! ps
+ (make-list-iter
+ (append stk (list (make-tok 'EOF #f #f)))))
(parse-expr ps) (cg-pop (ps-cg ps))
- (ps-toks-set! ps sv)))))))
+ (ps-iter-set! ps sv)))))))
(scope-leave! ps) #t)
(define (collect-til-rparen ps)
@@ -5921,15 +6149,17 @@
(let* ((in-path (car args))
(out-path (car (cdr args))))
(debug-log "phase=start" "heap" (heap-usage))
+ ;; Streaming pipeline: lex → pp → parser → cg, all concurrent.
+ ;; Each stage pulls one tok at a time from upstream. Steady-state
+ ;; live data is bounded by parser/pp state, not source length.
+ ;; See docs/CC-STREAM.md.
(let* ((src (%cc-slurp in-path))
(_1 (debug-log "phase=slurp" "heap" (heap-usage)
"src-bytes" (bytevector-length src)))
- (toks (lex-tokenize src in-path))
- (_2 (debug-log "phase=lex" "heap" (heap-usage)))
- (expanded (pp-expand toks '()))
- (_3 (debug-log "phase=pp" "heap" (heap-usage)))
+ (lex-iter (make-lex-iter src in-path))
+ (pp-iter (make-pp-iter lex-iter '()))
(cg (cg-init))
- (ps (make-pstate expanded cg)))
+ (ps (make-pstate pp-iter cg)))
(parse-translation-unit ps)
(debug-log "phase=parse" "heap" (heap-usage))
(let ((out (cg-finish cg)))