commit 1f7b74c622a0e62f2737ae7a364996207026684b
parent 257fdd5b913fd29cea9637e958304a511bc7d84b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 00:21:12 -0700
cc: redecl compatibility, update cc docs
Diffstat:
13 files changed, 284 insertions(+), 775 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -1,8 +1,4 @@
;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude.
-;;
-;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add
-;; helpers here freely; the listed signatures are the load-bearing
-;; surface other modules call.
;; --------------------------------------------------------------------
;; bytevector helpers (scheme1 strings ARE bytevectors)
@@ -181,7 +177,7 @@
;; diagnostics + I/O
;; --------------------------------------------------------------------
(define (die loc msg . irritants)
- ;; Format per CC-CONTRACTS §2.3:
+ ;; Format:
;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ...
;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted.
;; irritants are written via display semantics (no quoting); format's
@@ -284,13 +280,6 @@
(set! ctr (+ ctr 1))
s))))
;; cc/data.scm — record types and symbol alphabets shared across modules.
-;;
-;; Concrete realization of:
-;; docs/CC-INTERNALS.md §data.scm
-;; docs/CC-CONTRACTS.md §1
-;;
-;; Adding a record or alphabet symbol requires updating the contract
-;; doc first.
;; --------------------------------------------------------------------
;; loc — source location for diagnostics
@@ -303,13 +292,13 @@
(col loc-col)) ; fixnum
;; --------------------------------------------------------------------
-;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for
-;; PUNCT value symbols, §1.3 for KW value symbols.
+;; tok — lexer token.
;; --------------------------------------------------------------------
(define-record-type tok
(%tok kind value loc hide)
tok?
- (kind tok-kind) ; symbol from §1.1
+ (kind tok-kind) ; IDENT | INT | STR | CHAR | KW | PUNCT
+ ; | NL | HASH | EOF
(value tok-value) ; bv | fixnum | symbol | #f
(loc tok-loc) ; loc
(hide tok-hide)) ; list of bv (macro names already expanded)
@@ -328,8 +317,7 @@
(body macro-body)) ; list of tok
;; --------------------------------------------------------------------
-;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and
-;; CC-INTERNALS §data.scm for the ext payload table.
+;; ctype — C type.
;;
;; Fields that mutate over a ctype's lifetime:
;; size and align — set to -1/-1 on forward struct/union decl,
@@ -345,7 +333,7 @@
(align ctype-align ctype-align-set!)
(ext ctype-ext ctype-ext-set!))
-;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?.
+;; Interned primitive ctypes. Equality is eq?.
(define %t-void (%ctype 'void -1 -1 #f))
(define %t-i8 (%ctype 'i8 1 1 #f))
(define %t-u8 (%ctype 'u8 1 1 #f))
@@ -365,20 +353,23 @@
;; --------------------------------------------------------------------
;; sym — declared identifier (function, variable, typedef, …)
-;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage).
+;; defined? distinguishes a forward declaration (extern fn proto, extern
+;; var) from a definition (fn body, var with initializer, tentative def
+;; without `extern`). scope-bind! merges compatible decls; only two
+;; defined? syms with the same name fire a redefinition error.
;; --------------------------------------------------------------------
(define-record-type sym
- (%sym name kind storage type slot)
+ (%sym name kind storage type slot defined?)
sym?
- (name sym-name) ; bv
- (kind sym-kind) ; symbol from §1.7
- (storage sym-storage) ; symbol from §1.8 or #f
- (type sym-type) ; ctype
- (slot sym-slot)) ; fixnum | bv | #f, per kind
+ (name sym-name) ; bv
+ (kind sym-kind) ; symbol from §1.7
+ (storage sym-storage) ; symbol from §1.8 or #f
+ (type sym-type) ; ctype
+ (slot sym-slot) ; fixnum | bv | #f, per kind
+ (defined? sym-defined?)) ; #t = definition, #f = decl-only
;; --------------------------------------------------------------------
-;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind),
-;; §1.10 (reg names).
+;; opnd — operand on cg's vstack.
;; --------------------------------------------------------------------
(define-record-type opnd
(%opnd kind type ext lval?)
@@ -390,7 +381,6 @@
;; --------------------------------------------------------------------
;; loop-ctx — entry on parser's loop/switch context stack.
-;; See CC-CONTRACTS §1.9.
;; --------------------------------------------------------------------
(define-record-type loop-ctx
(%loop-ctx kind tag has-continue?)
@@ -417,7 +407,7 @@
;; iter holds a tok-iter (typically a pp-iter chained over a lex-iter).
;; peek / peek2 / advance go through iter-peek / iter-peek2 / iter-next
;; so the parser pulls one token at a time, with no full materialized
-;; token list. See docs/CC-STREAM.md.
+;; token list.
(define-record-type pstate
(%pstate iter scope tags loops fn-ctx typedefs cg)
pstate?
@@ -469,10 +459,10 @@
(in-fn? cg-in-fn? cg-in-fn?-set!))
;; --------------------------------------------------------------------
-;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1.
+;; Symbol alphabets — canonical alists.
;; --------------------------------------------------------------------
-;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol.
+;; Keyword bytevector → keyword symbol.
(define %keyword-alist
'(;; storage
("auto" . auto) ("register" . register) ("static" . static)
@@ -503,7 +493,7 @@
("_Static_assert" . _Static_assert)
("_Complex" . _Complex) ("_Imaginary" . _Imaginary)))
-;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol.
+;; Punctuator bytevector → punct symbol.
;; Listed longest-match-first; the lexer scans this list in order.
;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents.
(define %punct-alist
@@ -535,48 +525,33 @@
;; cc/lex.scm — bytestream → token list. Pure function; no I/O,
;; no macro awareness.
;;
-;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets
-;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate.
-;;
-;; Owner: <unassigned>
-;;
-;; Implementation notes:
-;;
-;; - The lexer walks `src` byte-by-byte, threading (pos, line, col)
-;; explicitly through every helper (no mutable state). Each token
-;; captures its starting loc; helpers return (tok npos nline ncol).
-;; - Trigraphs and `\<newline>` line splicing are handled via a single
-;; logical-byte primitive `%lex-peek`: it advances over splices and
-;; translates trigraphs in-place, so downstream code only ever sees
-;; the "translation phase 2" stream.
-;; - Comments are stripped at the same level as whitespace.
-;; - NL tokens are emitted at every physical newline so pp can use
-;; them to terminate directives.
+;; The lexer walks `src` byte-by-byte, threading (pos, line, col)
+;; explicitly through every helper (no mutable state). Each token
+;; captures its starting loc; helpers return (tok npos nline ncol).
+;; Trigraphs and `\<newline>` line splicing are handled via a single
+;; logical-byte primitive `%lex-peek`: it advances over splices and
+;; translates trigraphs in-place, so downstream code only ever sees
+;; the "translation phase 2" stream. Comments are stripped at the
+;; same level as whitespace. NL tokens are emitted at every physical
+;; newline so pp can use them to terminate directives.
;;
;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm):
+;; token-producing helpers wrap their inner work in a heap-mark /
+;; heap-rewind! arena. Slots that must survive the rewind (start-loc
+;; and the integer holders for npos/nline/ncol) are bound *before*
+;; the (set! mark (heap-mark)) so the let's env extensions live below
+;; the mark. The byte-run scanners' tail-call env frames and any
+;; %lex-peek 4-lists are above the mark and get reclaimed. For
+;; helpers that produce a fresh bytevector (ident, string), the bv is
+;; allocated post-rewind so it persists into the parent arena.
+;; Numeric digit runs accumulate inline via %accum-int-while.
;;
-;; - Token-producing helpers wrap their inner work in a heap-mark /
-;; heap-rewind! arena. The slots that must survive the rewind
-;; (start-loc and the integer holders for npos/nline/ncol) are bound
-;; *before* the (set! mark (heap-mark)) so the let's env extensions
-;; live below the mark. The byte-run scanners' tail-call env frames
-;; and any %lex-peek 4-lists are above the mark and get reclaimed.
-;; For helpers that produce a fresh bytevector (ident, string), the
-;; bv is allocated post-rewind so it persists into the parent arena.
-;; - Numeric digit runs accumulate their value inline via
-;; %accum-int-while; they no longer materialize a per-byte cons list
-;; and then a separate %digits-value walk.
-;; - lex-tokenize wraps each token-emitting iteration in an outer
-;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv
-;; above this outer mark; the driver reads the scalar fields and
-;; copies any bv contents into %lex-scratch (sticky, pre-mark) before
-;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv
-;; (for IDENT/STR) sized to the actual content. Surface effect on
-;; the kitchen-sink test: per-token heap delta drops from ~12 KB
-;; (interpreter overhead, env-extension cons cells, eval_args lists,
-;; tail-rec named-let frames in the inner scanners, all sticky for
-;; the duration of lex) to ~840 B (tok + loc + acc-cons + bv +
-;; the irreducible eval-cost of the post-rewind reconstruction).
+;; %lex-iter-pull wraps each token-emitting iteration in an outer
+;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv
+;; above this outer mark; the driver reads the scalar fields and
+;; copies any bv contents into %lex-scratch (sticky, pre-mark) before
+;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv
+;; (for IDENT/STR) sized to the actual content.
;; --------------------------------------------------------------------
;; Cross-rewind transport for IDENT / STR bv values.
@@ -587,15 +562,12 @@
;; ident/string content) by copying back out of scratch. The whole
;; lex run shares this one buffer.
;;
-;; We considered a deduplicating *intern pool* (one bv per unique
-;; ident, repeats share storage). With scheme1's interpreter, walking
-;; the pool cons-list per lookup costs ~50–150 B per step in
-;; bind_params/eval_args/named-let env-extension overhead. Even with
-;; 16-way bucketing, a 50 KB tcc.flat.c prefix saw walk costs blow
-;; past the bv-allocation savings (60+ MB heap vs 17 MB un-interned).
-;; The cleanest path until scheme1 gets a vector primitive is no
-;; intern; if a downstream eq?-fast-path is needed later, layer it
-;; atop %lex-scratch with a hash backed by a u8 bytevector.
+;; Why scratch, not a deduplicating intern pool: under scheme1's
+;; interpreter, walking a cons-list pool per lookup costs ~50–150 B
+;; per step in bind_params/eval_args/named-let env-extension
+;; overhead. Even 16-way bucketing has the walk cost outpace the
+;; bv-allocation savings until scheme1 grows a vector primitive
+;; (an O(1) bucket lookup without interpreter overhead).
;; --------------------------------------------------------------------
(define %lex-scratch-cap 65536)
(define %lex-scratch (make-bytevector %lex-scratch-cap 0))
@@ -1279,7 +1251,7 @@
;; --------------------------------------------------------------------
;; Punctuator reader.
;;
-;; Greedy longest-match against %punct-alist (cc/data.scm). The alist
+;; Greedy longest-match against %punct-alist. The alist
;; is already ordered longest-first. We additionally bucket entries by
;; their first byte so %lex-read-punct only loops over the small set of
;; patterns that can start at the current source byte.
@@ -1388,8 +1360,7 @@
;; Each pipeline layer (lex, pp, parser) wraps the layer below as a
;; tok-iter. iter-next pulls one token at a time. iter-peek/iter-peek2
;; cache lookahead in `buf`. iter-unget! pushes back. Live-data bound is
-;; lookahead (≤2) + per-layer state, not source length. See
-;; docs/CC-STREAM.md.
+;; lookahead (≤2) + per-layer state, not source length.
;;
;; Pull-fns must keep yielding EOF after the first EOF (idempotent).
(define-record-type tok-iter
@@ -1433,9 +1404,9 @@
(define (iter-unget! it t)
(tok-iter-buf-set! it (cons t (tok-iter-buf it))))
-;; Drain to a list ending in EOF. Used by back-compat wrappers
-;; (lex-tokenize, pp-expand) so the cc-lex / cc-pp test runners can
-;; inspect the materialized stream.
+;; Drain an iter to a list ending in EOF. Used by lex-tokenize /
+;; pp-expand so the cc-lex / cc-pp test runners can inspect the
+;; materialized stream.
(define (iter->list it)
(let loop ((acc '()))
(let ((t (iter-next it)))
@@ -1643,15 +1614,14 @@
(lex-state-bol?-set! st nbol?)
(make-tok kind tok-val (%loc file loc-line loc-col)))))))
-;; Back-compat wrapper — drains lex-iter into a list ending in EOF.
-;; Used by the cc-lex test runner.
+;; Drain a lex-iter into a list ending in EOF, for the cc-lex test
+;; runner. Production callers chain make-lex-iter directly.
(define (lex-tokenize src file)
(iter->list (make-lex-iter src file)))
-;; cc/pp.scm — token list -> expanded token list.
-;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4.
+;; cc/pp.scm — preprocessor. Hide-set per C11 6.10.3.4.
;; #include rejected (CC.md §Toolchain envelope).
-;; --- helpers (TODO: promote to util.scm if shared more broadly) ---
+;; --- helpers ---
(define (%pp-bv-mem? x xs)
(cond ((null? xs) #f)
((bv= x (car xs)) #t)
@@ -1671,8 +1641,8 @@
;; cond-stack: list of (active? . has-taken?). Outer-active gating is
;; computed by walking the stack rather than encoding it in frames.
;;
-;; Streaming fields (used only by make-pp-iter; #f for the legacy
-;; bounded-buffer path used by pp-eval-cexpr):
+;; Streaming fields drive make-pp-iter; the bounded-buffer path used
+;; by pp-eval-cexpr leaves them at #f / '().
;; lex-iter — upstream tok-iter, or #f
;; up-pending — toks unshifted upstream (macro-expansion bodies that
;; must be re-scanned for further expansion)
@@ -1922,9 +1892,9 @@
(pps-out-buf-set! st (cons next (pps-out-buf st)))
cur)))))))
-;; Back-compat wrapper — drains pp-iter into a list ending in EOF.
-;; Used by the cc-pp test runner. Wraps the input list in a list-iter
-;; so the streaming engine sees a normal upstream.
+;; Drain a pp-iter into a list ending in EOF, for the cc-pp test
+;; runner. The input token list becomes the upstream via make-list-iter.
+;; Production callers chain make-pp-iter directly over a make-lex-iter.
(define (pp-expand toks initial-defines)
(iter->list (make-pp-iter (make-list-iter toks) initial-defines)))
@@ -2321,8 +2291,8 @@
(%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '()))
;; Body substitution: walk body; replace param IDENTs with arg toks,
-;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not
-;; pre-expand args before substitution; the rescan after substitution
+;; handle `#param` (stringize) and `a##b` (paste). Args are not
+;; pre-expanded before substitution; the rescan after substitution
;; catches the same expansions in practice.
(define (%pp-substitute body env call-loc)
(let loop ((body body) (out '()))
@@ -2562,14 +2532,13 @@
(else (values v (cdr r))))))
(else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks))))))
;; cc/cg.scm — codegen state and emission API.
-;; Realization of docs/CC-INTERNALS.md §cg.scm.
-;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc;
-;; cg owns sign extension, signed/unsigned dispatch, pointer scaling.
+;; Conversion split: parse owns promotion etc; cg owns sign extension,
+;; signed/unsigned dispatch, pointer scaling.
;;
;; Output uses libp1pp's structured macros (%fn, %ifelse_nez,
;; %loop_tag, %break, %continue) per docs/LIBP1PP.md.
;;
-;; Frame layout (CC-CONTRACTS §3):
+;; Frame layout:
;; [sp + 0 .. staging*8) outgoing-arg staging
;; [sp + staging*8 ..) locals + spilled vstack values
;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the
@@ -2648,7 +2617,7 @@
;; gather bytes via %lb + shli/or; stores scatter via shri/%sb.
;; Signed loads (i16/i32) sign-extend via shli/sari to canonical
;; 64-bit form.
-;; 8 (or anything else for now): %ld / %st.
+;; 8 (and any other size): %ld / %st.
;; Scratch convention: helpers may clobber t1; callers never pass
;; reg=t1.
@@ -2833,7 +2802,6 @@
;; clobber a0/a1, so falling straight through to cc__main forwards
;; them unchanged. The 16-byte frame is just enough for %enter's
;; saved-fp/lr to fit; cc__main builds its own frame on top.
- ;; (CC-CONTRACTS §J.1, §5.4.)
(let ((tb (cg-text cg)))
(buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n")
(buf-push! tb "%fn(p1_main, 16, {\n")
@@ -2908,8 +2876,8 @@
(bv-cat (list "%st(a0, sp, "
(%cg-slot-expr cg ss) ")\n")))))
(else (%cg-fn-set! cg '%fn-sret-slot #f))))
- ;; params per CC-CONTRACTS §3.1. With sret, explicit arg i lives at
- ;; ABI position (i+1): args 0..2 in a1..a3, args 3+ in slot (i-3).
+ ;; With sret, explicit arg i lives at ABI position (i+1): args 0..2
+ ;; in a1..a3, args 3+ in slot (i-3).
(let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0))
(spill (lambda (abi off)
(cond
@@ -2943,7 +2911,7 @@
(nm (car p))
(ty (cdr p))
(off (cg-alloc-slot cg 8 8))
- (psym (%sym nm 'param #f ty off)))
+ (psym (%sym nm 'param #f ty off #t)))
(spill (+ idx sret-shift) off)
(walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)
(or first-slot off))))))))
@@ -3060,9 +3028,9 @@
;; (or label, or indirect-marked frame) backing the lval keeps existing
;; until the function ends. For rvals it duplicates the descriptor of
;; the spilled value; both copies refer to the same already-emitted
-;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to
-;; preserve the lhs across a `cg-load` so the subsequent `cg-assign`
-;; still has its address.
+;; storage. Used for `lhs += rhs` and `++lhs` to preserve the lhs
+;; across a `cg-load` so the subsequent `cg-assign` still has its
+;; address.
(define (cg-dup cg)
(let ((p (cg-top cg))) (cg-push cg p) p))
@@ -3384,8 +3352,8 @@
(else (cg-push cg p)))))
(define (cg-arith-conv cg)
- ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to
- ;; arithmetic operands. When either operand is a pointer (or array,
+ ;; Usual arithmetic conversions on arithmetic operands. When either
+ ;; operand is a pointer (or array,
;; which behaves as a pointer in arithmetic), the pair is a
;; pointer-arith case — leave the types alone so cg-binop can detect
;; the ptr operand and apply the right scaling.
@@ -3536,8 +3504,8 @@
(define (cg-assign cg)
;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek
- ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2),
- ;; emits the store, pushes the assigned value as the result rval.
+ ;; deeper than vstack top to do this itself), emits the store, pushes
+ ;; the assigned value as the result rval.
(let* ((rhs0 (cg-pop cg))
(lhs (cg-pop cg))
(ty (opnd-type lhs)))
@@ -3714,8 +3682,7 @@
(define (cg-loop cg head-thunk body-thunk)
;; body-thunk receives the loop tag as its argument; parser uses
- ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS
- ;; §1.9 / §3.3.
+ ;; that tag for cg-break / cg-continue inside the body.
(let ((tag (%cg-fresh-loop-tag cg)))
(%cg-emit-many cg (list "%loop_tag(" tag ", {\n"))
(head-thunk)
@@ -3748,7 +3715,7 @@
;;
;; Limitation: only first 4 incoming args (named + variadic) live in
;; the save area; variadic args at index >= 4 need LDARG and are not
-;; yet supported. See punchlist §G.2 for the gap.
+;; yet supported.
;; --------------------------------------------------------------------
(define (%cg-vararg-first-slot cg)
(let ((s (%cg-fn-get cg '%fn-vararg-first-slot)))
@@ -3824,7 +3791,7 @@
0)
;; --------------------------------------------------------------------
-;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3).
+;; Labels and unconditional goto.
;; user_<name> namespace keeps the user's label space disjoint from
;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through
;; libp1pp's %scope mechanism, so forward references inside the same
@@ -4012,10 +3979,63 @@
(define (scope-leave! ps)
(ps-scope-set! ps (cdr (ps-scope ps)))
(ps-tags-set! ps (cdr (ps-tags ps))))
+(define (ctype-compat? a b)
+ (cond
+ ((eq? a b) #t)
+ ((not (eq? (ctype-kind a) (ctype-kind b))) #f)
+ (else
+ (let ((k (ctype-kind a)))
+ (cond
+ ((eq? k 'ptr) (ctype-compat? (ctype-ext a) (ctype-ext b)))
+ ((eq? k 'arr)
+ (let ((ea (ctype-ext a)) (eb (ctype-ext b)))
+ (and (ctype-compat? (car ea) (car eb))
+ (or (= (cdr ea) (cdr eb))
+ (< (cdr ea) 0) (< (cdr eb) 0)))))
+ ((eq? k 'fn) (%fn-ctype-compat? (ctype-ext a) (ctype-ext b)))
+ ((or (eq? k 'struct) (eq? k 'union) (eq? k 'enum)) #f)
+ (else #t))))))
+
+(define (%fn-ctype-compat? a b)
+ (and (ctype-compat? (car a) (car b))
+ (eq? (car (cddr a)) (car (cddr b)))
+ (%fn-params-compat? (cadr a) (cadr b))))
+
+(define (%fn-params-compat? pa pb)
+ (cond
+ ((and (null? pa) (null? pb)) #t)
+ ((or (null? pa) (null? pb)) #f)
+ ((ctype-compat? (cdar pa) (cdar pb))
+ (%fn-params-compat? (cdr pa) (cdr pb)))
+ (else #f)))
+
+(define (sym-merge old new)
+ (cond
+ ((not (eq? (sym-kind old) (sym-kind new)))
+ (die #f "redecl: kind mismatch" (sym-name old)))
+ ((not (ctype-compat? (sym-type old) (sym-type new)))
+ (die #f "redecl: type mismatch" (sym-name old)))
+ ((eq? (sym-kind old) 'typedef) old)
+ ((eq? (sym-kind old) 'enum-const)
+ (cond ((equal? (sym-slot old) (sym-slot new)) old)
+ (else (die #f "enum-const redecl" (sym-name old)))))
+ ((and (sym-defined? old) (sym-defined? new))
+ (die #f "redefinition" (sym-name old)))
+ ((sym-defined? new) new)
+ (else old)))
+
(define (scope-bind! ps n s)
- (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f)))
- (if (alist-ref n top) (die #f "dup decl" n)
- (ps-scope-set! ps (cons (alist-set n s top) r)))))
+ (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f))
+ (old (alist-ref n top)))
+ (cond
+ ((not old)
+ (ps-scope-set! ps (cons (alist-set n s top) r)))
+ (else
+ (let ((merged (sym-merge old s)))
+ (cond
+ ((eq? merged old) #t)
+ (else
+ (ps-scope-set! ps (cons (alist-set n merged top) r)))))))))
(define (scope-lookup ps n)
(let loop ((f (ps-scope ps)))
(cond ((null? f) #f)
@@ -4210,7 +4230,7 @@
(advance ps) (parse-const-int ps))
(else nv))))
(scope-bind! ps nm
- (%sym nm 'enum-const #f %t-i32 val))
+ (%sym nm 'enum-const #f %t-i32 val #t))
(cond ((at-punct? ps 'comma) (advance ps))
((at-punct? ps 'rbrace) #t)
(else (die (tok-loc (peek ps)) "enum")))
@@ -4571,7 +4591,8 @@
(else (die (tok-loc t) "const-expr: bad operand"
(tok-value t))))))
-;; Back-compat wrapper: callers that just want the integer value.
+;; Convenience: returns the integer value alone (callers that don't
+;; need the type half of parse-const-expr's (value . ctype) result).
(define (parse-const-int ps) (car (parse-const-expr ps)))
(define (parse-declarator ps base)
@@ -4699,11 +4720,11 @@
((not n) (die #f "no name"))
((eq? sto 'typedef)
(typedef-add! ps n)
- (scope-bind! ps n (%sym n 'typedef #f ty #f)))
+ (scope-bind! ps n (%sym n 'typedef #f ty #f #t)))
((ctype-is-fn? ty)
(scope-bind! ps n
(%sym n 'fn (or sto 'extern) ty
- (bytevector-append "cc__" n))))
+ (bytevector-append "cc__" n) #f)))
;; §I: block-scope `static` routes to a global with a name mangled
;; on the enclosing function so two functions can each have their
;; own `static int n;` without colliding. The sym's NAME holds the
@@ -4714,7 +4735,7 @@
(let* ((fname (fn-ctx-name (ps-fn-ctx ps)))
(mangled (bytevector-append fname "__" n))
(sm (%sym mangled 'var 'static ty
- (bytevector-append "cc__" mangled))))
+ (bytevector-append "cc__" mangled) #t)))
(scope-bind! ps n sm)
(cond
((at-punct? ps 'assign)
@@ -4724,11 +4745,17 @@
(else
(cond
((not (ps-fn-ctx ps))
- (let ((sm (%sym n 'var (or sto 'extern) ty
- (bytevector-append "cc__" n))))
+ ;; defined? = #f for `extern` decls without `=`; everything else
+ ;; (initializer present, or no `extern` keyword) is a definition
+ ;; or tentative def, so the merge logic in scope-bind! treats it
+ ;; as the authoritative binding.
+ (let* ((init? (at-punct? ps 'assign))
+ (def? (or init? (not (eq? sto 'extern))))
+ (sm (%sym n 'var (or sto 'extern) ty
+ (bytevector-append "cc__" n) def?)))
(scope-bind! ps n sm)
(cond
- ((at-punct? ps 'assign)
+ (init?
(advance ps)
(cg-emit-global (ps-cg ps) sm
(parse-init-global ps ty)))
@@ -4738,7 +4765,7 @@
(let* ((sz (max (ctype-size ty) 1))
(al (max (ctype-align ty) 1))
(sl (cg-alloc-slot (ps-cg ps) sz al))
- (sm (%sym n 'var (or sto 'auto) ty sl)))
+ (sm (%sym n 'var (or sto 'auto) ty sl #t)))
(scope-bind! ps n sm)
(cond
((at-punct? ps 'assign)
@@ -4750,9 +4777,9 @@
(eq? (tok-kind (peek ps)) 'STR)))
(parse-init-local-aggregate ps sm ty))
;; Struct/union initializer from a non-brace expression
- ;; (typically a function call returning by-value, per
- ;; Stream A1). The expr produces a struct lval; we copy
- ;; bytes into the destination slot.
+ ;; (typically a function call returning by-value). The
+ ;; expr produces a struct lval; we copy bytes into the
+ ;; destination slot.
((or (eq? (ctype-kind ty) 'struct)
(eq? (ctype-kind ty) 'union))
(cg-push-sym (ps-cg ps) sm)
@@ -5208,11 +5235,11 @@
(define (%parse-init-local-struct-list ps sm base-off ty)
;; Track each initialized field by name in `seen`; at the closing brace
- ;; zero every field NOT in `seen`. The previous design tracked positional
- ;; "remaining fields" via `rest`, which silently dropped earlier fields
- ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x`
- ;; uninitialized). C requires every unmentioned member of an aggregate
- ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21).
+ ;; zero every field NOT in `seen`. Tracking by name (rather than
+ ;; positional "remaining" fields) handles a designator jumping
+ ;; backwards correctly — e.g. `{.y = 5}` must still zero `x`.
+ ;; C requires every unmentioned member of an aggregate with at least
+ ;; one designator/initializer to be zeroed (C11 §6.7.9 ¶21).
(let ((fields (%init-struct-fields ty)))
(let lp ((rest fields) (seen '()))
(cond
@@ -5300,11 +5327,12 @@
;; full heap cost only for functions that genuinely mutate global state.
(define (parse-fn-body ps name dt)
;; Hoist the recursive-binding scope-bind! out of the marked region
- ;; so the fn-sym cons survives rewind.
- (cond ((not (scope-lookup ps name))
- (scope-bind! ps name
- (%sym name 'fn 'extern dt
- (bytevector-append "cc__" name)))))
+ ;; so the fn-sym cons survives rewind. defined?=#t marks this as a
+ ;; definition so a prior forward decl gets overwritten and a second
+ ;; definition with the same name fires sym-merge's redefinition error.
+ (scope-bind! ps name
+ (%sym name 'fn 'extern dt
+ (bytevector-append "cc__" name) #t))
(let* ((cg (ps-cg ps))
(mark (heap-mark))
(globals-before (cg-globals cg))
@@ -5422,8 +5450,8 @@
(advance ps) (parse-stmt ps))
(else #t)))))
-;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS
-;; §3.3); the parser threads it into break/continue via loop-ctx.
+;; cg-loop's body-thunk receives the tag from cg; the parser threads
+;; it into break/continue via loop-ctx.
(define (parse-while-stmt ps)
(expect-kw ps 'while)
@@ -5543,7 +5571,7 @@
(cond
;; Struct/union return — leave the source as a struct lval;
;; cg-return copies bytes into the function's return slot.
- ;; (Stream A1, P1.md §Arguments and return values.)
+ ;; (P1.md §Arguments and return values.)
((or (eq? rk 'struct) (eq? rk 'union))
(parse-expr ps)
(cg-return (ps-cg ps)))
@@ -5857,7 +5885,7 @@
;; pass, and array decay all chain through the existing primitives
;; (cg-take-addr / cg-push-field / cg-decay-array via rval!).
;;
-;; File-scope literals are out of scope (Stream B, docs/CC-PARALLEL-PLAN.md).
+;; File-scope literals are out of scope.
;; --------------------------------------------------------------------
(define (parse-compound-literal ps ty)
(cond
@@ -5870,7 +5898,7 @@
;; sym-slot at its top-level entry to seed base-off; the
;; recursive helpers thread `sm` along but never read other
;; fields. The name is unbound and never enters scope.
- (sm (%sym "__cl" 'var 'auto ty sl)))
+ (sm (%sym "__cl" 'var 'auto ty sl #t)))
(cond
((or (eq? (ctype-kind ty) 'arr)
(eq? (ctype-kind ty) 'struct)
@@ -6094,12 +6122,9 @@
(cg-load (ps-cg ps)))
(else #t))))
;; cc/main.scm — driver. Argv, file I/O, ties phases together.
-;;
-;; Realization of docs/CC-INTERNALS.md §main.scm.
;; --------------------------------------------------------------------
-;; CLI: cc <input.c> <output.P1pp>
-;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.)
+;; CLI: cc [--cc-debug] <input.c> <output.P1pp>
;;
;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
;; is the catm'd compiler source path, argv[2..] are the user-facing
@@ -6152,7 +6177,6 @@
;; Streaming pipeline: lex → pp → parser → cg, all concurrent.
;; Each stage pulls one tok at a time from upstream. Steady-state
;; live data is bounded by parser/pp state, not source length.
- ;; See docs/CC-STREAM.md.
(let* ((src (%cc-slurp in-path))
(_1 (debug-log "phase=slurp" "heap" (heap-usage)
"src-bytes" (bytevector-length src)))
diff --git a/docs/CC-PARALLEL-PLAN.md b/docs/CC-PARALLEL-PLAN.md
@@ -1,233 +0,0 @@
-# CC parallel work plan
-
-Coordination doc for the next batch of `tests/cc/` features. Defines
-four parallel work streams, the design seams between them, and the
-sequencing required where streams aren't fully independent.
-
-Companion to [CC-PUNCHLIST.md](CC-PUNCHLIST.md) (the per-item
-checklist) and [CC-INTERNALS.md](CC-INTERNALS.md) (module layering
-and the cg-fixture-first workflow). ABI choices below cite
-[P1.md §Arguments and return values](P1.md).
-
-## Tests in scope
-
-Currently failing on aarch64:
-
-| Test | Stream | Failure |
-|---|---|---|
-| cc/082-union-basic | D | exit 1 — union field offsets bumped like struct |
-| cc/087-sizeof-noeval | D | exit 2 — sizeof(x++) actually increments x |
-| cc/111-struct-ret-1word | A1 | compile-fail — return-by-value struct unhandled |
-| cc/112-struct-ret-2word | A1 | compile-fail — no two-word return path |
-| cc/113-struct-ret-3word | A2 | compile-fail — no indirect-result path |
-| cc/114-struct-ret-many-args | A3 | exit 2 — silently truncates |
-| cc/115-struct-ret-3word-many-args | A3 | exit 2 — silently truncates |
-| cc/116-struct-ret-vararg | A3 | exit 2 — silently truncates |
-| cc/117-compound-literal | B | compile-fail — `(T){…}` unparsed |
-| cc/118-const-expr | C | compile-fail — const-expr surface incomplete |
-
-## Stream layout
-
-```
- ┌─ A1: 111 + 112 (one-word + two-word direct)
-Stream A (sequential) ───┤
-struct-return ABI └─ A2: 113 (indirect-result)
- │
- ▼
- ┌─ A3a: 114 (sret + stack-staged args) ┐
- ├─ A3b: 115 (sret + 3-word + many args) │ parallel
- └─ A3c: 116 (sret + variadic save area) ┘
-
-Stream B (independent, t=0): 117 compound literals
-Stream C (independent, t=0): 118 const-expr evaluator
-Stream D (independent, t=0): 082 union offsets + 087 sizeof no-emit
-```
-
-`t=0`: A1, B, C, D fan out together. A2 starts when A1 lands. A3a/b/c
-fan out when A2 lands. Total wall time ≈ A1 + A2 + max(A3).
-
-## Stream A — Struct-return ABI
-
-Implements the three result conventions from P1.md §Arguments:
-
-| Width | Convention | a0 | a1 | Args 0..3 |
-|---|---|---|---|---|
-| ≤ 8B | one-word direct | result word 0 | (caller arg 1) | a0..a3 |
-| 9–16B | two-word direct | result word 0 | result word 1 | a0..a3 |
-| > 16B | indirect-result | result-buffer ptr | (callee arg 0) | a1..a3 (shifted) |
-
-In the indirect convention, incoming stack-arg slot 0 corresponds to
-explicit arg word 3 (not 4). `LDARG` indexing in the variadic save area
-must respect this shift.
-
-### A1 — one-word and two-word direct
-
-- **cg primitives:** extend `cg-fn-end` to load the function's return
- slot into a0 (≤8B) or a0/a1 (9–16B) per the return ctype's size.
- Extend `cg-call`'s receive side to allocate a fresh frame slot via
- `cg-alloc-slot` (sized to the return ctype) and store back from
- a0[/a1] before pushing a frame-lval.
-- **Uniform path:** ≤8B and 9–16B both go through a frame slot. No
- register-only fast path. Simpler cg, identical parser surface.
-- **Receive-area lifetime:** fresh slot per call site, allocated by
- cg-call. Required so chained `make_triple(...).c` (cc/113:32) and
- `ret1(99).x` (cc/111:26) don't alias across consecutive calls.
-- **Parser:** `parse-return-stmt` accepts a struct-typed expression and
- emits a per-byte copy from the source lval into the function's
- return slot. `parse-postfix-rest` accepts struct-typed call results
- as lvals so `.field` and `&` chain naturally.
-- **CC-CONTRACTS update:** §3.2 currently asserts a single 8-byte
- return slot. Replace with a pointer to P1.md §Arguments and a note
- that the cg lowers all three conventions.
-- **Fixtures:** `tests/cc-cg/70-struct-ret-1word.scm`,
- `71-struct-ret-2word.scm` first; then unblock `tests/cc/111`, `112`.
-
-### A2 — indirect-result
-
-- **cg primitives:** when return ctype size > 16, `cg-fn-begin/v`
- treats arg slot 0 as the sret pointer, parameter slots shift by one
- register. `cg-fn-end` does no a0 store (callee already wrote through
- *a0 during the return-stmt copy); the convention's "a0 holds the
- same pointer on return" is automatic since a0 hasn't been clobbered.
-- **Caller side:** `cg-call` detects sret-eligible return type; before
- emitting the call, materializes the receive-slot's address into a0,
- shifts ordinary args from a0..a3 → a1..a3 + stack, with stack slot 0
- now holding arg word 3.
-- **Variadic interaction is deferred to A3c.** A2 itself targets only
- cc/113 (no varargs, ≤4 named args).
-- **Fixtures:** `tests/cc-cg/72-struct-ret-3word.scm`; then `tests/cc/113`.
-
-### A3 — sret compositions (parallel)
-
-Each agent picks up one test, mostly composing infrastructure A1+A2
-already shipped:
-
-- **A3a — cc/114** (sret-pair + 8 stack-staged args): the two-word
- return doesn't need sret; the 8 args do exercise stack staging
- (P1.md §Incoming stack-argument area). Validates that A1's
- two-word receive composes with the existing stack-stage path.
-- **A3b — cc/115** (sret-3word + 8 stack-staged args): with sret in
- a0, args 0–2 live in a1–a3 and args 3–7 stage to stack slots 0–4.
- Pure indexing test for A2's shift.
-- **A3c — cc/116** (sret + variadic save area): `cg-fn-begin/v`'s
- 16-slot save window indexes from incoming arg word 0. When the fn
- uses indirect-result, slot 0 is arg word 3 (not 4) — A3c adjusts the
- windowing accordingly. `__builtin_va_start` must skip the sret
- pointer; in practice that's automatic if the named-arg count
- threading is correct, since the sret pointer occupies a0 and the
- named args start at a1.
-
-## Stream B — Compound literals (117)
-
-C99 §6.5.2.5: `(T){ init-list }` as a postfix expression. Block-scope
-only; file-scope literals `die` explicitly.
-
-- **Parser:** detect `(T){` lookahead in `parse-cast-or-unary`. Parse
- the typename via `parse-decl-spec` + `parse-declarator`, then call
- the existing `parse-init-local-aggregate` against a fresh slot from
- `cg-alloc-slot`. Push a frame-lval typed as T (or T[N]).
-- **Lvalue contract:** the literal is an lvalue, so `&literal`,
- `literal.field`, and `literal[i]` all work via existing
- `cg-take-addr` / `cg-push-field` / `cg-decay-array` paths.
-- **Lifetime:** frame slot ⇒ enclosing block, automatic. Matches C99.
-- **Reuse:** no new cg primitives. The fixture surface (positional,
- designated, partial-init zero-fill, trailing comma, array decay,
- byval struct arg) is all already covered by Stream E in
- CC-PUNCHLIST.
-
-## Stream C — Const-expr evaluator (118)
-
-Adds `parse-const-expr ps → (value . ctype)`, a self-contained walker
-that never touches cg.
-
-- **Operand surface:** integer/character literal, enum constant,
- `sizeof(typename)`, unary `+ - ~ !`, binary `+ - * / % << >> & | ^`,
- compare `< <= > >= == !=`, logical `&& ||` (short-circuit), ternary
- `?:`, cast to integer type, parenthesization. Anything else `die`s.
-- **Width-aware return:** the `(int)(unsigned char)257 == 1` case
- requires the cast to truncate at u8 width. Bare fixnum loses this;
- the (value . ctype) tuple keeps it.
-- **Sizeof arm:** for now, only `sizeof(TYPENAME)` — the only form
- exercised by 118. If a value-expression form surfaces later, grow a
- scope-lookup arm; still no cg interaction.
-- **Wiring (replace existing `parse-const-int` call sites):**
- `parse-enum-spec`, `parse-decl-suf-cont`'s `[]` arm,
- `parse-init-global`'s scalar branch, `parse-switch-stmt`'s case
- label, local array bound in `parse-stmt`.
-- **No interaction with Stream D.** See "Sizeof split" below.
-
-## Stream D — Surgical fixes (082 + 087)
-
-Single agent — both ~30-line changes.
-
-### 082 — union field offsets
-
-`parse-struct-fields` (cc.scm:3634) advances offset after every field
-regardless of kind. For unions all fields must stay at offset 0.
-
-- Thread `kind` from `parse-aggregate-spec` (cc.scm:3611, where it's
- already in scope) through to `parse-struct-fields`.
-- Gate the `(+ oa (max sz 0))` bump on `(eq? kind 'struct)`. Unions
- pass through with `oa` unchanged.
-- `complete-agg!` already sizes unions correctly; no change there.
-
-### 087 — sizeof no-emit
-
-The current sizeof arm at cc.scm:4898 calls `parse-unary` / `parse-expr`
-which emit code for the operand. `sizeof(x++)` therefore actually
-increments x.
-
-- Add cg primitives `cg-snapshot cg → tag` and `cg-rewind cg tag`.
- Snapshot captures vstack depth and fn-buf chunk count; rewind
- restores both. Internal-only.
-- In the sizeof arm, snapshot before parse-unary, read
- `(opnd-type (cg-top))`, rewind, push `cg-push-imm %t-u64 size`.
-- Fixture lock-in: 087 covers `sizeof(x++)`. Add a cg-fixture if cg
- primitives need direct validation.
-
-## Cross-stream contracts
-
-### Sizeof split (C and D stay independent)
-
-Two distinct callers, two independent mechanisms:
-
-- **Outside const-expr** (Stream D — 087): operand can be anything
- (`x++`, calls, side-effects). Result lives at runtime. Use
- `cg-snapshot` / `cg-rewind`.
-- **Inside const-expr** (Stream C — 118): operand grammar restricted;
- result is a parse-time fixnum. Const-expr evaluator handles
- `sizeof(TYPENAME)` directly via `parse-decl-spec` +
- `parse-declarator` + `ctype-size`. Never calls cg.
-
-The two paths share the *concept* (don't evaluate the operand) but
-not the implementation. They can land in either order.
-
-### A1 ↔ B/C/D
-
-Independent. A1 only touches `cg-fn-end`, `cg-call`, and
-`parse-return-stmt`. B touches `parse-cast-or-unary`. C adds a new
-walker plus changes to four call sites that don't overlap with A1.
-D touches `parse-struct-fields` and adds two new cg primitives.
-
-### A2 ↔ A3
-
-A3 depends on A2's indirect-result implementation. A3a/b/c are
-independent of each other once A2 has landed.
-
-## Workflow per stream
-
-Per [CC-INTERNALS §Feature workflow](CC-INTERNALS.md#feature-workflow):
-
-1. cc-cg fixture (red) — drive the cg API directly.
-2. Implement cg primitives until cc-cg green.
-3. cc fixture (red) — full driver.
-4. Implement parser changes until cc green.
-
-Pick the next free `<n>` per suite. cc-cg currently goes up through
-69; cc goes up through 118 (with gaps).
-
-## Acceptance
-
-Per stream: `make test SUITE=cc ARCH=aarch64` shows the stream's
-target tests as PASS, no prior tests regress. Final acceptance: all
-ten currently-failing tests green on aarch64.
diff --git a/docs/CC-STREAM.md b/docs/CC-STREAM.md
@@ -1,254 +0,0 @@
-# Streaming the cc
-
-Working plan. The cc today is a four-phase pipeline that materializes
-the full intermediate state at every layer:
-
-```
-src bv → [lex-tokenize] → tok list → [pp-expand] → tok list → [parse] → cg buf
-```
-
-That shape doesn't fit the data flow. Lex, pp, parse, and cg are each
-inherently sequential, with small bounded lookahead. The full token
-list is never *needed*; it's just where the current code parks
-intermediate state on the heap. For [tcc.flat.c](TCC.md) (608 KB) the
-lex output alone runs ~170 MB at ~280 B/byte even after the
-[per-token mark/rewind tightening](TCC-TODO.md#blocker-3). pp adds
-another similar slug on top. We've doubled the ELF p_memsz twice
-already; the cleaner answer is to stop materializing.
-
-This doc plans the conversion to a true single-pass streaming
-compiler: lex, pp, parse, and cg all run concurrently, each pulling
-one token at a time from the stage upstream. Steady-state live data
-is bounded by *lookahead* (≤2 tokens) plus parser state (typedef
-table, scope chain, macro table, cg output buffer), not by source
-length.
-
-## Why streaming, not GC
-
-A real GC would fix the symptom — the per-byte multiple is mostly
-dead intermediate state, and a GC would reclaim it eventually — but
-the underlying shape would still be wrong. Adding a GC to scheme1 is
-also a much bigger lift than reshaping the cc:
-
-- scheme1 is shared by every bootstrap stage; touching its allocator
- affects M1pp, p1, scheme1's own self-host, and anything else built
- on top. Streaming is local to cc.
-- A GC pass needs to trace through closures, records, env alists, and
- the bytevector heap layout. It's a multi-week project against
- scheme1's current single-page-of-allocator code.
-- Memory headroom for the cc has a hard ceiling (ELF p_memsz, which
- must be a constant in the boot ELF). A GC narrows the *typical*
- high-water mark but doesn't change the *worst case* the cap has to
- cover. Streaming changes the worst case from O(source length) to
- O(parser state).
-- We're aiming for predictability: the cc is the bottleneck stage
- that needs to compile multi-MB inputs. A bounded steady-state model
- is easier to reason about than a "GC will handle it" model.
-
-GC stays a sensible move for scheme1 *eventually* — every bootstrap
-stage would benefit. But for closing the tcc.flat.c gap it's the
-wrong tool.
-
-## Symmetry argument
-
-Each layer in the pipeline has small bounded lookahead:
-
-| layer | lookahead / buffering |
-|--------|------------------------------------------------------|
-| lex | ≤3 source bytes (trigraph), 1 token internally |
-| pp | 1 token peek; arg-collection during fn-macro expansion (bounded by call site) |
-| parse | 2 tokens (`peek2` is the deepest the grammar uses) |
-| cg | 0 (already streams to cg-text/cg-data buffers) |
-
-Live tokens across the pipeline are O(macro-arg-list +
-parse-lookahead) — call it dozens. Persistent state (parser tables
-+ macro definitions + cg output) grows with declared symbols, not
-source length. For tcc.flat.c that's an estimated 5–20 MB.
-
-## Iter abstraction
-
-A single record + three operations per layer:
-
-```scheme
-(define-record-type tok-iter
- (%tok-iter pull-fn state buf)
- tok-iter?
- (pull-fn tok-iter-pull-fn) ; thunk: state -> tok
- (state tok-iter-state) ; per-layer
- (buf tok-iter-buf tok-iter-buf-set!)) ; pushed-back toks (small list)
-
-(define (iter-next it) ...) ; consume + return next tok (EOF sentinel)
-(define (iter-peek it) ...) ; like next, but cache in buf
-(define (iter-unget! it t) ...) ; push back; next iter-next returns t
-```
-
-Each layer wraps the layer below: `lex-iter` over bytes, `pp-iter`
-over `lex-iter`, `pstate` over `pp-iter`. The iter discipline
-replaces the current "list of toks" everywhere a sequence is
-consumed.
-
-## Mapping the layers
-
-### lex-iter
-
-Drop the per-iteration `(reverse (cons … acc))` driver in
-[cc/cc.scm](../cc/cc.scm) `lex-tokenize`. State holds (pos, line,
-col, bol?). `lex-iter-next` runs roughly one iteration of the current
-loop body: skip ws+comments, dispatch, allocate one tok+loc, return.
-
-The per-token `heap-mark` / `heap-rewind!` discipline I just landed
-transposes one-for-one — same `%lex-scratch`, same scalar boxes, same
-post-rewind rebuild of tok+loc. The difference is who advances:
-instead of the loop driving itself to EOF, the *consumer* (pp-iter)
-pulls one tok per `iter-next` call.
-
-### pp-iter
-
-Most invasive. State holds (lex-iter, pending-toks, macros,
-cond-stack, current-loc). `pp-iter-next`:
-
-1. If `pending` non-empty, pop and return.
-2. Pull from `lex-iter`.
-3. If it's `HASH` at bol: collect the directive line into a small
- buffer (bounded by one logical line, terminated by NL), dispatch,
- recurse.
-4. If it's a defined ident not in its hide set: collect args
- (function-like — bounded by the call site), expand into
- `pending`, recurse.
-5. If it's `STR` and the next non-trivia tok is `STR`: fuse on the
- fly (replaces today's post-pass `%pp-merge-adjacent-strs`).
-6. Else return.
-
-The hide set tracking already runs per-token in `pp-expand`; the
-streaming version threads it through `pending` instead of through a
-flat list.
-
-### parser
-
-Already uses `ps-peek` / `ps-advance` (see
-[CC.md §Parser](CC.md)). Repoint them at `pp-iter` instead of
-indexing into a list. Add per-stmt `heap-mark` / `heap-rewind!` in
-`parse-stmt` so that the parse-time interpreter overhead also gets
-reclaimed at statement boundaries. Survivors that must cross the
-rewind (typedef bvs, scope-bound symbols, macro bodies) need to be
-allocated below the mark — same discipline as the lex driver, just
-one layer up.
-
-### cg
-
-Already streams to cg-text/cg-data buffers. No change.
-
-## Subtle bits
-
-- **#if cexpr** evaluates a fully-collected directive line — keep
- that as a *bounded* buffer (one preprocessor line, terminated by
- NL). Don't try to stream cexpr eval.
-- **Function-macro arg collection** also bounded buffer (until
- matching `)`). Worst case is multi-line, but still bounded by the
- call-site span. The only real memory concern; if a macro call spans
- 10 KB of args we hold all of it. tcc.c doesn't do that.
-- **Adjacent string concat** moves from a post-pass to a peek-and-
- fuse in `pp-iter`. Easy.
-- **Builtin macros** (`__FILE__`, `__LINE__`, `__DATE__`, …) need the
- current loc at expansion time, not at lex time. The pp-iter has it
- because it tracks lex's current loc.
-- **EOF discipline.** Each iter must yield exactly one EOF tok and
- then keep yielding EOF on subsequent calls. The parser already
- expects EOF as a stable sentinel.
-- **Lookahead beyond 2.** The parser uses `peek` and `peek2`; nothing
- deeper. If something needs 3-token lookahead later, the unget
- buffer accommodates it without changing the iter shape.
-- **Per-stmt rewind survivors.** A statement may bind a typedef
- whose name must outlive the rewind. The mark goes *after* the
- typedef-add path, or the bv is allocated below the mark — same
- trick the lex driver uses with `%lex-scratch`. Easier than it
- sounds because typedef adds happen at the top of `parse-decl`,
- which is well before any deep parse-time scratch.
-
-## Migration path
-
-Three roughly self-contained PRs. Each is independently testable —
-the existing test suites already drive only the public API
-(`lex-tokenize` for cc-lex, `pp-expand` for cc-pp, the cc front end
-for cc + cc-cg). Adapter wrappers preserve those entry points
-through the migration.
-
-### Step 1 — lex-iter
-
-Convert `lex-tokenize` to `make-lex-iter` + `lex-iter-next`. Leave
-`lex-tokenize` as a thin wrapper that drains the iter into a list,
-so cc-lex tests pass unchanged. Touchpoints:
-[cc/cc.scm](../cc/cc.scm) lex section, [cc/main.scm](../cc/main.scm).
-
-After this step: lex output is still consumed by today's
-`pp-expand` via the wrapper. Identity-preserving; no behavior change
-visible to tests.
-
-### Step 2 — pp-iter
-
-Convert `pp-expand` to `make-pp-iter` + `pp-iter-next`. Directives
-and macro expansion all move from "list-walking" style to
-"stream + pending-buffer" style. Same logic, different shape. Leave
-a `pp-expand` wrapper that drains the iter into a list for
-back-compat. Heaviest single PR.
-
-After this step: pp consumes lex via the iter; parser still drains
-pp into a list.
-
-### Step 3 — parser repoint
-
-Change `pstate` to hold a `pp-iter` instead of a tok-list cursor.
-`peek` / `peek2` / `advance` redirect to iter ops. Drop the wrappers
-from steps 1–2 once the cc-cg / cc tests pass.
-
-Add per-stmt `heap-mark` / `heap-rewind!` in `parse-stmt`. This is
-the layer where the *parser* per-form interpreter overhead gets
-reclaimed.
-
-After this step: full streaming. Memory bound shifts from O(source
-length) to O(parser state).
-
-## Expected impact
-
-For tcc.flat.c (608 KB):
-
-| state | predicted high water |
-|-------------------------------|---------------------:|
-| current (post-lex tightening) | ~170 MB during lex; pp blows the 256 MiB cap |
-| post-step-1 | unchanged (same materialization at pp/parser) |
-| post-step-2 | lex no longer materialized; pp output still listed for parser. ~80 MB? |
-| post-step-3 | full streaming. Bounded by parser state. ~10–30 MB. |
-
-Numbers are guesses until we measure, but the shape is clear: most
-of the per-byte transient cost we pay today disappears not because
-we got faster but because we never store that intermediate at all.
-
-## Open questions
-
-- **Hide-set sharing.** Today the same hide-set list can be shared
- across many tokens because it's immutable. In streaming, the same
- property holds — pp-iter just hands out shared references — but
- we should confirm there's no in-place mutation lurking in the pp
- code paths that gets converted.
-- **Diagnostics ergonomics.** When the parser dies on an unexpected
- token, today the line/col come from `tok-loc`. Same in streaming;
- no change.
-- **Test runner adapters.** cc-lex and cc-pp test runners build full
- token lists for inspection. Keep their wrappers (`lex-tokenize`,
- `pp-expand`) as drain-to-list adapters. Internally the test
- fixtures don't change.
-- **`#include`.** Currently rejected ([CC.md §Toolchain envelope](CC.md)).
- If we ever want to support it, the streaming model is friendlier:
- the include-file's lex-iter is pushed onto a stack inside pp-iter,
- and `pp-iter-next` drains it before resuming the outer file. Out
- of scope for the tcc.flat.c push but worth noting.
-
-## Related
-
-- [TCC-TODO.md](TCC-TODO.md) — blocker 3 covers the same memory
- problem from the per-byte angle. The lex-side tightening landed
- there is what step 1 of this plan transposes onto an iter.
-- [CC.md](CC.md) — the language and toolchain spec the cc implements.
-- [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md) — independent feature
- punchlist; streaming is orthogonal but unblocks running on
- tcc.flat.c so those features can be exercised end-to-end.
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -2,10 +2,8 @@
Working punch list for what stands between today's scheme1-hosted cc
and a successful compile of `tcc.flat.c`. Companion to
-[TCC.md](TCC.md) (the surrounding three-stage pipeline), [CC.md](CC.md)
-(the C subset the cc accepts), and [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md)
-(the per-feature `tests/cc/` punchlist). The blockers below are the
-ones that fire *before* we even reach the parallel-plan items.
+[TCC.md](TCC.md) (the surrounding three-stage pipeline) and
+[CC.md](CC.md) (the C subset the cc accepts).
## Repro
@@ -39,141 +37,52 @@ head -c 50000 build/cc-bootstrap/X86_64/tcc.flat.c \
# then re-run the podman invocation against tcc.head.c
```
-## Blocker 1 — `float` / `double` hard-rejected in type specifiers ✓ done
-
-Resolved: `parse-decl-spec` now accepts `float`, `double`, and `long
-double` as type specifiers, mapping them to `%t-flt` / `%t-dbl` /
-`%t-ldbl` ctypes (sizes 4/8/8). `_Complex` / `_Imaginary` are eaten as
-silent qualifiers. The five other type-name-start predicates
-(`paren-is-group?`, `stmt-starts-decl?`, `token-is-decl?`,
-`%const-paren-is-cast?`, `%const-tok-is-decl?`, plus the
-`parse-cast-or-unary` guard) recognize the new kw set. The cg guards
-fp ld/st/cast via `%cg-fp-reject!` so any attempt to materialize an fp
-value dies with `fp not codegen'd` rather than silently emitting
-integer code. Coverage: `tests/cc/119-float-parse.c` exercises
-prototypes, struct layout, and `sizeof(float|double|long double)`.
-Confirmed end-to-end: a 5 KB prefix of `tcc.flat.c` now reaches
-blocker 2 (`dup decl: _exit`) instead of dying on `double`.
-
-## Blocker 2 — compatible redeclaration rejected
-
-Site: [cc/cc.scm](../cc/cc.scm) `scope-bind!`, line 3667.
-
-```scheme
-(if (alist-ref n top) (die #f "dup decl" n)
- (ps-scope-set! ps (cons (alist-set n s top) r)))
-```
+## Blocker — parse-phase heap explosion
-C allows redundant identical extern declarations and prototypes that
-differ only in parameter names. The flattened TU has many — `_exit`
-appears in both `<stdlib.h>` and `<unistd.h>`; `strlen` is declared
-once with a named param and once without; etc.
+Past the redecl gate, the next thing we hit is heap exhaustion well
+before the full TU is parsed. Probing prefixes against the catm'd cc
+(HEAP_CAP_BYTES = 256 MiB):
-Fix: when the name is already bound at the same scope, allow the
-redeclaration if the new type is compatible with the existing one
-(extern decls of the same function; pointer/array decay equivalents).
-Only definition-after-definition or genuinely conflicting types should
-die. The same compatible-redecl rule needs to land in the typedef
-table and the tag table for forward-decl chains.
+| input | bytes | heap after parse | result |
+|--------------------|-------:|-----------------:|-----------------|
+| 220-line struct cut| 7 953 | ~39 MB | cg-finish |
+| 1612-line cut | 40 943 | ~267 MB | cg-finish (just under cap) |
+| 50 000 B head | 49 986 | — | heap exhausted |
+| full tcc.flat.c |608 547 | — | heap exhausted |
-Observed (with blocker 1 fixed, on a 5 KB prefix of tcc.flat.c):
+So parse-phase residency is roughly 6.5 KB heap per source byte at the
+moment, which puts the full 608 KB TU around 4 GB — far beyond any
+reasonable scheme1 cap. Bumping the cap is not the fix; the parser's
+per-token allocations need to drop into the per-token mark/rewind
+arena that the lex pipeline already uses (commit 62dabed).
-```
-error: dup decl: _exit
-```
-
-## Blocker 3 — lex+pp heap usage at ~3 KB per source byte (lex side fixed)
-
-Site: lex/pp pipeline in [cc/cc.scm](../cc/cc.scm) plus heap caps in
-[scheme1/scheme1.P1pp](../scheme1/scheme1.P1pp) and
-[vendor/seed/](../vendor/seed/)`<arch>/ELF.hex2`.
-
-### Lex side — done
-
-Investigation traced the bulk of the per-byte cost to scheme1's
-interpreter calling convention: every closure call allocates ~32 B
-per param via `bind_params`, every `let*` binding allocates two cons
-cells, every `eval_args` allocates one cons per arg, and tail-rec
-named-let in the inner scanners (`%scan-while`, `%fill-while-bv`,
-`%match-bytes`, `%accum-int-while`) re-applies a closure per scanned
-byte. None of this overhead is reclaimed across token boundaries —
-the lex helpers' own internal `heap-mark` / `heap-rewind!` discipline
-trims their per-helper scratch but the result tok + 4-list lives
-below any putative outer mark, so a driver-level rewind needs to
-rebuild survivors from pre-mark scalar boxes.
-
-Fix landed in `cc/cc.scm`: `lex-tokenize` now wraps each iteration in
-its own `heap-mark` / `heap-rewind!`. Scalar fields (kind, value for
-non-bv tokens, npos/nline/ncol) ferry across the rewind via outer-let
-bindings (set! mutates the binding cdr in place; no heap). Bv-typed
-values (IDENT, STRING) ferry through `%lex-scratch`, a single sticky
-bytevector allocated below all marks; post-rewind the driver copies
-back into a fresh bv sized exactly to the content. tok+loc+acc-cons
-are rebuilt post-rewind.
-
-Measured (`tests/cc/001-kitchen-sink.c`, 9.7 KB):
+Repro:
```
-phase=slurp: heap 1.23 MB src-bytes 9782
-phase=lex: heap 3.40 MB (delta ~2.17 MB → ~222 B/byte; 13× better)
-phase=pp: heap 8.58 MB
+podman run --rm --pull=never --platform linux/arm64 \
+ --tmpfs /tmp:size=512M -e ARCH=aarch64 \
+ -v "$(pwd)":/work -w /work boot2-busybox:aarch64 \
+ build/aarch64/scheme1 build/aarch64/cc/cc.scm --cc-debug \
+ build/cc-bootstrap/X86_64/tcc.flat.c /tmp/tcc.flat.P1pp
```
-A 50 KB prefix of `tcc.flat.c` (which used to exhaust the heap during
-lex) now lex+pp's cleanly through to blocker 2 (`dup decl: _exit`).
-Lex finishes at 15.6 MB, pp at 41.3 MB. Extrapolating 222 B/byte to
-the full 608 KB input gives ~135 MB during lex+pp — close enough to
-the 128 MB `p_memsz` cap that a single bump (and a bigger
-`HEAP_CAP_BYTES`) closes the gap.
-
-We also considered an interning pool (one bv per unique IDENT/STRING,
-shared across token instances). With scheme1's interpreter, walking a
-cons-list pool — even bucketed 16-way — costs ~50–150 B per step in
-`bind_params`/`eval_args`/named-let frame overhead. The walk costs
-out-paced the bv-allocation savings: a 50 KB prefix went 60+ MB with
-buckets vs ~17 MB un-interned. Until scheme1 grows a true vector
-primitive (so bucket access is O(1) without interpreter overhead),
-no-intern is the cleanest path. The `%lex-scratch` infrastructure is
-ready to layer interning back on top if a downstream eq?-fast-path
-turns out to need it.
-
-### Remaining
-
-- **pp side memory.** pp still copies the lex token list into a
- buf-list and walks it; `%pp-take-line` reverses tokens. There's
- probably similar interpreter-overhead concentrate to drain. Worth
- the same investigation pass once we're past blocker 2.
-- **Bump arena caps.** Even with the lex fix the full TU is many MB
- of tokens. `READBUF_CAP_BYTES`, `HEAP_CAP_BYTES`, and the ELF
- `p_memsz` literal must all grow in lockstep before tcc.flat.c will
- finish.
+Phase log shows heap climbs only during `parse`; `slurp` is cheap and
+`cg-finish` does not move the needle. Investigation should focus on
+which parser products escape the per-token arena — top suspects are
+ctypes built up during decl parsing, expression vstack residue, and
+the alist-based scope/tag/typedef tables.
## Suspected next-tier blockers (not yet observed)
-Past blockers 1–3 the next wave we expect:
+Past the heap gate, the next wave we expect:
- **`_Bool`, bitfield-typed struct fields, `setjmp.h` typedefs** —
- same "parse, don't codegen" softening as floats. tcc.c carries all
- of these under `HAVE_BITFIELD` / `HAVE_SETJMP` gates that are off
- but leave the declarations in the flattened text.
-- **Existing CC-PARALLEL-PLAN items** — struct-return ABI streams
- A1/A2/A3, compound literals (B), const-expr evaluator (C), union
- offsets + sizeof no-eval (D). tcc.c uses every one of these.
-- **Throughput / wall-clock** — even with the heap fix, lex+pp+parse
- on 18 896 lines under scheme1 is going to be slow. Worth measuring
- before optimizing.
-
-## Suggested order of attack
-
-1. Soften float/double, then bitfields, then setjmp types into
- "parse, codegen rejects ops". Small, mechanical, unblocks the gate
- to actual triage.
-2. Compatible-redecl in `scope-bind!`, the typedef table, and the tag
- table.
-3. Lex/pp memory pass + heap/p_memsz bumps.
-4. Re-run on `tcc.flat.c`. At this point work re-converges with
- [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md).
-
-Hitting milestone 4 in [CC.md §Validation milestones](CC.md) — "Compile
-tcc.c (under the tcc-mes defines) → tcc-lispcc; verify
-`tcc-lispcc -version` runs" — is the goal the items above feed into.
+ same "parse, don't codegen" softening already applied to floats.
+ tcc.c carries all of these under `HAVE_BITFIELD` / `HAVE_SETJMP`
+ gates that are off but leave the declarations in the flattened text.
+- **Throughput / wall-clock** — lex+pp+parse on 18 896 lines under
+ scheme1 is going to be slow even after heap residency drops.
+
+The end goal is milestone 4 in [CC.md §Validation milestones](CC.md)
+— "Compile tcc.c (under the tcc-mes defines) → tcc-lispcc; verify
+`tcc-lispcc -version` runs."
diff --git a/tests/cc/067-vararg-call.c b/tests/cc/067-vararg-call.c
@@ -1,14 +1,16 @@
/* §G.1 — variadic call: caller default-promotes args.
*
- * Declares sum3 as (int n, ...). Defines a same-ABI sum3 that takes
- * three ints. Calls sum3(1, c, s) where c is signed char = -3 and s
- * is short = 100. Per default-promote rules, c and s are promoted to
- * int before the call, so the callee's a1/a2 hold -3 and 100 with
- * full 32-bit sign-extension.
+ * Declares sum3 as (int n, ...) and defines it as a real variadic fn
+ * pulling its tail args via __builtin_va_arg. Calls sum3(1, c, s)
+ * where c is signed char = -3 and s is short = 100. Per default-
+ * promote rules, c and s widen to int before the call, so the
+ * receive yields full 32-bit -3 and 100.
*
* Result: 1 + (-3) + 100 = 98.
*/
+typedef char *va_list;
+
int sum3(int n, ...);
int main(void) {
@@ -17,6 +19,12 @@ int main(void) {
return sum3(1, c, s); /* 1 + (-3) + 100 = 98 */
}
-int sum3(int n, int a, int b) {
+int sum3(int n, ...) {
+ va_list ap;
+ int a; int b;
+ __builtin_va_start(ap, n);
+ a = __builtin_va_arg(ap, int);
+ b = __builtin_va_arg(ap, int);
+ __builtin_va_end(ap);
return n + a + b;
}
diff --git a/tests/cc/120-redecl-extern-fn.c b/tests/cc/120-redecl-extern-fn.c
@@ -0,0 +1,13 @@
+/* Compatible redecl: two identical extern fn prototypes for the same
+ * function are legal C and common in flattened TUs that pick up the
+ * same declaration from multiple headers. */
+
+extern int square(int);
+extern int square(int);
+
+int square(int x) { return x * x; }
+
+int main(int argc, char **argv) {
+ if (square(5) != 25) return 1;
+ return 0;
+}
diff --git a/tests/cc/120-redecl-extern-fn.expected-exit b/tests/cc/120-redecl-extern-fn.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/121-redecl-fn-paramnames.c b/tests/cc/121-redecl-fn-paramnames.c
@@ -0,0 +1,14 @@
+/* Compatible redecl: prototypes that differ only in parameter names
+ * (or presence/absence of a parameter name) describe the same function
+ * type. tcc.flat.c hits this when a stdlib fn is declared once with a
+ * named param and once without. */
+
+int times3(int);
+int times3(int x);
+
+int times3(int x) { return x + x + x; }
+
+int main(int argc, char **argv) {
+ if (times3(7) != 21) return 1;
+ return 0;
+}
diff --git a/tests/cc/121-redecl-fn-paramnames.expected-exit b/tests/cc/121-redecl-fn-paramnames.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/122-redecl-typedef.c b/tests/cc/122-redecl-typedef.c
@@ -0,0 +1,11 @@
+/* Compatible redecl: identical typedefs from multiple headers (e.g.
+ * size_t in <stddef.h> and <stdlib.h>) collapse into one declaration. */
+
+typedef int sint;
+typedef int sint;
+
+int main(int argc, char **argv) {
+ sint x = 17;
+ if (x != 17) return 1;
+ return 0;
+}
diff --git a/tests/cc/122-redecl-typedef.expected-exit b/tests/cc/122-redecl-typedef.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/123-redecl-extern-var.c b/tests/cc/123-redecl-extern-var.c
@@ -0,0 +1,13 @@
+/* Compatible redecl: a variable declared extern can be re-declared
+ * extern any number of times before the single tentative/initialized
+ * definition. */
+
+extern int counter;
+extern int counter;
+
+int counter = 42;
+
+int main(int argc, char **argv) {
+ if (counter != 42) return 1;
+ return 0;
+}
diff --git a/tests/cc/123-redecl-extern-var.expected-exit b/tests/cc/123-redecl-extern-var.expected-exit
@@ -0,0 +1 @@
+0