boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 1f7b74c622a0e62f2737ae7a364996207026684b
parent 257fdd5b913fd29cea9637e958304a511bc7d84b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 28 Apr 2026 00:21:12 -0700

cc: redecl compatibility, update cc docs

Diffstat:
Mcc/cc.scm | 330++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Ddocs/CC-PARALLEL-PLAN.md | 233-------------------------------------------------------------------------------
Ddocs/CC-STREAM.md | 254-------------------------------------------------------------------------------
Mdocs/TCC-TODO.md | 167++++++++++++++++++-------------------------------------------------------------
Mtests/cc/067-vararg-call.c | 20++++++++++++++------
Atests/cc/120-redecl-extern-fn.c | 13+++++++++++++
Atests/cc/120-redecl-extern-fn.expected-exit | 1+
Atests/cc/121-redecl-fn-paramnames.c | 14++++++++++++++
Atests/cc/121-redecl-fn-paramnames.expected-exit | 1+
Atests/cc/122-redecl-typedef.c | 11+++++++++++
Atests/cc/122-redecl-typedef.expected-exit | 1+
Atests/cc/123-redecl-extern-var.c | 13+++++++++++++
Atests/cc/123-redecl-extern-var.expected-exit | 1+
13 files changed, 284 insertions(+), 775 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -1,8 +1,4 @@ ;; cc/util.scm — leaf helpers. Depends only on the scheme1 prelude. -;; -;; Realization of docs/CC-INTERNALS.md §util.scm. Engineers may add -;; helpers here freely; the listed signatures are the load-bearing -;; surface other modules call. ;; -------------------------------------------------------------------- ;; bytevector helpers (scheme1 strings ARE bytevectors) @@ -181,7 +177,7 @@ ;; diagnostics + I/O ;; -------------------------------------------------------------------- (define (die loc msg . irritants) - ;; Format per CC-CONTRACTS §2.3: + ;; Format: ;; <file>:<line>:<col>: error: <msg>: <irritant> <irritant> ... ;; When loc is #f, the "<file>:<line>:<col>: " prefix is omitted. ;; irritants are written via display semantics (no quoting); format's @@ -284,13 +280,6 @@ (set! ctr (+ ctr 1)) s)))) ;; cc/data.scm — record types and symbol alphabets shared across modules. -;; -;; Concrete realization of: -;; docs/CC-INTERNALS.md §data.scm -;; docs/CC-CONTRACTS.md §1 -;; -;; Adding a record or alphabet symbol requires updating the contract -;; doc first. ;; -------------------------------------------------------------------- ;; loc — source location for diagnostics @@ -303,13 +292,13 @@ (col loc-col)) ; fixnum ;; -------------------------------------------------------------------- -;; tok — lexer token. See CC-CONTRACTS §1.1 for kind set, §1.2 for -;; PUNCT value symbols, §1.3 for KW value symbols. +;; tok — lexer token. ;; -------------------------------------------------------------------- (define-record-type tok (%tok kind value loc hide) tok? - (kind tok-kind) ; symbol from §1.1 + (kind tok-kind) ; IDENT | INT | STR | CHAR | KW | PUNCT + ; | NL | HASH | EOF (value tok-value) ; bv | fixnum | symbol | #f (loc tok-loc) ; loc (hide tok-hide)) ; list of bv (macro names already expanded) @@ -328,8 +317,7 @@ (body macro-body)) ; list of tok ;; -------------------------------------------------------------------- -;; ctype — C type. See CC-CONTRACTS §1.4 for kind set, and -;; CC-INTERNALS §data.scm for the ext payload table. +;; ctype — C type. ;; ;; Fields that mutate over a ctype's lifetime: ;; size and align — set to -1/-1 on forward struct/union decl, @@ -345,7 +333,7 @@ (align ctype-align ctype-align-set!) (ext ctype-ext ctype-ext-set!)) -;; Interned primitive ctypes (CC-CONTRACTS §1.4). Equality is eq?. +;; Interned primitive ctypes. Equality is eq?. (define %t-void (%ctype 'void -1 -1 #f)) (define %t-i8 (%ctype 'i8 1 1 #f)) (define %t-u8 (%ctype 'u8 1 1 #f)) @@ -365,20 +353,23 @@ ;; -------------------------------------------------------------------- ;; sym — declared identifier (function, variable, typedef, …) -;; See CC-CONTRACTS §1.7 (kind), §1.8 (storage). +;; defined? distinguishes a forward declaration (extern fn proto, extern +;; var) from a definition (fn body, var with initializer, tentative def +;; without `extern`). scope-bind! merges compatible decls; only two +;; defined? syms with the same name fire a redefinition error. ;; -------------------------------------------------------------------- (define-record-type sym - (%sym name kind storage type slot) + (%sym name kind storage type slot defined?) sym? - (name sym-name) ; bv - (kind sym-kind) ; symbol from §1.7 - (storage sym-storage) ; symbol from §1.8 or #f - (type sym-type) ; ctype - (slot sym-slot)) ; fixnum | bv | #f, per kind + (name sym-name) ; bv + (kind sym-kind) ; symbol from §1.7 + (storage sym-storage) ; symbol from §1.8 or #f + (type sym-type) ; ctype + (slot sym-slot) ; fixnum | bv | #f, per kind + (defined? sym-defined?)) ; #t = definition, #f = decl-only ;; -------------------------------------------------------------------- -;; opnd — operand on cg's vstack. See CC-CONTRACTS §1.5 (kind), -;; §1.10 (reg names). +;; opnd — operand on cg's vstack. ;; -------------------------------------------------------------------- (define-record-type opnd (%opnd kind type ext lval?) @@ -390,7 +381,6 @@ ;; -------------------------------------------------------------------- ;; loop-ctx — entry on parser's loop/switch context stack. -;; See CC-CONTRACTS §1.9. ;; -------------------------------------------------------------------- (define-record-type loop-ctx (%loop-ctx kind tag has-continue?) @@ -417,7 +407,7 @@ ;; iter holds a tok-iter (typically a pp-iter chained over a lex-iter). ;; peek / peek2 / advance go through iter-peek / iter-peek2 / iter-next ;; so the parser pulls one token at a time, with no full materialized -;; token list. See docs/CC-STREAM.md. +;; token list. (define-record-type pstate (%pstate iter scope tags loops fn-ctx typedefs cg) pstate? @@ -469,10 +459,10 @@ (in-fn? cg-in-fn? cg-in-fn?-set!)) ;; -------------------------------------------------------------------- -;; Symbol alphabets — canonical alists. See CC-CONTRACTS §1. +;; Symbol alphabets — canonical alists. ;; -------------------------------------------------------------------- -;; CC-CONTRACTS §1.3 — keyword bytevector → keyword symbol. +;; Keyword bytevector → keyword symbol. (define %keyword-alist '(;; storage ("auto" . auto) ("register" . register) ("static" . static) @@ -503,7 +493,7 @@ ("_Static_assert" . _Static_assert) ("_Complex" . _Complex) ("_Imaginary" . _Imaginary))) -;; CC-CONTRACTS §1.2 — punctuator bytevector → punct symbol. +;; Punctuator bytevector → punct symbol. ;; Listed longest-match-first; the lexer scans this list in order. ;; Digraphs (<: :> <% %> %: %:%:) lex to their standard equivalents. (define %punct-alist @@ -535,48 +525,33 @@ ;; cc/lex.scm — bytestream → token list. Pure function; no I/O, ;; no macro awareness. ;; -;; Realization of docs/CC-INTERNALS.md §lex.scm. Symbol alphabets -;; (KW, PUNCT, tok-kind) live in cc/data.scm; do not duplicate. -;; -;; Owner: <unassigned> -;; -;; Implementation notes: -;; -;; - The lexer walks `src` byte-by-byte, threading (pos, line, col) -;; explicitly through every helper (no mutable state). Each token -;; captures its starting loc; helpers return (tok npos nline ncol). -;; - Trigraphs and `\<newline>` line splicing are handled via a single -;; logical-byte primitive `%lex-peek`: it advances over splices and -;; translates trigraphs in-place, so downstream code only ever sees -;; the "translation phase 2" stream. -;; - Comments are stripped at the same level as whitespace. -;; - NL tokens are emitted at every physical newline so pp can use -;; them to terminate directives. +;; The lexer walks `src` byte-by-byte, threading (pos, line, col) +;; explicitly through every helper (no mutable state). Each token +;; captures its starting loc; helpers return (tok npos nline ncol). +;; Trigraphs and `\<newline>` line splicing are handled via a single +;; logical-byte primitive `%lex-peek`: it advances over splices and +;; translates trigraphs in-place, so downstream code only ever sees +;; the "translation phase 2" stream. Comments are stripped at the +;; same level as whitespace. NL tokens are emitted at every physical +;; newline so pp can use them to terminate directives. ;; ;; Heap discipline (per tests/scheme1/93-heap-mark-rewind.scm): +;; token-producing helpers wrap their inner work in a heap-mark / +;; heap-rewind! arena. Slots that must survive the rewind (start-loc +;; and the integer holders for npos/nline/ncol) are bound *before* +;; the (set! mark (heap-mark)) so the let's env extensions live below +;; the mark. The byte-run scanners' tail-call env frames and any +;; %lex-peek 4-lists are above the mark and get reclaimed. For +;; helpers that produce a fresh bytevector (ident, string), the bv is +;; allocated post-rewind so it persists into the parent arena. +;; Numeric digit runs accumulate inline via %accum-int-while. ;; -;; - Token-producing helpers wrap their inner work in a heap-mark / -;; heap-rewind! arena. The slots that must survive the rewind -;; (start-loc and the integer holders for npos/nline/ncol) are bound -;; *before* the (set! mark (heap-mark)) so the let's env extensions -;; live below the mark. The byte-run scanners' tail-call env frames -;; and any %lex-peek 4-lists are above the mark and get reclaimed. -;; For helpers that produce a fresh bytevector (ident, string), the -;; bv is allocated post-rewind so it persists into the parent arena. -;; - Numeric digit runs accumulate their value inline via -;; %accum-int-while; they no longer materialize a per-byte cons list -;; and then a separate %digits-value walk. -;; - lex-tokenize wraps each token-emitting iteration in an outer -;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv -;; above this outer mark; the driver reads the scalar fields and -;; copies any bv contents into %lex-scratch (sticky, pre-mark) before -;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv -;; (for IDENT/STR) sized to the actual content. Surface effect on -;; the kitchen-sink test: per-token heap delta drops from ~12 KB -;; (interpreter overhead, env-extension cons cells, eval_args lists, -;; tail-rec named-let frames in the inner scanners, all sticky for -;; the duration of lex) to ~840 B (tok + loc + acc-cons + bv + -;; the irreducible eval-cost of the post-rewind reconstruction). +;; %lex-iter-pull wraps each token-emitting iteration in an outer +;; heap-mark / heap-rewind!. The helper allocates its own tok+loc+bv +;; above this outer mark; the driver reads the scalar fields and +;; copies any bv contents into %lex-scratch (sticky, pre-mark) before +;; rewinding. Post-rewind it rebuilds a fresh tok+loc and a fresh bv +;; (for IDENT/STR) sized to the actual content. ;; -------------------------------------------------------------------- ;; Cross-rewind transport for IDENT / STR bv values. @@ -587,15 +562,12 @@ ;; ident/string content) by copying back out of scratch. The whole ;; lex run shares this one buffer. ;; -;; We considered a deduplicating *intern pool* (one bv per unique -;; ident, repeats share storage). With scheme1's interpreter, walking -;; the pool cons-list per lookup costs ~50–150 B per step in -;; bind_params/eval_args/named-let env-extension overhead. Even with -;; 16-way bucketing, a 50 KB tcc.flat.c prefix saw walk costs blow -;; past the bv-allocation savings (60+ MB heap vs 17 MB un-interned). -;; The cleanest path until scheme1 gets a vector primitive is no -;; intern; if a downstream eq?-fast-path is needed later, layer it -;; atop %lex-scratch with a hash backed by a u8 bytevector. +;; Why scratch, not a deduplicating intern pool: under scheme1's +;; interpreter, walking a cons-list pool per lookup costs ~50–150 B +;; per step in bind_params/eval_args/named-let env-extension +;; overhead. Even 16-way bucketing has the walk cost outpace the +;; bv-allocation savings until scheme1 grows a vector primitive +;; (an O(1) bucket lookup without interpreter overhead). ;; -------------------------------------------------------------------- (define %lex-scratch-cap 65536) (define %lex-scratch (make-bytevector %lex-scratch-cap 0)) @@ -1279,7 +1251,7 @@ ;; -------------------------------------------------------------------- ;; Punctuator reader. ;; -;; Greedy longest-match against %punct-alist (cc/data.scm). The alist +;; Greedy longest-match against %punct-alist. The alist ;; is already ordered longest-first. We additionally bucket entries by ;; their first byte so %lex-read-punct only loops over the small set of ;; patterns that can start at the current source byte. @@ -1388,8 +1360,7 @@ ;; Each pipeline layer (lex, pp, parser) wraps the layer below as a ;; tok-iter. iter-next pulls one token at a time. iter-peek/iter-peek2 ;; cache lookahead in `buf`. iter-unget! pushes back. Live-data bound is -;; lookahead (≤2) + per-layer state, not source length. See -;; docs/CC-STREAM.md. +;; lookahead (≤2) + per-layer state, not source length. ;; ;; Pull-fns must keep yielding EOF after the first EOF (idempotent). (define-record-type tok-iter @@ -1433,9 +1404,9 @@ (define (iter-unget! it t) (tok-iter-buf-set! it (cons t (tok-iter-buf it)))) -;; Drain to a list ending in EOF. Used by back-compat wrappers -;; (lex-tokenize, pp-expand) so the cc-lex / cc-pp test runners can -;; inspect the materialized stream. +;; Drain an iter to a list ending in EOF. Used by lex-tokenize / +;; pp-expand so the cc-lex / cc-pp test runners can inspect the +;; materialized stream. (define (iter->list it) (let loop ((acc '())) (let ((t (iter-next it))) @@ -1643,15 +1614,14 @@ (lex-state-bol?-set! st nbol?) (make-tok kind tok-val (%loc file loc-line loc-col))))))) -;; Back-compat wrapper — drains lex-iter into a list ending in EOF. -;; Used by the cc-lex test runner. +;; Drain a lex-iter into a list ending in EOF, for the cc-lex test +;; runner. Production callers chain make-lex-iter directly. (define (lex-tokenize src file) (iter->list (make-lex-iter src file))) -;; cc/pp.scm — token list -> expanded token list. -;; Realizes docs/CC-INTERNALS.md §pp.scm. Hide-set per C11 6.10.3.4. +;; cc/pp.scm — preprocessor. Hide-set per C11 6.10.3.4. ;; #include rejected (CC.md §Toolchain envelope). -;; --- helpers (TODO: promote to util.scm if shared more broadly) --- +;; --- helpers --- (define (%pp-bv-mem? x xs) (cond ((null? xs) #f) ((bv= x (car xs)) #t) @@ -1671,8 +1641,8 @@ ;; cond-stack: list of (active? . has-taken?). Outer-active gating is ;; computed by walking the stack rather than encoding it in frames. ;; -;; Streaming fields (used only by make-pp-iter; #f for the legacy -;; bounded-buffer path used by pp-eval-cexpr): +;; Streaming fields drive make-pp-iter; the bounded-buffer path used +;; by pp-eval-cexpr leaves them at #f / '(). ;; lex-iter — upstream tok-iter, or #f ;; up-pending — toks unshifted upstream (macro-expansion bodies that ;; must be re-scanned for further expansion) @@ -1922,9 +1892,9 @@ (pps-out-buf-set! st (cons next (pps-out-buf st))) cur))))))) -;; Back-compat wrapper — drains pp-iter into a list ending in EOF. -;; Used by the cc-pp test runner. Wraps the input list in a list-iter -;; so the streaming engine sees a normal upstream. +;; Drain a pp-iter into a list ending in EOF, for the cc-pp test +;; runner. The input token list becomes the upstream via make-list-iter. +;; Production callers chain make-pp-iter directly over a make-lex-iter. (define (pp-expand toks initial-defines) (iter->list (make-pp-iter (make-list-iter toks) initial-defines))) @@ -2321,8 +2291,8 @@ (%tok 'PUNCT 'comma (%loc "<expand>" 0 0) '())) ;; Body substitution: walk body; replace param IDENTs with arg toks, -;; handle `#param` (stringize) and `a##b` (paste). For v1 we do not -;; pre-expand args before substitution; the rescan after substitution +;; handle `#param` (stringize) and `a##b` (paste). Args are not +;; pre-expanded before substitution; the rescan after substitution ;; catches the same expansions in practice. (define (%pp-substitute body env call-loc) (let loop ((body body) (out '())) @@ -2562,14 +2532,13 @@ (else (values v (cdr r)))))) (else (die (tok-loc (car toks)) "#if: unexpected token" (tok-kind (car toks)))))) ;; cc/cg.scm — codegen state and emission API. -;; Realization of docs/CC-INTERNALS.md §cg.scm. -;; Conversion split per CC-CONTRACTS §4: parse owns promotion etc; -;; cg owns sign extension, signed/unsigned dispatch, pointer scaling. +;; Conversion split: parse owns promotion etc; cg owns sign extension, +;; signed/unsigned dispatch, pointer scaling. ;; ;; Output uses libp1pp's structured macros (%fn, %ifelse_nez, ;; %loop_tag, %break, %continue) per docs/LIBP1PP.md. ;; -;; Frame layout (CC-CONTRACTS §3): +;; Frame layout: ;; [sp + 0 .. staging*8) outgoing-arg staging ;; [sp + staging*8 ..) locals + spilled vstack values ;; Slot offsets are emitted symbolically as `(+ %<fn>__SO N)` so the @@ -2648,7 +2617,7 @@ ;; gather bytes via %lb + shli/or; stores scatter via shri/%sb. ;; Signed loads (i16/i32) sign-extend via shli/sari to canonical ;; 64-bit form. -;; 8 (or anything else for now): %ld / %st. +;; 8 (and any other size): %ld / %st. ;; Scratch convention: helpers may clobber t1; callers never pass ;; reg=t1. @@ -2833,7 +2802,6 @@ ;; clobber a0/a1, so falling straight through to cc__main forwards ;; them unchanged. The 16-byte frame is just enough for %enter's ;; saved-fp/lr to fit; cc__main builds its own frame on top. - ;; (CC-CONTRACTS §J.1, §5.4.) (let ((tb (cg-text cg))) (buf-push! tb "# entry stub: forwards argc=a0, argv=a1 to cc__main\n") (buf-push! tb "%fn(p1_main, 16, {\n") @@ -2908,8 +2876,8 @@ (bv-cat (list "%st(a0, sp, " (%cg-slot-expr cg ss) ")\n"))))) (else (%cg-fn-set! cg '%fn-sret-slot #f)))) - ;; params per CC-CONTRACTS §3.1. With sret, explicit arg i lives at - ;; ABI position (i+1): args 0..2 in a1..a3, args 3+ in slot (i-3). + ;; With sret, explicit arg i lives at ABI position (i+1): args 0..2 + ;; in a1..a3, args 3+ in slot (i-3). (let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0)) (spill (lambda (abi off) (cond @@ -2943,7 +2911,7 @@ (nm (car p)) (ty (cdr p)) (off (cg-alloc-slot cg 8 8)) - (psym (%sym nm 'param #f ty off))) + (psym (%sym nm 'param #f ty off #t))) (spill (+ idx sret-shift) off) (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) (or first-slot off)))))))) @@ -3060,9 +3028,9 @@ ;; (or label, or indirect-marked frame) backing the lval keeps existing ;; until the function ends. For rvals it duplicates the descriptor of ;; the spilled value; both copies refer to the same already-emitted -;; storage. CC-CONTRACTS §4.1: used for `lhs += rhs` and `++lhs` to -;; preserve the lhs across a `cg-load` so the subsequent `cg-assign` -;; still has its address. +;; storage. Used for `lhs += rhs` and `++lhs` to preserve the lhs +;; across a `cg-load` so the subsequent `cg-assign` still has its +;; address. (define (cg-dup cg) (let ((p (cg-top cg))) (cg-push cg p) p)) @@ -3384,8 +3352,8 @@ (else (cg-push cg p))))) (define (cg-arith-conv cg) - ;; Usual arithmetic conversions. CC-CONTRACTS §4.2: applies to - ;; arithmetic operands. When either operand is a pointer (or array, + ;; Usual arithmetic conversions on arithmetic operands. When either + ;; operand is a pointer (or array, ;; which behaves as a pointer in arithmetic), the pair is a ;; pointer-arith case — leave the types alone so cg-binop can detect ;; the ptr operand and apply the right scaling. @@ -3536,8 +3504,8 @@ (define (cg-assign cg) ;; Pops rhs, pops lhs, casts rhs to lhs's type (parser cannot peek - ;; deeper than vstack top to do this itself — CC-CONTRACTS §4.2), - ;; emits the store, pushes the assigned value as the result rval. + ;; deeper than vstack top to do this itself), emits the store, pushes + ;; the assigned value as the result rval. (let* ((rhs0 (cg-pop cg)) (lhs (cg-pop cg)) (ty (opnd-type lhs))) @@ -3714,8 +3682,7 @@ (define (cg-loop cg head-thunk body-thunk) ;; body-thunk receives the loop tag as its argument; parser uses - ;; that tag for cg-break / cg-continue inside the body. CC-CONTRACTS - ;; §1.9 / §3.3. + ;; that tag for cg-break / cg-continue inside the body. (let ((tag (%cg-fresh-loop-tag cg))) (%cg-emit-many cg (list "%loop_tag(" tag ", {\n")) (head-thunk) @@ -3748,7 +3715,7 @@ ;; ;; Limitation: only first 4 incoming args (named + variadic) live in ;; the save area; variadic args at index >= 4 need LDARG and are not -;; yet supported. See punchlist §G.2 for the gap. +;; yet supported. ;; -------------------------------------------------------------------- (define (%cg-vararg-first-slot cg) (let ((s (%cg-fn-get cg '%fn-vararg-first-slot))) @@ -3824,7 +3791,7 @@ 0) ;; -------------------------------------------------------------------- -;; Labels and unconditional goto (§F.4 / CC-CONTRACTS §5.3). +;; Labels and unconditional goto. ;; user_<name> namespace keeps the user's label space disjoint from ;; the compiler-internal ::ret and ::lbl_<n>. Labels resolve through ;; libp1pp's %scope mechanism, so forward references inside the same @@ -4012,10 +3979,63 @@ (define (scope-leave! ps) (ps-scope-set! ps (cdr (ps-scope ps))) (ps-tags-set! ps (cdr (ps-tags ps)))) +(define (ctype-compat? a b) + (cond + ((eq? a b) #t) + ((not (eq? (ctype-kind a) (ctype-kind b))) #f) + (else + (let ((k (ctype-kind a))) + (cond + ((eq? k 'ptr) (ctype-compat? (ctype-ext a) (ctype-ext b))) + ((eq? k 'arr) + (let ((ea (ctype-ext a)) (eb (ctype-ext b))) + (and (ctype-compat? (car ea) (car eb)) + (or (= (cdr ea) (cdr eb)) + (< (cdr ea) 0) (< (cdr eb) 0))))) + ((eq? k 'fn) (%fn-ctype-compat? (ctype-ext a) (ctype-ext b))) + ((or (eq? k 'struct) (eq? k 'union) (eq? k 'enum)) #f) + (else #t)))))) + +(define (%fn-ctype-compat? a b) + (and (ctype-compat? (car a) (car b)) + (eq? (car (cddr a)) (car (cddr b))) + (%fn-params-compat? (cadr a) (cadr b)))) + +(define (%fn-params-compat? pa pb) + (cond + ((and (null? pa) (null? pb)) #t) + ((or (null? pa) (null? pb)) #f) + ((ctype-compat? (cdar pa) (cdar pb)) + (%fn-params-compat? (cdr pa) (cdr pb))) + (else #f))) + +(define (sym-merge old new) + (cond + ((not (eq? (sym-kind old) (sym-kind new))) + (die #f "redecl: kind mismatch" (sym-name old))) + ((not (ctype-compat? (sym-type old) (sym-type new))) + (die #f "redecl: type mismatch" (sym-name old))) + ((eq? (sym-kind old) 'typedef) old) + ((eq? (sym-kind old) 'enum-const) + (cond ((equal? (sym-slot old) (sym-slot new)) old) + (else (die #f "enum-const redecl" (sym-name old))))) + ((and (sym-defined? old) (sym-defined? new)) + (die #f "redefinition" (sym-name old))) + ((sym-defined? new) new) + (else old))) + (define (scope-bind! ps n s) - (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f))) - (if (alist-ref n top) (die #f "dup decl" n) - (ps-scope-set! ps (cons (alist-set n s top) r))))) + (let* ((f (ps-scope ps)) (top (car f)) (r (cdr f)) + (old (alist-ref n top))) + (cond + ((not old) + (ps-scope-set! ps (cons (alist-set n s top) r))) + (else + (let ((merged (sym-merge old s))) + (cond + ((eq? merged old) #t) + (else + (ps-scope-set! ps (cons (alist-set n merged top) r))))))))) (define (scope-lookup ps n) (let loop ((f (ps-scope ps))) (cond ((null? f) #f) @@ -4210,7 +4230,7 @@ (advance ps) (parse-const-int ps)) (else nv)))) (scope-bind! ps nm - (%sym nm 'enum-const #f %t-i32 val)) + (%sym nm 'enum-const #f %t-i32 val #t)) (cond ((at-punct? ps 'comma) (advance ps)) ((at-punct? ps 'rbrace) #t) (else (die (tok-loc (peek ps)) "enum"))) @@ -4571,7 +4591,8 @@ (else (die (tok-loc t) "const-expr: bad operand" (tok-value t)))))) -;; Back-compat wrapper: callers that just want the integer value. +;; Convenience: returns the integer value alone (callers that don't +;; need the type half of parse-const-expr's (value . ctype) result). (define (parse-const-int ps) (car (parse-const-expr ps))) (define (parse-declarator ps base) @@ -4699,11 +4720,11 @@ ((not n) (die #f "no name")) ((eq? sto 'typedef) (typedef-add! ps n) - (scope-bind! ps n (%sym n 'typedef #f ty #f))) + (scope-bind! ps n (%sym n 'typedef #f ty #f #t))) ((ctype-is-fn? ty) (scope-bind! ps n (%sym n 'fn (or sto 'extern) ty - (bytevector-append "cc__" n)))) + (bytevector-append "cc__" n) #f))) ;; §I: block-scope `static` routes to a global with a name mangled ;; on the enclosing function so two functions can each have their ;; own `static int n;` without colliding. The sym's NAME holds the @@ -4714,7 +4735,7 @@ (let* ((fname (fn-ctx-name (ps-fn-ctx ps))) (mangled (bytevector-append fname "__" n)) (sm (%sym mangled 'var 'static ty - (bytevector-append "cc__" mangled)))) + (bytevector-append "cc__" mangled) #t))) (scope-bind! ps n sm) (cond ((at-punct? ps 'assign) @@ -4724,11 +4745,17 @@ (else (cond ((not (ps-fn-ctx ps)) - (let ((sm (%sym n 'var (or sto 'extern) ty - (bytevector-append "cc__" n)))) + ;; defined? = #f for `extern` decls without `=`; everything else + ;; (initializer present, or no `extern` keyword) is a definition + ;; or tentative def, so the merge logic in scope-bind! treats it + ;; as the authoritative binding. + (let* ((init? (at-punct? ps 'assign)) + (def? (or init? (not (eq? sto 'extern)))) + (sm (%sym n 'var (or sto 'extern) ty + (bytevector-append "cc__" n) def?))) (scope-bind! ps n sm) (cond - ((at-punct? ps 'assign) + (init? (advance ps) (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty))) @@ -4738,7 +4765,7 @@ (let* ((sz (max (ctype-size ty) 1)) (al (max (ctype-align ty) 1)) (sl (cg-alloc-slot (ps-cg ps) sz al)) - (sm (%sym n 'var (or sto 'auto) ty sl))) + (sm (%sym n 'var (or sto 'auto) ty sl #t))) (scope-bind! ps n sm) (cond ((at-punct? ps 'assign) @@ -4750,9 +4777,9 @@ (eq? (tok-kind (peek ps)) 'STR))) (parse-init-local-aggregate ps sm ty)) ;; Struct/union initializer from a non-brace expression - ;; (typically a function call returning by-value, per - ;; Stream A1). The expr produces a struct lval; we copy - ;; bytes into the destination slot. + ;; (typically a function call returning by-value). The + ;; expr produces a struct lval; we copy bytes into the + ;; destination slot. ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) (cg-push-sym (ps-cg ps) sm) @@ -5208,11 +5235,11 @@ (define (%parse-init-local-struct-list ps sm base-off ty) ;; Track each initialized field by name in `seen`; at the closing brace - ;; zero every field NOT in `seen`. The previous design tracked positional - ;; "remaining fields" via `rest`, which silently dropped earlier fields - ;; when a designator jumped backwards (e.g. `{.y = 5}` left `x` - ;; uninitialized). C requires every unmentioned member of an aggregate - ;; with at least one designator/initializer to be zeroed (C11 §6.7.9 ¶21). + ;; zero every field NOT in `seen`. Tracking by name (rather than + ;; positional "remaining" fields) handles a designator jumping + ;; backwards correctly — e.g. `{.y = 5}` must still zero `x`. + ;; C requires every unmentioned member of an aggregate with at least + ;; one designator/initializer to be zeroed (C11 §6.7.9 ¶21). (let ((fields (%init-struct-fields ty))) (let lp ((rest fields) (seen '())) (cond @@ -5300,11 +5327,12 @@ ;; full heap cost only for functions that genuinely mutate global state. (define (parse-fn-body ps name dt) ;; Hoist the recursive-binding scope-bind! out of the marked region - ;; so the fn-sym cons survives rewind. - (cond ((not (scope-lookup ps name)) - (scope-bind! ps name - (%sym name 'fn 'extern dt - (bytevector-append "cc__" name))))) + ;; so the fn-sym cons survives rewind. defined?=#t marks this as a + ;; definition so a prior forward decl gets overwritten and a second + ;; definition with the same name fires sym-merge's redefinition error. + (scope-bind! ps name + (%sym name 'fn 'extern dt + (bytevector-append "cc__" name) #t)) (let* ((cg (ps-cg ps)) (mark (heap-mark)) (globals-before (cg-globals cg)) @@ -5422,8 +5450,8 @@ (advance ps) (parse-stmt ps)) (else #t))))) -;; cg-loop's body-thunk now receives the tag from cg (CC-CONTRACTS -;; §3.3); the parser threads it into break/continue via loop-ctx. +;; cg-loop's body-thunk receives the tag from cg; the parser threads +;; it into break/continue via loop-ctx. (define (parse-while-stmt ps) (expect-kw ps 'while) @@ -5543,7 +5571,7 @@ (cond ;; Struct/union return — leave the source as a struct lval; ;; cg-return copies bytes into the function's return slot. - ;; (Stream A1, P1.md §Arguments and return values.) + ;; (P1.md §Arguments and return values.) ((or (eq? rk 'struct) (eq? rk 'union)) (parse-expr ps) (cg-return (ps-cg ps))) @@ -5857,7 +5885,7 @@ ;; pass, and array decay all chain through the existing primitives ;; (cg-take-addr / cg-push-field / cg-decay-array via rval!). ;; -;; File-scope literals are out of scope (Stream B, docs/CC-PARALLEL-PLAN.md). +;; File-scope literals are out of scope. ;; -------------------------------------------------------------------- (define (parse-compound-literal ps ty) (cond @@ -5870,7 +5898,7 @@ ;; sym-slot at its top-level entry to seed base-off; the ;; recursive helpers thread `sm` along but never read other ;; fields. The name is unbound and never enters scope. - (sm (%sym "__cl" 'var 'auto ty sl))) + (sm (%sym "__cl" 'var 'auto ty sl #t))) (cond ((or (eq? (ctype-kind ty) 'arr) (eq? (ctype-kind ty) 'struct) @@ -6094,12 +6122,9 @@ (cg-load (ps-cg ps))) (else #t)))) ;; cc/main.scm — driver. Argv, file I/O, ties phases together. -;; -;; Realization of docs/CC-INTERNALS.md §main.scm. ;; -------------------------------------------------------------------- -;; CLI: cc <input.c> <output.P1pp> -;; (-o flag and -D flags are deferred — phase-1 runner doesn't need them.) +;; CLI: cc [--cc-debug] <input.c> <output.P1pp> ;; ;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] ;; is the catm'd compiler source path, argv[2..] are the user-facing @@ -6152,7 +6177,6 @@ ;; Streaming pipeline: lex → pp → parser → cg, all concurrent. ;; Each stage pulls one tok at a time from upstream. Steady-state ;; live data is bounded by parser/pp state, not source length. - ;; See docs/CC-STREAM.md. (let* ((src (%cc-slurp in-path)) (_1 (debug-log "phase=slurp" "heap" (heap-usage) "src-bytes" (bytevector-length src))) diff --git a/docs/CC-PARALLEL-PLAN.md b/docs/CC-PARALLEL-PLAN.md @@ -1,233 +0,0 @@ -# CC parallel work plan - -Coordination doc for the next batch of `tests/cc/` features. Defines -four parallel work streams, the design seams between them, and the -sequencing required where streams aren't fully independent. - -Companion to [CC-PUNCHLIST.md](CC-PUNCHLIST.md) (the per-item -checklist) and [CC-INTERNALS.md](CC-INTERNALS.md) (module layering -and the cg-fixture-first workflow). ABI choices below cite -[P1.md §Arguments and return values](P1.md). - -## Tests in scope - -Currently failing on aarch64: - -| Test | Stream | Failure | -|---|---|---| -| cc/082-union-basic | D | exit 1 — union field offsets bumped like struct | -| cc/087-sizeof-noeval | D | exit 2 — sizeof(x++) actually increments x | -| cc/111-struct-ret-1word | A1 | compile-fail — return-by-value struct unhandled | -| cc/112-struct-ret-2word | A1 | compile-fail — no two-word return path | -| cc/113-struct-ret-3word | A2 | compile-fail — no indirect-result path | -| cc/114-struct-ret-many-args | A3 | exit 2 — silently truncates | -| cc/115-struct-ret-3word-many-args | A3 | exit 2 — silently truncates | -| cc/116-struct-ret-vararg | A3 | exit 2 — silently truncates | -| cc/117-compound-literal | B | compile-fail — `(T){…}` unparsed | -| cc/118-const-expr | C | compile-fail — const-expr surface incomplete | - -## Stream layout - -``` - ┌─ A1: 111 + 112 (one-word + two-word direct) -Stream A (sequential) ───┤ -struct-return ABI └─ A2: 113 (indirect-result) - │ - ▼ - ┌─ A3a: 114 (sret + stack-staged args) ┐ - ├─ A3b: 115 (sret + 3-word + many args) │ parallel - └─ A3c: 116 (sret + variadic save area) ┘ - -Stream B (independent, t=0): 117 compound literals -Stream C (independent, t=0): 118 const-expr evaluator -Stream D (independent, t=0): 082 union offsets + 087 sizeof no-emit -``` - -`t=0`: A1, B, C, D fan out together. A2 starts when A1 lands. A3a/b/c -fan out when A2 lands. Total wall time ≈ A1 + A2 + max(A3). - -## Stream A — Struct-return ABI - -Implements the three result conventions from P1.md §Arguments: - -| Width | Convention | a0 | a1 | Args 0..3 | -|---|---|---|---|---| -| ≤ 8B | one-word direct | result word 0 | (caller arg 1) | a0..a3 | -| 9–16B | two-word direct | result word 0 | result word 1 | a0..a3 | -| > 16B | indirect-result | result-buffer ptr | (callee arg 0) | a1..a3 (shifted) | - -In the indirect convention, incoming stack-arg slot 0 corresponds to -explicit arg word 3 (not 4). `LDARG` indexing in the variadic save area -must respect this shift. - -### A1 — one-word and two-word direct - -- **cg primitives:** extend `cg-fn-end` to load the function's return - slot into a0 (≤8B) or a0/a1 (9–16B) per the return ctype's size. - Extend `cg-call`'s receive side to allocate a fresh frame slot via - `cg-alloc-slot` (sized to the return ctype) and store back from - a0[/a1] before pushing a frame-lval. -- **Uniform path:** ≤8B and 9–16B both go through a frame slot. No - register-only fast path. Simpler cg, identical parser surface. -- **Receive-area lifetime:** fresh slot per call site, allocated by - cg-call. Required so chained `make_triple(...).c` (cc/113:32) and - `ret1(99).x` (cc/111:26) don't alias across consecutive calls. -- **Parser:** `parse-return-stmt` accepts a struct-typed expression and - emits a per-byte copy from the source lval into the function's - return slot. `parse-postfix-rest` accepts struct-typed call results - as lvals so `.field` and `&` chain naturally. -- **CC-CONTRACTS update:** §3.2 currently asserts a single 8-byte - return slot. Replace with a pointer to P1.md §Arguments and a note - that the cg lowers all three conventions. -- **Fixtures:** `tests/cc-cg/70-struct-ret-1word.scm`, - `71-struct-ret-2word.scm` first; then unblock `tests/cc/111`, `112`. - -### A2 — indirect-result - -- **cg primitives:** when return ctype size > 16, `cg-fn-begin/v` - treats arg slot 0 as the sret pointer, parameter slots shift by one - register. `cg-fn-end` does no a0 store (callee already wrote through - *a0 during the return-stmt copy); the convention's "a0 holds the - same pointer on return" is automatic since a0 hasn't been clobbered. -- **Caller side:** `cg-call` detects sret-eligible return type; before - emitting the call, materializes the receive-slot's address into a0, - shifts ordinary args from a0..a3 → a1..a3 + stack, with stack slot 0 - now holding arg word 3. -- **Variadic interaction is deferred to A3c.** A2 itself targets only - cc/113 (no varargs, ≤4 named args). -- **Fixtures:** `tests/cc-cg/72-struct-ret-3word.scm`; then `tests/cc/113`. - -### A3 — sret compositions (parallel) - -Each agent picks up one test, mostly composing infrastructure A1+A2 -already shipped: - -- **A3a — cc/114** (sret-pair + 8 stack-staged args): the two-word - return doesn't need sret; the 8 args do exercise stack staging - (P1.md §Incoming stack-argument area). Validates that A1's - two-word receive composes with the existing stack-stage path. -- **A3b — cc/115** (sret-3word + 8 stack-staged args): with sret in - a0, args 0–2 live in a1–a3 and args 3–7 stage to stack slots 0–4. - Pure indexing test for A2's shift. -- **A3c — cc/116** (sret + variadic save area): `cg-fn-begin/v`'s - 16-slot save window indexes from incoming arg word 0. When the fn - uses indirect-result, slot 0 is arg word 3 (not 4) — A3c adjusts the - windowing accordingly. `__builtin_va_start` must skip the sret - pointer; in practice that's automatic if the named-arg count - threading is correct, since the sret pointer occupies a0 and the - named args start at a1. - -## Stream B — Compound literals (117) - -C99 §6.5.2.5: `(T){ init-list }` as a postfix expression. Block-scope -only; file-scope literals `die` explicitly. - -- **Parser:** detect `(T){` lookahead in `parse-cast-or-unary`. Parse - the typename via `parse-decl-spec` + `parse-declarator`, then call - the existing `parse-init-local-aggregate` against a fresh slot from - `cg-alloc-slot`. Push a frame-lval typed as T (or T[N]). -- **Lvalue contract:** the literal is an lvalue, so `&literal`, - `literal.field`, and `literal[i]` all work via existing - `cg-take-addr` / `cg-push-field` / `cg-decay-array` paths. -- **Lifetime:** frame slot ⇒ enclosing block, automatic. Matches C99. -- **Reuse:** no new cg primitives. The fixture surface (positional, - designated, partial-init zero-fill, trailing comma, array decay, - byval struct arg) is all already covered by Stream E in - CC-PUNCHLIST. - -## Stream C — Const-expr evaluator (118) - -Adds `parse-const-expr ps → (value . ctype)`, a self-contained walker -that never touches cg. - -- **Operand surface:** integer/character literal, enum constant, - `sizeof(typename)`, unary `+ - ~ !`, binary `+ - * / % << >> & | ^`, - compare `< <= > >= == !=`, logical `&& ||` (short-circuit), ternary - `?:`, cast to integer type, parenthesization. Anything else `die`s. -- **Width-aware return:** the `(int)(unsigned char)257 == 1` case - requires the cast to truncate at u8 width. Bare fixnum loses this; - the (value . ctype) tuple keeps it. -- **Sizeof arm:** for now, only `sizeof(TYPENAME)` — the only form - exercised by 118. If a value-expression form surfaces later, grow a - scope-lookup arm; still no cg interaction. -- **Wiring (replace existing `parse-const-int` call sites):** - `parse-enum-spec`, `parse-decl-suf-cont`'s `[]` arm, - `parse-init-global`'s scalar branch, `parse-switch-stmt`'s case - label, local array bound in `parse-stmt`. -- **No interaction with Stream D.** See "Sizeof split" below. - -## Stream D — Surgical fixes (082 + 087) - -Single agent — both ~30-line changes. - -### 082 — union field offsets - -`parse-struct-fields` (cc.scm:3634) advances offset after every field -regardless of kind. For unions all fields must stay at offset 0. - -- Thread `kind` from `parse-aggregate-spec` (cc.scm:3611, where it's - already in scope) through to `parse-struct-fields`. -- Gate the `(+ oa (max sz 0))` bump on `(eq? kind 'struct)`. Unions - pass through with `oa` unchanged. -- `complete-agg!` already sizes unions correctly; no change there. - -### 087 — sizeof no-emit - -The current sizeof arm at cc.scm:4898 calls `parse-unary` / `parse-expr` -which emit code for the operand. `sizeof(x++)` therefore actually -increments x. - -- Add cg primitives `cg-snapshot cg → tag` and `cg-rewind cg tag`. - Snapshot captures vstack depth and fn-buf chunk count; rewind - restores both. Internal-only. -- In the sizeof arm, snapshot before parse-unary, read - `(opnd-type (cg-top))`, rewind, push `cg-push-imm %t-u64 size`. -- Fixture lock-in: 087 covers `sizeof(x++)`. Add a cg-fixture if cg - primitives need direct validation. - -## Cross-stream contracts - -### Sizeof split (C and D stay independent) - -Two distinct callers, two independent mechanisms: - -- **Outside const-expr** (Stream D — 087): operand can be anything - (`x++`, calls, side-effects). Result lives at runtime. Use - `cg-snapshot` / `cg-rewind`. -- **Inside const-expr** (Stream C — 118): operand grammar restricted; - result is a parse-time fixnum. Const-expr evaluator handles - `sizeof(TYPENAME)` directly via `parse-decl-spec` + - `parse-declarator` + `ctype-size`. Never calls cg. - -The two paths share the *concept* (don't evaluate the operand) but -not the implementation. They can land in either order. - -### A1 ↔ B/C/D - -Independent. A1 only touches `cg-fn-end`, `cg-call`, and -`parse-return-stmt`. B touches `parse-cast-or-unary`. C adds a new -walker plus changes to four call sites that don't overlap with A1. -D touches `parse-struct-fields` and adds two new cg primitives. - -### A2 ↔ A3 - -A3 depends on A2's indirect-result implementation. A3a/b/c are -independent of each other once A2 has landed. - -## Workflow per stream - -Per [CC-INTERNALS §Feature workflow](CC-INTERNALS.md#feature-workflow): - -1. cc-cg fixture (red) — drive the cg API directly. -2. Implement cg primitives until cc-cg green. -3. cc fixture (red) — full driver. -4. Implement parser changes until cc green. - -Pick the next free `<n>` per suite. cc-cg currently goes up through -69; cc goes up through 118 (with gaps). - -## Acceptance - -Per stream: `make test SUITE=cc ARCH=aarch64` shows the stream's -target tests as PASS, no prior tests regress. Final acceptance: all -ten currently-failing tests green on aarch64. diff --git a/docs/CC-STREAM.md b/docs/CC-STREAM.md @@ -1,254 +0,0 @@ -# Streaming the cc - -Working plan. The cc today is a four-phase pipeline that materializes -the full intermediate state at every layer: - -``` -src bv → [lex-tokenize] → tok list → [pp-expand] → tok list → [parse] → cg buf -``` - -That shape doesn't fit the data flow. Lex, pp, parse, and cg are each -inherently sequential, with small bounded lookahead. The full token -list is never *needed*; it's just where the current code parks -intermediate state on the heap. For [tcc.flat.c](TCC.md) (608 KB) the -lex output alone runs ~170 MB at ~280 B/byte even after the -[per-token mark/rewind tightening](TCC-TODO.md#blocker-3). pp adds -another similar slug on top. We've doubled the ELF p_memsz twice -already; the cleaner answer is to stop materializing. - -This doc plans the conversion to a true single-pass streaming -compiler: lex, pp, parse, and cg all run concurrently, each pulling -one token at a time from the stage upstream. Steady-state live data -is bounded by *lookahead* (≤2 tokens) plus parser state (typedef -table, scope chain, macro table, cg output buffer), not by source -length. - -## Why streaming, not GC - -A real GC would fix the symptom — the per-byte multiple is mostly -dead intermediate state, and a GC would reclaim it eventually — but -the underlying shape would still be wrong. Adding a GC to scheme1 is -also a much bigger lift than reshaping the cc: - -- scheme1 is shared by every bootstrap stage; touching its allocator - affects M1pp, p1, scheme1's own self-host, and anything else built - on top. Streaming is local to cc. -- A GC pass needs to trace through closures, records, env alists, and - the bytevector heap layout. It's a multi-week project against - scheme1's current single-page-of-allocator code. -- Memory headroom for the cc has a hard ceiling (ELF p_memsz, which - must be a constant in the boot ELF). A GC narrows the *typical* - high-water mark but doesn't change the *worst case* the cap has to - cover. Streaming changes the worst case from O(source length) to - O(parser state). -- We're aiming for predictability: the cc is the bottleneck stage - that needs to compile multi-MB inputs. A bounded steady-state model - is easier to reason about than a "GC will handle it" model. - -GC stays a sensible move for scheme1 *eventually* — every bootstrap -stage would benefit. But for closing the tcc.flat.c gap it's the -wrong tool. - -## Symmetry argument - -Each layer in the pipeline has small bounded lookahead: - -| layer | lookahead / buffering | -|--------|------------------------------------------------------| -| lex | ≤3 source bytes (trigraph), 1 token internally | -| pp | 1 token peek; arg-collection during fn-macro expansion (bounded by call site) | -| parse | 2 tokens (`peek2` is the deepest the grammar uses) | -| cg | 0 (already streams to cg-text/cg-data buffers) | - -Live tokens across the pipeline are O(macro-arg-list + -parse-lookahead) — call it dozens. Persistent state (parser tables -+ macro definitions + cg output) grows with declared symbols, not -source length. For tcc.flat.c that's an estimated 5–20 MB. - -## Iter abstraction - -A single record + three operations per layer: - -```scheme -(define-record-type tok-iter - (%tok-iter pull-fn state buf) - tok-iter? - (pull-fn tok-iter-pull-fn) ; thunk: state -> tok - (state tok-iter-state) ; per-layer - (buf tok-iter-buf tok-iter-buf-set!)) ; pushed-back toks (small list) - -(define (iter-next it) ...) ; consume + return next tok (EOF sentinel) -(define (iter-peek it) ...) ; like next, but cache in buf -(define (iter-unget! it t) ...) ; push back; next iter-next returns t -``` - -Each layer wraps the layer below: `lex-iter` over bytes, `pp-iter` -over `lex-iter`, `pstate` over `pp-iter`. The iter discipline -replaces the current "list of toks" everywhere a sequence is -consumed. - -## Mapping the layers - -### lex-iter - -Drop the per-iteration `(reverse (cons … acc))` driver in -[cc/cc.scm](../cc/cc.scm) `lex-tokenize`. State holds (pos, line, -col, bol?). `lex-iter-next` runs roughly one iteration of the current -loop body: skip ws+comments, dispatch, allocate one tok+loc, return. - -The per-token `heap-mark` / `heap-rewind!` discipline I just landed -transposes one-for-one — same `%lex-scratch`, same scalar boxes, same -post-rewind rebuild of tok+loc. The difference is who advances: -instead of the loop driving itself to EOF, the *consumer* (pp-iter) -pulls one tok per `iter-next` call. - -### pp-iter - -Most invasive. State holds (lex-iter, pending-toks, macros, -cond-stack, current-loc). `pp-iter-next`: - -1. If `pending` non-empty, pop and return. -2. Pull from `lex-iter`. -3. If it's `HASH` at bol: collect the directive line into a small - buffer (bounded by one logical line, terminated by NL), dispatch, - recurse. -4. If it's a defined ident not in its hide set: collect args - (function-like — bounded by the call site), expand into - `pending`, recurse. -5. If it's `STR` and the next non-trivia tok is `STR`: fuse on the - fly (replaces today's post-pass `%pp-merge-adjacent-strs`). -6. Else return. - -The hide set tracking already runs per-token in `pp-expand`; the -streaming version threads it through `pending` instead of through a -flat list. - -### parser - -Already uses `ps-peek` / `ps-advance` (see -[CC.md §Parser](CC.md)). Repoint them at `pp-iter` instead of -indexing into a list. Add per-stmt `heap-mark` / `heap-rewind!` in -`parse-stmt` so that the parse-time interpreter overhead also gets -reclaimed at statement boundaries. Survivors that must cross the -rewind (typedef bvs, scope-bound symbols, macro bodies) need to be -allocated below the mark — same discipline as the lex driver, just -one layer up. - -### cg - -Already streams to cg-text/cg-data buffers. No change. - -## Subtle bits - -- **#if cexpr** evaluates a fully-collected directive line — keep - that as a *bounded* buffer (one preprocessor line, terminated by - NL). Don't try to stream cexpr eval. -- **Function-macro arg collection** also bounded buffer (until - matching `)`). Worst case is multi-line, but still bounded by the - call-site span. The only real memory concern; if a macro call spans - 10 KB of args we hold all of it. tcc.c doesn't do that. -- **Adjacent string concat** moves from a post-pass to a peek-and- - fuse in `pp-iter`. Easy. -- **Builtin macros** (`__FILE__`, `__LINE__`, `__DATE__`, …) need the - current loc at expansion time, not at lex time. The pp-iter has it - because it tracks lex's current loc. -- **EOF discipline.** Each iter must yield exactly one EOF tok and - then keep yielding EOF on subsequent calls. The parser already - expects EOF as a stable sentinel. -- **Lookahead beyond 2.** The parser uses `peek` and `peek2`; nothing - deeper. If something needs 3-token lookahead later, the unget - buffer accommodates it without changing the iter shape. -- **Per-stmt rewind survivors.** A statement may bind a typedef - whose name must outlive the rewind. The mark goes *after* the - typedef-add path, or the bv is allocated below the mark — same - trick the lex driver uses with `%lex-scratch`. Easier than it - sounds because typedef adds happen at the top of `parse-decl`, - which is well before any deep parse-time scratch. - -## Migration path - -Three roughly self-contained PRs. Each is independently testable — -the existing test suites already drive only the public API -(`lex-tokenize` for cc-lex, `pp-expand` for cc-pp, the cc front end -for cc + cc-cg). Adapter wrappers preserve those entry points -through the migration. - -### Step 1 — lex-iter - -Convert `lex-tokenize` to `make-lex-iter` + `lex-iter-next`. Leave -`lex-tokenize` as a thin wrapper that drains the iter into a list, -so cc-lex tests pass unchanged. Touchpoints: -[cc/cc.scm](../cc/cc.scm) lex section, [cc/main.scm](../cc/main.scm). - -After this step: lex output is still consumed by today's -`pp-expand` via the wrapper. Identity-preserving; no behavior change -visible to tests. - -### Step 2 — pp-iter - -Convert `pp-expand` to `make-pp-iter` + `pp-iter-next`. Directives -and macro expansion all move from "list-walking" style to -"stream + pending-buffer" style. Same logic, different shape. Leave -a `pp-expand` wrapper that drains the iter into a list for -back-compat. Heaviest single PR. - -After this step: pp consumes lex via the iter; parser still drains -pp into a list. - -### Step 3 — parser repoint - -Change `pstate` to hold a `pp-iter` instead of a tok-list cursor. -`peek` / `peek2` / `advance` redirect to iter ops. Drop the wrappers -from steps 1–2 once the cc-cg / cc tests pass. - -Add per-stmt `heap-mark` / `heap-rewind!` in `parse-stmt`. This is -the layer where the *parser* per-form interpreter overhead gets -reclaimed. - -After this step: full streaming. Memory bound shifts from O(source -length) to O(parser state). - -## Expected impact - -For tcc.flat.c (608 KB): - -| state | predicted high water | -|-------------------------------|---------------------:| -| current (post-lex tightening) | ~170 MB during lex; pp blows the 256 MiB cap | -| post-step-1 | unchanged (same materialization at pp/parser) | -| post-step-2 | lex no longer materialized; pp output still listed for parser. ~80 MB? | -| post-step-3 | full streaming. Bounded by parser state. ~10–30 MB. | - -Numbers are guesses until we measure, but the shape is clear: most -of the per-byte transient cost we pay today disappears not because -we got faster but because we never store that intermediate at all. - -## Open questions - -- **Hide-set sharing.** Today the same hide-set list can be shared - across many tokens because it's immutable. In streaming, the same - property holds — pp-iter just hands out shared references — but - we should confirm there's no in-place mutation lurking in the pp - code paths that gets converted. -- **Diagnostics ergonomics.** When the parser dies on an unexpected - token, today the line/col come from `tok-loc`. Same in streaming; - no change. -- **Test runner adapters.** cc-lex and cc-pp test runners build full - token lists for inspection. Keep their wrappers (`lex-tokenize`, - `pp-expand`) as drain-to-list adapters. Internally the test - fixtures don't change. -- **`#include`.** Currently rejected ([CC.md §Toolchain envelope](CC.md)). - If we ever want to support it, the streaming model is friendlier: - the include-file's lex-iter is pushed onto a stack inside pp-iter, - and `pp-iter-next` drains it before resuming the outer file. Out - of scope for the tcc.flat.c push but worth noting. - -## Related - -- [TCC-TODO.md](TCC-TODO.md) — blocker 3 covers the same memory - problem from the per-byte angle. The lex-side tightening landed - there is what step 1 of this plan transposes onto an iter. -- [CC.md](CC.md) — the language and toolchain spec the cc implements. -- [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md) — independent feature - punchlist; streaming is orthogonal but unblocks running on - tcc.flat.c so those features can be exercised end-to-end. diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -2,10 +2,8 @@ Working punch list for what stands between today's scheme1-hosted cc and a successful compile of `tcc.flat.c`. Companion to -[TCC.md](TCC.md) (the surrounding three-stage pipeline), [CC.md](CC.md) -(the C subset the cc accepts), and [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md) -(the per-feature `tests/cc/` punchlist). The blockers below are the -ones that fire *before* we even reach the parallel-plan items. +[TCC.md](TCC.md) (the surrounding three-stage pipeline) and +[CC.md](CC.md) (the C subset the cc accepts). ## Repro @@ -39,141 +37,52 @@ head -c 50000 build/cc-bootstrap/X86_64/tcc.flat.c \ # then re-run the podman invocation against tcc.head.c ``` -## Blocker 1 — `float` / `double` hard-rejected in type specifiers ✓ done - -Resolved: `parse-decl-spec` now accepts `float`, `double`, and `long -double` as type specifiers, mapping them to `%t-flt` / `%t-dbl` / -`%t-ldbl` ctypes (sizes 4/8/8). `_Complex` / `_Imaginary` are eaten as -silent qualifiers. The five other type-name-start predicates -(`paren-is-group?`, `stmt-starts-decl?`, `token-is-decl?`, -`%const-paren-is-cast?`, `%const-tok-is-decl?`, plus the -`parse-cast-or-unary` guard) recognize the new kw set. The cg guards -fp ld/st/cast via `%cg-fp-reject!` so any attempt to materialize an fp -value dies with `fp not codegen'd` rather than silently emitting -integer code. Coverage: `tests/cc/119-float-parse.c` exercises -prototypes, struct layout, and `sizeof(float|double|long double)`. -Confirmed end-to-end: a 5 KB prefix of `tcc.flat.c` now reaches -blocker 2 (`dup decl: _exit`) instead of dying on `double`. - -## Blocker 2 — compatible redeclaration rejected - -Site: [cc/cc.scm](../cc/cc.scm) `scope-bind!`, line 3667. - -```scheme -(if (alist-ref n top) (die #f "dup decl" n) - (ps-scope-set! ps (cons (alist-set n s top) r))) -``` +## Blocker — parse-phase heap explosion -C allows redundant identical extern declarations and prototypes that -differ only in parameter names. The flattened TU has many — `_exit` -appears in both `<stdlib.h>` and `<unistd.h>`; `strlen` is declared -once with a named param and once without; etc. +Past the redecl gate, the next thing we hit is heap exhaustion well +before the full TU is parsed. Probing prefixes against the catm'd cc +(HEAP_CAP_BYTES = 256 MiB): -Fix: when the name is already bound at the same scope, allow the -redeclaration if the new type is compatible with the existing one -(extern decls of the same function; pointer/array decay equivalents). -Only definition-after-definition or genuinely conflicting types should -die. The same compatible-redecl rule needs to land in the typedef -table and the tag table for forward-decl chains. +| input | bytes | heap after parse | result | +|--------------------|-------:|-----------------:|-----------------| +| 220-line struct cut| 7 953 | ~39 MB | cg-finish | +| 1612-line cut | 40 943 | ~267 MB | cg-finish (just under cap) | +| 50 000 B head | 49 986 | — | heap exhausted | +| full tcc.flat.c |608 547 | — | heap exhausted | -Observed (with blocker 1 fixed, on a 5 KB prefix of tcc.flat.c): +So parse-phase residency is roughly 6.5 KB heap per source byte at the +moment, which puts the full 608 KB TU around 4 GB — far beyond any +reasonable scheme1 cap. Bumping the cap is not the fix; the parser's +per-token allocations need to drop into the per-token mark/rewind +arena that the lex pipeline already uses (commit 62dabed). -``` -error: dup decl: _exit -``` - -## Blocker 3 — lex+pp heap usage at ~3 KB per source byte (lex side fixed) - -Site: lex/pp pipeline in [cc/cc.scm](../cc/cc.scm) plus heap caps in -[scheme1/scheme1.P1pp](../scheme1/scheme1.P1pp) and -[vendor/seed/](../vendor/seed/)`<arch>/ELF.hex2`. - -### Lex side — done - -Investigation traced the bulk of the per-byte cost to scheme1's -interpreter calling convention: every closure call allocates ~32 B -per param via `bind_params`, every `let*` binding allocates two cons -cells, every `eval_args` allocates one cons per arg, and tail-rec -named-let in the inner scanners (`%scan-while`, `%fill-while-bv`, -`%match-bytes`, `%accum-int-while`) re-applies a closure per scanned -byte. None of this overhead is reclaimed across token boundaries — -the lex helpers' own internal `heap-mark` / `heap-rewind!` discipline -trims their per-helper scratch but the result tok + 4-list lives -below any putative outer mark, so a driver-level rewind needs to -rebuild survivors from pre-mark scalar boxes. - -Fix landed in `cc/cc.scm`: `lex-tokenize` now wraps each iteration in -its own `heap-mark` / `heap-rewind!`. Scalar fields (kind, value for -non-bv tokens, npos/nline/ncol) ferry across the rewind via outer-let -bindings (set! mutates the binding cdr in place; no heap). Bv-typed -values (IDENT, STRING) ferry through `%lex-scratch`, a single sticky -bytevector allocated below all marks; post-rewind the driver copies -back into a fresh bv sized exactly to the content. tok+loc+acc-cons -are rebuilt post-rewind. - -Measured (`tests/cc/001-kitchen-sink.c`, 9.7 KB): +Repro: ``` -phase=slurp: heap 1.23 MB src-bytes 9782 -phase=lex: heap 3.40 MB (delta ~2.17 MB → ~222 B/byte; 13× better) -phase=pp: heap 8.58 MB +podman run --rm --pull=never --platform linux/arm64 \ + --tmpfs /tmp:size=512M -e ARCH=aarch64 \ + -v "$(pwd)":/work -w /work boot2-busybox:aarch64 \ + build/aarch64/scheme1 build/aarch64/cc/cc.scm --cc-debug \ + build/cc-bootstrap/X86_64/tcc.flat.c /tmp/tcc.flat.P1pp ``` -A 50 KB prefix of `tcc.flat.c` (which used to exhaust the heap during -lex) now lex+pp's cleanly through to blocker 2 (`dup decl: _exit`). -Lex finishes at 15.6 MB, pp at 41.3 MB. Extrapolating 222 B/byte to -the full 608 KB input gives ~135 MB during lex+pp — close enough to -the 128 MB `p_memsz` cap that a single bump (and a bigger -`HEAP_CAP_BYTES`) closes the gap. - -We also considered an interning pool (one bv per unique IDENT/STRING, -shared across token instances). With scheme1's interpreter, walking a -cons-list pool — even bucketed 16-way — costs ~50–150 B per step in -`bind_params`/`eval_args`/named-let frame overhead. The walk costs -out-paced the bv-allocation savings: a 50 KB prefix went 60+ MB with -buckets vs ~17 MB un-interned. Until scheme1 grows a true vector -primitive (so bucket access is O(1) without interpreter overhead), -no-intern is the cleanest path. The `%lex-scratch` infrastructure is -ready to layer interning back on top if a downstream eq?-fast-path -turns out to need it. - -### Remaining - -- **pp side memory.** pp still copies the lex token list into a - buf-list and walks it; `%pp-take-line` reverses tokens. There's - probably similar interpreter-overhead concentrate to drain. Worth - the same investigation pass once we're past blocker 2. -- **Bump arena caps.** Even with the lex fix the full TU is many MB - of tokens. `READBUF_CAP_BYTES`, `HEAP_CAP_BYTES`, and the ELF - `p_memsz` literal must all grow in lockstep before tcc.flat.c will - finish. +Phase log shows heap climbs only during `parse`; `slurp` is cheap and +`cg-finish` does not move the needle. Investigation should focus on +which parser products escape the per-token arena — top suspects are +ctypes built up during decl parsing, expression vstack residue, and +the alist-based scope/tag/typedef tables. ## Suspected next-tier blockers (not yet observed) -Past blockers 1–3 the next wave we expect: +Past the heap gate, the next wave we expect: - **`_Bool`, bitfield-typed struct fields, `setjmp.h` typedefs** — - same "parse, don't codegen" softening as floats. tcc.c carries all - of these under `HAVE_BITFIELD` / `HAVE_SETJMP` gates that are off - but leave the declarations in the flattened text. -- **Existing CC-PARALLEL-PLAN items** — struct-return ABI streams - A1/A2/A3, compound literals (B), const-expr evaluator (C), union - offsets + sizeof no-eval (D). tcc.c uses every one of these. -- **Throughput / wall-clock** — even with the heap fix, lex+pp+parse - on 18 896 lines under scheme1 is going to be slow. Worth measuring - before optimizing. - -## Suggested order of attack - -1. Soften float/double, then bitfields, then setjmp types into - "parse, codegen rejects ops". Small, mechanical, unblocks the gate - to actual triage. -2. Compatible-redecl in `scope-bind!`, the typedef table, and the tag - table. -3. Lex/pp memory pass + heap/p_memsz bumps. -4. Re-run on `tcc.flat.c`. At this point work re-converges with - [CC-PARALLEL-PLAN.md](CC-PARALLEL-PLAN.md). - -Hitting milestone 4 in [CC.md §Validation milestones](CC.md) — "Compile -tcc.c (under the tcc-mes defines) → tcc-lispcc; verify -`tcc-lispcc -version` runs" — is the goal the items above feed into. + same "parse, don't codegen" softening already applied to floats. + tcc.c carries all of these under `HAVE_BITFIELD` / `HAVE_SETJMP` + gates that are off but leave the declarations in the flattened text. +- **Throughput / wall-clock** — lex+pp+parse on 18 896 lines under + scheme1 is going to be slow even after heap residency drops. + +The end goal is milestone 4 in [CC.md §Validation milestones](CC.md) +— "Compile tcc.c (under the tcc-mes defines) → tcc-lispcc; verify +`tcc-lispcc -version` runs." diff --git a/tests/cc/067-vararg-call.c b/tests/cc/067-vararg-call.c @@ -1,14 +1,16 @@ /* §G.1 — variadic call: caller default-promotes args. * - * Declares sum3 as (int n, ...). Defines a same-ABI sum3 that takes - * three ints. Calls sum3(1, c, s) where c is signed char = -3 and s - * is short = 100. Per default-promote rules, c and s are promoted to - * int before the call, so the callee's a1/a2 hold -3 and 100 with - * full 32-bit sign-extension. + * Declares sum3 as (int n, ...) and defines it as a real variadic fn + * pulling its tail args via __builtin_va_arg. Calls sum3(1, c, s) + * where c is signed char = -3 and s is short = 100. Per default- + * promote rules, c and s widen to int before the call, so the + * receive yields full 32-bit -3 and 100. * * Result: 1 + (-3) + 100 = 98. */ +typedef char *va_list; + int sum3(int n, ...); int main(void) { @@ -17,6 +19,12 @@ int main(void) { return sum3(1, c, s); /* 1 + (-3) + 100 = 98 */ } -int sum3(int n, int a, int b) { +int sum3(int n, ...) { + va_list ap; + int a; int b; + __builtin_va_start(ap, n); + a = __builtin_va_arg(ap, int); + b = __builtin_va_arg(ap, int); + __builtin_va_end(ap); return n + a + b; } diff --git a/tests/cc/120-redecl-extern-fn.c b/tests/cc/120-redecl-extern-fn.c @@ -0,0 +1,13 @@ +/* Compatible redecl: two identical extern fn prototypes for the same + * function are legal C and common in flattened TUs that pick up the + * same declaration from multiple headers. */ + +extern int square(int); +extern int square(int); + +int square(int x) { return x * x; } + +int main(int argc, char **argv) { + if (square(5) != 25) return 1; + return 0; +} diff --git a/tests/cc/120-redecl-extern-fn.expected-exit b/tests/cc/120-redecl-extern-fn.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/121-redecl-fn-paramnames.c b/tests/cc/121-redecl-fn-paramnames.c @@ -0,0 +1,14 @@ +/* Compatible redecl: prototypes that differ only in parameter names + * (or presence/absence of a parameter name) describe the same function + * type. tcc.flat.c hits this when a stdlib fn is declared once with a + * named param and once without. */ + +int times3(int); +int times3(int x); + +int times3(int x) { return x + x + x; } + +int main(int argc, char **argv) { + if (times3(7) != 21) return 1; + return 0; +} diff --git a/tests/cc/121-redecl-fn-paramnames.expected-exit b/tests/cc/121-redecl-fn-paramnames.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/122-redecl-typedef.c b/tests/cc/122-redecl-typedef.c @@ -0,0 +1,11 @@ +/* Compatible redecl: identical typedefs from multiple headers (e.g. + * size_t in <stddef.h> and <stdlib.h>) collapse into one declaration. */ + +typedef int sint; +typedef int sint; + +int main(int argc, char **argv) { + sint x = 17; + if (x != 17) return 1; + return 0; +} diff --git a/tests/cc/122-redecl-typedef.expected-exit b/tests/cc/122-redecl-typedef.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/123-redecl-extern-var.c b/tests/cc/123-redecl-extern-var.c @@ -0,0 +1,13 @@ +/* Compatible redecl: a variable declared extern can be re-declared + * extern any number of times before the single tentative/initialized + * definition. */ + +extern int counter; +extern int counter; + +int counter = 42; + +int main(int argc, char **argv) { + if (counter != 42) return 1; + return 0; +} diff --git a/tests/cc/123-redecl-extern-var.expected-exit b/tests/cc/123-redecl-extern-var.expected-exit @@ -0,0 +1 @@ +0