commit e9a91f83be3d3feb121489e70b1ec7386b97b4f3
parent 65d998517f0acfba3f6722061c65af87d1f5ab4a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 13:25:07 -0700
cc: parse-and-discard GNU __attribute__ specs
Unblocks tcc.flat.c past line 1628. Same softening pattern as floats:
parse the spec, ignore semantics. Handles both prefix attributes on
decl-specs and trailing attributes after declarators.
Diffstat:
| M | cc/cc.scm | | | 32 | ++++++++++++++++++++++++++++++++ |
| M | docs/TCC-TODO.md | | | 145 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
2 files changed, 104 insertions(+), 73 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -482,6 +482,8 @@
;; qualifiers (parsed and discarded by parse)
("const" . const) ("volatile" . volatile) ("restrict" . restrict)
("inline" . inline)
+ ;; GNU attribute spec — parsed and discarded; see skip-gnu-attribute!
+ ("__attribute__" . __attribute__)
;; type specifiers
("void" . void) ("char" . char) ("short" . short)
("int" . int) ("long" . long)
@@ -4075,10 +4077,38 @@
(advance ps) (eat-cv-quals! ps))
(else #t)))
+;; Consume one GNU `__attribute__ (( ... ))` spec and discard it; returns #t.
+;; On entry the keyword has been peeked but not yet consumed. Every token up
+;; to the `)` matching the first `(` is dropped; EOF first is reported via die.
+;; tcc.c's prototypes use these for noreturn / format / aligned, which the
+;; bootstrap need not honour — same softening as floats and rejected-but-accepted type specifiers.
+(define (skip-gnu-attribute! ps)
+ (advance ps)                         ; consume the `__attribute__` keyword itself
+ (expect-punct ps 'lparen)            ; only the outermost `(` is checked explicitly
+ (let loop ((depth 1))                ; depth = parens currently open (outermost counts as 1)
+ (let ((t (peek ps)))
+ (cond
+ ((eq? (tok-kind t) 'EOF)             ; input ended before the spec was closed
+ (die (tok-loc t) "EOF in __attribute__"))
+ ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen))
+ (advance ps) (loop (+ depth 1)))     ; nested `(` (incl. the usual second one): go deeper
+ ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'rparen))
+ (advance ps)
+ (cond ((= depth 1) #t)               ; this `)` closed the outermost paren: done
+ (else (loop (- depth 1)))))          ; closed a nested paren: keep scanning
+ (else (advance ps) (loop depth)))))) ; any other token is skipped without inspection
+
+(define (eat-gnu-attributes! ps)                      ; skip zero or more back-to-back attribute specs
+ (cond ((at-kw? ps '__attribute__)                    ; next token starts another spec
+ (skip-gnu-attribute! ps) (eat-gnu-attributes! ps))   ; drop it, then check again
+ (else #t)))                                          ; nothing (more) to skip; result is always #t
+
(define (parse-decl-spec ps)
(let loop ((sto #f) (sn #f) (lg 0) (b #f) (saw #f))
(let ((t (peek ps)))
(cond
+ ((at-kw? ps '__attribute__)
+ (skip-gnu-attribute! ps) (loop sto sn lg b saw))
((or (at-kw? ps 'auto) (at-kw? ps 'register))
(advance ps) (loop sto sn lg b #t))
((at-kw? ps 'static) (advance ps) (loop 'static sn lg b #t))
@@ -4657,6 +4687,8 @@
(expect-punct ps 'rparen)
(let ((r (parse-decl-suf-cont ps)))
(lambda (b) (%mk-fn (r b) p v)))))
+ (($ tok? (kind KW) (value __attribute__))
+ (skip-gnu-attribute! ps) (parse-decl-suf-cont ps))
(else (lambda (b) b))))
(define (paren-is-group? ps)
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -37,46 +37,73 @@ head -c 50000 build/cc-bootstrap/X86_64/tcc.flat.c \
# then re-run the podman invocation against tcc.head.c
```
-## Blocker — parse-phase heap explosion
+## Blocker — `static` tentative-def merge for `gnu_ext`
-Past the redecl gate, the next thing we hit is heap exhaustion well
-before the full TU is parsed. Probing prefixes against the catm'd cc
-(HEAP_CAP_BYTES = 256 MiB):
+With `__attribute__` parse-and-discard in place, parse runs further
+into the TU and trips on `gnu_ext`:
-| input | bytes | heap after parse | result |
-|--------------------|-------:|-----------------:|-----------------|
-| 220-line struct cut| 7 953 | ~39 MB | cg-finish |
-| 1612-line cut | 40 943 | ~267 MB | cg-finish (just under cap) |
-| 50 000 B head | 49 986 | — | heap exhausted |
-| full tcc.flat.c |608 547 | — | heap exhausted |
+```
+error: redefinition: gnu_ext
+```
+
+tcc.c has the textbook tentative-definition pattern:
+
+```
+static int gnu_ext; // line 1613 — tentative
+...
+static int gnu_ext = 1; // line 1919 — actual definition
+```
+
+C allows any number of tentative definitions of the same `static`/external
+object alongside at most one definition with an initializer; the cc currently
+treats the second decl as a redefinition. The fix lives in
+`scope-bind!` / `sym-merge` (cc.scm) — when both old and new are
+non-`extern` `var` decls and only the new one is `defined?`, merge
+rather than reject. Same shape as the existing extern-fn /
+extern-var redecl handling.
+
+## Resolved — `__attribute__` decl-spec at line 1628
-Pre-Phase-3 parse-phase residency was roughly 6.5 KB heap per source
-byte, which puts the full 608 KB TU around 4 GB — far beyond any
-reasonable scheme1 cap. The per-decl scratch arena introduced by
-[CC-SCRATCH.md](CC-SCRATCH.md) (Phase 3) addresses this.
+The cc now consumes GNU `__attribute__ ((...))` specs and discards
+them. `skip-gnu-attribute!` lives next to `eat-cv-quals!` in cc.scm and is called from
+`parse-decl-spec` (prefix attributes) and `parse-decl-suf-cont`
+(trailing attributes after a declarator, e.g.
+`void foo(void) __attribute__((noreturn));`). Same softening pattern
+as floats / wide types — `noreturn`, `format`, `aligned`, etc. are
+not honoured semantically, just parsed away.
-### Post-Phase-3 measurements (scratch = 128 MiB, heap = 256 MiB)
+## Resolved — parse-phase heap pressure
-Re-running the probe with the catm'd cc after Phase 3 lands. Each cut
-ends at a clean top-level `};` boundary so the parse completes:
+The two earlier memory blockers (whole-TU heap explosion, single-decl
+scratch peak inside the `enum tcc_token` block) are both gone after
+the per-decl scratch arena (Phase 3, [CC-SCRATCH.md](CC-SCRATCH.md))
+plus the recent scope-bind alist / scratch reclamation work.
+
+Probing prefixes that end at clean top-level `};` boundaries so the
+parse completes (HEAP_CAP_BYTES = 256 MiB, SCRATCH_CAP_BYTES =
+128 MiB):
| line | bytes | heap after parse | Δ from start | KB / source byte |
|-----:|-------:|-----------------:|-------------:|-----------------:|
-| 220 | 7 953 | 16 421 864 | 15 191 584 | 1.91 |
-| 280 | 9 795 | 16 922 368 | 15 692 088 | 1.60 |
-| 683 | 18 260 | 19 764 528 | 18 534 248 | 1.02 |
-| 880 | 22 111 | 20 763 656 | 19 533 376 | 0.88 |
-| 981 | 24 557 | 22 918 568 | 21 688 288 | 0.88 |
-| 986 | 24 630 | 22 931 984 | 21 701 704 | 0.88 |
-
-Marginal residency (heap delta / new bytes between successive probes):
-~0.26 – 0.88 KB per added source byte, depending on whether the new
-content is dense typedef structs (high) or wider whitespace / forward
-fn decls (low). The per-byte average converges to roughly **0.9 KB /
-input byte** as the prefix grows past the per-decl baseline overhead —
-a **7 ×** drop from the pre-Phase-3 6.5 KB / input byte.
-
-Heap delta minus the start-of-process baseline (~1.23 MB scheme1
+| 220 | 7 953 | 18 012 476 | 16 802 432 | 2.11 |
+| 280 | 9 795 | 18 885 492 | 17 675 448 | 1.81 |
+| 683 | 18 260 | 21 464 308 | 20 254 264 | 1.11 |
+| 880 | 22 111 | 22 650 284 | 21 440 240 | 0.97 |
+| 981 | 24 557 | 23 281 212 | 22 071 168 | 0.90 |
+| 986 | 24 630 | 23 306 676 | 22 096 632 | 0.90 |
+| 1612 | 40 943 | 31 186 500 | 29 976 456 | 0.73 |
+| 1627 | 41 626 | 31 481 060 | 30 271 016 | 0.73 |
+
+Average residency (the last column) converges to roughly **0.9 KB /
+input byte** at the small-prefix scale and drops further across the
+enum block. That average is now low enough that the full 608 KB TU is
+projected to fit comfortably under a 256 MiB heap cap; in practice
+parse now aborts on the `gnu_ext` redefinition long before approaching the cap.
+
+Slurping the full 608 KB TU on its own brings the heap to 3 086 092 bytes
+(~3 MB) — bytevector storage for the source plus runtime baseline.
+
+Heap delta minus the start-of-process baseline (~1.21 MB scheme1
runtime + cc-init bufs at ~12 MB):
```
@@ -87,52 +114,24 @@ parse_heap - start_heap
The cg-init bufs themselves account for ~12 MB of the start baseline
(see %BUF-CAP-* in cc.scm). After amortizing them out, persistent
-parse state is closer to **0.4 KB / input byte** — limit the average
-adds across decls.
-
-### Remaining blocker — single-decl scratch peak
-
-Phase 3 bounds *steady-state* heap, but a single decl that allocates
-> SCRATCH_CAP_BYTES of per-token churn still aborts with `scratch
-exhausted`. tcc.flat.c contains exactly one such decl: the
-`enum tcc_token` block (lines 987–1612, ~16 KB of source) defines
-800+ enum constants in one go. scope-bind!'s `alist-ref` walk is O(N)
-per binding, and each interpreted recursion step extends the env, so
-the cumulative per-decl scratch is O(N²) in member count. 16 MiB,
-64 MiB, and 128 MiB scratch all overflow before the closing `}`;
-the actual peak for this single decl exceeds 128 MiB.
-
-Fix paths (any one suffices):
-
-- Make `scope-bind!` lookup sub-linear — bucketed alist or a real
- hashtable in scheme1. Cheapest big-win for this enum and for the
- general parser, which also pays alist-ref cost per identifier.
-- Bump SCRATCH_CAP_BYTES enough to absorb the worst single decl.
- Needs measurement to pick a final size; also needs the ELF
- `p_memsz` to grow correspondingly (currently 512 MiB, with
- 256 MiB heap + 128 MiB scratch + 1 MiB readbuf already inside it).
-- Bypass scope-bind!'s walk for enum-const insertion specifically
- (build the members list first, batch-bind at the end).
-
-Repro for the per-decl exhaustion:
+parse state at the 1k-line scale is closer to **0.4 KB / input
+byte**.
-```
-podman run --rm --pull=never --platform linux/arm64 \
- --tmpfs /tmp:size=512M -e ARCH=aarch64 \
- -v "$(pwd)":/work -w /work boot2-busybox:aarch64 \
- build/aarch64/scheme1 build/aarch64/cc/cc.scm --cc-debug \
- build/cc-bootstrap/X86_64/tcc.flat.c /tmp/tcc.flat.P1pp
-# -> [cc] phase=slurp ...
-# scheme1: scratch exhausted
-```
+### History — what the earlier numbers looked like
-Phase log shows the abort fires inside the big enum: `slurp` and the
-pre-enum decls all complete normally; the hit lands once parse
-descends into `parse-enum-spec`'s member loop.
+For posterity. Pre-Phase-3, parse-phase residency was roughly
+6.5 KB heap per source byte (1612-line cut: ~267 MB, just under cap;
+50 000 B head and full TU: heap exhausted). Post-Phase-3 dropped that
+to roughly 0.9 KB / byte at the steady state but left a single-decl
+*scratch* peak: the `enum tcc_token` block (lines 987–1612, 800+
+enum constants) overflowed even 128 MiB of scratch because
+`scope-bind!`'s `alist-ref` walk made cumulative per-decl scratch
+O(N²) in member count. The recent scratch / alist work makes that
+decl complete with parse heap at ~31 MB on the 1612-line cut.
## Suspected next-tier blockers (not yet observed)
-Past the heap gate, the next wave we expect:
+Past the `gnu_ext` tentative-def merge, the next wave we expect:
- **`_Bool`, bitfield-typed struct fields, `setjmp.h` typedefs** —
same "parse, don't codegen" softening already applied to floats.