commit 8817bada986aa4c1f0bddf8eeca2d46ba931a52c
parent 53d5ebc3eb8a9780d0f6fce953ae71def9f283a6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 2 May 2026 09:25:09 -0700
cc: clear five cc-ext singletons (138 -> 143)
- 00050 anon-union init: %parse-init-struct-list/mode terminates after
one element in brace-elision mode for unions, leaving sibling tokens
for the parent.
- 00089/00095 &function: cg-take-addr retags a function-typed global
rval as ptr-to-fn instead of dying on the lvalue check.
- 00152 #line MACRO: macro-expand the operand; capture pre-expansion
source line for delta math; pp-eval-cexpr inherits cur-file and
line-delta so __LINE__ inside a following #if reflects the new map.
- 00162 int x[const N] / [static N] / [*]: the array declarator eats
type qualifiers, static, and the VLA `*` inside the brackets
(C99 6.7.5.2 fn-param syntax).
Plus prerequisites that don't flip a test on their own: parse-aggregate-spec
and eat-cv-quals! eat __attribute__ where it can appear; parse-decl-cont
accepts a leading attribute; parse-cast-or-unary recognises a leading
attribute on the cast typename. __builtin_expect(x, y) is stubbed as (x).
Diffstat:
| M | cc/cc.scm | | | 95 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- |
| M | docs/CC-EXT.md | | | 63 | ++++++++++++++++++++++++++++++++++++++++----------------------- |
2 files changed, 123 insertions(+), 35 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -1994,7 +1994,20 @@
("error" (cond ((%pp-active? state)
(%pp-do-error (cons (car line) rest) state))
(else #t)))
- ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t)))
+ ("line" (cond ((%pp-active? state)
+ ;; Macro-expand the operands BEFORE
+ ;; processing (`#line MACRO`). Pre-expansion
+ ;; we capture the directive's source line so
+ ;; the line-delta math doesn't anchor on a
+ ;; macro definition site.
+ (let ((here (cond
+ ((null? rest)
+ (loc-line (tok-loc hash-tok)))
+ (else
+ (loc-line (tok-loc (car rest)))))))
+ (%pp-do-line (%pp-expand-line rest state)
+ state here)))
+ (else #t)))
("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t)))
("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t)))
(else (die loc "unknown preprocessor directive" name)))))
@@ -2095,7 +2108,7 @@
((not (%pp-active? state))
(pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state))))
(else
- (let* ((v (pp-eval-cexpr line (pps-macros state)))
+ (let* ((v (pp-eval-cexpr line state))
(a? (not (= v 0))))
(pps-cond-stack-set! state (cons (%pp-frame a? a? #f) (pps-cond-stack state)))))))
@@ -2143,7 +2156,7 @@
((or (not par?) taken?)
(pps-cond-stack-set! state (cons (%pp-frame #f taken? #f) rest)))
(else
- (let* ((v (pp-eval-cexpr line (pps-macros state)))
+ (let* ((v (pp-eval-cexpr line state))
(a? (not (= v 0))))
(pps-cond-stack-set! state
(cons (%pp-frame a? (or a? taken?) #f) rest))))))))))
@@ -2251,15 +2264,14 @@
;; --- #line / #pragma / #include ---
;; Approximate #line: subsequent toks have line = (orig-line + delta),
;; where delta = (N - here-line - 1). Good enough for most cases.
-(define (%pp-do-line line state)
+(define (%pp-do-line line state here)
(cond
((null? line) (die #f "#line requires a line number"))
((not (%pp-int? (car line)))
(die (tok-loc (car line)) "#line: expected integer"))
(else
(let* ((nt (car line)) (n (tok-value nt))
- (rest (cdr line))
- (here (loc-line (tok-loc nt))))
+ (rest (cdr line)))
(pps-line-delta-set! state (- n here 1))
(cond
((null? rest) #t)
@@ -2526,10 +2538,16 @@
(%world (list '()) (list '()) '() '())
'() #f #f))
-(define (pp-eval-cexpr toks macros)
+(define (pp-eval-cexpr toks outer)
+ ;; `outer` is the live %pp-state. We mint a fresh state for #if
+ ;; evaluation but inherit cur-file and line-delta so __FILE__ /
+ ;; __LINE__ inside the expression reflect any preceding #line.
(call-with-heap-rewind
(lambda ()
- (let* ((state (%pp-state macros '() #f 0 #f '() '()))
+ (let* ((state (%pp-state (pps-macros outer) '()
+ (pps-cur-file outer)
+ (pps-line-delta outer)
+ #f '() '()))
(s1 (%pp-resolve-defined toks state))
(s2 (%pp-expand-line s1 state))
(s3 (%pp-idents-as-zero s2))
@@ -3418,6 +3436,12 @@
;; on use via cg-decay-array, not at the & operator.
(pty (%ctype 'ptr 8 8 ty)))
(pmatch p
+ ;; &function: a function designator (rval of fn type pushed by
+ ;; cg-push-sym) already evaluates to its entry-point address. The
+ ;; `&` is a no-op semantically — re-tag the operand as ptr-to-fn.
+ (($ opnd? (kind global) (type ,t) (ext ,lbl) (lval? #f))
+ (guard (eq? (ctype-kind t) 'fn))
+ (cg-push cg (%opnd 'global pty lbl #f)))
(($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue"))
;; The address itself lives at sp+slot — &*p degenerates to p.
(($ opnd? (kind frame) (ext ,off))
@@ -4381,7 +4405,9 @@
(define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr))
(define (eat-cv-quals! ps)
- (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
+ (cond ((at-kw? ps '__attribute__)
+ (skip-gnu-attribute! ps) (eat-cv-quals! ps))
+ ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
(at-kw? ps 'restrict))
(advance ps) (eat-cv-quals! ps))
(else #t)))
@@ -4499,9 +4525,13 @@
(define (parse-aggregate-spec ps kind)
(advance ps)
+ ;; GCC `__attribute__((...))` may sit between `struct/union` and
+ ;; the tag/`{`. Eat and discard.
+ (eat-gnu-attributes! ps)
(let ((tag (pmatch (peek ps)
(($ tok? (kind IDENT)) (tok-value (advance ps)))
(else #f))))
+ (eat-gnu-attributes! ps)
(cond
((at-punct? ps 'lbrace)
(advance ps)
@@ -5179,6 +5209,8 @@
(define (parse-decl-cont ps)
(pmatch (peek ps)
+ (($ tok? (kind KW) (value __attribute__))
+ (skip-gnu-attribute! ps) (parse-decl-cont ps))
(($ tok? (kind PUNCT) (value star))
(advance ps) (eat-cv-quals! ps)
(let* ((r (parse-decl-cont ps)) (rf (cdr r)))
@@ -5207,7 +5239,20 @@
(pmatch (peek ps)
(($ tok? (kind PUNCT) (value lbrack))
(advance ps)
+ ;; C99 §6.7.5.2 allows `static`, type qualifiers (const /
+ ;; volatile / restrict), and `*` (variable length array
+ ;; placeholder) inside array-of-T brackets in function
+ ;; parameter declarators. We don't honour the qualifier
+ ;; semantics — just consume them so the dimension expression
+ ;; that follows parses.
+ (let lp ()
+ (cond
+ ((or (at-kw? ps 'const) (at-kw? ps 'volatile)
+ (at-kw? ps 'restrict) (at-kw? ps 'static))
+ (advance ps) (lp))
+ (else #t)))
(let* ((ln (cond ((at-punct? ps 'rbrack) -1)
+ ((at-punct? ps 'star) (advance ps) -1)
(else (parse-const-int ps))))
(_ (expect-punct ps 'rbrack))
(r (parse-decl-suf-cont ps)))
@@ -5996,13 +6041,20 @@
;; targets the enclosing aggregate). Doesn't consume the trailing
;; comma after the last field — that belongs to the parent list.
(let* ((fields (%init-struct-fields ty))
- (size (ctype-size ty)))
+ (size (ctype-size ty))
+ (union? (eq? (ctype-kind ty) 'union)))
(let lp ((entries '()) (rest fields))
(cond
((cond (brace? (at-punct? ps 'rbrace))
(else (or (null? rest)
(at-punct? ps 'rbrace)
- (at-punct? ps 'dot))))
+ (at-punct? ps 'dot)
+ ;; Union in brace-elision mode: take one
+ ;; member then return — the next sibling
+ ;; initializer belongs to the parent
+ ;; (C99 §6.7.8 ¶20: only the first member
+ ;; of a contained union is initialized).
+ (and union? (pair? entries)))))
(cond (brace? (advance ps)))
(%merge-init-entries (reverse entries) size))
(else
@@ -6070,6 +6122,10 @@
(cond ((at-punct? ps 'comma) (advance ps))))
(else
(cond ((and (not (null? rest1))
+ ;; Union in brace-elision mode terminates
+ ;; after one element regardless of rest1;
+ ;; that means the comma belongs to the parent.
+ (not union?)
(at-punct? ps 'comma))
(advance ps)))))
(lp (cons (cons foff piece-list) entries) rest1)))))))
@@ -6906,7 +6962,11 @@
(eq? v '_Complex) (eq? v '_Imaginary)
(eq? v 'struct) (eq? v 'union) (eq? v 'enum)
(eq? v 'const) (eq? v 'volatile)
- (eq? v 'restrict)))
+ (eq? v 'restrict)
+ ;; A leading GNU attribute on the cast typename
+ ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`)
+ ;; — eaten by parse-decl-spec.
+ (eq? v '__attribute__)))
(advance ps)
(let*-values (((_sto bty) (parse-decl-spec ps))
((_n ty) (parse-declarator ps bty)))
@@ -7144,6 +7204,16 @@
(expect-punct ps 'rparen)
(cg-va-arg (ps-cg ps) ty)))
+(define (parse-builtin-expect ps)
+ ;; GCC `__builtin_expect(EXPR, EXPECTED)` — branch-prediction hint.
+ ;; We ignore the hint and emit just the value of EXPR.
+ (advance ps) ; IDENT
+ (expect-punct ps 'lparen)
+ (parse-expr-bp ps 4) (rval! ps) ; result
+ (expect-punct ps 'comma)
+ (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) ; expected (drop)
+ (expect-punct ps 'rparen))
+
(define (parse-builtin-va-end ps)
(advance ps) ; IDENT
(expect-punct ps 'lparen)
@@ -7170,6 +7240,7 @@
((bv= n "__builtin_va_start") (parse-builtin-va-start ps))
((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps))
((bv= n "__builtin_va_end") (parse-builtin-va-end ps))
+ ((bv= n "__builtin_expect") (parse-builtin-expect ps))
(else
(let ((sm (scope-lookup ps n)))
(advance ps)
diff --git a/docs/CC-EXT.md b/docs/CC-EXT.md
@@ -40,38 +40,55 @@ auto-skips: the goal is an honest pass/fail count that drops as
## Current status (aarch64, snapshot 2026-05-02)
-- **138 PASS / 82 FAIL** out of 220 fixtures.
+- **143 PASS / 77 FAIL** out of 220 fixtures.
Failure groups, largest first:
| count | error | likely root cause |
|------:|-------|-------------------|
| 62 | `#include: file inclusion is handled upstream by pre-flatten` | preprocessor doesn't resolve system headers; needs-libc tests use `#include <stdio.h>`. The `cc-libc` suite sidesteps this with explicit `extern int printf(...)` declarations. Either (a) add an include search path + minimal `stdio.h`/`stdlib.h`/`string.h` shims that emit the same `extern` declarations, or (b) pre-flatten these fixtures before handing them to `cc.scm`. |
-| 5 | `const-expr: bad operand: lbrack` | array-element address as a constant initializer (e.g. `int *p = &a[2];` at file scope). `eval-const-expr` doesn't fold `[]` indexing. |
-| 3 | P1pp assemble (hex2 link) failed — fixtures 00211/00215/00217 | undefined symbols against `mes-libc` (the fixtures call libc routines we haven't wired up). |
-| 2 | `cg-take-addr: not an lvalue` — fixtures 00089, 00095 | taking the address of an expression we don't classify as lvalue (likely struct/union field through a pointer-deref chain). |
-| 1 | `unexp: lbrace` | compound literal or designated initializer outside the cases we handle. |
-| 1 | `expected punct: rbrace` — fixture 00050 | anonymous union inside struct initializer. |
-| 1 | `anon agg` | anonymous struct/union member declaration. |
-| 1 | `init: too many fields` | initializer-list overrun (likely flexible array or designated init). |
-| 1 | `field` | (terse error from `cg`) — needs investigation. |
-| 1 | `floating-point literal not supported` — fixture 00123 | no float support in cc.scm. |
-| 1 | `undecl: L` | wide-char literal `L"..."` / `L'x'`. |
-| 1 | `undecl: __builtin_expect` | GCC builtin not stubbed. |
-| 1 | `const-expr: bad operand: const` | `const`-qualified expression in a constant-expression context. |
-| 1 | `#line: expected integer` | preprocessor `#line` directive parser. |
-
-Two fixtures (00056, 00208) report "cc compile failed" with the
-diagnostic on stderr; both have `#include <stdio.h>` first lines, so
-they likely fold into the include bucket once that's resolved.
+| 5 | `const-expr: bad operand: lbrack` | designated array initializer `[N] = ...`. Out of scope — cc.scm intentionally doesn't support designated array init. |
+| 4 | P1pp assemble (hex2 link) failed — fixtures 00210/00211/00215/00217 | undefined symbols against libc. 00211/00215/00217 are tagged `needs-libc` but call routines we haven't wired up; 00210 uses `printf` without the `needs-libc` tag, so the runner pipelines it bare. |
+| 2 | `unexp: lbrace` — fixtures 00213, 00214 | GCC statement expression `({ ... })`. Substantial feature; not on the path for tcc.c bootstrap. |
+| 1 | `init: too many fields` — fixture 00216 | extensive use of designated/range/flex-array initializer features (`[1 ... 5] = 9`, etc.). |
+| 1 | `field` — fixture 00218 | bit-field declaration (`enum tree_code code : 8`). Bit-fields not implemented. |
+| 1 | `floating-point literal not supported` — fixture 00123 | no float support in cc.scm. Out of scope. |
+| 1 | `undecl: L` — fixture 00098 | wide-char literal `L"..."` / `L'x'`. Out of scope. |
+
+## Recently fixed
+
+The 2026-05-02 sweep flipped 5 singletons green via small targeted
+changes:
+
+- 00050 — anon-union inside struct init: `%parse-init-struct-list/mode`
+ in brace-elision mode now terminates after one element when the
+ aggregate is a union, leaving the next sibling for the parent.
+- 00089, 00095 — `&function`: `cg-take-addr` retags a function-typed
+ global rval as ptr-to-fn instead of dying on lvalue check.
+- 00152 — `#line MACRO`: `#line` operands are now macro-expanded; the
+ pre-expansion source line is captured for the delta math, and
+ `pp-eval-cexpr` inherits cur-file/line-delta so `__LINE__` inside a
+ following `#if` reflects the new mapping.
+- 00162 — `int x[const 5]` / `int x[static 5]` / `int x[*]`: the array
+ declarator now consumes type-qualifiers, `static`, and the VLA `*`
+ inside `[…]` (C99 §6.7.5.2 fn-param syntax).
+
+Plus three preparatory changes that don't flip a test on their own
+but unblock attribute-heavy code: `parse-aggregate-spec` eats
+`__attribute__` before and after the struct/union tag, and
+`eat-cv-quals!` eats it among the qualifiers (e.g. after `*`);
+`parse-decl-cont` accepts a leading `__attribute__`; and
+`parse-cast-or-unary` recognises a leading attribute on the cast
+typename. Separately, `__builtin_expect(x, y)` is stubbed as `(x)`.
## Next steps for bug hunting
-The const-expr-on-arrays cluster (5 fixtures: 00092, 00147, 00148,
-00150, 00151) is the highest-leverage non-include group — one fix,
-several tests flip green. cg-take-addr and the various initializer
-errors are next; each is one or two fixtures, so spend time only if
-the fix is small or the underlying capability is wanted for `tcc.c`.
+Designated array init (`[N] = ...`), wide literals, and floats are
+intentionally not supported. The remaining tractable bucket is small
+and feature-shaped: statement expressions (00213/00214), bit-fields
+(00218), and the kitchen-sink initializer fixture (00216). Each is a
+substantial standalone feature, so spend time on them only if the
+underlying capability is wanted for `tcc.c`.
The 60+ `#include` failures are gated on a single design call: either
land a minimal header search path or accept that these fixtures stay