boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8817bada986aa4c1f0bddf8eeca2d46ba931a52c
parent 53d5ebc3eb8a9780d0f6fce953ae71def9f283a6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  2 May 2026 09:25:09 -0700

cc: clear five cc-ext singletons (138 -> 143)

- 00050 anon-union init: %parse-init-struct-list/mode terminates after
  one element in brace-elision mode for unions, leaving sibling tokens
  for the parent.
- 00089/00095 &function: cg-take-addr retags a function-typed global
  rval as ptr-to-fn instead of dying on the lvalue check.
- 00152 #line MACRO: macro-expand the operand; capture pre-expansion
  source line for delta math; pp-eval-cexpr inherits cur-file and
  line-delta so __LINE__ inside a following #if reflects the new map.
- 00162 int x[const N] / [static N] / [*]: the array declarator eats
  type qualifiers, static, and the VLA `*` inside the brackets
  (C99 6.7.5.2 fn-param syntax).

Plus prerequisites that don't flip a test on their own: parse-aggregate-spec
and eat-cv-quals! eat __attribute__ where it can appear; parse-decl-cont
accepts a leading attribute; parse-cast-or-unary recognises a leading
attribute on the cast typename. __builtin_expect(x, y) is stubbed as (x).

Diffstat:
M cc/cc.scm | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M docs/CC-EXT.md | 63 ++++++++++++++++++++++++++++++++++++++++-----------------------
2 files changed, 123 insertions(+), 35 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -1994,7 +1994,20 @@ ("error" (cond ((%pp-active? state) (%pp-do-error (cons (car line) rest) state)) (else #t))) - ("line" (cond ((%pp-active? state) (%pp-do-line rest state)) (else #t))) + ("line" (cond ((%pp-active? state) + ;; Macro-expand the operands BEFORE + ;; processing (`#line MACRO`). Pre-expansion + ;; we capture the directive's source line so + ;; the line-delta math doesn't anchor on a + ;; macro definition site. + (let ((here (cond + ((null? rest) + (loc-line (tok-loc hash-tok))) + (else + (loc-line (tok-loc (car rest))))))) + (%pp-do-line (%pp-expand-line rest state) + state here))) + (else #t))) ("pragma" (cond ((%pp-active? state) (%pp-do-pragma rest state)) (else #t))) ("include" (cond ((%pp-active? state) (%pp-do-include rest state)) (else #t))) (else (die loc "unknown preprocessor directive" name))))) @@ -2095,7 +2108,7 @@ ((not (%pp-active? state)) (pps-cond-stack-set! state (cons (%pp-frame #f #f #f) (pps-cond-stack state)))) (else - (let* ((v (pp-eval-cexpr line (pps-macros state))) + (let* ((v (pp-eval-cexpr line state)) (a? (not (= v 0)))) (pps-cond-stack-set! state (cons (%pp-frame a? a? #f) (pps-cond-stack state))))))) @@ -2143,7 +2156,7 @@ ((or (not par?) taken?) (pps-cond-stack-set! state (cons (%pp-frame #f taken? #f) rest))) (else - (let* ((v (pp-eval-cexpr line (pps-macros state))) + (let* ((v (pp-eval-cexpr line state)) (a? (not (= v 0)))) (pps-cond-stack-set! state (cons (%pp-frame a? (or a? taken?) #f) rest)))))))))) @@ -2251,15 +2264,14 @@ ;; --- #line / #pragma / #include --- ;; Approximate #line: subsequent toks have line = (orig-line + delta), ;; where delta = (N - here-line - 1). Good enough for most cases. -(define (%pp-do-line line state) +(define (%pp-do-line line state here) (cond ((null? line) (die #f "#line requires a line number")) ((not (%pp-int? 
(car line))) (die (tok-loc (car line)) "#line: expected integer")) (else (let* ((nt (car line)) (n (tok-value nt)) - (rest (cdr line)) - (here (loc-line (tok-loc nt)))) + (rest (cdr line))) (pps-line-delta-set! state (- n here 1)) (cond ((null? rest) #t) @@ -2526,10 +2538,16 @@ (%world (list '()) (list '()) '() '()) '() #f #f)) -(define (pp-eval-cexpr toks macros) +(define (pp-eval-cexpr toks outer) + ;; `outer` is the live %pp-state. We mint a fresh state for #if + ;; evaluation but inherit cur-file and line-delta so __FILE__ / + ;; __LINE__ inside the expression reflect any preceding #line. (call-with-heap-rewind (lambda () - (let* ((state (%pp-state macros '() #f 0 #f '() '())) + (let* ((state (%pp-state (pps-macros outer) '() + (pps-cur-file outer) + (pps-line-delta outer) + #f '() '())) (s1 (%pp-resolve-defined toks state)) (s2 (%pp-expand-line s1 state)) (s3 (%pp-idents-as-zero s2)) @@ -3418,6 +3436,12 @@ ;; on use via cg-decay-array, not at the & operator. (pty (%ctype 'ptr 8 8 ty))) (pmatch p + ;; &function: a function designator (rval of fn type pushed by + ;; cg-push-sym) already evaluates to its entry-point address. The + ;; `&` is a no-op semantically — re-tag the operand as ptr-to-fn. + (($ opnd? (kind global) (type ,t) (ext ,lbl) (lval? #f)) + (guard (eq? (ctype-kind t) 'fn)) + (cg-push cg (%opnd 'global pty lbl #f))) (($ opnd? (lval? #f)) (die #f "cg-take-addr: not an lvalue")) ;; The address itself lives at sp+slot — &*p degenerates to p. (($ opnd? (kind frame) (ext ,off)) @@ -4381,7 +4405,9 @@ (define (ctype-is-arr? t) (eq? (ctype-kind t) 'arr)) (define (eat-cv-quals! ps) - (cond ((or (at-kw? ps 'const) (at-kw? ps 'volatile) + (cond ((at-kw? ps '__attribute__) + (skip-gnu-attribute! ps) (eat-cv-quals! ps)) + ((or (at-kw? ps 'const) (at-kw? ps 'volatile) (at-kw? ps 'restrict)) (advance ps) (eat-cv-quals! 
ps)) (else #t))) @@ -4499,9 +4525,13 @@ (define (parse-aggregate-spec ps kind) (advance ps) + ;; GCC `__attribute__((...))` may sit between `struct/union` and + ;; the tag/`{`. Eat and discard. + (eat-gnu-attributes! ps) (let ((tag (pmatch (peek ps) (($ tok? (kind IDENT)) (tok-value (advance ps))) (else #f)))) + (eat-gnu-attributes! ps) (cond ((at-punct? ps 'lbrace) (advance ps) @@ -5179,6 +5209,8 @@ (define (parse-decl-cont ps) (pmatch (peek ps) + (($ tok? (kind KW) (value __attribute__)) + (skip-gnu-attribute! ps) (parse-decl-cont ps)) (($ tok? (kind PUNCT) (value star)) (advance ps) (eat-cv-quals! ps) (let* ((r (parse-decl-cont ps)) (rf (cdr r))) @@ -5207,7 +5239,20 @@ (pmatch (peek ps) (($ tok? (kind PUNCT) (value lbrack)) (advance ps) + ;; C99 §6.7.5.2 allows `static`, type qualifiers (const / + ;; volatile / restrict), and `*` (variable length array + ;; placeholder) inside array-of-T brackets in function + ;; parameter declarators. We don't honour the qualifier + ;; semantics — just consume them so the dimension expression + ;; that follows parses. + (let lp () + (cond + ((or (at-kw? ps 'const) (at-kw? ps 'volatile) + (at-kw? ps 'restrict) (at-kw? ps 'static)) + (advance ps) (lp)) + (else #t))) (let* ((ln (cond ((at-punct? ps 'rbrack) -1) + ((at-punct? ps 'star) (advance ps) -1) (else (parse-const-int ps)))) (_ (expect-punct ps 'rbrack)) (r (parse-decl-suf-cont ps))) @@ -5996,13 +6041,20 @@ ;; targets the enclosing aggregate). Doesn't consume the trailing ;; comma after the last field — that belongs to the parent list. (let* ((fields (%init-struct-fields ty)) - (size (ctype-size ty))) + (size (ctype-size ty)) + (union? (eq? (ctype-kind ty) 'union))) (let lp ((entries '()) (rest fields)) (cond ((cond (brace? (at-punct? ps 'rbrace)) (else (or (null? rest) (at-punct? ps 'rbrace) - (at-punct? ps 'dot)))) + (at-punct? 
ps 'dot) + ;; Union in brace-elision mode: take one + ;; member then return — the next sibling + ;; initializer belongs to the parent + ;; (C99 §6.7.8 ¶22 + union has one active + ;; member at a time). + (and union? (pair? entries))))) (cond (brace? (advance ps))) (%merge-init-entries (reverse entries) size)) (else @@ -6070,6 +6122,10 @@ (cond ((at-punct? ps 'comma) (advance ps)))) (else (cond ((and (not (null? rest1)) + ;; Union in brace-elision mode terminates + ;; after one element regardless of rest1; + ;; that means the comma belongs to the parent. + (not union?) (at-punct? ps 'comma)) (advance ps))))) (lp (cons (cons foff piece-list) entries) rest1))))))) @@ -6906,7 +6962,11 @@ (eq? v '_Complex) (eq? v '_Imaginary) (eq? v 'struct) (eq? v 'union) (eq? v 'enum) (eq? v 'const) (eq? v 'volatile) - (eq? v 'restrict))) + (eq? v 'restrict) + ;; A leading GNU attribute on the cast typename + ;; (e.g. `((__attribute__((...)) int(*)(void))ptr)()`) + ;; — eaten by parse-decl-spec. + (eq? v '__attribute__))) (advance ps) (let*-values (((_sto bty) (parse-decl-spec ps)) ((_n ty) (parse-declarator ps bty))) @@ -7144,6 +7204,16 @@ (expect-punct ps 'rparen) (cg-va-arg (ps-cg ps) ty))) +(define (parse-builtin-expect ps) + ;; GCC `__builtin_expect(EXPR, EXPECTED)` — branch-prediction hint. + ;; We ignore the hint and emit just the value of EXPR. + (advance ps) ; IDENT + (expect-punct ps 'lparen) + (parse-expr-bp ps 4) (rval! 
ps) ; result + (expect-punct ps 'comma) + (parse-expr-bp ps 4) (cg-pop (ps-cg ps)) ; expected (drop) + (expect-punct ps 'rparen)) + (define (parse-builtin-va-end ps) (advance ps) ; IDENT (expect-punct ps 'lparen) @@ -7170,6 +7240,7 @@ ((bv= n "__builtin_va_start") (parse-builtin-va-start ps)) ((bv= n "__builtin_va_arg") (parse-builtin-va-arg ps)) ((bv= n "__builtin_va_end") (parse-builtin-va-end ps)) + ((bv= n "__builtin_expect") (parse-builtin-expect ps)) (else (let ((sm (scope-lookup ps n))) (advance ps) diff --git a/docs/CC-EXT.md b/docs/CC-EXT.md @@ -40,38 +40,55 @@ auto-skips: the goal is an honest pass/fail count that drops as ## Current status (aarch64, snapshot 2026-05-02) -- **138 PASS / 82 FAIL** out of 220 fixtures. +- **143 PASS / 77 FAIL** out of 220 fixtures. Failure groups, largest first: | count | error | likely root cause | |------:|-------|-------------------| | 62 | `#include: file inclusion is handled upstream by pre-flatten` | preprocessor doesn't resolve system headers; needs-libc tests use `#include <stdio.h>`. The `cc-libc` suite sidesteps this with explicit `extern int printf(...)` declarations. Either (a) add an include search path + minimal `stdio.h`/`stdlib.h`/`string.h` shims that emit the same `extern` declarations, or (b) pre-flatten these fixtures before handing them to `cc.scm`. | -| 5 | `const-expr: bad operand: lbrack` | array-element address as a constant initializer (e.g. `int *p = &a[2];` at file scope). `eval-const-expr` doesn't fold `[]` indexing. | -| 3 | P1pp assemble (hex2 link) failed — fixtures 00211/00215/00217 | undefined symbols against `mes-libc` (the fixtures call libc routines we haven't wired up). | -| 2 | `cg-take-addr: not an lvalue` — fixtures 00089, 00095 | taking the address of an expression we don't classify as lvalue (likely struct/union field through a pointer-deref chain). | -| 1 | `unexp: lbrace` | compound literal or designated initializer outside the cases we handle. 
| -| 1 | `expected punct: rbrace` — fixture 00050 | anonymous union inside struct initializer. | -| 1 | `anon agg` | anonymous struct/union member declaration. | -| 1 | `init: too many fields` | initializer-list overrun (likely flexible array or designated init). | -| 1 | `field` | (terse error from `cg`) — needs investigation. | -| 1 | `floating-point literal not supported` — fixture 00123 | no float support in cc.scm. | -| 1 | `undecl: L` | wide-char literal `L"..."` / `L'x'`. | -| 1 | `undecl: __builtin_expect` | GCC builtin not stubbed. | -| 1 | `const-expr: bad operand: const` | `const`-qualified expression in a constant-expression context. | -| 1 | `#line: expected integer` | preprocessor `#line` directive parser. | - -Two fixtures (00056, 00208) report "cc compile failed" with the -diagnostic on stderr; both have `#include <stdio.h>` first lines, so -they likely fold into the include bucket once that's resolved. +| 5 | `const-expr: bad operand: lbrack` | designated array initializer `[N] = ...`. Out of scope — cc.scm intentionally doesn't support designated array init. | +| 4 | P1pp assemble (hex2 link) failed — fixtures 00210/00211/00215/00217 | undefined symbols against libc. 00211/00215/00217 are tagged `needs-libc` but call routines we haven't wired up; 00210 uses `printf` without the `needs-libc` tag, so the runner pipelines it bare. | +| 2 | `unexp: lbrace` — fixtures 00213, 00214 | GCC statement expression `({ ... })`. Substantial feature; not on the path for tcc.c bootstrap. | +| 1 | `init: too many fields` — fixture 00216 | extensive use of designated/range/flex-array initializer features (`[1 ... 5] = 9`, etc.). | +| 1 | `field` — fixture 00218 | bit-field declaration (`enum tree_code code : 8`). Bit-fields not implemented. | +| 1 | `floating-point literal not supported` — fixture 00123 | no float support in cc.scm. Out of scope. | +| 1 | `undecl: L` — fixture 00098 | wide-char literal `L"..."` / `L'x'`. Out of scope. 
| + +## Recently fixed + +The 2026-05-02 sweep flipped 5 singletons green via small targeted +changes: + +- 00050 — anon-union inside struct init: `%parse-init-struct-list/mode` + in brace-elision mode now terminates after one element when the + aggregate is a union, leaving the next sibling for the parent. +- 00089, 00095 — `&function`: `cg-take-addr` retags a function-typed + global rval as ptr-to-fn instead of dying on lvalue check. +- 00152 — `#line MACRO`: `#line` operands are now macro-expanded; the + pre-expansion source line is captured for the delta math, and + `pp-eval-cexpr` inherits cur-file/line-delta so `__LINE__` inside a + following `#if` reflects the new mapping. +- 00162 — `int x[const 5]` / `int x[static 5]` / `int x[*]`: the array + declarator now consumes type-qualifiers, `static`, and the VLA `*` + inside `[…]` (C99 §6.7.5.2 fn-param syntax). + +Plus three preparatory changes that don't flip a test on their own +but unblock attribute-heavy code: `parse-aggregate-spec` and +`eat-cv-quals!` eat `__attribute__` between tag and `{`, between `*` +and the next declarator piece; `parse-decl-cont` accepts a leading +`__attribute__`; and `parse-cast-or-unary` recognises a leading +attribute on the cast typename. `__builtin_expect(x, y)` is stubbed +as `(x)`. ## Next steps for bug hunting -The const-expr-on-arrays cluster (5 fixtures: 00092, 00147, 00148, -00150, 00151) is the highest-leverage non-include group — one fix, -several tests flip green. cg-take-addr and the various initializer -errors are next; each is one or two fixtures, so spend time only if -the fix is small or the underlying capability is wanted for `tcc.c`. +Designated array init (`[N] = ...`), wide literals, and floats are +intentionally not supported. 
The remaining tractable bucket is small +and feature-shaped — statement expressions (213/214), bit-fields +(218), and the kitchen-sink initializer fixture (216) — each is a +substantial standalone feature, so spend time on them only if the +underlying capability is wanted for `tcc.c`. The 60+ `#include` failures are gated on a single design call: either land a minimal header search path or accept that these fixtures stay