commit f8ac00123345f509e1e52cfa59addbf546b5bcc7
parent e883106d20eda5ab8aa4c0f52c3594c9043b633c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 20:08:10 -0700
cc/parse: support file-scope compound literals (C99 §6.5.2.5)
Diffstat:
7 files changed, 106 insertions(+), 7 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -4107,6 +4107,17 @@
(%cg-bv->hex-lines bv-content #t))))
lbl)))))
+;; Mint a fresh, never-recurring label for an unnamed file-scope
+;; compound literal. Mirrors cg-intern-string's namer pattern (prefix +
+;; "cc__cl_" + N), with N drawn from cg-label-ctr — the same monotonic
+;; counter the per-fn label minters use. Different prefix → no collision
+;; with `Lcc__N` / `lbl_N`.
+(define (%cg-fresh-cl-label cg)
+  (let ((seq (cg-label-ctr cg))) ; counter value that names this literal
+    (let ((name (bytevector-append (cg-str-prefix cg) "cc__cl_" (%n seq))))
+      (cg-label-ctr-set! cg (+ seq 1)) ; bump so the next mint gets a new N
+      name)))
+
;; Render BV's bytes as `'XXXXXX'` quoted-hex M0 literals — uniform
;; format for every byte, regardless of whether it would otherwise be
;; printable. Avoids the `"..."` lex path entirely (m1pp's quoted-text
@@ -5417,14 +5428,34 @@
(bytevector-u8-set! out i (bit-and v 255))
(loop (+ i 1) (arithmetic-shift v -8)))))))
+;; File-scope compound literal (C99 §6.5.2.5). A `(T){ ... }` in a
+;; static-storage initializer, either bare or as the operand of `&`,
+;; denotes an unnamed object with static storage duration. Drive
+;; the existing parse-init-global → cg-emit-global pipeline against a
+;; synthetic sym whose label is freshly minted via %cg-fresh-cl-label.
+;; Returns the emitted label; the caller wraps it in a (label-ref . LBL)
+;; piece. The leading `(T)` and the storage-class disambiguation belong
+;; to the caller — this entry point assumes peek = `{`.
+(define (%emit-fs-compound-literal ps ty)
+  (let-values (((init-pieces full-ty) (parse-init-global ps ty)))
+    ;; The synthetic symbol is given 'extern storage so that
+    ;; %cg-sym-label leaves the minted name untouched (no additional
+    ;; "cc__" prefix): the label emitted is the very label returned.
+    (let* ((label (%cg-fresh-cl-label (ps-cg ps)))
+           (anon-sym (%sym label 'var 'extern full-ty #f #t)))
+      (cg-emit-global (ps-cg ps) anon-sym init-pieces)
+      label)))
+
(define (%const-init-piece ps ty)
;; Parse a non-brace initializer expression for scalar type `ty` and
;; return a single piece. Recognised forms:
;; - INT (with optional unary +/-) -> N-byte LE bv
;; - enum-const IDENT -> N-byte LE bv
;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name)
+ ;; - &(T){...} (address of file-scope literal) -> (label-ref . cc__cl_N)
;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name)
;; - STR (only for char* targets) -> (label-ref . string-pool-label)
+ ;; - (T){...} (file-scope compound literal) -> (label-ref . cc__cl_N)
(let ((t (peek ps)))
(cond
;; Address initializer: &ident -> label-ref
@@ -5445,7 +5476,59 @@
(else
(die (tok-loc it) "init: &x must reference a global"
(tok-value it))))))
+ ;; &(T){...} — address of an unnamed file-scope compound
+ ;; literal. Parse the typename, expect `{`, drive the
+ ;; literal into .data, and yield its label.
+ ((and (eq? (tok-kind it) 'PUNCT) (eq? (tok-value it) 'lparen)
+ (%const-paren-is-cast? ps))
+ (advance ps)
+ (let*-values (((_sto bty) (parse-decl-spec ps))
+ ((_n ty2) (parse-declarator ps bty)))
+ (expect-punct ps 'rparen)
+ (cond
+ ((not (at-punct? ps 'lbrace))
+ (die (tok-loc (peek ps))
+ "init: &(T) must be followed by { ... }"
+ (tok-value (peek ps)))))
+ (cons 'label-ref (%emit-fs-compound-literal ps ty2))))
(else (die (tok-loc it) "init: &?" (tok-value it))))))
+ ;; (T){...} — file-scope compound literal. The literal is an
+ ;; lvalue of array/struct/union type; assignment to a pointer
+ ;; target decays it via its label address (label = first byte).
+ ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'lparen)
+ (%const-paren-is-cast? ps)
+ ;; Speculatively look past `(T)` for `{`. Since we have no
+ ;; 3-token peek, we have to commit to the (T) parse; if the
+ ;; following token isn't `{` it's a plain cast, so we fall
+ ;; back to the const-int path with the type already consumed.
+ #t)
+ ;; Take the (T) ourselves so we can dispatch on the next token.
+ (advance ps)
+ (let*-values (((_sto bty) (parse-decl-spec ps))
+ ((_n ty2) (parse-declarator ps bty)))
+ (expect-punct ps 'rparen)
+ (cond
+ ((at-punct? ps 'lbrace)
+ (cons 'label-ref (%emit-fs-compound-literal ps ty2)))
+ (else
+ ;; Not a compound literal — it's a constant cast, e.g.
+ ;; `(int)(unsigned char)257`. Mirror parse-const-cast's
+ ;; cast arm with the already-parsed type.
+ (cond
+ ((%ctype-int? ty2)
+ (let ((v (parse-const-cast ps)))
+ (%int->le-bv (%const-trunc (car v) ty2)
+ (max (ctype-size ty) 1))))
+ ((eq? (ctype-kind ty2) 'ptr)
+ ;; Pointer cast in const-expr: type-retag only. We expect
+ ;; the operand to be an integer-shaped const (e.g. 0) and
+ ;; emit it as the target's byte width.
+ (let ((v (parse-const-cast ps)))
+ (%int->le-bv (car v) (max (ctype-size ty) 1))))
+ (else
+ (die (tok-loc (peek ps))
+ "init: cast to non-scalar non-compound-literal"
+ (ctype-kind ty2))))))))
;; Function name or array name as a label-ref initializer.
;; (Both decay to a pointer when used as a value.)
((and (eq? (tok-kind t) 'IDENT)
@@ -6572,14 +6655,20 @@
;; --------------------------------------------------------------------
;; Compound literals (C99 §6.5.2.5): (T){ init-list }
;;
-;; Allocate a fresh frame slot sized for T, drive the existing
-;; local-aggregate initializer path against it, then push a frame lval
-;; typed as T. The literal is an lvalue with automatic storage tied to
-;; the enclosing block, so &literal, literal.field, literal[i], byval
-;; pass, and array decay all chain through the existing primitives
-;; (cg-take-addr / cg-push-field / cg-decay-array via rval!).
+;; Block scope — allocate a fresh frame slot sized for T, drive the
+;; existing local-aggregate initializer path against it, then push a
+;; frame lval typed as T. The literal is an lvalue with automatic
+;; storage tied to the enclosing block, so &literal, literal.field,
+;; literal[i], byval pass, and array decay all chain through the
+;; existing primitives (cg-take-addr / cg-push-field / cg-decay-array
+;; via rval!).
;;
-;; File-scope literals are out of scope.
+;; File scope — handled out-of-band in %const-init-piece (incl. its `&`
+;; arm) via %emit-fs-compound-literal: pieces go to .data under a fresh
+;; cc__cl_N label and the enclosing initializer takes a (label-ref . LBL)
+;; piece. Reaching parse-compound-literal at file scope would mean an
+;; expression context outside an initializer (which file scope doesn't
+;; have), so this entry point still rejects it.
;; --------------------------------------------------------------------
(define (parse-compound-literal ps ty)
(cond
diff --git a/tests/cc/330-fs-compound-array.c b/tests/cc/330-fs-compound-array.c
@@ -0,0 +1,2 @@
+int *p = (int[]){10,20,30,40}; /* file-scope array compound literal used as an int* initializer */
+int main(void) { return p[0]+p[1]+p[2]+p[3]-100; } /* 10+20+30+40-100: exits 0 when all four elements are intact */
diff --git a/tests/cc/330-fs-compound-array.expected-exit b/tests/cc/330-fs-compound-array.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/331-fs-compound-struct-addr.c b/tests/cc/331-fs-compound-struct-addr.c
@@ -0,0 +1,3 @@
+struct point { int x,y; };
+struct point *o = &(struct point){3,4}; /* address of an unnamed file-scope struct compound literal */
+int main(void) { return o->x*10 + o->y; } /* 3*10 + 4: exits 34 when both fields are intact */
diff --git a/tests/cc/331-fs-compound-struct-addr.expected-exit b/tests/cc/331-fs-compound-struct-addr.expected-exit
@@ -0,0 +1 @@
+34
diff --git a/tests/cc/332-fs-compound-char-array.c b/tests/cc/332-fs-compound-char-array.c
@@ -0,0 +1,2 @@
+char *s = (char[]){'h','i',0}; /* file-scope char-array compound literal used as a char* initializer */
+int main(void) { return s[0] - 'a'; } /* 'h' - 'a': exits 7 */
diff --git a/tests/cc/332-fs-compound-char-array.expected-exit b/tests/cc/332-fs-compound-char-array.expected-exit
@@ -0,0 +1 @@
+7