commit bc2bb2738400cc80abd4b9a6c883679a0562e0a2
parent e883106d20eda5ab8aa4c0f52c3594c9043b633c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 19:40:05 -0700
merge: block-scope inferred-length arrays (rebased on current main)
Diffstat:
5 files changed, 144 insertions(+), 1 deletion(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -5301,6 +5301,91 @@
(handle-decl ps sto n2 t2) (lp)))
(else (expect-punct ps 'semi) 'decl))))))))))
+;; ---- Block-scope inferred-length array length resolution -------------
+;; The token iterator buffers lookahead in a list (see tok-iter); we
+;; can pull arbitrarily many tokens, then push them all back via
+;; iter-unget!. We use that to peek the initializer that follows `=`
+;; (without consuming it) and count its elements so cg-alloc-slot can
+;; reserve the right number of bytes BEFORE the initializer-emission
+;; loop runs (and starts spilling intermediate values into newly-
+;; allocated frame slots).
+;;
+;; Only the OUTERMOST length is inferred per C99 6.7.8/22, so for
+;; `int x[][3] = {{1,2,3},{4,5,6}};` we just count top-level
+;; brace-or-comma groups; the inner brace groups don't matter.
+
+(define (%peek-inferred-arr-init? ps)
+ ;; Check whether the next-after-`=` token starts a brace-init or a
+ ;; string-literal — the only initializer shapes that can resolve a
+ ;; block-scope inferred-length array. We do NOT consume `=`; we
+ ;; peek2 instead.
+ (let ((t2 (peek2 ps)))
+ (or (and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace))
+ (eq? (tok-kind t2) 'STR))))
+
+(define (%resolve-inferred-arr-len ps ty)
+ ;; Returns a fresh array ctype with the resolved length. Does NOT
+ ;; consume the `=` or any of the initializer tokens — every token
+ ;; pulled is unget back in original order.
+ (let* ((eq-tok (iter-next (ps-iter ps))) ; consume `=` (will unget)
+ (first (iter-next (ps-iter ps))) ; consume `{` or STR
+ (collected (list first eq-tok)) ; head order: revs at end
+ (count
+ (cond
+ ((eq? (tok-kind first) 'STR)
+ ;; String length + NUL.
+ (+ (bytevector-length (tok-value first)) 1))
+ (else
+ ;; first is `{`. Count top-level commas + 1, ignoring a
+ ;; trailing comma before `}`. Track brace depth so nested
+ ;; `{` for sub-aggregates are skipped.
+ (let lp ((depth 1) (n 0) (saw-elem? #f) (last-was-comma? #f)
+ (acc collected))
+ (let ((t (iter-next (ps-iter ps))))
+ (let ((acc2 (cons t acc)))
+ (cond
+ ((eq? (tok-kind t) 'EOF)
+ ;; Bail; let the real parser report the error
+ ;; after we restore tokens.
+ (%inferred-arr-restore! ps acc2)
+ (die #f "init: unterminated brace"))
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (eq? (tok-value t) 'lbrace))
+ (lp (+ depth 1) n #t #f acc2))
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (eq? (tok-value t) 'rbrace))
+ (cond
+ ((= depth 1)
+ ;; Done. Restore tokens (acc2 includes the
+ ;; closing `}`).
+ (%inferred-arr-restore! ps acc2)
+ (cond ((not saw-elem?) 0)
+ (last-was-comma? n)
+ (else (+ n 1))))
+ (else (lp (- depth 1) n saw-elem? #f acc2))))
+ ((and (eq? (tok-kind t) 'PUNCT)
+ (eq? (tok-value t) 'comma)
+ (= depth 1))
+ (lp depth (+ n 1) saw-elem? #t acc2))
+ (else
+ (lp depth n #t #f acc2)))))))))
+ )
+ (cond
+ ((eq? (tok-kind first) 'STR)
+ (%inferred-arr-restore! ps collected)))
+ (%init-fixed-arr-type ty count)))
+
+(define (%inferred-arr-restore! ps acc)
+ ;; acc is a stack of tokens in REVERSE consume order (most-recent
+ ;; first). iter-unget! prepends one at a time, so iterating acc in
+ ;; its current order pushes them back in the right sequence —
+ ;; i.e. the oldest-consumed token ends up at the front of the
+ ;; lookahead buffer.
+ (let lp ((xs acc))
+ (cond
+ ((null? xs) #t)
+ (else (iter-unget! (ps-iter ps) (car xs)) (lp (cdr xs))))))
+
(define (handle-decl ps sto n ty)
(cond
((not n) (die #f "no name"))
@@ -5360,7 +5445,22 @@
(scope-bind! ps n sm)
(cg-add-tentative! (ps-cg ps) n)))))
(else
- (let* ((sz (max (ctype-size ty) 1))
+ ;; Block-scope inferred-length array (`int a[] = {…};` or
+ ;; `char s[] = "…";`): peek the initializer past `=` to count
+ ;; elements / measure the string and rebuild `ty` with the
+ ;; resolved length BEFORE cg-alloc-slot. Otherwise the slot
+ ;; is sized off a -1 / 0 ctype-size (capped to 1 byte) and
+ ;; the per-element stores in parse-init-local-aggregate write
+ ;; past frame-hi — the next %cg-spill-reg then allocates
+ ;; right inside the array, clobbering elements.
+ (let* ((ty (cond
+ ((and (eq? (ctype-kind ty) 'arr)
+ (< (cdr (ctype-ext ty)) 0)
+ (at-punct? ps 'assign)
+ (%peek-inferred-arr-init? ps))
+ (%resolve-inferred-arr-len ps ty))
+ (else ty)))
+ (sz (max (ctype-size ty) 1))
(al (max (ctype-align ty) 1))
(sl (cg-alloc-slot (ps-cg ps) sz al))
(sm (%sym n 'var (or sto 'auto) ty sl #t)))
diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c
@@ -0,0 +1,21 @@
+/* Block-scope array with inferred length: `int a[] = {...};`
+ * Verify (a) sizeof(a) == 16 (4 elements * sizeof(int)),
+ * (b) all elements readable with the right value,
+ * (c) sum is correct, and (d) an adjacent local declared right
+ * after the array is NOT clobbered by the array's initializer
+ * stores or by any spills emitted during initialization.
+ */
+
+int main(int argc, char **argv) {
+ int a[] = {10, 20, 30, 40};
+ int sentinel = 0xCAFE;
+
+ if (sizeof(a) != 16) return 1;
+ if (a[0] != 10) return 2;
+ if (a[1] != 20) return 3;
+ if (a[2] != 30) return 4;
+ if (a[3] != 40) return 5;
+ if ((a[0] + a[1] + a[2] + a[3]) != 100) return 6;
+ if (sentinel != 0xCAFE) return 7;
+ return 0;
+}
diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c
@@ -0,0 +1,20 @@
+/* Block-scope char[] with string-literal initializer:
+ * `char s[] = "hello";`. sizeof(s) must be 6 (5 chars + NUL),
+ * each byte must read back correctly, and an adjacent local
+ * declared after must NOT be clobbered.
+ */
+
+int main(int argc, char **argv) {
+ char s[] = "hello";
+ int sentinel = 0xCAFE;
+
+ if (sizeof(s) != 6) return 1;
+ if (s[0] != 'h') return 2;
+ if (s[1] != 'e') return 3;
+ if (s[2] != 'l') return 4;
+ if (s[3] != 'l') return 5;
+ if (s[4] != 'o') return 6;
+ if (s[5] != 0) return 7;
+ if (sentinel != 0xCAFE) return 8;
+ return 0;
+}
diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit
@@ -0,0 +1 @@
+0