commit d1f5e99ce448d90af6afa26b585c39bbe393881c
parent 5dbea4132c4faacb1c5a1056b67a75ce698c36bf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 17:49:09 -0700
cc/parse: infer length for block-scope arrays from initializer
`int a[] = {...};` and `char s[] = "...";` at block scope used to
keep the local sym's ctype at size=-1 and allocate a 1-byte frame
slot, so sizeof(a) returned 0 and writes past the first element
clobbered adjacent autos. The file-scope path already returns a
refined ctype after parsing the initializer; the block-scope path
cannot do the same because it emits cg ops inline (and expression
spills can interleave fresh slots above the array).
Resolve the inferred length up front via token lookahead before
allocating the slot: scan through the matching `}` (or read the
STR length), unget every consumed token so the real init parser
sees an unmodified stream, then build a fixed-length ctype from
the count.
Diffstat:
5 files changed, 147 insertions(+), 10 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -5218,7 +5218,12 @@
(scope-bind! ps n sm)
(cg-add-tentative! (ps-cg ps) n)))))
(else
- (let* ((sz (max (ctype-size ty) 1))
+ ;; Inferred-length array `T a[] = ...`: resolve the length
+ ;; from the initializer (peek-only; no tokens consumed) so
+ ;; the frame slot is sized correctly and the bound sym's
+ ;; ctype carries the real length (sizeof works, etc.).
+ (let* ((ty (%resolve-inferred-local-array-ty ps ty))
+ (sz (max (ctype-size ty) 1))
(al (max (ctype-align ty) 1))
(sl (cg-alloc-slot (ps-cg ps) sz al))
(sm (%sym n 'var (or sto 'auto) ty sl #t)))
@@ -5342,6 +5347,92 @@
;; pass `ty` through.
(%mk-arr (car (ctype-ext ty)) count))
+;; ----- Inferred-length array resolution for block-scope autos --------
+;; The global initializer path returns a refined ctype after parsing,
+;; and the sym is bound only afterwards. Block-scope autos cannot do
+;; the same: they emit cg ops inline during init parsing, and the
+;; frame slot must be allocated to its final size *before* expression
+;; spills can interleave above it. So when we see `T a[] = ...` at
+;; block scope we resolve the length up front by lookahead, then
+;; allocate the slot off the resolved type. For `= {` the lookahead
+;; pulls tokens through the matching brace and then pushes them all
+;; back onto the iter buffer, so the real init parser sees an
+;; unmodified stream; for `= STR` it only peeks at the literal.
+(define (%iter-restore! it collected-rev)
+  ;; Push previously pulled tokens back onto iterator `it`.
+  ;; `collected-rev` lists them newest-first, so the newest token is
+  ;; un-got first and the oldest last; the next iter-next therefore
+  ;; yields the oldest token again. Always returns #t.
+  (let unget-all ((remaining collected-rev))
+    (cond
+      ((not (null? remaining))
+       (iter-unget! it (car remaining))
+       (unget-all (cdr remaining)))
+      (else #t))))
+
+(define (%resolve-braced-array-len ps ty)
+  ;; Count the top-level initializers of a braced initializer by
+  ;; lookahead and return the array ctype with that length.
+  ;;
+  ;; ps: parse state positioned at `=`, with `{` as the next token.
+  ;; ty: the inferred-length array ctype being declared.
+  ;;
+  ;; Tokens are pulled from the iter through the `}` that closes the
+  ;; initializer and are all pushed back before returning, so the real
+  ;; init parser sees an unmodified stream. Returns the result of
+  ;; (%init-fixed-arr-type ty count), or `ty` unchanged on EOF /
+  ;; malformed input (the real parser will produce a proper
+  ;; diagnostic).
+  ;;
+  ;; Counting is purely lexical: each comma-separated, non-empty group
+  ;; of tokens at depth 1 counts as one element.
+  ;; NOTE(review): an index designator (`[5] = x`), a brace-elided
+  ;; aggregate element, or `{ "str" }` is therefore counted as one
+  ;; element per group -- confirm the init parser never needs a
+  ;; different length for those forms.
+  (let* ((it (ps-iter ps))
+         (t-eq (iter-next it))
+         (t-lb (iter-next it)))
+    ;; collected: consumed tokens, most recent first.
+    ;; depth:     bracket nesting; 1 = directly inside the outer `{`.
+    ;; count:     completed top-level elements so far.
+    ;; pending?:  #t once the current element has at least one token.
+    (let scan ((collected (list t-lb t-eq))
+               (depth 1)
+               (count 0)
+               (pending? #f))
+      (let* ((t (iter-next it))
+             (collected1 (cons t collected)))
+        (cond
+          ;; Ran off the end: give everything back and let the real
+          ;; parser report the error.
+          ((eq? (tok-kind t) 'EOF)
+           (%iter-restore! it collected1)
+           ty)
+          ;; The `}` matching the opening brace: a non-empty trailing
+          ;; group (no trailing comma) is one more element.
+          ((and (eq? (tok-kind t) 'PUNCT)
+                (eq? (tok-value t) 'rbrace)
+                (= depth 1))
+           (let ((final (cond (pending? (+ count 1)) (else count))))
+             (%iter-restore! it collected1)
+             (%init-fixed-arr-type ty final)))
+          ;; Top-level comma: closes the current element if it had any
+          ;; tokens.
+          ((and (eq? (tok-kind t) 'PUNCT)
+                (eq? (tok-value t) 'comma)
+                (= depth 1))
+           (scan collected1 depth
+                 (cond (pending? (+ count 1)) (else count))
+                 #f))
+          ;; Any opener nests one level deeper and belongs to the
+          ;; current element.
+          ((and (eq? (tok-kind t) 'PUNCT)
+                (or (eq? (tok-value t) 'lbrace)
+                    (eq? (tok-value t) 'lparen)
+                    (eq? (tok-value t) 'lbracket)))
+           (scan collected1 (+ depth 1) count #t))
+          ((and (eq? (tok-kind t) 'PUNCT)
+                (or (eq? (tok-value t) 'rbrace)
+                    (eq? (tok-value t) 'rparen)
+                    (eq? (tok-value t) 'rbracket)))
+           (cond
+             ;; A closer at depth 1 that is not the matching `}` (that
+             ;; case was handled above) is a stray `)` or `]`. Stop
+             ;; here: letting depth reach 0 would make a later,
+             ;; unrelated `{ ... }` look like the end of this
+             ;; initializer and produce a bogus length.
+             ((= depth 1)
+              (%iter-restore! it collected1)
+              ty)
+             (else
+              (scan collected1 (- depth 1) count pending?))))
+          (else
+           (scan collected1 depth count #t)))))))
+
+(define (%resolve-inferred-local-array-ty ps ty)
+  ;; Resolve the length of an inferred-length array declaration from
+  ;; its initializer without consuming any tokens.
+  ;;
+  ;; Returns a fixed-length array ctype when `ty` is an array with a
+  ;; negative length, the current token is `=`, and the token after it
+  ;; is either a STR (element type i8/u8 only; length = string bytes
+  ;; plus one for the NUL) or `{` (length counted by
+  ;; %resolve-braced-array-len). In every other case `ty` is returned
+  ;; unchanged.
+  (cond
+    ((not (eq? (ctype-kind ty) 'arr)) ty)
+    ((not (< (cdr (ctype-ext ty)) 0)) ty)
+    ((not (at-punct? ps 'assign)) ty)
+    (else
+     (let* ((after-eq (peek2 ps))
+            (after-kind (tok-kind after-eq)))
+       (cond
+         ((eq? after-kind 'STR)
+          (let ((elem (car (ctype-ext ty))))
+            (cond
+              ((or (eq? elem %t-i8) (eq? elem %t-u8))
+               (%init-fixed-arr-type
+                ty
+                (+ 1 (bytevector-length (tok-value after-eq)))))
+              (else ty))))
+         ((eq? after-kind 'PUNCT)
+          (cond
+            ((eq? (tok-value after-eq) 'lbrace)
+             (%resolve-braced-array-len ps ty))
+            (else ty)))
+         (else ty))))))
+
(define (%init-struct-fields ty)
;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
(let ((ext (ctype-ext ty)))
@@ -5659,12 +5750,10 @@
(let ((et (car (ctype-ext ty))))
(or (eq? et %t-i8) (eq? et %t-u8)))))
(advance ps)
- ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the
- ;; sm-type still records the original (size=-1) ctype — `sizeof(x)`
- ;; in the body would not see the resolved length. The slot is also
- ;; sized off the original (= 1 byte), so the path is pre-existing
- ;; broken; we don't paper over it here. Real C bootstrap code uses
- ;; statics/globals for inferred-length arrays.
+ ;; For inferred-length `T a[] = "..."` autos the caller resolves
+ ;; the length up front (see %resolve-inferred-local-array-ty),
+ ;; so by the time we get here `ty` is fixed-length and `sm`'s
+ ;; ctype matches.
(let* ((slen (bytevector-length s))
(decl (cdr (ctype-ext ty)))
(final (cond ((< decl 0) (+ slen 1)) (else decl))))
@@ -5710,10 +5799,11 @@
(cond
((at-punct? ps 'rbrace)
(advance ps)
- ;; Inferred-length auto path is pre-existing broken (slot
- ;; allocated off size=-1, sm-type unfixed). See note in
- ;; parse-init-local-aggregate STR branch.
;; Zero out remaining slots if any (declared length > i).
+ ;; Inferred-length autos arrive here with `decl` already
+ ;; resolved by %resolve-inferred-local-array-ty in parse-decl,
+ ;; so the (< decl 0) branch only fires for the synthetic
+ ;; compound-literal path that bypasses that resolver.
(let ((final (cond ((< decl 0) i) (else decl))))
(let zlp ((k i))
(cond
diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c
@@ -0,0 +1,22 @@
+/* Regression: block-scope inferred-length array `int a[] = {...}`
+ * must size the array from the initializer, mirroring file-scope
+ * behaviour. Previously the local sym kept ctype size = -1 and the
+ * frame slot was allocated for 1 byte, so sizeof(a) returned 0 and
+ * a[2] was an out-of-bounds frame access.
+ *
+ * Result: 0. */
+
+int main(void) {
+ int a[] = { 10, 20, 30, 40 };
+ if (sizeof(a) != 16) return 1;
+ if (a[0] != 10) return 2;
+ if (a[1] != 20) return 3;
+ if (a[2] != 30) return 4;
+ if (a[3] != 40) return 5;
+ /* Sum through `sum` and `i`, autos declared after `a`: if a's slot
+  * were undersized and overlapped them, the total would presumably
+  * not be 100. */
+ int sum = 0;
+ int i;
+ for (i = 0; i < 4; i = i + 1) sum = sum + a[i];
+ if (sum != 100) return 6;
+ return 0;
+}
diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c
@@ -0,0 +1,23 @@
+/* Regression: block-scope inferred-length char array initialised
+ * from a string literal. `char s[] = "hello";` should size `s` to
+ * 6 bytes (5 chars + NUL) — same as file scope. Previously the
+ * local sym kept ctype size = -1 and only one byte of frame was
+ * allocated, so sizeof(s) was 0 and writes past s[0] clobbered
+ * adjacent frame slots.
+ *
+ * Result: 0. */
+
+int main(void) {
+    /* Initialised before `s`, so an overrunning string store can hit it. */
+    int guard = 12345;
+    char s[] = "hello";
+    if (sizeof(s) != 6) return 1;
+    if (s[0] != 'h') return 2;
+    if (s[1] != 'e') return 3;
+    if (s[2] != 'l') return 4;
+    if (s[3] != 'l') return 5;
+    if (s[4] != 'o') return 6;
+    if (s[5] != 0) return 7;
+    if (guard != 12345) return 8;
+    return 0;
+}
diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit
@@ -0,0 +1 @@
+0