commit 0fd7206cab421b0c14cad970c764ebe3514e46c9
parent f80f807bc643e0821ae67619e3d8806eedf671cc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 18:47:23 -0700
merge: block-scope inferred-length arrays
Diffstat:
5 files changed, 147 insertions(+), 10 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -5324,7 +5324,12 @@
(scope-bind! ps n sm)
(cg-add-tentative! (ps-cg ps) n)))))
(else
- (let* ((sz (max (ctype-size ty) 1))
+ ;; Inferred-length array `T a[] = ...`: resolve the length
+ ;; from the initializer (peek-only; no tokens consumed) so
+ ;; the frame slot is sized correctly and the bound sym's
+ ;; ctype carries the real length (sizeof works, etc.).
+ (let* ((ty (%resolve-inferred-local-array-ty ps ty))
+ (sz (max (ctype-size ty) 1))
(al (max (ctype-align ty) 1))
(sl (cg-alloc-slot (ps-cg ps) sz al))
(sm (%sym n 'var (or sto 'auto) ty sl #t)))
@@ -5448,6 +5453,92 @@
;; pass `ty` through.
(%mk-arr (car (ctype-ext ty)) count))
+;; ----- Inferred-length array resolution for block-scope autos --------
+;; The global initializer path returns a refined ctype after parsing,
+;; and the sym is bound only afterwards. Block-scope autos cannot do
+;; the same: they emit cg ops inline during init parsing, and the
+;; frame slot must be allocated to its final size *before* expression
+;; spills can interleave above it. So when we see `T a[] = ...` at
+;; block scope we resolve the length up front by lookahead, then
+;; allocate the slot off the resolved type. The lookahead pulls
+;; tokens through the matching brace (or reads the STR length), then
+;; pushes everything back onto the iter buffer so the real init
+;; parser sees an unmodified stream.
+(define (%iter-restore! it collected-rev)
+ ;; Undo a lookahead on iterator `it`. collected-rev holds the pulled
+ ;; tokens, most recent first (head = last token pulled). Returns #t.
+ (let lp ((xs collected-rev))
+ (cond ((null? xs) #t)
+ (else (iter-unget! it (car xs)) ; oldest is ungot last, so iter-next yields it first
+ (lp (cdr xs))))))
+
+(define (%resolve-braced-array-len ps ty)
+ ;; The caller has checked that the next two tokens are `=` and `{`.
+ ;; Pulls them, then scans to the `}` seen at depth 1, counting the
+ ;; comma-separated items at depth 1 only (designators and brace elision
+ ;; are not interpreted here). Every pulled token is restored. Returns
+ ;; (%init-fixed-arr-type ty n) for n items, or `ty` unchanged on EOF.
+ (let* ((it (ps-iter ps))
+ (t-eq (iter-next it))
+ (t-lb (iter-next it)))
+ (let scan ((collected (list t-lb t-eq)) ; pulled tokens, newest first
+ (depth 1) ; nesting level; 1 = directly inside the outer braces
+ (count 0) ; items completed so far at depth 1
+ (pending? #f)) ; #t once the current item has at least one token
+ (let ((t (iter-next it)))
+ (let ((collected1 (cons t collected)))
+ (cond
+ ((eq? (tok-kind t) 'EOF) ; ran out of input: undo and give up
+ (%iter-restore! it collected1) ty)
+ ((and (eq? (tok-kind t) 'PUNCT) ; the `}` that closes the initializer
+ (eq? (tok-value t) 'rbrace)
+ (= depth 1))
+ (let ((final (cond (pending? (+ count 1)) (else count)))) ; last item has no trailing comma
+ (%iter-restore! it collected1)
+ (%init-fixed-arr-type ty final)))
+ ((and (eq? (tok-kind t) 'PUNCT) ; separator between depth-1 items
+ (eq? (tok-value t) 'comma)
+ (= depth 1))
+ (scan collected1 depth
+ (cond (pending? (+ count 1)) (else count)) ; a comma after no tokens adds nothing
+ #f))
+ ((and (eq? (tok-kind t) 'PUNCT) ; any opener nests one level and starts/continues an item
+ (or (eq? (tok-value t) 'lbrace)
+ (eq? (tok-value t) 'lparen)
+ (eq? (tok-value t) 'lbracket)))
+ (scan collected1 (+ depth 1) count #t))
+ ((and (eq? (tok-kind t) 'PUNCT) ; any closer unnests; a depth-1 `}` was handled above
+ (or (eq? (tok-value t) 'rbrace)
+ (eq? (tok-value t) 'rparen)
+ (eq? (tok-value t) 'rbracket)))
+ (scan collected1 (- depth 1) count pending?))
+ (else ; any other token belongs to the current item
+ (scan collected1 depth count #t))))))))
+
+(define (%resolve-inferred-local-array-ty ps ty)
+ ;; For an array ctype whose length field is negative (inferred) and
+ ;; whose next tokens are `= STR` (i8/u8 elements only) or `= {`, return
+ ;; a fixed-length ctype: STR byte length + 1, or the braced item count.
+ ;; Every other case returns `ty` unchanged. Does not consume any tokens.
+ (cond
+ ((not (and (eq? (ctype-kind ty) 'arr)
+ (< (cdr (ctype-ext ty)) 0) ; negative length marks `T a[]`
+ (at-punct? ps 'assign)))
+ ty)
+ (else
+ (let ((t2 (peek2 ps))) ; token after the `=`
+ (cond
+ ((eq? (tok-kind t2) 'STR)
+ (let ((et (car (ctype-ext ty)))) ; element ctype
+ (cond
+ ((or (eq? et %t-i8) (eq? et %t-u8))
+ (%init-fixed-arr-type ty
+ (+ 1 (bytevector-length (tok-value t2))))) ; +1 for the terminating NUL
+ (else ty)))) ; STR with a non-char element type: left unresolved
+ ((and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace))
+ (%resolve-braced-array-len ps ty))
+ (else ty))))))
+
(define (%init-struct-fields ty)
;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
(let ((ext (ctype-ext ty)))
@@ -5742,12 +5833,10 @@
(let ((et (car (ctype-ext ty))))
(or (eq? et %t-i8) (eq? et %t-u8)))))
(advance ps)
- ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the
- ;; sm-type still records the original (size=-1) ctype — `sizeof(x)`
- ;; in the body would not see the resolved length. The slot is also
- ;; sized off the original (= 1 byte), so the path is pre-existing
- ;; broken; we don't paper over it here. Real C bootstrap code uses
- ;; statics/globals for inferred-length arrays.
+ ;; For inferred-length `T a[] = "..."` autos the caller resolves
+ ;; the length up front (see %resolve-inferred-local-array-ty),
+ ;; so by the time we get here `ty` is fixed-length and `sm`'s
+ ;; ctype matches.
(let* ((slen (bytevector-length s))
(decl (cdr (ctype-ext ty)))
(final (cond ((< decl 0) (+ slen 1)) (else decl))))
@@ -5793,10 +5882,11 @@
(cond
((at-punct? ps 'rbrace)
(advance ps)
- ;; Inferred-length auto path is pre-existing broken (slot
- ;; allocated off size=-1, sm-type unfixed). See note in
- ;; parse-init-local-aggregate STR branch.
;; Zero out remaining slots if any (declared length > i).
+ ;; Inferred-length autos arrive here with `decl` already
+ ;; resolved by %resolve-inferred-local-array-ty in parse-decl,
+ ;; so the (< decl 0) branch only fires for the synthetic
+ ;; compound-literal path that bypasses that resolver.
(let ((final (cond ((< decl 0) i) (else decl))))
(let zlp ((k i))
(cond
diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c
@@ -0,0 +1,22 @@
+/* Regression: block-scope inferred-length array `int a[] = {...}`
+ * must size the array from the initializer, mirroring file-scope
+ * behaviour. Previously the local sym kept ctype size = -1 and the
+ * frame slot was allocated for 1 byte, so sizeof(a) returned 0 and
+ * a[2] was an out-of-bounds frame access.
+ *
+ * Result: 0. */
+
+int main(void) { /* returns 0 on success, else the number of the first failed check */
+ int a[] = { 10, 20, 30, 40 };
+ if (sizeof(a) != 16) return 1; /* 4 elements; assumes a 4-byte int */
+ if (a[0] != 10) return 2;
+ if (a[1] != 20) return 3;
+ if (a[2] != 30) return 4;
+ if (a[3] != 40) return 5;
+ /* sum and i follow a; an undersized slot for a could let their stores corrupt the total. */
+ int sum = 0;
+ int i;
+ for (i = 0; i < 4; i = i + 1) sum = sum + a[i];
+ if (sum != 100) return 6; /* 10 + 20 + 30 + 40 */
+ return 0;
+}
diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c
@@ -0,0 +1,23 @@
+/* Regression: block-scope inferred-length char array initialised
+ * from a string literal. `char s[] = "hello";` should size `s` to
+ * 6 bytes (5 chars + NUL) — same as file scope. Previously the
+ * local sym kept ctype size = -1 and only one byte of frame was
+ * allocated, so sizeof(s) was 0 and writes past s[0] clobbered
+ * adjacent frame slots.
+ *
+ * Result: 0. */
+
+int main(void) { /* returns 0 on success, else the number of the first failed check */
+ int lo = 12345; /* live across the string store; guards one side of `s` */
+ char s[] = "hello";
+ int hi = 54321; /* stored after `s`: an overlap would corrupt s[] or hi */
+ if (sizeof(s) != 6) return 1;
+ if (s[0] != 'h') return 2;
+ if (s[1] != 'e') return 3;
+ if (s[2] != 'l') return 4;
+ if (s[3] != 'l') return 5;
+ if (s[4] != 'o') return 6;
+ if (s[5] != 0) return 7;
+ if (lo != 12345 || hi != 54321) return 8; /* neighbours of `s` must be intact */
+ return 0;
+}
diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit
@@ -0,0 +1 @@
+0