commit e883106d20eda5ab8aa4c0f52c3594c9043b633c
parent 6a36fcf509bc9c4c4b9aa42e90bfd706e0d3db94
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 1 May 2026 19:12:23 -0700
Revert "merge: block-scope inferred-length arrays"
This reverts commit 0fd7206cab421b0c14cad970c764ebe3514e46c9, reversing
changes made to f80f807bc643e0821ae67619e3d8806eedf671cc.
Diffstat:
5 files changed, 10 insertions(+), 147 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -5360,12 +5360,7 @@
(scope-bind! ps n sm)
(cg-add-tentative! (ps-cg ps) n)))))
(else
- ;; Inferred-length array `T a[] = ...`: resolve the length
- ;; from the initializer (peek-only; no tokens consumed) so
- ;; the frame slot is sized correctly and the bound sym's
- ;; ctype carries the real length (sizeof works, etc.).
- (let* ((ty (%resolve-inferred-local-array-ty ps ty))
- (sz (max (ctype-size ty) 1))
+ (let* ((sz (max (ctype-size ty) 1))
(al (max (ctype-align ty) 1))
(sl (cg-alloc-slot (ps-cg ps) sz al))
(sm (%sym n 'var (or sto 'auto) ty sl #t)))
@@ -5489,92 +5484,6 @@
;; pass `ty` through.
(%mk-arr (car (ctype-ext ty)) count))
-;; ----- Inferred-length array resolution for block-scope autos --------
-;; The global initializer path returns a refined ctype after parsing,
-;; and the sym is bound only afterwards. Block-scope autos cannot do
-;; the same: they emit cg ops inline during init parsing, and the
-;; frame slot must be allocated to its final size *before* expression
-;; spills can interleave above it. So when we see `T a[] = ...` at
-;; block scope we resolve the length up front by lookahead, then
-;; allocate the slot off the resolved type. The lookahead pulls
-;; tokens through the matching brace (or reads the STR length), then
-;; pushes everything back onto the iter buffer so the real init
-;; parser sees an unmodified stream.
-(define (%iter-restore! it collected-rev)
- ;; collected-rev: tokens in pull order, head = most recent.
- ;; Re-prepend them so the next iter-next yields the oldest first.
- (let lp ((xs collected-rev))
- (cond ((null? xs) #t)
- (else (iter-unget! it (car xs))
- (lp (cdr xs))))))
-
-(define (%resolve-braced-array-len ps ty)
- ;; ps positioned at `=`, with `{` as the next token. Drain through
- ;; the matching `}`, counting top-level (depth-1) initializers, and
- ;; restore the consumed tokens. Returns the resolved fixed-length
- ;; ctype, or `ty` unchanged on EOF / malformed input (the real
- ;; parser will produce a proper diagnostic).
- (let* ((it (ps-iter ps))
- (t-eq (iter-next it))
- (t-lb (iter-next it)))
- (let scan ((collected (list t-lb t-eq))
- (depth 1)
- (count 0)
- (pending? #f))
- (let ((t (iter-next it)))
- (let ((collected1 (cons t collected)))
- (cond
- ((eq? (tok-kind t) 'EOF)
- (%iter-restore! it collected1) ty)
- ((and (eq? (tok-kind t) 'PUNCT)
- (eq? (tok-value t) 'rbrace)
- (= depth 1))
- (let ((final (cond (pending? (+ count 1)) (else count))))
- (%iter-restore! it collected1)
- (%init-fixed-arr-type ty final)))
- ((and (eq? (tok-kind t) 'PUNCT)
- (eq? (tok-value t) 'comma)
- (= depth 1))
- (scan collected1 depth
- (cond (pending? (+ count 1)) (else count))
- #f))
- ((and (eq? (tok-kind t) 'PUNCT)
- (or (eq? (tok-value t) 'lbrace)
- (eq? (tok-value t) 'lparen)
- (eq? (tok-value t) 'lbracket)))
- (scan collected1 (+ depth 1) count #t))
- ((and (eq? (tok-kind t) 'PUNCT)
- (or (eq? (tok-value t) 'rbrace)
- (eq? (tok-value t) 'rparen)
- (eq? (tok-value t) 'rbracket)))
- (scan collected1 (- depth 1) count pending?))
- (else
- (scan collected1 depth count #t))))))))
-
-(define (%resolve-inferred-local-array-ty ps ty)
- ;; If `ty` is an inferred-length array and the next tokens are
- ;; `= STR` or `= {`, return a fresh array ctype with the resolved
- ;; length. Otherwise return `ty` unchanged. Does not consume any
- ;; tokens.
- (cond
- ((not (and (eq? (ctype-kind ty) 'arr)
- (< (cdr (ctype-ext ty)) 0)
- (at-punct? ps 'assign)))
- ty)
- (else
- (let ((t2 (peek2 ps)))
- (cond
- ((eq? (tok-kind t2) 'STR)
- (let ((et (car (ctype-ext ty))))
- (cond
- ((or (eq? et %t-i8) (eq? et %t-u8))
- (%init-fixed-arr-type ty
- (+ 1 (bytevector-length (tok-value t2)))))
- (else ty))))
- ((and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace))
- (%resolve-braced-array-len ps ty))
- (else ty))))))
-
(define (%init-struct-fields ty)
;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
(let ((ext (ctype-ext ty)))
@@ -5869,10 +5778,12 @@
(let ((et (car (ctype-ext ty))))
(or (eq? et %t-i8) (eq? et %t-u8)))))
(advance ps)
- ;; For inferred-length `T a[] = "..."` autos the caller resolves
- ;; the length up front (see %resolve-inferred-local-array-ty),
- ;; so by the time we get here `ty` is fixed-length and `sm`'s
- ;; ctype matches.
+ ;; Note: for inferred-length (`char x[] = "..."`) auto arrays the
+ ;; sm-type still records the original (size=-1) ctype — `sizeof(x)`
+ ;; in the body would not see the resolved length. The slot is also
+ ;; sized off the original (= 1 byte), so this path was already
+ ;; broken before this change; we don't paper over it here. Real C
+ ;; bootstrap code uses statics/globals for inferred-length arrays.
(let* ((slen (bytevector-length s))
(decl (cdr (ctype-ext ty)))
(final (cond ((< decl 0) (+ slen 1)) (else decl))))
@@ -5918,11 +5829,10 @@
(cond
((at-punct? ps 'rbrace)
(advance ps)
+ ;; The inferred-length auto path was already broken before this
+ ;; change (slot allocated off size=-1, sm-type unfixed). See the
+ ;; note in the parse-init-local-aggregate STR branch.
;; Zero out remaining slots if any (declared length > i).
- ;; Inferred-length autos arrive here with `decl` already
- ;; resolved by %resolve-inferred-local-array-ty in parse-decl,
- ;; so the (< decl 0) branch only fires for the synthetic
- ;; compound-literal path that bypasses that resolver.
(let ((final (cond ((< decl 0) i) (else decl))))
(let zlp ((k i))
(cond
diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c
@@ -1,22 +0,0 @@
-/* Regression: block-scope inferred-length array `int a[] = {...}`
- * must size the array from the initializer, mirroring file-scope
- * behaviour. Previously the local sym kept ctype size = -1 and the
- * frame slot was allocated for 1 byte, so sizeof(a) returned 0 and
- * a[2] was an out-of-bounds frame access.
- *
- * Result: 0. */
-
-int main(void) {
- int a[] = { 10, 20, 30, 40 };
- if (sizeof(a) != 16) return 1;
- if (a[0] != 10) return 2;
- if (a[1] != 20) return 3;
- if (a[2] != 30) return 4;
- if (a[3] != 40) return 5;
- /* Confirm sum to ensure no slot overlap clobbered nearby autos. */
- int sum = 0;
- int i;
- for (i = 0; i < 4; i = i + 1) sum = sum + a[i];
- if (sum != 100) return 6;
- return 0;
-}
diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit
@@ -1 +0,0 @@
-0
diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c
@@ -1,23 +0,0 @@
-/* Regression: block-scope inferred-length char array initialised
- * from a string literal. `char s[] = "hello";` should size `s` to
- * 6 bytes (5 chars + NUL) — same as file scope. Previously the
- * local sym kept ctype size = -1 and only one byte of frame was
- * allocated, so sizeof(s) was 0 and writes past s[0] clobbered
- * adjacent frame slots.
- *
- * Result: 0. */
-
-int main(void) {
- char s[] = "hello";
- if (sizeof(s) != 6) return 1;
- if (s[0] != 'h') return 2;
- if (s[1] != 'e') return 3;
- if (s[2] != 'l') return 4;
- if (s[3] != 'l') return 5;
- if (s[4] != 'o') return 6;
- if (s[5] != 0) return 7;
- /* Adjacent auto must not have been clobbered by the string store. */
- int guard = 12345;
- if (guard != 12345) return 8;
- return 0;
-}
diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit
@@ -1 +0,0 @@
-0