boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit e883106d20eda5ab8aa4c0f52c3594c9043b633c
parent 6a36fcf509bc9c4c4b9aa42e90bfd706e0d3db94
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 19:12:23 -0700

Revert "merge: block-scope inferred-length arrays"

This reverts commit 0fd7206cab421b0c14cad970c764ebe3514e46c9, reversing
changes made to f80f807bc643e0821ae67619e3d8806eedf671cc.

Diffstat:
M cc/cc.scm | 110 ++++++++-----------------------------------------------------------------------
D tests/cc/135-block-inferred-array.c | 22 ----------------------
D tests/cc/135-block-inferred-array.expected-exit | 1 -
D tests/cc/136-block-inferred-string.c | 23 -----------------------
D tests/cc/136-block-inferred-string.expected-exit | 1 -
5 files changed, 10 insertions(+), 147 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -5360,12 +5360,7 @@ (scope-bind! ps n sm) (cg-add-tentative! (ps-cg ps) n))))) (else - ;; Inferred-length array `T a[] = ...`: resolve the length - ;; from the initializer (peek-only; no tokens consumed) so - ;; the frame slot is sized correctly and the bound sym's - ;; ctype carries the real length (sizeof works, etc.). - (let* ((ty (%resolve-inferred-local-array-ty ps ty)) - (sz (max (ctype-size ty) 1)) + (let* ((sz (max (ctype-size ty) 1)) (al (max (ctype-align ty) 1)) (sl (cg-alloc-slot (ps-cg ps) sz al)) (sm (%sym n 'var (or sto 'auto) ty sl #t))) @@ -5489,92 +5484,6 @@ ;; pass `ty` through. (%mk-arr (car (ctype-ext ty)) count)) -;; ----- Inferred-length array resolution for block-scope autos -------- -;; The global initializer path returns a refined ctype after parsing, -;; and the sym is bound only afterwards. Block-scope autos cannot do -;; the same: they emit cg ops inline during init parsing, and the -;; frame slot must be allocated to its final size *before* expression -;; spills can interleave above it. So when we see `T a[] = ...` at -;; block scope we resolve the length up front by lookahead, then -;; allocate the slot off the resolved type. The lookahead pulls -;; tokens through the matching brace (or reads the STR length), then -;; pushes everything back onto the iter buffer so the real init -;; parser sees an unmodified stream. -(define (%iter-restore! it collected-rev) - ;; collected-rev: tokens in pull order, head = most recent. - ;; Re-prepend them so the next iter-next yields the oldest first. - (let lp ((xs collected-rev)) - (cond ((null? xs) #t) - (else (iter-unget! it (car xs)) - (lp (cdr xs)))))) - -(define (%resolve-braced-array-len ps ty) - ;; ps positioned at `=`, with `{` as the next token. Drain through - ;; the matching `}`, counting top-level (depth-1) initializers, and - ;; restore the consumed tokens. 
Returns the resolved fixed-length - ;; ctype, or `ty` unchanged on EOF / malformed input (the real - ;; parser will produce a proper diagnostic). - (let* ((it (ps-iter ps)) - (t-eq (iter-next it)) - (t-lb (iter-next it))) - (let scan ((collected (list t-lb t-eq)) - (depth 1) - (count 0) - (pending? #f)) - (let ((t (iter-next it))) - (let ((collected1 (cons t collected))) - (cond - ((eq? (tok-kind t) 'EOF) - (%iter-restore! it collected1) ty) - ((and (eq? (tok-kind t) 'PUNCT) - (eq? (tok-value t) 'rbrace) - (= depth 1)) - (let ((final (cond (pending? (+ count 1)) (else count)))) - (%iter-restore! it collected1) - (%init-fixed-arr-type ty final))) - ((and (eq? (tok-kind t) 'PUNCT) - (eq? (tok-value t) 'comma) - (= depth 1)) - (scan collected1 depth - (cond (pending? (+ count 1)) (else count)) - #f)) - ((and (eq? (tok-kind t) 'PUNCT) - (or (eq? (tok-value t) 'lbrace) - (eq? (tok-value t) 'lparen) - (eq? (tok-value t) 'lbracket))) - (scan collected1 (+ depth 1) count #t)) - ((and (eq? (tok-kind t) 'PUNCT) - (or (eq? (tok-value t) 'rbrace) - (eq? (tok-value t) 'rparen) - (eq? (tok-value t) 'rbracket))) - (scan collected1 (- depth 1) count pending?)) - (else - (scan collected1 depth count #t)))))))) - -(define (%resolve-inferred-local-array-ty ps ty) - ;; If `ty` is an inferred-length array and the next tokens are - ;; `= STR` or `= {`, return a fresh array ctype with the resolved - ;; length. Otherwise return `ty` unchanged. Does not consume any - ;; tokens. - (cond - ((not (and (eq? (ctype-kind ty) 'arr) - (< (cdr (ctype-ext ty)) 0) - (at-punct? ps 'assign))) - ty) - (else - (let ((t2 (peek2 ps))) - (cond - ((eq? (tok-kind t2) 'STR) - (let ((et (car (ctype-ext ty)))) - (cond - ((or (eq? et %t-i8) (eq? et %t-u8)) - (%init-fixed-arr-type ty - (+ 1 (bytevector-length (tok-value t2))))) - (else ty)))) - ((and (eq? (tok-kind t2) 'PUNCT) (eq? 
(tok-value t2) 'lbrace)) - (%resolve-braced-array-len ps ty)) - (else ty)))))) - (define (%init-struct-fields ty) ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. (let ((ext (ctype-ext ty))) @@ -5869,10 +5778,12 @@ (let ((et (car (ctype-ext ty)))) (or (eq? et %t-i8) (eq? et %t-u8))))) (advance ps) - ;; For inferred-length `T a[] = "..."` autos the caller resolves - ;; the length up front (see %resolve-inferred-local-array-ty), - ;; so by the time we get here `ty` is fixed-length and `sm`'s - ;; ctype matches. + ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the + ;; sm-type still records the original (size=-1) ctype — `sizeof(x)` + ;; in the body would not see the resolved length. The slot is also + ;; sized off the original (= 1 byte), so the path is pre-existing + ;; broken; we don't paper over it here. Real C bootstrap code uses + ;; statics/globals for inferred-length arrays. (let* ((slen (bytevector-length s)) (decl (cdr (ctype-ext ty))) (final (cond ((< decl 0) (+ slen 1)) (else decl)))) @@ -5918,11 +5829,10 @@ (cond ((at-punct? ps 'rbrace) (advance ps) + ;; Inferred-length auto path is pre-existing broken (slot + ;; allocated off size=-1, sm-type unfixed). See note in + ;; parse-init-local-aggregate STR branch. ;; Zero out remaining slots if any (declared length > i). - ;; Inferred-length autos arrive here with `decl` already - ;; resolved by %resolve-inferred-local-array-ty in parse-decl, - ;; so the (< decl 0) branch only fires for the synthetic - ;; compound-literal path that bypasses that resolver. (let ((final (cond ((< decl 0) i) (else decl)))) (let zlp ((k i)) (cond diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c @@ -1,22 +0,0 @@ -/* Regression: block-scope inferred-length array `int a[] = {...}` - * must size the array from the initializer, mirroring file-scope - * behaviour. 
Previously the local sym kept ctype size = -1 and the - * frame slot was allocated for 1 byte, so sizeof(a) returned 0 and - * a[2] was an out-of-bounds frame access. - * - * Result: 0. */ - -int main(void) { - int a[] = { 10, 20, 30, 40 }; - if (sizeof(a) != 16) return 1; - if (a[0] != 10) return 2; - if (a[1] != 20) return 3; - if (a[2] != 30) return 4; - if (a[3] != 40) return 5; - /* Confirm sum to ensure no slot overlap clobbered nearby autos. */ - int sum = 0; - int i; - for (i = 0; i < 4; i = i + 1) sum = sum + a[i]; - if (sum != 100) return 6; - return 0; -} diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit @@ -1 +0,0 @@ -0 diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c @@ -1,23 +0,0 @@ -/* Regression: block-scope inferred-length char array initialised - * from a string literal. `char s[] = "hello";` should size `s` to - * 6 bytes (5 chars + NUL) — same as file scope. Previously the - * local sym kept ctype size = -1 and only one byte of frame was - * allocated, so sizeof(s) was 0 and writes past s[0] clobbered - * adjacent frame slots. - * - * Result: 0. */ - -int main(void) { - char s[] = "hello"; - if (sizeof(s) != 6) return 1; - if (s[0] != 'h') return 2; - if (s[1] != 'e') return 3; - if (s[2] != 'l') return 4; - if (s[3] != 'l') return 5; - if (s[4] != 'o') return 6; - if (s[5] != 0) return 7; - /* Adjacent auto must not have been clobbered by the string store. */ - int guard = 12345; - if (guard != 12345) return 8; - return 0; -} diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit @@ -1 +0,0 @@ -0