boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 0fd7206cab421b0c14cad970c764ebe3514e46c9
parent f80f807bc643e0821ae67619e3d8806eedf671cc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 18:47:23 -0700

merge: block-scope inferred-length arrays

Diffstat:
Mcc/cc.scm | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Atests/cc/135-block-inferred-array.c | 22++++++++++++++++++++++
Atests/cc/135-block-inferred-array.expected-exit | 1+
Atests/cc/136-block-inferred-string.c | 23+++++++++++++++++++++++
Atests/cc/136-block-inferred-string.expected-exit | 1+
5 files changed, 147 insertions(+), 10 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -5324,7 +5324,12 @@ (scope-bind! ps n sm) (cg-add-tentative! (ps-cg ps) n))))) (else - (let* ((sz (max (ctype-size ty) 1)) + ;; Inferred-length array `T a[] = ...`: resolve the length + ;; from the initializer (peek-only; no tokens consumed) so + ;; the frame slot is sized correctly and the bound sym's + ;; ctype carries the real length (sizeof works, etc.). + (let* ((ty (%resolve-inferred-local-array-ty ps ty)) + (sz (max (ctype-size ty) 1)) (al (max (ctype-align ty) 1)) (sl (cg-alloc-slot (ps-cg ps) sz al)) (sm (%sym n 'var (or sto 'auto) ty sl #t))) @@ -5448,6 +5453,92 @@ ;; pass `ty` through. (%mk-arr (car (ctype-ext ty)) count)) +;; ----- Inferred-length array resolution for block-scope autos -------- +;; The global initializer path returns a refined ctype after parsing, +;; and the sym is bound only afterwards. Block-scope autos cannot do +;; the same: they emit cg ops inline during init parsing, and the +;; frame slot must be allocated to its final size *before* expression +;; spills can interleave above it. So when we see `T a[] = ...` at +;; block scope we resolve the length up front by lookahead, then +;; allocate the slot off the resolved type. The lookahead pulls +;; tokens through the matching brace (or reads the STR length), then +;; pushes everything back onto the iter buffer so the real init +;; parser sees an unmodified stream. +(define (%iter-restore! it collected-rev) + ;; collected-rev: tokens in pull order, head = most recent. + ;; Re-prepend them so the next iter-next yields the oldest first. + (let lp ((xs collected-rev)) + (cond ((null? xs) #t) + (else (iter-unget! it (car xs)) + (lp (cdr xs)))))) + +(define (%resolve-braced-array-len ps ty) + ;; ps positioned at `=`, with `{` as the next token. Drain through + ;; the matching `}`, counting top-level (depth-1) initializers, and + ;; restore the consumed tokens. 
Returns the resolved fixed-length + ;; ctype, or `ty` unchanged on EOF / malformed input (the real + ;; parser will produce a proper diagnostic). + (let* ((it (ps-iter ps)) + (t-eq (iter-next it)) + (t-lb (iter-next it))) + (let scan ((collected (list t-lb t-eq)) + (depth 1) + (count 0) + (pending? #f)) + (let ((t (iter-next it))) + (let ((collected1 (cons t collected))) + (cond + ((eq? (tok-kind t) 'EOF) + (%iter-restore! it collected1) ty) + ((and (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'rbrace) + (= depth 1)) + (let ((final (cond (pending? (+ count 1)) (else count)))) + (%iter-restore! it collected1) + (%init-fixed-arr-type ty final))) + ((and (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'comma) + (= depth 1)) + (scan collected1 depth + (cond (pending? (+ count 1)) (else count)) + #f)) + ((and (eq? (tok-kind t) 'PUNCT) + (or (eq? (tok-value t) 'lbrace) + (eq? (tok-value t) 'lparen) + (eq? (tok-value t) 'lbracket))) + (scan collected1 (+ depth 1) count #t)) + ((and (eq? (tok-kind t) 'PUNCT) + (or (eq? (tok-value t) 'rbrace) + (eq? (tok-value t) 'rparen) + (eq? (tok-value t) 'rbracket))) + (scan collected1 (- depth 1) count pending?)) + (else + (scan collected1 depth count #t)))))))) + +(define (%resolve-inferred-local-array-ty ps ty) + ;; If `ty` is an inferred-length array and the next tokens are + ;; `= STR` or `= {`, return a fresh array ctype with the resolved + ;; length. Otherwise return `ty` unchanged. Does not consume any + ;; tokens. + (cond + ((not (and (eq? (ctype-kind ty) 'arr) + (< (cdr (ctype-ext ty)) 0) + (at-punct? ps 'assign))) + ty) + (else + (let ((t2 (peek2 ps))) + (cond + ((eq? (tok-kind t2) 'STR) + (let ((et (car (ctype-ext ty)))) + (cond + ((or (eq? et %t-i8) (eq? et %t-u8)) + (%init-fixed-arr-type ty + (+ 1 (bytevector-length (tok-value t2))))) + (else ty)))) + ((and (eq? (tok-kind t2) 'PUNCT) (eq? 
(tok-value t2) 'lbrace)) + (%resolve-braced-array-len ps ty)) + (else ty)))))) + (define (%init-struct-fields ty) ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. (let ((ext (ctype-ext ty))) @@ -5742,12 +5833,10 @@ (let ((et (car (ctype-ext ty)))) (or (eq? et %t-i8) (eq? et %t-u8))))) (advance ps) - ;; Note: for inferred-length (`int x[] = "..."`) auto arrays the - ;; sm-type still records the original (size=-1) ctype — `sizeof(x)` - ;; in the body would not see the resolved length. The slot is also - ;; sized off the original (= 1 byte), so the path is pre-existing - ;; broken; we don't paper over it here. Real C bootstrap code uses - ;; statics/globals for inferred-length arrays. + ;; For inferred-length `T a[] = "..."` autos the caller resolves + ;; the length up front (see %resolve-inferred-local-array-ty), + ;; so by the time we get here `ty` is fixed-length and `sm`'s + ;; ctype matches. (let* ((slen (bytevector-length s)) (decl (cdr (ctype-ext ty))) (final (cond ((< decl 0) (+ slen 1)) (else decl)))) @@ -5793,10 +5882,11 @@ (cond ((at-punct? ps 'rbrace) (advance ps) - ;; Inferred-length auto path is pre-existing broken (slot - ;; allocated off size=-1, sm-type unfixed). See note in - ;; parse-init-local-aggregate STR branch. ;; Zero out remaining slots if any (declared length > i). + ;; Inferred-length autos arrive here with `decl` already + ;; resolved by %resolve-inferred-local-array-ty in parse-decl, + ;; so the (< decl 0) branch only fires for the synthetic + ;; compound-literal path that bypasses that resolver. (let ((final (cond ((< decl 0) i) (else decl)))) (let zlp ((k i)) (cond diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c @@ -0,0 +1,22 @@ +/* Regression: block-scope inferred-length array `int a[] = {...}` + * must size the array from the initializer, mirroring file-scope + * behaviour. 
Previously the local sym kept ctype size = -1 and the + * frame slot was allocated for 1 byte, so sizeof(a) returned 0 and + * a[2] was an out-of-bounds frame access. + * + * Result: 0. */ + +int main(void) { + int a[] = { 10, 20, 30, 40 }; + if (sizeof(a) != 16) return 1; + if (a[0] != 10) return 2; + if (a[1] != 20) return 3; + if (a[2] != 30) return 4; + if (a[3] != 40) return 5; + /* Confirm sum to ensure no slot overlap clobbered nearby autos. */ + int sum = 0; + int i; + for (i = 0; i < 4; i = i + 1) sum = sum + a[i]; + if (sum != 100) return 6; + return 0; +} diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c @@ -0,0 +1,23 @@ +/* Regression: block-scope inferred-length char array initialised + * from a string literal. `char s[] = "hello";` should size `s` to + * 6 bytes (5 chars + NUL) — same as file scope. Previously the + * local sym kept ctype size = -1 and only one byte of frame was + * allocated, so sizeof(s) was 0 and writes past s[0] clobbered + * adjacent frame slots. + * + * Result: 0. */ + +int main(void) { + char s[] = "hello"; + if (sizeof(s) != 6) return 1; + if (s[0] != 'h') return 2; + if (s[1] != 'e') return 3; + if (s[2] != 'l') return 4; + if (s[3] != 'l') return 5; + if (s[4] != 'o') return 6; + if (s[5] != 0) return 7; + /* Adjacent auto must not have been clobbered by the string store. */ + int guard = 12345; + if (guard != 12345) return 8; + return 0; +} diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit @@ -0,0 +1 @@ +0