boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 359d0a432184ee44936c6330667fcda4ae729907
parent e883106d20eda5ab8aa4c0f52c3594c9043b633c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  1 May 2026 19:39:44 -0700

cc/parse: infer length for block-scope arrays; reserve frame slot before init

`int a[] = {1,2,3};` and `char s[] = "hi";` at block scope previously
allocated a 1-byte frame slot off the unresolved (size=-1) array
ctype, then per-element stores in parse-init-local-aggregate wrote
past frame-hi. The next %cg-spill-reg call (e.g. when cg-cast spills
the int-literal value) allocated 8-byte slots that landed inside
the array's footprint, clobbering elements and any subsequent
local. sizeof(a) was also 0 in the body.

Resolve the inferred length BEFORE cg-alloc-slot: peek the
initializer following `=` (top-level comma count for a brace-init,
or string length+1 for a STR), unget the tokens, rebuild the
array ctype with the resolved length, and use that for both the
slot size and the bound sym-type. This advances frame-hi past
the array so all initializer-expression spills land above it,
and gives sizeof(a) the right value in the body.

Tests: 135-block-inferred-array (int a[]={10,20,30,40} + sentinel),
136-block-inferred-string (char s[]="hello" + sentinel).

Diffstat:
M cc/cc.scm | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A tests/cc/135-block-inferred-array.c | 21 +++++++++++++++++++++
A tests/cc/135-block-inferred-array.expected-exit | 1 +
A tests/cc/136-block-inferred-string.c | 20 ++++++++++++++++++++
A tests/cc/136-block-inferred-string.expected-exit | 1 +
5 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -5301,6 +5301,91 @@ (handle-decl ps sto n2 t2) (lp))) (else (expect-punct ps 'semi) 'decl)))))))))) +;; ---- Block-scope inferred-length array length resolution ------------- +;; The token iterator buffers lookahead in a list (see tok-iter); we +;; can pull arbitrarily many tokens, then push them all back via +;; iter-unget!. We use that to peek the initializer that follows `=` +;; (without consuming it) and count its elements so cg-alloc-slot can +;; reserve the right number of bytes BEFORE the initializer-emission +;; loop runs (and starts spilling intermediate values into newly- +;; allocated frame slots). +;; +;; Only the OUTERMOST length is inferred per C99 6.7.8/22, so for +;; `int x[][3] = {{1,2,3},{4,5,6}};` we just count top-level +;; brace-or-comma groups; the inner brace groups don't matter. + +(define (%peek-inferred-arr-init? ps) + ;; Check whether the next-after-`=` token starts a brace-init or a + ;; string-literal — the only initializer shapes that can resolve a + ;; block-scope inferred-length array. We do NOT consume `=`; we + ;; peek2 instead. + (let ((t2 (peek2 ps))) + (or (and (eq? (tok-kind t2) 'PUNCT) (eq? (tok-value t2) 'lbrace)) + (eq? (tok-kind t2) 'STR)))) + +(define (%resolve-inferred-arr-len ps ty) + ;; Returns a fresh array ctype with the resolved length. Does NOT + ;; consume the `=` or any of the initializer tokens — every token + ;; pulled is unget back in original order. + (let* ((eq-tok (iter-next (ps-iter ps))) ; consume `=` (will unget) + (first (iter-next (ps-iter ps))) ; consume `{` or STR + (collected (list first eq-tok)) ; head order: revs at end + (count + (cond + ((eq? (tok-kind first) 'STR) + ;; String length + NUL. + (+ (bytevector-length (tok-value first)) 1)) + (else + ;; first is `{`. Count top-level commas + 1, ignoring a + ;; trailing comma before `}`. Track brace depth so nested + ;; `{` for sub-aggregates are skipped. + (let lp ((depth 1) (n 0) (saw-elem? #f) (last-was-comma? 
#f) + (acc collected)) + (let ((t (iter-next (ps-iter ps)))) + (let ((acc2 (cons t acc))) + (cond + ((eq? (tok-kind t) 'EOF) + ;; Bail; let the real parser report the error + ;; after we restore tokens. + (%inferred-arr-restore! ps acc2) + (die #f "init: unterminated brace")) + ((and (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'lbrace)) + (lp (+ depth 1) n #t #f acc2)) + ((and (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'rbrace)) + (cond + ((= depth 1) + ;; Done. Restore tokens (acc2 includes the + ;; closing `}`). + (%inferred-arr-restore! ps acc2) + (cond ((not saw-elem?) 0) + (last-was-comma? n) + (else (+ n 1)))) + (else (lp (- depth 1) n saw-elem? #f acc2)))) + ((and (eq? (tok-kind t) 'PUNCT) + (eq? (tok-value t) 'comma) + (= depth 1)) + (lp depth (+ n 1) saw-elem? #t acc2)) + (else + (lp depth n #t #f acc2))))))))) + ) + (cond + ((eq? (tok-kind first) 'STR) + (%inferred-arr-restore! ps collected))) + (%init-fixed-arr-type ty count))) + +(define (%inferred-arr-restore! ps acc) + ;; acc is a stack of tokens in REVERSE consume order (most-recent + ;; first). iter-unget! prepends one at a time, so iterating acc in + ;; its current order pushes them back in the right sequence — + ;; i.e. the oldest-consumed token ends up at the front of the + ;; lookahead buffer. + (let lp ((xs acc)) + (cond + ((null? xs) #t) + (else (iter-unget! (ps-iter ps) (car xs)) (lp (cdr xs)))))) + (define (handle-decl ps sto n ty) (cond ((not n) (die #f "no name")) @@ -5360,7 +5445,22 @@ (scope-bind! ps n sm) (cg-add-tentative! (ps-cg ps) n))))) (else - (let* ((sz (max (ctype-size ty) 1)) + ;; Block-scope inferred-length array (`int a[] = {…};` or + ;; `char s[] = "…";`): peek the initializer past `=` to count + ;; elements / measure the string and rebuild `ty` with the + ;; resolved length BEFORE cg-alloc-slot. 
Otherwise the slot + ;; is sized off a -1 / 0 ctype-size (capped to 1 byte) and + ;; the per-element stores in parse-init-local-aggregate write + ;; past frame-hi — the next %cg-spill-reg then allocates + ;; right inside the array, clobbering elements. + (let* ((ty (cond + ((and (eq? (ctype-kind ty) 'arr) + (< (cdr (ctype-ext ty)) 0) + (at-punct? ps 'assign) + (%peek-inferred-arr-init? ps)) + (%resolve-inferred-arr-len ps ty)) + (else ty))) + (sz (max (ctype-size ty) 1)) (al (max (ctype-align ty) 1)) (sl (cg-alloc-slot (ps-cg ps) sz al)) (sm (%sym n 'var (or sto 'auto) ty sl #t))) diff --git a/tests/cc/135-block-inferred-array.c b/tests/cc/135-block-inferred-array.c @@ -0,0 +1,21 @@ +/* Block-scope array with inferred length: `int a[] = {...};` + * Verify (a) sizeof(a) == 16 (4 elements * sizeof(int)), + * (b) all elements readable with the right value, + * (c) sum is correct, and (d) an adjacent local declared right + * after the array is NOT clobbered by the array's initializer + * stores or by any spills emitted during initialization. + */ + +int main(int argc, char **argv) { + int a[] = {10, 20, 30, 40}; + int sentinel = 0xCAFE; + + if (sizeof(a) != 16) return 1; + if (a[0] != 10) return 2; + if (a[1] != 20) return 3; + if (a[2] != 30) return 4; + if (a[3] != 40) return 5; + if ((a[0] + a[1] + a[2] + a[3]) != 100) return 6; + if (sentinel != 0xCAFE) return 7; + return 0; +} diff --git a/tests/cc/135-block-inferred-array.expected-exit b/tests/cc/135-block-inferred-array.expected-exit @@ -0,0 +1 @@ +0 diff --git a/tests/cc/136-block-inferred-string.c b/tests/cc/136-block-inferred-string.c @@ -0,0 +1,20 @@ +/* Block-scope char[] with string-literal initializer: + * `char s[] = "hello";`. sizeof(s) must be 6 (5 chars + NUL), + * each byte must read back correctly, and an adjacent local + * declared after must NOT be clobbered. 
+ */ + +int main(int argc, char **argv) { + char s[] = "hello"; + int sentinel = 0xCAFE; + + if (sizeof(s) != 6) return 1; + if (s[0] != 'h') return 2; + if (s[1] != 'e') return 3; + if (s[2] != 'l') return 4; + if (s[3] != 'l') return 5; + if (s[4] != 'o') return 6; + if (s[5] != 0) return 7; + if (sentinel != 0xCAFE) return 8; + return 0; +} diff --git a/tests/cc/136-block-inferred-string.expected-exit b/tests/cc/136-block-inferred-string.expected-exit @@ -0,0 +1 @@ +0