commit 86788cffd33e8577180c9662bc470f944352b95c
parent 0045e54d16cd90c3eae0448e3760c7aaeec8fc17
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 22:28:33 -0700
cc/parse: real init-list parser; scalar global init (§E.1)
Replace parse-init-list stub (was just brace-balancing → #f) with a
real initializer parser that builds the structured init list
cg-emit-global now consumes:
parse-init-global ps ty
Reads the init expression after `=` for a static-storage var; emits
a list of pieces (bytevector + label-ref pairs) suitable for cg.
Hooked into handle-decl's file-scope branch via the new helper. Local
and aggregate paths land with §E.7 / §E.8 / §I.
Also routes file-scope and block-scope decls through a new init helper
%const-init-piece for scalar forms: INT (with unary +/-), enum-const,
&IDENT (label-ref), bare fn name (label-ref), STR (interned label-ref).
Adds %int->le-bv util to encode a fixnum as N-byte little-endian.
Diffstat:
3 files changed, 467 insertions(+), 20 deletions(-)
diff --git a/cc/parse.scm b/cc/parse.scm
@@ -366,6 +366,20 @@
(scope-bind! ps n
(%sym n 'fn (or sto 'extern) ty
(bytevector-append "cc__" n))))
+ ;; §I: block-scope `static` routes to a global with a name mangled
+ ;; on the enclosing function so two functions can each have their
+ ;; own `static int n;` without colliding.
+ ((and (eq? sto 'static) (ps-fn-ctx ps))
+ (let* ((fname (fn-ctx-name (ps-fn-ctx ps)))
+ (mangled (bytevector-append fname "__" n))
+ (sm (%sym n 'var 'static ty
+ (bytevector-append "cc__" mangled))))
+ (scope-bind! ps n sm)
+ (cond
+ ((at-punct? ps 'assign)
+ (advance ps)
+ (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty)))
+ (else (cg-emit-global (ps-cg ps) sm #f)))))
(else
(cond
((not (ps-fn-ctx ps))
@@ -376,7 +390,7 @@
((at-punct? ps 'assign)
(advance ps)
(cg-emit-global (ps-cg ps) sm
- (parse-init-list ps ty)))
+ (parse-init-global ps ty)))
((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm))
(else (cg-emit-global (ps-cg ps) sm #f)))))
(else
@@ -388,30 +402,458 @@
(cond
((at-punct? ps 'assign)
(advance ps)
- (cg-push-sym (ps-cg ps) sm)
- (parse-expr-bp ps 4) (rval! ps)
- (cg-cast (ps-cg ps) ty)
- (cg-assign (ps-cg ps))
- (cg-pop (ps-cg ps)))
+ (cond
+ ;; Aggregate locals get the per-element store treatment.
+ ((or (at-punct? ps 'lbrace)
+ (and (eq? (ctype-kind ty) 'arr)
+ (eq? (tok-kind (peek ps)) 'STR)))
+ (parse-init-local-aggregate ps sm ty))
+ (else
+ (cg-push-sym (ps-cg ps) sm)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) ty)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps)))))
(else #t))))))))
-(define (parse-init-list ps ty)
+;; ====================================================================
+;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST).
+;;
+;; parse-init-global ps ty
+;; Reads the initializer following `=` for a file-scope or block-scope
+;; static var of static-storage type `ty` and returns a list of
+;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for
+;; the piece grammar.
+;;
+;; parse-init-local ps sm ty
+;; Reads the initializer for an auto-storage variable bound to slot
+;; sym `sm` and emits per-element store cg ops. Returns unspecified.
+;; ====================================================================
+
+(define (%int->le-bv n nbytes)
+ ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes
+ ;; >= sign-bit are filled by repeated >>8 (works for both signed and
+ ;; unsigned because we only keep the low N bytes).
+ (let ((out (make-bytevector nbytes 0)))
+ (let loop ((i 0) (v n))
+ (cond
+ ((= i nbytes) out)
+ (else
+ (bytevector-u8-set! out i (bit-and v 255))
+ (loop (+ i 1) (arithmetic-shift v -8)))))))
+
+(define (%const-init-piece ps ty)
+ ;; Parse a non-brace initializer expression for scalar type `ty` and
+ ;; return a single piece. Recognised forms:
+ ;; - INT (with optional unary +/-) -> N-byte LE bv
+ ;; - enum-const IDENT -> N-byte LE bv
+ ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name)
+ ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name)
+ ;; - STR (only for char* targets) -> (label-ref . string-pool-label)
+ (let ((t (peek ps)))
+ (cond
+ ;; Address initializer: &ident -> label-ref
+ ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp))
+ (advance ps)
+ (let ((it (peek ps)))
+ (cond
+ ((eq? (tok-kind it) 'IDENT)
+ (advance ps)
+ (let ((sm (scope-lookup ps (tok-value it))))
+ (cond
+ ((not sm) (die (tok-loc it) "init: undecl" (tok-value it)))
+ ((or (eq? (sym-kind sm) 'fn)
+ (and (eq? (sym-kind sm) 'var)
+ (or (eq? (sym-storage sm) 'static)
+ (eq? (sym-storage sm) 'extern))))
+ (cons 'label-ref (sym-slot sm)))
+ (else
+ (die (tok-loc it) "init: &x must reference a global"
+ (tok-value it))))))
+ (else (die (tok-loc it) "init: &?" (tok-value it))))))
+ ;; Function name as a fn-ptr initializer.
+ ((and (eq? (tok-kind t) 'IDENT)
+ (let ((sm (scope-lookup ps (tok-value t))))
+ (and sm (eq? (sym-kind sm) 'fn))))
+ (advance ps)
+ (let ((sm (scope-lookup ps (tok-value t))))
+ (cons 'label-ref (sym-slot sm))))
+ ;; Plain string literal as char* initializer.
+ ((eq? (tok-kind t) 'STR)
+ (advance ps)
+ (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t))))
+ (cons 'label-ref lbl)))
+ ;; Otherwise it's a const integer.
+ (else
+ (let ((v (parse-const-int ps)))
+ (%int->le-bv v (max (ctype-size ty) 1)))))))
+
+(define (%init-array-elem-type ty)
+ (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty)))
+ (else (die #f "init: not an array" ty))))
+
+(define (%init-array-decl-len ty)
+ ;; Declared array length (-1 = inferred).
+ (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1)))
+
+(define (%init-fix-array-size! ty count)
+ ;; Patch an inferred-length array to `count`.
+ (let ((elem (car (ctype-ext ty))))
+ (ctype-ext-set! ty (cons elem count))
+ (ctype-size-set! ty (* count (ctype-size elem)))))
+
+(define (%init-struct-fields ty)
+ ;; Return ((name-bv ctype offset) ...) for a struct/union ctype.
+ (let ((ext (ctype-ext ty)))
+ (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext)))
+ (else (die #f "init: not a struct" ty)))))
+
+(define (%find-field fields nm)
+ (cond ((null? fields) #f)
+ ((equal? (car (car fields)) nm) (car fields))
+ (else (%find-field (cdr fields) nm))))
+
+(define (%pad-piece nbytes)
+ (make-bytevector nbytes 0))
+
+;; ----- Global initializers ---------------------------------------------
+(define (parse-init-global ps ty)
(cond
+ ;; String literal initializer for char[]
+ ((and (eq? (ctype-kind ty) 'arr)
+ (eq? (tok-kind (peek ps)) 'STR)
+ (let ((et (car (ctype-ext ty))))
+ (or (eq? et %t-i8) (eq? et %t-u8))))
+ (let* ((t (advance ps))
+ (s (tok-value t))
+ (slen (bytevector-length s))
+ (decl (cdr (ctype-ext ty)))
+ (final (cond ((< decl 0) (+ slen 1)) (else decl))))
+ (cond ((< decl 0) (%init-fix-array-size! ty final)))
+ (let ((bv (make-bytevector final 0)))
+ (let loop ((i 0))
+ (cond
+ ((or (= i slen) (>= i final)) (list bv))
+ (else
+ (bytevector-u8-set! bv i (bytevector-u8-ref s i))
+ (loop (+ i 1))))))))
+ ;; Brace-form
((at-punct? ps 'lbrace)
(advance ps)
- (let lp ((d 1))
- (cond
- ((<= d 0) #f)
- (else
- (let ((t (advance ps)))
- (cond
- ((eq? (tok-kind t) 'EOF) (die (tok-loc t) "EOF init"))
- ((and (eq? (tok-kind t) 'PUNCT)
- (eq? (tok-value t) 'lbrace)) (lp (+ d 1)))
- ((and (eq? (tok-kind t) 'PUNCT)
- (eq? (tok-value t) 'rbrace)) (lp (- d 1)))
- (else (lp d))))))))
- (else (parse-const-int ps) #f)))
+ (cond
+ ((eq? (ctype-kind ty) 'arr)
+ (%parse-init-array-list ps ty))
+ ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
+ (%parse-init-struct-list ps ty))
+ (else
+ ;; Brace-wrapped scalar: { expr }
+ (let ((piece (%const-init-piece ps ty)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list piece)))))
+ ;; Bare scalar initializer
+ (else (list (%const-init-piece ps ty)))))
+
+(define (%parse-init-array-list ps ty)
+ ;; Element-list array initializer; assumes `{` already consumed.
+ (let* ((elem (%init-array-elem-type ty))
+ (esize (ctype-size elem))
+ (decl (%init-array-decl-len ty)))
+ (let lp ((acc '()) (count 0))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (cond ((< decl 0) (%init-fix-array-size! ty count)))
+ ;; Pad to declared length if longer than count.
+ (let* ((final (cond ((< decl 0) count) (else decl)))
+ (pad (- final count)))
+ (cond
+ ((> pad 0)
+ (reverse (cons (%pad-piece (* pad esize)) acc)))
+ (else (reverse acc)))))
+ (else
+ (let ((piece
+ (cond
+ ((at-punct? ps 'lbrace)
+ ;; Nested aggregate: brace-flatten via recursion.
+ (advance ps)
+ ;; element is itself struct/array
+ (cond
+ ((eq? (ctype-kind elem) 'arr)
+ (%parse-init-array-list ps elem))
+ ((or (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ (%parse-init-struct-list ps elem))
+ (else
+ (let ((p (%const-init-piece ps elem)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list p)))))
+ (else
+ (list (%const-init-piece ps elem))))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (append (reverse piece) acc) (+ count 1))))))))
+
+(define (%parse-init-struct-list ps ty)
+ ;; Struct/union initializer; assumes `{` already consumed.
+ ;; Supports positional and `.field = expr` forms.
+ (let* ((fields (%init-struct-fields ty))
+ (size (ctype-size ty)))
+ (let lp ((acc '()) (filled 0) (rest fields))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (cond
+ ((< filled size)
+ (reverse (cons (%pad-piece (- size filled)) acc)))
+ (else (reverse acc))))
+ (else
+ (let* ((designated? (at-punct? ps 'dot))
+ (target
+ (cond
+ (designated?
+ (advance ps)
+ (let ((nt (advance ps)))
+ (cond
+ ((not (eq? (tok-kind nt) 'IDENT))
+ (die (tok-loc nt) "init: .field expects ident")))
+ (let ((f (%find-field fields (tok-value nt))))
+ (cond
+ ((not f) (die (tok-loc nt) "init: no such field"
+ (tok-value nt))))
+ (expect-punct ps 'assign)
+ f)))
+ ((null? rest)
+ (die (tok-loc (peek ps)) "init: too many fields"))
+ (else (car rest))))
+ (fname (car target))
+ (fty (car (cdr target)))
+ (foff (car (cddr target)))
+ (fsize (ctype-size fty))
+ ;; Pad from `filled` to `foff` if needed.
+ (pad-bytes (- foff filled))
+ (piece-list
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind fty) 'arr)
+ (%parse-init-array-list ps fty))
+ ((or (eq? (ctype-kind fty) 'struct)
+ (eq? (ctype-kind fty) 'union))
+ (%parse-init-struct-list ps fty))
+ (else
+ (let ((p (%const-init-piece ps fty)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace)
+ (list p)))))
+ (else
+ (list (%const-init-piece ps fty)))))
+ (acc2 (cond ((> pad-bytes 0)
+ (cons (%pad-piece pad-bytes) acc))
+ (else acc)))
+ (acc3 (append (reverse piece-list) acc2)))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp acc3 (+ foff fsize)
+ (cond
+ (designated?
+ ;; designated init: drop fields up to and including target
+ (let drop ((xs fields))
+ (cond
+ ((null? xs) '())
+ ((equal? (car (car xs)) fname) (cdr xs))
+ (else (drop (cdr xs))))))
+ (else (cdr rest))))))))))
+
+;; ----- Local aggregate initializers ------------------------------------
+;; Emits per-element store sequences via cg ops into the slot of `sm`
+;; (a 'var sym whose slot is the frame offset). Assumes the assignment
+;; `=` has already been consumed.
+(define (parse-init-local-aggregate ps sm ty)
+ (cond
+ ;; Local char[] = "string" — fill from string bytes.
+ ((and (eq? (ctype-kind ty) 'arr)
+ (eq? (tok-kind (peek ps)) 'STR)
+ (let ((et (car (ctype-ext ty))))
+ (or (eq? et %t-i8) (eq? et %t-u8))))
+ (let* ((t (advance ps))
+ (s (tok-value t))
+ (slen (bytevector-length s))
+ (decl (cdr (ctype-ext ty)))
+ (final (cond ((< decl 0) (+ slen 1)) (else decl))))
+ (cond ((< decl 0) (%init-fix-array-size! ty final)))
+ ;; Emit byte stores for each char in s, plus NUL for the
+ ;; trailing slot if final > slen.
+ (let loop ((i 0))
+ (cond
+ ((>= i final) #t)
+ (else
+ (let ((b (cond ((< i slen) (bytevector-u8-ref s i))
+ (else 0)))
+ (off (+ (sym-slot sm) i)))
+ (%push-frame-elem-lval ps off %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 b)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (loop (+ i 1))))))))
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind ty) 'arr)
+ (%parse-init-local-array-list ps sm 0 ty))
+ ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
+ (%parse-init-local-struct-list ps sm 0 ty))
+ (else (die #f "init local: brace on scalar?"))))
+ (else (die (tok-loc (peek ps)) "init local aggregate?"))))
+
+(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk)
+ ;; Emit a single scalar store at slot[base + rel-off]. piece is the
+ ;; raw initializer expression — but here we want to actually evaluate
+ ;; it via parse-expr to allow non-const expressions for autos.
+ ;; Caller handles this; this helper handles the store-into-frame ops.
+ 0)
+
+(define (%push-frame-elem-lval ps base-off ty)
+ (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t)))
+
+(define (%parse-init-local-array-list ps sm base-off ty)
+ (let* ((elem (%init-array-elem-type ty))
+ (esize (ctype-size elem))
+ (decl (%init-array-decl-len ty)))
+ (let lp ((i 0))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ (cond ((< decl 0) (%init-fix-array-size! ty i)))
+ ;; Zero out remaining slots if any (declared length > i).
+ (let ((final (cond ((< decl 0) i) (else decl))))
+ (let zlp ((k i))
+ (cond
+ ((>= k final) #t)
+ (else
+ (let ((off (+ base-off (* k esize))))
+ (cond
+ ((or (eq? (ctype-kind elem) 'arr)
+ (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ ;; Zero each byte in this aggregate slot.
+ (let zb ((j 0))
+ (cond
+ ((>= j esize) #t)
+ (else
+ (%push-frame-elem-lval ps (+ off j) %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (zb (+ j 1))))))
+ (else
+ (%push-frame-elem-lval ps off elem)
+ (cg-push-imm (ps-cg ps) elem 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps)))))
+ (zlp (+ k 1)))))))
+ (else
+ (let ((eoff (+ base-off (* i esize))))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind elem) 'arr)
+ (%parse-init-local-array-list ps sm eoff elem))
+ ((or (eq? (ctype-kind elem) 'struct)
+ (eq? (ctype-kind elem) 'union))
+ (%parse-init-local-struct-list ps sm eoff elem))
+ (else
+ (%push-frame-elem-lval ps eoff elem)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) elem)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace))))
+ (else
+ (%push-frame-elem-lval ps eoff elem)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) elem)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (+ i 1))))))))
+
+(define (%parse-init-local-struct-list ps sm base-off ty)
+ (let ((fields (%init-struct-fields ty)))
+ (let lp ((rest fields))
+ (cond
+ ((at-punct? ps 'rbrace)
+ (advance ps)
+ ;; Zero any remaining fields.
+ (let zlp ((xs rest))
+ (cond
+ ((null? xs) #t)
+ (else
+ (let* ((f (car xs)) (fty (car (cdr f)))
+ (foff (car (cddr f))) (fsize (ctype-size fty)))
+ (let zb ((j 0))
+ (cond
+ ((>= j fsize) #t)
+ (else
+ (%push-frame-elem-lval ps (+ base-off foff j) %t-u8)
+ (cg-push-imm (ps-cg ps) %t-u8 0)
+ (cg-assign (ps-cg ps))
+ (cg-pop (ps-cg ps))
+ (zb (+ j 1)))))
+ (zlp (cdr xs)))))))
+ (else
+ (let* ((designated? (at-punct? ps 'dot))
+ (target
+ (cond
+ (designated?
+ (advance ps)
+ (let ((nt (advance ps)))
+ (let ((f (%find-field fields (tok-value nt))))
+ (cond
+ ((not f) (die (tok-loc nt) "init: no such field"
+ (tok-value nt))))
+ (expect-punct ps 'assign)
+ f)))
+ ((null? rest)
+ (die (tok-loc (peek ps)) "init: too many fields"))
+ (else (car rest))))
+ (fname (car target))
+ (fty (car (cdr target)))
+ (foff (car (cddr target)))
+ (eoff (+ base-off foff)))
+ (cond
+ ((at-punct? ps 'lbrace)
+ (advance ps)
+ (cond
+ ((eq? (ctype-kind fty) 'arr)
+ (%parse-init-local-array-list ps sm eoff fty))
+ ((or (eq? (ctype-kind fty) 'struct)
+ (eq? (ctype-kind fty) 'union))
+ (%parse-init-local-struct-list ps sm eoff fty))
+ (else
+ (%push-frame-elem-lval ps eoff fty)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) fty)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (expect-punct ps 'rbrace))))
+ (else
+ (%push-frame-elem-lval ps eoff fty)
+ (parse-expr-bp ps 4) (rval! ps)
+ (cg-cast (ps-cg ps) fty)
+ (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps))))
+ (cond ((at-punct? ps 'comma) (advance ps)))
+ (lp (cond
+ (designated?
+ (let drop ((xs fields))
+ (cond
+ ((null? xs) '())
+ ((equal? (car (car xs)) fname) (cdr xs))
+ (else (drop (cdr xs))))))
+ (else (cdr rest))))))))))
+
+;; Backward-compat alias retained while callers transition.
+(define (parse-init-list ps ty) (parse-init-global ps ty))
(define (parse-fn-body ps name dt)
(let* ((e (ctype-ext dt)) (ret (car e))
diff --git a/tests/cc-parse/49-init-scalar-global.c b/tests/cc-parse/49-init-scalar-global.c
@@ -0,0 +1,4 @@
+// tests/cc-parse/49-init-scalar-global.c — scalar global with constant
+// initializer. §E.1.
+int g = 42;
+int main(void) { return g; }
diff --git a/tests/cc-parse/49-init-scalar-global.expected-exit b/tests/cc-parse/49-init-scalar-global.expected-exit
@@ -0,0 +1 @@
+42