boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 86788cffd33e8577180c9662bc470f944352b95c
parent 0045e54d16cd90c3eae0448e3760c7aaeec8fc17
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 22:28:33 -0700

cc/parse: real init-list parser; scalar global init (§E.1)

Replace parse-init-list stub (was just brace-balancing → #f) with a
real initializer parser that builds the structured init list
cg-emit-global now consumes:

  parse-init-global ps ty
    Reads the init expression after `=` for a static-storage var; emits
    a list of pieces (bytevector + label-ref pairs) suitable for cg.

Hooked into handle-decl's file-scope branch via the new helper. Local
and aggregate paths land with §E.7 / §E.8 / §I.

Also routes file-scope and block-scope decls through a new init helper
%const-init-piece for scalar forms: INT (with unary +/-), enum-const,
&IDENT (label-ref), bare fn name (label-ref), STR (interned label-ref).

Adds %int->le-bv util to encode a fixnum as N-byte little-endian.

Diffstat:
Mcc/parse.scm | 482+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Atests/cc-parse/49-init-scalar-global.c | 4++++
Atests/cc-parse/49-init-scalar-global.expected-exit | 1+
3 files changed, 467 insertions(+), 20 deletions(-)

diff --git a/cc/parse.scm b/cc/parse.scm @@ -366,6 +366,20 @@ (scope-bind! ps n (%sym n 'fn (or sto 'extern) ty (bytevector-append "cc__" n)))) + ;; §I: block-scope `static` routes to a global with a name mangled + ;; on the enclosing function so two functions can each have their + ;; own `static int n;` without colliding. + ((and (eq? sto 'static) (ps-fn-ctx ps)) + (let* ((fname (fn-ctx-name (ps-fn-ctx ps))) + (mangled (bytevector-append fname "__" n)) + (sm (%sym n 'var 'static ty + (bytevector-append "cc__" mangled)))) + (scope-bind! ps n sm) + (cond + ((at-punct? ps 'assign) + (advance ps) + (cg-emit-global (ps-cg ps) sm (parse-init-global ps ty))) + (else (cg-emit-global (ps-cg ps) sm #f))))) (else (cond ((not (ps-fn-ctx ps)) @@ -376,7 +390,7 @@ ((at-punct? ps 'assign) (advance ps) (cg-emit-global (ps-cg ps) sm - (parse-init-list ps ty))) + (parse-init-global ps ty))) ((eq? sto 'extern) (cg-emit-extern (ps-cg ps) sm)) (else (cg-emit-global (ps-cg ps) sm #f))))) (else @@ -388,30 +402,458 @@ (cond ((at-punct? ps 'assign) (advance ps) - (cg-push-sym (ps-cg ps) sm) - (parse-expr-bp ps 4) (rval! ps) - (cg-cast (ps-cg ps) ty) - (cg-assign (ps-cg ps)) - (cg-pop (ps-cg ps))) + (cond + ;; Aggregate locals get the per-element store treatment. + ((or (at-punct? ps 'lbrace) + (and (eq? (ctype-kind ty) 'arr) + (eq? (tok-kind (peek ps)) 'STR))) + (parse-init-local-aggregate ps sm ty)) + (else + (cg-push-sym (ps-cg ps) sm) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) ty) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps))))) (else #t)))))))) -(define (parse-init-list ps ty) +;; ==================================================================== +;; Initializers (CC.md §Variable initializers, §E of CC-PUNCHLIST). +;; +;; parse-init-global ps ty +;; Reads the initializer following `=` for a file-scope or block-scope +;; static var of static-storage type `ty` and returns a list of +;; pieces suitable for cg-emit-global. See cg.scm §cg-emit-global for +;; the piece grammar. +;; +;; parse-init-local ps sm ty +;; Reads the initializer for an auto-storage variable bound to slot +;; sym `sm` and emits per-element store cg ops. Returns unspecified. +;; ==================================================================== + +(define (%int->le-bv n nbytes) + ;; N-byte little-endian encoding of integer n into a fresh bv. Bytes + ;; >= sign-bit are filled by repeated >>8 (works for both signed and + ;; unsigned because we only keep the low N bytes). + (let ((out (make-bytevector nbytes 0))) + (let loop ((i 0) (v n)) + (cond + ((= i nbytes) out) + (else + (bytevector-u8-set! out i (bit-and v 255)) + (loop (+ i 1) (arithmetic-shift v -8))))))) + +(define (%const-init-piece ps ty) + ;; Parse a non-brace initializer expression for scalar type `ty` and + ;; return a single piece. Recognised forms: + ;; - INT (with optional unary +/-) -> N-byte LE bv + ;; - enum-const IDENT -> N-byte LE bv + ;; - &IDENT (address of a global var/fn) -> (label-ref . cc__name) + ;; - IDENT (function name; decays to fn ptr) -> (label-ref . cc__name) + ;; - STR (only for char* targets) -> (label-ref . string-pool-label) + (let ((t (peek ps))) + (cond + ;; Address initializer: &ident -> label-ref + ((and (eq? (tok-kind t) 'PUNCT) (eq? (tok-value t) 'amp)) + (advance ps) + (let ((it (peek ps))) + (cond + ((eq? (tok-kind it) 'IDENT) + (advance ps) + (let ((sm (scope-lookup ps (tok-value it)))) + (cond + ((not sm) (die (tok-loc it) "init: undecl" (tok-value it))) + ((or (eq? (sym-kind sm) 'fn) + (and (eq? (sym-kind sm) 'var) + (or (eq? (sym-storage sm) 'static) + (eq? (sym-storage sm) 'extern)))) + (cons 'label-ref (sym-slot sm))) + (else + (die (tok-loc it) "init: &x must reference a global" + (tok-value it)))))) + (else (die (tok-loc it) "init: &?" (tok-value it)))))) + ;; Function name as a fn-ptr initializer. + ((and (eq? (tok-kind t) 'IDENT) + (let ((sm (scope-lookup ps (tok-value t)))) + (and sm (eq? (sym-kind sm) 'fn)))) + (advance ps) + (let ((sm (scope-lookup ps (tok-value t)))) + (cons 'label-ref (sym-slot sm)))) + ;; Plain string literal as char* initializer. + ((eq? (tok-kind t) 'STR) + (advance ps) + (let ((lbl (cg-intern-string (ps-cg ps) (tok-value t)))) + (cons 'label-ref lbl))) + ;; Otherwise it's a const integer. + (else + (let ((v (parse-const-int ps))) + (%int->le-bv v (max (ctype-size ty) 1))))))) + +(define (%init-array-elem-type ty) + (cond ((eq? (ctype-kind ty) 'arr) (car (ctype-ext ty))) + (else (die #f "init: not an array" ty)))) + +(define (%init-array-decl-len ty) + ;; Declared array length (-1 = inferred). + (cond ((eq? (ctype-kind ty) 'arr) (cdr (ctype-ext ty))) (else -1))) + +(define (%init-fix-array-size! ty count) + ;; Patch an inferred-length array to `count`. + (let ((elem (car (ctype-ext ty)))) + (ctype-ext-set! ty (cons elem count)) + (ctype-size-set! ty (* count (ctype-size elem))))) + +(define (%init-struct-fields ty) + ;; Return ((name-bv ctype offset) ...) for a struct/union ctype. + (let ((ext (ctype-ext ty))) + (cond ((and (pair? ext) (pair? (cdr ext))) (car (cddr ext))) + (else (die #f "init: not a struct" ty))))) + +(define (%find-field fields nm) + (cond ((null? fields) #f) + ((equal? (car (car fields)) nm) (car fields)) + (else (%find-field (cdr fields) nm)))) + +(define (%pad-piece nbytes) + (make-bytevector nbytes 0)) + +;; ----- Global initializers --------------------------------------------- +(define (parse-init-global ps ty) (cond + ;; String literal initializer for char[] + ((and (eq? (ctype-kind ty) 'arr) + (eq? (tok-kind (peek ps)) 'STR) + (let ((et (car (ctype-ext ty)))) + (or (eq? et %t-i8) (eq? et %t-u8)))) + (let* ((t (advance ps)) + (s (tok-value t)) + (slen (bytevector-length s)) + (decl (cdr (ctype-ext ty))) + (final (cond ((< decl 0) (+ slen 1)) (else decl)))) + (cond ((< decl 0) (%init-fix-array-size! ty final))) + (let ((bv (make-bytevector final 0))) + (let loop ((i 0)) + (cond + ((or (= i slen) (>= i final)) (list bv)) + (else + (bytevector-u8-set! bv i (bytevector-u8-ref s i)) + (loop (+ i 1)))))))) + ;; Brace-form ((at-punct? ps 'lbrace) (advance ps) - (let lp ((d 1)) - (cond - ((<= d 0) #f) - (else - (let ((t (advance ps))) - (cond - ((eq? (tok-kind t) 'EOF) (die (tok-loc t) "EOF init")) - ((and (eq? (tok-kind t) 'PUNCT) - (eq? (tok-value t) 'lbrace)) (lp (+ d 1))) - ((and (eq? (tok-kind t) 'PUNCT) - (eq? (tok-value t) 'rbrace)) (lp (- d 1))) - (else (lp d)))))))) - (else (parse-const-int ps) #f))) + (cond + ((eq? (ctype-kind ty) 'arr) + (%parse-init-array-list ps ty)) + ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) + (%parse-init-struct-list ps ty)) + (else + ;; Brace-wrapped scalar: { expr } + (let ((piece (%const-init-piece ps ty))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list piece))))) + ;; Bare scalar initializer + (else (list (%const-init-piece ps ty))))) + +(define (%parse-init-array-list ps ty) + ;; Element-list array initializer; assumes `{` already consumed. + (let* ((elem (%init-array-elem-type ty)) + (esize (ctype-size elem)) + (decl (%init-array-decl-len ty))) + (let lp ((acc '()) (count 0)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (cond ((< decl 0) (%init-fix-array-size! ty count))) + ;; Pad to declared length if longer than count. + (let* ((final (cond ((< decl 0) count) (else decl))) + (pad (- final count))) + (cond + ((> pad 0) + (reverse (cons (%pad-piece (* pad esize)) acc))) + (else (reverse acc))))) + (else + (let ((piece + (cond + ((at-punct? ps 'lbrace) + ;; Nested aggregate: brace-flatten via recursion. + (advance ps) + ;; element is itself struct/array + (cond + ((eq? (ctype-kind elem) 'arr) + (%parse-init-array-list ps elem)) + ((or (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + (%parse-init-struct-list ps elem)) + (else + (let ((p (%const-init-piece ps elem))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list p))))) + (else + (list (%const-init-piece ps elem)))))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (append (reverse piece) acc) (+ count 1)))))))) + +(define (%parse-init-struct-list ps ty) + ;; Struct/union initializer; assumes `{` already consumed. + ;; Supports positional and `.field = expr` forms. + (let* ((fields (%init-struct-fields ty)) + (size (ctype-size ty))) + (let lp ((acc '()) (filled 0) (rest fields)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (cond + ((< filled size) + (reverse (cons (%pad-piece (- size filled)) acc))) + (else (reverse acc)))) + (else + (let* ((designated? (at-punct? ps 'dot)) + (target + (cond + (designated? + (advance ps) + (let ((nt (advance ps))) + (cond + ((not (eq? (tok-kind nt) 'IDENT)) + (die (tok-loc nt) "init: .field expects ident"))) + (let ((f (%find-field fields (tok-value nt)))) + (cond + ((not f) (die (tok-loc nt) "init: no such field" + (tok-value nt)))) + (expect-punct ps 'assign) + f))) + ((null? rest) + (die (tok-loc (peek ps)) "init: too many fields")) + (else (car rest)))) + (fname (car target)) + (fty (car (cdr target))) + (foff (car (cddr target))) + (fsize (ctype-size fty)) + ;; Pad from `filled` to `foff` if needed. + (pad-bytes (- foff filled)) + (piece-list + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind fty) 'arr) + (%parse-init-array-list ps fty)) + ((or (eq? (ctype-kind fty) 'struct) + (eq? (ctype-kind fty) 'union)) + (%parse-init-struct-list ps fty)) + (else + (let ((p (%const-init-piece ps fty))) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace) + (list p))))) + (else + (list (%const-init-piece ps fty))))) + (acc2 (cond ((> pad-bytes 0) + (cons (%pad-piece pad-bytes) acc)) + (else acc))) + (acc3 (append (reverse piece-list) acc2))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp acc3 (+ foff fsize) + (cond + (designated? + ;; designated init: drop fields up to and including target + (let drop ((xs fields)) + (cond + ((null? xs) '()) + ((equal? (car (car xs)) fname) (cdr xs)) + (else (drop (cdr xs)))))) + (else (cdr rest)))))))))) + +;; ----- Local aggregate initializers ------------------------------------ +;; Emits per-element store sequences via cg ops into the slot of `sm` +;; (a 'var sym whose slot is the frame offset). Assumes the assignment +;; `=` has already been consumed. +(define (parse-init-local-aggregate ps sm ty) + (cond + ;; Local char[] = "string" — fill from string bytes. + ((and (eq? (ctype-kind ty) 'arr) + (eq? (tok-kind (peek ps)) 'STR) + (let ((et (car (ctype-ext ty)))) + (or (eq? et %t-i8) (eq? et %t-u8)))) + (let* ((t (advance ps)) + (s (tok-value t)) + (slen (bytevector-length s)) + (decl (cdr (ctype-ext ty))) + (final (cond ((< decl 0) (+ slen 1)) (else decl)))) + (cond ((< decl 0) (%init-fix-array-size! ty final))) + ;; Emit byte stores for each char in s, plus NUL for the + ;; trailing slot if final > slen. + (let loop ((i 0)) + (cond + ((>= i final) #t) + (else + (let ((b (cond ((< i slen) (bytevector-u8-ref s i)) + (else 0))) + (off (+ (sym-slot sm) i))) + (%push-frame-elem-lval ps off %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 b) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (loop (+ i 1)))))))) + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind ty) 'arr) + (%parse-init-local-array-list ps sm 0 ty)) + ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) + (%parse-init-local-struct-list ps sm 0 ty)) + (else (die #f "init local: brace on scalar?")))) + (else (die (tok-loc (peek ps)) "init local aggregate?")))) + +(define (%emit-local-elem-store ps sm rel-off elem-ty piece-or-thunk) + ;; Emit a single scalar store at slot[base + rel-off]. piece is the + ;; raw initializer expression — but here we want to actually evaluate + ;; it via parse-expr to allow non-const expressions for autos. + ;; Caller handles this; this helper handles the store-into-frame ops. + 0) + +(define (%push-frame-elem-lval ps base-off ty) + (cg-push (ps-cg ps) (%opnd 'frame ty base-off #t))) + +(define (%parse-init-local-array-list ps sm base-off ty) + (let* ((elem (%init-array-elem-type ty)) + (esize (ctype-size elem)) + (decl (%init-array-decl-len ty))) + (let lp ((i 0)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + (cond ((< decl 0) (%init-fix-array-size! ty i))) + ;; Zero out remaining slots if any (declared length > i). + (let ((final (cond ((< decl 0) i) (else decl)))) + (let zlp ((k i)) + (cond + ((>= k final) #t) + (else + (let ((off (+ base-off (* k esize)))) + (cond + ((or (eq? (ctype-kind elem) 'arr) + (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + ;; Zero each byte in this aggregate slot. + (let zb ((j 0)) + (cond + ((>= j esize) #t) + (else + (%push-frame-elem-lval ps (+ off j) %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (zb (+ j 1)))))) + (else + (%push-frame-elem-lval ps off elem) + (cg-push-imm (ps-cg ps) elem 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps))))) + (zlp (+ k 1))))))) + (else + (let ((eoff (+ base-off (* i esize)))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind elem) 'arr) + (%parse-init-local-array-list ps sm eoff elem)) + ((or (eq? (ctype-kind elem) 'struct) + (eq? (ctype-kind elem) 'union)) + (%parse-init-local-struct-list ps sm eoff elem)) + (else + (%push-frame-elem-lval ps eoff elem) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) elem) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace)))) + (else + (%push-frame-elem-lval ps eoff elem) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) elem) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (+ i 1)))))))) + +(define (%parse-init-local-struct-list ps sm base-off ty) + (let ((fields (%init-struct-fields ty))) + (let lp ((rest fields)) + (cond + ((at-punct? ps 'rbrace) + (advance ps) + ;; Zero any remaining fields. + (let zlp ((xs rest)) + (cond + ((null? xs) #t) + (else + (let* ((f (car xs)) (fty (car (cdr f))) + (foff (car (cddr f))) (fsize (ctype-size fty))) + (let zb ((j 0)) + (cond + ((>= j fsize) #t) + (else + (%push-frame-elem-lval ps (+ base-off foff j) %t-u8) + (cg-push-imm (ps-cg ps) %t-u8 0) + (cg-assign (ps-cg ps)) + (cg-pop (ps-cg ps)) + (zb (+ j 1))))) + (zlp (cdr xs))))))) + (else + (let* ((designated? (at-punct? ps 'dot)) + (target + (cond + (designated? + (advance ps) + (let ((nt (advance ps))) + (let ((f (%find-field fields (tok-value nt)))) + (cond + ((not f) (die (tok-loc nt) "init: no such field" + (tok-value nt)))) + (expect-punct ps 'assign) + f))) + ((null? rest) + (die (tok-loc (peek ps)) "init: too many fields")) + (else (car rest)))) + (fname (car target)) + (fty (car (cdr target))) + (foff (car (cddr target))) + (eoff (+ base-off foff))) + (cond + ((at-punct? ps 'lbrace) + (advance ps) + (cond + ((eq? (ctype-kind fty) 'arr) + (%parse-init-local-array-list ps sm eoff fty)) + ((or (eq? (ctype-kind fty) 'struct) + (eq? (ctype-kind fty) 'union)) + (%parse-init-local-struct-list ps sm eoff fty)) + (else + (%push-frame-elem-lval ps eoff fty) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) fty) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)) + (cond ((at-punct? ps 'comma) (advance ps))) + (expect-punct ps 'rbrace)))) + (else + (%push-frame-elem-lval ps eoff fty) + (parse-expr-bp ps 4) (rval! ps) + (cg-cast (ps-cg ps) fty) + (cg-assign (ps-cg ps)) (cg-pop (ps-cg ps)))) + (cond ((at-punct? ps 'comma) (advance ps))) + (lp (cond + (designated? + (let drop ((xs fields)) + (cond + ((null? xs) '()) + ((equal? (car (car xs)) fname) (cdr xs)) + (else (drop (cdr xs)))))) + (else (cdr rest)))))))))) + +;; Backward-compat alias retained while callers transition. +(define (parse-init-list ps ty) (parse-init-global ps ty)) (define (parse-fn-body ps name dt) (let* ((e (ctype-ext dt)) (ret (car e)) diff --git a/tests/cc-parse/49-init-scalar-global.c b/tests/cc-parse/49-init-scalar-global.c @@ -0,0 +1,4 @@ +// tests/cc-parse/49-init-scalar-global.c — scalar global with constant +// initializer. §E.1. +int g = 42; +int main(void) { return g; } diff --git a/tests/cc-parse/49-init-scalar-global.expected-exit b/tests/cc-parse/49-init-scalar-global.expected-exit @@ -0,0 +1 @@ +42