kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit bf1dc146e22105508838aeb887215cd591aeb6fb
parent 48ac896bdb02dfc17ed86c38aab168e654eecccc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 09:10:52 -0700

parse: Phase 4 — globals, storage classes, linkage, static locals

File-scope object declarations land with their full initializer/linkage
matrix: tentative defs (BSS), static (internal linkage), extern
(declare-without-define + redeclaration reuse), const → .rodata, struct
and array data emission. Static locals are promoted to internal-linkage
globals so their storage persists across calls; each gets a mangled
`<orig>.<counter>` linker name so same-named static locals in different
functions do not collide. Initializers go through a shared byte-buffer emitter
(define_static_object) routed to .rodata / .data / .bss based on
qualifiers and content.

Diffstat:
Mdoc/parser-status.md | 45+++++++++++++++++++++++++++++++++++----------
Msrc/parse/parse.c | 343+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtest/parse/CORPUS.md | 14+++++++-------
3 files changed, 378 insertions(+), 24 deletions(-)

diff --git a/doc/parser-status.md b/doc/parser-status.md @@ -147,19 +147,44 @@ cg-side bitfield path. --- -## Phase 4 — Globals, storage, linkage ⬜ +## Phase 4 — Globals, storage, linkage ✅ File-scope objects with their full initializer / linkage matrix. -- [ ] File-scope object declarations -- [ ] `static` global (internal linkage, `.data` / `.bss` placement) -- [ ] `extern` declaration and resolution -- [ ] Tentative definitions -- [ ] `const` global in `.rodata` -- [ ] Global struct / array data emission -- [ ] `static` local with non-zero init - -Unlocks: `6_7_02–04`, `6_9_03`, `6_9_07–09`. +- [x] File-scope object declarations +- [x] `static` global (internal linkage, `.data` / `.bss` placement) +- [x] `extern` declaration and resolution +- [x] Tentative definitions +- [x] `const` global in `.rodata` +- [x] Global struct / array data emission +- [x] `static` local with non-zero init + +Phase 4 also added: + - `DeclSpecs.quals` carries `Q_CONST` / `Q_VOLATILE` / etc. through the + decl-spec parser; `const` is no longer silently swallowed. The Phase 4 + use is `Q_CONST` → `.rodata` placement; the other bits are recorded for + the next phase (atomic, volatile semantics). + - `define_static_object` is the shared emitter for any static-storage + object: scalars / arrays / structs lower into a working `u8[size]` + buffer, get bucketed into `.rodata` (const + nonzero), `.data` (any + nonzero init), or `.bss` (zero or no init), and then call + `obj_symbol_define`. BSS uses `obj_section_ex(SSEM_NOBITS)` and + accumulates `bss_size` so multiple BSS objects pack correctly. + - Static locals are promoted to globally-visible symbols with internal + linkage. The linker name is mangled `<orig>.<counter>`; the local scope + binds the original name to `SEK_GLOBAL` so subsequent references go + through `cg_push_global`. Storage persists across calls (§6.2.4 ¶3). + - File-scope tentative defs reserve BSS at first sight. Multi-tentative + coalescing (e.g. 
`int g; int g; int g = 0;`) is a Phase 4 follow-up + when the corpus needs it; today's rows have at most one tentative def + per TU. + - `parse_external_decl` accepts array suffixes on the first declarator + (`int g[3]`) and supports `,`-separated init-declarators on a single + decl-spec line; `extern` followed by a defining declaration of the + same name reuses the `ObjSymId` so the linker sees one symbol. + +Unlocks (status as landed): `6_2_4_01` ★, `6_7_02–04` ★, `6_9_03` ★, +`6_9_07–09` ★. --- diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -209,6 +209,10 @@ typedef struct Parser { * declarator; nested cases panic in apply_decl_suffix. */ u8 vla_pending; FrameSlot vla_pending_count_slot; + + /* Counter used to mint unique linker-visible names for static locals so + * that two functions can each have their own `static int s = ...`. */ + u32 static_local_counter; } Parser; /* ============================================================ @@ -390,6 +394,7 @@ typedef struct DeclSpecs { const Type* type; DeclStorage storage; u32 flags; /* DeclFlag */ + u16 quals; /* TypeQual bits seen in the decl-spec list */ } DeclSpecs; static int parse_decl_specs(Parser* p, DeclSpecs* out); @@ -475,6 +480,7 @@ static int parse_decl_specs(Parser* p, DeclSpecs* out) { out->type = NULL; out->storage = DS_AUTO; out->flags = DF_NONE; + out->quals = 0; loc = tok_loc(&p->cur); for (;;) { Tok t = p->cur; @@ -523,10 +529,17 @@ static int parse_decl_specs(Parser* p, DeclSpecs* out) { out->storage = DS_STATIC; advance(p); seen = 1; } else if (is_kw(p, &t, KW_EXTERN)) { out->storage = DS_EXTERN; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_CONST) || is_kw(p, &t, KW_VOLATILE) || - is_kw(p, &t, KW_RESTRICT) || is_kw(p, &t, KW_INLINE) || + } else if (is_kw(p, &t, KW_CONST)) { + out->quals |= Q_CONST; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_VOLATILE)) { + out->quals |= Q_VOLATILE; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_RESTRICT)) { + out->quals |= Q_RESTRICT; 
advance(p); seen = 1; + } else if (is_kw(p, &t, KW_ATOMIC)) { + out->quals |= Q_ATOMIC; advance(p); seen = 1; + } else if (is_kw(p, &t, KW_INLINE) || is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) || - is_kw(p, &t, KW_AUTO) || is_kw(p, &t, KW_ATOMIC)) { + is_kw(p, &t, KW_AUTO)) { /* Recognized but currently no-op at this slice. */ advance(p); seen = 1; } else { @@ -2682,6 +2695,192 @@ static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, } } +/* ============================================================ + * Static-storage initializers (file-scope objects + static locals) + * ============================================================ + * + * A static-storage object's initializer is a constant expression that the + * compiler must materialize as bytes in the object file. We allocate a + * working buffer of `abi_sizeof(ty)` bytes (zero-filled), recursively walk + * the (possibly braced) initializer, and write each scalar's encoding at its + * computed offset. The buffer is then handed to decl_define_object as a + * single INIT_BYTES item — obj_reserve hands back uninitialized chunk + * storage, so we always patch the entire range. + * + * v1 scope: integer scalars only (eval_const_int). Pointer relocations are + * deferred — none of the Phase 4 corpus rows need them. Aggregates are + * positional brace lists with brace-elision elsewhere; designators arrive + * with Phase 6. 
*/ + +static void encode_int_le(u8* dst, u32 size, i64 v) { + for (u32 i = 0; i < size; ++i) { + dst[i] = (u8)((v >> (8u * i)) & 0xffu); + } +} + +static void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + const Type* elem = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem); + u32 i = 0; + int had_brace = accept_punct(p, '{'); + if (!had_brace) { + perr(p, "expected '{' for static-storage array initializer"); + } + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + parse_static_init_at(p, buf, buflen, offset + i * esz, elem); + ++i; + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + } + expect_punct(p, '}', "'}' after array initializer"); + return; + } + if (ty->kind == TY_STRUCT) { + int had_brace = accept_punct(p, '{'); + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + u32 i = 0; + if (!had_brace) { + perr(p, "expected '{' for static-storage struct initializer"); + } + while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { + const Field* f = &ty->rec.fields[i]; + if (f->flags & FIELD_BITFIELD) { ++i; continue; } + parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset, + f->type); + ++i; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, '}', "'}' after struct initializer"); + return; + } + if (ty->kind == TY_UNION) { + perr(p, "static-storage union initializer not supported in Phase 4"); + } + /* Scalar / pointer: integer constant only. 
*/ + { + int had_brace = accept_punct(p, '{'); + SrcLoc cloc = tok_loc(&p->cur); + i64 v = eval_const_int(p, cloc); + u32 sz = abi_sizeof(p->abi, ty); + if (offset + sz > buflen) perr(p, "initializer overflows object"); + encode_int_le(buf + offset, sz, v); + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } + } +} + +/* Choose the section a defining object decl with `quals` and `storage` + * should land in: const → .rodata, otherwise let decl_define_object pick + * .data/.bss based on whether the init is all zero. Returns OBJ_SEC_NONE + * when the default is appropriate. */ +static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) { + if ((quals & Q_CONST) != 0 && has_nonzero) { + Sym secname = pool_intern_cstr(p->pool, ".rodata"); + return obj_section(decl_obj(p->decls), secname, SEC_RODATA, SF_ALLOC, 1u); + } + return OBJ_SEC_NONE; +} + +/* Define a static-storage object: allocate the byte buffer, parse the + * (optional) initializer into it, route to .rodata / .data / .bss, and call + * obj_symbol_define. Used for both file-scope objects and static locals. */ +static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, + u16 quals, int has_init, SrcLoc loc) { + ObjBuilder* ob = decl_obj(p->decls); + u32 size = abi_sizeof(p->abi, var_ty); + u32 align = abi_alignof(p->abi, var_ty); + u8* buf = NULL; + int has_nonzero = 0; + ObjSecId override_sec; + + if (has_init) { + buf = (u8*)arena_array(p->c->tu, u8, size ? size : 1u); + memset(buf, 0, size); + parse_static_init_at(p, buf, size, 0, var_ty); + for (u32 i = 0; i < size; ++i) { + if (buf[i]) { has_nonzero = 1; break; } + } + } + + override_sec = pick_object_section(p, quals, has_nonzero); + if (override_sec != OBJ_SEC_NONE) { + /* .rodata path: emit bytes directly here so we can pin the section. */ + u32 base = obj_pos(ob, override_sec); + obj_section_set_align(ob, override_sec, + align > 1u ? 
align : 1u); + { + u8* dst = obj_reserve(ob, override_sec, size); + if (dst && buf) memcpy(dst, buf, size); + } + obj_symbol_define(ob, sym, override_sec, base, size); + (void)loc; + return; + } + + if (!has_init || !has_nonzero) { + /* BSS path. SSEM_NOBITS keeps the bytes off-disk; the loader zeros + * the section image. The symbol's value is the offset within the + * accumulated bss_size — bumped after we record this object. */ + Sym sname = pool_intern_cstr(p->pool, ".bss"); + ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, + SF_ALLOC | SF_WRITE, + align ? align : 1u, 0, OBJ_SEC_NONE, 0); + const Section* sinfo = obj_section_get(ob, sec); + u32 prev_size = sinfo ? sinfo->bss_size : 0u; + u32 a = align ? align : 1u; + u32 base = (prev_size + (a - 1u)) & ~(a - 1u); + obj_reserve_bss(ob, sec, base + size, a); + obj_symbol_define(ob, sym, sec, base, size); + return; + } + /* .data path. */ + { + Sym sname = pool_intern_cstr(p->pool, ".data"); + ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE, + align ? align : 1u); + u32 base = obj_pos(ob, sec); + u8* dst = obj_reserve(ob, sec, size); + if (dst) memcpy(dst, buf, size); + obj_symbol_define(ob, sym, sec, base, size); + } +} + +/* Mint a unique linker name for a static local: `<orig>.<counter>`. The + * static_local_counter never resets across the TU, so two static locals in + * different functions never collide even if they share the source name. 
*/ +static Sym mint_static_local_sym(Parser* p, Sym orig) { + size_t olen = 0; + const char* on = pool_str(p->pool, orig, &olen); + char buf[128]; + u32 wlen = 0; + u32 id = ++p->static_local_counter; + if (olen > 100) olen = 100; + for (size_t i = 0; i < olen && wlen < sizeof buf - 1; ++i) { + buf[wlen++] = on[i]; + } + if (wlen < sizeof buf - 1) buf[wlen++] = '.'; + { + char digits[12]; + int dn = 0; + if (id == 0) digits[dn++] = '0'; + while (id) { + digits[dn++] = (char)('0' + (id % 10)); + id /= 10; + } + while (dn && wlen < sizeof buf - 1) buf[wlen++] = digits[--dn]; + } + return pool_intern(p->pool, buf, wlen); +} + /* Parse a single init-declarator after the decl-specs have been consumed. * Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix* * init = `=` (assign_expr | brace_init) */ @@ -2689,6 +2888,60 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { SrcLoc loc; Sym name; const Type* var_ty = parse_declarator(p, specs->type, &name, &loc); + + /* Static-storage locals are promoted to a globally-visible symbol with + * internal linkage; the local scope binds to that symbol so subsequent + * uses load through cg_push_global. The variable's storage persists + * across calls (§6.2.4 ¶3). Initializer must be a constant expression. 
*/ + if (specs->storage == DS_STATIC) { + Decl decl_in; + DeclId did; + ObjSymId sym; + SymEntry* e; + Sym lname = mint_static_local_sym(p, name); + int has_init; + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = lname; + decl_in.type = var_ty; + decl_in.loc = loc; + decl_in.storage = DS_STATIC; + decl_in.linkage = DL_INTERNAL; + decl_in.visibility = SV_DEFAULT; + decl_in.flags = DF_STATIC_LOCAL; + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + e = scope_define(p, name, SEK_GLOBAL, var_ty); + e->v.sym = sym; + has_init = accept_punct(p, '='); + define_static_object(p, sym, var_ty, specs->quals, has_init, loc); + return; + } + + /* `extern` block-scope declaration: declares the name but does not define + * storage. The matching defining declaration must appear elsewhere (file + * scope here, or another TU). */ + if (specs->storage == DS_EXTERN) { + Decl decl_in; + DeclId did; + ObjSymId sym; + SymEntry* e; + if (accept_punct(p, '=')) { + perr(p, "block-scope extern with initializer not supported"); + } + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = name; + decl_in.type = var_ty; + decl_in.loc = loc; + decl_in.storage = DS_EXTERN; + decl_in.linkage = DL_EXTERNAL; + decl_in.visibility = SV_DEFAULT; + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + e = scope_define(p, name, SEK_GLOBAL, var_ty); + e->v.sym = sym; + return; + } + /* VLA: the declarator type is `T[]` (incomplete array) with a pending * runtime count. Bind `name` as `T*` (the pointer the alloca returns) so * subscript/arithmetic on `a` lowers as on a pointer; `sizeof(a)` would @@ -3167,6 +3420,16 @@ static void parse_external_decl(Parser* p) { loc = tok_loc(&p->cur); advance(p); + /* Array suffix(es) after the name pin this as an object declaration — + * e.g. `static int g[3] = {...}`. Apply each suffix in order so the + * resulting type is the array-of-T we'll allocate storage for. 
*/ + while (is_punct(&p->cur, '[')) { + DeclSuffix s; + if (!parse_decl_suffix(p, &s)) break; + if (s.kind != DS_ARRAY) break; + base_ty = apply_decl_suffix(p, base_ty, &s); + } + if (is_punct(&p->cur, '(')) { /* Function declaration or definition: build the type from the param * list, then dispatch on `{` (definition) vs `;` (prototype). */ @@ -3201,10 +3464,76 @@ static void parse_external_decl(Parser* p) { perr(p, "expected '{' or ';' after function declarator"); } - /* Global object declaration: `int g;` / `int g = 7;` / `int g = ..., h;` */ - /* v1 slice does not implement global initializers — defer until §6.7.9 - * cases need them. We just register the decl and reserve BSS. */ - perr(p, "global object declarations not supported in v1 slice"); + /* Global object declaration: `int g;`, `int g = 7;`, `extern int g;`, + * `static T g = ...;`, `const T g = ...;`. */ + for (;;) { + int has_init = is_punct(&p->cur, '='); + int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init; + SymEntry* existing = scope_lookup(p, name); + ObjSymId sym = OBJ_SYM_NONE; + SymEntry* e = NULL; + + if (existing && existing->kind == SEK_GLOBAL) { + /* Redeclaration: reuse the prior ObjSymId so the linker sees one + * symbol. Compatible-types checks live in Phase 10. */ + sym = existing->v.sym; + e = existing; + } else { + Decl decl_in; + DeclId did; + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = name; + decl_in.type = base_ty; + decl_in.loc = loc; + if (specs.storage == DS_STATIC) { + decl_in.storage = DS_STATIC; + decl_in.linkage = DL_INTERNAL; + } else { + /* File-scope objects without an explicit storage class still have + * static storage duration and external linkage (§6.2.2 ¶5, + * §6.2.4 ¶3). Storing DS_EXTERN drives decl_declare to mint an + * obj_sym; DS_AUTO is reserved for block-scope autos. 
*/ + decl_in.storage = DS_EXTERN; + decl_in.linkage = DL_EXTERNAL; + } + decl_in.visibility = SV_DEFAULT; + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + e = scope_define(p, name, SEK_GLOBAL, base_ty); + e->v.sym = sym; + } + + if (has_init) { + advance(p); /* '=' */ + define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/1, + loc); + } else if (!is_pure_extern) { + /* Tentative def: emit a BSS reservation now. End-of-TU coalescing of + * multiple tentative defs into one is a Phase 4 follow-up; the + * Phase 4 corpus only has a single tentative def per TU. */ + define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/0, + loc); + } + + (void)e; + + if (!accept_punct(p, ',')) break; + /* Next declarator: parse pointer prefix + IDENT, then loop. */ + base_ty = parse_pointer_layer(p, specs.type); + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected declarator after ','"); + } + name = p->cur.v.ident; + loc = tok_loc(&p->cur); + advance(p); + /* Optional array suffix on a global declarator (e.g. `int g[3]`). */ + while (is_punct(&p->cur, '[')) { + DeclSuffix s; + if (!parse_decl_suffix(p, &s)) break; + base_ty = apply_decl_suffix(p, base_ty, &s); + } + } + expect_punct(p, ';', "';' after global declaration"); } static void parse_translation_unit(Parser* p) { diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -81,7 +81,7 @@ natural home elsewhere. 
|---|---|---|---| | `6_2_3_01_tag_ord_namespace` | · | `struct s { int v; }; int s = 42; struct s t = {0}; return s + t.v;` | 42 | | `6_2_3_02_label_namespace` | · | `int s = 0; goto s; s = 99; s: return 42;` | 42 | -| `6_2_4_01_static_keeps_value` | · | helper `int next(){static int n=40; return ++n;}`; `next(); return next();` | 42 | +| `6_2_4_01_static_keeps_value` | ★ | helper `int next(){static int n=40; return ++n;}`; `next(); return next();` | 42 | | `6_2_5_01_void_func_no_value` | ★ | helper `void f(int *p){*p=42;} int x; f(&x); return x;` | 42 | ## §6.3 Conversions @@ -159,8 +159,8 @@ here for completeness once they're real cases. |---|---|---|---| | `6_7_01_typedef` | · | `typedef int I; I x = 42; return x;` | 42 | | `6_7_02_static_local` | ★ | `static int s = 42; return s;` | 42 | -| `6_7_03_static_global` | · | `static int g = 42; int test_main(void){return g;}` | 42 | -| `6_7_04_extern_resolved` | · | `extern int g; int g = 42; return g;` | 42 | +| `6_7_03_static_global` | ★ | `static int g = 42; int test_main(void){return g;}` | 42 | +| `6_7_04_extern_resolved` | ★ | `extern int g; int g = 42; return g;` | 42 | | `6_7_05_const_qualifier` | ★ | `const int c = 42; return c;` | 42 | | `6_7_06_struct_basic` | ★ | `struct S { int a, b; } s = {10, 32}; return s.a + s.b;` | 42 | | `6_7_07_union_basic` | ★ | `union U { int i; char c[4]; } u; u.i = 42; return u.i;` | 42 | @@ -306,13 +306,13 @@ cover compound typedef targets. 
|---|---|---|---| | `6_9_01_two_functions` | ★ | helper + caller in one TU | 42 | | `6_9_02_recursive_function` | ★ | `factorial(5)` | 120 | -| `6_9_03_tentative_def` | · | file-scope `int g;` (tentative) + use | 0 | +| `6_9_03_tentative_def` | ★ | file-scope `int g;` (tentative) + use | 0 | | `6_9_04_static_func` | ★ | `static int helper(...)` + caller | 42 | | `6_9_05_proto_then_def` | ★ | forward declaration before body | 42 | | `6_9_06_variadic_func` | · | `sum(int n, ...)` over `va_arg`; `sum(2,20,22)` (paired with builtin_03) | 42 | -| `6_9_07_global_const` | · | full TU: `const int g = 42; int test_main(void){return g;}` | 42 | -| `6_9_08_global_struct_init` | · | full TU: `struct S{int v;} g={42}; int test_main(void){return g.v;}` | 42 | -| `6_9_09_static_data_array` | · | full TU: `static int g[3] = {0, 0, 42}; int test_main(void){return g[2];}` | 42 | +| `6_9_07_global_const` | ★ | full TU: `const int g = 42; int test_main(void){return g;}` | 42 | +| `6_9_08_global_struct_init` | ★ | full TU: `struct S{int v;} g={42}; int test_main(void){return g.v;}` | 42 | +| `6_9_09_static_data_array` | ★ | full TU: `static int g[3] = {0, 0, 42}; int test_main(void){return g[2];}` | 42 | | `6_9_10_kr_function_def` | (deferred) | K&R-style definitions are C90 carryover; revisit if needed | — | ## Builtins