commit 4f4f5f3154004c2d4041df033a93a896d88d8228
parent df346d81fc94705392812053c1a029bbf7e0ed62
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 10:52:20 -0700
parse: Phase 6 — initializers (designators, string init, compound literals)
Lands the §6.7.9 initializer surface on top of Phase 4's static-storage
machinery and Phase 3's aggregates. Adds a token replay buffer so we
can two-pass scan a brace list to size `T[]` declarators / compound
literals before slot allocation; designator chains (`[i]`, `.field`,
nested `[i][j]`) navigate from any aggregate type and reset the cursor
in both the local and static paths, with locals zero-filling gaps.
String literals initialize char-arrays at any nesting level. Compound
literals lower to a hidden `cg_local` slot whose lvalue is pushed for
postfix/cast machinery to decay. `parse_type_name` now accepts a full
abstract declarator so `(int[])` and `(int (*)[3])` parse.
Flips `6_5_29_compound_literal` and `6_7_9_02..10` from · to ★.
Diffstat:
3 files changed, 538 insertions(+), 53 deletions(-)
diff --git a/doc/parser-status.md b/doc/parser-status.md
@@ -228,20 +228,47 @@ because it's the §6.8.6 spec slot).
---
-## Phase 6 — Initializers ⬜
+## Phase 6 — Initializers ✅
Full §6.7.9 surface. Requires aggregates (Phase 3) and globals
(Phase 4) to be fully useful.
-- [ ] Brace initializer for arrays
-- [ ] Brace initializer for structs
-- [ ] Designated initializers (`[i] = ...`, `.field = ...`)
-- [ ] Nested designators (`[i][j] = ...`)
-- [ ] Partial init with zero-fill
-- [ ] String literal init for `char[]`
-- [ ] Compound literals (`(int[]){1, 2}`)
-
-Unlocks: `6_5_29`, `6_7_9_02–10`, `6_9_08–09`.
+- [x] Brace initializer for arrays
+- [x] Brace initializer for structs
+- [x] Designated initializers (`[i] = ...`, `.field = ...`)
+- [x] Nested designators (`[i][j] = ...`)
+- [x] Partial init with zero-fill
+- [x] String literal init for `char[]`
+- [x] Compound literals (`(int[]){1, 2}`)
+
+Phase 6 also added:
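+  - A token replay buffer on `Parser` so we can two-pass scan a braced
+    initializer: record tokens through the matching `}`, count top-level
+    items, then `replay_rewind` to re-parse. Used by
+    `complete_incomplete_array` to size brace-initialized `T[]`
+    declarators and compound literals (`int a[] = {...}`,
+    `(int[]){...}`) before the slot is allocated. A bare string
+    initializer (`char s[] = "hi"`) is sized from the decoded literal and
+    needs no replay. Sizing counts positional items only; designators are
+    not taken into account.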
+ - `parse_designator_chain` walks `[const]` / `.ident` chains starting
+ from any aggregate type, returning the leaf sub-object's offset/type
+ plus the top-level cursor index for the parent loop. Both
+ `init_at` (local) and `parse_static_init_at` (file scope / static
+ locals) consume designators; locals zero-fill gaps between the
+ cursor and a forward designator, while statics rely on the
+ pre-zeroed buffer.
+ - String-literal element initialization for char arrays at any nesting
+ level: `init_string_at` / `parse_static_string_at`. With or without
+ surrounding braces; truncation rules match §6.7.9 ¶14.
+ - Compound literals (`(T){...}`) lower as a hidden `cg_local` slot in
+ `parse_unary` immediately after the type-name; the slot's lvalue is
+ pushed and outer postfix/cast machinery handles array→pointer decay.
+ - `parse_type_name` now accepts a full abstract declarator (pointer
+ prefix + array/function suffixes) so casts like `(int (*)[3])` and
+ compound literals like `(int[]){...}` parse cleanly. The Phase 1
+ docstring noting "abstract declarators are pointer-prefix only" is
+ obsolete.
+
+Unlocks (status as landed): `6_5_29` ★, `6_7_9_02–10` ★. `6_9_08–09`
+were already ★ from Phase 4 (no compound-literal dependency).
---
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -252,6 +252,26 @@ typedef struct Parser {
/* Counter used to mint unique linker-visible names for static locals so
* that two functions can each have their own `static int s = ...`. */
u32 static_local_counter;
+
+ /* Counter used to mint anonymous local names for compound literals
+ * (`(T){...}`). Each compound literal becomes a hidden frame slot whose
+ * name is reserved here purely for diagnostics; the symbol is never
+ * visible to user code. */
+ u32 compound_literal_counter;
+
+ /* Replay buffer for two-pass scans of brace-enclosed initializers.
+ * Used when a compound literal or initializer needs to size an
+ * incomplete array (`(int[]){10, 32}`): we record tokens through the
+ * matching `}`, count items, then rewind to re-parse. While
+ * `replay_active`, advance()/peek1() pull from `replay` instead of pp;
+ * once exhausted, they fall back to the regular pp source so the
+ * post-brace token is fetched fresh. The buffer lives in arena
+ * storage. */
+ Tok* replay;
+ u32 replay_cap;
+ u32 replay_len;
+ u32 replay_pos;
+ u8 replay_active;
} Parser;
/* ============================================================
@@ -272,6 +292,16 @@ static _Noreturn void perr(Parser* p, const char* fmt, ...) {
* ============================================================ */
static void advance(Parser* p) {
+ if (p->replay_active) {
+ if (p->replay_pos < p->replay_len) {
+ p->cur = p->replay[p->replay_pos++];
+ return;
+ }
+ /* Replay exhausted; fall back to the underlying source. The pp stream
+ * sits exactly past the recorded `}` (record_braced_block left it
+ * there), so fetching the next token resumes parsing after the brace. */
+ p->replay_active = 0;
+ }
if (p->has_next) {
p->cur = p->next;
p->has_next = 0;
@@ -282,6 +312,9 @@ static void advance(Parser* p) {
/* One-token lookahead beyond p->cur. Lazily populated. */
static Tok peek1(Parser* p) {
+ if (p->replay_active && p->replay_pos < p->replay_len) {
+ return p->replay[p->replay_pos];
+ }
if (!p->has_next) {
p->next = pp_next(p->pp);
p->has_next = 1;
@@ -330,6 +363,76 @@ static void expect_punct(Parser* p, u32 punct, const char* what) {
}
}
+/* Record tokens from the current `{` through the matching `}` into the
+ * parser's replay buffer.
+ *
+ * Pre:  p->cur is `{`, and no earlier recording is still being replayed.
+ * Post: p->cur is the closing `}` (not yet advanced past); the replay
+ *       buffer holds [`{`, ..., `}`] and replay_len is the token count.
+ * The caller must subsequently call replay_rewind() to re-scan, or simply
+ * advance() to skip past the brace.
+ *
+ * The parser owns a single replay buffer, so recordings cannot nest. If a
+ * second recording started while advance() was still serving tokens from
+ * the first (e.g. `int *a[] = {(int[]){1, 2}}`), resetting replay_len here
+ * would make advance() treat the replay as exhausted and pull tokens from
+ * the underlying source out of order. That case is rejected with a
+ * diagnostic instead of silently mis-parsing. */
+static void record_braced_block(Parser* p) {
+  int depth = 0;
+  if (!is_punct(&p->cur, '{')) perr(p, "internal: record on non-'{'");
+  if (p->replay_active && p->replay_pos < p->replay_len) {
+    perr(p, "nested brace recording during initializer replay is unsupported");
+  }
+  p->replay_len = 0;
+  for (;;) {
+    if (p->replay_len == p->replay_cap) {
+      /* Buffer full: double it (32 tokens on first use) and carry the
+       * tokens recorded so far over to the new array. The previous array
+       * is not released here. */
+      u32 new_cap = p->replay_cap ? p->replay_cap * 2 : 32;
+      Tok* nv = arena_array(p->c->tu, Tok, new_cap);
+      if (!nv) perr(p, "out of memory in record_braced_block");
+      if (p->replay && p->replay_len) {
+        memcpy(nv, p->replay, p->replay_len * sizeof(Tok));
+      }
+      p->replay = nv;
+      p->replay_cap = new_cap;
+    }
+    /* Store first, classify second, so both the opening `{` and the
+     * matching `}` end up inside the recording. */
+    p->replay[p->replay_len++] = p->cur;
+    if (is_punct(&p->cur, '{')) {
+      ++depth;
+    } else if (is_punct(&p->cur, '}')) {
+      --depth;
+      if (depth == 0) break;
+    } else if (p->cur.kind == TOK_EOF) {
+      perr(p, "unexpected end of file in initializer");
+    }
+    advance(p);
+  }
+  /* cur is the recorded closing `}`. Caller decides what to do next. */
+}
+
+/* Re-position the parser on the first token captured by
+ * record_braced_block and switch advance()/peek1() over to the replay
+ * buffer. Any lookahead token that was lazily buffered in `next` is
+ * dropped, because everything inside the recorded range is now served
+ * from the buffer; the token following the recorded `}` is fetched from
+ * the regular source once the replay has drained. It is an internal error
+ * to call this when nothing has been recorded. */
+static void replay_rewind(Parser* p) {
+  if (p->replay_len == 0) {
+    perr(p, "internal: replay_rewind with empty buffer");
+  }
+  p->has_next = 0;
+  p->replay_active = 1;
+  /* Token 0 becomes the current token; replay resumes from token 1. */
+  p->replay_pos = 0;
+  p->cur = p->replay[p->replay_pos++];
+}
+
+/* Count the comma-separated items at the top level of a recorded brace
+ * list. `vec[0]` is the opening `{` and `vec[len - 1]` the matching `}`;
+ * only the tokens strictly between them are inspected. Commas nested
+ * inside `{}`, `()` or `[]` do not separate items, and a trailing comma
+ * right before the closing `}` does not add one. An empty list (`{}`) or
+ * a recording shorter than two tokens yields 0.
+ *
+ * The result is a count of items, not a designator-aware array length: a
+ * designator-led item such as `[5] = 1` still counts as exactly one. */
+static u32 count_recorded_top_level_items(const Tok* vec, u32 len) {
+  const Tok* t;
+  const Tok* close;
+  u32 items = 1;
+  int nesting = 0;
+  if (len <= 2) return 0;
+  close = vec + (len - 1);
+  for (t = vec + 1; t != close; ++t) {
+    if (is_punct(t, '{') || is_punct(t, '(') || is_punct(t, '[')) {
+      ++nesting;
+    } else if (is_punct(t, '}') || is_punct(t, ')') || is_punct(t, ']')) {
+      --nesting;
+    } else if (nesting == 0 && is_punct(t, ',')) {
+      ++items;
+    }
+  }
+  /* `close - 1` is the last interior token; a `,` there was only a
+   * trailing separator, so undo the item it appeared to introduce. */
+  if (is_punct(close - 1, ',')) --items;
+  return items;
+}
+
/* expect_kw is wired up but unused at this slice — `void` consumption
* goes through accept_kw already. Kept commented as a documentation hook
* for the next slice that needs it (e.g. `_Static_assert`).
@@ -1040,10 +1143,16 @@ static const Type* parse_pointer_layer(Parser* p, const Type* base) {
* function suffixes land in Phase 2. Used by sizeof / _Alignof / cast. */
static const Type* parse_type_name(Parser* p) {
DeclSpecs specs;
+ Sym dummy_name = 0;
+ SrcLoc dummy_loc = {0, 0, 0};
if (!parse_decl_specs(p, &specs)) {
perr(p, "expected type-name");
}
- return parse_pointer_layer(p, specs.type);
+ /* Type-name accepts a full abstract declarator (pointer prefix + array
+ * and/or function suffixes); compound literals like `(int[]){...}` and
+ * casts like `(int (*)[3])` rely on this. */
+ return parse_declarator_full(p, specs.type, /*allow_abstract=*/1,
+ &dummy_name, &dummy_loc);
}
/* ============================================================
@@ -1123,6 +1232,14 @@ static void parse_assign_expr(Parser* p);
static void parse_unary(Parser* p);
static void parse_postfix(Parser* p);
+/* Initializer entry points used by compound-literal lowering in parse_unary;
+ * the bodies live next to the rest of the initializer machinery further
+ * down. */
+typedef struct DeclSpecs DeclSpecs;
+static const Type* complete_incomplete_array(Parser* p, const Type* ty);
+static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
+ const Type* ty);
+
/* Produce an rvalue on the stack. Three cases beyond the trivial scalar:
* - array lvalue: §6.3.2.1 array-to-pointer decay → take address, retag the
* resulting `T(*)[N]` as `T*` so subsequent ops see a pointer.
@@ -1583,6 +1700,36 @@ static void parse_unary(Parser* p) {
advance(p); /* '(' */
dst = parse_type_name(p);
expect_punct(p, ')', "')' after type-name");
+ /* Compound literal `(type-name) { init-list }` per §6.5.2.5. The
+ * literal has automatic storage in the enclosing block (function
+ * scope here — same lifetime as a local). Allocate a hidden frame
+ * slot, parse the brace initializer into it, and push the slot's
+ * lvalue. Outer postfix/cast machinery handles array-to-pointer
+ * decay if the consumer expects an rvalue. */
+ if (is_punct(&p->cur, '{')) {
+ FrameSlotDesc fsd;
+ FrameSlot slot;
+ const Type* lit_ty = dst;
+ if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) {
+ lit_ty = complete_incomplete_array(p, lit_ty);
+ }
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = lit_ty;
+ fsd.size = abi_sizeof(p->abi, lit_ty);
+ fsd.align = abi_alignof(p->abi, lit_ty);
+ fsd.kind = FS_LOCAL;
+ fsd.flags = FSF_NONE;
+ slot = cg_local(p->cg, &fsd);
+ if (lit_ty && (lit_ty->kind == TY_ARRAY || lit_ty->kind == TY_STRUCT ||
+ lit_ty->kind == TY_UNION)) {
+ init_at(p, slot, lit_ty, 0, lit_ty);
+ } else {
+ /* Scalar compound literal `(int){42}`. */
+ init_at(p, slot, lit_ty, 0, lit_ty);
+ }
+ cg_push_local_typed(p->cg, slot, lit_ty);
+ return;
+ }
parse_unary(p); /* cast-expression */
to_rvalue(p);
/* `(void) expr` is the C idiom for "discard the value"; we must not
@@ -2513,6 +2660,24 @@ static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out,
return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out);
}
+/* Report whether `ty` is one of the three plain character types (char,
+ * signed char, unsigned char) -- the element types a string literal may
+ * initialize per §6.7.9 ¶14. A null `ty` is treated as "not a char". */
+static int is_char_kind(const Type* ty) {
+  if (!ty) return 0;
+  switch (ty->kind) {
+    case TY_CHAR:
+    case TY_SCHAR:
+    case TY_UCHAR:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Decode the string literal sitting at p->cur without consuming it.
+ * p->cur must be a TOK_STR; anything else is an internal error. The
+ * decoded bytes come back in a heap buffer that the caller must free, and
+ * *nlen_out receives the decoded length (including the trailing NUL).
+ * This is a thin wrapper over decode_string_literal so initializer code
+ * does not have to reach into the literal-parsing section. */
+static u8* peek_string_bytes(Parser* p, size_t* nlen_out) {
+  Tok str_tok;
+  if (p->cur.kind != TOK_STR) {
+    perr(p, "internal: peek_string_bytes on non-string");
+  }
+  /* Decode from a private copy of the current token. */
+  str_tok = p->cur;
+  return decode_string_literal(p, &str_tok, nlen_out);
+}
+
/* Push the lvalue of a sub-object at byte offset `offset` within the array
* local `slot` (whose type is `arr_ty`), with element type `elem_ty`. The
* value stack ends with an OPK_INDIRECT lvalue ready for cg_store. */
@@ -2569,18 +2734,142 @@ static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty,
/* Parse the initializer for the sub-object at `offset` of type `ty`.
*
- * Aggregates (`{...}`) follow §6.7.9 with two simplifications:
- * - No designated initializers (Phase 6).
- * - Brace elision is supported on entry: a sub-aggregate without its own
- * `{` consumes scalars from the parent's initializer stream until its
- * first scalar slot is filled. This matches the corpus rows that nest
- * anonymous structs inside outer braced inits.
+ * Aggregates (`{...}`) follow §6.7.9:
+ * - Designated initializers (`[i] = ...`, `.field = ...`, and chains
+ * such as `[i][j] = ...` or `.a.b = ...`) reset the cursor before
+ * each item; subsequent positional items continue from there. Gaps
+ * between the previous cursor and a forward designator are
+ * zero-filled.
+ * - Brace elision: a sub-aggregate without its own `{` consumes
+ * scalars from the parent's stream until its first scalar slot is
+ * filled.
+ * - String literals initialize char-arrays directly per §6.7.9 ¶14
+ * (with or without surrounding braces).
*
* Scalars take a single assignment-expression, optionally wrapped in
* `{ x }` per §6.7.9 ¶11. */
static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
const Type* ty);
+/* Initialize a char-array sub-object from the string literal at p->cur,
+ * then consume that token. The sub-object starts at byte `offset` inside
+ * the local `slot` (whose type is `arr_ty`) and has `count` elements of
+ * type `elem_ty`. One store is emitted per element: the first
+ * min(decoded length, count) elements receive the literal's bytes and any
+ * remaining elements receive 0. Consequently a literal whose characters
+ * exactly fill the array loses its terminating NUL (allowed by §6.7.9
+ * ¶14), while a longer array keeps the NUL and is zero-padded. */
+static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty,
+                           u32 offset, const Type* elem_ty, u32 count) {
+  size_t lit_len = 0;
+  u8* lit = peek_string_bytes(p, &lit_len);
+  size_t ncopy = lit_len < count ? lit_len : count; /* §6.7.9 ¶14 truncation */
+  u32 k;
+  for (k = 0; k < count; ++k) {
+    /* Literal byte while one is available, zero padding afterwards. */
+    i64 value = k < ncopy ? (i64)lit[k] : 0;
+    push_subobject_lv(p, slot, arr_ty, offset + k, elem_ty);
+    cg_push_int(p->cg, value, elem_ty);
+    cg_store(p->cg);
+    cg_drop(p->cg);
+  }
+  p->c->env->heap->free(p->c->env->heap, lit, 0);
+  advance(p); /* consume TOK_STR */
+}
+
+/* Parse a designator chain (`[const]` and `.ident` repeats) starting at
+ * the current token, then consume the `=` that terminates it.
+ *
+ * The chain navigates from the outer type `outer_ty`, located at byte
+ * offset `outer_offset`, down to a sub-object.
+ *
+ * Outputs:
+ *   *sub_ty_out     type of the designated sub-object.
+ *   *sub_offset_out its absolute byte offset (outer_offset plus every
+ *                   element/field offset along the chain).
+ *   *top_index_out  written only for the FIRST designator; it selects the
+ *                   cursor position in the immediately-enclosing brace
+ *                   list: the `[i]` index for an array, or the field index
+ *                   of the named member for a struct/union.
+ *
+ * Errors (reported through perr): an array designator on a non-array, an
+ * index outside [0, count), a field designator on a non-record type, an
+ * unknown field name, a missing `]` or `=`, and an empty chain. */
+static void parse_designator_chain(Parser* p, const Type* outer_ty,
+                                   u32 outer_offset, const Type** sub_ty_out,
+                                   u32* sub_offset_out, u32* top_index_out) {
+  const Type* cur_ty = outer_ty;
+  u32 cur_off = outer_offset;
+  int first = 1;
+  for (;;) {
+    if (is_punct(&p->cur, '[')) {
+      i64 idx;
+      u32 esz;
+      SrcLoc cloc = tok_loc(&p->cur);
+      advance(p);
+      idx = eval_const_int(p, cloc);
+      expect_punct(p, ']', "']' after designator index");
+      if (!cur_ty || cur_ty->kind != TY_ARRAY) {
+        perr(p, "array designator on non-array");
+      }
+      /* Compare in the 64-bit domain: narrowing `idx` to u32 first would
+       * let an index such as 0x100000000 wrap to 0 and pass the check. */
+      if (idx < 0 || idx >= (i64)cur_ty->arr.count) {
+        perr(p, "array designator index out of range");
+      }
+      /* idx is now known to lie in [0, count), so it fits in u32. */
+      esz = abi_sizeof(p->abi, cur_ty->arr.elem);
+      cur_off += (u32)idx * esz;
+      cur_ty = cur_ty->arr.elem;
+      if (first) *top_index_out = (u32)idx;
+      first = 0;
+    } else if (is_punct(&p->cur, '.')) {
+      Sym fname;
+      const Type* fty;
+      u32 foff;
+      const Field* ff;
+      u16 fi;
+      advance(p);
+      if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+        perr(p, "expected field name after '.'");
+      }
+      fname = p->cur.v.ident;
+      advance(p);
+      if (!cur_ty ||
+          (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) {
+        perr(p, "field designator on non-record type");
+      }
+      if (!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) {
+        perr(p, "no such field in designator");
+      }
+      cur_off += foff;
+      if (first) {
+        /* Find the field index for cursor advance in the parent loop.
+         * find_field returns the offset/type but not the index, so do a
+         * second linear scan here. Anonymous-member transparency: an
+         * IDENT inside a nested anonymous member belongs to the outer
+         * record's NTH visible position; we use the outer slot for
+         * cursor advance, scanning the outer record. */
+        for (fi = 0; fi < cur_ty->rec.nfields; ++fi) {
+          const Field* g = &cur_ty->rec.fields[fi];
+          if (g->name == fname && fname != 0) {
+            *top_index_out = fi;
+            break;
+          }
+          if ((g->flags & FIELD_ANON) &&
+              (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) {
+            const Type* tmp_ty;
+            u32 tmp_off;
+            const Field* tmp_f;
+            if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off,
+                           &tmp_f)) {
+              *top_index_out = fi;
+              break;
+            }
+          }
+        }
+      }
+      cur_ty = fty;
+      first = 0;
+    } else {
+      break;
+    }
+  }
+  /* Callers only enter on `[` or `.`, so at least one designator must
+   * have been consumed by now. */
+  if (first) perr(p, "internal: empty designator chain");
+  expect_punct(p, '=', "'=' after designator");
+  *sub_ty_out = cur_ty;
+  *sub_offset_out = cur_off;
+}
+
/* Parse a brace-elided sequence of scalars filling sub-objects of `ty`
* starting at `offset`. `count_out` is set to the number of scalars
* consumed; the function returns when the parent's initializer stream
@@ -2596,21 +2885,47 @@ static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty,
* stream. With `braced=1`, we are inside this struct's own `{ ... }` and
* stop on `}`; with `braced=0`, we are eliding into the parent's stream
* and return as soon as the first scalar slot is filled (caller manages
- * outer field index). Returns the number of fields consumed. */
+ * outer field index). Returns the number of fields consumed.
+ *
+ * In braced mode, designated initializers (`.field = ...`) reset `i`;
+ * gaps between the previous cursor and the designator are zero-filled. */
const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
u32 i = start_field;
+ u32 zero_lo = start_field; /* first not-yet-zero-filled field index */
for (; i < ty->rec.nfields; ++i) {
const Field* f = &ty->rec.fields[i];
u32 foff = offset + L->fields[i].offset;
if (f->flags & FIELD_BITFIELD) continue;
if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break;
+ if (braced && is_punct(&p->cur, '.')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ /* Zero-fill any fields the designator skipped over (or back-tracked
+ * past — duplicate inits are allowed but we just overwrite). */
+ while (zero_lo < top_idx) {
+ const Field* zf = &ty->rec.fields[zero_lo];
+ u32 zoff = offset + L->fields[zero_lo].offset;
+ if (!(zf->flags & FIELD_BITFIELD)) {
+ zero_init_at(p, slot, arr_ty, zoff, zf->type);
+ }
+ ++zero_lo;
+ }
+ init_at(p, slot, arr_ty, sub_off, sub_ty);
+ i = top_idx; /* loop ++ advances past it */
+ if (zero_lo <= top_idx) zero_lo = top_idx + 1;
+ goto next_item_struct;
+ }
init_at(p, slot, arr_ty, foff, f->type);
+ if (zero_lo <= i) zero_lo = i + 1;
if (!braced) {
/* Caller (parent's elision) only wanted us to consume one scalar's
* worth into our first non-bitfield slot. */
++i;
break;
}
+ next_item_struct:
if (!accept_punct(p, ',')) {
++i;
break;
@@ -2622,9 +2937,10 @@ static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty,
}
/* Zero-fill any unconsumed fields in braced mode. */
if (braced) {
- for (; i < ty->rec.nfields; ++i) {
- const Field* f = &ty->rec.fields[i];
- u32 foff = offset + L->fields[i].offset;
+ u32 j;
+ for (j = zero_lo; j < ty->rec.nfields; ++j) {
+ const Field* f = &ty->rec.fields[j];
+ u32 foff = offset + L->fields[j].offset;
if (f->flags & FIELD_BITFIELD) continue;
zero_init_at(p, slot, arr_ty, foff, f->type);
}
@@ -2661,31 +2977,68 @@ static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty,
static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
const Type* ty) {
if (ty->kind == TY_ARRAY) {
+ const Type* elem_ty = ty->arr.elem;
+ u32 esz = abi_sizeof(p->abi, elem_ty);
+ /* String literal initializing a char-array (with or without braces) per
+ * §6.7.9 ¶14. Wide character types are deferred (Phase 7). */
+ if (is_char_kind(elem_ty)) {
+ if (p->cur.kind == TOK_STR) {
+ init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
+ return;
+ }
+ if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
+ advance(p);
+ init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after string initializer");
+ return;
+ }
+ }
if (!is_punct(&p->cur, '{')) {
/* Brace elision: the array consumes scalars from the parent stream.
* A bare assignment-expression on entry only fills one scalar slot
* worth, then returns. */
- init_elided(p, slot, arr_ty, offset, ty->arr.elem);
+ init_elided(p, slot, arr_ty, offset, elem_ty);
return;
}
advance(p); /* '{' */
- const Type* elem_ty = ty->arr.elem;
- u32 esz = abi_sizeof(p->abi, elem_ty);
- u32 i = 0;
- if (!is_punct(&p->cur, '}')) {
- for (;;) {
- if (i >= ty->arr.count) {
- perr(p, "too many initializers for array");
+ {
+ u32 i = 0;
+ u32 zero_lo = 0; /* first index not yet zero-filled (after explicit init) */
+ if (!is_punct(&p->cur, '}')) {
+ for (;;) {
+ if (is_punct(&p->cur, '[')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off,
+ &top_idx);
+ while (zero_lo < top_idx) {
+ zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty);
+ ++zero_lo;
+ }
+ init_at(p, slot, arr_ty, sub_off, sub_ty);
+ i = top_idx + 1;
+ if (zero_lo < i) zero_lo = i;
+ } else {
+ if (i >= ty->arr.count) {
+ perr(p, "too many initializers for array");
+ }
+ init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
+ ++i;
+ if (zero_lo < i) zero_lo = i;
+ }
+ if (!accept_punct(p, ',')) break;
+ if (is_punct(&p->cur, '}')) break;
+ }
+ }
+ expect_punct(p, '}', "'}' after array initializer");
+ {
+ u32 j;
+ for (j = zero_lo; j < ty->arr.count; ++j) {
+ zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty);
}
- init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
- ++i;
- if (!accept_punct(p, ',')) break;
- if (is_punct(&p->cur, '}')) break;
}
- }
- expect_punct(p, '}', "'}' after array initializer");
- for (; i < ty->arr.count; ++i) {
- zero_init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
}
return;
}
@@ -2757,23 +3110,63 @@ static void encode_int_le(u8* dst, u32 size, i64 v) {
}
}
+/* Copy the string literal at p->cur into the static-initializer image
+ * `buf` (of `buflen` bytes) at byte `offset`, then consume the token. The
+ * target is a char-array sub-object with `count` declared elements, so at
+ * most `count` bytes are copied (truncation as in §6.7.9 ¶14). Bytes past
+ * the literal are left untouched and stay zero, since buf is pre-zeroed
+ * by define_static_object. A copy that would run past the end of `buf`
+ * is reported as an error. */
+static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset,
+                                   u32 count) {
+  size_t lit_len = 0;
+  u8* lit = peek_string_bytes(p, &lit_len);
+  size_t ncopy = lit_len > count ? count : lit_len;
+  if (offset + (u32)ncopy > buflen) {
+    perr(p, "string initializer overflows object");
+  }
+  memcpy(buf + offset, lit, ncopy);
+  p->c->env->heap->free(p->c->env->heap, lit, 0);
+  advance(p); /* consume TOK_STR */
+}
+
static void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset,
const Type* ty) {
if (ty->kind == TY_ARRAY) {
const Type* elem = ty->arr.elem;
u32 esz = abi_sizeof(p->abi, elem);
u32 i = 0;
- int had_brace = accept_punct(p, '{');
+ int had_brace;
+ /* String literal initializer for char-arrays (with or without braces). */
+ if (is_char_kind(elem)) {
+ if (p->cur.kind == TOK_STR) {
+ parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
+ return;
+ }
+ if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
+ advance(p);
+ parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after string initializer");
+ return;
+ }
+ }
+ had_brace = accept_punct(p, '{');
if (!had_brace) {
perr(p, "expected '{' for static-storage array initializer");
}
if (!is_punct(&p->cur, '}')) {
for (;;) {
- if (i >= ty->arr.count) {
- perr(p, "too many initializers for array");
+ if (is_punct(&p->cur, '[')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
+ i = top_idx + 1;
+ } else {
+ if (i >= ty->arr.count) {
+ perr(p, "too many initializers for array");
+ }
+ parse_static_init_at(p, buf, buflen, offset + i * esz, elem);
+ ++i;
}
- parse_static_init_at(p, buf, buflen, offset + i * esz, elem);
- ++i;
if (!accept_punct(p, ',')) break;
if (is_punct(&p->cur, '}')) break;
}
@@ -2790,6 +3183,16 @@ static void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset,
}
while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) {
const Field* f = &ty->rec.fields[i];
+ if (is_punct(&p->cur, '.')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
+ i = top_idx + 1;
+ if (!accept_punct(p, ',')) break;
+ continue;
+ }
if (f->flags & FIELD_BITFIELD) { ++i; continue; }
parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset,
f->type);
@@ -2920,6 +3323,48 @@ static Sym mint_static_local_sym(Parser* p, Sym orig) {
return pool_intern(p->pool, buf, wlen);
}
+/* Complete an incomplete array type (`T[]`) from the initializer that
+ * starts at p->cur. Any other type (including NULL) is returned
+ * unchanged. The element count is derived as follows:
+ *   - char-kind `T` with a bare string literal: the decoded length of the
+ *     literal (including NUL). The token is only inspected, not consumed.
+ *   - `{...}`: the braced range is recorded and its top-level items are
+ *     counted -- positional only, designators are not interpreted
+ *     (sufficient for the corpus). If the list consists of a single item
+ *     that begins with a string literal and `T` is a char-kind
+ *     (`char s[] = {"hi"}`), the count is the literal's decoded length
+ *     instead. The parser is then rewound to the recorded `{` so the
+ *     initializer can be parsed for real.
+ *   - anything else: error (incomplete array with non-list init).
+ * Returns the completed array type; the caller should use it as the
+ * declared type from here on. */
+static const Type* complete_incomplete_array(Parser* p, const Type* ty) {
+  const Type* elem;
+  Tok str_tok;
+  size_t str_len = 0;
+  u8* str_bytes;
+  u32 nelems;
+  if (!ty || ty->kind != TY_ARRAY || !ty->arr.incomplete) return ty;
+  elem = ty->arr.elem;
+
+  /* Bare string literal initializing a character array. */
+  if (p->cur.kind == TOK_STR && is_char_kind(elem)) {
+    str_tok = p->cur;
+    str_bytes = decode_string_literal(p, &str_tok, &str_len);
+    p->c->env->heap->free(p->c->env->heap, str_bytes, 0);
+    return type_array(p->pool, elem, (u32)str_len, /*incomplete=*/0);
+  }
+
+  if (!is_punct(&p->cur, '{')) {
+    perr(p, "initializer cannot complete incomplete array type");
+  }
+
+  /* Brace list: first pass records and counts, then we rewind. */
+  record_braced_block(p);
+  nelems = count_recorded_top_level_items(p->replay, p->replay_len);
+  /* A string literal as the sole brace contents is also valid: detect it
+   * by replay[1] being TOK_STR and size from the decoded length. */
+  if (nelems == 1 && p->replay_len >= 3 && p->replay[1].kind == TOK_STR &&
+      is_char_kind(elem)) {
+    str_tok = p->replay[1];
+    str_bytes = decode_string_literal(p, &str_tok, &str_len);
+    p->c->env->heap->free(p->c->env->heap, str_bytes, 0);
+    nelems = (u32)str_len;
+  }
+  replay_rewind(p);
+  return type_array(p->pool, elem, nelems, /*incomplete=*/0);
+}
+
/* Parse a single init-declarator after the decl-specs have been consumed.
* Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix*
* init = `=` (assign_expr | brace_init) */
@@ -3012,7 +3457,20 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
}
/* Non-VLA local. */
{
- FrameSlot s = make_local(p, name, var_ty, loc);
+ int has_init = is_punct(&p->cur, '=');
+ FrameSlot s;
+ if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) {
+ /* `T name[] = ...`: peek the initializer to deduce the count, then
+ * allocate the slot with the now-complete type. The slot allocation
+ * has to wait until after sizing, so move it inside this branch. */
+ advance(p); /* '=' */
+ var_ty = complete_incomplete_array(p, var_ty);
+ s = make_local(p, name, var_ty, loc);
+ cg_set_loc(p->cg, loc);
+ init_at(p, s, var_ty, 0, var_ty);
+ return;
+ }
+ s = make_local(p, name, var_ty, loc);
if (accept_punct(p, '=')) {
cg_set_loc(p->cg, loc);
if (var_ty->kind == TY_ARRAY || var_ty->kind == TY_STRUCT ||
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -139,7 +139,7 @@ here for completeness once they're real cases.
| `6_5_26_pre_dec` | ★ | `int x = 43; return --x;` | 42 |
| `6_5_27_post_dec` | ★ | `int x = 43; x--; return x;` | 42 |
| `6_5_28_arrow` | ★ | `struct S{int v;} s={42}; struct S *p=&s; return p->v;` | 42 |
-| `6_5_29_compound_literal` | · | `int *p = (int[]){10, 32}; return p[0]+p[1];` | 42 |
+| `6_5_29_compound_literal` | ★ | `int *p = (int[]){10, 32}; return p[0]+p[1];` | 42 |
| `6_5_30_generic_selection`| ★ | `int x=42; return _Generic((x), int: x, default: 0);` | 42 |
| `6_5_31_subscript_commute`| ★ | `int a[5]={0,0,42,0,0}; return 2[a];` | 42 |
| `6_5_32_string_subscript` | ★ | `return "*"[0];` | 42 |
@@ -263,15 +263,15 @@ cover compound typedef targets.
| Case | Status | Body | Expected |
|---|---|---|---|
| `6_7_9_01_scalar_init` | ★ | `int x = 42; return x;` | 42 |
-| `6_7_9_02_array_brace` | · | `int a[3] = {10, 20, 12}; return a[0]+a[1]+a[2];` | 42 |
-| `6_7_9_03_partial_zero` | · | `int a[5] = {42}; return a[0] + a[4];` | 42 |
-| `6_7_9_04_designated` | · | `int a[5] = {[2] = 42}; return a[2];` | 42 |
-| `6_7_9_05_struct_init` | · | `struct S {int a,b;} s={40,2}; return s.a+s.b;` | 42 |
-| `6_7_9_06_string_init` | · | `char s[] = "hi"; return s[0]+s[1]+s[2];` | 'h'+'i' |
-| `6_7_9_07_designated_struct` | · | `struct S{int a,b,c;} s={.b=42}; return s.b;` | 42 |
-| `6_7_9_08_nested_designated` | · | `int a[2][3] = {[1][2] = 42}; return a[1][2];` | 42 |
-| `6_7_9_09_struct_in_array` | · | `struct P{int x,y;} a[2] = {{0,0},{0,42}}; return a[1].y;` | 42 |
-| `6_7_9_10_zero_init_static` | · | full TU: `static int g[3]; int test_main(void){return g[0]+g[1]+g[2]+42;}` | 42 |
+| `6_7_9_02_array_brace` | ★ | `int a[3] = {10, 20, 12}; return a[0]+a[1]+a[2];` | 42 |
+| `6_7_9_03_partial_zero` | ★ | `int a[5] = {42}; return a[0] + a[4];` | 42 |
+| `6_7_9_04_designated` | ★ | `int a[5] = {[2] = 42}; return a[2];` | 42 |
+| `6_7_9_05_struct_init` | ★ | `struct S {int a,b;} s={40,2}; return s.a+s.b;` | 42 |
+| `6_7_9_06_string_init` | ★ | `char s[] = "hi"; return s[0]+s[1]+s[2];` | 'h'+'i' |
+| `6_7_9_07_designated_struct` | ★ | `struct S{int a,b,c;} s={.b=42}; return s.b;` | 42 |
+| `6_7_9_08_nested_designated` | ★ | `int a[2][3] = {[1][2] = 42}; return a[1][2];` | 42 |
+| `6_7_9_09_struct_in_array` | ★ | `struct P{int x,y;} a[2] = {{0,0},{0,42}}; return a[1].y;` | 42 |
+| `6_7_9_10_zero_init_static` | ★ | full TU: `static int g[3]; int test_main(void){return g[0]+g[1]+g[2]+42;}` | 42 |
## §6.7.10 Static assertions