commit 84bd8059c53fd1cf3c3612683e0d400d36aa9b42
parent d7882f0ce3836f3d71de3ba9bca652a7fc28a161
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 06:03:03 -0700
parse: Phase 2 — pointer/array decls, decay, subscript, brace init, VLA
Recursive declarator with array/function suffixes and one level of
nested parens (`int (*fp)(int)`); array→pointer and function→pointer
decay in to_rvalue; subscript a[i] and the commutative i[a] in postfix;
pointer arithmetic in +/-; brace initializer for local arrays with
nested zero-fill; `[static N]` parameter form; VLA local lowered via
cg_alloca (cg.c stub wired through to the existing aarch64 backend).
Flips 10 corpus rows · → ★ (6_3_2_1_01–02, 6_5_22, 6_5_31–32,
6_7_6_03–07). 6_7_6_02_array_2d still fails — nested brace init runs
the aarch64 scratch pool dry; needs the spill path. 6_7_6_08 is
Phase 9 (variadics).
Diffstat:
4 files changed, 599 insertions(+), 80 deletions(-)
diff --git a/doc/parser-status.md b/doc/parser-status.md
@@ -66,22 +66,37 @@ Unlocks (status as landed): `6_5_12–14` ★, `6_5_20–21` ★, `6_5_22` ·
---
-## Phase 2 — Pointers & arrays ⬜
+## Phase 2 — Pointers & arrays ✅
Pointer/array declarator layers and the address operators. Builds on
Phase 1's type-name production.
-- [ ] Pointer declarator (`int *p`, `int **pp`)
-- [ ] Array declarator (`int a[N]`, `int a[]`)
-- [ ] Subscript `a[i]` (and the commutative `i[a]`)
-- [ ] Pointer arithmetic in `+`/`-` (scaled by element size)
-- [ ] Array-to-pointer decay
-- [ ] Function-to-pointer decay + indirect call (`(*fp)(x)`)
-- [ ] `int *const p` / `const int *p` qualifier placement
-- [ ] `[static N]` parameter form
-- [ ] VLA local (`int a[n]`)
-
-Unlocks: `6_3_2_1_*`, `6_5_28–32`, `6_7_3_03–04`, `6_7_6_01–07`.
+- [x] Pointer declarator (`int *p`, `int **pp`)
+- [x] Array declarator (`int a[N]`, `int a[]`)
+- [x] Subscript `a[i]` (and the commutative `i[a]`)
+- [x] Pointer arithmetic in `+`/`-` (scaled by element size)
+- [x] Array-to-pointer decay
+- [x] Function-to-pointer decay + indirect call (`(*fp)(x)`)
+- [x] `int *const p` / `const int *p` qualifier placement
+- [x] `[static N]` parameter form
+- [x] VLA local (`int a[n]`) — lowered via `cg_alloca`; the variable binds
+ as a pointer-to-element rather than as a true array, so subscript
+ and pointer arith just work but `sizeof(vla)` is not yet preserved
+ (Phase 9 follow-up alongside `__builtin_alloca`).
+
+Phase 2 also taught `parse_decl_suffix` to parse function and array
+declarator suffixes recursively, taught `to_rvalue` to do array→pointer
+and function→pointer decay automatically, and added a real recursive
+declarator that handles one level of nested parens (`int (*fp)(int)`).
+The `cg_alloca` stub in `cg.c` was wired through to `CGTarget.alloca_`
+since the aarch64 backend already implements it.
+
+Unlocks (status as landed): `6_3_2_1_01–02` ★, `6_5_22` ★, `6_5_31–32` ★,
+`6_7_6_03–07` ★, `6_7_6_02` · (regalloc pressure under nested brace init —
+needs the spill path that's also blocking other future cases). `6_5_28–30`
+were misattributed in the original phase plan: `6_5_28_arrow` and
+`6_5_30_generic_selection` need struct/`_Generic` (Phase 3),
+`6_5_29_compound_literal` needs Phase 6's brace-init machinery.
---
diff --git a/src/cg/cg.c b/src/cg/cg.c
@@ -378,6 +378,15 @@ const Type* cg_top_type(CG* g) {
return g->stack[g->sp - 1].type;
}
+/* Type of the second-from-top SValue. Used by the parser when both operands
+ * of a binary operator are already on the stack and it needs to pick a
+ * pointer-arithmetic vs. integer-arithmetic lowering.
+ * Returns NULL when the value stack holds fewer than two entries. The stack
+ * is only read: nothing is popped and no code is emitted. */
+const Type* cg_top2_type(CG* g);
+const Type* cg_top2_type(CG* g) {
+ if (g->sp < 2) return NULL;
+ return g->stack[g->sp - 2].type;
+}
+
/* Replace the type tag on the top SValue without emitting code. Used by
* the parser for casts that are no-ops at the value level (e.g. pointer-
* to-pointer of the same width); the underlying register/operand stays
@@ -854,7 +863,28 @@ void cg_ret(CG* g, int has_value) {
* ============================================================ */
void cg_alloca(CG* g) {
- compiler_panic(g->c, g->cur_loc, "cg_alloca: not in v1 slice");
+ /* Pop the size (i64 imm or reg), call CGTarget.alloca_, push the resulting
+ * void* aligned to max_align_t. The 16-byte alignment is the AAPCS64
+ * max_align_t; cg trusts the backend to honor it (aa_alloca_ rounds the
+ * size up to a 16-byte multiple, which is what keeps SP aligned). */
+ CGTarget* T = g->target;
+ SValue sz = pop(g);
+ Operand sz_op;
+ const Type* void_ptr = type_ptr(g->pool, type_void(g->pool));
+ Reg dst_r;
+ Operand dst;
+ SValue out;
+ /* An immediate size is handed to the backend untouched; any other operand
+ * kind is forced into a register first, typed by the SValue's type when it
+ * has one and by the operand's own type otherwise. */
+ if (sz.op.kind == OPK_IMM) {
+ sz_op = sz.op;
+ } else {
+ sz_op = force_reg(g, sz, sz.type ? sz.type : sz.op.type);
+ }
+ /* The destination register is allocated after the size operand has been
+ * settled, and is typed void* — the same type the pushed result carries. */
+ dst_r = T->alloc_reg(T, RC_INT, void_ptr);
+ dst = op_reg(dst_r, void_ptr);
+ T->alloca_(T, dst, sz_op, /*align=*/16);
+ /* NOTE(review): only `op` and `type` of `out` are assigned before push();
+ * confirm SValue has no other field that push() or later readers rely on. */
+ out.op = dst;
+ out.type = void_ptr;
+ push(g, out);
}
void cg_va_start_(CG* g) {
compiler_panic(g->c, g->cur_loc, "cg_va_start: not in v1 slice");
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -44,6 +44,9 @@ extern void cg_push_local_typed(CG*, FrameSlot, const Type*);
extern void cg_deref(CG*, const Type* pointee);
/* Read SValue.type at top of stack without popping. */
extern const Type* cg_top_type(CG*);
+/* Read SValue.type at second-from-top; used for pointer-arith dispatch when
+ * both operands are already on the stack. */
+extern const Type* cg_top2_type(CG*);
/* Replace the type tag on the top SValue without emitting code (used for
* pointer-to-pointer casts which are no-ops at the value level). */
extern void cg_retag_top(CG*, const Type*);
@@ -182,6 +185,14 @@ typedef struct Parser {
/* Loop/switch context for break/continue. CGLabel 0 means none. */
CGLabel cur_break;
CGLabel cur_continue;
+
+ /* VLA bookkeeping. parse_decl_suffix emits the size-expression code at
+ * suffix-parse time (because the tokens are about to vanish) and stashes
+ * the element count in `vla_pending_count_slot`; parse_init_declarator
+ * picks it up to drive cg_alloca. v1 supports only one VLA dimension per
+ * declarator; a second one is rejected in parse_decl_suffix. */
+ u8 vla_pending;
+ FrameSlot vla_pending_count_slot;
} Parser;
/* ============================================================
@@ -487,19 +498,21 @@ static int starts_type_name(const Parser* p, const Tok* t) {
}
/* Walk a `*` chain at the front of a declarator (and optional qualifiers
- * after each `*`), wrapping `base` in successive pointer types. Returns
- * the innermost type the IDENT/declarator-tail refers to. */
+ * after each `*`), wrapping `base` in successive pointer types. Qualifiers
+ * after a `*` qualify the pointer just produced (`int *const p` → p is a
+ * const-qualified pointer to int). Layers are applied in source order, so
+ * the first `*` ends up nearest `base`. Returns `base` unchanged when the
+ * current token is not `*`. */
static const Type* parse_pointer_layer(Parser* p, const Type* base) {
while (accept_punct(p, '*')) {
+ u16 q = 0;
base = type_ptr(p->pool, base);
- /* Optional qualifiers after `*`; recognized and ignored at this slice. */
for (;;) {
- if (accept_kw(p, KW_CONST) || accept_kw(p, KW_VOLATILE) ||
- accept_kw(p, KW_RESTRICT) || accept_kw(p, KW_ATOMIC)) {
- continue;
- }
+ if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; }
+ if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; }
+ if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; }
+ if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; }
break;
}
+ if (q) base = type_qualified(p->pool, base, q);
}
return base;
}
@@ -592,11 +605,27 @@ static void parse_assign_expr(Parser* p);
static void parse_unary(Parser* p);
static void parse_postfix(Parser* p);
-/* Produce an rvalue on the stack: pop, and if it's an lvalue, load. */
+/* Produce an rvalue on the stack. Three cases beyond the trivial scalar:
+ * - array lvalue: §6.3.2.1 array-to-pointer decay → take address, retag the
+ * resulting `T(*)[N]` as `T*` so subsequent ops see a pointer.
+ * - function lvalue: §6.3.2.1 function-to-pointer decay → take address; the
+ * type becomes `T(*)()` automatically because cg_addr wraps the operand
+ * type in TY_PTR.
+ * - scalar lvalue (LOCAL/GLOBAL/INDIRECT): cg_load. Idempotent on rvalues.
+ * Dispatch is on the type tag of the top SValue only; when cg_top_type
+ * returns NULL the value goes straight to cg_load. */
static void to_rvalue(Parser* p) {
+ const Type* t = cg_top_type(p->cg);
+ if (t) {
+ if (t->kind == TY_ARRAY) {
+ cg_addr(p->cg);
+ cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem));
+ return;
+ }
+ if (t->kind == TY_FUNC) {
+ cg_addr(p->cg);
+ return;
+ }
+ }
cg_load(p->cg);
- /* cg_load is idempotent on rvalues. */
- (void)p;
}
/* Decode one character (the first encoded code unit) from the token's
@@ -850,11 +879,21 @@ static void parse_postfix(Parser* p) {
continue;
}
if (is_punct(&t, '(')) {
- /* Function call. The callee was pushed by parse_primary as an
- * lvalue (OPK_GLOBAL for SEK_FUNC); cg_call accepts that directly
- * for direct calls. */
- const Type* fn_type = cg_top_type(p->cg);
- if (!fn_type || fn_type->kind != TY_FUNC) {
+ /* Function call. The callee was pushed by parse_primary as a function
+ * lvalue (OPK_GLOBAL when SEK_FUNC); a function-pointer callee is also
+ * accepted (TY_PTR-to-TY_FUNC) — load it to a register and indirect. */
+ const Type* top = cg_top_type(p->cg);
+ const Type* fn_type;
+ if (top && top->kind == TY_FUNC) {
+ fn_type = top;
+ } else if (top && top->kind == TY_PTR && top->ptr.pointee &&
+ top->ptr.pointee->kind == TY_FUNC) {
+ fn_type = top->ptr.pointee;
+ /* Materialize the pointer rvalue (cg_call's force_reg fallback would
+ * also do this, but doing it here keeps the invariant that the value
+ * stack settles to a register before argument evaluation starts). */
+ cg_load(p->cg);
+ } else {
perr(p, "called object is not a function");
}
advance(p); /* '(' */
@@ -886,8 +925,58 @@ static void parse_postfix(Parser* p) {
}
continue;
}
- if (is_punct(&t, '[') || is_punct(&t, '.') || is_punct(&t, P_ARROW)) {
- perr(p, "subscript/member access not supported in v1 slice");
+ if (is_punct(&t, '[')) {
+ /* Subscript `e1[e2]` is `*((e1) + (e2))` per §6.5.2.1. We resolve the
+ * pointer side after parsing the index so the commutative `i[a]` form
+ * (where the bracketed side is the pointer/array) falls out naturally. */
+ const Type* lt0 = cg_top_type(p->cg);
+ advance(p); /* '[' */
+ /* If the left operand is an array/pointer, decay/load to get a pointer
+ * rvalue. Integer base is left alone — we'll commute below if needed. */
+ if (lt0 && lt0->kind == TY_ARRAY) {
+ cg_addr(p->cg);
+ cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem));
+ } else if (lt0 && lt0->kind == TY_PTR) {
+ cg_load(p->cg);
+ }
+ parse_expr(p);
+ /* Decay/load the index side similarly. */
+ {
+ const Type* it0 = cg_top_type(p->cg);
+ if (it0 && it0->kind == TY_ARRAY) {
+ cg_addr(p->cg);
+ cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem));
+ } else {
+ to_rvalue(p);
+ }
+ }
+ expect_punct(p, ']', "']' after subscript");
+ {
+ const Type* lt = cg_top2_type(p->cg);
+ const Type* it = cg_top_type(p->cg);
+ const Type* elem;
+ if (lt && lt->kind == TY_PTR && type_is_int(it)) {
+ elem = lt->ptr.pointee;
+ } else if (it && it->kind == TY_PTR && type_is_int(lt)) {
+ /* Commute so the pointer is on the bottom for the add below. */
+ cg_swap(p->cg);
+ elem = it->ptr.pointee;
+ } else {
+ perr(p, "invalid subscript: needs one pointer and one integer");
+ }
+ if (!elem) perr(p, "subscript on incomplete pointee");
+ u32 esz = abi_sizeof(p->abi, elem);
+ if (esz != 1) {
+ cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+ cg_binop(p->cg, BO_IMUL);
+ }
+ cg_binop(p->cg, BO_IADD);
+ cg_deref(p->cg, elem);
+ }
+ continue;
+ }
+ if (is_punct(&t, '.') || is_punct(&t, P_ARROW)) {
+ perr(p, "member access not supported in v1 slice");
}
break;
}
@@ -1078,6 +1167,61 @@ static void parse_mul(Parser* p) {
}
}
+/* Multiply the integer on top of the value stack by `esz` (an element size
+ * in bytes). Emits nothing when esz is 1. */
+static void scale_top_by(Parser* p, u32 esz) {
+ if (esz == 1) return;
+ cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+ cg_binop(p->cg, BO_IMUL);
+}
+
+/* Lower `+` / `-` on the two topmost stack values with C pointer scaling:
+ * ptr + int → ptr + int * sizeof(*ptr)
+ * int + ptr → swap so the pointer sits underneath, then as above
+ * ptr - int → ptr - int * sizeof(*ptr)
+ * ptr - ptr → (ptr - ptr) / sizeof(*ptr)
+ * otherwise → plain cg_binop(bop)
+ * Any `bop` other than BO_IADD is treated as subtraction when a pointer rule
+ * matches. Both operands are consumed and the result is pushed. */
+static void emit_add_or_sub(Parser* p, BinOp bop) {
+ const Type* lhs_ty = cg_top2_type(p->cg);
+ const Type* rhs_ty = cg_top_type(p->cg);
+ const int lhs_ptr = lhs_ty && lhs_ty->kind == TY_PTR;
+ const int rhs_ptr = rhs_ty && rhs_ty->kind == TY_PTR;
+ const int is_add = (bop == BO_IADD);
+
+ /* ptr ± int: the integer is already on top, scale it in place. */
+ if (lhs_ptr && type_is_int(rhs_ty)) {
+ scale_top_by(p, abi_sizeof(p->abi, lhs_ty->ptr.pointee));
+ cg_binop(p->cg, is_add ? BO_IADD : BO_ISUB);
+ return;
+ }
+
+ if (is_add) {
+ /* int + ptr: bring the integer to the top before scaling. */
+ if (rhs_ptr && type_is_int(lhs_ty)) {
+ cg_swap(p->cg);
+ scale_top_by(p, abi_sizeof(p->abi, rhs_ty->ptr.pointee));
+ cg_binop(p->cg, BO_IADD);
+ return;
+ }
+ } else if (lhs_ptr && rhs_ptr) {
+ /* ptr - ptr: byte difference first, then divide down to elements. */
+ u32 elem_size = abi_sizeof(p->abi, lhs_ty->ptr.pointee);
+ cg_binop(p->cg, BO_ISUB);
+ if (elem_size != 1) {
+ cg_push_int(p->cg, (i64)elem_size, ty_size_t(p));
+ cg_binop(p->cg, BO_SDIV);
+ }
+ return;
+ }
+
+ /* No pointer operand pattern matched: ordinary arithmetic. */
+ cg_binop(p->cg, bop);
+}
+
static void parse_add(Parser* p) {
parse_mul(p);
for (;;) {
@@ -1094,7 +1238,7 @@ static void parse_add(Parser* p) {
to_rvalue(p);
parse_mul(p);
to_rvalue(p);
- cg_binop(p->cg, bop);
+ emit_add_or_sub(p, bop);
}
}
@@ -1406,41 +1550,379 @@ static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) {
return s;
}
-/* Parse a non-abstract declarator: optional `*` pointer prefix followed
- * by an IDENT. v1 doesn't yet implement function or array declarators,
- * which slot in around the IDENT in subsequent phases. Returns the
- * declared type (with pointer layers wrapping `base`) and writes the
- * IDENT to *name_out / *loc_out. */
-static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out,
- SrcLoc* loc_out) {
+/* Forward decls for declarator components. */
+typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind;
+typedef struct ParamInfo ParamInfo;
+/* One parsed declarator suffix: either an array `[...]` or a parameter list
+ * `(...)`. Only the fields belonging to `kind` are meaningful. */
+typedef struct DeclSuffix {
+ u8 kind; /* DSuffKind */
+ /* DS_ARRAY */
+ u32 count; /* element count; meaningful when !vla and !incomplete */
+ u8 incomplete; /* true for `[]` (no size given) */
+ u8 vla; /* true for `[expr]` when expr is not a bare numeric literal */
+ /* When `vla` is set, the size expression has already been emitted and the
+ * resulting element count (not a byte size — parse_init_declarator scales
+ * it by the element size) is held in this scratch slot.
+ * Materialized at suffix-parse time because the size expression's tokens
+ * are consumed there; init_declarator reads it back to drive cg_alloca. */
+ FrameSlot vla_count_slot;
+ /* DS_FUNC */
+ ParamInfo* params;
+ u16 nparams;
+ u8 variadic;
+} DeclSuffix;
+
+/* Name, type and source location of one parsed parameter; `name` is 0 for
+ * an abstract (unnamed) parameter. */
+typedef struct ParamInfo {
+ Sym name;
+ const Type* type;
+ SrcLoc loc;
+} ParamInfo;
+
+static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
+ u8* variadic_out);
+
+/* Parse a single trailing suffix (`[...]` or `(...)`) after a declarator's
+ * IDENT or parenthesized inner-declarator. Returns 1 if a suffix was consumed
+ * and filled into *out, 0 otherwise (in which case *out is not written).
+ * Every field that belongs to the resulting `kind` is assigned, so callers
+ * may pass uninitialized storage.
+ * Side effects for a VLA size: a frame slot is allocated, the size
+ * expression's code is emitted and stored into it, and p->vla_pending /
+ * p->vla_pending_count_slot are set for parse_init_declarator. */
+static int parse_decl_suffix(Parser* p, DeclSuffix* out) {
+ if (accept_punct(p, '[')) {
+ /* `[ qualifier* static? assignment-expression? ]` or `[ static qualifier* assign-expr ]`.
+ * A bare numeric literal yields a fixed-size array; any other size
+ * expression is lowered as a VLA further down. */
+ out->kind = DS_ARRAY;
+ out->count = 0;
+ out->incomplete = 0;
+ /* Clear the VLA fields on every array suffix: apply_decl_suffix reads
+ * `vla` for plain `[N]` too, and the suffix storage handed in by
+ * parse_declarator_full is an uninitialized stack array. */
+ out->vla = 0;
+ out->vla_count_slot = FRAME_SLOT_NONE;
+ /* Optional `static`/qualifiers before the size; recognized, no-op here.
+ * `[static N]` only changes parameter ABI hints (caller promises ≥N). */
+ for (;;) {
+ if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) ||
+ accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) ||
+ accept_kw(p, KW_ATOMIC)) {
+ continue;
+ }
+ break;
+ }
+ if (accept_punct(p, ']')) {
+ out->incomplete = 1;
+ return 1;
+ }
+ /* Constant integer size? A bare TOK_NUM is the entire spine corpus's
+ * idiom; a non-constant size kicks the suffix into VLA mode (Phase 2,
+ * §6.7.6.2 ¶4). Full constant-expression evaluation is a future cross-
+ * cutting concern; for now anything but TOK_NUM goes through alloca. */
+ {
+ Tok t = p->cur;
+ if (t.kind == TOK_NUM) {
+ i64 v = parse_int_literal(p, &t);
+ if (v < 0) perr(p, "negative array size");
+ /* `count` is a u32; reject rather than silently truncate. */
+ if (v > (i64)0xFFFFFFFFu) perr(p, "array size too large");
+ out->count = (u32)v;
+ advance(p);
+ } else {
+ /* VLA: emit the size-expression code now (the tokens go away after
+ * we return), spill its int value to a fresh i64 frame slot so
+ * init_declarator can pick it back up at the right time. */
+ FrameSlotDesc fsd;
+ if (p->vla_pending) {
+ perr(p, "v1 supports only one VLA dimension per declarator");
+ }
+ out->vla = 1;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = ty_size_t(p);
+ fsd.size = abi_sizeof(p->abi, fsd.type);
+ fsd.align = abi_alignof(p->abi, fsd.type);
+ fsd.kind = FS_LOCAL;
+ out->vla_count_slot = cg_local(p->cg, &fsd);
+ parse_assign_expr(p);
+ to_rvalue(p);
+ /* Stack is [value]; push the slot lvalue and swap so cg_store sees
+ * [lvalue, value]. */
+ cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type);
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ cg_reset_scratch(p->cg);
+ p->vla_pending = 1;
+ p->vla_pending_count_slot = out->vla_count_slot;
+ }
+ }
+ expect_punct(p, ']', "']' after array size");
+ return 1;
+ }
+ if (accept_punct(p, '(')) {
+ out->kind = DS_FUNC;
+ out->params = NULL;
+ out->nparams = 0;
+ out->variadic = 0;
+ parse_param_list(p, &out->params, &out->nparams, &out->variadic);
+ expect_punct(p, ')', "')' after parameter list");
+ return 1;
+ }
+ return 0;
+}
+
+/* Wrap `base` with a single suffix's transform. Used when materializing the
+ * declarator type from the collected suffix list.
+ * DS_ARRAY → array of `base`, flagged incomplete for both `[]` and VLA.
+ * any other kind is handled as DS_FUNC → function returning `base` whose
+ * parameter types are copied out of s->params into a fresh arena array. */
+static const Type* apply_decl_suffix(Parser* p, const Type* base,
+ const DeclSuffix* s) {
+ if (s->kind == DS_ARRAY) {
+ /* VLA: count is runtime; record an incomplete array type so the type
+ * system carries the elem-type but the size is treated as unknown.
+ * init_declarator notices the parser-side `vla_pending` flag and emits
+ * the alloca + bind. */
+ return type_array(p->pool, base, s->count, s->incomplete || s->vla);
+ }
+ /* DS_FUNC */
+ {
+ /* ptypes stays NULL for an empty parameter list. */
+ const Type** ptypes = NULL;
+ if (s->nparams) {
+ ptypes = (const Type**)arena_array(p->c->tu, const Type*, s->nparams);
+ for (u16 i = 0; i < s->nparams; ++i) ptypes[i] = s->params[i].type;
+ }
+ return type_func(p->pool, base, ptypes, s->nparams, (int)s->variadic);
+ }
+}
+
+/* Parse a (possibly abstract) declarator. Supports:
+ * pointer-prefix? ( IDENT | '(' inner-declarator ')' ) suffix*
+ * where suffix is `[N]` or `(params)`. The parenthesized inner declarator is
+ * limited to a `*` chain (with qualifiers) followed by an optional IDENT,
+ * e.g. `int (*fp)(int)`; deeper nesting such as `(*(*fp))` is not parsed
+ * here. For Phase 2 a single level covers all corpus cases.
+ *
+ * Type construction order: outer `*` prefix, then suffixes (rightmost
+ * first), then the inner `*` chain in source order.
+ *
+ * If `allow_abstract` is true, the IDENT may be absent (used by parameters).
+ * On success returns the declared type and writes *name_out (=0 if abstract)
+ * and *loc_out; either out-pointer may be NULL. */
+static const Type* parse_declarator_full(Parser* p, const Type* base,
+ int allow_abstract, Sym* name_out,
+ SrcLoc* loc_out) {
+ /* Outer pointer prefix wraps `base` as we go. */
base = parse_pointer_layer(p, base);
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
- perr(p, "expected declarator name");
+
+ /* Inner declarator: collect the inner pointer prefix (qualifiers per `*`,
+ * in source order) to wrap LATER, once the suffix-applied base is known. */
+ Sym name = 0;
+ SrcLoc nloc = {0, 0, 0};
+ u8 nptrs_inner = 0;
+ u16 inner_quals[8];
+ int has_inner_parens = 0;
+
+ if (is_punct(&p->cur, '(')) {
+ /* Disambiguate `(declarator)` vs. function suffix `(params)`. The token
+ * after `(` decides:
+ * `*` → inner-declarator pointer prefix
+ * IDENT (non-kw) → inner-declarator IDENT
+ * IDENT (type kw) → function suffix (parameters)
+ * `)` → function suffix `()` (unspecified args)
+ * Phase 2 doesn't have typedef-names; once they land, the IDENT branch
+ * also needs to dispatch on SEK_TYPEDEF. */
+ Tok n = peek1(p);
+ int is_inner = 0;
+ if (is_punct(&n, '*')) {
+ is_inner = 1;
+ } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) {
+ is_inner = 1;
+ }
+ if (is_inner) {
+ has_inner_parens = 1;
+ advance(p); /* '(' */
+ /* Inner pointer prefix: each `*` (with optional qualifiers) records one
+ * wrap layer. Qualifiers are stored per layer so each one can be
+ * attached to its own pointer level below. */
+ while (accept_punct(p, '*')) {
+ u16 q = 0;
+ if (nptrs_inner >= 8) perr(p, "too many pointer levels");
+ for (;;) {
+ if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; }
+ if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; }
+ if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; }
+ if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; }
+ break;
+ }
+ inner_quals[nptrs_inner++] = q;
+ }
+ if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+ name = p->cur.v.ident;
+ nloc = tok_loc(&p->cur);
+ advance(p);
+ } else if (!allow_abstract) {
+ perr(p, "expected declarator name");
+ }
+ expect_punct(p, ')', "')' after inner declarator");
+ }
}
- *name_out = p->cur.v.ident;
- *loc_out = tok_loc(&p->cur);
- advance(p);
+
+ if (!has_inner_parens) {
+ if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+ name = p->cur.v.ident;
+ nloc = tok_loc(&p->cur);
+ advance(p);
+ } else if (!allow_abstract) {
+ perr(p, "expected declarator name");
+ }
+ }
+
+ /* Collect outer suffixes left-to-right; apply in reverse so the innermost
+ * suffix wraps `base` first. For `int a[5][3]` the resulting type is
+ * "array[5] of array[3] of int": [3] applied first → array[3], then [5]
+ * wraps that → array[5] of array[3]. */
+ DeclSuffix suffs[8];
+ int nsuffs = 0;
+ while (nsuffs < 8) {
+ if (!parse_decl_suffix(p, &suffs[nsuffs])) break;
+ ++nsuffs;
+ }
+ if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) {
+ perr(p, "too many declarator suffixes (raise the cap if needed)");
+ }
+ for (int i = nsuffs - 1; i >= 0; --i) {
+ base = apply_decl_suffix(p, base, &suffs[i]);
+ }
+
+ /* Apply inner pointer wraps in source order. Per §6.7.6.1, in
+ * `T (*q1 *q2 id)` the identifier is a q2-qualified pointer to a
+ * q1-qualified pointer to T: the FIRST `*` consumed is the layer nearest
+ * T, so it wraps first and keeps its own qualifiers. This is the same
+ * order parse_pointer_layer uses for the outer prefix. Iterating in
+ * reverse would attach each qualifier set to the wrong pointer level
+ * whenever the levels are qualified differently. */
+ for (int i = 0; i < (int)nptrs_inner; ++i) {
+ base = type_ptr(p->pool, base);
+ if (inner_quals[i]) {
+ base = type_qualified(p->pool, base, inner_quals[i]);
+ }
+ }
+
+ if (name_out) *name_out = name;
+ if (loc_out) *loc_out = nloc;
return base;
}
+/* Non-abstract entry point used by ordinary declarations. Forwards to
+ * parse_declarator_full with allow_abstract=0, so a declarator without an
+ * identifier is reported as "expected declarator name". */
+static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out,
+ SrcLoc* loc_out) {
+ return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out);
+}
+
+/* Push the lvalue of a sub-object at byte offset `offset` within the array
+ * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. The
+ * value stack ends with an OPK_INDIRECT lvalue ready for cg_store.
+ * `arr_ty` is only used to type the initial push of the local; the address
+ * is then retagged as `elem_ty*` and `offset` is added to it as a plain
+ * integer (no element-size scaling happens here). */
+static void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* elem_ty) {
+ cg_push_local_typed(p->cg, slot, arr_ty);
+ cg_addr(p->cg);
+ cg_retag_top(p->cg, type_ptr(p->pool, elem_ty));
+ /* Offset 0 needs no add; the base address already designates it. */
+ if (offset > 0) {
+ cg_push_int(p->cg, (i64)offset, ty_size_t(p));
+ cg_binop(p->cg, BO_IADD);
+ }
+ cg_deref(p->cg, elem_ty);
+}
+
+/* Recursively zero-initialize the sub-object at `offset` of type `ty`.
+ * `slot`/`arr_ty` identify the enclosing array local and are passed through
+ * unchanged; `offset` is in bytes from its start. Arrays recurse element by
+ * element, so one store of 0 is emitted per non-array leaf. */
+static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* ty) {
+ if (ty->kind == TY_ARRAY) {
+ u32 esz = abi_sizeof(p->abi, ty->arr.elem);
+ for (u32 i = 0; i < ty->arr.count; ++i) {
+ zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem);
+ }
+ return;
+ }
+ /* Leaf: store 0 typed as `ty`, then discard the stored value and release
+ * scratch state before the next element. */
+ push_subobject_lv(p, slot, arr_ty, offset, ty);
+ cg_push_int(p->cg, 0, ty);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ cg_reset_scratch(p->cg);
+}
+
+/* Parse the initializer for the sub-object at `offset` of type `ty`. Arrays
+ * take a brace-enclosed list of element initializers (with optional
+ * zero-fill); scalars take an assignment-expression, optionally surrounded by
+ * a single `{...}` (the C syntax for brace-wrapping a scalar init).
+ * `slot`/`arr_ty` identify the enclosing array local; `offset` is in bytes
+ * from its start. Code for each element is emitted as it is parsed. */
+static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
+ const Type* ty) {
+ if (ty->kind == TY_ARRAY) {
+ expect_punct(p, '{', "'{' for array initializer");
+ const Type* elem_ty = ty->arr.elem;
+ u32 esz = abi_sizeof(p->abi, elem_ty);
+ u32 i = 0;
+ /* `{}` (no elements) skips straight to the zero-fill below. */
+ if (!is_punct(&p->cur, '}')) {
+ for (;;) {
+ /* NOTE(review): the bound is ty->arr.count, so an array type whose
+ * count is 0 (presumably the incomplete `T a[]` form) is rejected on
+ * its first element — confirm that is the intended diagnostic. */
+ if (i >= ty->arr.count) {
+ perr(p, "too many initializers for array");
+ }
+ init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
+ ++i;
+ if (!accept_punct(p, ',')) break;
+ if (is_punct(&p->cur, '}')) break; /* trailing comma */
+ }
+ }
+ expect_punct(p, '}', "'}' after array initializer");
+ /* Zero-fill remaining elements per §6.7.9 ¶21. */
+ for (; i < ty->arr.count; ++i) {
+ zero_init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
+ }
+ return;
+ }
+ /* Scalar (or struct, when Phase 3 lands). */
+ int had_brace = accept_punct(p, '{');
+ /* The destination lvalue is pushed before the value expression is parsed,
+ * so the stack is [lvalue, value] when cg_store runs. */
+ push_subobject_lv(p, slot, arr_ty, offset, ty);
+ parse_assign_expr(p);
+ to_rvalue(p);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ cg_reset_scratch(p->cg);
+ if (had_brace) {
+ accept_punct(p, ','); /* tolerate trailing comma inside `{ x, }` */
+ expect_punct(p, '}', "'}' after scalar initializer");
+ }
+}
+
/* Parse a single init-declarator after the decl-specs have been consumed.
- * v1 grammar: declarator = `*`* IDENT ; init = `=` assign_expr. */
+ * Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix*
+ * init = `=` (assign_expr | brace_init) */
static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
SrcLoc loc;
Sym name;
const Type* var_ty = parse_declarator(p, specs->type, &name, &loc);
- /* Local declaration only at this slice. */
+ /* VLA: the declarator type is `T[]` (incomplete array) with a pending
+ * runtime count. Bind `name` as `T*` (the pointer the alloca returns) so
+ * subscript/arithmetic on `a` lowers as on a pointer; `sizeof(a)` would
+ * need the count to be tracked separately, which is a Phase 9 follow-up. */
+ if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) {
+ FrameSlot count_slot = p->vla_pending_count_slot;
+ const Type* elem_ty = var_ty->arr.elem;
+ const Type* ptr_ty = type_ptr(p->pool, elem_ty);
+ FrameSlot ptr_slot = make_local(p, name, ptr_ty, loc);
+ u32 esz = abi_sizeof(p->abi, elem_ty);
+ p->vla_pending = 0;
+ p->vla_pending_count_slot = FRAME_SLOT_NONE;
+ cg_set_loc(p->cg, loc);
+ cg_push_local_typed(p->cg, count_slot, ty_size_t(p));
+ to_rvalue(p);
+ if (esz != 1) {
+ cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+ cg_binop(p->cg, BO_IMUL);
+ }
+ cg_alloca(p->cg);
+ cg_push_local_typed(p->cg, ptr_slot, ptr_ty);
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ cg_reset_scratch(p->cg);
+ if (accept_punct(p, '=')) {
+ perr(p, "VLA initializers are not allowed (§6.7.9 ¶3)");
+ }
+ return;
+ }
+ /* Non-VLA local. */
{
FrameSlot s = make_local(p, name, var_ty, loc);
if (accept_punct(p, '=')) {
cg_set_loc(p->cg, loc);
- cg_push_local_typed(p->cg, s, var_ty);
- parse_assign_expr(p);
- to_rvalue(p);
- cg_store(p->cg);
- /* cg_store leaves the assigned value on the stack (C semantics);
- * an init-declarator is statement-context, so drop it. */
- cg_drop(p->cg);
+ if (var_ty->kind == TY_ARRAY) {
+ /* Brace initializer (or string literal — Phase 6). */
+ init_at(p, s, var_ty, 0, var_ty);
+ } else {
+ cg_push_local_typed(p->cg, s, var_ty);
+ parse_assign_expr(p);
+ to_rvalue(p);
+ cg_store(p->cg);
+ /* cg_store leaves the assigned value on the stack (C semantics);
+ * an init-declarator is statement-context, so drop it. */
+ cg_drop(p->cg);
+ }
}
}
}
@@ -1660,14 +2142,6 @@ static void parse_stmt(Parser* p) {
* External (top-level) declarations
* ============================================================ */
-/* Helper: holds one parsed parameter's name + type (for binding into the
- * function-body scope after cg_func_begin / cg_param). */
-typedef struct ParamInfo {
- Sym name;
- const Type* type;
- SrcLoc loc;
-} ParamInfo;
-
/* Parse a parameter-type-list. Returns the parameter type array and counts
* via out-pointers; `*variadic_out` is set if the list ends in `, ...`.
*
@@ -1676,9 +2150,9 @@ typedef struct ParamInfo {
* `()` — old-style "unspecified args"; treated as zero
* `(T1, T2, ...)` — named or abstract params, possibly trailing ellipsis
*
- * For each named param we record name+type so the function-body parser can
- * later bind them into the param scope. Abstract (no-name) params are
- * allowed for prototype-only declarations. */
+ * Per §6.7.6.3, a parameter declared as `T x[N]` is rewritten to `T *x` (and
+ * `T x()` to `T (*x)()`); the §6.7.6.3 ¶7 "[static N]" form is a hint to the
+ * caller that the pointer points at ≥N elements — semantically still `T*`. */
static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
u8* variadic_out) {
ParamInfo* infos;
@@ -1712,13 +2186,13 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
if (!parse_decl_specs(p, &specs)) {
perr(p, "expected parameter type");
}
- /* Allow either named (`int x`) or abstract (`int`) declarators. We
- * peek the pointer prefix, then if an IDENT follows it's named. */
- pty = parse_pointer_layer(p, specs.type);
- if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
- pname = p->cur.v.ident;
- ploc = tok_loc(&p->cur);
- advance(p);
+ pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname,
+ &ploc);
+ /* Adjust array/function parameter to pointer per §6.7.6.3. */
+ if (pty && pty->kind == TY_ARRAY) {
+ pty = type_ptr(p->pool, pty->arr.elem);
+ } else if (pty && pty->kind == TY_FUNC) {
+ pty = type_ptr(p->pool, pty);
}
if (n == cap) {
cap *= 2;
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -97,8 +97,8 @@ explicit cast; rows here fill in the rest of the conversion matrix.
| `6_3_1_4_01_float_to_int` | · | `double d = 42.9; return (int)d;` | 42 |
| `6_3_1_4_02_int_to_float` | · | `int n = 42; double d = n; return (int)d;` | 42 |
| `6_3_1_8_01_usual_arith_mixed` | ★ | `int s = -1; unsigned u = 1; return (s + u) ? 0 : 42;` | 42 |
-| `6_3_2_1_01_array_to_ptr` | · | `int a[3] = {0,0,42}; int *p = a; return p[2];` | 42 |
-| `6_3_2_1_02_func_to_ptr` | · | helper `id`; `int (*fp)(int) = id; return fp(42);` | 42 |
+| `6_3_2_1_01_array_to_ptr` | ★ | `int a[3] = {0,0,42}; int *p = a; return p[2];` | 42 |
+| `6_3_2_1_02_func_to_ptr` | ★ | helper `id`; `int (*fp)(int) = id; return fp(42);` | 42 |
| `6_3_2_2_01_void_cast_discard` | ★ | `(void)42; return 42;` | 42 |
| `6_3_2_3_01_null_ptr_cmp` | ★ | `int *p = 0; return p ? 99 : 42;` | 42 |
| `6_3_2_3_02_void_ptr_roundtrip` | ★ | `int x=42; void *v=&x; int *p=(int*)v; return *p;` | 42 |
@@ -132,7 +132,7 @@ here for completeness once they're real cases.
| `6_5_19_post_inc` | ★ | `int x = 42; x++; return x;` | 43; reads as 43 |
| `6_5_20_addr_deref` | ★ | `int x = 42; int *p = &x; return *p;` | 42 |
| `6_5_21_sizeof_int` | ★ | `return (int)sizeof(int);` | 4 |
-| `6_5_22_sizeof_expr` | · | `int a[7]; return (int)(sizeof(a)/sizeof(int));` | 7 |
+| `6_5_22_sizeof_expr` | ★ | `int a[7]; return (int)(sizeof(a)/sizeof(int));` | 7 |
| `6_5_23_cast` | ★ | `return (int)(unsigned char)(-1);` | 255 |
| `6_5_24_func_call` | ★ | helper `int id(int x){return x;}` + `return id(42);` | 42 |
| `6_5_25_unary_plus` | ★ | `return +42;` | 42 |
@@ -141,8 +141,8 @@ here for completeness once they're real cases.
| `6_5_28_arrow` | · | `struct S{int v;} s={42}; struct S *p=&s; return p->v;` | 42 |
| `6_5_29_compound_literal` | · | `int *p = (int[]){10, 32}; return p[0]+p[1];` | 42 |
| `6_5_30_generic_selection`| · | `int x=42; return _Generic((x), int: x, default: 0);` | 42 |
-| `6_5_31_subscript_commute`| · | `int a[5]={0,0,42,0,0}; return 2[a];` | 42 |
-| `6_5_32_string_subscript` | · | `return "*"[0];` | 42 |
+| `6_5_31_subscript_commute`| ★ | `int a[5]={0,0,42,0,0}; return 2[a];` | 42 |
+| `6_5_32_string_subscript` | ★ | `return "*"[0];` | 42 |
## §6.6 Constant expressions
@@ -238,11 +238,11 @@ already exercised in §6.5 and §6.7.
|---|---|---|---|
| `6_7_6_01_ptr_to_ptr` | ★ | `int x=42; int *p=&x; int **pp=&p; return **pp;` | 42 |
| `6_7_6_02_array_2d` | · | `int a[2][3]={{0,0,0},{0,0,42}}; return a[1][2];` | 42 |
-| `6_7_6_03_array_of_ptr` | · | `int x=42; int *a[2]={0,&x}; return *a[1];` | 42 |
-| `6_7_6_04_funcptr_decl` | · | `int id(int x){return x;} int (*fp)(int)=id; return fp(42);` | 42 |
-| `6_7_6_05_funcptr_returning_ptr` | · | helper returns `int*`; `int *(*fp)(int*)=...; return *fp(&x);` | 42 |
-| `6_7_6_06_array_static_n` | · | helper `int rd(int p[static 3]){return p[2];}`; `int a[3]={0,0,42}; return rd(a);` | 42 |
-| `6_7_6_07_vla_local` | · | `int n=7; int a[n]; for(int i=0;i<n;i++) a[i]=i*7; return a[n-1];` | 42 |
+| `6_7_6_03_array_of_ptr` | ★ | `int x=42; int *a[2]={0,&x}; return *a[1];` | 42 |
+| `6_7_6_04_funcptr_decl` | ★ | `int id(int x){return x;} int (*fp)(int)=id; return fp(42);` | 42 |
+| `6_7_6_05_funcptr_returning_ptr` | ★ | helper returns `int*`; `int *(*fp)(int*)=...; return *fp(&x);` | 42 |
+| `6_7_6_06_array_static_n` | ★ | helper `int rd(int p[static 3]){return p[2];}`; `int a[3]={0,0,42}; return rd(a);` | 42 |
+| `6_7_6_07_vla_local` | ★ | `int n=7; int a[n]; for(int i=0;i<n;i++) a[i]=i*7; return a[n-1];` | 42 |
| `6_7_6_08_variadic_decl` | · | helper `int sum(int n, ...)` summing two ints; `sum(2, 20, 22)` | 42 |
## §6.7.8 Type definitions