kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 84bd8059c53fd1cf3c3612683e0d400d36aa9b42
parent d7882f0ce3836f3d71de3ba9bca652a7fc28a161
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 06:03:03 -0700

parse: Phase 2 — pointer/array decls, decay, subscript, brace init, VLA

Recursive declarator with array/function suffixes and one level of
nested parens (`int (*fp)(int)`); array→pointer and function→pointer
decay in to_rvalue; subscript a[i] and the commutative i[a] in postfix;
pointer arithmetic in +/-; brace initializer for local arrays with
nested zero-fill; `[static N]` parameter form; VLA local lowered via
cg_alloca (cg.c stub wired through to the existing aarch64 backend).

Flips 10 corpus rows · → ★ (6_3_2_1_01–02, 6_5_22, 6_5_31–32,
6_7_6_03–07). 6_7_6_02_array_2d still fails — nested brace init runs
the aarch64 scratch pool dry; needs the spill path. 6_7_6_08 is
Phase 9 (variadics).

Diffstat:
M doc/parser-status.md | 39 +++++++++++++++++++++++++++------------
M src/cg/cg.c | 32 +++++++++++++++++++++++++++++++-
M src/parse/parse.c | 588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M test/parse/CORPUS.md | 20 ++++++++++----------
4 files changed, 599 insertions(+), 80 deletions(-)

diff --git a/doc/parser-status.md b/doc/parser-status.md @@ -66,22 +66,37 @@ Unlocks (status as landed): `6_5_12–14` ★, `6_5_20–21` ★, `6_5_22` · --- -## Phase 2 — Pointers & arrays ⬜ +## Phase 2 — Pointers & arrays ✅ Pointer/array declarator layers and the address operators. Builds on Phase 1's type-name production. -- [ ] Pointer declarator (`int *p`, `int **pp`) -- [ ] Array declarator (`int a[N]`, `int a[]`) -- [ ] Subscript `a[i]` (and the commutative `i[a]`) -- [ ] Pointer arithmetic in `+`/`-` (scaled by element size) -- [ ] Array-to-pointer decay -- [ ] Function-to-pointer decay + indirect call (`(*fp)(x)`) -- [ ] `int *const p` / `const int *p` qualifier placement -- [ ] `[static N]` parameter form -- [ ] VLA local (`int a[n]`) - -Unlocks: `6_3_2_1_*`, `6_5_28–32`, `6_7_3_03–04`, `6_7_6_01–07`. +- [x] Pointer declarator (`int *p`, `int **pp`) +- [x] Array declarator (`int a[N]`, `int a[]`) +- [x] Subscript `a[i]` (and the commutative `i[a]`) +- [x] Pointer arithmetic in `+`/`-` (scaled by element size) +- [x] Array-to-pointer decay +- [x] Function-to-pointer decay + indirect call (`(*fp)(x)`) +- [x] `int *const p` / `const int *p` qualifier placement +- [x] `[static N]` parameter form +- [x] VLA local (`int a[n]`) — lowered via `cg_alloca`; the variable binds + as a pointer-to-element rather than as a true array, so subscript + and pointer arith just work but `sizeof(vla)` is not yet preserved + (Phase 9 follow-up alongside `__builtin_alloca`). + +Phase 2 also taught `parse_decl_suffix` to parse function and array +declarator suffixes recursively, taught `to_rvalue` to do array→pointer +and function→pointer decay automatically, and added a real recursive +declarator that handles one level of nested parens (`int (*fp)(int)`). +The `cg_alloca` stub in `cg.c` was wired through to `CGTarget.alloca_` +since the aarch64 backend already implements it. 
+ +Unlocks (status as landed): `6_3_2_1_01–02` ★, `6_5_22` ★, `6_5_31–32` ★, +`6_7_6_03–07` ★, `6_7_6_02` · (regalloc pressure under nested brace init — +needs the spill path that's also blocking other future cases). `6_5_28–30` +were misattributed in the original phase plan: `6_5_28_arrow` and +`6_5_30_generic_selection` need struct/`_Generic` (Phase 3), +`6_5_29_compound_literal` needs Phase 6's brace-init machinery. --- diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -378,6 +378,15 @@ const Type* cg_top_type(CG* g) { return g->stack[g->sp - 1].type; } +/* Type of the second-from-top SValue. Used by the parser when both operands + * of a binary operator are already on the stack and it needs to pick a + * pointer-arithmetic vs. integer-arithmetic lowering. */ +const Type* cg_top2_type(CG* g); +const Type* cg_top2_type(CG* g) { + if (g->sp < 2) return NULL; + return g->stack[g->sp - 2].type; +} + /* Replace the type tag on the top SValue without emitting code. Used by * the parser for casts that are no-ops at the value level (e.g. pointer- * to-pointer of the same width); the underlying register/operand stays @@ -854,7 +863,28 @@ void cg_ret(CG* g, int has_value) { * ============================================================ */ void cg_alloca(CG* g) { - compiler_panic(g->c, g->cur_loc, "cg_alloca: not in v1 slice"); + /* Pop the size (i64 imm or reg), call CGTarget.alloca_, push the resulting + * void* aligned to max_align_t. The 16-byte alignment is the AAPCS64 + * max_align_t; cg trusts the backend to honor it (aa_alloca_ rounds the + * size up to a 16-byte multiple, which is what keeps SP aligned). */ + CGTarget* T = g->target; + SValue sz = pop(g); + Operand sz_op; + const Type* void_ptr = type_ptr(g->pool, type_void(g->pool)); + Reg dst_r; + Operand dst; + SValue out; + if (sz.op.kind == OPK_IMM) { + sz_op = sz.op; + } else { + sz_op = force_reg(g, sz, sz.type ? 
sz.type : sz.op.type); + } + dst_r = T->alloc_reg(T, RC_INT, void_ptr); + dst = op_reg(dst_r, void_ptr); + T->alloca_(T, dst, sz_op, /*align=*/16); + out.op = dst; + out.type = void_ptr; + push(g, out); } void cg_va_start_(CG* g) { compiler_panic(g->c, g->cur_loc, "cg_va_start: not in v1 slice"); diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -44,6 +44,9 @@ extern void cg_push_local_typed(CG*, FrameSlot, const Type*); extern void cg_deref(CG*, const Type* pointee); /* Read SValue.type at top of stack without popping. */ extern const Type* cg_top_type(CG*); +/* Read SValue.type at second-from-top; used for pointer-arith dispatch when + * both operands are already on the stack. */ +extern const Type* cg_top2_type(CG*); /* Replace the type tag on the top SValue without emitting code (used for * pointer-to-pointer casts which are no-ops at the value level). */ extern void cg_retag_top(CG*, const Type*); @@ -182,6 +185,14 @@ typedef struct Parser { /* Loop/switch context for break/continue. CGLabel 0 means none. */ CGLabel cur_break; CGLabel cur_continue; + + /* VLA bookkeeping. parse_decl_suffix emits the size-expression code at + * suffix-parse time (because the tokens are about to vanish) and stashes + * the i64 count in `vla_pending_count_slot`; parse_init_declarator picks + * it up to drive cg_alloca. v1 supports only one VLA dimension per + * declarator; nested cases panic in apply_decl_suffix. */ + u8 vla_pending; + FrameSlot vla_pending_count_slot; } Parser; /* ============================================================ @@ -487,19 +498,21 @@ static int starts_type_name(const Parser* p, const Tok* t) { } /* Walk a `*` chain at the front of a declarator (and optional qualifiers - * after each `*`), wrapping `base` in successive pointer types. Returns - * the innermost type the IDENT/declarator-tail refers to. */ + * after each `*`), wrapping `base` in successive pointer types. 
Qualifiers + * after a `*` qualify the pointer just produced (`int *const p` → p is a + * const-qualified pointer to int). */ static const Type* parse_pointer_layer(Parser* p, const Type* base) { while (accept_punct(p, '*')) { + u16 q = 0; base = type_ptr(p->pool, base); - /* Optional qualifiers after `*`; recognized and ignored at this slice. */ for (;;) { - if (accept_kw(p, KW_CONST) || accept_kw(p, KW_VOLATILE) || - accept_kw(p, KW_RESTRICT) || accept_kw(p, KW_ATOMIC)) { - continue; - } + if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } + if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } + if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } + if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } break; } + if (q) base = type_qualified(p->pool, base, q); } return base; } @@ -592,11 +605,27 @@ static void parse_assign_expr(Parser* p); static void parse_unary(Parser* p); static void parse_postfix(Parser* p); -/* Produce an rvalue on the stack: pop, and if it's an lvalue, load. */ +/* Produce an rvalue on the stack. Three cases beyond the trivial scalar: + * - array lvalue: §6.3.2.1 array-to-pointer decay → take address, retag the + * resulting `T(*)[N]` as `T*` so subsequent ops see a pointer. + * - function lvalue: §6.3.2.1 function-to-pointer decay → take address; the + * type becomes `T(*)()` automatically because cg_addr wraps the operand + * type in TY_PTR. + * - scalar lvalue (LOCAL/GLOBAL/INDIRECT): cg_load. Idempotent on rvalues. */ static void to_rvalue(Parser* p) { + const Type* t = cg_top_type(p->cg); + if (t) { + if (t->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem)); + return; + } + if (t->kind == TY_FUNC) { + cg_addr(p->cg); + return; + } + } cg_load(p->cg); - /* cg_load is idempotent on rvalues. 
*/ - (void)p; } /* Decode one character (the first encoded code unit) from the token's @@ -850,11 +879,21 @@ static void parse_postfix(Parser* p) { continue; } if (is_punct(&t, '(')) { - /* Function call. The callee was pushed by parse_primary as an - * lvalue (OPK_GLOBAL for SEK_FUNC); cg_call accepts that directly - * for direct calls. */ - const Type* fn_type = cg_top_type(p->cg); - if (!fn_type || fn_type->kind != TY_FUNC) { + /* Function call. The callee was pushed by parse_primary as a function + * lvalue (OPK_GLOBAL when SEK_FUNC); a function-pointer callee is also + * accepted (TY_PTR-to-TY_FUNC) — load it to a register and indirect. */ + const Type* top = cg_top_type(p->cg); + const Type* fn_type; + if (top && top->kind == TY_FUNC) { + fn_type = top; + } else if (top && top->kind == TY_PTR && top->ptr.pointee && + top->ptr.pointee->kind == TY_FUNC) { + fn_type = top->ptr.pointee; + /* Materialize the pointer rvalue (cg_call's force_reg fallback would + * also do this, but doing it here keeps the invariant that the value + * stack settles to a register before argument evaluation starts). */ + cg_load(p->cg); + } else { perr(p, "called object is not a function"); } advance(p); /* '(' */ @@ -886,8 +925,58 @@ static void parse_postfix(Parser* p) { } continue; } - if (is_punct(&t, '[') || is_punct(&t, '.') || is_punct(&t, P_ARROW)) { - perr(p, "subscript/member access not supported in v1 slice"); + if (is_punct(&t, '[')) { + /* Subscript `e1[e2]` is `*((e1) + (e2))` per §6.5.2.1. We resolve the + * pointer side after parsing the index so the commutative `i[a]` form + * (where the bracketed side is the pointer/array) falls out naturally. */ + const Type* lt0 = cg_top_type(p->cg); + advance(p); /* '[' */ + /* If the left operand is an array/pointer, decay/load to get a pointer + * rvalue. Integer base is left alone — we'll commute below if needed. 
*/ + if (lt0 && lt0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem)); + } else if (lt0 && lt0->kind == TY_PTR) { + cg_load(p->cg); + } + parse_expr(p); + /* Decay/load the index side similarly. */ + { + const Type* it0 = cg_top_type(p->cg); + if (it0 && it0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem)); + } else { + to_rvalue(p); + } + } + expect_punct(p, ']', "']' after subscript"); + { + const Type* lt = cg_top2_type(p->cg); + const Type* it = cg_top_type(p->cg); + const Type* elem; + if (lt && lt->kind == TY_PTR && type_is_int(it)) { + elem = lt->ptr.pointee; + } else if (it && it->kind == TY_PTR && type_is_int(lt)) { + /* Commute so the pointer is on the bottom for the add below. */ + cg_swap(p->cg); + elem = it->ptr.pointee; + } else { + perr(p, "invalid subscript: needs one pointer and one integer"); + } + if (!elem) perr(p, "subscript on incomplete pointee"); + u32 esz = abi_sizeof(p->abi, elem); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + cg_deref(p->cg, elem); + } + continue; + } + if (is_punct(&t, '.') || is_punct(&t, P_ARROW)) { + perr(p, "member access not supported in v1 slice"); } break; } @@ -1078,6 +1167,61 @@ static void parse_mul(Parser* p) { } } +/* Apply C pointer arithmetic to the top two values on the stack: + * ptr + int → ptr + int * sizeof(*ptr) + * int + ptr → ptr + int * sizeof(*ptr) (commute, then scale) + * ptr - int → ptr - int * sizeof(*ptr) + * ptr - ptr → (ptr - ptr) / sizeof(*ptr) (ptrdiff_t result) + * int +/- int → integer add/sub + * Pops both operands and pushes the result. 
*/ +static void emit_add_or_sub(Parser* p, BinOp bop) { + const Type* lt = cg_top2_type(p->cg); + const Type* rt = cg_top_type(p->cg); + int l_is_ptr = lt && lt->kind == TY_PTR; + int r_is_ptr = rt && rt->kind == TY_PTR; + if (bop == BO_IADD) { + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + if (r_is_ptr && type_is_int(lt)) { + cg_swap(p->cg); + u32 esz = abi_sizeof(p->abi, rt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + } else { /* BO_ISUB */ + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_ISUB); + return; + } + if (l_is_ptr && r_is_ptr) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + cg_binop(p->cg, BO_ISUB); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_SDIV); + } + return; + } + } + cg_binop(p->cg, bop); +} + static void parse_add(Parser* p) { parse_mul(p); for (;;) { @@ -1094,7 +1238,7 @@ static void parse_add(Parser* p) { to_rvalue(p); parse_mul(p); to_rvalue(p); - cg_binop(p->cg, bop); + emit_add_or_sub(p, bop); } } @@ -1406,41 +1550,379 @@ static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { return s; } -/* Parse a non-abstract declarator: optional `*` pointer prefix followed - * by an IDENT. v1 doesn't yet implement function or array declarators, - * which slot in around the IDENT in subsequent phases. Returns the - * declared type (with pointer layers wrapping `base`) and writes the - * IDENT to *name_out / *loc_out. 
*/ -static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, - SrcLoc* loc_out) { +/* Forward decls for declarator components. */ +typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind; +typedef struct ParamInfo ParamInfo; +typedef struct DeclSuffix { + u8 kind; /* DSuffKind */ + /* DS_ARRAY */ + u32 count; /* element count; meaningful when !vla and !incomplete */ + u8 incomplete; /* true for `[]` (no size given) */ + u8 vla; /* true for `[expr]` with a non-constant size */ + /* When `vla` is set, the size expression has already been emitted and the + * resulting i64 (in bytes-of-elem-count) is held in this scratch slot. + * Materialized at suffix-parse time because the size expression's tokens + * are consumed there; init_declarator reads it back to drive cg_alloca. */ + FrameSlot vla_count_slot; + /* DS_FUNC */ + ParamInfo* params; + u16 nparams; + u8 variadic; +} DeclSuffix; + +typedef struct ParamInfo { + Sym name; + const Type* type; + SrcLoc loc; +} ParamInfo; + +static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, + u8* variadic_out); + +/* Parse a single trailing suffix (`[...]` or `(...)`) after a declarator's + * IDENT or parenthesized inner-declarator. Returns 1 if a suffix was consumed + * and filled into *out, 0 otherwise. */ +static int parse_decl_suffix(Parser* p, DeclSuffix* out) { + if (accept_punct(p, '[')) { + /* `[ qualifier* static? assignment-expression? ]` or `[ static qualifier* assign-expr ]`. + * Only constant integer expressions are accepted at this slice; non-constant + * sizes are VLA territory (Phase 9). */ + out->kind = DS_ARRAY; + out->count = 0; + out->incomplete = 0; + /* Optional `static`/qualifiers before the size; recognized, no-op here. + * `[static N]` only changes parameter ABI hints (caller promises ≥N). 
*/ + for (;;) { + if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) || + accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) || + accept_kw(p, KW_ATOMIC)) { + continue; + } + break; + } + if (accept_punct(p, ']')) { + out->incomplete = 1; + return 1; + } + /* Constant integer size? A bare TOK_NUM is the entire spine corpus's + * idiom; a non-constant size kicks the suffix into VLA mode (Phase 2, + * §6.7.6.2 ¶4). Full constant-expression evaluation is a future cross- + * cutting concern; for now anything but TOK_NUM goes through alloca. */ + { + Tok t = p->cur; + if (t.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &t); + if (v < 0) perr(p, "negative array size"); + out->count = (u32)v; + advance(p); + } else { + /* VLA: emit the size-expression code now (the tokens go away after + * we return), spill its int value to a fresh i64 frame slot so + * init_declarator can pick it back up at the right time. */ + FrameSlotDesc fsd; + if (p->vla_pending) { + perr(p, "v1 supports only one VLA dimension per declarator"); + } + out->vla = 1; + memset(&fsd, 0, sizeof fsd); + fsd.type = ty_size_t(p); + fsd.size = abi_sizeof(p->abi, fsd.type); + fsd.align = abi_alignof(p->abi, fsd.type); + fsd.kind = FS_LOCAL; + out->vla_count_slot = cg_local(p->cg, &fsd); + parse_assign_expr(p); + to_rvalue(p); + cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_reset_scratch(p->cg); + p->vla_pending = 1; + p->vla_pending_count_slot = out->vla_count_slot; + } + } + expect_punct(p, ']', "']' after array size"); + return 1; + } + if (accept_punct(p, '(')) { + out->kind = DS_FUNC; + out->params = NULL; + out->nparams = 0; + out->variadic = 0; + parse_param_list(p, &out->params, &out->nparams, &out->variadic); + expect_punct(p, ')', "')' after parameter list"); + return 1; + } + return 0; +} + +/* Wrap `base` with a single suffix's transform. Used when materializing the + * declarator type from the collected suffix list. 
*/ +static const Type* apply_decl_suffix(Parser* p, const Type* base, + const DeclSuffix* s) { + if (s->kind == DS_ARRAY) { + /* VLA: count is runtime; record an incomplete array type so the type + * system carries the elem-type but the size is treated as unknown. + * init_declarator notices the parser-side `vla_pending` flag and emits + * the alloca + bind. */ + return type_array(p->pool, base, s->count, s->incomplete || s->vla); + } + /* DS_FUNC */ + { + const Type** ptypes = NULL; + if (s->nparams) { + ptypes = (const Type**)arena_array(p->c->tu, const Type*, s->nparams); + for (u16 i = 0; i < s->nparams; ++i) ptypes[i] = s->params[i].type; + } + return type_func(p->pool, base, ptypes, s->nparams, (int)s->variadic); + } +} + +/* Parse a (possibly abstract) declarator. Supports: + * pointer-prefix? ( IDENT | '(' inner-declarator ')' ) suffix* + * where suffix is `[N]` or `(params)`. The inner declarator handles one level + * of nesting (e.g. `int (*fp)(int)`). Multiple nested parens would recurse + * naturally — for Phase 2 a single level covers all corpus cases. + * + * If `allow_abstract` is true, the IDENT may be absent (used by parameters). + * On success returns the declared type and writes *name_out (=0 if abstract). */ +static const Type* parse_declarator_full(Parser* p, const Type* base, + int allow_abstract, Sym* name_out, + SrcLoc* loc_out) { + /* Outer pointer prefix wraps `base` as we go. */ base = parse_pointer_layer(p, base); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected declarator name"); + + /* Inner declarator: collect inner pointer prefix (innermost-first array) + * to wrap LATER (after we know the suffix-applied base). */ + Sym name = 0; + SrcLoc nloc = {0, 0, 0}; + u8 nptrs_inner = 0; + u16 inner_quals[8]; + int has_inner_parens = 0; + + if (is_punct(&p->cur, '(')) { + /* Disambiguate `(declarator)` vs. function suffix `(params)`. 
The token + * after `(` decides: + * `*` → inner-declarator pointer prefix + * IDENT (non-kw) → inner-declarator IDENT + * IDENT (type kw) → function suffix (parameters) + * `)` → function suffix `()` (unspecified args) + * Phase 2 doesn't have typedef-names; once they land, the IDENT branch + * also needs to dispatch on SEK_TYPEDEF. */ + Tok n = peek1(p); + int is_inner = 0; + if (is_punct(&n, '*')) { + is_inner = 1; + } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) { + is_inner = 1; + } + if (is_inner) { + has_inner_parens = 1; + advance(p); /* '(' */ + /* Inner pointer prefix: each `*` (with optional qualifiers) records one + * wrap layer. We store qualifiers per layer so we can apply them in + * reverse order below. */ + while (accept_punct(p, '*')) { + u16 q = 0; + if (nptrs_inner >= 8) perr(p, "too many pointer levels"); + for (;;) { + if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; } + if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } + if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } + if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } + break; + } + inner_quals[nptrs_inner++] = q; + } + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + name = p->cur.v.ident; + nloc = tok_loc(&p->cur); + advance(p); + } else if (!allow_abstract) { + perr(p, "expected declarator name"); + } + expect_punct(p, ')', "')' after inner declarator"); + } } - *name_out = p->cur.v.ident; - *loc_out = tok_loc(&p->cur); - advance(p); + + if (!has_inner_parens) { + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + name = p->cur.v.ident; + nloc = tok_loc(&p->cur); + advance(p); + } else if (!allow_abstract) { + perr(p, "expected declarator name"); + } + } + + /* Collect outer suffixes left-to-right; apply in reverse so the innermost + * suffix wraps `base` first. 
For `int a[5][3]` the resulting type is + * "array[5] of array[3] of int": [3] applied first → array[3], then [5] + * wraps that → array[5] of array[3]. */ + DeclSuffix suffs[8]; + int nsuffs = 0; + while (nsuffs < 8) { + if (!parse_decl_suffix(p, &suffs[nsuffs])) break; + ++nsuffs; + } + if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) { + perr(p, "too many declarator suffixes (raise the cap if needed)"); + } + for (int i = nsuffs - 1; i >= 0; --i) { + base = apply_decl_suffix(p, base, &suffs[i]); + } + + /* Apply inner pointer wraps. inner_quals[0] is the FIRST `*` consumed (the + * outermost in the chain `**fp` reads as "fp is ptr to ptr"); the LAST `*` + * is the one nearest the IDENT. Wrap from nearest-IDENT outward, so we + * iterate inner_quals in reverse. */ + for (int i = (int)nptrs_inner - 1; i >= 0; --i) { + base = type_ptr(p->pool, base); + if (inner_quals[i]) { + base = type_qualified(p->pool, base, inner_quals[i]); + } + } + + if (name_out) *name_out = name; + if (loc_out) *loc_out = nloc; return base; } +/* Non-abstract entry point used by ordinary declarations. */ +static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, + SrcLoc* loc_out) { + return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out); +} + +/* Push the lvalue of a sub-object at byte offset `offset` within the array + * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. The + * value stack ends with an OPK_INDIRECT lvalue ready for cg_store. */ +static void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty) { + cg_push_local_typed(p->cg, slot, arr_ty); + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, elem_ty)); + if (offset > 0) { + cg_push_int(p->cg, (i64)offset, ty_size_t(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, elem_ty); +} + +/* Recursively zero-initialize the sub-object at `offset` of type `ty`. 
*/ +static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + if (ty->kind == TY_ARRAY) { + u32 esz = abi_sizeof(p->abi, ty->arr.elem); + for (u32 i = 0; i < ty->arr.count; ++i) { + zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem); + } + return; + } + push_subobject_lv(p, slot, arr_ty, offset, ty); + cg_push_int(p->cg, 0, ty); + cg_store(p->cg); + cg_drop(p->cg); + cg_reset_scratch(p->cg); +} + +/* Parse the initializer for the sub-object at `offset` of type `ty`. Arrays + * take a brace-enclosed list of element initializers (with optional + * zero-fill); scalars take an assignment-expression, optionally surrounded by + * a single `{...}` (the C syntax for brace-wrapping a scalar init). */ +static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + expect_punct(p, '{', "'{' for array initializer"); + const Type* elem_ty = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem_ty); + u32 i = 0; + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + init_at(p, slot, arr_ty, offset + i * esz, elem_ty); + ++i; + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; /* trailing comma */ + } + } + expect_punct(p, '}', "'}' after array initializer"); + /* Zero-fill remaining elements per §6.7.9 ¶21. */ + for (; i < ty->arr.count; ++i) { + zero_init_at(p, slot, arr_ty, offset + i * esz, elem_ty); + } + return; + } + /* Scalar (or struct, when Phase 3 lands). 
*/ + int had_brace = accept_punct(p, '{'); + push_subobject_lv(p, slot, arr_ty, offset, ty); + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + cg_drop(p->cg); + cg_reset_scratch(p->cg); + if (had_brace) { + accept_punct(p, ','); /* tolerate trailing comma inside `{ x, }` */ + expect_punct(p, '}', "'}' after scalar initializer"); + } +} + /* Parse a single init-declarator after the decl-specs have been consumed. - * v1 grammar: declarator = `*`* IDENT ; init = `=` assign_expr. */ + * Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix* + * init = `=` (assign_expr | brace_init) */ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { SrcLoc loc; Sym name; const Type* var_ty = parse_declarator(p, specs->type, &name, &loc); - /* Local declaration only at this slice. */ + /* VLA: the declarator type is `T[]` (incomplete array) with a pending + * runtime count. Bind `name` as `T*` (the pointer the alloca returns) so + * subscript/arithmetic on `a` lowers as on a pointer; `sizeof(a)` would + * need the count to be tracked separately, which is a Phase 9 follow-up. */ + if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) { + FrameSlot count_slot = p->vla_pending_count_slot; + const Type* elem_ty = var_ty->arr.elem; + const Type* ptr_ty = type_ptr(p->pool, elem_ty); + FrameSlot ptr_slot = make_local(p, name, ptr_ty, loc); + u32 esz = abi_sizeof(p->abi, elem_ty); + p->vla_pending = 0; + p->vla_pending_count_slot = FRAME_SLOT_NONE; + cg_set_loc(p->cg, loc); + cg_push_local_typed(p->cg, count_slot, ty_size_t(p)); + to_rvalue(p); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_alloca(p->cg); + cg_push_local_typed(p->cg, ptr_slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_reset_scratch(p->cg); + if (accept_punct(p, '=')) { + perr(p, "VLA initializers are not allowed (§6.7.9 ¶3)"); + } + return; + } + /* Non-VLA local. 
*/ { FrameSlot s = make_local(p, name, var_ty, loc); if (accept_punct(p, '=')) { cg_set_loc(p->cg, loc); - cg_push_local_typed(p->cg, s, var_ty); - parse_assign_expr(p); - to_rvalue(p); - cg_store(p->cg); - /* cg_store leaves the assigned value on the stack (C semantics); - * an init-declarator is statement-context, so drop it. */ - cg_drop(p->cg); + if (var_ty->kind == TY_ARRAY) { + /* Brace initializer (or string literal — Phase 6). */ + init_at(p, s, var_ty, 0, var_ty); + } else { + cg_push_local_typed(p->cg, s, var_ty); + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + /* cg_store leaves the assigned value on the stack (C semantics); + * an init-declarator is statement-context, so drop it. */ + cg_drop(p->cg); + } } } } @@ -1660,14 +2142,6 @@ static void parse_stmt(Parser* p) { * External (top-level) declarations * ============================================================ */ -/* Helper: holds one parsed parameter's name + type (for binding into the - * function-body scope after cg_func_begin / cg_param). */ -typedef struct ParamInfo { - Sym name; - const Type* type; - SrcLoc loc; -} ParamInfo; - /* Parse a parameter-type-list. Returns the parameter type array and counts * via out-pointers; `*variadic_out` is set if the list ends in `, ...`. * @@ -1676,9 +2150,9 @@ typedef struct ParamInfo { * `()` — old-style "unspecified args"; treated as zero * `(T1, T2, ...)` — named or abstract params, possibly trailing ellipsis * - * For each named param we record name+type so the function-body parser can - * later bind them into the param scope. Abstract (no-name) params are - * allowed for prototype-only declarations. */ + * Per §6.7.6.3, a parameter declared as `T x[N]` is rewritten to `T *x` (and + * `T x()` to `T (*x)()`); the §6.7.6.3 ¶7 "[static N]" form is a hint to the + * caller that the pointer points at ≥N elements — semantically still `T*`. 
*/ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, u8* variadic_out) { ParamInfo* infos; @@ -1712,13 +2186,13 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, if (!parse_decl_specs(p, &specs)) { perr(p, "expected parameter type"); } - /* Allow either named (`int x`) or abstract (`int`) declarators. We - * peek the pointer prefix, then if an IDENT follows it's named. */ - pty = parse_pointer_layer(p, specs.type); - if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { - pname = p->cur.v.ident; - ploc = tok_loc(&p->cur); - advance(p); + pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname, + &ploc); + /* Adjust array/function parameter to pointer per §6.7.6.3. */ + if (pty && pty->kind == TY_ARRAY) { + pty = type_ptr(p->pool, pty->arr.elem); + } else if (pty && pty->kind == TY_FUNC) { + pty = type_ptr(p->pool, pty); } if (n == cap) { cap *= 2; diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -97,8 +97,8 @@ explicit cast; rows here fill in the rest of the conversion matrix. | `6_3_1_4_01_float_to_int` | · | `double d = 42.9; return (int)d;` | 42 | | `6_3_1_4_02_int_to_float` | · | `int n = 42; double d = n; return (int)d;` | 42 | | `6_3_1_8_01_usual_arith_mixed` | ★ | `int s = -1; unsigned u = 1; return (s + u) ? 0 : 42;` | 42 | -| `6_3_2_1_01_array_to_ptr` | · | `int a[3] = {0,0,42}; int *p = a; return p[2];` | 42 | -| `6_3_2_1_02_func_to_ptr` | · | helper `id`; `int (*fp)(int) = id; return fp(42);` | 42 | +| `6_3_2_1_01_array_to_ptr` | ★ | `int a[3] = {0,0,42}; int *p = a; return p[2];` | 42 | +| `6_3_2_1_02_func_to_ptr` | ★ | helper `id`; `int (*fp)(int) = id; return fp(42);` | 42 | | `6_3_2_2_01_void_cast_discard` | ★ | `(void)42; return 42;` | 42 | | `6_3_2_3_01_null_ptr_cmp` | ★ | `int *p = 0; return p ? 
99 : 42;` | 42 | | `6_3_2_3_02_void_ptr_roundtrip` | ★ | `int x=42; void *v=&x; int *p=(int*)v; return *p;` | 42 | @@ -132,7 +132,7 @@ here for completeness once they're real cases. | `6_5_19_post_inc` | ★ | `int x = 42; x++; return x;` | 43; reads as 43 | | `6_5_20_addr_deref` | ★ | `int x = 42; int *p = &x; return *p;` | 42 | | `6_5_21_sizeof_int` | ★ | `return (int)sizeof(int);` | 4 | -| `6_5_22_sizeof_expr` | · | `int a[7]; return (int)(sizeof(a)/sizeof(int));` | 7 | +| `6_5_22_sizeof_expr` | ★ | `int a[7]; return (int)(sizeof(a)/sizeof(int));` | 7 | | `6_5_23_cast` | ★ | `return (int)(unsigned char)(-1);` | 255 | | `6_5_24_func_call` | ★ | helper `int id(int x){return x;}` + `return id(42);` | 42 | | `6_5_25_unary_plus` | ★ | `return +42;` | 42 | @@ -141,8 +141,8 @@ here for completeness once they're real cases. | `6_5_28_arrow` | · | `struct S{int v;} s={42}; struct S *p=&s; return p->v;` | 42 | | `6_5_29_compound_literal` | · | `int *p = (int[]){10, 32}; return p[0]+p[1];` | 42 | | `6_5_30_generic_selection`| · | `int x=42; return _Generic((x), int: x, default: 0);` | 42 | -| `6_5_31_subscript_commute`| · | `int a[5]={0,0,42,0,0}; return 2[a];` | 42 | -| `6_5_32_string_subscript` | · | `return "*"[0];` | 42 | +| `6_5_31_subscript_commute`| ★ | `int a[5]={0,0,42,0,0}; return 2[a];` | 42 | +| `6_5_32_string_subscript` | ★ | `return "*"[0];` | 42 | ## §6.6 Constant expressions @@ -238,11 +238,11 @@ already exercised in §6.5 and §6.7. 
|---|---|---|---| | `6_7_6_01_ptr_to_ptr` | ★ | `int x=42; int *p=&x; int **pp=&p; return **pp;` | 42 | | `6_7_6_02_array_2d` | · | `int a[2][3]={{0,0,0},{0,0,42}}; return a[1][2];` | 42 | -| `6_7_6_03_array_of_ptr` | · | `int x=42; int *a[2]={0,&x}; return *a[1];` | 42 | -| `6_7_6_04_funcptr_decl` | · | `int id(int x){return x;} int (*fp)(int)=id; return fp(42);` | 42 | -| `6_7_6_05_funcptr_returning_ptr` | · | helper returns `int*`; `int *(*fp)(int*)=...; return *fp(&x);` | 42 | -| `6_7_6_06_array_static_n` | · | helper `int rd(int p[static 3]){return p[2];}`; `int a[3]={0,0,42}; return rd(a);` | 42 | -| `6_7_6_07_vla_local` | · | `int n=7; int a[n]; for(int i=0;i<n;i++) a[i]=i*7; return a[n-1];` | 42 | +| `6_7_6_03_array_of_ptr` | ★ | `int x=42; int *a[2]={0,&x}; return *a[1];` | 42 | +| `6_7_6_04_funcptr_decl` | ★ | `int id(int x){return x;} int (*fp)(int)=id; return fp(42);` | 42 | +| `6_7_6_05_funcptr_returning_ptr` | ★ | helper returns `int*`; `int *(*fp)(int*)=...; return *fp(&x);` | 42 | +| `6_7_6_06_array_static_n` | ★ | helper `int rd(int p[static 3]){return p[2];}`; `int a[3]={0,0,42}; return rd(a);` | 42 | +| `6_7_6_07_vla_local` | ★ | `int n=7; int a[n]; for(int i=0;i<n;i++) a[i]=i*7; return a[n-1];` | 42 | | `6_7_6_08_variadic_decl` | · | helper `int sum(int n, ...)` summing two ints; `sum(2, 20, 22)` | 42 | ## §6.7.8 Type definitions