kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 30a524f4c1142fe9ad5e3fd2ba3aed8afdcf3617
parent 15fda5605f98b2ab9473d170d76e01322528cbe0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 14:37:06 -0700

Support block-scope function declarations

Diffstat:
M doc/BUGS.md | 15 ++++++++++++---
M lang/c/parse/parse.c | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lang/c/parse/parse_expr.c | 2 ++
M lang/c/parse/parse_priv.h | 21 +++++++++++++++++++++
M lang/c/parse/parse_type.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M test/parse/CORPUS.md | 23 +++++++++++++++++++----
6 files changed, 304 insertions(+), 24 deletions(-)

diff --git a/doc/BUGS.md b/doc/BUGS.md @@ -1,5 +1,14 @@ Known bugs with red test cases (test-parse) -- [ ] K&R old-style function definitions with declaration lists: 6_9_14_kr_function_def_params -- [ ] K&R promoted parameter declarations, e.g. char: 6_9_15_kr_function_def_promoted_char -- [ ] Non-extern block-scope function declarations resolving to external definitions: 6_9_16_block_scope_func_decl +Format as: + +``` +- [ ] <feature description>: <test case name> +``` + +- [ ] pointer subtraction yields ptrdiff_t (assignable to a wider integer without a cast): `6_5_6_01_ptr_diff_assign_to_long` +- [ ] file-scope array bound with a parenthesized integer constant expression: `6_7_6_18_file_scope_array_bound_paren` +- [ ] parenthesized declarator name (`int (foo)(int)`): `6_7_6_19_paren_declarator_name` +- [ ] function declarator with an inline function-pointer return type (no typedef): `6_7_6_20_func_returning_funcptr_no_typedef` +- [ ] static initializer accepts unary `-` on a floating constant: `6_7_9_30_static_init_neg_float` +- [ ] `#warning` preprocessing directive (non-fatal, parsing continues): `6_10_warning_directive` diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -347,6 +347,31 @@ static void sym_set_decl(SymEntry* e, DeclId id, DeclStorage storage, e->defined = (state == DSTATE_DEFINED || state == DSTATE_FUNC_DEFINED); } +static SymEntry* external_func_lookup(Parser* p, Sym name) { + ExternalFuncDecl* f; + for (f = p->external_funcs; f; f = f->next) { + if (f->name == name) return f->entry; + } + return NULL; +} + +static void external_func_remember(Parser* p, Sym name, SymEntry* entry) { + ExternalFuncDecl* f; + if (!entry) return; + for (f = p->external_funcs; f; f = f->next) { + if (f->name == name) { + f->entry = entry; + return; + } + } + f = arena_new(p->pool->arena, ExternalFuncDecl); + if (!f) perr(p, "out of memory in external_func_remember"); + f->name = name; + f->entry = entry; + f->next = p->external_funcs; + p->external_funcs = 
f; +} + static int is_ordinary_decl_kind(SymEntryKind k) { return k == SEK_LOCAL || k == SEK_GLOBAL || k == SEK_FUNC || k == SEK_TYPEDEF || k == SEK_ENUM_CST; @@ -448,6 +473,7 @@ static FrameSlot make_vla_size_slot(Parser* p) { static void store_top_to_size_slot(Parser* p, FrameSlot slot) { cg_push_local_typed(p->cg, slot, ty_size_t(p)); cg_swap(p->cg); + coerce_top_to_lvalue(p); cg_store(p->cg); cg_drop(p->cg); } @@ -539,6 +565,93 @@ static FrameSlot finish_vla_layout(Parser* p, const Type* ty, SrcLoc loc, return byte_slot; } +static int type_array_depth(const Type* ty) { + int n = 0; + while (ty && ty->kind == TY_ARRAY) { + ++n; + ty = ty->arr.elem; + } + return n; +} + +static void eval_param_vla_count(Parser* p, const ParamVLABoundExpr* expr, + FrameSlot slot) { + Tok save_cur = p->cur; + Tok save_next = p->next; + int save_has_next = p->has_next; + Tok* save_replay = p->replay; + u32 save_cap = p->replay_cap; + u32 save_len = p->replay_len; + u32 save_pos = p->replay_pos; + u8 save_active = p->replay_active; + Tok* replay; + + if (!expr->has_expr || expr->ntoks == 0) { + perr(p, "missing VLA parameter bound"); + } + replay = arena_array(p->pool->arena, Tok, expr->ntoks + 1u); + memcpy(replay, expr->toks, sizeof(Tok) * expr->ntoks); + memset(&replay[expr->ntoks], 0, sizeof(Tok)); + replay[expr->ntoks].kind = TOK_EOF; + + p->cur = replay[0]; + p->next.kind = TOK_EOF; + p->has_next = 0; + p->replay = replay; + p->replay_cap = expr->ntoks + 1u; + p->replay_len = expr->ntoks + 1u; + p->replay_pos = 1; + p->replay_active = 1; + + parse_assign_expr(p); + to_rvalue(p); + if (p->cur.kind != TOK_EOF) { + perr(p, "unexpected token in VLA parameter bound"); + } + store_top_to_size_slot(p, slot); + + p->cur = save_cur; + p->next = save_next; + p->has_next = save_has_next; + p->replay = save_replay; + p->replay_cap = save_cap; + p->replay_len = save_len; + p->replay_pos = save_pos; + p->replay_active = save_active; +} + +static VLABound* build_param_vla_bounds(Parser* p, 
const ParamInfo* info, + SrcLoc loc) { + const Type* root = info->declared_type; + u32 dim_skip = 0; + u32 need; + VLABound* bounds = NULL; + + if (!root || info->vla_bound_len == 0) return NULL; + if (root->kind == TY_ARRAY) { + dim_skip = 1; + root = root->arr.elem; + } else if (root->kind == TY_PTR) { + root = root->ptr.pointee; + } + need = (u32)type_array_depth(root); + if (need == 0) return NULL; + if (dim_skip + need > info->vla_bound_len) { + perr(p, "missing VLA parameter bound"); + } + + reset_vla_pending(p); + for (u32 i = 0; i < need; ++i) { + const ParamVLABoundExpr* expr = &info->vla_bounds[dim_skip + i]; + FrameSlot slot = make_vla_size_slot(p); + eval_param_vla_count(p, expr, slot); + p->vla_pending = 1; + p->vla_pending_count_slots[p->vla_pending_count_len++] = slot; + } + (void)finish_vla_layout(p, root, loc, &bounds); + return bounds; +} + /* ============================================================ * Static-local symbol naming * ============================================================ */ @@ -573,6 +686,11 @@ Sym mint_static_local_sym(Parser* p, Sym orig) { * ============================================================ */ /* Parse a single init-declarator after the decl-specs have been consumed. 
*/ +static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, + const DeclSpecs* specs, SrcLoc fname_loc, + const Attr* dattrs, ObjSecId* out_section_id, + u32* out_decl_flags, Sym* out_alias_target); + static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { SrcLoc loc; Sym name; @@ -608,6 +726,26 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { return; } + if (var_ty && var_ty->kind == TY_FUNC) { + ObjSecId section_id; + u32 decl_flags; + Sym alias_target; + if ((specs->storage == DS_AUTO && specs->storage_explicit) || + specs->storage == DS_REGISTER || specs->storage == DS_STATIC) { + perr(p, "invalid storage-class specifier for block-scope function " + "declaration"); + } + if (is_punct(&p->cur, '=')) { + perr(p, "function declarator cannot have initializer"); + } + (void)declare_function(p, name, var_ty, specs, loc, NULL, &section_id, + &decl_flags, &alias_target); + (void)section_id; + (void)decl_flags; + (void)alias_target; + return; + } + if (specs->storage == DS_STATIC) { Decl decl_in; DeclId did; @@ -825,10 +963,12 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, specs.storage == DS_TYPEDEF || (specs.flags & DF_THREAD)) { perr(p, "invalid storage-class specifier in parameter declaration"); } + p->param_vla_bound_len = 0; p->in_param_decl++; pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname, &ploc); p->in_param_decl--; + const Type* declared_pty = pty; if (pty && pty->kind == TY_ARRAY) { pty = type_ptr(p->pool, pty->arr.elem); } else if (pty && pty->kind == TY_FUNC) { @@ -852,7 +992,18 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, } infos[n].name = pname; infos[n].type = pty; + infos[n].declared_type = declared_pty; infos[n].loc = ploc; + infos[n].vla_bounds = NULL; + infos[n].vla_bound_len = p->param_vla_bound_len; + if (p->param_vla_bound_len) { + ParamVLABoundExpr* bounds = arena_array(p->pool->arena, + 
ParamVLABoundExpr, + p->param_vla_bound_len); + memcpy(bounds, p->param_vla_bounds, + sizeof(ParamVLABoundExpr) * p->param_vla_bound_len); + infos[n].vla_bounds = bounds; + } ++n; if (!accept_punct(p, ',')) break; } @@ -864,6 +1015,7 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, const DeclSpecs* specs, SrcLoc fname_loc, const Attr* dattrs, ObjSecId* out_section_id, u32* out_decl_flags, Sym* out_alias_target) { + SymEntry* visible; if (out_section_id) *out_section_id = OBJ_SEC_NONE; if (out_decl_flags) *out_decl_flags = 0; if (out_alias_target) *out_alias_target = 0; @@ -887,6 +1039,45 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, if (out_section_id) *out_section_id = tmp.section_id; if (out_decl_flags) *out_decl_flags = tmp.flags; if (out_alias_target) *out_alias_target = tmp.alias_target; + external_func_remember(p, fname, existing); + return existing; + } + visible = scope_lookup(p, fname); + if (!existing && visible && visible->kind == SEK_FUNC) { + existing = visible; + } + if (!existing) { + existing = external_func_lookup(p, fname); + } + if (existing) { + const Type* composite = NULL; + CSemCheck chk = + c_sem_check_redeclaration(p->pool, existing->type, fn_ty, &composite); + if (!chk.ok) perr(p, "%s", chk.message); + if (specs->storage == DS_STATIC && existing->linkage == DL_EXTERNAL) { + perr(p, "static declaration follows non-static declaration"); + } + if (scope_lookup_current(p, fname) != existing) { + SymEntry* e = scope_define(p, fname, SEK_FUNC, + composite ? 
composite : existing->type); + e->v.sym = existing->v.sym; + sym_set_decl(e, existing->decl_id, (DeclStorage)existing->storage, + (DeclLinkage)existing->linkage, existing->decl_flags, + (DeclState)existing->decl_state); + existing = e; + } else if (composite) { + existing->type = composite; + } + { + Decl tmp; + memset(&tmp, 0, sizeof tmp); + attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp); + attr_list_to_decl(p->c, p->decls, dattrs, &tmp); + if (out_section_id) *out_section_id = tmp.section_id; + if (out_decl_flags) *out_decl_flags = tmp.flags; + if (out_alias_target) *out_alias_target = tmp.alias_target; + } + external_func_remember(p, fname, existing); return existing; } { @@ -909,6 +1100,7 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, e->v.sym = fsym; sym_set_decl(e, did, decl_in.storage, decl_in.linkage, decl_in.flags, DSTATE_DECLARED); + external_func_remember(p, fname, e); if (out_section_id) *out_section_id = decl_in.section_id; if (out_decl_flags) *out_decl_flags = decl_in.flags; if (out_alias_target) *out_alias_target = decl_in.alias_target; @@ -978,6 +1170,7 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, e = scope_define(p, infos[i].name, SEK_LOCAL, infos[i].type); e->v.slot = s; sym_set_decl(e, DECL_NONE, DS_AUTO, DL_NONE, DF_NONE, DSTATE_DEFINED); + e->vla_bounds = build_param_vla_bounds(p, &infos[i], infos[i].loc); } } diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -1820,6 +1820,8 @@ static void parse_primary(Parser* p) { if (e->storage == DS_REGISTER) pcg_set_top_register(p); if (e->vla_byte_slot != FRAME_SLOT_NONE) { p->last_pushed_vla_slot = e->vla_byte_slot; + } + if (e->vla_bounds) { p->last_pushed_vla_bounds = e->vla_bounds; } return; diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -87,6 +87,7 @@ typedef enum SymEntryKind { typedef struct SymEntry SymEntry; typedef struct VLABound VLABound; +typedef struct ExternalFuncDecl 
ExternalFuncDecl; struct VLABound { const Type* array_ty; FrameSlot byte_slot; @@ -94,6 +95,14 @@ struct VLABound { VLABound* next; }; +typedef struct ParamVLABoundExpr ParamVLABoundExpr; +struct ParamVLABoundExpr { + Tok* toks; + u32 ntoks; + u8 has_expr; + u8 pad[3]; +}; + struct SymEntry { Sym name; u8 kind; /* SymEntryKind */ @@ -116,6 +125,12 @@ struct SymEntry { SymEntry* next; }; +struct ExternalFuncDecl { + Sym name; + SymEntry* entry; + ExternalFuncDecl* next; +}; + typedef struct TagEntry TagEntry; struct TagEntry { Sym name; @@ -242,6 +257,7 @@ typedef struct Parser { Sym sym_a_signal_fence; Scope* scope; + ExternalFuncDecl* external_funcs; CGLabel cur_break; CGLabel cur_continue; @@ -260,6 +276,8 @@ typedef struct Parser { VLABound* last_pushed_vla_bounds; u8 in_param_decl; + ParamVLABoundExpr param_vla_bounds[8]; + u8 param_vla_bound_len; u32 suppress_codegen; u32 static_local_counter; @@ -378,7 +396,10 @@ static inline int c_type_is_scalar(const Type* ty) { typedef struct ParamInfo { Sym name; const Type* type; + const Type* declared_type; SrcLoc loc; + ParamVLABoundExpr* vla_bounds; + u8 vla_bound_len; } ParamInfo; /* ============================================================ diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c @@ -1183,6 +1183,60 @@ const Type* parse_type_name(Parser* p) { * (DeclSuffix / DSuffKind defined in parse_priv.h) * ============================================================ */ +static void param_vla_record_bound(Parser* p, Tok* toks, u32 ntoks, + int has_expr) { + ParamVLABoundExpr* b; + if (p->param_vla_bound_len >= + sizeof p->param_vla_bounds / sizeof p->param_vla_bounds[0]) { + perr(p, "too many VLA dimensions per parameter"); + } + b = &p->param_vla_bounds[p->param_vla_bound_len++]; + b->toks = toks; + b->ntoks = ntoks; + b->has_expr = (u8)(has_expr ? 
1 : 0); +} + +static void parse_param_array_bound(Parser* p, DeclSuffix* out) { + Tok* toks = NULL; + u32 ntoks = 0; + u32 cap = 0; + int depth = 1; + int has_expr = 0; + + out->incomplete = 1; + if (accept_punct(p, ']')) { + param_vla_record_bound(p, NULL, 0, 0); + return; + } + while (depth > 0) { + Tok t = p->cur; + if (t.kind == TOK_EOF) { + perr(p, "unexpected EOF in parameter array bound"); + } + if (is_punct(&t, '[')) { + ++depth; + } else if (is_punct(&t, ']')) { + --depth; + if (depth == 0) break; + } + if (ntoks == cap) { + u32 nc = cap ? cap * 2u : 4u; + Tok* nb = arena_array(p->pool->arena, Tok, nc); + if (toks && ntoks) memcpy(nb, toks, sizeof(Tok) * ntoks); + toks = nb; + cap = nc; + } + toks[ntoks++] = t; + has_expr = 1; + advance(p); + } + if (ntoks == 1 && toks[0].kind == TOK_PUNCT && toks[0].v.punct == '*') { + has_expr = 0; + } + param_vla_record_bound(p, toks, ntoks, has_expr); + expect_punct(p, ']', "']' after array size"); +} + int parse_decl_suffix(Parser* p, DeclSuffix* out) { if (accept_punct(p, '[')) { out->kind = DS_ARRAY; @@ -1197,26 +1251,12 @@ int parse_decl_suffix(Parser* p, DeclSuffix* out) { } break; } - if (accept_punct(p, ']')) { - out->incomplete = 1; + if (p->in_param_decl) { + parse_param_array_bound(p, out); return 1; } - if (p->in_param_decl) { - int depth = 1; - while (depth > 0) { - if (p->cur.kind == TOK_EOF) { - perr(p, "unexpected EOF in parameter array bound"); - } - if (is_punct(&p->cur, '[')) - ++depth; - else if (is_punct(&p->cur, ']')) { - --depth; - if (depth == 0) break; - } - advance(p); - } + if (accept_punct(p, ']')) { out->incomplete = 1; - expect_punct(p, ']', "']' after array size"); return 1; } { diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -196,7 +196,8 @@ here for completeness once they're real cases. 
| `6_5_65_file_scope_compound_literal` | RED | `static int *p = (int[]){42}; return p[0];` — file-scope compound literal has static storage duration | 42 | | `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 | | `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 | -| `6_5_3_4_04_sizeof_vla_param_row` | RED | `sizeof(a[0])` where `a` is an adjusted `int a[n][m]` parameter is evaluated at runtime | 42 | +| `6_5_3_4_04_sizeof_vla_param_row` | ★ | `sizeof(a[0])` where `a` is an adjusted `int a[n][m]` parameter is evaluated at runtime | 42 | +| `6_5_6_01_ptr_diff_assign_to_long` | RED | `long d = p - q;` — pointer subtraction yields ptrdiff_t and is assignable to a wider integer without a cast; cfree currently leaves the cg-stack top typed as `T*` after BO_ISUB so the assignment trips "incompatible assignment from pointer to integer" | 42 | ## §6.5.2.2 Aggregate function arguments @@ -363,8 +364,11 @@ already exercised in §6.5 and §6.7. 
| `6_7_6_13_star_in_proto` | ★ | `int total(int n, int a[*]); int total(int n, int a[n]){...}` — `[*]` in non-definition prototype | 42 | | `6_7_6_14_func_param_adjust` | ★ | `int apply(int f(int), int x){return f(x);}` — function parameter adjusted to pointer-to-function | 42 | | `6_7_6_15_multidim_vla_local` | RED | `int a[n][m]; a[n-1][m-1]=42; return a[5][6];` — multiple VLA dimensions in one declarator | 42 | -| `6_7_6_16_vla_param_2d` | RED | `int a[n][m]` parameter passed a 2D array; runtime stride must use `m` | 42 | -| `6_7_6_17_vla_param_3d` | RED | `int a[n][m][k]` parameter passed a 3D array; nested runtime strides must compose | 42 | +| `6_7_6_16_vla_param_2d` | ★ | `int a[n][m]` parameter passed a 2D array; runtime stride must use `m` | 42 | +| `6_7_6_17_vla_param_3d` | ★ | `int a[n][m][k]` parameter passed a 3D array; nested runtime strides must compose | 42 | +| `6_7_6_18_file_scope_array_bound_paren` | RED | `static int marks[(2+5)] = {...};` — parenthesized integer constant expression in a file-scope array bound; cfree currently panics with `CfreeCg: regalloc - no spillable victim`, suggesting the constexpr falls into a runtime-IR lowering path that has no function context | 42 | +| `6_7_6_19_paren_declarator_name` | RED | `int (helper)(int x){...}` and `(helper)(42)` — the C grammar lists `'(' declarator ')'` as a direct-declarator; lua / glibc headers wrap public names in parens to defeat macro expansion. Currently rejected with "expected declarator" | 42 | +| `6_7_6_20_func_returning_funcptr_no_typedef` | RED | `int (*pick(int x))(void){...}` — function declarator with an inline function-pointer return type (the classic `signal()` shape, used by sqlite VFS `xDlSym`). `6_7_6_11` covers the typedef'd form; this row pins the inline declarator. Currently rejected with "expected declarator" | 42 | ## §6.7.7 Type names @@ -419,6 +423,7 @@ cover compound typedef targets. 
| `6_7_9_27_static_flat_array_init` | RED | file-scope `static int a[2][2] = {1,2,3,36};` | 42 | | `6_7_9_28_static_flat_struct_init` | RED | file-scope `static struct O o = {1,2,39};` | 42 | | `6_7_9_29_unknown_bound_nested_init` | RED | `int a[][2] = {1,2,3,36};` — unknown outer bound is completed from a flat nested initializer | 42 | +| `6_7_9_30_static_init_neg_float` | RED | `static const double tab[2] = { -1.0, 43.0 };` — static initializer of arithmetic type permits unary `-` on a floating constant (§6.6 arithmetic constant expression). `try_parse_static_float` only accepts a bare TOK_FLT/TOK_NUM, so a leading `-` aborts with "expected floating constant expression" | 42 | ## §6.7.10 Static assertions @@ -493,7 +498,17 @@ memcpys into before `ret`. | `6_9_13_extern_func_def` | ★ | full TU: `extern int helper(int x){return x+1;}` — extern on a definition; `helper(41)` | 42 | | `6_9_14_kr_function_def_params` | RED | old-style function definition with declaration-list `int add(a,b) int a; int b;` | 42 | | `6_9_15_kr_function_def_promoted_char` | RED | old-style definition with promoted `char` parameter declaration | 42 | -| `6_9_16_block_scope_func_decl` | RED | block-scope `int helper(void);` has external linkage and calls the later file-scope definition | 42 | +| `6_9_16_block_scope_func_decl` | ★ | block-scope `int helper(void);` has external linkage and calls the later file-scope definition | 42 | + +## §6.10 Preprocessing directives + +Most preprocessor coverage lives under `test/pp/`; rows here only exist +when a directive interacts with the parse-runner's end-to-end pipeline +in a way `test/pp/` cannot catch on its own. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `6_10_warning_directive` | RED | `#warning "..."` followed by a valid TU — non-fatal diagnostic, parsing continues. 
cfree's `process_directive` doesn't route `warning` and falls into the catch-all "unsupported directive" panic, which breaks every header that issues one (including the macOS SDK's `sys/cdefs.h`). | 42 | ## Builtins