commit 30a524f4c1142fe9ad5e3fd2ba3aed8afdcf3617
parent 15fda5605f98b2ab9473d170d76e01322528cbe0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 19 May 2026 14:37:06 -0700
Support block-scope function declarations
Diffstat:
6 files changed, 304 insertions(+), 24 deletions(-)
diff --git a/doc/BUGS.md b/doc/BUGS.md
@@ -1,5 +1,14 @@
Known bugs with red test cases (test-parse)
-- [ ] K&R old-style function definitions with declaration lists: 6_9_14_kr_function_def_params
-- [ ] K&R promoted parameter declarations, e.g. char: 6_9_15_kr_function_def_promoted_char
-- [ ] Non-extern block-scope function declarations resolving to external definitions: 6_9_16_block_scope_func_decl
+Format as:
+
+```
+- [ ] <feature description>: <test case name>
+```
+
+- [ ] pointer subtraction yields ptrdiff_t (assignable to a wider integer without a cast): `6_5_6_01_ptr_diff_assign_to_long`
+- [ ] file-scope array bound with a parenthesized integer constant expression: `6_7_6_18_file_scope_array_bound_paren`
+- [ ] parenthesized declarator name (`int (foo)(int)`): `6_7_6_19_paren_declarator_name`
+- [ ] function declarator with an inline function-pointer return type (no typedef): `6_7_6_20_func_returning_funcptr_no_typedef`
+- [ ] static initializer accepts unary `-` on a floating constant: `6_7_9_30_static_init_neg_float`
+- [ ] `#warning` preprocessing directive (non-fatal, parsing continues): `6_10_warning_directive`
diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c
@@ -347,6 +347,31 @@ static void sym_set_decl(SymEntry* e, DeclId id, DeclStorage storage,
e->defined = (state == DSTATE_DEFINED || state == DSTATE_FUNC_DEFINED);
}
+/* Find the symbol entry previously remembered for function `name` in the
+ * parser's external-function list. Returns NULL when `name` was never
+ * remembered. */
+static SymEntry* external_func_lookup(Parser* p, Sym name) {
+  ExternalFuncDecl* node = p->external_funcs;
+  while (node && node->name != name) {
+    node = node->next;
+  }
+  return node ? node->entry : NULL;
+}
+
+/* Record `entry` as the symbol entry for function `name`. An existing node
+ * for `name` is overwritten in place; otherwise a new arena-allocated node is
+ * pushed onto the front of p->external_funcs. A NULL `entry` is ignored. */
+static void external_func_remember(Parser* p, Sym name, SymEntry* entry) {
+  ExternalFuncDecl* node;
+  if (entry == NULL) return;
+
+  node = p->external_funcs;
+  while (node != NULL && node->name != name) {
+    node = node->next;
+  }
+  if (node != NULL) {
+    node->entry = entry;
+    return;
+  }
+
+  node = arena_new(p->pool->arena, ExternalFuncDecl);
+  if (node == NULL) perr(p, "out of memory in external_func_remember");
+  node->next = p->external_funcs;
+  node->entry = entry;
+  node->name = name;
+  p->external_funcs = node;
+}
+
static int is_ordinary_decl_kind(SymEntryKind k) {
return k == SEK_LOCAL || k == SEK_GLOBAL || k == SEK_FUNC ||
k == SEK_TYPEDEF || k == SEK_ENUM_CST;
@@ -448,6 +473,7 @@ static FrameSlot make_vla_size_slot(Parser* p) {
static void store_top_to_size_slot(Parser* p, FrameSlot slot) { /* NOTE(review): presumably stores the value on top of the cg stack into `slot` as a size_t; confirm against the cg_* helpers */
cg_push_local_typed(p->cg, slot, ty_size_t(p)); /* push `slot`, typed as size_t */
cg_swap(p->cg);
+ coerce_top_to_lvalue(p); /* added by this commit; runs after the swap and before the store */
cg_store(p->cg);
cg_drop(p->cg);
}
@@ -539,6 +565,93 @@ static FrameSlot finish_vla_layout(Parser* p, const Type* ty, SrcLoc loc,
return byte_slot;
}
+/* Count how many consecutive TY_ARRAY layers wrap `ty`, following arr.elem at
+ * each step. A NULL or non-array type yields 0. */
+static int type_array_depth(const Type* ty) {
+  int depth;
+  for (depth = 0; ty != NULL && ty->kind == TY_ARRAY; ty = ty->arr.elem) {
+    depth++;
+  }
+  return depth;
+}
+
+/* Parse one recorded parameter array bound as an assignment expression and
+ * store its value into `slot`.
+ *
+ * The bound's tokens are copied into a fresh arena buffer terminated by a
+ * TOK_EOF token. That buffer is installed as the parser's replay buffer
+ * (replay_active = 1) with p->cur set to its first token before
+ * parse_assign_expr() runs. The token cursor and all replay fields are saved
+ * on entry and restored before returning.
+ *
+ * Reports an error through perr() when the bound has no expression, when the
+ * replay buffer cannot be allocated, or when tokens remain after the
+ * assignment expression. */
+static void eval_param_vla_count(Parser* p, const ParamVLABoundExpr* expr,
+                                 FrameSlot slot) {
+  Tok save_cur = p->cur;
+  Tok save_next = p->next;
+  int save_has_next = p->has_next;
+  Tok* save_replay = p->replay;
+  u32 save_cap = p->replay_cap;
+  u32 save_len = p->replay_len;
+  u32 save_pos = p->replay_pos;
+  u8 save_active = p->replay_active;
+  Tok* replay;
+
+  if (!expr->has_expr || expr->ntoks == 0) {
+    perr(p, "missing VLA parameter bound");
+  }
+  replay = arena_array(p->pool->arena, Tok, expr->ntoks + 1u);
+  /* Same out-of-memory handling as external_func_remember: never copy into a
+   * failed allocation. */
+  if (!replay) perr(p, "out of memory in eval_param_vla_count");
+  memcpy(replay, expr->toks, sizeof(Tok) * expr->ntoks);
+  /* Terminate the copy with a zeroed TOK_EOF sentinel. */
+  memset(&replay[expr->ntoks], 0, sizeof(Tok));
+  replay[expr->ntoks].kind = TOK_EOF;
+
+  /* Token 0 becomes the current token, so replay resumes at index 1. */
+  p->cur = replay[0];
+  p->next.kind = TOK_EOF;
+  p->has_next = 0;
+  p->replay = replay;
+  p->replay_cap = expr->ntoks + 1u;
+  p->replay_len = expr->ntoks + 1u;
+  p->replay_pos = 1;
+  p->replay_active = 1;
+
+  parse_assign_expr(p);
+  to_rvalue(p);
+  /* The whole bound must have been consumed by the expression. */
+  if (p->cur.kind != TOK_EOF) {
+    perr(p, "unexpected token in VLA parameter bound");
+  }
+  store_top_to_size_slot(p, slot);
+
+  p->cur = save_cur;
+  p->next = save_next;
+  p->has_next = save_has_next;
+  p->replay = save_replay;
+  p->replay_cap = save_cap;
+  p->replay_len = save_len;
+  p->replay_pos = save_pos;
+  p->replay_active = save_active;
+}
+
+/* Build the VLABound chain for one parameter from its recorded array bounds.
+ *
+ * Starts from info->declared_type. If that is an array, its outermost
+ * dimension is skipped and the element type is used; if it is a pointer, the
+ * pointee is used. One size slot is created and filled per remaining array
+ * layer, then finish_vla_layout() produces the bounds.
+ *
+ * Returns NULL when there is no declared type, no recorded bound, or no array
+ * layer left after the adjustment. Reports "missing VLA parameter bound"
+ * through perr() when fewer bounds were recorded than layers need. */
+static VLABound* build_param_vla_bounds(Parser* p, const ParamInfo* info,
+                                        SrcLoc loc) {
+  const Type* inner = info->declared_type;
+  VLABound* result = NULL;
+  u32 skipped = 0;
+  u32 depth;
+  u32 idx;
+
+  if (inner == NULL || info->vla_bound_len == 0) return NULL;
+
+  if (inner->kind == TY_PTR) {
+    inner = inner->ptr.pointee;
+  } else if (inner->kind == TY_ARRAY) {
+    inner = inner->arr.elem;
+    skipped = 1;
+  }
+
+  depth = (u32)type_array_depth(inner);
+  if (depth == 0) return NULL;
+  if (info->vla_bound_len < skipped + depth) {
+    perr(p, "missing VLA parameter bound");
+  }
+
+  reset_vla_pending(p);
+  idx = 0;
+  while (idx < depth) {
+    FrameSlot count_slot = make_vla_size_slot(p);
+    eval_param_vla_count(p, &info->vla_bounds[skipped + idx], count_slot);
+    p->vla_pending = 1;
+    p->vla_pending_count_slots[p->vla_pending_count_len] = count_slot;
+    p->vla_pending_count_len++;
+    idx++;
+  }
+  (void)finish_vla_layout(p, inner, loc, &result);
+  return result;
+}
+
/* ============================================================
* Static-local symbol naming
* ============================================================ */
@@ -573,6 +686,11 @@ Sym mint_static_local_sym(Parser* p, Sym orig) {
* ============================================================ */
/* Parse a single init-declarator after the decl-specs have been consumed. */
+static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
+ const DeclSpecs* specs, SrcLoc fname_loc,
+ const Attr* dattrs, ObjSecId* out_section_id,
+ u32* out_decl_flags, Sym* out_alias_target); /* forward declaration: defined later in this file, called from parse_init_declarator below */
+
static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
SrcLoc loc;
Sym name;
@@ -608,6 +726,26 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
return;
}
+ if (var_ty && var_ty->kind == TY_FUNC) {
+ ObjSecId section_id;
+ u32 decl_flags;
+ Sym alias_target;
+ if ((specs->storage == DS_AUTO && specs->storage_explicit) ||
+ specs->storage == DS_REGISTER || specs->storage == DS_STATIC) {
+ perr(p, "invalid storage-class specifier for block-scope function "
+ "declaration");
+ }
+ if (is_punct(&p->cur, '=')) {
+ perr(p, "function declarator cannot have initializer");
+ }
+ (void)declare_function(p, name, var_ty, specs, loc, NULL, &section_id,
+ &decl_flags, &alias_target);
+ (void)section_id;
+ (void)decl_flags;
+ (void)alias_target;
+ return;
+ }
+
if (specs->storage == DS_STATIC) {
Decl decl_in;
DeclId did;
@@ -825,10 +963,12 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
specs.storage == DS_TYPEDEF || (specs.flags & DF_THREAD)) {
perr(p, "invalid storage-class specifier in parameter declaration");
}
+ p->param_vla_bound_len = 0;
p->in_param_decl++;
pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname,
&ploc);
p->in_param_decl--;
+ const Type* declared_pty = pty;
if (pty && pty->kind == TY_ARRAY) {
pty = type_ptr(p->pool, pty->arr.elem);
} else if (pty && pty->kind == TY_FUNC) {
@@ -852,7 +992,18 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
}
infos[n].name = pname;
infos[n].type = pty;
+ infos[n].declared_type = declared_pty;
infos[n].loc = ploc;
+ infos[n].vla_bounds = NULL;
+ infos[n].vla_bound_len = p->param_vla_bound_len;
+ if (p->param_vla_bound_len) {
+ ParamVLABoundExpr* bounds = arena_array(p->pool->arena,
+ ParamVLABoundExpr,
+ p->param_vla_bound_len);
+ memcpy(bounds, p->param_vla_bounds,
+ sizeof(ParamVLABoundExpr) * p->param_vla_bound_len);
+ infos[n].vla_bounds = bounds;
+ }
++n;
if (!accept_punct(p, ',')) break;
}
@@ -864,6 +1015,7 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
const DeclSpecs* specs, SrcLoc fname_loc,
const Attr* dattrs, ObjSecId* out_section_id,
u32* out_decl_flags, Sym* out_alias_target) {
+ SymEntry* visible;
if (out_section_id) *out_section_id = OBJ_SEC_NONE;
if (out_decl_flags) *out_decl_flags = 0;
if (out_alias_target) *out_alias_target = 0;
@@ -887,6 +1039,45 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
if (out_section_id) *out_section_id = tmp.section_id;
if (out_decl_flags) *out_decl_flags = tmp.flags;
if (out_alias_target) *out_alias_target = tmp.alias_target;
+ external_func_remember(p, fname, existing);
+ return existing;
+ }
+ visible = scope_lookup(p, fname);
+ if (!existing && visible && visible->kind == SEK_FUNC) {
+ existing = visible;
+ }
+ if (!existing) {
+ existing = external_func_lookup(p, fname);
+ }
+ if (existing) {
+ const Type* composite = NULL;
+ CSemCheck chk =
+ c_sem_check_redeclaration(p->pool, existing->type, fn_ty, &composite);
+ if (!chk.ok) perr(p, "%s", chk.message);
+ if (specs->storage == DS_STATIC && existing->linkage == DL_EXTERNAL) {
+ perr(p, "static declaration follows non-static declaration");
+ }
+ if (scope_lookup_current(p, fname) != existing) {
+ SymEntry* e = scope_define(p, fname, SEK_FUNC,
+ composite ? composite : existing->type);
+ e->v.sym = existing->v.sym;
+ sym_set_decl(e, existing->decl_id, (DeclStorage)existing->storage,
+ (DeclLinkage)existing->linkage, existing->decl_flags,
+ (DeclState)existing->decl_state);
+ existing = e;
+ } else if (composite) {
+ existing->type = composite;
+ }
+ {
+ Decl tmp;
+ memset(&tmp, 0, sizeof tmp);
+ attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp);
+ attr_list_to_decl(p->c, p->decls, dattrs, &tmp);
+ if (out_section_id) *out_section_id = tmp.section_id;
+ if (out_decl_flags) *out_decl_flags = tmp.flags;
+ if (out_alias_target) *out_alias_target = tmp.alias_target;
+ }
+ external_func_remember(p, fname, existing);
return existing;
}
{
@@ -909,6 +1100,7 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
e->v.sym = fsym;
sym_set_decl(e, did, decl_in.storage, decl_in.linkage, decl_in.flags,
DSTATE_DECLARED);
+ external_func_remember(p, fname, e);
if (out_section_id) *out_section_id = decl_in.section_id;
if (out_decl_flags) *out_decl_flags = decl_in.flags;
if (out_alias_target) *out_alias_target = decl_in.alias_target;
@@ -978,6 +1170,7 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
e = scope_define(p, infos[i].name, SEK_LOCAL, infos[i].type);
e->v.slot = s;
sym_set_decl(e, DECL_NONE, DS_AUTO, DL_NONE, DF_NONE, DSTATE_DEFINED);
+ e->vla_bounds = build_param_vla_bounds(p, &infos[i], infos[i].loc);
}
}
diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c
@@ -1820,6 +1820,8 @@ static void parse_primary(Parser* p) {
if (e->storage == DS_REGISTER) pcg_set_top_register(p);
if (e->vla_byte_slot != FRAME_SLOT_NONE) {
p->last_pushed_vla_slot = e->vla_byte_slot;
+ }
+ if (e->vla_bounds) {
p->last_pushed_vla_bounds = e->vla_bounds;
}
return;
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -87,6 +87,7 @@ typedef enum SymEntryKind {
typedef struct SymEntry SymEntry;
typedef struct VLABound VLABound;
+typedef struct ExternalFuncDecl ExternalFuncDecl;
struct VLABound {
const Type* array_ty;
FrameSlot byte_slot;
@@ -94,6 +95,14 @@ struct VLABound {
VLABound* next;
};
+typedef struct ParamVLABoundExpr ParamVLABoundExpr;
+struct ParamVLABoundExpr { /* one recorded `[...]` bound of a parameter declarator */
+ Tok* toks; /* tokens captured between the brackets; NULL for an empty `[]` */
+ u32 ntoks; /* number of entries in toks */
+ u8 has_expr; /* 1 when tokens were captured, except a lone `*`, which is stored as 0 */
+ u8 pad[3]; /* explicit padding */
+};
+
struct SymEntry {
Sym name;
u8 kind; /* SymEntryKind */
@@ -116,6 +125,12 @@ struct SymEntry {
SymEntry* next;
};
+struct ExternalFuncDecl { /* node of the singly linked list headed by Parser.external_funcs */
+ Sym name; /* function name, used as the lookup key */
+ SymEntry* entry; /* entry most recently remembered for name */
+ ExternalFuncDecl* next; /* next node; list traversal stops at NULL */
+};
+
typedef struct TagEntry TagEntry;
struct TagEntry {
Sym name;
@@ -242,6 +257,7 @@ typedef struct Parser {
Sym sym_a_signal_fence;
Scope* scope;
+ ExternalFuncDecl* external_funcs;
CGLabel cur_break;
CGLabel cur_continue;
@@ -260,6 +276,8 @@ typedef struct Parser {
VLABound* last_pushed_vla_bounds;
u8 in_param_decl;
+ ParamVLABoundExpr param_vla_bounds[8];
+ u8 param_vla_bound_len;
u32 suppress_codegen;
u32 static_local_counter;
@@ -378,7 +396,10 @@ static inline int c_type_is_scalar(const Type* ty) {
typedef struct ParamInfo { /* one parsed parameter, as filled in by parse_param_list */
Sym name;
const Type* type; /* parameter type after adjustment (an array type becomes a pointer to its element) */
+ const Type* declared_type; /* type returned by the declarator parse, before that adjustment */
SrcLoc loc;
+ ParamVLABoundExpr* vla_bounds; /* arena copy of the bounds recorded for this parameter; NULL when none */
+ u8 vla_bound_len; /* number of entries in vla_bounds */
} ParamInfo;
/* ============================================================
diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c
@@ -1183,6 +1183,60 @@ const Type* parse_type_name(Parser* p) {
* (DeclSuffix / DSuffKind defined in parse_priv.h)
* ============================================================ */
+/* Append one array bound to the parser's per-parameter bound list.
+ *
+ * `toks`/`ntoks` are the captured bound tokens (may be NULL/0) and `has_expr`
+ * is normalized to 0 or 1. Reports an error through perr() when the fixed-size
+ * p->param_vla_bounds array is already full. */
+static void param_vla_record_bound(Parser* p, Tok* toks, u32 ntoks,
+                                   int has_expr) {
+  u32 capacity =
+      (u32)(sizeof p->param_vla_bounds / sizeof p->param_vla_bounds[0]);
+  ParamVLABoundExpr* slot;
+
+  if (p->param_vla_bound_len >= capacity) {
+    perr(p, "too many VLA dimensions per parameter");
+  }
+  slot = &p->param_vla_bounds[p->param_vla_bound_len];
+  p->param_vla_bound_len++;
+  slot->has_expr = (u8)(has_expr != 0);
+  slot->ntoks = ntoks;
+  slot->toks = toks;
+}
+
+/* Capture the tokens of a parameter array bound and record them with
+ * param_vla_record_bound(). The caller reaches this after accepting '['.
+ *
+ * Always marks `out` as incomplete. An immediately following ']' records an
+ * empty bound. Otherwise tokens are collected, with nested '[' ... ']' pairs
+ * tracked by depth, up to the matching ']', which is then consumed. A bound
+ * consisting of a single '*' token is recorded with has_expr = 0.
+ *
+ * Reports an error through perr() on end of input inside the bound or when
+ * the token buffer cannot be grown. */
+static void parse_param_array_bound(Parser* p, DeclSuffix* out) {
+  Tok* toks = NULL;
+  u32 ntoks = 0;
+  u32 cap = 0;
+  int depth = 1;
+  int has_expr = 0;
+
+  out->incomplete = 1;
+  if (accept_punct(p, ']')) {
+    param_vla_record_bound(p, NULL, 0, 0);
+    return;
+  }
+  while (depth > 0) {
+    Tok t = p->cur;
+    if (t.kind == TOK_EOF) {
+      perr(p, "unexpected EOF in parameter array bound");
+    }
+    if (is_punct(&t, '[')) {
+      ++depth;
+    } else if (is_punct(&t, ']')) {
+      --depth;
+      /* The matching ']' is left for expect_punct below, not captured. */
+      if (depth == 0) break;
+    }
+    if (ntoks == cap) {
+      /* Grow by doubling; the arena keeps the old buffer, so just copy. */
+      u32 nc = cap ? cap * 2u : 4u;
+      Tok* nb = arena_array(p->pool->arena, Tok, nc);
+      /* Same out-of-memory handling as external_func_remember: never write
+       * through a failed allocation. */
+      if (!nb) perr(p, "out of memory in parse_param_array_bound");
+      if (toks && ntoks) memcpy(nb, toks, sizeof(Tok) * ntoks);
+      toks = nb;
+      cap = nc;
+    }
+    toks[ntoks++] = t;
+    has_expr = 1;
+    advance(p);
+  }
+  /* A lone '*' is kept as a token but flagged as having no expression. */
+  if (ntoks == 1 && toks[0].kind == TOK_PUNCT && toks[0].v.punct == '*') {
+    has_expr = 0;
+  }
+  param_vla_record_bound(p, toks, ntoks, has_expr);
+  expect_punct(p, ']', "']' after array size");
+}
+
int parse_decl_suffix(Parser* p, DeclSuffix* out) {
if (accept_punct(p, '[')) {
out->kind = DS_ARRAY;
@@ -1197,26 +1251,12 @@ int parse_decl_suffix(Parser* p, DeclSuffix* out) {
}
break;
}
- if (accept_punct(p, ']')) {
- out->incomplete = 1;
+ if (p->in_param_decl) {
+ parse_param_array_bound(p, out);
return 1;
}
- if (p->in_param_decl) {
- int depth = 1;
- while (depth > 0) {
- if (p->cur.kind == TOK_EOF) {
- perr(p, "unexpected EOF in parameter array bound");
- }
- if (is_punct(&p->cur, '['))
- ++depth;
- else if (is_punct(&p->cur, ']')) {
- --depth;
- if (depth == 0) break;
- }
- advance(p);
- }
+ if (accept_punct(p, ']')) {
out->incomplete = 1;
- expect_punct(p, ']', "']' after array size");
return 1;
}
{
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -196,7 +196,8 @@ here for completeness once they're real cases.
| `6_5_65_file_scope_compound_literal` | RED | `static int *p = (int[]){42}; return p[0];` — file-scope compound literal has static storage duration | 42 |
| `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 |
| `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 |
-| `6_5_3_4_04_sizeof_vla_param_row` | RED | `sizeof(a[0])` where `a` is an adjusted `int a[n][m]` parameter is evaluated at runtime | 42 |
+| `6_5_3_4_04_sizeof_vla_param_row` | ★ | `sizeof(a[0])` where `a` is an adjusted `int a[n][m]` parameter is evaluated at runtime | 42 |
+| `6_5_6_01_ptr_diff_assign_to_long` | RED | `long d = p - q;` — pointer subtraction yields ptrdiff_t and is assignable to a wider integer without a cast; cfree currently leaves the cg-stack top typed as `T*` after BO_ISUB so the assignment trips "incompatible assignment from pointer to integer" | 42 |
## §6.5.2.2 Aggregate function arguments
@@ -363,8 +364,11 @@ already exercised in §6.5 and §6.7.
| `6_7_6_13_star_in_proto` | ★ | `int total(int n, int a[*]); int total(int n, int a[n]){...}` — `[*]` in non-definition prototype | 42 |
| `6_7_6_14_func_param_adjust` | ★ | `int apply(int f(int), int x){return f(x);}` — function parameter adjusted to pointer-to-function | 42 |
| `6_7_6_15_multidim_vla_local` | RED | `int a[n][m]; a[n-1][m-1]=42; return a[5][6];` — multiple VLA dimensions in one declarator | 42 |
-| `6_7_6_16_vla_param_2d` | RED | `int a[n][m]` parameter passed a 2D array; runtime stride must use `m` | 42 |
-| `6_7_6_17_vla_param_3d` | RED | `int a[n][m][k]` parameter passed a 3D array; nested runtime strides must compose | 42 |
+| `6_7_6_16_vla_param_2d` | ★ | `int a[n][m]` parameter passed a 2D array; runtime stride must use `m` | 42 |
+| `6_7_6_17_vla_param_3d` | ★ | `int a[n][m][k]` parameter passed a 3D array; nested runtime strides must compose | 42 |
+| `6_7_6_18_file_scope_array_bound_paren` | RED | `static int marks[(2+5)] = {...};` — parenthesized integer constant expression in a file-scope array bound; cfree currently panics with `CfreeCg: regalloc - no spillable victim`, suggesting the constexpr falls into a runtime-IR lowering path that has no function context | 42 |
+| `6_7_6_19_paren_declarator_name` | RED | `int (helper)(int x){...}` and `(helper)(42)` — the C grammar lists `'(' declarator ')'` as a direct-declarator; lua / glibc headers wrap public names in parens to defeat macro expansion. Currently rejected with "expected declarator" | 42 |
+| `6_7_6_20_func_returning_funcptr_no_typedef` | RED | `int (*pick(int x))(void){...}` — function declarator with an inline function-pointer return type (the classic `signal()` shape, used by sqlite VFS `xDlSym`). `6_7_6_11` covers the typedef'd form; this row pins the inline declarator. Currently rejected with "expected declarator" | 42 |
## §6.7.7 Type names
@@ -419,6 +423,7 @@ cover compound typedef targets.
| `6_7_9_27_static_flat_array_init` | RED | file-scope `static int a[2][2] = {1,2,3,36};` | 42 |
| `6_7_9_28_static_flat_struct_init` | RED | file-scope `static struct O o = {1,2,39};` | 42 |
| `6_7_9_29_unknown_bound_nested_init` | RED | `int a[][2] = {1,2,3,36};` — unknown outer bound is completed from a flat nested initializer | 42 |
+| `6_7_9_30_static_init_neg_float` | RED | `static const double tab[2] = { -1.0, 43.0 };` — static initializer of arithmetic type permits unary `-` on a floating constant (§6.6 arithmetic constant expression). `try_parse_static_float` only accepts a bare TOK_FLT/TOK_NUM, so a leading `-` aborts with "expected floating constant expression" | 42 |
## §6.7.10 Static assertions
@@ -493,7 +498,17 @@ memcpys into before `ret`.
| `6_9_13_extern_func_def` | ★ | full TU: `extern int helper(int x){return x+1;}` — extern on a definition; `helper(41)` | 42 |
| `6_9_14_kr_function_def_params` | RED | old-style function definition with declaration-list `int add(a,b) int a; int b;` | 42 |
| `6_9_15_kr_function_def_promoted_char` | RED | old-style definition with promoted `char` parameter declaration | 42 |
-| `6_9_16_block_scope_func_decl` | RED | block-scope `int helper(void);` has external linkage and calls the later file-scope definition | 42 |
+| `6_9_16_block_scope_func_decl` | ★ | block-scope `int helper(void);` has external linkage and calls the later file-scope definition | 42 |
+
+## §6.10 Preprocessing directives
+
+Most preprocessor coverage lives under `test/pp/`; rows here only exist
+when a directive interacts with the parse-runner's end-to-end pipeline
+in a way `test/pp/` cannot catch on its own.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `6_10_warning_directive` | RED | `#warning "..."` followed by a valid TU — non-fatal diagnostic, parsing continues. cfree's `process_directive` doesn't route `warning` and falls into the catch-all "unsupported directive" panic, which breaks every header that issues one (including the macOS SDK's `sys/cdefs.h`). | 42 |
## Builtins