kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 28c94cf5c588425e4ebd5a30bfc8a18e61794a8c
parent 78dd6876acf34875b49ae2aad972ddeb0a2b38e6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 14:51:34 -0700

Fix parser bugs from BUGS list

Diffstat:
M doc/BUGS.md | 12 ++++++------
M lang/c/parse/parse.c | 58 ++++++++++++----------------------------------------------
M lang/c/parse/parse_expr.c | 1 +
M lang/c/parse/parse_init.c | 6 ++++++
M lang/c/parse/parse_priv.h | 10 ++++++++++
M lang/c/parse/parse_type.c | 23 +++++++++++++++++++++++
M lang/c/type/type.c | 2 +-
M src/api/cg.c | 9 +++++++--
M test/parse/CORPUS.md | 12 ++++++------
A test/parse/cases/6_10_warning_directive.c | 12 ++++++++++++
A test/parse/cases/6_10_warning_directive.expected | 1 +
A test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.c | 12 ++++++++++++
A test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.expected | 1 +
A test/parse/cases/6_7_6_18_file_scope_array_bound_paren.c | 11 +++++++++++
A test/parse/cases/6_7_6_18_file_scope_array_bound_paren.expected | 1 +
A test/parse/cases/6_7_6_19_paren_declarator_name.c | 10 ++++++++++
A test/parse/cases/6_7_6_19_paren_declarator_name.expected | 1 +
A test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.c | 17 +++++++++++++++++
A test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.expected | 1 +
A test/parse/cases/6_7_9_30_static_init_neg_float.c | 12 ++++++++++++
A test/parse/cases/6_7_9_30_static_init_neg_float.expected | 1 +
21 files changed, 152 insertions(+), 61 deletions(-)

diff --git a/doc/BUGS.md b/doc/BUGS.md @@ -6,9 +6,9 @@ Format as: - [ ] <feature description>: <test case name> ``` -- [ ] pointer subtraction yields ptrdiff_t (assignable to a wider integer without a cast): `6_5_6_01_ptr_diff_assign_to_long` -- [ ] file-scope array bound with a parenthesized integer constant expression: `6_7_6_18_file_scope_array_bound_paren` -- [ ] parenthesized declarator name (`int (foo)(int)`): `6_7_6_19_paren_declarator_name` -- [ ] function declarator with an inline function-pointer return type (no typedef): `6_7_6_20_func_returning_funcptr_no_typedef` -- [ ] static initializer accepts unary `-` on a floating constant: `6_7_9_30_static_init_neg_float` -- [ ] `#warning` preprocessing directive (non-fatal, parsing continues): `6_10_warning_directive` +- [x] pointer subtraction yields ptrdiff_t (assignable to a wider integer without a cast): `6_5_6_01_ptr_diff_assign_to_long` +- [x] file-scope array bound with a parenthesized integer constant expression: `6_7_6_18_file_scope_array_bound_paren` +- [x] parenthesized declarator name (`int (foo)(int)`): `6_7_6_19_paren_declarator_name` +- [x] function declarator with an inline function-pointer return type (no typedef): `6_7_6_20_func_returning_funcptr_no_typedef` +- [x] static initializer accepts unary `-` on a floating constant: `6_7_9_30_static_init_neg_float` +- [x] `#warning` preprocessing directive (non-fatal, parsing continues): `6_10_warning_directive` diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -1199,6 +1199,8 @@ static void parse_external_decl(Parser* p) { Sym name; SrcLoc loc; const Type* base_ty; + Attr* dattrs = NULL; + DeclaratorInfo dinfo; if (!parse_decl_specs(p, &specs)) { perr(p, "expected declaration"); @@ -1236,45 +1238,19 @@ static void parse_external_decl(Parser* p) { return; } - base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || - ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected declarator"); - } - name = 
p->cur.v.ident; - loc = tok_loc(&p->cur); - advance(p); - - Attr* dattrs = NULL; - parse_attrs_into(p, &dattrs); + base_ty = parse_declarator_full_info(p, specs.type, /*allow_abstract=*/0, + &name, &loc, &dattrs, &dinfo); - while (is_punct(&p->cur, '[')) { - DeclSuffix s; - if (!parse_decl_suffix(p, &s)) break; - if (s.kind != DS_ARRAY) break; - base_ty = apply_decl_suffix(p, base_ty, &s); - } - parse_attrs_into(p, &dattrs); - - if (is_punct(&p->cur, '(')) { + if (base_ty && base_ty->kind == TY_FUNC) { ParamInfo* infos = NULL; u16 nparams = 0; - u8 variadic = 0; - const Type** ptypes = NULL; const Type* fn_ty; const ABIFuncInfo* abi; SymEntry* fent; - advance(p); /* '(' */ - parse_param_list(p, &infos, &nparams, &variadic); - expect_punct(p, ')', "')' after parameter list"); - parse_attrs_into(p, &dattrs); - - if (nparams) { - ptypes = (const Type**)arena_array(p->pool->arena, const Type*, nparams); - for (u16 i = 0; i < nparams; ++i) ptypes[i] = infos[i].type; - } - fn_ty = type_func(p->pool, base_ty, ptypes, nparams, (int)variadic); + fn_ty = base_ty; + infos = dinfo.fn_params; + nparams = dinfo.fn_nparams; validate_decl_type_constraints(p, &specs, fn_ty, /*is_function=*/1, /*is_member=*/0); abi = c_abi_func_info(p->abi, p->pool, fn_ty); @@ -1416,22 +1392,12 @@ static void parse_external_decl(Parser* p) { } if (!accept_punct(p, ',')) break; - base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || - ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected declarator after ','"); - } - name = p->cur.v.ident; - loc = tok_loc(&p->cur); - advance(p); dattrs = NULL; - parse_attrs_into(p, &dattrs); - while (is_punct(&p->cur, '[')) { - DeclSuffix s; - if (!parse_decl_suffix(p, &s)) break; - base_ty = apply_decl_suffix(p, base_ty, &s); + base_ty = parse_declarator_full_info(p, specs.type, /*allow_abstract=*/0, + &name, &loc, &dattrs, &dinfo); + if (base_ty && base_ty->kind == TY_FUNC) { + perr(p, "function declarator in object 
declaration list"); } - parse_attrs_into(p, &dattrs); } expect_punct(p, ';', "';' after global declaration"); } diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -2607,6 +2607,7 @@ static void emit_add_or_sub(Parser* p, BinOp bop) { cg_push_int(p->cg, (i64)esz, ty_size_t(p)); cg_binop(p->cg, BO_SDIV); } + cg_convert(p->cg, c_abi_ptrdiff_type(p->abi, p->pool)); return; } } diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c @@ -714,15 +714,21 @@ static int try_parse_static_float(Parser* p, u8* dst, u32 size, const Type* ty) { const Type* uty = type_unqual(p->pool, ty); double value; + int neg = 0; if (!uty || (uty->kind != TY_FLOAT && uty->kind != TY_DOUBLE && uty->kind != TY_LDOUBLE)) { return 0; } + if (is_punct(&p->cur, '-') || is_punct(&p->cur, '+')) { + neg = is_punct(&p->cur, '-'); + advance(p); + } if (p->cur.kind != TOK_FLT && p->cur.kind != TOK_NUM) perr(p, "expected floating constant expression"); value = p->cur.kind == TOK_FLT ? parse_float_literal(p, &p->cur) : (double)parse_int_literal(p, &p->cur); + if (neg) value = -value; advance(p); if (uty->kind == TY_FLOAT && size == 4u) { union { diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -418,6 +418,12 @@ typedef struct DeclSuffix { u8 variadic; } DeclSuffix; +typedef struct DeclaratorInfo { + ParamInfo* fn_params; + u16 fn_nparams; + u8 fn_variadic; +} DeclaratorInfo; + /* ============================================================ * Cross-module forward declarations * ============================================================ */ @@ -437,6 +443,10 @@ const Type* parse_declarator_full(Parser* p, const Type* base, const Type* parse_declarator_full_ex(Parser* p, const Type* base, int allow_abstract, Sym* name_out, SrcLoc* loc_out, Attr** attrs_out); +const Type* parse_declarator_full_info(Parser* p, const Type* base, + int allow_abstract, Sym* name_out, + SrcLoc* loc_out, Attr** attrs_out, + DeclaratorInfo* info_out); const Type* 
parse_declarator(Parser* p, const Type* base, Sym* name_out, SrcLoc* loc_out); const Type* complete_incomplete_array(Parser* p, const Type* ty); diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c @@ -1262,6 +1262,8 @@ int parse_decl_suffix(Parser* p, DeclSuffix* out) { { Tok t = p->cur; int is_const_start = (t.kind == TOK_NUM || t.kind == TOK_CHR); + if (p->cur_func_name == 0 && t.kind == TOK_PUNCT && t.v.punct == '(') + is_const_start = 1; if (t.kind == TOK_FLT) { perr(p, "array bound requires integer type"); } @@ -1363,6 +1365,14 @@ const Type* parse_declarator_full(Parser* p, const Type* base, const Type* parse_declarator_full_ex(Parser* p, const Type* base, int allow_abstract, Sym* name_out, SrcLoc* loc_out, Attr** attrs_out) { + return parse_declarator_full_info(p, base, allow_abstract, name_out, loc_out, + attrs_out, NULL); +} + +const Type* parse_declarator_full_info(Parser* p, const Type* base, + int allow_abstract, Sym* name_out, + SrcLoc* loc_out, Attr** attrs_out, + DeclaratorInfo* info_out) { Attr* local_attrs = NULL; base = parse_pointer_layer(p, base); @@ -1450,6 +1460,8 @@ const Type* parse_declarator_full_ex(Parser* p, const Type* base, DeclSuffix suffs[8]; int nsuffs = 0; + DeclSuffix* final_fn_suff = NULL; + if (info_out) memset(info_out, 0, sizeof *info_out); while (nsuffs < 8) { if (!parse_decl_suffix(p, &suffs[nsuffs])) break; ++nsuffs; @@ -1464,6 +1476,12 @@ const Type* parse_declarator_full_ex(Parser* p, const Type* base, if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) { perr(p, "too many declarator suffixes (raise the cap if needed)"); } + if (n_inner_suffs > 0 && inner_suffs[0].kind == DS_FUNC) { + final_fn_suff = &inner_suffs[0]; + } else if (n_inner_suffs == 0 && nptrs_inner == 0 && nsuffs > 0 && + suffs[0].kind == DS_FUNC) { + final_fn_suff = &suffs[0]; + } for (int i = nsuffs - 1; i >= 0; --i) { base = apply_decl_suffix(p, base, &suffs[i]); } @@ -1479,6 +1497,11 @@ const Type* 
parse_declarator_full_ex(Parser* p, const Type* base, base = apply_decl_suffix(p, base, &inner_suffs[i]); } + if (info_out && base && base->kind == TY_FUNC && final_fn_suff) { + info_out->fn_params = final_fn_suff->params; + info_out->fn_nparams = final_fn_suff->nparams; + info_out->fn_variadic = final_fn_suff->variadic; + } if (name_out) *name_out = name; if (loc_out) *loc_out = nloc; return base; diff --git a/lang/c/type/type.c b/lang/c/type/type.c @@ -509,7 +509,7 @@ static CfreeCgTypeId type_cg_id_walk(CfreeCompiler* c, Pool* p, const Type* t, CfreeCgTypeId id; if (!c || !t) return CFREE_CG_TYPE_NONE; id = type_cg_builtin(c, (TypeKind)t->kind); - if (id != CFREE_CG_TYPE_NONE && !t->qual) return id; + if (id != CFREE_CG_TYPE_NONE) return id; switch ((TypeKind)t->kind) { case TY_PTR: { const Type* pointee = t->ptr.pointee; diff --git a/src/api/cg.c b/src/api/cg.c @@ -4221,8 +4221,13 @@ static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) { return; } - ra = api_force_reg_unless_imm(g, &a, ty); - rb = api_force_reg_unless_imm(g, &b, ty); + if (api_type_class(ty) == RC_FP) { + ra = api_force_reg(g, &a, ty); + rb = api_force_reg(g, &b, ty); + } else { + ra = api_force_reg_unless_imm(g, &a, ty); + rb = api_force_reg_unless_imm(g, &b, ty); + } if (api_can_delay_int_arith(g, ty, flags) && api_try_collapse_binop_identity(g, iop, ty, &a, &b, &folded_sv)) { diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -197,7 +197,7 @@ here for completeness once they're real cases. 
| `6_5_2_5_01_compound_literal_flat_struct` | RED | `(struct O){1,2,39}` initializes nested struct members without inner braces | 42 | | `6_5_2_5_02_compound_literal_designated_continue` | RED | `(struct S){.a[1]=20,22,0}` continues from the next subobject after a designator | 42 | | `6_5_3_4_04_sizeof_vla_param_row` | ★ | `sizeof(a[0])` where `a` is an adjusted `int a[n][m]` parameter is evaluated at runtime | 42 | -| `6_5_6_01_ptr_diff_assign_to_long` | RED | `long d = p - q;` — pointer subtraction yields ptrdiff_t and is assignable to a wider integer without a cast; cfree currently leaves the cg-stack top typed as `T*` after BO_ISUB so the assignment trips "incompatible assignment from pointer to integer" | 42 | +| `6_5_6_01_ptr_diff_assign_to_long` | ★ | `long d = p - q;` — pointer subtraction yields ptrdiff_t and is assignable to a wider integer without a cast | 42 | ## §6.5.2.2 Aggregate function arguments @@ -366,9 +366,9 @@ already exercised in §6.5 and §6.7. | `6_7_6_15_multidim_vla_local` | RED | `int a[n][m]; a[n-1][m-1]=42; return a[5][6];` — multiple VLA dimensions in one declarator | 42 | | `6_7_6_16_vla_param_2d` | ★ | `int a[n][m]` parameter passed a 2D array; runtime stride must use `m` | 42 | | `6_7_6_17_vla_param_3d` | ★ | `int a[n][m][k]` parameter passed a 3D array; nested runtime strides must compose | 42 | -| `6_7_6_18_file_scope_array_bound_paren` | RED | `static int marks[(2+5)] = {...};` — parenthesized integer constant expression in a file-scope array bound; cfree currently panics with `CfreeCg: regalloc - no spillable victim`, suggesting the constexpr falls into a runtime-IR lowering path that has no function context | 42 | -| `6_7_6_19_paren_declarator_name` | RED | `int (helper)(int x){...}` and `(helper)(42)` — the C grammar lists `'(' declarator ')'` as a direct-declarator; lua / glibc headers wrap public names in parens to defeat macro expansion. 
Currently rejected with "expected declarator" | 42 | -| `6_7_6_20_func_returning_funcptr_no_typedef` | RED | `int (*pick(int x))(void){...}` — function declarator with an inline function-pointer return type (the classic `signal()` shape, used by sqlite VFS `xDlSym`). `6_7_6_11` covers the typedef'd form; this row pins the inline declarator. Currently rejected with "expected declarator" | 42 | +| `6_7_6_18_file_scope_array_bound_paren` | ★ | `static int marks[(2+5)] = {...};` — parenthesized integer constant expression in a file-scope array bound | 42 | +| `6_7_6_19_paren_declarator_name` | ★ | `int (helper)(int x){...}` and `(helper)(42)` — the C grammar lists `'(' declarator ')'` as a direct-declarator; lua / glibc headers wrap public names in parens to defeat macro expansion | 42 | +| `6_7_6_20_func_returning_funcptr_no_typedef` | ★ | `int (*pick(int x))(void){...}` — function declarator with an inline function-pointer return type (the classic `signal()` shape, used by sqlite VFS `xDlSym`). `6_7_6_11` covers the typedef'd form; this row pins the inline declarator | 42 | ## §6.7.7 Type names @@ -423,7 +423,7 @@ cover compound typedef targets. | `6_7_9_27_static_flat_array_init` | RED | file-scope `static int a[2][2] = {1,2,3,36};` | 42 | | `6_7_9_28_static_flat_struct_init` | RED | file-scope `static struct O o = {1,2,39};` | 42 | | `6_7_9_29_unknown_bound_nested_init` | RED | `int a[][2] = {1,2,3,36};` — unknown outer bound is completed from a flat nested initializer | 42 | -| `6_7_9_30_static_init_neg_float` | RED | `static const double tab[2] = { -1.0, 43.0 };` — static initializer of arithmetic type permits unary `-` on a floating constant (§6.6 arithmetic constant expression). 
`try_parse_static_float` only accepts a bare TOK_FLT/TOK_NUM, so a leading `-` aborts with "expected floating constant expression" | 42 | +| `6_7_9_30_static_init_neg_float` | ★ | `static const double tab[2] = { -1.0, 43.0 };` — static initializer of arithmetic type permits unary `-` on a floating constant (§6.6 arithmetic constant expression) | 42 | ## §6.7.10 Static assertions @@ -508,7 +508,7 @@ in a way `test/pp/` cannot catch on its own. | Case | Status | Body | Expected | |---|---|---|---| -| `6_10_warning_directive` | RED | `#warning "..."` followed by a valid TU — non-fatal diagnostic, parsing continues. cfree's `process_directive` doesn't route `warning` and falls into the catch-all "unsupported directive" panic, which breaks every header that issues one (including the macOS SDK's `sys/cdefs.h`). | 42 | +| `6_10_warning_directive` | ★ | `#warning "..."` followed by a valid TU — non-fatal diagnostic, parsing continues | 42 | ## Builtins diff --git a/test/parse/cases/6_10_warning_directive.c b/test/parse/cases/6_10_warning_directive.c @@ -0,0 +1,12 @@ +/* §6.10 (preprocessing directives) -- `#warning` is a non-fatal + * diagnostic recognised by every mainstream C preprocessor since the + * 1990s and standardised in C23. cfree's `process_directive` doesn't + * route it, so it falls into the catch-all and panics with + * "unsupported directive". That makes any source tree whose headers + * issue `#warning` unbuildable -- including the macOS SDK's + * `sys/cdefs.h`, which emits one whenever `__GNUC__ < 4`. 
*/ +#warning "non-fatal diagnostic; parsing should continue" + +int test_main(void) { + return 42; +} diff --git a/test/parse/cases/6_10_warning_directive.expected b/test/parse/cases/6_10_warning_directive.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.c b/test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.c @@ -0,0 +1,12 @@ +/* §6.5.6 -- pointer subtraction yields ptrdiff_t (a signed integer type), + * so the result is assignable to a wider signed integer without a cast. + * Currently rejected: cfree leaves the cg stack typed as `T*` after + * BO_ISUB on two pointer operands, so the integer assignment trips the + * "incompatible assignment from pointer to integer" constraint check. */ +int test_main(void) { + int a[8] = {0}; + int *p = a + 7; + int *q = a + 4; + long d = p - q; + return (int)d + 39; +} diff --git a/test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.expected b/test/parse/cases/6_5_6_01_ptr_diff_assign_to_long.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_7_6_18_file_scope_array_bound_paren.c b/test/parse/cases/6_7_6_18_file_scope_array_bound_paren.c @@ -0,0 +1,11 @@ +/* §6.7.6.2 -- file-scope array bound that is a parenthesized integer + * constant expression. The parser must fold `(N)+M` at parse time the + * same way it folds `N+M` for the [N+M] form. Currently this path + * panics from the codegen layer ("CfreeCg: regalloc - no spillable + * victim"), as if the constexpr were being lowered to runtime IR for + * a file-scope declaration that has no enclosing function. 
*/ +static int marks[(2 + 5)] = {0, 0, 0, 0, 0, 0, 42}; + +int test_main(void) { + return marks[6]; +} diff --git a/test/parse/cases/6_7_6_18_file_scope_array_bound_paren.expected b/test/parse/cases/6_7_6_18_file_scope_array_bound_paren.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_7_6_19_paren_declarator_name.c b/test/parse/cases/6_7_6_19_paren_declarator_name.c @@ -0,0 +1,10 @@ +/* §6.7.6 direct-declarator -- `'(' declarator ')'` is one of the + * direct-declarator productions, so wrapping the declared name in + * parens is legal and changes nothing semantically. Real-world headers + * (lua, glibc) use this to suppress function-like macro expansion at + * the declaration site. Currently rejected with "expected declarator". */ +int (helper)(int x) { return x; } + +int test_main(void) { + return (helper)(42); +} diff --git a/test/parse/cases/6_7_6_19_paren_declarator_name.expected b/test/parse/cases/6_7_6_19_paren_declarator_name.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.c b/test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.c @@ -0,0 +1,17 @@ +/* §6.7.6 -- a function declarator may have a function-pointer return + * type written inline (no intermediate typedef). The shape + * `RETTY (*name(PARAMS))(INNER_PARAMS)` is the classic `signal()` form + * and shows up in the SQLite VFS table (`xDlSym`). + * `6_7_6_11_func_returning_funcptr` already covers the typedef'd form; + * this row pins the inline declarator. Currently rejected with + * "expected declarator" inside the return-type position. 
*/ +static int leaf(void) { return 42; } + +int (*pick(int x))(void) { + (void)x; + return leaf; +} + +int test_main(void) { + return pick(0)(); +} diff --git a/test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.expected b/test/parse/cases/6_7_6_20_func_returning_funcptr_no_typedef.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_7_9_30_static_init_neg_float.c b/test/parse/cases/6_7_9_30_static_init_neg_float.c @@ -0,0 +1,12 @@ +/* §6.7.9 -- the initializer for a static-storage object of arithmetic + * type is an arithmetic constant expression (§6.6), which permits + * unary `-` on a floating-constant. cfree's `try_parse_static_float` + * only accepts a bare TOK_FLT / TOK_NUM, so a leading `-` aborts with + * "expected floating constant expression". Hits stb_sprintf's + * `stbsp__negboterr` table, libm coefficient tables, and any numeric + * library that ships negative constants. */ +static const double tab[2] = { -1.0, 43.0 }; + +int test_main(void) { + return (int)(tab[0] + tab[1]); +} diff --git a/test/parse/cases/6_7_9_30_static_init_neg_float.expected b/test/parse/cases/6_7_9_30_static_init_neg_float.expected @@ -0,0 +1 @@ +42