kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5db3a3fbdfc23fa85e50ab5f38f7170f7f74c906
parent b8daff4719a017bf3f397759e6cd85245b1f6ceb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 18 May 2026 14:12:48 -0700

Fix C11 parse error diagnostics

Diffstat:
A doc/C11_CONFORMANCE_CHECKLIST.md | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lang/c/parse/parse.c | 9 +++++++++
M lang/c/parse/parse_expr.c | 32 ++++++++++++++++++++++++++++++++
M lang/c/parse/parse_priv.h | 3 ++-
M lang/c/parse/parse_stmt.c | 3 +++
M lang/c/parse/parse_type.c | 43 ++++++++++++++++++++++++++++++++++++++-----
6 files changed, 293 insertions(+), 6 deletions(-)

diff --git a/doc/C11_CONFORMANCE_CHECKLIST.md b/doc/C11_CONFORMANCE_CHECKLIST.md @@ -0,0 +1,209 @@ +# C11 conformance checklist + +Status snapshot: 2026-05-18. + +Ground truth should be the implementation plus targeted tests, not README.md. +Keep this checklist red-green: add or unskip the smallest case first, then +make the implementation pass it. + +## Current signal + +- [x] `make test-lex` passes: 16/16. +- [x] `make test-pp test-pp-err` passes: 82/82 and 15/15. +- [x] `make test-parse-err` passes: 30/30. +- [ ] `make test-parse` passes without skips: currently 2504 pass, 0 fail, + 4 skip. Skips are `long double` and file-scope `asm`. +- [x] `make test-cg-api test-opt test-dwarf test-debug` passes. +- [ ] `make rt` builds the default runtime archives. Currently fails in + `rt/lib/atomic/atomic_common.inc` because exported `__atomic_*` + functions conflict with clang builtin declarations. +- [ ] `make test-lib-deps` passes. Current external-symbol allowlist drift: + `___memmove_chk`, `___snprintf_chk`, `_qsort`, `_strtod` were added and + `_strstr` disappeared. + +## First conformance gate: required diagnostics + +Goal: keep `make test-parse-err` green. These C11 constraint diagnostics now +have targeted negative coverage; broaden the checks as adjacent semantic rules +are implemented. + +- [x] Reject `sizeof` on incomplete object types. + Test: `test/parse/cases_err/6_5_sizeof_incomplete.c`. + Code: `parse_expr.c` `sizeof` / `c_abi_sizeof` call sites. +- [x] Reject invalid implicit assignment conversions, starting with pointer to + integer without an explicit cast. + Test: `test/parse/cases_err/6_5_type_mismatch.c`. + Code: `parse_assign_expr` in `parse_expr.c`. +- [x] Reject bit-field widths wider than the declared bit-field type. + Test: `test/parse/cases_err/6_7_2_1_bitfield_too_wide.c`. + Code: `parse_member_decls` in `parse_type.c`. +- [x] Reject multiple storage-class specifiers in one declaration. 
+ Test: `test/parse/cases_err/6_7_2_storage_class_combo.c`. + Code: `parse_decl_specs`. +- [x] Reject redefining a complete struct/union tag in the same scope. + Test: `test/parse/cases_err/6_7_2_two_struct_defs.c`. + Code: `parse_struct_or_union`; `complete` is set for newly defined tags, + not only previously forward-declared tags. +- [x] Reject assignment to const-qualified lvalues. + Test: `test/parse/cases_err/6_7_3_const_assign.c`. + Code: declaration qualifiers are applied to the base type and checked in + `parse_assign_expr`. +- [x] Reject duplicate file-scope object definitions with external/internal + linkage. + Test: `test/parse/cases_err/6_7_redefinition.c`. + Code: `parse_external_decl`, symbol `defined` state. +- [x] Reject duplicate `case` values within one switch after integer constant + conversion. + Test: `test/parse/cases_err/6_8_duplicate_case.c`. + Code: `parse_case_stmt` / `SwitchCtx`. +- [x] Reject duplicate function definitions while still allowing compatible + declarations before one definition. + Test: `test/parse/cases_err/6_9_redefinition_function.c`. + Code: `parse_external_decl`; `SEK_FUNC` symbols track a `defined` bit. +- [x] Reject `void` mixed with other function parameters. + Test: `test/parse/cases_err/6_9_void_param_with_other.c`. + Code: `parse_param_list`. +- [x] Reject non-power-of-two positive `aligned(N)` values. + Test: `test/parse/cases_err/attr_p2_aligned_not_pow2.c`. + Code: attribute argument parsing in `parse_type.c`. + +Suggested cadence: + +```sh +make test-parse-err > /tmp/cfree_parse_err.log 2>&1 || tail -n 80 /tmp/cfree_parse_err.log +``` + +## Positive parse skips + +Goal: `make test-parse` is green with `CFREE_TEST_ALLOW_SKIP` unset. + +- [ ] Implement `long double` enough for parser/codegen/runtime tests. + Current skipped case: `test/parse/cases/6_7_2_12_long_double.c`. + Skip reason: binary128 literal/convert needs `rt/lib/fp_tf` wiring + through CG. +- [ ] Enable file-scope `asm`. 
+ Current skipped case: `test/parse/cases/asm_02_file_scope.c`. + Parser currently parses and then deliberately errors in + `parse_file_scope_asm` because the C frontend is isolated from assembler + internals. + +Focused run: + +```sh +CFREE_TEST_FILTER=6_7_2_12_long_double make test-parse +CFREE_TEST_FILTER=asm_02_file_scope make test-parse +``` + +## Type system and declarations + +- [ ] Implement enough structural compatibility for redeclarations and + composite types beyond pointer identity. + Existing planned cases: `6_2_7_01_composite_array_size`, + `6_2_2_01_extern_in_block_inherits_internal`. +- [ ] Track declaration state separately from scope lookup: + declaration, tentative definition, definition, function definition, + linkage, storage duration, and type compatibility. +- [ ] Add same-scope ordinary identifier redefinition checks while preserving + legal shadowing in nested block scopes. +- [ ] Complete tag state handling for forward declarations, same-scope + completion, and wrong-kind redeclarations. +- [ ] Validate function declarator constraints: + `void` parameter rules, variadic placement, function returning function, + function returning array, array/function parameter adjustment. +- [ ] Decide and document implementation-defined bit-field behavior: + plain `int` signedness, allowed extended bit-field types, allocation + order, straddling, and alignment. +- [ ] Add positive bit-field lowering cases from `test/parse/CORPUS.md`, + including zero-width bit-fields. + +## Expressions and conversions + +- [ ] Make implicit conversions constraint-aware. Do not rely on CG conversion + success as the semantic check. +- [ ] Preserve lvalue properties: modifiable, const-qualified, bit-field, + array, function designator, and incomplete type. +- [ ] Implement `sizeof` rules completely: + no incomplete object type, no function type, no bit-field, VLA operand + evaluated, non-VLA operand not evaluated. 
+- [ ] Complete conditional operator usual-conversion behavior for arithmetic + and pointer/null arms. +- [ ] Complete pointer compound assignment (`p += n`, `p -= n`). +- [ ] Expand `_Generic` tests for default selection, compatible types, and + unevaluated controlling expression. +- [ ] Add negative tests for invalid pointer arithmetic, invalid relational + comparisons, invalid casts, modifying non-lvalues, and scalar-required + operators. + +## Constant expressions and initializers + +- [ ] Replace the current narrow integer evaluator with a C11-aware constant + expression evaluator that tracks type, value category, and allowed forms. +- [ ] Accept `_Alignof` in integer constant expressions. +- [ ] Complete static initializer address constants: + object address, function address, array plus/minus integer constant, + and null pointer constants. +- [ ] Implement static-storage union initialization or document a temporary + nonconformance gate. +- [ ] Complete designated initializers: + nested designators, enum-valued array designators, duplicate designator + overwrite rules, non-first union member. +- [ ] Add diagnostics for initializer overflow, excess scalar initializers, + non-constant static initializers, and invalid designators. + +## Preprocessor and translation phases + +- [x] Object/function-like macros, stringize, paste, rescan, conditionals, + includes, line control, unknown pragmas, and `#embed` have passing tests. +- [ ] Audit remaining C11 translation-phase requirements: + universal character names, multibyte characters, trigraph policy, + diagnostics for invalid preprocessing tokens, and line-splice edge cases. +- [ ] Add conformance tests for implementation-defined preprocessor behavior + documented in C11 Annex J.3.12. +- [ ] Decide whether `#embed` is extension-only under strict C11 mode once a + strict mode exists. 
+ +## Freestanding library surface + +C11 freestanding requires at least `<float.h>`, `<iso646.h>`, `<limits.h>`, +`<stdalign.h>`, `<stdarg.h>`, `<stdbool.h>`, `<stddef.h>`, `<stdint.h>`, and +`<stdnoreturn.h>`. This tree also ships `assert.h`, `setjmp.h`, and +`stdatomic.h`, plus cfree extensions. + +- [ ] Add header compile smoke tests per supported target for every + freestanding header. +- [ ] Add macro/value tests for `limits.h`, `stdint.h`, `stddef.h`, and + `float.h` against target ABI expectations. +- [ ] Add `stdarg.h` runtime tests for AArch64, x86-64, and RV64. +- [ ] Get `stdatomic.h` tests passing against both parser builtins and + `libcfree_rt.a`. +- [ ] Fix `make rt` before treating atomics as conforming. +- [ ] Decide whether `setjmp.h` remains an advertised extension or is part of + a hosted profile only. + +## Strict mode and extensions + +Today the frontend accepts GNU extensions needed by the project. C11 +conformance needs a mode story. + +- [ ] Add a driver/frontend option for strict C11 diagnostics, or document that + the current mode is GNU-ish C11. +- [ ] Classify extensions: `__int128`, `asm`, GNU attributes, statement + expressions if added, binary integer literals, `#embed`, and cfree + builtins. +- [ ] In strict mode, diagnose extensions that can invalidate strictly + conforming programs. +- [ ] Keep extension tests separate from strict C11 tests. + +## Suggested working order + +1. Keep `test-parse-err` green while broadening semantic diagnostics beyond + the first targeted cases. +2. Add a compact "semantic type checks" helper layer so assignment, return, + initialization, conditional expressions, and calls share rules. +3. Fix declaration-state tracking: redeclarations, tentative definitions, + function definitions, tag completion, and composite types. +4. Finish bit-fields: diagnostics first, then layout/codegen. +5. Finish `sizeof`/constant-expression/static-initializer semantics. +6. 
Unskip `long double` or explicitly narrow the supported C profile until + runtime/CG support exists. +7. Bring `rt` and freestanding header tests into the default conformance gate. diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -680,6 +680,9 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, } else if (pty && pty->kind == TY_FUNC) { pty = type_ptr(p->pool, pty); } + if (pty && pty->kind == TY_VOID) { + perr(p, "'void' must be the only parameter"); + } if (n == cap) { cap *= 2; ParamInfo* nbuf = (ParamInfo*)arena_array(p->pool->arena, ParamInfo, cap); @@ -900,6 +903,8 @@ static void parse_external_decl(Parser* p) { attr_list_append(&fent->attrs, dattrs); if (is_punct(&p->cur, '{')) { + if (fent->defined) perr(p, "redefinition of function"); + fent->defined = 1; Sym saved_func_name = p->cur_func_name; p->cur_func_name = name; parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc, @@ -948,6 +953,9 @@ static void parse_external_decl(Parser* p) { if (existing && existing->kind == SEK_GLOBAL) { sym = existing->v.sym; e = existing; + if (has_init && e->defined) { + perr(p, "redefinition of object"); + } if (e->type && base_ty && e->type->kind == TY_ARRAY && base_ty->kind == TY_ARRAY) { if (e->type->arr.incomplete && !base_ty->arr.incomplete) { @@ -988,6 +996,7 @@ static void parse_external_decl(Parser* p) { u32 align_eff = (specs.align > attr_align) ? 
specs.align : attr_align; if (has_init) { + if (e) e->defined = 1; advance(p); /* '=' */ if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) { const Type* completed = complete_incomplete_array(p, base_ty); diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -20,6 +20,21 @@ static int accept_kw(Parser* p, CKw k) { return 0; } +static int type_is_incomplete(const Type* t) { + if (!t) return 1; + if (t->kind == TY_VOID) return 1; + if ((t->kind == TY_STRUCT || t->kind == TY_UNION) && t->rec.incomplete) + return 1; + if (t->kind == TY_ARRAY && t->arr.incomplete) return 1; + return 0; +} + +static void require_sizeof_type(Parser* p, const Type* ty) { + if (!ty || type_is_incomplete(ty) || ty->kind == TY_FUNC) { + perr(p, "sizeof operand has incomplete or function type"); + } +} + /* ============================================================ * Literal parsing * ============================================================ */ @@ -440,6 +455,7 @@ static i64 cexpr_unary(Parser* p, SrcLoc loc) { { const Type* t = parse_type_name(p); expect_punct(p, ')', "')' after sizeof type-name"); + require_sizeof_type(p, t); return (i64)c_abi_sizeof(p->abi, t); } } @@ -447,6 +463,7 @@ static i64 cexpr_unary(Parser* p, SrcLoc loc) { parse_unary(p); { const Type* ty = cg_top_type(p->cg); + require_sizeof_type(p, ty); i64 sz = (i64)c_abi_sizeof(p->abi, ty); cg_drop(p->cg); return sz; @@ -574,6 +591,8 @@ void coerce_top_to_lvalue(Parser* p) { cg_convert(p->cg, dst); } else if (type_is_arith(src) && type_is_ptr(dst)) { cg_convert(p->cg, dst); + } else if (type_is_ptr(src) && type_is_ptr(dst)) { + cg_convert(p->cg, dst); } } @@ -1378,6 +1397,7 @@ void parse_unary(Parser* p) { cg_push_local_typed(p->cg, vla_slot, ty_size_t(p)); cg_load(p->cg); } else { + require_sizeof_type(p, ty); cg_push_int(p->cg, (i64)c_abi_sizeof(p->abi, ty), ty_size_t(p)); } return; @@ -1946,9 +1966,21 @@ void parse_assign_expr(Parser* p) { return; } advance(p); + const Type* lhs = 
cg_top_type(p->cg); + { + if (lhs && (lhs->qual & Q_CONST)) { + perr(p, "assignment to const-qualified object"); + } + } if (is_simple_assign) { parse_assign_expr(p); to_rvalue(p); + { + const Type* rhs = cg_top_type(p->cg); + if (type_is_ptr(rhs) && type_is_arith(lhs)) { + perr(p, "incompatible assignment from pointer to integer"); + } + } coerce_top_to_lvalue(p); cg_store(p->cg); return; diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -88,7 +88,8 @@ typedef struct SymEntry SymEntry; struct SymEntry { Sym name; u8 kind; /* SymEntryKind */ - u8 pad[3]; + u8 defined; + u8 pad[2]; const Type* type; union { FrameSlot slot; diff --git a/lang/c/parse/parse_stmt.c b/lang/c/parse/parse_stmt.c @@ -219,6 +219,9 @@ static void parse_case_stmt(Parser* p) { SrcLoc loc = tok_loc_stmt(&p->cur); if (!p->cur_switch) perr(p, "'case' label not in switch statement"); v = eval_const_int(p, loc); + for (ce = p->cur_switch->cases; ce; ce = ce->next) { + if (ce->value == v) perr(p, "duplicate case value"); + } expect_punct(p, ':', "':' after case constant"); L = cg_label_new(p->cg); cg_label_place(p->cg, L); diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c @@ -166,6 +166,10 @@ static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape, } loc = tok_loc(&p->cur); a->v.i = eval_const_int(p, loc); + if (a->kind == ATTR_ALIGNED && a->v.i > 0 && + (((u64)a->v.i & ((u64)a->v.i - 1u)) != 0)) { + perr(p, "attribute 'aligned' argument must be a power of two"); + } a->nargs = 1; expect_punct(p, ')', "')' after attribute integer argument"); return; @@ -386,6 +390,7 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { TypeSpecAccum acc; SrcLoc loc; int seen = 0; + int storage_seen = 0; const Type* tagged_ty = NULL; memset(&acc, 0, sizeof acc); out->type = NULL; @@ -502,10 +507,14 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { advance(p); seen = 1; } else if (is_kw(p, &t, KW_STATIC)) { + if (storage_seen) perr(p, "multiple storage-class 
specifiers"); + storage_seen = 1; out->storage = DS_STATIC; advance(p); seen = 1; } else if (is_kw(p, &t, KW_EXTERN)) { + if (storage_seen) perr(p, "multiple storage-class specifiers"); + storage_seen = 1; out->storage = DS_EXTERN; advance(p); seen = 1; @@ -541,6 +550,8 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { advance(p); seen = 1; } else if (is_kw(p, &t, KW_TYPEDEF)) { + if (storage_seen) perr(p, "multiple storage-class specifiers"); + storage_seen = 1; out->storage = DS_TYPEDEF; advance(p); seen = 1; @@ -567,8 +578,19 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { out->flags |= DF_THREAD; advance(p); seen = 1; - } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) || - is_kw(p, &t, KW_AUTO)) { + } else if (is_kw(p, &t, KW_NORETURN)) { + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_REGISTER)) { + if (storage_seen) perr(p, "multiple storage-class specifiers"); + storage_seen = 1; + out->storage = DS_REGISTER; + advance(p); + seen = 1; + } else if (is_kw(p, &t, KW_AUTO)) { + if (storage_seen) perr(p, "multiple storage-class specifiers"); + storage_seen = 1; + out->storage = DS_AUTO; advance(p); seen = 1; } else if (!acc.saw_explicit_type && !tagged_ty && t.kind == TOK_IDENT && @@ -605,6 +627,10 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) { out->type = ty_int(p); } } + if (out->type && out->quals) { + out->type = type_qualified(p->pool, out->type, + (u16)(out->type->qual | out->quals)); + } } return seen; } @@ -673,6 +699,10 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { if (is_punct(&p->cur, ':')) { advance(p); i64 w = eval_const_int(p, mloc); + if (w < 0) perr(p, "negative bit-field width"); + if (w > (i64)c_abi_sizeof(p->abi, specs.type) * 8) { + perr(p, "bit-field width exceeds its type width"); + } f.name = 0; f.type = specs.type; f.bitfield_width = (u16)w; @@ -689,6 +719,11 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { &mname, &mloc, &mattrs); if (accept_punct(p, ':')) { 
i64 w = eval_const_int(p, mloc); + if (w < 0) perr(p, "negative bit-field width"); + if (w == 0 && mname != 0) perr(p, "zero-width bit-field must be unnamed"); + if (w > (i64)c_abi_sizeof(p->abi, mty) * 8) { + perr(p, "bit-field width exceeds its type width"); + } f.name = mname; f.type = mty; f.bitfield_width = (u16)w; @@ -789,9 +824,7 @@ const Type* parse_struct_or_union(Parser* p, TypeKind kind, if (opts.align_override > target->rec.align_override) target->rec.align_override = opts.align_override; } - if (existing) { - existing->complete = 1; - } + if (te) te->complete = 1; return target; }