commit 5db3a3fbdfc23fa85e50ab5f38f7170f7f74c906
parent b8daff4719a017bf3f397759e6cd85245b1f6ceb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 18 May 2026 14:12:48 -0700
Fix C11 parse error diagnostics
Diffstat:
6 files changed, 293 insertions(+), 6 deletions(-)
diff --git a/doc/C11_CONFORMANCE_CHECKLIST.md b/doc/C11_CONFORMANCE_CHECKLIST.md
@@ -0,0 +1,209 @@
+# C11 conformance checklist
+
+Status snapshot: 2026-05-18.
+
+Ground truth should be the implementation plus targeted tests, not README.md.
+Keep this checklist red-green: add or unskip the smallest case first, then
+make the implementation pass it.
+
+## Current signal
+
+- [x] `make test-lex` passes: 16/16.
+- [x] `make test-pp test-pp-err` passes: 82/82 and 15/15.
+- [x] `make test-parse-err` passes: 30/30.
+- [ ] `make test-parse` passes without skips: currently 2504 pass, 0 fail,
+ 4 skip. Skips are `long double` and file-scope `asm`.
+- [x] `make test-cg-api test-opt test-dwarf test-debug` passes.
+- [ ] `make rt` builds the default runtime archives. Currently fails in
+ `rt/lib/atomic/atomic_common.inc` because exported `__atomic_*`
+ functions conflict with clang builtin declarations.
+- [ ] `make test-lib-deps` passes. Current external-symbol allowlist drift:
+ `___memmove_chk`, `___snprintf_chk`, `_qsort`, `_strtod` were added and
+ `_strstr` disappeared.
+
+## First conformance gate: required diagnostics
+
+Goal: keep `make test-parse-err` green. These C11 constraint diagnostics now
+have targeted negative coverage; broaden the checks as adjacent semantic rules
+are implemented.
+
+- [x] Reject `sizeof` on incomplete object types.
+ Test: `test/parse/cases_err/6_5_sizeof_incomplete.c`.
+ Code: `parse_expr.c` `sizeof` / `c_abi_sizeof` call sites.
+- [x] Reject invalid implicit assignment conversions, starting with pointer to
+ integer without an explicit cast.
+ Test: `test/parse/cases_err/6_5_type_mismatch.c`.
+ Code: `parse_assign_expr` in `parse_expr.c`.
+- [x] Reject bit-field widths wider than the declared bit-field type.
+ Test: `test/parse/cases_err/6_7_2_1_bitfield_too_wide.c`.
+ Code: `parse_member_decls` in `parse_type.c`.
+- [x] Reject multiple storage-class specifiers in one declaration.
+ Test: `test/parse/cases_err/6_7_2_storage_class_combo.c`.
+ Code: `parse_decl_specs`.
+- [x] Reject redefining a complete struct/union tag in the same scope.
+ Test: `test/parse/cases_err/6_7_2_two_struct_defs.c`.
+ Code: `parse_struct_or_union`; `complete` is set for newly defined tags,
+ not only previously forward-declared tags.
+- [x] Reject assignment to const-qualified lvalues.
+ Test: `test/parse/cases_err/6_7_3_const_assign.c`.
+ Code: declaration qualifiers are applied to the base type and checked in
+ `parse_assign_expr`.
+- [x] Reject duplicate file-scope object definitions with external/internal
+ linkage.
+ Test: `test/parse/cases_err/6_7_redefinition.c`.
+ Code: `parse_external_decl`, symbol `defined` state.
+- [x] Reject duplicate `case` values within one switch after integer constant
+ conversion.
+ Test: `test/parse/cases_err/6_8_duplicate_case.c`.
+ Code: `parse_case_stmt` / `SwitchCtx`.
+- [x] Reject duplicate function definitions while still allowing compatible
+ declarations before one definition.
+ Test: `test/parse/cases_err/6_9_redefinition_function.c`.
+ Code: `parse_external_decl`; `SEK_FUNC` symbols track a `defined` bit.
+- [x] Reject `void` mixed with other function parameters.
+ Test: `test/parse/cases_err/6_9_void_param_with_other.c`.
+ Code: `parse_param_list`.
+- [x] Reject non-power-of-two positive `aligned(N)` values.
+ Test: `test/parse/cases_err/attr_p2_aligned_not_pow2.c`.
+ Code: attribute argument parsing in `parse_type.c`.
+
+Suggested cadence:
+
+```sh
+make test-parse-err > /tmp/cfree_parse_err.log 2>&1 || tail -n 80 /tmp/cfree_parse_err.log
+```
+
+## Positive parse skips
+
+Goal: `make test-parse` is green with `CFREE_TEST_ALLOW_SKIP` unset.
+
+- [ ] Implement `long double` enough for parser/codegen/runtime tests.
+ Current skipped case: `test/parse/cases/6_7_2_12_long_double.c`.
+ Skip reason: binary128 literals and conversions need `rt/lib/fp_tf` wired
+ through the code generator (CG).
+- [ ] Enable file-scope `asm`.
+ Current skipped case: `test/parse/cases/asm_02_file_scope.c`.
+ The parser currently parses the construct and then deliberately reports an
+ error in `parse_file_scope_asm` because the C frontend is isolated from
+ assembler internals.
+
+Focused run:
+
+```sh
+CFREE_TEST_FILTER=6_7_2_12_long_double make test-parse
+CFREE_TEST_FILTER=asm_02_file_scope make test-parse
+```
+
+## Type system and declarations
+
+- [ ] Implement enough structural compatibility for redeclarations and
+ composite types beyond pointer identity.
+ Existing planned cases: `6_2_7_01_composite_array_size`,
+ `6_2_2_01_extern_in_block_inherits_internal`.
+- [ ] Track declaration state separately from scope lookup:
+ declaration, tentative definition, definition, function definition,
+ linkage, storage duration, and type compatibility.
+- [ ] Add same-scope ordinary identifier redefinition checks while preserving
+ legal shadowing in nested block scopes.
+- [ ] Complete tag state handling for forward declarations, same-scope
+ completion, and wrong-kind redeclarations.
+- [ ] Validate function declarator constraints:
+ `void` parameter rules, variadic placement, function returning function,
+ function returning array, array/function parameter adjustment.
+- [ ] Decide and document implementation-defined bit-field behavior:
+ plain `int` signedness, allowed extended bit-field types, allocation
+ order, straddling, and alignment.
+- [ ] Add positive bit-field lowering cases from `test/parse/CORPUS.md`,
+ including zero-width bit-fields.
+
+## Expressions and conversions
+
+- [ ] Make implicit conversions constraint-aware. Do not rely on CG conversion
+ success as the semantic check.
+- [ ] Preserve lvalue properties: modifiable, const-qualified, bit-field,
+ array, function designator, and incomplete type.
+- [ ] Implement `sizeof` rules completely:
+ no incomplete object type, no function type, no bit-field, VLA operand
+ evaluated, non-VLA operand not evaluated.
+- [ ] Complete conditional operator usual-conversion behavior for arithmetic
+ and pointer/null arms.
+- [ ] Complete pointer compound assignment (`p += n`, `p -= n`).
+- [ ] Expand `_Generic` tests for default selection, compatible types, and
+ unevaluated controlling expression.
+- [ ] Add negative tests for invalid pointer arithmetic, invalid relational
+ comparisons, invalid casts, modifying non-lvalues, and scalar-required
+ operators.
+
+## Constant expressions and initializers
+
+- [ ] Replace the current narrow integer evaluator with a C11-aware constant
+ expression evaluator that tracks type, value category, and allowed forms.
+- [ ] Accept `_Alignof` in integer constant expressions.
+- [ ] Complete static initializer address constants:
+ object address, function address, array plus/minus integer constant,
+ and null pointer constants.
+- [ ] Implement static-storage union initialization or document a temporary
+ nonconformance gate.
+- [ ] Complete designated initializers:
+ nested designators, enum-valued array designators, duplicate designator
+ overwrite rules, non-first union member.
+- [ ] Add diagnostics for initializer overflow, excess scalar initializers,
+ non-constant static initializers, and invalid designators.
+
+## Preprocessor and translation phases
+
+- [x] Object/function-like macros, stringize, paste, rescan, conditionals,
+ includes, line control, unknown pragmas, and `#embed` have passing tests.
+- [ ] Audit remaining C11 translation-phase requirements:
+ universal character names, multibyte characters, trigraph policy,
+ diagnostics for invalid preprocessing tokens, and line-splice edge cases.
+- [ ] Add conformance tests for implementation-defined preprocessor behavior
+ documented in C11 Annex J.3.11 (Preprocessing directives).
+- [ ] Decide whether `#embed` is extension-only under strict C11 mode once a
+ strict mode exists.
+
+## Freestanding library surface
+
+C11 freestanding requires at least `<float.h>`, `<iso646.h>`, `<limits.h>`,
+`<stdalign.h>`, `<stdarg.h>`, `<stdbool.h>`, `<stddef.h>`, `<stdint.h>`, and
+`<stdnoreturn.h>`. This tree also ships `<assert.h>`, `<setjmp.h>`, and
+`<stdatomic.h>`, plus cfree extensions.
+
+- [ ] Add header compile smoke tests per supported target for every
+ freestanding header.
+- [ ] Add macro/value tests for `limits.h`, `stdint.h`, `stddef.h`, and
+ `float.h` against target ABI expectations.
+- [ ] Add `stdarg.h` runtime tests for AArch64, x86-64, and RV64.
+- [ ] Get `stdatomic.h` tests passing against both parser builtins and
+ `libcfree_rt.a`.
+- [ ] Fix `make rt` before treating atomics as conforming.
+- [ ] Decide whether `setjmp.h` remains an advertised extension or is part of
+ a hosted profile only.
+
+## Strict mode and extensions
+
+Today the frontend accepts GNU extensions needed by the project. C11
+conformance needs a mode story.
+
+- [ ] Add a driver/frontend option for strict C11 diagnostics, or document that
+ the current mode is GNU-ish C11.
+- [ ] Classify extensions: `__int128`, `asm`, GNU attributes, statement
+ expressions if added, binary integer literals, `#embed`, and cfree
+ builtins.
+- [ ] In strict mode, diagnose extensions that can invalidate strictly
+ conforming programs.
+- [ ] Keep extension tests separate from strict C11 tests.
+
+## Suggested working order
+
+1. Keep `test-parse-err` green while broadening semantic diagnostics beyond
+ the first targeted cases.
+2. Add a compact "semantic type checks" helper layer so assignment, return,
+ initialization, conditional expressions, and calls share rules.
+3. Fix declaration-state tracking: redeclarations, tentative definitions,
+ function definitions, tag completion, and composite types.
+4. Finish bit-fields: diagnostics first, then layout/codegen.
+5. Finish `sizeof`/constant-expression/static-initializer semantics.
+6. Unskip `long double` or explicitly narrow the supported C profile until
+ runtime/CG support exists.
+7. Bring `rt` and freestanding header tests into the default conformance gate.
diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c
@@ -680,6 +680,9 @@ void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
} else if (pty && pty->kind == TY_FUNC) {
pty = type_ptr(p->pool, pty);
}
+ if (pty && pty->kind == TY_VOID) {
+ perr(p, "'void' must be the only parameter");
+ }
if (n == cap) {
cap *= 2;
ParamInfo* nbuf = (ParamInfo*)arena_array(p->pool->arena, ParamInfo, cap);
@@ -900,6 +903,8 @@ static void parse_external_decl(Parser* p) {
attr_list_append(&fent->attrs, dattrs);
if (is_punct(&p->cur, '{')) {
+ if (fent->defined) perr(p, "redefinition of function");
+ fent->defined = 1;
Sym saved_func_name = p->cur_func_name;
p->cur_func_name = name;
parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc,
@@ -948,6 +953,9 @@ static void parse_external_decl(Parser* p) {
if (existing && existing->kind == SEK_GLOBAL) {
sym = existing->v.sym;
e = existing;
+ if (has_init && e->defined) {
+ perr(p, "redefinition of object");
+ }
if (e->type && base_ty && e->type->kind == TY_ARRAY &&
base_ty->kind == TY_ARRAY) {
if (e->type->arr.incomplete && !base_ty->arr.incomplete) {
@@ -988,6 +996,7 @@ static void parse_external_decl(Parser* p) {
u32 align_eff = (specs.align > attr_align) ? specs.align : attr_align;
if (has_init) {
+ if (e) e->defined = 1;
advance(p); /* '=' */
if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) {
const Type* completed = complete_incomplete_array(p, base_ty);
diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c
@@ -20,6 +20,21 @@ static int accept_kw(Parser* p, CKw k) {
return 0;
}
+static int type_is_incomplete(const Type* t) { /* 1 = incomplete, else 0 */
+ if (!t) return 1; /* a missing (NULL) type is reported as incomplete */
+ if (t->kind == TY_VOID) return 1; /* void is reported as incomplete */
+ if ((t->kind == TY_STRUCT || t->kind == TY_UNION) && t->rec.incomplete)
+ return 1; /* struct/union whose record is flagged incomplete */
+ if (t->kind == TY_ARRAY && t->arr.incomplete) return 1; /* flagged array */
+ return 0; /* no other kind is reported as incomplete here */
+}
+
+static void require_sizeof_type(Parser* p, const Type* ty) { /* sizeof guard */
+ if (!ty || type_is_incomplete(ty) || ty->kind == TY_FUNC) { /* bad operand */
+ perr(p, "sizeof operand has incomplete or function type"); /* via perr */
+ }
+}
+
/* ============================================================
* Literal parsing
* ============================================================ */
@@ -440,6 +455,7 @@ static i64 cexpr_unary(Parser* p, SrcLoc loc) {
{
const Type* t = parse_type_name(p);
expect_punct(p, ')', "')' after sizeof type-name");
+ require_sizeof_type(p, t);
return (i64)c_abi_sizeof(p->abi, t);
}
}
@@ -447,6 +463,7 @@ static i64 cexpr_unary(Parser* p, SrcLoc loc) {
parse_unary(p);
{
const Type* ty = cg_top_type(p->cg);
+ require_sizeof_type(p, ty);
i64 sz = (i64)c_abi_sizeof(p->abi, ty);
cg_drop(p->cg);
return sz;
@@ -574,6 +591,8 @@ void coerce_top_to_lvalue(Parser* p) {
cg_convert(p->cg, dst);
} else if (type_is_arith(src) && type_is_ptr(dst)) {
cg_convert(p->cg, dst);
+ } else if (type_is_ptr(src) && type_is_ptr(dst)) {
+ cg_convert(p->cg, dst);
}
}
@@ -1378,6 +1397,7 @@ void parse_unary(Parser* p) {
cg_push_local_typed(p->cg, vla_slot, ty_size_t(p));
cg_load(p->cg);
} else {
+ require_sizeof_type(p, ty);
cg_push_int(p->cg, (i64)c_abi_sizeof(p->abi, ty), ty_size_t(p));
}
return;
@@ -1946,9 +1966,21 @@ void parse_assign_expr(Parser* p) {
return;
}
advance(p);
+ const Type* lhs = cg_top_type(p->cg);
+ {
+ if (lhs && (lhs->qual & Q_CONST)) {
+ perr(p, "assignment to const-qualified object");
+ }
+ }
if (is_simple_assign) {
parse_assign_expr(p);
to_rvalue(p);
+ {
+ const Type* rhs = cg_top_type(p->cg);
+ if (type_is_ptr(rhs) && type_is_arith(lhs)) {
+ perr(p, "incompatible assignment from pointer to integer");
+ }
+ }
coerce_top_to_lvalue(p);
cg_store(p->cg);
return;
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -88,7 +88,8 @@ typedef struct SymEntry SymEntry;
struct SymEntry {
Sym name;
u8 kind; /* SymEntryKind */
- u8 pad[3];
+ u8 defined;
+ u8 pad[2];
const Type* type;
union {
FrameSlot slot;
diff --git a/lang/c/parse/parse_stmt.c b/lang/c/parse/parse_stmt.c
@@ -219,6 +219,9 @@ static void parse_case_stmt(Parser* p) {
SrcLoc loc = tok_loc_stmt(&p->cur);
if (!p->cur_switch) perr(p, "'case' label not in switch statement");
v = eval_const_int(p, loc);
+ for (ce = p->cur_switch->cases; ce; ce = ce->next) {
+ if (ce->value == v) perr(p, "duplicate case value");
+ }
expect_punct(p, ':', "':' after case constant");
L = cg_label_new(p->cg);
cg_label_place(p->cg, L);
diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c
@@ -166,6 +166,10 @@ static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape,
}
loc = tok_loc(&p->cur);
a->v.i = eval_const_int(p, loc);
+ if (a->kind == ATTR_ALIGNED && a->v.i > 0 &&
+ (((u64)a->v.i & ((u64)a->v.i - 1u)) != 0)) {
+ perr(p, "attribute 'aligned' argument must be a power of two");
+ }
a->nargs = 1;
expect_punct(p, ')', "')' after attribute integer argument");
return;
@@ -386,6 +390,7 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
TypeSpecAccum acc;
SrcLoc loc;
int seen = 0;
+ int storage_seen = 0;
const Type* tagged_ty = NULL;
memset(&acc, 0, sizeof acc);
out->type = NULL;
@@ -502,10 +507,14 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
advance(p);
seen = 1;
} else if (is_kw(p, &t, KW_STATIC)) {
+ if (storage_seen) perr(p, "multiple storage-class specifiers");
+ storage_seen = 1;
out->storage = DS_STATIC;
advance(p);
seen = 1;
} else if (is_kw(p, &t, KW_EXTERN)) {
+ if (storage_seen) perr(p, "multiple storage-class specifiers");
+ storage_seen = 1;
out->storage = DS_EXTERN;
advance(p);
seen = 1;
@@ -541,6 +550,8 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
advance(p);
seen = 1;
} else if (is_kw(p, &t, KW_TYPEDEF)) {
+ if (storage_seen) perr(p, "multiple storage-class specifiers");
+ storage_seen = 1;
out->storage = DS_TYPEDEF;
advance(p);
seen = 1;
@@ -567,8 +578,19 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
out->flags |= DF_THREAD;
advance(p);
seen = 1;
- } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) ||
- is_kw(p, &t, KW_AUTO)) {
+ } else if (is_kw(p, &t, KW_NORETURN)) {
+ advance(p);
+ seen = 1;
+ } else if (is_kw(p, &t, KW_REGISTER)) {
+ if (storage_seen) perr(p, "multiple storage-class specifiers");
+ storage_seen = 1;
+ out->storage = DS_REGISTER;
+ advance(p);
+ seen = 1;
+ } else if (is_kw(p, &t, KW_AUTO)) {
+ if (storage_seen) perr(p, "multiple storage-class specifiers");
+ storage_seen = 1;
+ out->storage = DS_AUTO;
advance(p);
seen = 1;
} else if (!acc.saw_explicit_type && !tagged_ty && t.kind == TOK_IDENT &&
@@ -605,6 +627,10 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
out->type = ty_int(p);
}
}
+ if (out->type && out->quals) {
+ out->type = type_qualified(p->pool, out->type,
+ (u16)(out->type->qual | out->quals));
+ }
}
return seen;
}
@@ -673,6 +699,10 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) {
if (is_punct(&p->cur, ':')) {
advance(p);
i64 w = eval_const_int(p, mloc);
+ if (w < 0) perr(p, "negative bit-field width");
+ if (w > (i64)c_abi_sizeof(p->abi, specs.type) * 8) {
+ perr(p, "bit-field width exceeds its type width");
+ }
f.name = 0;
f.type = specs.type;
f.bitfield_width = (u16)w;
@@ -689,6 +719,11 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) {
&mname, &mloc, &mattrs);
if (accept_punct(p, ':')) {
i64 w = eval_const_int(p, mloc);
+ if (w < 0) perr(p, "negative bit-field width");
+ if (w == 0 && mname != 0) perr(p, "zero-width bit-field must be unnamed");
+ if (w > (i64)c_abi_sizeof(p->abi, mty) * 8) {
+ perr(p, "bit-field width exceeds its type width");
+ }
f.name = mname;
f.type = mty;
f.bitfield_width = (u16)w;
@@ -789,9 +824,7 @@ const Type* parse_struct_or_union(Parser* p, TypeKind kind,
if (opts.align_override > target->rec.align_override)
target->rec.align_override = opts.align_override;
}
- if (existing) {
- existing->complete = 1;
- }
+ if (te) te->complete = 1;
return target;
}