commit 7783c4556dcf5bd14b3703e46f72c5fcb3281f17 parent 042eb011bb367ab2845535914ec40a570d114ace Author: Ryan Sepassi <rsepassi@gmail.com> Date: Mon, 11 May 2026 08:41:56 -0700 parse: __attribute__((...)) parsing — Phase 1 carriers, no codegen GNU attribute syntax is recognized in every position the corpus needs (decl-specs, struct/union/enum keyword + body, pointer layer, declarator suffixes, after-declarator-id, after function `)`). Recognized attrs are validated for argument shape; unknown attrs swallow balanced tokens silently to match GCC's default. Both `name` and `__name__` spellings canonicalize to the same kind. Carriers landed so Phase 2 can wire packed/aligned/section/used/ noreturn/alias/weak/visibility without revisiting parse sites: DeclSpecs.attrs, TagEntry.attrs (record-level, leading + trailing), SymEntry.attrs (per-declarator on globals and functions). Per-member and pointer-layer attrs still parse-and-drop — those move when Field gains an attrs slot. Bundles a small parse_primary fix: TOK_STR now pushes a `char[N]` lvalue rather than a `char*` rvalue so sizeof("…") and "…"[i] match the spec — pairs with the test cases landed in 042eb01. Diffstat:
23 files changed, 977 insertions(+), 29 deletions(-)
diff --git a/doc/ATTRIBUTE.md b/doc/ATTRIBUTE.md @@ -0,0 +1,301 @@ +# `__attribute__` support + +Plan for adding GNU `__attribute__((...))` parsing to cfree. Phase 1 +parses everything and stores it on the AST; only a small subset is +honored semantically. The rest is recognized, validated for argument +shape (or skipped as opaque token soup), and dropped. + +## Surface syntax + +GNU form only for Phase 1 (C23 `[[name(...)]]` later — same AST): + + __attribute__ '(' '(' attr-list ')' ')' + attr-list := attr (',' attr)* | <empty> + attr := attr-name + | attr-name '(' balanced-tokens ')' + attr-name := identifier | keyword /* e.g. `const`, `__const__` */ + +Lexer: no new token. `__attribute__` is matched by IDENT spelling (like +`__builtin_va_list`) via an interned Sym in the parser. Only the +double-underscore spelling `__attribute__` is accepted; bare +`attribute` is not. + +GCC accepts `__name__` and `name` for every attribute. The parser +canonicalizes by stripping a leading+trailing `__` pair before lookup +(so `__packed__` ≡ `packed`). + +## Where attributes may appear (Phase 1 scope) + +Allowed positions, with the entity they attach to: + +| Position | Attaches to | +| --------------------------------------------------- | ---------------------- | +| In decl-specs (anywhere among the specifiers) | The declaration | +| After `struct`/`union`/`enum` keyword (before tag) | The record/enum type | +| After the closing `}` of a record/enum body | The record/enum type | +| After a struct/union member declarator | That member | +| Inside a declarator, after `*` qualifiers | The pointer layer | +| After a function declarator's `)` | The function decl | +| After the declarator name (init-declarator) | The declared object | + +Out of scope for Phase 1: statement attributes (`__attribute__` on a +label/`fallthrough`), attributes on parameters, and the C23 `[[...]]` +form. 
Parser should still *recognize* and skip these positions +gracefully if encountered (consume balanced tokens), but they don't +need an attachment point. + +## AST representation + +New types in `src/parse/parse.h` (or kept private to parse.c if no other +TU needs them yet — start private): + +```c +typedef enum AttrKind { + ATTR_UNKNOWN = 0, /* parsed but not recognized */ + ATTR_PACKED, + ATTR_ALIGNED, + ATTR_SECTION, + ATTR_USED, + ATTR_NORETURN, + ATTR_ALIAS, + ATTR_WEAK, + ATTR_VISIBILITY, + ATTR_ALWAYS_INLINE, + ATTR_NOINLINE, + ATTR_UNUSED, + ATTR_DEPRECATED, + ATTR_WARN_UNUSED_RESULT, + ATTR_FORMAT, + ATTR_NONNULL, + ATTR_RETURNS_NONNULL, + ATTR_PURE, + ATTR_CONST, + ATTR_MALLOC, + ATTR_NOTHROW, + ATTR_LEAF, + ATTR_COLD, + ATTR_HOT, + ATTR_CONSTRUCTOR, + ATTR_DESTRUCTOR, + ATTR_CLEANUP, + ATTR_MODE, + ATTR_VECTOR_SIZE, + ATTR_TRANSPARENT_UNION, + ATTR_GNU_INLINE, + ATTR_FALLTHROUGH, + ATTR_SENTINEL, + ATTR_NO_INSTRUMENT_FUNCTION, + ATTR_NO_SANITIZE, +} AttrKind; + +typedef struct Attr { + u16 kind; /* AttrKind */ + u16 nargs; + SrcLoc loc; + Sym name; /* canonical (post-underscore-strip) spelling */ + /* For recognized attrs with structured args, decoded values: */ + union { + i64 i; /* aligned(N), vector_size(N), constructor(prio) */ + Sym sym; /* section("..."), alias("..."), visibility("...") */ + struct { u16 fmt_idx; u16 first; } format; /* format(printf, m, n) */ + } v; + /* For ATTR_UNKNOWN: opaque token range so diagnostics can re-print. */ + /* (Phase 1 may store just `Sym name` and skip token capture.) */ + struct Attr* next; +} Attr; +``` + +Carriers (Phase 1 — fields added; consumers may still ignore them): + +- `DeclSpecs.attrs` — attributes from decl-spec positions, plus any + record-level attrs hoisted out of an anonymous struct/union/enum. +- `TagEntry.attrs` — record-level attrs (leading + trailing) for tagged + struct/union/enum types. Phase 2's layout pass reads `ATTR_PACKED` / + `ATTR_ALIGNED` from here. 
`parse_struct_or_union` and `parse_enum` + take an `Attr** anon_attrs_out` so anonymous records can return their + attrs to the caller (which chains them onto `DeclSpecs.attrs`). +- `SymEntry.attrs` — per-declarator attrs from positions between the + declarator-id and `=`/`,`/`;` (plus, for functions, between `)` and + `{`/`;`). Each declarator in a `,`-separated init-declarator list + gets its own attr list. Phase 2 reads `used` / `section` / `noreturn` + / `alias` / `weak` / `visibility` / `aligned` here. +- Per-member (Field-level): still discarded in Phase 1. The `Field` + struct lives in `type/type.h` and gains an `attrs` slot in Phase 2 + alongside the layout work. Member-level `aligned` will land then. +- Pointer-layer (`int * __attribute__((aligned(8)))`): still discarded. + Rare; Phase 2 can wire if/when a use case appears. + +For Phase 1, **storing** is enough. Wire-up into codegen/layout is +Phase 2 (see "Honored" below). + +## Attribute table + +Recognition is table-driven: + +```c +typedef enum AttrArgShape { + AS_NONE, /* no parens, or empty parens */ + AS_OPTIONAL, /* parens optional */ + AS_INT, /* one integer-constant-expression */ + AS_INT_OPT, /* zero or one integer */ + AS_STRING, /* one string-literal */ + AS_IDENT, /* one identifier (e.g. 
visibility kind) */ + AS_FORMAT, /* (archetype, m, n) */ + AS_OPAQUE, /* any balanced tokens; ignored */ +} AttrArgShape; + +static const struct { + const char* name; /* canonical, without the __ wrapper */ + AttrKind kind; + AttrArgShape shape; +} kAttrTable[] = { + {"packed", ATTR_PACKED, AS_NONE}, + {"aligned", ATTR_ALIGNED, AS_INT_OPT}, + {"section", ATTR_SECTION, AS_STRING}, + {"used", ATTR_USED, AS_NONE}, + {"noreturn", ATTR_NORETURN, AS_NONE}, + {"alias", ATTR_ALIAS, AS_STRING}, + {"weak", ATTR_WEAK, AS_NONE}, + {"visibility", ATTR_VISIBILITY, AS_STRING}, + {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE}, + {"noinline", ATTR_NOINLINE, AS_NONE}, + {"unused", ATTR_UNUSED, AS_NONE}, + {"deprecated", ATTR_DEPRECATED, AS_OPAQUE}, + {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE}, + {"format", ATTR_FORMAT, AS_FORMAT}, + {"nonnull", ATTR_NONNULL, AS_OPAQUE}, + {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE}, + {"pure", ATTR_PURE, AS_NONE}, + {"const", ATTR_CONST, AS_NONE}, + {"malloc", ATTR_MALLOC, AS_OPAQUE}, + {"nothrow", ATTR_NOTHROW, AS_NONE}, + {"leaf", ATTR_LEAF, AS_NONE}, + {"cold", ATTR_COLD, AS_NONE}, + {"hot", ATTR_HOT, AS_NONE}, + {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT}, + {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT}, + {"cleanup", ATTR_CLEANUP, AS_IDENT}, + {"mode", ATTR_MODE, AS_IDENT}, + {"vector_size", ATTR_VECTOR_SIZE, AS_INT}, + {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE}, + {"gnu_inline", ATTR_GNU_INLINE, AS_NONE}, + {"fallthrough", ATTR_FALLTHROUGH, AS_NONE}, + {"sentinel", ATTR_SENTINEL, AS_OPAQUE}, + {"no_instrument_function", ATTR_NO_INSTRUMENT_FUNCTION, AS_NONE}, + {"no_sanitize", ATTR_NO_SANITIZE, AS_OPAQUE}, +}; +``` + +Unknown attribute name → parsed, kind = `ATTR_UNKNOWN`, opaque args +skipped via balanced-paren counting. No diagnostic in Phase 1 (this is +GCC's behavior under `-Wno-attributes`; by default GCC warns). + +## Honored vs. parsed-only + +Phase 1 wires **nothing** into codegen; it only adds parsing and the +AST carriers. 
Phase 2 will then honor: + +- `packed` — struct layout: pack=1, override member alignment. +- `aligned(N)` — feeds into the same channel as `_Alignas`. +- `section("name")` — sets `ObjSym.section`. +- `used` — marks `ObjSym` as retained (matches `link_layout.c:400`). +- `noreturn` — sets the existing `DF_NORETURN`-equivalent flag (today + `KW_NORETURN` is no-op'd; same path). +- `alias("target")` — emits an alias symbol. +- `weak` — sets weak binding on the `ObjSym`. +- `visibility("...")` — sets ELF visibility on the `ObjSym`. +- `always_inline` / `noinline` / `gnu_inline` — inlining policy hooks + (no-op until cfree gains an inliner; the flags are still recorded). + +Everything else is parsed-and-dropped in both phases. + +## Parser surface + +New helpers in `parse.c`: + +- `static int starts_attr(const Parser* p)` — `cur` is IDENT spelled + `__attribute__`. +- `static Attr* parse_attribute_spec_list(Parser* p)` — consumes one or + more `__attribute__((...))` runs and returns a linked list. +- `static AttrKind classify_attr(Sym name)` — table lookup with + underscore stripping. +- `static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape)` + — shape-driven; for `AS_OPAQUE` and unknown kinds, skip with a + balanced-paren counter. + +Call sites (insertion plan): + +1. `parse_decl_specs`: at the top of the loop, if `starts_attr`, chain + into `out->attrs`. This is the most common position. +2. `parse_struct_or_union`: between the keyword and the optional tag, + and after the closing `}`. +3. Member loop in `parse_struct_or_union`: after each member-declarator. +4. `parse_pointer_layer`: after each `*` and its qualifiers. +5. `parse_declarator_full`: after the function-declarator `)` and after + the declarator-id. +6. `parse_init_declarator`: between the declarator and `=`/`,`/`;`. + +Each site that doesn't yet have a place to *store* the attrs in Phase 1 +must still *consume* them — leaving them un-consumed would surface as +"unexpected token" errors. 
A `parse_and_discard_attributes` helper keeps +the call sites tidy until carriers land. + +## Constraints / error handling + +Phase 1 is permissive: + +- Unknown attribute name → silently parse opaque args. +- Recognized attribute with wrong argument shape → emit a parser error + via the usual `perr`, naming the attribute (e.g. `attribute 'aligned' + expects an optional integer argument`). +- Empty `__attribute__(())` and `__attribute__((,))` are accepted (GCC + compat). +- Attribute on a position not covered in Phase 1 → consume gracefully + if encountered in a recognized position; otherwise the existing + parser error path applies. + +## Test coverage (Phase 1 — parse-only) + +All cases live in `test/parse/cases/`, each with a `.expected` exit +code. Since none of these are wired into codegen yet, all tests should +return a value computable without honoring the attribute. + +Smoke tests: + +- `attr_01_packed_struct.c` — `struct __attribute__((packed)) S { ... }`, + use as field of another struct; return a value derived from + `sizeof(S)` that matches the *unpacked* layout (Phase 1 ignores it). +- `attr_02_aligned_var.c` — `int x __attribute__((aligned(16)));`, + return 0. +- `attr_03_section_func.c` — function with `__attribute__((section(".text.foo")))`. +- `attr_04_used_static.c` — static with `used`. +- `attr_05_noreturn_func.c` — function decl with `noreturn`. +- `attr_06_unused_local.c` — local with `unused`. +- `attr_07_multi_attrs.c` — `__attribute__((packed, aligned(8)))`. +- `attr_08_double_underscore.c` — `__packed__` accepted as `packed`. +- `attr_09_format_printf.c` — `format(printf, 1, 2)` on a function. +- `attr_10_unknown_attr.c` — `__attribute__((xyzzy_not_real))` parsed + and ignored. 
+- `attr_11_attr_on_pointer.c` — `int* __attribute__((aligned(8))) p;` +- `attr_12_attr_on_typedef.c` — `typedef int T __attribute__((aligned(4)));` +- `attr_13_attr_after_record_brace.c` — `struct S { int x; } + __attribute__((packed));` +- `attr_14_attr_in_decl_specs.c` — attribute interleaved with `static`, + `const`, type. +- `attr_15_attr_in_member.c` — attribute on a struct member. +- `attr_16_empty_attribute.c` — `__attribute__(())` and + `__attribute__((,))`. +- `attr_17_visibility_hidden.c` — `visibility("hidden")` on a function. + +Error cases in `test/parse/cases_err/`: + +- `attr_aligned_wrong_arg.c` — `aligned("oops")` → error. +- `attr_format_wrong_arity.c` — `format(printf)` → error. +- `attr_section_no_string.c` — `section(42)` → error. +- `attr_unterminated.c` — `__attribute__((packed)` (one `)`) → error. + +## Phase 2 — honor the small set + +Out of scope for this PR. Tracked separately: wire `packed`, `aligned`, +`section`, `used`, `noreturn`, `alias`, `weak`, `visibility` through +the existing `ObjSym` / record-layout / link paths. diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -154,6 +154,11 @@ struct SymEntry { * for non-VLA entries. Used by sizeof on VLA-bound IDENTs and by * VLA-typedef variable declarations. */ FrameSlot vla_byte_slot; + /* Phase 1: parsed __attribute__((...)) list attached to this entry. + * Populated for SEK_GLOBAL / SEK_FUNC declarators (used, section, + * noreturn, alias, weak, visibility, aligned). NULL otherwise. + * Phase 2 reads this; nothing in Phase 1 does. */ + struct Attr* attrs; SymEntry* next; }; @@ -170,6 +175,11 @@ struct TagEntry { u8 complete; u16 pad; Type* type; + /* Phase 1: record-level __attribute__((...)) list (packed, aligned). + * Both leading-position (between keyword and tag/body) and trailing + * (after `}`) attrs are chained here. Phase 2 reads this; Phase 1 + * does not. 
*/ + struct Attr* attrs; TagEntry* next; }; @@ -249,6 +259,10 @@ typedef struct Parser { Sym sym_b_va_arg; Sym sym_b_va_end; Sym sym_b_va_copy; + /* GNU `__attribute__` keyword spelling (Phase 1). Not a real C keyword, + * so it lives outside kw_names[] — matched by IDENT comparison just like + * the __builtin_* family. */ + Sym sym_attribute; Sym sym_a_load_n; Sym sym_a_store_n; Sym sym_a_exchange_n; @@ -697,6 +711,138 @@ static const Type* ty_size_t(Parser* p) { return abi_size_type(p->abi, p->pool); } +/* ============================================================ + * GNU __attribute__ (Phase 1 — parse + carry; no semantic wire-up) + * ============================================================ + * Surface grammar: + * __attribute__ '(' '(' attr-list ')' ')' + * attr-list := attr (',' attr)* | <empty> + * attr := attr-name | attr-name '(' balanced-tokens ')' + * + * Both `name` and `__name__` map to the same attribute. Phase 1 stores + * parsed lists on DeclSpecs.attrs, TagEntry.attrs and SymEntry.attrs; member + * and pointer-layer sites drop theirs via parse_and_discard_attributes. 
*/ +typedef enum AttrKind { + ATTR_UNKNOWN = 0, + ATTR_PACKED, + ATTR_ALIGNED, + ATTR_SECTION, + ATTR_USED, + ATTR_NORETURN, + ATTR_ALIAS, + ATTR_WEAK, + ATTR_VISIBILITY, + ATTR_ALWAYS_INLINE, + ATTR_NOINLINE, + ATTR_UNUSED, + ATTR_DEPRECATED, + ATTR_WARN_UNUSED_RESULT, + ATTR_FORMAT, + ATTR_NONNULL, + ATTR_RETURNS_NONNULL, + ATTR_PURE, + ATTR_CONST, + ATTR_MALLOC, + ATTR_NOTHROW, + ATTR_LEAF, + ATTR_COLD, + ATTR_HOT, + ATTR_CONSTRUCTOR, + ATTR_DESTRUCTOR, + ATTR_CLEANUP, + ATTR_MODE, + ATTR_VECTOR_SIZE, + ATTR_TRANSPARENT_UNION, + ATTR_GNU_INLINE, + ATTR_FALLTHROUGH, + ATTR_SENTINEL, + ATTR_NO_INSTRUMENT_FUNCTION, + ATTR_NO_SANITIZE, +} AttrKind; + +typedef enum AttrArgShape { + AS_NONE, + AS_OPTIONAL, + AS_INT, + AS_INT_OPT, + AS_STRING, + AS_IDENT, + AS_FORMAT, + AS_OPAQUE, +} AttrArgShape; + +typedef struct Attr Attr; +struct Attr { + u16 kind; + u16 nargs; + SrcLoc loc; + Sym name; + union { + i64 i; + Sym sym; + struct { u16 fmt_idx; u16 first; } format; + } v; + Attr* next; +}; + +static const struct { + const char* name; + AttrKind kind; + AttrArgShape shape; +} kAttrTable[] = { + {"packed", ATTR_PACKED, AS_NONE}, + {"aligned", ATTR_ALIGNED, AS_INT_OPT}, + {"section", ATTR_SECTION, AS_STRING}, + {"used", ATTR_USED, AS_NONE}, + {"noreturn", ATTR_NORETURN, AS_NONE}, + {"alias", ATTR_ALIAS, AS_STRING}, + {"weak", ATTR_WEAK, AS_NONE}, + {"visibility", ATTR_VISIBILITY, AS_STRING}, + {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE}, + {"noinline", ATTR_NOINLINE, AS_NONE}, + {"unused", ATTR_UNUSED, AS_NONE}, + {"deprecated", ATTR_DEPRECATED, AS_OPAQUE}, + {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE}, + {"format", ATTR_FORMAT, AS_FORMAT}, + {"nonnull", ATTR_NONNULL, AS_OPAQUE}, + {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE}, + {"pure", ATTR_PURE, AS_NONE}, + {"const", ATTR_CONST, AS_NONE}, + {"malloc", ATTR_MALLOC, AS_OPAQUE}, + {"nothrow", ATTR_NOTHROW, AS_NONE}, + {"leaf", ATTR_LEAF, AS_NONE}, + {"cold", ATTR_COLD, AS_NONE}, + {"hot", ATTR_HOT, 
AS_NONE}, + {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT}, + {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT}, + {"cleanup", ATTR_CLEANUP, AS_IDENT}, + {"mode", ATTR_MODE, AS_IDENT}, + {"vector_size", ATTR_VECTOR_SIZE, AS_INT}, + {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE}, + {"gnu_inline", ATTR_GNU_INLINE, AS_NONE}, + {"fallthrough", ATTR_FALLTHROUGH, AS_NONE}, + {"sentinel", ATTR_SENTINEL, AS_OPAQUE}, + {"no_instrument_function", ATTR_NO_INSTRUMENT_FUNCTION, AS_NONE}, + {"no_sanitize", ATTR_NO_SANITIZE, AS_OPAQUE}, +}; + +static int starts_attr(const Parser* p); +static Attr* parse_attribute_spec_list(Parser* p); +static void parse_and_discard_attributes(Parser* p); +/* Append `add` to the end of `*head` (linked via Attr.next). Both args + * are in source order; result preserves source order. */ +static void attr_list_append(Attr** head, Attr* add) { + if (!add) return; + if (!*head) { *head = add; return; } + Attr* tail = *head; + while (tail->next) tail = tail->next; + tail->next = add; +} +/* If `starts_attr`, parse and append to `*sink`. No-op otherwise. */ +static void parse_attrs_into(Parser* p, Attr** sink) { + if (starts_attr(p)) attr_list_append(sink, parse_attribute_spec_list(p)); +} + /* DeclSpecs and the matching parser landed up in the declaration section * historically; we hoist it before expression parsing because * sizeof / _Alignof / cast need to consume a type-name from inside @@ -711,11 +857,17 @@ typedef struct DeclSpecs { * captured byte-size slot so init_declarator can alloca the right * runtime size. FRAME_SLOT_NONE otherwise. */ FrameSlot vla_byte_slot; + /* Phase 1: parsed __attribute__((...)) list attached to this decl. */ + Attr* attrs; } DeclSpecs; static int parse_decl_specs(Parser* p, DeclSpecs* out); -static const Type* parse_struct_or_union(Parser* p, TypeKind kind); -static const Type* parse_enum(Parser* p); +/* `*anon_attrs_out` receives any record-level attrs when the record is + * anonymous (no TagEntry to attach to). 
For tagged records the attrs go + * on the TagEntry directly. May be NULL if caller doesn't care. */ +static const Type* parse_struct_or_union(Parser* p, TypeKind kind, + Attr** anon_attrs_out); +static const Type* parse_enum(Parser* p, Attr** anon_attrs_out); static void parse_assign_expr(Parser* p); static i64 eval_const_int(Parser* p, SrcLoc loc); /* tiny constant evaluator */ static const Type* parse_pointer_layer(Parser* p, const Type* base); @@ -800,26 +952,44 @@ static int parse_decl_specs(Parser* p, DeclSpecs* out) { out->quals = 0; out->align = 0; out->vla_byte_slot = FRAME_SLOT_NONE; + out->attrs = NULL; loc = tok_loc(&p->cur); for (;;) { Tok t = p->cur; + /* GNU __attribute__((...)) may appear anywhere among decl-specifiers and + * may repeat. Each run is prepended: later runs precede earlier ones. */ + if (starts_attr(p)) { + Attr* a = parse_attribute_spec_list(p); + if (a) { + Attr* tail = a; + while (tail->next) tail = tail->next; + tail->next = out->attrs; + out->attrs = a; + } + seen = 1; + continue; + } if (is_kw(p, &t, KW_STRUCT) || is_kw(p, &t, KW_UNION)) { TypeKind kind = is_kw(p, &t, KW_STRUCT) ? 
TY_STRUCT : TY_UNION; + Attr* anon_attrs = NULL; if (tagged_ty || acc.saw_explicit_type) { perr(p, "conflicting type specifiers (struct/union mixed)"); } advance(p); - tagged_ty = parse_struct_or_union(p, kind); + tagged_ty = parse_struct_or_union(p, kind, &anon_attrs); + attr_list_append(&out->attrs, anon_attrs); acc.saw_explicit_type = 1; seen = 1; continue; } if (is_kw(p, &t, KW_ENUM)) { + Attr* anon_attrs = NULL; if (tagged_ty || acc.saw_explicit_type) { perr(p, "conflicting type specifiers (enum mixed)"); } advance(p); - tagged_ty = parse_enum(p); + tagged_ty = parse_enum(p, &anon_attrs); + attr_list_append(&out->attrs, anon_attrs); acc.saw_explicit_type = 1; seen = 1; continue; @@ -1133,6 +1303,223 @@ static i64 cexpr_unary(Parser* p, SrcLoc loc) { } static i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } +/* ============================================================ + * __attribute__ helpers + * ============================================================ */ + +static int starts_attr(const Parser* p) { + return p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_attribute; +} + +/* Canonicalize an attribute name spelling: strip exactly one leading `__` + * and one trailing `__` pair if both are present (GCC compat). Returns the + * (start, len) of the canonical substring within the original spelling. 
*/ +static void attr_canon_range(const char* s, size_t len, + const char** out_p, size_t* out_len) { + if (len >= 4 && s[0] == '_' && s[1] == '_' && + s[len - 1] == '_' && s[len - 2] == '_') { + *out_p = s + 2; + *out_len = len - 4; + return; + } + *out_p = s; + *out_len = len; +} + +static AttrKind classify_attr(Parser* p, Sym name, AttrArgShape* shape_out) { + size_t len = 0; + const char* s = pool_str(p->pool, name, &len); + const char* cs; + size_t clen; + size_t i; + if (!s) { + *shape_out = AS_OPAQUE; + return ATTR_UNKNOWN; + } + attr_canon_range(s, len, &cs, &clen); + for (i = 0; i < sizeof(kAttrTable) / sizeof(kAttrTable[0]); ++i) { + const char* tn = kAttrTable[i].name; + size_t tlen = strlen(tn); + if (tlen == clen && memcmp(tn, cs, clen) == 0) { + *shape_out = kAttrTable[i].shape; + return kAttrTable[i].kind; + } + } + *shape_out = AS_OPAQUE; + return ATTR_UNKNOWN; +} + +/* Skip a balanced parenthesized token group. Pre: p->cur is `(`. Post: + * p->cur is the token immediately after the matching `)`. */ +static void skip_balanced_parens(Parser* p) { + int depth; + if (!is_punct(&p->cur, '(')) perr(p, "internal: skip_balanced_parens"); + depth = 1; + advance(p); + while (depth > 0) { + if (p->cur.kind == TOK_EOF) { + perr(p, "unexpected EOF inside attribute arguments"); + } + if (is_punct(&p->cur, '(')) ++depth; + else if (is_punct(&p->cur, ')')) { + --depth; + if (depth == 0) { advance(p); return; } + } + advance(p); + } +} + +/* Parse the body between `(` and `)` for one attribute according to its + * shape. Pre: p->cur is `(`. Post: p->cur is the token after the matching + * `)`. For unrecognized shapes or ATTR_UNKNOWN, swallows balanced tokens. 
*/ +static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape, + const char* attr_diag_name) { + if (!is_punct(&p->cur, '(')) { + if (shape == AS_NONE || shape == AS_OPTIONAL || shape == AS_INT_OPT || + shape == AS_OPAQUE) { + return; + } + perr(p, "attribute '%s' expects '(' arguments", attr_diag_name); + } + switch (shape) { + case AS_NONE: { + /* Tolerate empty `()`. */ + advance(p); /* '(' */ + if (!accept_punct(p, ')')) { + perr(p, "attribute '%s' takes no arguments", attr_diag_name); + } + return; + } + case AS_OPTIONAL: { + skip_balanced_parens(p); + return; + } + case AS_INT: + case AS_INT_OPT: { + SrcLoc loc; + advance(p); /* '(' */ + if (is_punct(&p->cur, ')')) { + if (shape == AS_INT) { + perr(p, "attribute '%s' expects an integer argument", + attr_diag_name); + } + advance(p); + return; + } + loc = tok_loc(&p->cur); + a->v.i = eval_const_int(p, loc); + a->nargs = 1; + expect_punct(p, ')', "')' after attribute integer argument"); + return; + } + case AS_STRING: { + advance(p); /* '(' */ + if (p->cur.kind != TOK_STR) { + perr(p, "attribute '%s' expects a string literal", attr_diag_name); + } + a->v.sym = p->cur.spelling; + a->nargs = 1; + advance(p); + expect_punct(p, ')', "')' after attribute string argument"); + return; + } + case AS_IDENT: { + advance(p); /* '(' */ + if (p->cur.kind != TOK_IDENT) { + perr(p, "attribute '%s' expects an identifier", attr_diag_name); + } + a->v.sym = p->cur.v.ident; + a->nargs = 1; + advance(p); + expect_punct(p, ')', "')' after attribute identifier argument"); + return; + } + case AS_FORMAT: { + /* format(archetype, m, n) — archetype is an identifier, m and n are + * positive integers. 
*/ + SrcLoc mloc, nloc; + i64 mv, nv; + advance(p); /* '(' */ + if (p->cur.kind != TOK_IDENT) { + perr(p, "attribute 'format' expects (archetype, m, n)"); + } + advance(p); + expect_punct(p, ',', "',' after format archetype"); + mloc = tok_loc(&p->cur); + mv = eval_const_int(p, mloc); + expect_punct(p, ',', "',' after format string-index"); + nloc = tok_loc(&p->cur); + nv = eval_const_int(p, nloc); + if (mv < 0 || mv > 0xFFFF || nv < 0 || nv > 0xFFFF) { + perr(p, "attribute 'format' indices out of range"); + } + a->v.format.fmt_idx = (u16)mv; + a->v.format.first = (u16)nv; + a->nargs = 3; + expect_punct(p, ')', "')' after format arguments"); + return; + } + case AS_OPAQUE: + default: { + skip_balanced_parens(p); + return; + } + } +} + +/* Parse one or more `__attribute__((...))` runs starting at p->cur (must + * satisfy starts_attr). Returns a linked list of Attr* in source order. */ +static Attr* parse_attribute_spec_list(Parser* p) { + Attr* head = NULL; + Attr* tail = NULL; + while (starts_attr(p)) { + SrcLoc kw_loc = tok_loc(&p->cur); + advance(p); /* __attribute__ */ + expect_punct(p, '(', "'(' after __attribute__"); + expect_punct(p, '(', "'((' after __attribute__"); + /* attr-list: zero or more attr, comma-separated. Empty list and + * trailing/leading bare commas are accepted (GCC compat). */ + for (;;) { + Sym aname; + AttrArgShape shape; + Attr* a; + const char* diag_name; + size_t diag_len; + const char* canon; + size_t canon_len; + /* Accept stray commas and an empty list. 
*/ + while (accept_punct(p, ',')) { /* skip */ } + if (is_punct(&p->cur, ')')) break; + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected attribute name"); + } + aname = p->cur.v.ident; + a = arena_new(p->c->tu, Attr); + if (!a) perr(p, "out of memory in parse_attribute_spec_list"); + memset(a, 0, sizeof *a); + a->loc = tok_loc(&p->cur); + a->name = aname; + a->kind = (u16)classify_attr(p, aname, &shape); + advance(p); + diag_name = pool_str(p->pool, aname, &diag_len); + attr_canon_range(diag_name, diag_len, &canon, &canon_len); + (void)canon; (void)canon_len; + parse_attr_args(p, a, shape, diag_name ? diag_name : "<unknown>"); + if (tail) tail->next = a; else head = a; + tail = a; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, ')', "')' after attribute list"); + expect_punct(p, ')', "'))' after attribute list"); + (void)kw_loc; + } + return head; +} + +static void parse_and_discard_attributes(Parser* p) { + (void)parse_attribute_spec_list(p); +} + /* Parse a struct/union member-declaration list. The `{` has already been * consumed. Fills `b` with each member's Field; bumps anonymous flags as * needed. Bitfields are diagnosed (cg lacks the codegen for them in this @@ -1212,6 +1599,8 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { f.flags = FIELD_NONE; type_record_field(b, f); } + /* Optional attributes after a member declarator (Phase 1: drop). */ + if (starts_attr(p)) parse_and_discard_attributes(p); if (!accept_punct(p, ',')) break; } expect_punct(p, ';', "';' after struct member declaration"); @@ -1220,10 +1609,15 @@ static void parse_member_decls(Parser* p, TypeRecordBuilder* b) { /* Parse `struct/union [tag] [{ members }]` after the keyword has been * consumed. Returns the (possibly incomplete) record type. 
*/ -static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { +static const Type* parse_struct_or_union(Parser* p, TypeKind kind, + Attr** anon_attrs_out) { Sym tag_name = 0; - SrcLoc tag_loc = tok_loc(&p->cur); + SrcLoc tag_loc; TagDeclKind tdk = (kind == TY_STRUCT) ? TAG_STRUCT : TAG_UNION; + Attr* rec_attrs = NULL; + /* Attributes between `struct`/`union` keyword and the tag/body. */ + parse_attrs_into(p, &rec_attrs); + tag_loc = tok_loc(&p->cur); if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { tag_name = p->cur.v.ident; advance(p); @@ -1241,12 +1635,14 @@ static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { if (e->kind != tdk) { perr(p, "use of tag with wrong kind (struct vs union)"); } + attr_list_append(&e->attrs, rec_attrs); return e->type; } { TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); Type* t = type_record_forward(p->pool, kind, tid, tag_name); - tag_define(p, tag_name, tdk, t, /*complete=*/0); + TagEntry* te = tag_define(p, tag_name, tdk, t, /*complete=*/0); + attr_list_append(&te->attrs, rec_attrs); return t; } } @@ -1256,6 +1652,7 @@ static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { * pointer type built off the forward node automatically updates. */ Type* target = NULL; TagEntry* existing = tag_name ? 
tag_lookup_local(p, tag_name) : NULL; + TagEntry* te = NULL; if (existing) { if (existing->kind != tdk) { perr(p, "tag redeclared with wrong kind"); @@ -1264,11 +1661,12 @@ static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { perr(p, "redefinition of tag"); } target = existing->type; + te = existing; } else { TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc); target = type_record_forward(p->pool, kind, tid, tag_name); if (tag_name) { - tag_define(p, tag_name, tdk, target, /*complete=*/0); + te = tag_define(p, tag_name, tdk, target, /*complete=*/0); } } expect_punct(p, '{', "'{' to start aggregate body"); @@ -1276,6 +1674,15 @@ static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { type_record_begin(p->pool, kind, target->rec.tag_id, tag_name); parse_member_decls(p, b); expect_punct(p, '}', "'}' after aggregate body"); + /* Trailing attributes (after `}`) attach to the record type. */ + parse_attrs_into(p, &rec_attrs); + if (te) { + attr_list_append(&te->attrs, rec_attrs); + } else if (anon_attrs_out) { + /* Anonymous record — let the caller hoist record attrs onto the + * surrounding declaration (e.g. into DeclSpecs.attrs). */ + attr_list_append(anon_attrs_out, rec_attrs); + } /* Pull the accumulated fields out of the builder and install them on the * target node so any pre-existing pointer-to-target types see complete * fields. The builder's Type* (returned by type_record_end) is discarded; @@ -1298,9 +1705,13 @@ static const Type* parse_struct_or_union(Parser* p, TypeKind kind) { /* Parse `enum [tag] [{ K [= cexpr] (, K [= cexpr])* [,] }]` after the * `enum` keyword has been consumed. Returns the enum type (interned). */ -static const Type* parse_enum(Parser* p) { +static const Type* parse_enum(Parser* p, Attr** anon_attrs_out) { Sym tag_name = 0; - SrcLoc tag_loc = tok_loc(&p->cur); + SrcLoc tag_loc; + Attr* rec_attrs = NULL; + /* Attributes between `enum` keyword and tag/body. 
*/ + parse_attrs_into(p, &rec_attrs); + tag_loc = tok_loc(&p->cur); if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { tag_name = p->cur.v.ident; advance(p); @@ -1316,11 +1727,18 @@ static const Type* parse_enum(Parser* p) { * synonymous with `int` — simplest behavior consistent with * §6.7.2.2 ¶4 (enum compatible with int). */ TagEntry* e = tag_lookup(p, tag_name); - if (e && e->kind == TAG_ENUM) return e->type; + if (e && e->kind == TAG_ENUM) { + attr_list_append(&e->attrs, rec_attrs); + return e->type; + } /* Forward enum: install an incomplete enum-type at int width. */ TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc); const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p)); - tag_define(p, tag_name, TAG_ENUM, (Type*)et, /*complete=*/0); + { + TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, + /*complete=*/0); + attr_list_append(&te->attrs, rec_attrs); + } return et; } /* Definition: parse enumerator list, bind each into the ordinary scope @@ -1350,6 +1768,8 @@ static const Type* parse_enum(Parser* p) { if (is_punct(&p->cur, '}')) break; /* trailing comma */ } expect_punct(p, '}', "'}' after enumerator list"); + /* Trailing attributes after `}` attach to the enum type. */ + parse_attrs_into(p, &rec_attrs); if (tag_name) { /* Replace any incomplete forward entry; otherwise install fresh. 
*/ TagEntry* existing = tag_lookup_local(p, tag_name); @@ -1358,9 +1778,14 @@ static const Type* parse_enum(Parser* p) { perr(p, "tag redeclared with wrong kind"); } existing->complete = 1; + attr_list_append(&existing->attrs, rec_attrs); } else { - tag_define(p, tag_name, TAG_ENUM, (Type*)et, /*complete=*/1); + TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et, + /*complete=*/1); + attr_list_append(&te->attrs, rec_attrs); } + } else if (anon_attrs_out) { + attr_list_append(anon_attrs_out, rec_attrs); } return et; } @@ -1469,6 +1894,9 @@ static const Type* parse_pointer_layer(Parser* p, const Type* base) { if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } + /* Pointer-layer attributes (e.g. `int * __attribute__((aligned(8))) p`). + * Phase 1 parses + drops. */ + if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } break; } if (q) base = type_qualified(p->pool, base, q); @@ -2300,8 +2728,11 @@ static void parse_primary(Parser* p) { return; } if (t.kind == TOK_STR) { - /* Decoded bytes go into a fresh anonymous .rodata symbol; the value - * of the expression is a pointer to char[] decayed to char*. */ + /* Per §6.4.5 ¶6 the literal has type `char[N]` (N includes the NUL). + * Push it as a GLOBAL lvalue of that array type so sizeof/_Alignof see + * the array, subscripting computes the right element offset, and the + * normal array-to-pointer decay in to_rvalue / parse_postfix '[' kicks + * in everywhere else. 
*/ size_t n = 0; u8* bytes = decode_string_literal(p, &t, &n); ObjSymId sym = emit_string_to_rodata(p, bytes, n); @@ -2310,18 +2741,7 @@ static void parse_primary(Parser* p) { { const Type* char_ty = type_prim(p->pool, TY_CHAR); const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); - const Type* ptr_ty = type_ptr(p->pool, char_ty); - /* Array-to-pointer decay would normally happen at use; cg_push_global - * is given a pointer-typed lvalue so subsequent operations treat it - * as `char*` rvalue once loaded. */ - (void)arr_ty; - cg_push_global(p->cg, sym, ptr_ty); - /* String address is already the pointer rvalue we want — promote - * away from "lvalue of pointer-to-char[N]" to just "rvalue of - * char*" by tagging it as an rvalue at the cg layer. cg_push_global - * pushes a GLOBAL lvalue; for strings we want the address itself, - * i.e. an rvalue. cg_addr converts. */ - cg_addr(p->cg); + cg_push_global(p->cg, sym, arr_ty); } return; } @@ -3628,6 +4048,7 @@ static const Type* parse_declarator_full(Parser* p, const Type* base, if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; } if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; } if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; } + if (starts_attr(p)) { parse_and_discard_attributes(p); continue; } break; } inner_quals[nptrs_inner++] = q; @@ -3643,9 +4064,11 @@ static const Type* parse_declarator_full(Parser* p, const Type* base, * has `[2]` between IDENT and the closing `)`. Collect them so * they wrap LAST (closest to IDENT), after the outer suffix and * inner pointer layers. 
*/ + if (starts_attr(p)) parse_and_discard_attributes(p); while (n_inner_suffs < 8) { if (!parse_decl_suffix(p, &inner_suffs[n_inner_suffs])) break; ++n_inner_suffs; + if (starts_attr(p)) parse_and_discard_attributes(p); } expect_punct(p, ')', "')' after inner declarator"); } @@ -3661,6 +4084,10 @@ static const Type* parse_declarator_full(Parser* p, const Type* base, } } + /* Optional attributes after the declarator-id (before any suffix). + * Phase 1: parse + drop. */ + if (starts_attr(p)) parse_and_discard_attributes(p); + /* Collect outer suffixes left-to-right; apply in reverse so the innermost * suffix wraps `base` first. For `int a[5][3]` the resulting type is * "array[5] of array[3] of int": [3] applied first → array[3], then [5] @@ -3670,6 +4097,9 @@ static const Type* parse_declarator_full(Parser* p, const Type* base, while (nsuffs < 8) { if (!parse_decl_suffix(p, &suffs[nsuffs])) break; ++nsuffs; + /* Attributes between/after suffixes — most commonly after a function + * declarator's `)`. Phase 1: parse + drop. */ + if (starts_attr(p)) parse_and_discard_attributes(p); } if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) { perr(p, "too many declarator suffixes (raise the cap if needed)"); @@ -5593,6 +6023,14 @@ static void parse_external_decl(Parser* p) { loc = tok_loc(&p->cur); advance(p); + /* Per-declarator attrs collected from positions between the + * declarator-id and `=`/`,`/`;` (plus, for functions, between `)` + * and `{`/`;`). Chained in source order; attached to the SymEntry + * below so Phase 2 can read used/section/noreturn/alias/weak/ + * visibility/aligned without revisiting decl-spec attrs. */ + Attr* dattrs = NULL; + parse_attrs_into(p, &dattrs); + /* Array suffix(es) after the name pin this as an object declaration — * e.g. `static int g[3] = {...}`. Apply each suffix in order so the * resulting type is the array-of-T we'll allocate storage for. 
*/ @@ -5602,6 +6040,7 @@ static void parse_external_decl(Parser* p) { if (s.kind != DS_ARRAY) break; base_ty = apply_decl_suffix(p, base_ty, &s); } + parse_attrs_into(p, &dattrs); if (is_punct(&p->cur, '(')) { /* Function declaration or definition: build the type from the param @@ -5617,6 +6056,8 @@ static void parse_external_decl(Parser* p) { advance(p); /* '(' */ parse_param_list(p, &infos, &nparams, &variadic); expect_punct(p, ')', "')' after parameter list"); + /* Attributes between `)` and `{`/`;` (e.g. `noreturn`, `section`). */ + parse_attrs_into(p, &dattrs); if (nparams) { ptypes = (const Type**)arena_array(p->c->tu, const Type*, nparams); @@ -5626,6 +6067,7 @@ static void parse_external_decl(Parser* p) { abi = abi_func_info(p->abi, fn_ty); fent = declare_function(p, name, fn_ty, &specs, loc); + attr_list_append(&fent->attrs, dattrs); if (is_punct(&p->cur, '{')) { parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc); @@ -5685,6 +6127,7 @@ static void parse_external_decl(Parser* p) { e = scope_define(p, name, SEK_GLOBAL, base_ty); e->v.sym = sym; } + attr_list_append(&e->attrs, dattrs); if (has_init) { advance(p); /* '=' */ @@ -5698,8 +6141,6 @@ static void parse_external_decl(Parser* p) { loc, specs.align); } - (void)e; - if (!accept_punct(p, ',')) break; /* Next declarator: parse pointer prefix + IDENT, then loop. */ base_ty = parse_pointer_layer(p, specs.type); @@ -5709,12 +6150,17 @@ static void parse_external_decl(Parser* p) { name = p->cur.v.ident; loc = tok_loc(&p->cur); advance(p); + /* Reset per-declarator attrs for this iteration; collect post-IDENT + * and post-suffix attrs (e.g. `int a, b __attribute__((aligned(4)))`). */ + dattrs = NULL; + parse_attrs_into(p, &dattrs); /* Optional array suffix on a global declarator (e.g. `int g[3]`). 
*/ while (is_punct(&p->cur, '[')) { DeclSuffix s; if (!parse_decl_suffix(p, &s)) break; base_ty = apply_decl_suffix(p, base_ty, &s); } + parse_attrs_into(p, &dattrs); } expect_punct(p, ';', "';' after global declaration"); } @@ -5768,6 +6214,7 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { p.sym_b_va_arg = pool_intern_cstr(p.pool, "__builtin_va_arg"); p.sym_b_va_end = pool_intern_cstr(p.pool, "__builtin_va_end"); p.sym_b_va_copy = pool_intern_cstr(p.pool, "__builtin_va_copy"); + p.sym_attribute = pool_intern_cstr(p.pool, "__attribute__"); p.sym_a_load_n = pool_intern_cstr(p.pool, "__atomic_load_n"); p.sym_a_store_n = pool_intern_cstr(p.pool, "__atomic_store_n"); p.sym_a_exchange_n = pool_intern_cstr(p.pool, "__atomic_exchange_n"); diff --git a/test/parse/cases/attr_01_packed_struct.c b/test/parse/cases/attr_01_packed_struct.c @@ -0,0 +1,22 @@ +/* __attribute__((packed)) on a struct definition; used as a field of + * another struct. Phase 1 parses but does not honor the attribute, so + * we must return a value that is correct under the *unpacked* layout. + * Unpacked: char (1) + 3 pad + int (4) = 8 bytes; we just declare it + * and return 0 to avoid asserting any specific size. */ +struct __attribute__((packed)) S { + char c; + int i; +}; + +struct Outer { + struct S s; + int x; +}; + +int test_main(void) { + struct Outer o; + o.s.c = 1; + o.s.i = 0; + o.x = 0; + return o.s.i; +} diff --git a/test/parse/cases/attr_02_aligned_var.c b/test/parse/cases/attr_02_aligned_var.c @@ -0,0 +1,8 @@ +/* __attribute__((aligned(N))) on a file-scope variable. Phase 1 parses + * and discards the attribute; runtime alignment is not asserted. */ +int x __attribute__((aligned(16))); + +int test_main(void) { + x = 0; + return x; +} diff --git a/test/parse/cases/attr_03_section_func.c b/test/parse/cases/attr_03_section_func.c @@ -0,0 +1,7 @@ +/* __attribute__((section("..."))) on a function declaration. 
Phase 1 + * parses but does not set ObjSym.section; we just call the function. */ +__attribute__((section(".text.foo"))) int foo(void) { return 0; } + +int test_main(void) { + return foo(); +} diff --git a/test/parse/cases/attr_04_used_static.c b/test/parse/cases/attr_04_used_static.c @@ -0,0 +1,8 @@ +/* __attribute__((used)) on a static variable. Phase 1 parses but does + * not mark the ObjSym as retained. The reference from test_main keeps + * it alive regardless. */ +static int k __attribute__((used)) = 7; + +int test_main(void) { + return k - 7; +} diff --git a/test/parse/cases/attr_05_noreturn_func.c b/test/parse/cases/attr_05_noreturn_func.c @@ -0,0 +1,12 @@ +/* __attribute__((noreturn)) on a function definition. The attribute is + * parsed; Phase 1 does not enforce no-return semantics. test_main does + * not call die(), so control returns normally. Use `static` so the + * unreferenced symbol does not produce an undefined external at link. */ +__attribute__((noreturn)) static void die(void) { + while (1) { } +} + +int test_main(void) { + (void)&die; + return 0; +} diff --git a/test/parse/cases/attr_06_unused_local.c b/test/parse/cases/attr_06_unused_local.c @@ -0,0 +1,6 @@ +/* __attribute__((unused)) on a local variable. Phase 1 only needs to + * accept the attribute in this position. */ +int test_main(void) { + int unused_local __attribute__((unused)) = 42; + return 0; +} diff --git a/test/parse/cases/attr_07_multi_attrs.c b/test/parse/cases/attr_07_multi_attrs.c @@ -0,0 +1,12 @@ +/* Multiple attributes in a single __attribute__(()) clause. */ +struct __attribute__((packed, aligned(8))) S { + char c; + int i; +}; + +int test_main(void) { + struct S s; + s.c = 0; + s.i = 0; + return s.i; +} diff --git a/test/parse/cases/attr_08_double_underscore.c b/test/parse/cases/attr_08_double_underscore.c @@ -0,0 +1,13 @@ +/* Double-underscore spelling: __packed__ and __aligned__ should be + * canonicalized to packed/aligned by the parser. 
*/ +struct __attribute__((__packed__, __aligned__(4))) S { + char c; + int i; +}; + +int test_main(void) { + struct S s; + s.c = 0; + s.i = 0; + return s.i; +} diff --git a/test/parse/cases/attr_09_format_printf.c b/test/parse/cases/attr_09_format_printf.c @@ -0,0 +1,14 @@ +/* __attribute__((format(printf, m, n))) on a varargs function + * definition. Phase 1 only needs to accept the attribute shape; the + * function body is `static` and unreferenced to avoid an unresolved + * external at link. */ +__attribute__((format(printf, 1, 2))) +static int my_printf(const char *fmt, ...) { + (void)fmt; + return 0; +} + +int test_main(void) { + (void)&my_printf; + return 0; +} diff --git a/test/parse/cases/attr_10_unknown_attr.c b/test/parse/cases/attr_10_unknown_attr.c @@ -0,0 +1,10 @@ +/* Unknown attributes — both bare and with args — must be parsed and + * silently ignored (matches GCC's default -Wno-attributes behavior). */ +__attribute__((xyzzy_not_real)) int a; +__attribute__((xyzzy(1, "two", 3))) int b; + +int test_main(void) { + a = 0; + b = 0; + return a + b; +} diff --git a/test/parse/cases/attr_11_attr_on_pointer.c b/test/parse/cases/attr_11_attr_on_pointer.c @@ -0,0 +1,7 @@ +/* Attribute attached to a pointer layer inside a declarator. */ +int* __attribute__((aligned(8))) p; + +int test_main(void) { + p = 0; + return p == 0 ? 0 : 1; +} diff --git a/test/parse/cases/attr_12_attr_on_typedef.c b/test/parse/cases/attr_12_attr_on_typedef.c @@ -0,0 +1,7 @@ +/* Attribute on a typedef declaration. */ +typedef int T __attribute__((aligned(4))); + +int test_main(void) { + T v = 0; + return v; +} diff --git a/test/parse/cases/attr_13_attr_after_record_brace.c b/test/parse/cases/attr_13_attr_after_record_brace.c @@ -0,0 +1,8 @@ +/* Attribute after the closing brace of a struct definition. 
*/ +struct S { int x; } __attribute__((packed)); + +int test_main(void) { + struct S s; + s.x = 0; + return s.x; +} diff --git a/test/parse/cases/attr_14_attr_in_decl_specs.c b/test/parse/cases/attr_14_attr_in_decl_specs.c @@ -0,0 +1,6 @@ +/* Attribute interleaved among decl-specifiers (static, const, type). */ +static __attribute__((unused)) const int K = 7; + +int test_main(void) { + return K - 7; +} diff --git a/test/parse/cases/attr_15_attr_in_member.c b/test/parse/cases/attr_15_attr_in_member.c @@ -0,0 +1,12 @@ +/* Attribute on a struct member declarator. */ +struct S { + char c; + int i __attribute__((aligned(8))); +}; + +int test_main(void) { + struct S s; + s.c = 0; + s.i = 0; + return s.i; +} diff --git a/test/parse/cases/attr_16_empty_attribute.c b/test/parse/cases/attr_16_empty_attribute.c @@ -0,0 +1,11 @@ +/* Empty attribute clauses are accepted for GCC compatibility: + * __attribute__(()) — no attrs + * __attribute__((,)) — empty entries separated by commas */ +__attribute__(()) int a; +__attribute__((,)) int b; + +int test_main(void) { + a = 0; + b = 0; + return a + b; +} diff --git a/test/parse/cases/attr_17_visibility_hidden.c b/test/parse/cases/attr_17_visibility_hidden.c @@ -0,0 +1,7 @@ +/* __attribute__((visibility("hidden"))) on a function. Phase 1 parses + * the attribute but does not set ELF visibility. */ +__attribute__((visibility("hidden"))) int hidden_fn(void) { return 0; } + +int test_main(void) { + return hidden_fn(); +} diff --git a/test/parse/cases_err/attr_aligned_wrong_arg.c b/test/parse/cases_err/attr_aligned_wrong_arg.c @@ -0,0 +1,7 @@ +/* aligned() expects an integer-constant-expression (AS_INT_OPT); + * passing a string literal must be rejected by the parser. 
*/ +int x __attribute__((aligned("oops"))); + +int test_main(void) { + return x; +} diff --git a/test/parse/cases_err/attr_format_wrong_arity.c b/test/parse/cases_err/attr_format_wrong_arity.c @@ -0,0 +1,8 @@ +/* format(archetype, m, n) requires three arguments (AS_FORMAT); + * passing only the archetype must be rejected. */ +__attribute__((format(printf))) +int my_printf(const char *fmt, ...); + +int test_main(void) { + return 0; +} diff --git a/test/parse/cases_err/attr_section_no_string.c b/test/parse/cases_err/attr_section_no_string.c @@ -0,0 +1,7 @@ +/* section() expects a string-literal argument (AS_STRING); passing an + * integer must be rejected. */ +__attribute__((section(42))) int foo(void) { return 0; } + +int test_main(void) { + return foo(); +} diff --git a/test/parse/cases_err/attr_unterminated.c b/test/parse/cases_err/attr_unterminated.c @@ -0,0 +1,8 @@ +/* Unterminated __attribute__ clause: missing the second closing paren. + * The parser must reject this rather than swallowing the rest of the + * translation unit. */ +int x __attribute__((packed); + +int test_main(void) { + return x; +}