kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4d79515eb873bad5a8cd75952628e0eebe1f2df1
parent e4cd5e7ca7a0f2f6d733d2b49fc524f8b83bc025
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 05:39:45 -0700

Port C frontend to public CG API

Diffstat:
MMakefile | 6++++++
Ainclude/abi/abi.h | 138+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/cfree/cg.h | 2++
Ainclude/core/arena.h | 31+++++++++++++++++++++++++++++++
Ainclude/core/core.h | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/core/diag.h | 19+++++++++++++++++++
Ainclude/core/hashmap.h | 202+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/core/heap.h | 16++++++++++++++++
Ainclude/core/pool.h | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/c.c | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/c.h | 12++++++++++++
Alang/c/decl/decl.c | 129+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/decl/decl.h | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/decl/decl_attrs.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/decl/decl_attrs.h | 32++++++++++++++++++++++++++++++++
Rsrc/lex/lex.c -> lang/c/lex/lex.c | 0
Csrc/lex/lex.h -> lang/c/lex/lex.h | 0
Rsrc/parse/attr.h -> lang/c/parse/attr.h | 0
Alang/c/parse/cg_adapter.c | 451+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/parse/cg_public_compat.h | 284+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/parse/parse.c | 1097+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/parse/parse.h | 14++++++++++++++
Alang/c/parse/parse_expr.c | 1883+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/parse/parse_init.c | 775+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/parse/parse_priv.h | 437+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/parse/parse_stmt.c -> lang/c/parse/parse_stmt.c | 0
Rsrc/parse/parse_type.c -> lang/c/parse/parse_type.c | 0
Rsrc/pp/pp.c -> lang/c/pp/pp.c | 0
Csrc/pp/pp.h -> lang/c/pp/pp.h | 0
Rsrc/pp/pp_directive.c -> lang/c/pp/pp_directive.c | 0
Rsrc/pp/pp_expand.c -> lang/c/pp/pp_expand.c | 0
Rsrc/pp/pp_priv.h -> lang/c/pp/pp_priv.h | 0
Alang/c/type/type.c | 448+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alang/c/type/type.h | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/api/cg.c | 206++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/api/cg_api.h | 17+++++++++++++++++
Msrc/api/pipeline.c | 147+++++++++----------------------------------------------------------------------
Msrc/api/stubs.c | 2--
Dsrc/decl/decl.c | 311-------------------------------------------------------------------------------
Msrc/decl/decl.h | 117+++-----------------------------------------------------------------------------
Dsrc/decl/decl_attrs.c | 101-------------------------------------------------------------------------------
Msrc/decl/decl_attrs.h | 32+++-----------------------------
Msrc/lex/lex.h | 127++-----------------------------------------------------------------------------
Asrc/parse/cg_public_compat.h | 6++++++
Dsrc/parse/parse.c | 1117-------------------------------------------------------------------------------
Msrc/parse/parse.h | 16+---------------
Dsrc/parse/parse_expr.c | 1888-------------------------------------------------------------------------------
Dsrc/parse/parse_init.c | 799-------------------------------------------------------------------------------
Dsrc/parse/parse_priv.h | 449-------------------------------------------------------------------------------
Msrc/pp/pp.h | 36+++---------------------------------
Dsrc/type/type.c | 348-------------------------------------------------------------------------------
Msrc/type/type.h | 169++-----------------------------------------------------------------------------
Atest/parse/cases/asm_02_file_scope.skip | 1+
53 files changed, 6916 insertions(+), 5634 deletions(-)

diff --git a/Makefile b/Makefile @@ -20,8 +20,10 @@ DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude -I. LANG_CFLAGS = $(CFLAGS_COMMON) -Iinclude LIB_SRCS = $(shell find src -name '*.c') +LANG_C_SRCS = $(shell find lang/c -name '*.c' 2>/dev/null) LIB_ASMS = $(shell find src -name '*.S') LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) \ + $(patsubst lang/c/%.c,build/lang/c/%.o,$(LANG_C_SRCS)) \ $(patsubst src/%.S,build/lib/%.o,$(LIB_ASMS)) LIB_DEPS = $(LIB_OBJS:.o=.d) @@ -64,6 +66,10 @@ build/lib/%.o: src/%.c @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@ +build/lang/c/%.o: lang/c/%.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/c $(DEPFLAGS) -c $< -o $@ + build/lib/%.o: src/%.S @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@ diff --git a/include/abi/abi.h b/include/abi/abi.h @@ -0,0 +1,138 @@ +#ifndef CFREE_ABI_H +#define CFREE_ABI_H + +#include "core/core.h" +#include "type/type.h" + +/* TargetABI is the single authority for target-dependent C layout and calling + * convention decisions. Type remains structural and ABI-neutral; all sizes, + * alignments, field offsets, bitfield packing, scalar widths, and + * argument/return classifications are derived here from Compiler.target. 
*/ +typedef struct TargetABI TargetABI; + +typedef enum ABIScalarKind { + ABI_SC_VOID, + ABI_SC_BOOL, + ABI_SC_INT, + ABI_SC_FLOAT, + ABI_SC_PTR, +} ABIScalarKind; + +typedef struct ABITypeInfo { + u32 size; + u32 align; + u8 scalar_kind; /* ABIScalarKind; ABI_SC_VOID for aggregates/void */ + u8 signed_; + u8 atomic; + u8 pad; +} ABITypeInfo; + +typedef struct ABIFieldLayout { + u32 offset; /* byte offset from record base */ + u16 bit_offset; /* bit offset within storage unit for bitfields */ + u16 bit_width; /* 0 for non-bitfield */ + u32 storage_size; /* bytes in the bitfield storage unit; 0 otherwise */ +} ABIFieldLayout; + +typedef struct ABIRecordLayout { + u32 size; + u32 align; + u32 nfields; + const ABIFieldLayout* fields; +} ABIRecordLayout; + +typedef enum ABIArgKind { + ABI_ARG_IGNORE, + ABI_ARG_DIRECT, /* one or more inspectable parts */ + ABI_ARG_INDIRECT, /* caller passes address */ + ABI_ARG_EXPAND, /* aggregate split into parts below */ +} ABIArgKind; + +typedef enum ABIArgClass { + ABI_CLASS_NONE, + ABI_CLASS_INT, + ABI_CLASS_FP, + ABI_CLASS_VEC, + ABI_CLASS_MEM, +} ABIArgClass; + +typedef enum ABIArgLoc { + ABI_LOC_NONE, + ABI_LOC_REG, + ABI_LOC_STACK, + ABI_LOC_EITHER, +} ABIArgLoc; + +typedef enum ABIArgFlag { + ABI_AF_NONE = 0, + ABI_AF_SRET = 1u << 0, /* hidden structure-return pointer */ + ABI_AF_BYVAL = 1u << 1, /* caller passes an address to a copy */ + ABI_AF_SIGN_EXT = 1u << 2, + ABI_AF_ZERO_EXT = 1u << 3, + ABI_AF_VARARG = 1u << 4, /* placement affected by variadic rules */ + ABI_AF_SPLIT = 1u << 5, /* source value is split across parts */ +} ABIArgFlag; + +typedef struct ABIArgPart { + u8 cls; /* ABIArgClass */ + u8 loc; /* ABIArgLoc preference */ + u16 flags; /* ABIArgFlag */ + u32 src_offset; /* byte offset within source object */ + u32 size; /* bytes carried by this part */ + u32 align; /* part alignment */ + u32 stack_align; /* required stack alignment if stack-passed */ +} ABIArgPart; + +typedef struct ABIArgInfo { + u8 kind; /* 
ABIArgKind */ + u8 flags; /* ABIArgFlag applying to the whole argument */ + u16 nparts; + u32 indirect_align; /* required alignment for ABI_ARG_INDIRECT/byval copy */ + const ABIArgPart* parts; +} ABIArgInfo; + +typedef struct ABIFuncInfo { + ABIArgInfo ret; + const ABIArgInfo* params; + u16 nparams; + u8 variadic; + u8 has_sret; + /* True when the trailing `...` portion of a variadic call must be + * routed to the stack exclusively, bypassing the GPR/FPR arg pools. + * Apple ARM64 sets this; AAPCS64 / SysV-x64 leave it 0 (variadics + * use the same register routing as fixed args). */ + u8 vararg_on_stack; + u32 vararg_gp_offset; + u32 vararg_fp_offset; + u32 vararg_overflow_offset; +} ABIFuncInfo; + +void abi_init(TargetABI*, Compiler*); +void abi_fini(TargetABI*); + +/* Heap-allocating wrappers around abi_init/abi_fini, used by compiler_init. + * The returned pointer is valid until abi_free returns. */ +TargetABI* abi_new(Compiler*); +void abi_free(TargetABI*); + +/* Builtin scalar profiles and general type layout. */ +ABITypeInfo abi_type_info(TargetABI*, const Type*); +u32 abi_sizeof(TargetABI*, const Type*); +u32 abi_alignof(TargetABI*, const Type*); + +/* Record layout is cached by Type* identity inside TargetABI and is stable for + * the lifetime of the ABI object. Incomplete records are fatal diagnostics. */ +const ABIRecordLayout* abi_record_layout(TargetABI*, const Type*); + +/* Calling convention classification. The returned object is owned by the ABI + * cache and remains valid until abi_fini. */ +const ABIFuncInfo* abi_func_info(TargetABI*, const Type* fn_type); + +/* Target-defined library types used by headers and builtins. 
*/ +const Type* abi_size_type(TargetABI*, Pool*); +const Type* abi_ptrdiff_type(TargetABI*, Pool*); +const Type* abi_intptr_type(TargetABI*, Pool*); +const Type* abi_uintptr_type(TargetABI*, Pool*); +const Type* abi_va_list_type(TargetABI*, Pool*); + +#endif diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -869,6 +869,8 @@ typedef enum CfreeCgDataDefFlag { CFREE_CG_DATADEF_RETAIN = 1u << 0, CFREE_CG_DATADEF_MERGE = 1u << 1, CFREE_CG_DATADEF_STRINGS = 1u << 2, + CFREE_CG_DATADEF_READONLY = 1u << 3, + CFREE_CG_DATADEF_ZERO_FILL = 1u << 4, } CfreeCgDataDefFlag; typedef struct CfreeCgDataDefAttrs { diff --git a/include/core/arena.h b/include/core/arena.h @@ -0,0 +1,31 @@ +#ifndef CFREE_ARENA_H +#define CFREE_ARENA_H + +#include "core/core.h" +#include "core/heap.h" + +typedef struct ArenaBlock ArenaBlock; + +struct Arena { + Heap* heap; + ArenaBlock* head; + u8* cur; /* points into head's buffer */ + u8* end; /* end of head's buffer */ + size_t block_size; +}; + +void arena_init(Arena*, Heap*, size_t block_size); +void arena_fini(Arena*); +void arena_reset(Arena*); +void* arena_alloc(Arena*, size_t size, size_t align); +void* arena_zalloc(Arena*, size_t size, size_t align); /* zeroed; NULL on OOM */ +char* arena_strdup(Arena*, const char* s, size_t len); + +#define arena_new(a, T) ((T*)arena_alloc((a), sizeof(T), _Alignof(T))) +#define arena_znew(a, T) ((T*)arena_zalloc((a), sizeof(T), _Alignof(T))) +#define arena_array(a, T, n) \ + ((T*)arena_alloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) +#define arena_zarray(a, T, n) \ + ((T*)arena_zalloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) + +#endif diff --git a/include/core/core.h b/include/core/core.h @@ -0,0 +1,160 @@ +#ifndef CFREE_INTERNAL_CORE_H +#define CFREE_INTERNAL_CORE_H + +#include <cfree.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> + +/* Short integer aliases used throughout libcfree's internal headers. 
*/ +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +/* Internal aliases for types that also have a public Cfree-prefixed name. + * <cfree.h> is the single source of truth for those types' identities; + * src/ keeps its terser, plain names so the internal call sites don't + * change. Both names refer to the same struct. */ +typedef CfreeCompiler Compiler; +typedef CfreeHeap Heap; +typedef CfreeDiagSink DiagSink; +typedef CfreeWriter Writer; +typedef CfreeTarget Target; +typedef CfreeObjBuilder ObjBuilder; +typedef enum CfreeArchKind ArchKind; +typedef enum CfreeOSKind OSKind; +typedef enum CfreeObjFmt ObjFmt; + +/* Internal-only forward declarations. */ +typedef struct Arena Arena; +typedef struct Pool Pool; +typedef struct TargetABI TargetABI; +typedef struct SourceManager SourceManager; + +/* Interned string ids. 0 reserved as "none". Object and linker symbols are + * intentionally not intern-pool concepts; see obj/obj.h and link/link.h. */ +typedef u32 Sym; + +/* Binary-blob handle into the same Pool. Shares the numeric value space with + * Sym but is a distinct typedef so callers don't accidentally mix interned + * strings with decoded literal bytes. */ +typedef u32 BytesId; +#define BYTES_NONE 0u + +/* SrcLoc is the public CfreeSrcLoc; the alias keeps internal call sites + * terse. 
*/ +typedef CfreeSrcLoc SrcLoc; + +typedef struct SrcRange { + SrcLoc begin; + SrcLoc end; +} SrcRange; + +typedef enum SourceFileKind { + SRC_FILE_REAL, + SRC_FILE_MEMORY, + SRC_FILE_BUILTIN, + SRC_FILE_MACRO, +} SourceFileKind; + +typedef struct SourceFile { + u32 id; + Sym name; /* spelling used in diagnostics */ + Sym path; /* normalized path, 0 for memory/builtin-only input */ + u8 kind; /* SourceFileKind */ + u8 system_header; + u16 pad; +} SourceFile; + +typedef struct SourceInclude { + u32 includer_file_id; + u32 included_file_id; + SrcLoc include_loc; + u8 system; + u8 pad[3]; +} SourceInclude; + +typedef struct SourceExpansion { + SrcLoc spelling_loc; /* where the token text came from */ + SrcLoc expansion_loc; /* where the macro expansion was requested */ + Sym macro_name; +} SourceExpansion; + +typedef struct SourceDepIter SourceDepIter; + +SourceManager* source_new(Compiler*); +void source_free(SourceManager*); + +u32 source_add_file(SourceManager*, const char* path, int system_header); +u32 source_add_memory(SourceManager*, const char* name); +u32 source_add_builtin(SourceManager*, const char* name); +void source_add_include(SourceManager*, u32 includer_file_id, + u32 included_file_id, SrcLoc include_loc, int system); +u32 source_add_macro_expansion(SourceManager*, Sym macro_name, + SrcLoc spelling_loc, SrcLoc expansion_loc); + +const SourceFile* source_file(SourceManager*, u32 file_id); +const SourceExpansion* source_expansion(SourceManager*, u32 expansion_file_id); +SrcLoc source_spelling_loc(SourceManager*, SrcLoc); +SrcLoc source_expansion_loc(SourceManager*, SrcLoc); + +SourceDepIter* source_depiter_new(SourceManager*); +const SourceInclude* source_depiter_next(SourceDepIter*); +void source_depiter_free(SourceDepIter*); + +/* compiler_defer registers a cleanup that runs LIFO from cfree_run's panic + * boundary (or any caller that establishes its own setjmp). 
Each subsystem + * _new registers its matching _free; the matched _free calls compiler_undefer + * with the returned handle. The cleanup stack lives in scratch arena and is + * bounded by pipeline depth (~10 entries). */ +typedef struct CompilerCleanup CompilerCleanup; + +struct CfreeCompiler { + jmp_buf panic; + const CfreeEnv* env; + Pool* global; + Arena* tu; + Arena* scratch; + SourceManager* sources; + TargetABI* abi; + Target target; + CompilerCleanup* cleanup; /* top of LIFO cleanup stack */ + CfreeCompileFn frontends[CFREE_LANG_COUNT]; + void* cg_api; /* public cfree/cg.h adapter state */ + void (*cg_api_free)(Compiler*); +}; + +void compiler_init(Compiler*, Target, const CfreeEnv*); +void compiler_fini(Compiler*); + +/* Cleanup stack. compiler_defer returns an opaque handle; compiler_undefer + * removes the entry without running it (use after a successful _free). + * compiler_run_cleanups runs everything LIFO, used by the panic handler. */ +CompilerCleanup* compiler_defer(Compiler*, void (*fn)(void*), void* arg); +void compiler_undefer(Compiler*, CompilerCleanup*); +void compiler_run_cleanups(Compiler*); + +/* Emits a diagnostic and longjmps c->panic. Used by parse/cg/arch fatal paths. + */ +_Noreturn void compiler_panic(Compiler*, SrcLoc, const char* fmt, ...); +_Noreturn void compiler_panicv(Compiler*, SrcLoc, const char* fmt, va_list); + +/* Save/restore the panic jmp_buf so layered APIs can nest. Each top-level + * driver function (cfree_compile_obj, cfree_link_*, ...) saves c->panic, + * installs its own setjmp, and restores on every exit path — both the + * panic-return path (after compiler_run_cleanups) and the success path. + * Without this, an inner setjmp clobbers an outer setjmp's jmp_buf, and a + * subsequent compiler_panic by the outer caller longjmps to a dead frame. 
*/ +typedef struct PanicSave { + jmp_buf buf; +} PanicSave; +void compiler_panic_save(Compiler*, PanicSave* out); +void compiler_panic_restore(Compiler*, const PanicSave* saved); + +#endif diff --git a/include/core/diag.h b/include/core/diag.h @@ -0,0 +1,19 @@ +#ifndef CFREE_DIAG_H +#define CFREE_DIAG_H + +#include "core/core.h" + +/* DiagKind / DiagSink struct are public (see <cfree.h>). The internal + * aliases below keep terse names; the unprefixed enum constants below + * are the names libcfree's source uses internally. */ +typedef CfreeDiagKind DiagKind; +#define DIAG_NOTE CFREE_DIAG_NOTE +#define DIAG_WARN CFREE_DIAG_WARN +#define DIAG_ERROR CFREE_DIAG_ERROR +#define DIAG_FATAL CFREE_DIAG_FATAL + +/* Convenience varargs wrappers around `sink->emit`. Internal use only. */ +void diag_emit(DiagSink*, DiagKind, SrcLoc, const char* fmt, ...); +void diag_emitv(DiagSink*, DiagKind, SrcLoc, const char* fmt, va_list); + +#endif diff --git a/include/core/hashmap.h b/include/core/hashmap.h @@ -0,0 +1,202 @@ +#ifndef CFREE_HASHMAP_H +#define CFREE_HASHMAP_H + +/* Generic open-addressed hashmap as a typed-macro template. + * + * Linear probing; doubling rehash at 75% load. Tombstoneless deletion + * via cluster rehash. Empty-key sentinel is 0 — slots are zero-initialized + * by allocation, and callers must never insert a key whose value compares + * equal to 0. (Sym=0 already means "none" per core.h:42; OBJ_SEC_NONE=0; + * pointer keys avoid 0 by construction.) + * + * HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) + * NAME — struct typedef name for this instance. + * KT — key type (must support `== 0` and `==` against another KT). + * VT — value type (any assignable type). + * HASH_FN — function-like expression mapping KT -> u32. 
+ * + * Emits typedef NAME, NAME##Slot, and these static functions: + * void NAME##_init (NAME*, Heap*) — default initial cap + * void NAME##_init_cap(NAME*, Heap*, u32 cap) — caller picks initial cap + * void NAME##_fini (NAME*) + * VT* NAME##_get (const NAME*, KT) — NULL if absent + * int NAME##_set (NAME*, KT, VT) — 1 inserted, 0 updated + * void NAME##_del (NAME*, KT) — no-op if absent + * + * Equality is `==` on KT. That covers all keys we use (Sym which is u32, + * u64 guest_pc). A string-keyed instance would need a small extension. + * + * Built-in mixers below cover u32 (hash_u32) and u64 (hash_u64) keys. */ + +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" + +/* xorshift mixer suitable for dense u32 keys (interned Sym ids etc.). */ +static inline u32 hash_u32(u32 x) { + x += 0x9e3779b9u; + x ^= x >> 16; + x *= 0x7feb352du; + x ^= x >> 15; + x *= 0x846ca68bu; + x ^= x >> 16; + return x; +} + +/* SplitMix-style mixer for u64 keys (e.g. guest PC). */ +static inline u32 hash_u64(u64 x) { + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return (u32)x; +} + +#define HASHMAP_LOAD_NUM 3u +#define HASHMAP_LOAD_DEN 4u +#define HASHMAP_INIT_CAP 16u + +#define HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) \ + typedef struct NAME##Slot { \ + KT k; \ + VT v; \ + } NAME##Slot; \ + typedef struct NAME { \ + Heap* heap; \ + NAME##Slot* slots; \ + u32 cap; \ + u32 used; \ + } NAME; \ + \ + __attribute__((unused)) static void NAME##_resize(NAME* m, u32 new_cap) { \ + NAME##Slot* fresh; \ + u32 i, mask; \ + fresh = (NAME##Slot*)m->heap->alloc(m->heap, sizeof(*fresh) * new_cap, \ + _Alignof(NAME##Slot)); \ + if (!fresh) return; \ + memset(fresh, 0, sizeof(*fresh) * new_cap); \ + mask = new_cap - 1u; \ + for (i = 0; i < m->cap; ++i) { \ + KT k = m->slots[i].k; \ + u32 j; \ + if (!(k)) continue; \ + j = HASH_FN(k) & mask; \ + while (fresh[j].k) j = (j + 1u) & mask; \ + fresh[j] = m->slots[i]; \ + } \ + 
if (m->slots) \ + m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ + m->slots = fresh; \ + m->cap = new_cap; \ + } \ + \ + __attribute__((unused)) static inline void NAME##_init_cap(NAME* m, Heap* h, \ + u32 cap) { \ + m->heap = h; \ + m->slots = NULL; \ + m->cap = 0; \ + m->used = 0; \ + if (cap) NAME##_resize(m, cap); \ + } \ + \ + __attribute__((unused)) static inline void NAME##_init(NAME* m, Heap* h) { \ + NAME##_init_cap(m, h, HASHMAP_INIT_CAP); \ + } \ + \ + __attribute__((unused)) static inline void NAME##_fini(NAME* m) { \ + if (m->slots) \ + m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ + m->slots = NULL; \ + m->cap = m->used = 0; \ + } \ + \ + __attribute__((unused)) static inline VT* NAME##_get(const NAME* m, KT k) { \ + u32 mask, j; \ + if (m->cap == 0 || !(k)) return NULL; \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) return &m->slots[j].v; \ + j = (j + 1u) & mask; \ + } \ + return NULL; \ + } \ + \ + __attribute__((unused)) static inline int NAME##_set(NAME* m, KT k, VT v) { \ + u32 mask, j; \ + if (m->cap == 0 || \ + m->used * HASHMAP_LOAD_DEN >= m->cap * HASHMAP_LOAD_NUM) \ + NAME##_resize(m, m->cap ? m->cap * 2u : HASHMAP_INIT_CAP); \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + m->slots[j].v = (v); \ + return 0; \ + } \ + j = (j + 1u) & mask; \ + } \ + m->slots[j].k = (k); \ + m->slots[j].v = (v); \ + m->used++; \ + return 1; \ + } \ + \ + /* Insert if absent. Returns 1 if newly inserted; 0 if k was present \ + * (in that case writes the existing value to *existing_out when \ + * existing_out is non-NULL). */ \ + __attribute__((unused)) static inline int NAME##_try_insert( \ + NAME* m, KT k, VT v, VT* existing_out) { \ + u32 mask, j; \ + if (m->cap == 0 || \ + m->used * HASHMAP_LOAD_DEN >= m->cap * HASHMAP_LOAD_NUM) \ + NAME##_resize(m, m->cap ? 
m->cap * 2u : HASHMAP_INIT_CAP); \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + if (existing_out) *existing_out = m->slots[j].v; \ + return 0; \ + } \ + j = (j + 1u) & mask; \ + } \ + m->slots[j].k = (k); \ + m->slots[j].v = (v); \ + m->used++; \ + return 1; \ + } \ + \ + __attribute__((unused)) static inline void NAME##_del(NAME* m, KT k) { \ + u32 mask, j; \ + if (m->cap == 0 || !(k)) return; \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + u32 i = (j + 1u) & mask; \ + m->slots[j].k = 0; \ + m->used--; \ + while (m->slots[i].k) { \ + KT rk = m->slots[i].k; \ + VT rv = m->slots[i].v; \ + u32 nh; \ + m->slots[i].k = 0; \ + m->used--; \ + nh = HASH_FN(rk) & mask; \ + while (m->slots[nh].k) nh = (nh + 1u) & mask; \ + m->slots[nh].k = rk; \ + m->slots[nh].v = rv; \ + m->used++; \ + i = (i + 1u) & mask; \ + } \ + return; \ + } \ + j = (j + 1u) & mask; \ + } \ + } \ + /* trailing struct decl swallows the macro-call's semicolon */ \ + struct NAME + +#endif diff --git a/include/core/heap.h b/include/core/heap.h @@ -0,0 +1,16 @@ +#ifndef CFREE_HEAP_H +#define CFREE_HEAP_H + +#include "core/core.h" + +/* CfreeHeap struct definition is in <cfree.h> (public). The host + * implements `alloc`/`realloc`/`free` and passes the heap in via + * CfreeEnv.heap. + * + * heap_mmap_exec is a libcfree-internal helper used only by the JIT path + * (mapped pages with PROT_EXEC available on flip). It is the one place + * inside libcfree that genuinely depends on host memory mapping; for now + * it stays internal until the JIT is wired up. 
*/ +Heap* heap_mmap_exec(void); + +#endif diff --git a/include/core/pool.h b/include/core/pool.h @@ -0,0 +1,48 @@ +#ifndef CFREE_POOL_H +#define CFREE_POOL_H + +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" + +typedef struct Type Type; /* declared in src/type/type.h */ + +typedef struct PoolEntry { + const char* data; + u32 len; + u32 hash; +} PoolEntry; + +struct Pool { + Heap* heap; + Arena arena; /* string and type-template storage */ + + /* Hash table: 0 means empty. Otherwise it's a Sym id (1-based). */ + Sym* table; + u32 cap; /* always a power of two */ + u32 used; + + /* Sym → string mapping. Index 0 reserved as Sym = 0 ("none"). */ + PoolEntry* entries; + u32 nentries; + u32 entries_cap; + + /* Lazily-initialized type interning cache (defined in src/type/type.c). + * Opaque to other consumers; type.c casts as needed. */ + void* type_cache; +}; + +void pool_init(Pool*, Heap*); +void pool_fini(Pool*); + +/* Strings. Returns canonical id; equal strings → equal ids. */ +Sym pool_intern(Pool*, const char* s, size_t len); +Sym pool_intern_cstr(Pool*, const char* s); +const char* pool_str(Pool*, Sym, size_t* len_out); + +/* Types. Caller fills a stack-allocated Type template; pool returns the + * canonical pointer (allocating into the pool the first time). Equal types → + * equal pointers. 
*/ +const Type* pool_type(Pool*, const Type* tmpl); + +#endif diff --git a/lang/c/c.c b/lang/c/c.c @@ -0,0 +1,135 @@ +#include "c.h" + +#include <cfree/cg.h> + +#include "core/pool.h" +#include "decl/decl.h" +#include "lex/lex.h" +#include "parse/parse.h" +#include "pp/pp.h" + +static void c_apply_pp_options(Pp* pp, const CfreePpOptions* opts) { + u32 i; + + for (i = 0; i < opts->ninclude_dirs; ++i) { + pp_add_include_dir(pp, opts->include_dirs[i], 0); + } + for (i = 0; i < opts->nsystem_include_dirs; ++i) { + pp_add_include_dir(pp, opts->system_include_dirs[i], 1); + } + for (i = 0; i < opts->ndefines; ++i) { + const char* body = opts->defines[i].body ? opts->defines[i].body : "1"; + pp_define(pp, opts->defines[i].name, body); + } + for (i = 0; i < opts->nundefines; ++i) { + pp_undef(pp, opts->undefines[i]); + } +} + +int cfree_c_preprocess(CfreeCompiler* c, const CfreePpOptions* opts, + const CfreeBytesInput* input, CfreeWriter* out) { + Lexer* lex; + Pp* pp; + + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); + pp = pp_new(c); + c_apply_pp_options(pp, opts); + pp_push_input(pp, lex); + pp_emit_text(pp, out); + pp_free(pp); + return 0; +} + +static void dump_write_str(CfreeWriter* w, const char* s) { + size_t n = 0; + while (s[n]) ++n; + w->write(w, s, n); +} + +static void dump_write_sym(CfreeWriter* w, Pool* p, Sym sym) { + size_t len = 0; + const char* s = sym ? 
pool_str(p, sym, &len) : NULL; + if (s && len) w->write(w, s, len); +} + +static void dump_emit(CfreeWriter* w, Pool* p, const Tok* t) { + switch (t->kind) { + case TOK_EOF: + dump_write_str(w, "(eof)\n"); + return; + case TOK_NEWLINE: + dump_write_str(w, "(newline)\n"); + return; + case TOK_PP_HASH: + dump_write_str(w, "(pp-hash)\n"); + return; + case TOK_PP_PASTE: + dump_write_str(w, "(pp-paste)\n"); + return; + case TOK_HEADER: + dump_write_str(w, "(header "); + break; + case TOK_IDENT: + dump_write_str(w, "(ident "); + break; + case TOK_NUM: + dump_write_str(w, "(num "); + break; + case TOK_FLT: + dump_write_str(w, "(flt "); + break; + case TOK_STR: + dump_write_str(w, "(str "); + break; + case TOK_CHR: + dump_write_str(w, "(chr "); + break; + case TOK_PUNCT: + dump_write_str(w, "(punct "); + break; + default: + dump_write_str(w, "(unknown "); + break; + } + dump_write_sym(w, p, t->spelling); + dump_write_str(w, ")\n"); +} + +int cfree_c_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input, + CfreeWriter* out) { + Lexer* lex; + Tok t; + + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); + for (;;) { + t = lex_next(lex); + dump_emit(out, c->global, &t); + if (t.kind == TOK_EOF) break; + } + lex_close(lex); + return 0; +} + +int cfree_c_compile(CfreeCompiler* c, const CfreeCompileOptions* opts, + const CfreeBytesInput* input, CfreeObjBuilder* out) { + Lexer* lex; + Pp* pp; + DeclTable* decls; + CfreeCg* cg; + + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); + pp = pp_new(c); + cg = cfree_cg_new(c, out); + (void)out; + decls = decl_new(c, cg); + + c_apply_pp_options(pp, &opts->pp); + pp_push_input(pp, lex); + + parse_c(c, pp, decls, cg, NULL); + + cfree_cg_free(cg); + decl_free(decls); + pp_free(pp); + return 0; +} diff --git a/lang/c/c.h b/lang/c/c.h @@ -0,0 +1,12 @@ +#ifndef CFREE_LANG_C_H +#define CFREE_LANG_C_H + +#include <cfree.h> + +int cfree_c_compile(CfreeCompiler*, const CfreeCompileOptions*, + 
const CfreeBytesInput* input, CfreeObjBuilder* out); +int cfree_c_preprocess(CfreeCompiler*, const CfreePpOptions*, + const CfreeBytesInput*, CfreeWriter*); +int cfree_c_dump_tokens(CfreeCompiler*, const CfreeBytesInput*, CfreeWriter*); + +#endif diff --git a/lang/c/decl/decl.c b/lang/c/decl/decl.c @@ -0,0 +1,129 @@ +/* DeclTable — C declaration semantics above the public CG API. + * + * Maps DeclId → Decl record. Allocates a CfreeCgSym for any non-typedef, + * non-auto/register decl with linkage. + * + * Identifier *lookup* is not handled here — that lives on the parser's + * scope stack so block scopes and shadowing fall out naturally. DeclTable + * is just the C-language layer above CG: storage class, linkage, + * static-locals, tentative defs, and global initializers. + * + * v1 surface is intentionally minimal: just enough for the spine corpus + * (functions; ints; static locals) plus the hooks DESIGN.md §5.3.1 + * commits to. Tentative-definition coalescing, COMDAT, and aliases are + * stubs at the API edge; their full semantics arrive with the multi-TU + * corpus. */ + +#include "decl/decl.h" + +#include <string.h> + +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "parse/cg_public_compat.h" + +struct DeclTable { + Compiler* c; + CfreeCg* cg; + Decl* slots; /* index 0 reserved as DECL_NONE */ + u32 nslots; + u32 cap; +}; + +#define DECL_INITIAL_CAP 16u + +static void decls_grow(DeclTable* t, u32 want) { + Heap* h = t->c->env->heap; + u32 cap = t->cap; + Decl* nb; + if (cap >= want) return; + while (cap < want) cap = cap ? 
cap * 2u : DECL_INITIAL_CAP; + nb = (Decl*)h->alloc(h, sizeof(Decl) * cap, _Alignof(Decl)); + if (t->slots) { + memcpy(nb, t->slots, sizeof(Decl) * t->nslots); + h->free(h, t->slots, sizeof(Decl) * t->cap); + } + t->slots = nb; + t->cap = cap; +} + +DeclTable* decl_new(Compiler* c, CfreeCg* cg) { + Heap* h = c->env->heap; + DeclTable* t = + (DeclTable*)h->alloc(h, sizeof(DeclTable), _Alignof(DeclTable)); + memset(t, 0, sizeof *t); + t->c = c; + t->cg = cg; + decls_grow(t, 1); + memset(&t->slots[0], 0, sizeof(Decl)); + t->nslots = 1; + return t; +} + +void decl_free(DeclTable* t) { + Heap* h; + if (!t) return; + h = t->c->env->heap; + if (t->slots) h->free(h, t->slots, sizeof(Decl) * t->cap); + h->free(h, t, sizeof(*t)); +} + +static CfreeCgSymbolAttrs decl_sym_attrs(const Decl* d) { + CfreeCgSymbolAttrs a; + memset(&a, 0, sizeof a); + a.bind = (d->linkage == DL_EXTERNAL) ? CFREE_SB_GLOBAL : CFREE_SB_LOCAL; + if (d->flags & DF_WEAK) a.bind = CFREE_SB_WEAK; + a.visibility = (CfreeCgVisibility)d->visibility; + if (d->flags & DF_USED) a.flags |= CFREE_CG_SYM_USED; + return a; +} + +DeclId decl_declare(DeclTable* t, const Decl* in) { + DeclId id; + Decl* slot; + decls_grow(t, t->nslots + 1); + id = (DeclId)t->nslots++; + slot = &t->slots[id]; + *slot = *in; + slot->id = id; + if (slot->obj_sym == OBJ_SYM_NONE && slot->name && + slot->storage != DS_TYPEDEF && slot->storage != DS_AUTO && + slot->storage != DS_REGISTER) { + if (slot->flags & DF_WEAK) { + if (slot->linkage != DL_EXTERNAL) + compiler_panic(t->c, slot->loc, + "weak attribute requires external linkage"); + } + CfreeCgDecl decl; + memset(&decl, 0, sizeof decl); + decl.kind = (slot->type && slot->type->kind == TY_FUNC) + ? 
CFREE_CG_DECL_FUNC + : CFREE_CG_DECL_OBJECT; + decl.display_name = slot->name; + decl.linkage_name = cfree_cg_c_linkage_name(t->c, slot->name); + decl.type = pcg_tid(t->c, slot->type); + decl.sym = decl_sym_attrs(slot); + if (decl.kind == CFREE_CG_DECL_FUNC) { + if (slot->flags & DF_NORETURN) decl.as.func.flags |= CFREE_CG_FUNC_NORETURN; + decl.as.func.section = slot->section_id; + } else { + if (slot->flags & DF_THREAD) decl.as.object.flags |= CFREE_CG_OBJ_TLS; + decl.as.object.section = slot->section_id; + decl.as.object.align = slot->align; + } + slot->obj_sym = cfree_cg_decl(t->cg, decl); + } + return id; +} + +const Decl* decl_get(const DeclTable* t, DeclId id) { + if (!t || id == DECL_NONE || id >= t->nslots) return NULL; + return &t->slots[id]; +} + +ObjSymId decl_obj_sym(const DeclTable* t, DeclId id) { + const Decl* d = decl_get(t, id); + return d ? d->obj_sym : OBJ_SYM_NONE; +} diff --git a/lang/c/decl/decl.h b/lang/c/decl/decl.h @@ -0,0 +1,88 @@ +#ifndef CFREE_DECL_H +#define CFREE_DECL_H + +#include <cfree/cg.h> + +#include "core/core.h" +#include "type/type.h" + +#ifndef CFREE_OBJ_H +typedef CfreeCgSym ObjSymId; +typedef CfreeSym ObjSecId; + +#define OBJ_SYM_NONE CFREE_CG_SYM_NONE +#define OBJ_SEC_NONE 0u + +enum { + SV_DEFAULT = CFREE_CG_VIS_DEFAULT, + SV_HIDDEN = CFREE_CG_VIS_HIDDEN, + SV_PROTECTED = CFREE_CG_VIS_PROTECTED, + SV_INTERNAL = CFREE_CG_VIS_HIDDEN, +}; +#endif + +/* C declaration semantics. This layer is deliberately above ObjBuilder: + * ObjBuilder stores object-format facts, while DeclTable owns C linkage, + * storage duration, tentative-definition, static-local, and initializer rules. 
+ */ +typedef struct DeclTable DeclTable; + +typedef u32 DeclId; +#define DECL_NONE 0u + +typedef enum DeclStorage { + DS_EXTERN, + DS_STATIC, + DS_AUTO, + DS_REGISTER, + DS_TYPEDEF, +} DeclStorage; + +typedef enum DeclLinkage { + DL_NONE, + DL_INTERNAL, + DL_EXTERNAL, +} DeclLinkage; + +typedef enum DeclFlag { + DF_NONE = 0, + DF_THREAD = 1u << 0, + DF_INLINE = 1u << 1, + DF_TENTATIVE = 1u << 2, + DF_USED = 1u << 3, + DF_WEAK = 1u << 4, + DF_STATIC_LOCAL = 1u << 5, + /* Phase 2 attribute-honoring flags. DF_NORETURN is the unified bit for + * _Noreturn and __attribute__((noreturn)); the inline-policy flags are + * recorded but not yet consulted (cfree has no inliner). */ + DF_NORETURN = 1u << 6, + DF_ALWAYS_INLINE = 1u << 7, + DF_NOINLINE = 1u << 8, + DF_GNU_INLINE = 1u << 9, +} DeclFlag; + +typedef struct Decl { + DeclId id; + Sym name; + const Type* type; + ObjSymId obj_sym; + ObjSecId section_id; /* optional explicit section; OBJ_SEC_NONE => default */ + SrcLoc loc; + u8 storage; /* DeclStorage */ + u8 linkage; /* DeclLinkage */ + u8 visibility; /* SymVis */ + u8 pad; + u32 flags; /* DeclFlag */ + /* Phase 2 attribute carriers — populated by attr_list_to_decl. */ + u32 align; /* explicit alignment from _Alignas or aligned(N); 0=natural */ + Sym alias_target; /* target name for __attribute__((alias("..."))); 0=none */ +} Decl; + +DeclTable* decl_new(Compiler*, CfreeCg*); +void decl_free(DeclTable*); + +DeclId decl_declare(DeclTable*, const Decl*); +const Decl* decl_get(const DeclTable*, DeclId); +ObjSymId decl_obj_sym(const DeclTable*, DeclId); + +#endif diff --git a/lang/c/decl/decl_attrs.c b/lang/c/decl/decl_attrs.c @@ -0,0 +1,81 @@ +#include "decl/decl_attrs.h" + +#include <string.h> + +#include "core/pool.h" + +/* Bare `__attribute__((aligned))` (no argument) means "biggest scalar + * alignment". cfree's targets all have `_Alignof(long double) == 16` + * (x86_64 SysV, AArch64 AAPCS, RISC-V LP64D), so 16 is a valid v1 + * stand-in across the board. 
*/ +#define ATTR_ALIGNED_DEFAULT 16u + +static void apply_section(Compiler* c, DeclTable* t, const Attr* a, Decl* out) { + (void)c; + if (!t || a->v.sym == 0) return; + out->section_id = a->v.sym; +} + +static void apply_visibility(Compiler* c, const Attr* a, Decl* out) { + if (a->v.sym == 0) { + compiler_panic(c, a->loc, "visibility attribute missing argument"); + } + size_t n = 0; + const char* s = pool_str(c->global, a->v.sym, &n); + if (s && strcmp(s, "default") == 0) { + out->visibility = SV_DEFAULT; + } else if (s && strcmp(s, "hidden") == 0) { + out->visibility = SV_HIDDEN; + } else if (s && strcmp(s, "protected") == 0) { + out->visibility = SV_PROTECTED; + } else if (s && strcmp(s, "internal") == 0) { + out->visibility = SV_INTERNAL; + } else { + compiler_panic(c, a->loc, + "unknown visibility '%s' (expected default|hidden|" + "protected|internal)", + s ? s : ""); + } +} + +void attr_list_to_decl(Compiler* c, DeclTable* t, const Attr* attrs, + Decl* out) { + for (const Attr* a = attrs; a; a = a->next) { + switch ((AttrKind)a->kind) { + case ATTR_ALIGNED: { + u32 v = (a->nargs == 0) ? 
ATTR_ALIGNED_DEFAULT : (u32)a->v.i; + if (v > out->align) out->align = v; + break; + } + case ATTR_SECTION: + apply_section(c, t, a, out); + break; + case ATTR_USED: + out->flags |= DF_USED; + break; + case ATTR_WEAK: + out->flags |= DF_WEAK; + break; + case ATTR_NORETURN: + out->flags |= DF_NORETURN; + break; + case ATTR_ALWAYS_INLINE: + out->flags |= DF_ALWAYS_INLINE; + break; + case ATTR_NOINLINE: + out->flags |= DF_NOINLINE; + break; + case ATTR_GNU_INLINE: + out->flags |= DF_GNU_INLINE; + break; + case ATTR_VISIBILITY: + apply_visibility(c, a, out); + break; + case ATTR_ALIAS: + out->alias_target = a->v.sym; + break; + default: + break; + } + } +} diff --git a/lang/c/decl/decl_attrs.h b/lang/c/decl/decl_attrs.h @@ -0,0 +1,32 @@ +#ifndef CFREE_DECL_ATTRS_H +#define CFREE_DECL_ATTRS_H + +#include "core/core.h" +#include "decl/decl.h" +#include "parse/attr.h" + +/* Decodes a parser-produced Attr* list onto a Decl. Walks the chain and + * applies every honored attribute (see doc/ATTRIBUTE.md "Phase 2"): + * + * packed — N/A here (record-level; see Type.rec.packed) + * aligned(N) — Decl.align = max(Decl.align, N) + * section("s") — interns the section name and stores Decl.section_id + * used — Decl.flags |= DF_USED + * noreturn — Decl.flags |= DF_NORETURN + * alias("t") — Decl.alias_target = intern("t") + * weak — Decl.flags |= DF_WEAK + * visibility(s)— Decl.visibility = SV_* + * always_inline / noinline / gnu_inline — Decl.flags |= DF_* + * + * Unknown / non-honored attributes (deprecated, format, nonnull, ...) + * are silently skipped — they were validated for argument shape during + * parsing and have no Decl-side effect in Phase 2. + * + * `attrs` may be NULL; `out` must be non-NULL. Idempotent: applying a + * list twice produces the same Decl state. Phase 2 callers invoke this + * once, between filling out the bulk Decl fields and decl_declare(). 
+ * DeclTable* is reserved for declaration-table context; attributes do not + * reach below the public frontend/codegen boundary. */ +void attr_list_to_decl(Compiler*, DeclTable*, const Attr* attrs, Decl* out); + +#endif diff --git a/src/lex/lex.c b/lang/c/lex/lex.c diff --git a/src/lex/lex.h b/lang/c/lex/lex.h diff --git a/src/parse/attr.h b/lang/c/parse/attr.h diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c @@ -0,0 +1,451 @@ +#include "parse/parse_priv.h" + +#include <string.h> + +CfreeCgTypeId pcg_tid(Compiler* c, const Type* ty) { return type_cg_id(c, ty); } + +static u32 pcg_sizeof(Parser* p, const Type* ty) { + return (u32)cfree_cg_type_size(p->c, pcg_tid(p->c, ty)); +} + +static u32 pcg_alignof(Parser* p, const Type* ty) { + return (u32)cfree_cg_type_align(p->c, pcg_tid(p->c, ty)); +} + +CfreeCgMemAccess pcg_mem(Parser* p, const Type* ty) { + CfreeCgMemAccess m; + memset(&m, 0, sizeof m); + m.type = pcg_tid(p->c, ty); + m.align = pcg_alignof(p, ty); + if (ty && (ty->qual & Q_VOLATILE)) m.flags |= CFREE_CG_MEM_VOLATILE; + return m; +} + +static void pcg_stack_grow(Parser* p, u32 want) { + const Type** ns; + u32 nc; + if (p->cg_type_cap >= want) return; + nc = p->cg_type_cap ? 
p->cg_type_cap * 2u : 64u; + while (nc < want) nc *= 2u; + ns = arena_array(p->c->tu, const Type*, nc); + if (!ns) perr(p, "out of memory in CG type stack"); + if (p->cg_type_stack && p->cg_type_sp) { + memcpy(ns, p->cg_type_stack, sizeof(*ns) * p->cg_type_sp); + } + p->cg_type_stack = ns; + p->cg_type_cap = nc; +} + +void pcg_push_type(Parser* p, const Type* ty) { + pcg_stack_grow(p, p->cg_type_sp + 1u); + p->cg_type_stack[p->cg_type_sp++] = ty; +} + +void pcg_drop_type(Parser* p) { + if (p->cg_type_sp) --p->cg_type_sp; +} + +void pcg_dup_type(Parser* p) { + const Type* ty = pcg_top_type(p); + pcg_push_type(p, ty); +} + +void pcg_swap_type(Parser* p) { + if (p->cg_type_sp >= 2) { + const Type* a = p->cg_type_stack[p->cg_type_sp - 1u]; + p->cg_type_stack[p->cg_type_sp - 1u] = + p->cg_type_stack[p->cg_type_sp - 2u]; + p->cg_type_stack[p->cg_type_sp - 2u] = a; + } +} + +void pcg_rot3_type(Parser* p) { + if (p->cg_type_sp >= 3) { + const Type* a = p->cg_type_stack[p->cg_type_sp - 3u]; + p->cg_type_stack[p->cg_type_sp - 3u] = + p->cg_type_stack[p->cg_type_sp - 2u]; + p->cg_type_stack[p->cg_type_sp - 2u] = + p->cg_type_stack[p->cg_type_sp - 1u]; + p->cg_type_stack[p->cg_type_sp - 1u] = a; + } +} + +const Type* pcg_top_type(Parser* p) { + return p->cg_type_sp ? p->cg_type_stack[p->cg_type_sp - 1u] : NULL; +} + +const Type* pcg_top2_type(Parser* p) { + return p->cg_type_sp >= 2 ? 
p->cg_type_stack[p->cg_type_sp - 2u] : NULL; +} + +void pcg_retag_top(Parser* p, const Type* ty) { + if (p->cg_type_sp) p->cg_type_stack[p->cg_type_sp - 1u] = ty; +} + +int pcg_type_is_fp(const Type* ty) { + return ty && (ty->kind == TY_FLOAT || ty->kind == TY_DOUBLE || + ty->kind == TY_LDOUBLE); +} + +int pcg_type_is_signed(const Type* ty) { + if (!ty) return 0; + switch ((TypeKind)ty->kind) { + case TY_CHAR: + case TY_SCHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_LLONG: + case TY_INT128: + case TY_ENUM: + return 1; + default: + return 0; + } +} + +CfreeCgIntBinOp pcg_int_binop(BinOp op) { + switch (op) { + case BO_IADD: return CFREE_CG_INT_ADD; + case BO_ISUB: return CFREE_CG_INT_SUB; + case BO_IMUL: return CFREE_CG_INT_MUL; + case BO_SDIV: return CFREE_CG_INT_SDIV; + case BO_UDIV: return CFREE_CG_INT_UDIV; + case BO_SREM: return CFREE_CG_INT_SREM; + case BO_UREM: return CFREE_CG_INT_UREM; + case BO_AND: return CFREE_CG_INT_AND; + case BO_OR: return CFREE_CG_INT_OR; + case BO_XOR: return CFREE_CG_INT_XOR; + case BO_SHL: return CFREE_CG_INT_SHL; + case BO_SHR_S: return CFREE_CG_INT_ASHR; + case BO_SHR_U: return CFREE_CG_INT_LSHR; + default: return CFREE_CG_INT_ADD; + } +} + +CfreeCgFpBinOp pcg_fp_binop(BinOp op) { + switch (op) { + case BO_FADD: return CFREE_CG_FP_ADD; + case BO_FSUB: return CFREE_CG_FP_SUB; + case BO_FMUL: return CFREE_CG_FP_MUL; + case BO_FDIV: return CFREE_CG_FP_DIV; + default: return CFREE_CG_FP_ADD; + } +} + +CfreeCgIntCmpOp pcg_int_cmp(CmpOp op) { + switch (op) { + case CMP_EQ: return CFREE_CG_INT_EQ; + case CMP_NE: return CFREE_CG_INT_NE; + case CMP_LT_S: return CFREE_CG_INT_LT_S; + case CMP_LE_S: return CFREE_CG_INT_LE_S; + case CMP_GT_S: return CFREE_CG_INT_GT_S; + case CMP_GE_S: return CFREE_CG_INT_GE_S; + case CMP_LT_U: return CFREE_CG_INT_LT_U; + case CMP_LE_U: return CFREE_CG_INT_LE_U; + case CMP_GT_U: return CFREE_CG_INT_GT_U; + case CMP_GE_U: return CFREE_CG_INT_GE_U; + default: return CFREE_CG_INT_EQ; + } +} + 
+CfreeCgFpCmpOp pcg_fp_cmp(CmpOp op) { + switch (op) { + case CMP_EQ: return CFREE_CG_FP_OEQ; + case CMP_NE: return CFREE_CG_FP_ONE; + case CMP_LT_F: return CFREE_CG_FP_OLT; + case CMP_LE_F: return CFREE_CG_FP_OLE; + case CMP_GT_F: return CFREE_CG_FP_OGT; + case CMP_GE_F: return CFREE_CG_FP_OGE; + default: return CFREE_CG_FP_OEQ; + } +} + +CfreeCgAtomicOp pcg_atomic_op(AtomicOp op) { + switch (op) { + case AO_XCHG: return CFREE_CG_ATOMIC_XCHG; + case AO_ADD: return CFREE_CG_ATOMIC_ADD; + case AO_SUB: return CFREE_CG_ATOMIC_SUB; + case AO_AND: return CFREE_CG_ATOMIC_AND; + case AO_OR: return CFREE_CG_ATOMIC_OR; + case AO_XOR: return CFREE_CG_ATOMIC_XOR; + case AO_NAND: return CFREE_CG_ATOMIC_NAND; + } + return CFREE_CG_ATOMIC_XCHG; +} + +CfreeCgMemOrder pcg_mem_order(MemOrder ord) { return (CfreeCgMemOrder)ord; } + +FrameSlot pcg_local(Parser* p, const FrameSlotDesc* fsd) { + CfreeCgSlotAttrs attrs; + memset(&attrs, 0, sizeof attrs); + attrs.name = fsd->name; + attrs.align = fsd->align; + if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_SLOT_ADDRESS_TAKEN; + return cfree_cg_local_slot(p->cg, pcg_tid(p->c, fsd->type), attrs); +} + +FrameSlot pcg_param_slot(Parser* p, u32 index, const FrameSlotDesc* fsd) { + CfreeCgSlotAttrs attrs; + memset(&attrs, 0, sizeof attrs); + attrs.name = fsd->name; + attrs.align = fsd->align; + if (fsd->flags & FSF_ADDR_TAKEN) attrs.flags |= CFREE_CG_SLOT_ADDRESS_TAKEN; + return cfree_cg_param_slot(p->cg, index, pcg_tid(p->c, fsd->type), attrs); +} + +void pcg_param(Parser* p, const CGParamDesc* pd) { + (void)p; + (void)pd; +} + +void pcg_func_begin(Parser* p, const CGFuncDesc* fd) { + cfree_cg_func_begin(p->cg, fd->sym); +} + +void pcg_push_int(Parser* p, i64 v, const Type* ty) { + cfree_cg_push_int(p->cg, (uint64_t)v, pcg_tid(p->c, ty)); + pcg_push_type(p, ty); +} + +void pcg_push_float(Parser* p, double v, const Type* ty) { + cfree_cg_push_float(p->cg, v, pcg_tid(p->c, ty)); + pcg_push_type(p, ty); +} + +void 
pcg_push_local_typed(Parser* p, FrameSlot s, const Type* ty) { + cfree_cg_push_local(p->cg, s); + pcg_push_type(p, ty); +} + +void pcg_push_global(Parser* p, ObjSymId sym, const Type* ty) { + cfree_cg_push_symbol_lvalue(p->cg, sym, 0); + pcg_push_type(p, ty); +} + +void pcg_load(Parser* p) { cfree_cg_load(p->cg, pcg_mem(p, pcg_top_type(p))); } + +void pcg_addr(Parser* p) { + const Type* ty = pcg_top_type(p); + cfree_cg_addr(p->cg); + pcg_retag_top(p, type_ptr(p->pool, ty)); +} + +void pcg_store(Parser* p) { + const Type* lv_ty = pcg_top2_type(p); + const Type* rv_ty = pcg_top_type(p); + const Type* mem_ty = lv_ty; + if (rv_ty && type_is_ptr(rv_ty) && (!lv_ty || !type_is_ptr(lv_ty))) { + mem_ty = rv_ty; + } + cfree_cg_dup(p->cg); + pcg_dup_type(p); + cfree_cg_rot3(p->cg); + pcg_rot3_type(p); + cfree_cg_swap(p->cg); + pcg_swap_type(p); + cfree_cg_store(p->cg, pcg_mem(p, mem_ty ? mem_ty : rv_ty)); + pcg_drop_type(p); + pcg_drop_type(p); + pcg_push_type(p, rv_ty); +} + +void pcg_deref(Parser* p, const Type* pointee) { + cfree_cg_indirect(p->cg); + pcg_retag_top(p, pointee); +} + +void pcg_binop(Parser* p, BinOp op) { + const Type* result = pcg_top2_type(p); + if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) + cfree_cg_fp_binop(p->cg, pcg_fp_binop(op), CFREE_CG_FP_NONE); + else + cfree_cg_int_binop(p->cg, pcg_int_binop(op), CFREE_CG_INTOP_NONE); + pcg_drop_type(p); + pcg_retag_top(p, result); +} + +void pcg_unop(Parser* p, UnOp op) { + if (op == UO_NEG && pcg_type_is_fp(pcg_top_type(p))) { + cfree_cg_fp_unop(p->cg, CFREE_CG_FP_NEG, CFREE_CG_FP_NONE); + } else { + CfreeCgIntUnOp iop = op == UO_NOT ? CFREE_CG_INT_NOT + : op == UO_BNOT ? 
CFREE_CG_INT_BNOT + : CFREE_CG_INT_NEG; + cfree_cg_int_unop(p->cg, iop, CFREE_CG_INTOP_NONE); + } +} + +void pcg_cmp(Parser* p, CmpOp op) { + if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) + cfree_cg_fp_cmp(p->cg, pcg_fp_cmp(op)); + else + cfree_cg_int_cmp(p->cg, pcg_int_cmp(op)); + pcg_drop_type(p); + pcg_retag_top(p, type_prim(p->pool, TY_BOOL)); +} + +void pcg_convert(Parser* p, const Type* dst) { + const Type* src = pcg_top_type(p); + u32 ss = pcg_sizeof(p, src); + u32 ds = pcg_sizeof(p, dst); + int si = type_is_int(src) || type_is_ptr(src); + int di = type_is_int(dst) || type_is_ptr(dst); + int sf = pcg_type_is_fp(src); + int df = pcg_type_is_fp(dst); + CfreeCgTypeId id = pcg_tid(p->c, dst); + if (src == dst) return; + if (type_is_ptr(src) && type_is_ptr(dst)) { + cfree_cg_bitcast(p->cg, id); + pcg_retag_top(p, dst); + return; + } + if (si && di) { + if (ds < ss) cfree_cg_trunc(p->cg, id); + else if (ds > ss && type_is_int(src) && pcg_type_is_signed(src)) + cfree_cg_sext(p->cg, id); + else if (ds > ss) cfree_cg_zext(p->cg, id); + } else if (type_is_int(src) && df) { + if (pcg_type_is_signed(src)) + cfree_cg_sint_to_float(p->cg, id, CFREE_CG_ROUND_DEFAULT); + else + cfree_cg_uint_to_float(p->cg, id, CFREE_CG_ROUND_DEFAULT); + } else if (sf && type_is_int(dst)) { + if (pcg_type_is_signed(dst)) + cfree_cg_float_to_sint(p->cg, id, CFREE_CG_ROUND_DEFAULT); + else + cfree_cg_float_to_uint(p->cg, id, CFREE_CG_ROUND_DEFAULT); + } else if (sf && df) { + if (ds > ss) cfree_cg_fpext(p->cg, id); + else if (ds < ss) cfree_cg_fptrunc(p->cg, id); + } else { + cfree_cg_bitcast(p->cg, id); + } + pcg_retag_top(p, dst); +} + +void pcg_inc_dec(Parser* p, BinOp op, int post) { + const Type* ty = pcg_top_type(p); + cfree_cg_inc_dec(p->cg, pcg_int_binop(op), post, pcg_tid(p->c, ty), + pcg_mem(p, ty)); +} + +void pcg_call(Parser* p, u32 nargs, const Type* fn_type) { + cfree_cg_call_default(p->cg, nargs, pcg_tid(p->c, fn_type)); + for (u32 i = 0; i < nargs + 
1u; ++i) pcg_drop_type(p); + if (fn_type && fn_type->kind == TY_FUNC && fn_type->fn.ret->kind != TY_VOID) { + pcg_push_type(p, fn_type->fn.ret); + } +} + +void pcg_ret(Parser* p, int has_value) { + if (has_value) { + cfree_cg_ret(p->cg); + pcg_drop_type(p); + } else { + cfree_cg_ret_void(p->cg); + } +} + +void pcg_alloca(Parser* p) { + cfree_cg_alloca(p->cg, 16, + pcg_tid(p->c, type_ptr(p->pool, type_void(p->pool)))); + pcg_drop_type(p); + pcg_push_type(p, type_ptr(p->pool, type_void(p->pool))); +} + +void pcg_va_arg(Parser* p, const Type* ty) { + cfree_cg_vararg_next(p->cg, pcg_tid(p->c, ty)); + pcg_drop_type(p); + pcg_push_type(p, ty); +} + +void pcg_atomic_load(Parser* p, MemOrder ord) { + const Type* pty = pcg_top_type(p); + const Type* ty = (pty && pty->kind == TY_PTR) ? pty->ptr.pointee : pty; + cfree_cg_atomic_load(p->cg, pcg_mem(p, ty), pcg_mem_order(ord)); + pcg_retag_top(p, ty); +} + +void pcg_atomic_store(Parser* p, MemOrder ord) { + const Type* pty = pcg_top2_type(p); + const Type* ty = (pty && pty->kind == TY_PTR) ? pty->ptr.pointee : pty; + cfree_cg_atomic_store(p->cg, pcg_mem(p, ty), pcg_mem_order(ord)); + pcg_drop_type(p); + pcg_drop_type(p); +} + +void pcg_atomic_rmw(Parser* p, AtomicOp op, MemOrder ord) { + const Type* pty = pcg_top2_type(p); + const Type* ty = (pty && pty->kind == TY_PTR) ? pty->ptr.pointee : pty; + cfree_cg_atomic_rmw(p->cg, pcg_mem(p, ty), pcg_atomic_op(op), + pcg_mem_order(ord)); + pcg_drop_type(p); + pcg_retag_top(p, ty); +} + +void pcg_atomic_cas(Parser* p, MemOrder succ, MemOrder fail) { + const Type* ty = pcg_top_type(p); + cfree_cg_atomic_cmpxchg(p->cg, pcg_mem(p, ty), pcg_mem_order(succ), + pcg_mem_order(fail), 0); + pcg_retag_top(p, type_prim(p->pool, TY_BOOL)); +} + +void pcg_fence(Parser* p, MemOrder ord) { + cfree_cg_atomic_fence(p->cg, pcg_mem_order(ord)); +} + +void pcg_intrinsic_unary_to_int(Parser* p, IntrinKind k) { + CfreeCgIntrinsic ck = k == INTRIN_CLZ ? CFREE_CG_INTRIN_CLZ + : k == INTRIN_CTZ ? 
CFREE_CG_INTRIN_CTZ + : CFREE_CG_INTRIN_POPCOUNT; + const Type* ity = type_prim(p->pool, TY_INT); + cfree_cg_intrinsic(p->cg, ck, 1, pcg_tid(p->c, ity)); + pcg_retag_top(p, ity); +} + +void pcg_intrinsic_void(Parser* p, IntrinKind k) { + if (k == INTRIN_UNREACHABLE) cfree_cg_unreachable(p->cg); + else cfree_cg_intrinsic(p->cg, CFREE_CG_INTRIN_TRAP, 0, CFREE_CG_TYPE_NONE); +} + +void pcg_inline_asm(Parser* p, const char* tmpl, const AsmConstraint* outs, + u32 nout, const AsmConstraint* ins, u32 nin, + const Sym* clobbers, u32 nclob) { + CfreeCgInlineAsm a; + CfreeCgAsmOperand* o = NULL; + CfreeCgAsmOperand* in = NULL; + CfreeSym* cl = NULL; + memset(&a, 0, sizeof a); + a.tmpl = cfree_sym_intern(p->c, tmpl ? tmpl : ""); + if (nout) { + o = arena_zarray(p->c->tu, CfreeCgAsmOperand, nout); + for (u32 i = 0; i < nout; ++i) { + o[i].constraint = cfree_sym_intern(p->c, outs[i].str ? outs[i].str : ""); + o[i].name = outs[i].name; + o[i].type = pcg_tid(p->c, outs[i].type); + o[i].dir = CFREE_CG_ASM_OUT; + } + } + if (nin) { + in = arena_zarray(p->c->tu, CfreeCgAsmOperand, nin); + for (u32 i = 0; i < nin; ++i) { + in[i].constraint = cfree_sym_intern(p->c, ins[i].str ? ins[i].str : ""); + in[i].name = ins[i].name; + in[i].type = pcg_tid(p->c, ins[i].type); + in[i].dir = (ins[i].dir == ASM_INOUT) ? 
CFREE_CG_ASM_INOUT + : CFREE_CG_ASM_IN; + } + } + if (nclob) { + cl = arena_array(p->c->tu, CfreeSym, nclob); + for (u32 i = 0; i < nclob; ++i) cl[i] = clobbers[i]; + } + a.outputs = o; + a.noutputs = nout; + a.inputs = in; + a.ninputs = nin; + a.clobbers = cl; + a.nclobbers = nclob; + cfree_cg_inline_asm(p->cg, a); +} diff --git a/lang/c/parse/cg_public_compat.h b/lang/c/parse/cg_public_compat.h @@ -0,0 +1,284 @@ +#ifndef CFREE_PARSE_CG_PUBLIC_COMPAT_H +#define CFREE_PARSE_CG_PUBLIC_COMPAT_H + +#include <cfree/cg.h> + +#include "core/core.h" +#include "type/type.h" + +typedef CfreeCg CG; +typedef CfreeCgLabel CGLabel; +typedef CfreeCgScope CGScope; +typedef CfreeCgSlot FrameSlot; + +#define FRAME_SLOT_NONE CFREE_CG_SLOT_NONE +#define OBJ_GROUP_NONE 0u + +typedef enum BinOp { + BO_IADD, + BO_ISUB, + BO_IMUL, + BO_SDIV, + BO_UDIV, + BO_SREM, + BO_UREM, + BO_FADD, + BO_FSUB, + BO_FMUL, + BO_FDIV, + BO_AND, + BO_OR, + BO_XOR, + BO_SHL, + BO_SHR_S, + BO_SHR_U, +} BinOp; + +typedef enum UnOp { + UO_NEG, + UO_NOT, + UO_BNOT, +} UnOp; + +typedef enum CmpOp { + CMP_EQ, + CMP_NE, + CMP_LT_S, + CMP_LE_S, + CMP_GT_S, + CMP_GE_S, + CMP_LT_U, + CMP_LE_U, + CMP_GT_U, + CMP_GE_U, + CMP_LT_F, + CMP_LE_F, + CMP_GT_F, + CMP_GE_F, +} CmpOp; + +typedef enum AtomicOp { + AO_XCHG, + AO_ADD, + AO_SUB, + AO_AND, + AO_OR, + AO_XOR, + AO_NAND, +} AtomicOp; + +typedef enum MemOrder { + MO_RELAXED, + MO_CONSUME, + MO_ACQUIRE, + MO_RELEASE, + MO_ACQ_REL, + MO_SEQ_CST, +} MemOrder; + +typedef enum IntrinKind { + INTRIN_NONE = 0, + INTRIN_POPCOUNT, + INTRIN_CTZ, + INTRIN_CLZ, + INTRIN_BSWAP16, + INTRIN_BSWAP32, + INTRIN_BSWAP64, + INTRIN_MEMCPY, + INTRIN_MEMMOVE, + INTRIN_MEMSET, + INTRIN_PREFETCH, + INTRIN_ASSUME_ALIGNED, + INTRIN_EXPECT, + INTRIN_UNREACHABLE, + INTRIN_TRAP, + INTRIN_SETJMP, + INTRIN_LONGJMP, + INTRIN_ADD_OVERFLOW, + INTRIN_SUB_OVERFLOW, + INTRIN_MUL_OVERFLOW, +} IntrinKind; + +typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; + +typedef struct AsmConstraint { + const 
char* str; + Sym name; + const Type* type; + u8 dir; + u8 pad[3]; +} AsmConstraint; + +typedef enum FrameSlotKind { + FS_LOCAL, + FS_PARAM, + FS_SPILL, + FS_SRET, + FS_ALLOCA, +} FrameSlotKind; + +typedef enum FrameSlotFlag { + FSF_NONE = 0, + FSF_ADDR_TAKEN = 1u << 0, + FSF_VOLATILE = 1u << 1, +} FrameSlotFlag; + +typedef struct FrameSlotDesc { + const Type* type; + Sym name; + SrcLoc loc; + u32 size; + u32 align; + u8 kind; + u8 pad; + u16 flags; +} FrameSlotDesc; + +typedef struct CGParamDesc { + u32 index; + Sym name; + const Type* type; + FrameSlot slot; + const void* abi; + const void* incoming; + u32 nincoming; + SrcLoc loc; +} CGParamDesc; + +typedef enum CGFuncDescFlag { + CGFD_NONE = 0, + CGFD_NORETURN = 1u << 0, +} CGFuncDescFlag; + +typedef struct CGFuncDesc { + ObjSymId sym; + ObjSecId text_section_id; + u32 group_id; + const Type* fn_type; + const void* abi; + const CGParamDesc* params; + u32 nparams; + SrcLoc loc; + u32 flags; +} CGFuncDesc; + +typedef struct Parser Parser; + +CfreeCgTypeId pcg_tid(Compiler*, const Type*); +CfreeCgMemAccess pcg_mem(Parser*, const Type*); +const Type* pcg_top_type(Parser*); +const Type* pcg_top2_type(Parser*); +void pcg_retag_top(Parser*, const Type*); +void pcg_push_type(Parser*, const Type*); +void pcg_drop_type(Parser*); +void pcg_dup_type(Parser*); +void pcg_swap_type(Parser*); +void pcg_rot3_type(Parser*); + +CfreeCgIntBinOp pcg_int_binop(BinOp); +CfreeCgFpBinOp pcg_fp_binop(BinOp); +CfreeCgIntCmpOp pcg_int_cmp(CmpOp); +CfreeCgFpCmpOp pcg_fp_cmp(CmpOp); +CfreeCgAtomicOp pcg_atomic_op(AtomicOp); +CfreeCgMemOrder pcg_mem_order(MemOrder); +int pcg_type_is_fp(const Type*); +int pcg_type_is_signed(const Type*); + +FrameSlot pcg_local(Parser*, const FrameSlotDesc*); +FrameSlot pcg_param_slot(Parser*, u32, const FrameSlotDesc*); +void pcg_param(Parser*, const CGParamDesc*); +void pcg_func_begin(Parser*, const CGFuncDesc*); +void pcg_push_int(Parser*, i64, const Type*); +void pcg_push_float(Parser*, double, const Type*); 
+void pcg_push_local_typed(Parser*, FrameSlot, const Type*); +void pcg_push_global(Parser*, ObjSymId, const Type*); +void pcg_load(Parser*); +void pcg_addr(Parser*); +void pcg_store(Parser*); +void pcg_deref(Parser*, const Type*); +void pcg_binop(Parser*, BinOp); +void pcg_unop(Parser*, UnOp); +void pcg_cmp(Parser*, CmpOp); +void pcg_convert(Parser*, const Type*); +void pcg_inc_dec(Parser*, BinOp, int); +void pcg_call(Parser*, u32, const Type*); +void pcg_ret(Parser*, int); +void pcg_alloca(Parser*); +void pcg_va_arg(Parser*, const Type*); +void pcg_atomic_load(Parser*, MemOrder); +void pcg_atomic_store(Parser*, MemOrder); +void pcg_atomic_rmw(Parser*, AtomicOp, MemOrder); +void pcg_atomic_cas(Parser*, MemOrder, MemOrder); +void pcg_fence(Parser*, MemOrder); +void pcg_intrinsic_unary_to_int(Parser*, IntrinKind); +void pcg_intrinsic_void(Parser*, IntrinKind); +void pcg_inline_asm(Parser*, const char*, const AsmConstraint*, u32, + const AsmConstraint*, u32, const Sym*, u32); + +#define cg_set_loc(g, loc) cfree_cg_set_loc((g), (loc)) +#define cg_local(g, fsd) pcg_local(p, (fsd)) +#define cg_param(g, pd) pcg_param(p, (pd)) +#define cg_func_begin(g, fd) pcg_func_begin(p, (fd)) +#define cg_func_end(g) cfree_cg_func_end((g)) +#define cg_push_int(g, v, ty) pcg_push_int(p, (v), (ty)) +#define cg_push_float(g, v, ty) pcg_push_float(p, (v), (ty)) +#define cg_push_local_typed(g, s, ty) pcg_push_local_typed(p, (s), (ty)) +#define cg_push_global(g, sym, ty) pcg_push_global(p, (sym), (ty)) +#define cg_load(g) pcg_load(p) +#define cg_addr(g) pcg_addr(p) +#define cg_dup(g) \ + do { \ + cfree_cg_dup((g)); \ + pcg_dup_type(p); \ + } while (0) +#define cg_swap(g) \ + do { \ + cfree_cg_swap((g)); \ + pcg_swap_type(p); \ + } while (0) +#define cg_drop(g) \ + do { \ + cfree_cg_drop((g)); \ + pcg_drop_type(p); \ + } while (0) +#define cg_store(g) pcg_store(p) +#define cg_deref(g, ty) pcg_deref(p, (ty)) +#define cg_top_type(g) pcg_top_type(p) +#define cg_top2_type(g) pcg_top2_type(p) 
+#define cg_retag_top(g, ty) pcg_retag_top(p, (ty)) +#define cg_binop(g, op) pcg_binop(p, (op)) +#define cg_unop(g, op) pcg_unop(p, (op)) +#define cg_cmp(g, op) pcg_cmp(p, (op)) +#define cg_convert(g, ty) pcg_convert(p, (ty)) +#define cg_inc_dec(g, op, post) pcg_inc_dec(p, (op), (post)) +#define cg_call(g, nargs, fn_type) pcg_call(p, (nargs), (fn_type)) +#define cg_ret(g, has_value) pcg_ret(p, (has_value)) +#define cg_alloca(g) pcg_alloca(p) +#define cg_va_start_(g) cfree_cg_vararg_start((g)) +#define cg_va_end_(g) cfree_cg_vararg_end((g)) +#define cg_va_copy_(g) cfree_cg_vararg_copy((g)) +#define cg_va_arg_(g, ty) pcg_va_arg(p, (ty)) +#define cg_atomic_load(g, ord) pcg_atomic_load(p, (ord)) +#define cg_atomic_store(g, ord) pcg_atomic_store(p, (ord)) +#define cg_atomic_rmw(g, op, ord) pcg_atomic_rmw(p, (op), (ord)) +#define cg_atomic_cas(g, succ, fail) pcg_atomic_cas(p, (succ), (fail)) +#define cg_fence(g, ord) pcg_fence(p, (ord)) +#define cg_intrinsic_unary_to_int(g, k) pcg_intrinsic_unary_to_int(p, (k)) +#define cg_intrinsic_void(g, k) pcg_intrinsic_void(p, (k)) +#define cg_label_new(g) cfree_cg_label_new((g)) +#define cg_label_place(g, l) cfree_cg_label_place((g), (l)) +#define cg_jump(g, l) cfree_cg_jump((g), (l)) +#define cg_branch_true(g, l) \ + do { \ + cfree_cg_branch_true((g), (l)); \ + pcg_drop_type(p); \ + } while (0) +#define cg_branch_false(g, l) \ + do { \ + cfree_cg_branch_false((g), (l)); \ + pcg_drop_type(p); \ + } while (0) +#define cg_inline_asm(g, tmpl, outs, nout, ins, nin, clob, nclob) \ + pcg_inline_asm(p, (tmpl), (outs), (nout), (ins), (nin), (clob), (nclob)) + +#endif diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c @@ -0,0 +1,1097 @@ +/* parse.c — residual C11 parser core. + * + * Contains: + * - kw_names[] table (used by parse_c to intern keywords) + * - Diagnostics/token helpers (perr, advance, peek1, fetch_tok, ...) 
+ * - Scope/tag operations + * - Type helpers (ty_int, ty_size_t) + * - Local-variable slot allocation (make_local, make_local_aligned) + * - Static-local symbol naming (mint_static_local_sym) + * - Declaration driver (parse_init_declarator, parse_local_decl) + * - TU-level driver (parse_param_list, declare_function, + * parse_function_body, parse_external_decl, parse_translation_unit, + * parse_c) + * + * All expression, type, initializer, and statement code lives in + * parse_expr.c, parse_type.c, parse_init.c, and parse_stmt.c. */ + +#include "parse/parse_priv.h" + +#include <stdarg.h> +#include <string.h> + +/* ============================================================ + * Keywords + * ============================================================ */ + +static const char* const kw_names[KW_COUNT] = { + NULL, "auto", "break", "case", "char", + "const", "continue", "default", "do", "double", + "else", "enum", "extern", "float", "for", + "goto", "if", "inline", "int", "long", + "register", "restrict", "return", "short", "signed", + "sizeof", "static", "struct", "switch", "typedef", + "union", "unsigned", "void", "volatile", "while", + "_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof", + "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local", + "asm", "__asm__", +}; + +/* ============================================================ + * Diagnostics + * ============================================================ */ + +static SrcLoc tok_loc(const Tok* t) { return t->loc; } + +_Noreturn void perr(Parser* p, const char* fmt, ...) { + va_list ap; + SrcLoc loc = tok_loc(&p->cur); + va_start(ap, fmt); + compiler_panicv(p->c, loc, fmt, ap); +} + +/* ============================================================ + * Token helpers + * ============================================================ */ + +/* Width of an encoding prefix on a string-literal spelling: 0 for ordinary, + * 1 for L/u/U, 2 for u8. 
*/ +static size_t str_prefix_len(u16 flags) { + if (flags & TF_STR_U8) return 2; + if (flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) return 1; + return 0; +} + +#define STR_ENC_MASK \ + (TF_STR_WIDE | TF_STR_U8 | TF_STR_U16 | TF_STR_U32) + +/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5. */ +static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { + u16 ae = (u16)(a.flags & STR_ENC_MASK); + u16 be = (u16)(b.flags & STR_ENC_MASK); + u16 fused_enc; + size_t alen = 0, blen = 0; + const char* as = pool_str(p->pool, a.spelling, &alen); + const char* bs = pool_str(p->pool, b.spelling, &blen); + size_t apfx, bpfx; + size_t a_content_len, b_content_len; + size_t out_pfx_len; + size_t out_len; + Heap* h = p->c->env->heap; + char* buf; + size_t k = 0; + Tok out; + if (!as || !bs) perr(p, "bad string literal in concatenation"); + if (ae != 0 && be != 0 && ae != be) { + perr(p, "concatenating string literals with incompatible " + "encoding prefixes"); + } + fused_enc = ae ? ae : be; + apfx = str_prefix_len(a.flags); + bpfx = str_prefix_len(b.flags); + if (alen < apfx + 2 || as[apfx] != '"' || as[alen - 1] != '"' || + blen < bpfx + 2 || bs[bpfx] != '"' || bs[blen - 1] != '"') { + perr(p, "malformed string literal in concatenation"); + } + a_content_len = alen - apfx - 2; + b_content_len = blen - bpfx - 2; + out_pfx_len = ae ? apfx : bpfx; + out_len = out_pfx_len + 1 + a_content_len + b_content_len + 1; + buf = (char*)h->alloc(h, out_len, 1); + if (!buf) perr(p, "out of memory fusing string literals"); + if (out_pfx_len) { + const char* src = ae ? 
as : bs; + memcpy(buf + k, src, out_pfx_len); + k += out_pfx_len; + } + buf[k++] = '"'; + if (a_content_len) { + memcpy(buf + k, as + apfx + 1, a_content_len); + k += a_content_len; + } + if (b_content_len) { + memcpy(buf + k, bs + bpfx + 1, b_content_len); + k += b_content_len; + } + buf[k++] = '"'; + out = a; + out.spelling = pool_intern(p->pool, buf, k); + out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc); + out.lit = LIT_NONE; + h->free(h, buf, 0); + return out; +} + +/* Pull one logical token from pp, collapsing adjacent TOK_STR runs. */ +static Tok fetch_tok(Parser* p) { + Tok t; + if (p->has_pending) { + t = p->pending; + p->has_pending = 0; + } else { + t = pp_next(p->pp); + } + if (t.kind != TOK_STR) return t; + for (;;) { + Tok n = pp_next(p->pp); + if (n.kind != TOK_STR) { + p->pending = n; + p->has_pending = 1; + return t; + } + t = fuse_string_lits(p, t, n); + } +} + +void advance(Parser* p) { + if (p->replay_active) { + if (p->replay_pos < p->replay_len) { + p->cur = p->replay[p->replay_pos++]; + return; + } + p->replay_active = 0; + } + if (p->has_next) { + p->cur = p->next; + p->has_next = 0; + } else { + p->cur = fetch_tok(p); + } +} + +Tok peek1(Parser* p) { + if (p->replay_active && p->replay_pos < p->replay_len) { + return p->replay[p->replay_pos]; + } + if (!p->has_next) { + p->next = fetch_tok(p); + p->has_next = 1; + } + return p->next; +} + +void expect_punct(Parser* p, u32 punct, const char* what) { + if (!accept_punct(p, punct)) { + perr(p, "expected %s", what); + } +} + +int accept_punct(Parser* p, u32 punct) { + if (is_punct(&p->cur, punct)) { + advance(p); + return 1; + } + return 0; +} + +/* Record tokens from the current `{` through the matching `}` into the + * parser's replay buffer. */ +void record_braced_block(Parser* p) { + int depth = 0; + if (!is_punct(&p->cur, '{')) perr(p, "internal: record on non-'{'"); + p->replay_len = 0; + for (;;) { + if (p->replay_len == p->replay_cap) { + u32 new_cap = p->replay_cap ? 
p->replay_cap * 2 : 32; + Tok* nv = arena_array(p->c->tu, Tok, new_cap); + if (!nv) perr(p, "out of memory in record_braced_block"); + if (p->replay && p->replay_len) { + memcpy(nv, p->replay, p->replay_len * sizeof(Tok)); + } + p->replay = nv; + p->replay_cap = new_cap; + } + p->replay[p->replay_len++] = p->cur; + if (is_punct(&p->cur, '{')) { + ++depth; + } else if (is_punct(&p->cur, '}')) { + --depth; + if (depth == 0) break; + } else if (p->cur.kind == TOK_EOF) { + perr(p, "unexpected end of file in initializer"); + } + advance(p); + } +} + +/* After record_braced_block, rewind to replay from the start. */ +void replay_rewind(Parser* p) { + if (p->replay_len == 0) perr(p, "internal: replay_rewind with empty buffer"); + p->cur = p->replay[0]; + p->replay_pos = 1; + p->replay_active = 1; + p->has_next = 0; +} + +/* Count top-level items in a recorded brace list. */ +u32 count_recorded_top_level_items(const Tok* vec, u32 len) { + u32 count; + u32 i; + int depth = 0; + if (len < 2) return 0; + if (len == 2) return 0; /* `{}` */ + count = 1; + for (i = 1; i < len - 1; ++i) { + const Tok* t = &vec[i]; + if (is_punct(t, '{') || is_punct(t, '(') || is_punct(t, '[')) ++depth; + else if (is_punct(t, '}') || is_punct(t, ')') || is_punct(t, ']')) --depth; + else if (depth == 0 && is_punct(t, ',')) ++count; + } + if (is_punct(&vec[len - 2], ',')) --count; + return count; +} + +/* ============================================================ + * Scopes + * ============================================================ */ + +Scope* scope_new(Parser* p, Scope* parent) { + Scope* s = arena_new(p->c->tu, Scope); + if (!s) perr(p, "out of memory in scope_new"); + s->entries = NULL; + s->tags = NULL; + s->parent = parent; + return s; +} + +void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); } + +void scope_pop(Parser* p) { + if (p->scope) p->scope = p->scope->parent; +} + +SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, + const Type* type) { + SymEntry* e = 
arena_new(p->c->tu, SymEntry); + if (!e) perr(p, "out of memory in scope_define"); + memset(e, 0, sizeof *e); + e->name = name; + e->kind = (u8)kind; + e->type = type; + e->next = p->scope->entries; + p->scope->entries = e; + return e; +} + +SymEntry* scope_lookup(Parser* p, Sym name) { + Scope* s; + for (s = p->scope; s; s = s->parent) { + SymEntry* e; + for (e = s->entries; e; e = e->next) { + if (e->name == name) return e; + } + } + return NULL; +} + +TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, + int complete) { + TagEntry* e = arena_new(p->c->tu, TagEntry); + if (!e) perr(p, "out of memory in tag_define"); + memset(e, 0, sizeof *e); + e->name = name; + e->kind = (u8)kind; + e->complete = (u8)(complete ? 1 : 0); + e->type = type; + e->next = p->scope->tags; + p->scope->tags = e; + return e; +} + +TagEntry* tag_lookup(Parser* p, Sym name) { + Scope* s; + for (s = p->scope; s; s = s->parent) { + TagEntry* e; + for (e = s->tags; e; e = e->next) { + if (e->name == name) return e; + } + } + return NULL; +} + +TagEntry* tag_lookup_local(Parser* p, Sym name) { + TagEntry* e; + for (e = p->scope->tags; e; e = e->next) { + if (e->name == name) return e; + } + return NULL; +} + +/* ============================================================ + * Type helpers + * ============================================================ */ + +static const Type* ty_size_t(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + +/* ============================================================ + * Local-variable slot allocation + * ============================================================ */ + +FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, + SrcLoc loc, u32 align_override) { + FrameSlotDesc fsd; + FrameSlot s; + SymEntry* e; + u32 nat = abi_alignof(p->abi, type); + memset(&fsd, 0, sizeof fsd); + fsd.type = type; + fsd.name = name; + fsd.loc = loc; + fsd.size = abi_sizeof(p->abi, type); + fsd.align = (align_override > nat) ? 
align_override : nat; + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + s = cg_local(p->cg, &fsd); + e = scope_define(p, name, SEK_LOCAL, type); + e->v.slot = s; + return s; +} + +FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { + return make_local_aligned(p, name, type, loc, 0); +} + +/* ============================================================ + * Static-local symbol naming + * ============================================================ */ + +/* Mint a unique linker name for a static local: `<orig>.<counter>`. */ +Sym mint_static_local_sym(Parser* p, Sym orig) { + size_t olen = 0; + const char* on = pool_str(p->pool, orig, &olen); + char buf[128]; + u32 wlen = 0; + u32 id = ++p->static_local_counter; + if (olen > 100) olen = 100; + for (size_t i = 0; i < olen && wlen < sizeof buf - 1; ++i) { + buf[wlen++] = on[i]; + } + if (wlen < sizeof buf - 1) buf[wlen++] = '.'; + { + char digits[12]; + int dn = 0; + if (id == 0) digits[dn++] = '0'; + while (id) { + digits[dn++] = (char)('0' + (id % 10)); + id /= 10; + } + while (dn && wlen < sizeof buf - 1) buf[wlen++] = digits[--dn]; + } + return pool_intern(p->pool, buf, wlen); +} + +/* ============================================================ + * Declarations + * ============================================================ */ + +/* Parse a single init-declarator after the decl-specs have been consumed. 
*/ +static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { + SrcLoc loc; + Sym name; + const Type* var_ty = parse_declarator(p, specs->type, &name, &loc); + + if (specs->storage == DS_TYPEDEF) { + if (is_punct(&p->cur, '=')) { + perr(p, "typedef declarator cannot have initializer"); + } + { + SymEntry* e = scope_define(p, name, SEK_TYPEDEF, var_ty); + if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) { + FrameSlot count_slot = p->vla_pending_count_slot; + const Type* elem_ty = var_ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem_ty); + FrameSlotDesc bsd; + FrameSlot byte_slot; + memset(&bsd, 0, sizeof bsd); + bsd.type = ty_size_t(p); + bsd.size = abi_sizeof(p->abi, bsd.type); + bsd.align = abi_alignof(p->abi, bsd.type); + bsd.kind = FS_LOCAL; + byte_slot = cg_local(p->cg, &bsd); + cg_set_loc(p->cg, loc); + cg_push_local_typed(p->cg, count_slot, ty_size_t(p)); + to_rvalue(p); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + e->vla_byte_slot = byte_slot; + p->vla_pending = 0; + p->vla_pending_count_slot = FRAME_SLOT_NONE; + } else if (specs->vla_byte_slot != FRAME_SLOT_NONE) { + e->vla_byte_slot = specs->vla_byte_slot; + } + } + (void)loc; + return; + } + + if (specs->storage == DS_STATIC) { + Decl decl_in; + DeclId did; + ObjSymId sym; + SymEntry* e; + Sym lname = mint_static_local_sym(p, name); + int has_init; + u32 align_eff; + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = lname; + decl_in.type = var_ty; + decl_in.loc = loc; + decl_in.storage = DS_STATIC; + decl_in.linkage = DL_INTERNAL; + decl_in.visibility = SV_DEFAULT; + decl_in.flags = DF_STATIC_LOCAL | (specs->flags & DF_THREAD); + attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + e = scope_define(p, name, SEK_GLOBAL, var_ty); + 
e->v.sym = sym; + has_init = accept_punct(p, '='); + if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { + const Type* completed = complete_incomplete_array(p, var_ty); + if (completed != var_ty) { + var_ty = completed; + e->type = var_ty; + } + } + align_eff = (specs->align > decl_in.align) ? specs->align : decl_in.align; + define_static_object(p, sym, decl_in.section_id, var_ty, specs->quals, + has_init, loc, align_eff); + return; + } + + if (specs->storage == DS_EXTERN) { + Decl decl_in; + DeclId did; + ObjSymId sym; + SymEntry* e; + SymEntry* prior; + if (accept_punct(p, '=')) { + perr(p, "block-scope extern with initializer not supported"); + } + prior = scope_lookup(p, name); + if (prior && prior->kind == SEK_GLOBAL) { + e = scope_define(p, name, SEK_GLOBAL, var_ty); + e->v.sym = prior->v.sym; + return; + } + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = name; + decl_in.type = var_ty; + decl_in.loc = loc; + decl_in.storage = DS_EXTERN; + decl_in.linkage = DL_EXTERNAL; + decl_in.visibility = SV_DEFAULT; + decl_in.flags = specs->flags & DF_THREAD; + attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + e = scope_define(p, name, SEK_GLOBAL, var_ty); + e->v.sym = sym; + return; + } + + if (var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete && + (p->vla_pending || specs->vla_byte_slot != FRAME_SLOT_NONE)) { + const Type* elem_ty = var_ty->arr.elem; + const Type* ptr_ty = type_ptr(p->pool, elem_ty); + FrameSlot byte_slot; + FrameSlot ptr_slot; + SymEntry* sym_entry; + if (p->vla_pending) { + FrameSlot count_slot = p->vla_pending_count_slot; + u32 esz = abi_sizeof(p->abi, elem_ty); + FrameSlotDesc bsd; + memset(&bsd, 0, sizeof bsd); + bsd.type = ty_size_t(p); + bsd.size = abi_sizeof(p->abi, bsd.type); + bsd.align = abi_alignof(p->abi, bsd.type); + bsd.kind = FS_LOCAL; + byte_slot = cg_local(p->cg, &bsd); + p->vla_pending = 0; + 
p->vla_pending_count_slot = FRAME_SLOT_NONE; + cg_set_loc(p->cg, loc); + cg_push_local_typed(p->cg, count_slot, ty_size_t(p)); + to_rvalue(p); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + } else { + byte_slot = specs->vla_byte_slot; + } + ptr_slot = make_local(p, name, ptr_ty, loc); + cg_set_loc(p->cg, loc); + cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); + cg_load(p->cg); + cg_alloca(p->cg); + cg_push_local_typed(p->cg, ptr_slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + sym_entry = scope_lookup(p, name); + if (sym_entry && sym_entry->kind == SEK_LOCAL) { + sym_entry->vla_byte_slot = byte_slot; + } + if (accept_punct(p, '=')) { + perr(p, "VLA initializers are not allowed (§6.7.9 ¶3)"); + } + return; + } + /* Non-VLA local. */ + { + int has_init = is_punct(&p->cur, '='); + FrameSlot s; + if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { + advance(p); /* '=' */ + var_ty = complete_incomplete_array(p, var_ty); + s = make_local_aligned(p, name, var_ty, loc, specs->align); + cg_set_loc(p->cg, loc); + init_at(p, s, var_ty, 0, var_ty); + return; + } + s = make_local_aligned(p, name, var_ty, loc, specs->align); + if (accept_punct(p, '=')) { + cg_set_loc(p->cg, loc); + if ((var_ty->kind == TY_STRUCT || var_ty->kind == TY_UNION) && + !is_punct(&p->cur, '{')) { + parse_assign_expr(p); + emit_struct_copy_into_slot(p, s, var_ty, 0, var_ty); + } else if (var_ty->kind == TY_ARRAY || var_ty->kind == TY_STRUCT || + var_ty->kind == TY_UNION) { + init_at(p, s, var_ty, 0, var_ty); + } else { + cg_push_local_typed(p->cg, s, var_ty); + parse_assign_expr(p); + to_rvalue(p); + coerce_top_to_lvalue(p); + cg_store(p->cg); + cg_drop(p->cg); + } + } + } +} + +void parse_local_decl(Parser* p, const DeclSpecs* specs) { + if (accept_punct(p, ';')) return; + 
parse_init_declarator(p, specs); + while (accept_punct(p, ',')) { + parse_init_declarator(p, specs); + } + expect_punct(p, ';', "';' after declaration"); +} + +/* ============================================================ + * External (top-level) declarations + * ============================================================ */ + +void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, + u8* variadic_out) { + ParamInfo* infos; + u32 cap = 4; + u32 n = 0; + *variadic_out = 0; + *infos_out = NULL; + *nparams_out = 0; + + if (is_punct(&p->cur, ')')) { + return; + } + if (is_kw(p, &p->cur, KW_VOID)) { + Tok n2 = peek1(p); + if (is_punct(&n2, ')')) { + advance(p); /* `void` */ + return; /* `(void)` */ + } + } + + infos = (ParamInfo*)arena_array(p->c->tu, ParamInfo, cap); + for (;;) { + DeclSpecs specs; + Sym pname = 0; + SrcLoc ploc = {0, 0, 0}; + const Type* pty; + if (accept_punct(p, P_ELLIPSIS)) { + *variadic_out = 1; + break; + } + if (!parse_decl_specs(p, &specs)) { + perr(p, "expected parameter type"); + } + p->in_param_decl++; + pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname, + &ploc); + p->in_param_decl--; + if (pty && pty->kind == TY_ARRAY) { + pty = type_ptr(p->pool, pty->arr.elem); + } else if (pty && pty->kind == TY_FUNC) { + pty = type_ptr(p->pool, pty); + } + if (n == cap) { + cap *= 2; + ParamInfo* nbuf = (ParamInfo*)arena_array(p->c->tu, ParamInfo, cap); + memcpy(nbuf, infos, sizeof(ParamInfo) * n); + infos = nbuf; + } + infos[n].name = pname; + infos[n].type = pty; + infos[n].loc = ploc; + ++n; + if (!accept_punct(p, ',')) break; + } + *infos_out = infos; + *nparams_out = (u16)n; +} + +static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, + const DeclSpecs* specs, SrcLoc fname_loc, + const Attr* dattrs, + ObjSecId* out_section_id, + u32* out_decl_flags, + Sym* out_alias_target) { + if (out_section_id) *out_section_id = OBJ_SEC_NONE; + if (out_decl_flags) *out_decl_flags = 0; + if 
(out_alias_target) *out_alias_target = 0; + SymEntry* existing = scope_lookup(p, fname); + if (existing && existing->kind == SEK_FUNC) { + Decl tmp; + memset(&tmp, 0, sizeof tmp); + attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp); + attr_list_to_decl(p->c, p->decls, dattrs, &tmp); + if (out_section_id) *out_section_id = tmp.section_id; + if (out_decl_flags) *out_decl_flags = tmp.flags; + if (out_alias_target) *out_alias_target = tmp.alias_target; + return existing; + } + { + Decl decl_in; + DeclId did; + ObjSymId fsym; + SymEntry* e; + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = fname; + decl_in.type = fn_ty; + decl_in.loc = fname_loc; + decl_in.storage = (specs->storage == DS_STATIC) ? DS_STATIC : DS_EXTERN; + decl_in.linkage = + (specs->storage == DS_STATIC) ? DL_INTERNAL : DL_EXTERNAL; + decl_in.visibility = SV_DEFAULT; + attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); + attr_list_to_decl(p->c, p->decls, dattrs, &decl_in); + did = decl_declare(p->decls, &decl_in); + fsym = decl_obj_sym(p->decls, did); + e = scope_define(p, fname, SEK_FUNC, fn_ty); + e->v.sym = fsym; + if (out_section_id) *out_section_id = decl_in.section_id; + if (out_decl_flags) *out_decl_flags = decl_in.flags; + if (out_alias_target) *out_alias_target = decl_in.alias_target; + return e; + } +} + +static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, + const ABIFuncInfo* abi, const ParamInfo* infos, + u16 nparams, SrcLoc fname_loc, + ObjSecId section_id, u32 decl_flags) { + CGFuncDesc fd; + CGParamDesc* pds = NULL; + + memset(&fd, 0, sizeof fd); + fd.sym = fsym; + fd.text_section_id = section_id; + fd.group_id = OBJ_GROUP_NONE; + fd.fn_type = fn_ty; + fd.abi = abi; + fd.params = NULL; + fd.nparams = nparams; + fd.loc = fname_loc; + if (decl_flags & DF_NORETURN) fd.flags |= CGFD_NORETURN; + + if (nparams) { + pds = (CGParamDesc*)arena_array(p->c->tu, CGParamDesc, nparams); + memset(pds, 0, sizeof(CGParamDesc) * nparams); + for (u16 i = 0; i < 
nparams; ++i) { + pds[i].index = i; + pds[i].name = infos[i].name; + pds[i].type = infos[i].type; + pds[i].slot = FRAME_SLOT_NONE; + pds[i].abi = &abi->params[i]; + pds[i].incoming = NULL; + pds[i].nincoming = 0; + pds[i].loc = infos[i].loc; + } + fd.params = pds; + } + + scope_push(p); /* parameter scope */ + GotoLabel* saved_goto_labels = p->goto_labels; + SwitchCtx* saved_switch = p->cur_switch; + p->goto_labels = NULL; + p->cur_switch = NULL; + cg_set_loc(p->cg, fname_loc); + cg_func_begin(p->cg, &fd); + + for (u16 i = 0; i < nparams; ++i) { + FrameSlotDesc fsd; + FrameSlot s; + SymEntry* e; + memset(&fsd, 0, sizeof fsd); + fsd.type = infos[i].type; + fsd.name = infos[i].name; + fsd.loc = infos[i].loc; + fsd.size = abi_sizeof(p->abi, infos[i].type); + fsd.align = abi_alignof(p->abi, infos[i].type); + fsd.kind = FS_PARAM; + fsd.flags = FSF_NONE; + s = pcg_param_slot(p, i, &fsd); + pds[i].slot = s; + if (infos[i].name) { + e = scope_define(p, infos[i].name, SEK_LOCAL, infos[i].type); + e->v.slot = s; + } + } + + parse_compound_stmt(p); + if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID && + fn_ty->fn.ret->kind != TY_STRUCT && fn_ty->fn.ret->kind != TY_UNION) { + cg_push_int(p->cg, 0, fn_ty->fn.ret); + cg_ret(p->cg, 1); + } else { + cg_ret(p->cg, 0); + } + for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) { + if (!gl->placed) { + compiler_panic(p->c, gl->first_use, "goto to undefined label"); + } + } + p->goto_labels = saved_goto_labels; + p->cur_switch = saved_switch; + cg_func_end(p->cg); + scope_pop(p); +} + +/* Parse one external declaration. 
*/ +static void parse_external_decl(Parser* p) { + DeclSpecs specs; + Sym name; + SrcLoc loc; + const Type* base_ty; + + if (!parse_decl_specs(p, &specs)) { + perr(p, "expected declaration"); + } + + if (accept_punct(p, ';')) return; + + if (specs.storage == DS_TYPEDEF) { + for (;;) { + Sym tname = 0; + SrcLoc tloc = {0, 0, 0}; + const Type* tty = parse_declarator_full(p, specs.type, + /*allow_abstract=*/0, + &tname, &tloc); + if (is_punct(&p->cur, '=')) { + perr(p, "typedef declarator cannot have initializer"); + } + scope_define(p, tname, SEK_TYPEDEF, tty); + (void)tloc; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, ';', "';' after typedef declaration"); + return; + } + + base_ty = parse_pointer_layer(p, specs.type); + if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected declarator"); + } + name = p->cur.v.ident; + loc = tok_loc(&p->cur); + advance(p); + + Attr* dattrs = NULL; + parse_attrs_into(p, &dattrs); + + while (is_punct(&p->cur, '[')) { + DeclSuffix s; + if (!parse_decl_suffix(p, &s)) break; + if (s.kind != DS_ARRAY) break; + base_ty = apply_decl_suffix(p, base_ty, &s); + } + parse_attrs_into(p, &dattrs); + + if (is_punct(&p->cur, '(')) { + ParamInfo* infos = NULL; + u16 nparams = 0; + u8 variadic = 0; + const Type** ptypes = NULL; + const Type* fn_ty; + const ABIFuncInfo* abi; + SymEntry* fent; + + advance(p); /* '(' */ + parse_param_list(p, &infos, &nparams, &variadic); + expect_punct(p, ')', "')' after parameter list"); + parse_attrs_into(p, &dattrs); + + if (nparams) { + ptypes = (const Type**)arena_array(p->c->tu, const Type*, nparams); + for (u16 i = 0; i < nparams; ++i) ptypes[i] = infos[i].type; + } + fn_ty = type_func(p->pool, base_ty, ptypes, nparams, (int)variadic); + abi = abi_func_info(p->abi, fn_ty); + + ObjSecId fn_section_id; + u32 fn_decl_flags; + Sym fn_alias_target; + fent = declare_function(p, name, fn_ty, &specs, loc, dattrs, + &fn_section_id, &fn_decl_flags, + 
&fn_alias_target); + attr_list_append(&fent->attrs, dattrs); + + if (is_punct(&p->cur, '{')) { + Sym saved_func_name = p->cur_func_name; + p->cur_func_name = name; + parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc, + fn_section_id, fn_decl_flags); + p->cur_func_name = saved_func_name; + return; + } + if (accept_punct(p, ';')) { + if (fn_alias_target != 0) { + SymEntry* te = scope_lookup(p, fn_alias_target); + if (!te) { + size_t nl = 0; + const char* nm = pool_str(p->pool, fn_alias_target, &nl); + compiler_panic(p->c, loc, + "alias target '%s' is undefined", + nm ? nm : "?"); + } + CfreeCgAlias alias; + memset(&alias, 0, sizeof alias); + alias.display_name = name; + alias.linkage_name = cfree_cg_c_linkage_name(p->c, name); + alias.target = te->v.sym; + alias.sym.bind = (fn_decl_flags & DF_WEAK) ? CFREE_SB_WEAK + : CFREE_SB_GLOBAL; + alias.sym.visibility = CFREE_CG_VIS_DEFAULT; + if (cfree_cg_alias(p->cg, alias) == CFREE_CG_SYM_NONE) { + size_t nl = 0; + const char* nm = pool_str(p->pool, fn_alias_target, &nl); + compiler_panic(p->c, loc, + "alias target '%s' is undefined", + nm ? nm : "?"); + } + } + return; + } + perr(p, "expected '{' or ';' after function declarator"); + } + + /* Global object declaration. 
*/ + for (;;) { + int has_init = is_punct(&p->cur, '='); + int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init; + SymEntry* existing = scope_lookup(p, name); + ObjSymId sym = OBJ_SYM_NONE; + ObjSecId section_id = OBJ_SEC_NONE; + SymEntry* e = NULL; + + if (existing && existing->kind == SEK_GLOBAL) { + sym = existing->v.sym; + e = existing; + if (e->type && base_ty && e->type->kind == TY_ARRAY && + base_ty->kind == TY_ARRAY) { + if (e->type->arr.incomplete && !base_ty->arr.incomplete) { + e->type = base_ty; + } + } + } else { + Decl decl_in; + DeclId did; + memset(&decl_in, 0, sizeof decl_in); + decl_in.name = name; + decl_in.type = base_ty; + decl_in.loc = loc; + if (specs.storage == DS_STATIC) { + decl_in.storage = DS_STATIC; + decl_in.linkage = DL_INTERNAL; + } else { + decl_in.storage = DS_EXTERN; + decl_in.linkage = DL_EXTERNAL; + } + decl_in.visibility = SV_DEFAULT; + decl_in.flags = specs.flags & DF_THREAD; + attr_list_to_decl(p->c, p->decls, specs.attrs, &decl_in); + attr_list_to_decl(p->c, p->decls, dattrs, &decl_in); + did = decl_declare(p->decls, &decl_in); + sym = decl_obj_sym(p->decls, did); + section_id = decl_in.section_id; + e = scope_define(p, name, SEK_GLOBAL, base_ty); + e->v.sym = sym; + } + attr_list_append(&e->attrs, dattrs); + + u32 attr_align = attrs_pick_aligned(specs.attrs); + { + u32 a2 = attrs_pick_aligned(dattrs); + if (a2 > attr_align) attr_align = a2; + } + u32 align_eff = (specs.align > attr_align) ? 
specs.align : attr_align; + + if (has_init) { + advance(p); /* '=' */ + if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) { + const Type* completed = complete_incomplete_array(p, base_ty); + if (completed != base_ty) { + base_ty = completed; + if (e) e->type = base_ty; + } + } + define_static_object(p, sym, section_id, base_ty, specs.quals, + /*has_init=*/1, loc, + align_eff); + } else if (!is_pure_extern) { + define_static_object(p, sym, section_id, base_ty, specs.quals, + /*has_init=*/0, loc, + align_eff); + } + + if (!accept_punct(p, ',')) break; + base_ty = parse_pointer_layer(p, specs.type); + if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected declarator after ','"); + } + name = p->cur.v.ident; + loc = tok_loc(&p->cur); + advance(p); + dattrs = NULL; + parse_attrs_into(p, &dattrs); + while (is_punct(&p->cur, '[')) { + DeclSuffix s; + if (!parse_decl_suffix(p, &s)) break; + base_ty = apply_decl_suffix(p, base_ty, &s); + } + parse_attrs_into(p, &dattrs); + } + expect_punct(p, ';', "';' after global declaration"); +} + +static void parse_file_scope_asm(Parser* p) { + advance(p); /* asm / __asm__ */ + for (;;) { + if (is_kw(p, &p->cur, KW_VOLATILE)) { + advance(p); + continue; + } + if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) { + advance(p); + continue; + } + break; + } + expect_punct(p, '(', "'(' after file-scope asm"); + if (p->cur.kind != TOK_STR) { + perr(p, "expected string literal in file-scope asm"); + } + advance(p); + expect_punct(p, ')', "')' after file-scope asm"); + expect_punct(p, ';', "';' after file-scope asm"); + perr(p, "file-scope asm is disabled"); +} + +static void parse_translation_unit(Parser* p) { + while (p->cur.kind != TOK_EOF) { + if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { + advance(p); + continue; + } + if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) { + parse_static_assert(p); + continue; + } + if (is_kw(p, &p->cur, KW_ASM) || 
is_kw(p, &p->cur, KW_BUILTIN_ASM)) { + parse_file_scope_asm(p); + continue; + } + parse_external_decl(p); + } +} + +/* ============================================================ + * Entry point + * ============================================================ */ + +void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { + Parser p; + CKw i; + + memset(&p, 0, sizeof p); + p.c = c; + p.pp = pp; + p.decls = decls; + p.cg = cg; + p.debug = debug; + p.abi = c->abi; + p.pool = c->global; + + for (i = (CKw)1; i < KW_COUNT; ++i) { + p.kw_sym[i] = pool_intern_cstr(p.pool, kw_names[i]); + } + + p.sym_b_alloca = pool_intern_cstr(p.pool, "__builtin_alloca"); + p.sym_b_ctz = pool_intern_cstr(p.pool, "__builtin_ctz"); + p.sym_b_clz = pool_intern_cstr(p.pool, "__builtin_clz"); + p.sym_b_clzl = pool_intern_cstr(p.pool, "__builtin_clzl"); + p.sym_b_clzll = pool_intern_cstr(p.pool, "__builtin_clzll"); + p.sym_b_trap = pool_intern_cstr(p.pool, "__builtin_trap"); + p.sym_b_unreachable = pool_intern_cstr(p.pool, "__builtin_unreachable"); + p.sym_b_memcpy = pool_intern_cstr(p.pool, "__builtin_memcpy"); + p.sym_b_memmove = pool_intern_cstr(p.pool, "__builtin_memmove"); + p.sym_b_memcmp = pool_intern_cstr(p.pool, "__builtin_memcmp"); + p.sym_b_memset = pool_intern_cstr(p.pool, "__builtin_memset"); + p.sym_func = pool_intern_cstr(p.pool, "__func__"); + p.sym_func_gcc = pool_intern_cstr(p.pool, "__FUNCTION__"); + p.sym_pretty_func_gcc = pool_intern_cstr(p.pool, "__PRETTY_FUNCTION__"); + p.sym_b_expect = pool_intern_cstr(p.pool, "__builtin_expect"); + p.sym_b_offsetof = pool_intern_cstr(p.pool, "__builtin_offsetof"); + p.sym_b_va_list = pool_intern_cstr(p.pool, "__builtin_va_list"); + p.sym_b_va_start = pool_intern_cstr(p.pool, "__builtin_va_start"); + p.sym_b_va_arg = pool_intern_cstr(p.pool, "__builtin_va_arg"); + p.sym_b_va_end = pool_intern_cstr(p.pool, "__builtin_va_end"); + p.sym_b_va_copy = pool_intern_cstr(p.pool, "__builtin_va_copy"); + p.sym_attribute = 
pool_intern_cstr(p.pool, "__attribute__"); + p.sym_volatile_alias = pool_intern_cstr(p.pool, "__volatile__"); + p.sym_alignof_alias = pool_intern_cstr(p.pool, "__alignof__"); + p.sym_int128 = pool_intern_cstr(p.pool, "__int128"); + p.sym_int128_t = pool_intern_cstr(p.pool, "__int128_t"); + p.sym_uint128_t = pool_intern_cstr(p.pool, "__uint128_t"); + p.sym_a_load_n = pool_intern_cstr(p.pool, "__atomic_load_n"); + p.sym_a_store_n = pool_intern_cstr(p.pool, "__atomic_store_n"); + p.sym_a_exchange_n = pool_intern_cstr(p.pool, "__atomic_exchange_n"); + p.sym_a_fetch_add = pool_intern_cstr(p.pool, "__atomic_fetch_add"); + p.sym_a_fetch_sub = pool_intern_cstr(p.pool, "__atomic_fetch_sub"); + p.sym_a_fetch_and = pool_intern_cstr(p.pool, "__atomic_fetch_and"); + p.sym_a_fetch_or = pool_intern_cstr(p.pool, "__atomic_fetch_or"); + p.sym_a_fetch_xor = pool_intern_cstr(p.pool, "__atomic_fetch_xor"); + p.sym_a_cas_n = pool_intern_cstr(p.pool, "__atomic_compare_exchange_n"); + p.sym_a_thread_fence = pool_intern_cstr(p.pool, "__atomic_thread_fence"); + p.sym_a_signal_fence = pool_intern_cstr(p.pool, "__atomic_signal_fence"); + + p.scope = scope_new(&p, NULL); + + p.cur = fetch_tok(&p); + + parse_translation_unit(&p); +} diff --git a/lang/c/parse/parse.h b/lang/c/parse/parse.h @@ -0,0 +1,14 @@ +#ifndef CFREE_LANG_C_PARSE_H +#define CFREE_LANG_C_PARSE_H + +#include "decl/decl.h" +#include "parse/cg_public_compat.h" +#include "pp/pp.h" + +typedef struct Debug Debug; + +/* C11 frontend. Reads preprocessed tokens, records C declarations, and drives + * the public CG API for executable code and object data. */ +void parse_c(Compiler*, Pp*, DeclTable*, CG*, Debug*); + +#endif diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c @@ -0,0 +1,1883 @@ +/* parse_expr.c — precedence climbing, unary/primary, literal decoding, + * constant evaluation. 
*/ + +#include "parse/parse_priv.h" + +static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } +static const Type* ty_size_t(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + + +static CKw ident_kw(const Parser* p, Sym name) { + return ident_kw_inline(p, name); +} + +static int accept_kw(Parser* p, CKw k) { + if (is_kw(p, &p->cur, k)) { + advance(p); + return 1; + } + return 0; +} + +/* ============================================================ + * Literal parsing + * ============================================================ */ + +i64 parse_int_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + i64 base = 10; + i64 acc = 0; + if (!s) perr(p, "bad numeric literal"); + if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + base = 16; + i = 2; + } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { + base = 2; + i = 2; + } else if (len >= 1 && s[0] == '0') { + base = 8; + i = 1; + } + for (; i < len; ++i) { + int c = (unsigned char)s[i]; + int dv; + if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; + if (c >= '0' && c <= '9') + dv = c - '0'; + else if (c >= 'a' && c <= 'f') + dv = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + dv = c - 'A' + 10; + else + perr(p, "bad digit in numeric literal"); + if (dv >= base) perr(p, "digit out of range for base"); + acc = acc * base + dv; + } + return acc; +} + +static const Type* int_literal_type(Parser* p, const Tok* t) { + int u = (t->flags & TF_INT_U) != 0; + int l = (t->flags & TF_INT_L) != 0; + int ll = (t->flags & TF_INT_LL) != 0; + TypeKind k; + if (ll) k = u ? TY_ULLONG : TY_LLONG; + else if (l) k = u ? 
TY_ULONG : TY_LONG; + else if (u) k = TY_UINT; + else k = TY_INT; + return type_prim(p->pool, k); +} + +static double parse_float_literal(Parser* p, const Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + int is_hex = 0; + double v = 0.0; + int exp = 0; + int dec_exp = 0; + int frac_seen = 0; + if (!s) perr(p, "bad float literal"); + if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + is_hex = 1; + i = 2; + } + while (i < len) { + int c = (unsigned char)s[i]; + int dv; + if (c == '.' || c == 'e' || c == 'E' || c == 'p' || c == 'P' || + c == 'f' || c == 'F' || c == 'l' || c == 'L') + break; + if (c >= '0' && c <= '9') dv = c - '0'; + else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; + else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; + else perr(p, "bad digit in float literal"); + v = v * (is_hex ? 16.0 : 10.0) + (double)dv; + i++; + } + if (i < len && s[i] == '.') { + i++; + while (i < len) { + int c = (unsigned char)s[i]; + int dv; + if (c == 'e' || c == 'E' || c == 'p' || c == 'P' || + c == 'f' || c == 'F' || c == 'l' || c == 'L') + break; + if (c >= '0' && c <= '9') dv = c - '0'; + else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; + else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; + else perr(p, "bad digit in float literal"); + v = v * (is_hex ? 16.0 : 10.0) + (double)dv; + exp -= 1; + frac_seen = 1; + i++; + } + } + (void)frac_seen; + if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) { + int neg = 0; + int n = 0; + int hex_exp = (s[i] == 'p' || s[i] == 'P'); + i++; + if (i < len && (s[i] == '+' || s[i] == '-')) { + if (s[i] == '-') neg = 1; + i++; + } + while (i < len) { + int c = (unsigned char)s[i]; + if (c < '0' || c > '9') break; + n = n * 10 + (c - '0'); + i++; + } + dec_exp = neg ? -n : n; + if (hex_exp) { + dec_exp += exp * 4; + exp = 0; + } + } + while (exp < 0) { v /= (is_hex ? 
16.0 : 10.0); exp++; } + while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; } + if (is_hex) { + while (dec_exp < 0) { v /= 2.0; dec_exp++; } + while (dec_exp > 0) { v *= 2.0; dec_exp--; } + } else { + while (dec_exp < 0) { v /= 10.0; dec_exp++; } + while (dec_exp > 0) { v *= 10.0; dec_exp--; } + } + return v; +} + +static const Type* float_literal_type(Parser* p, const Tok* t) { + if (t->flags & TF_FLT_F) return type_prim(p->pool, TY_FLOAT); + if (t->flags & TF_FLT_L) return type_prim(p->pool, TY_LDOUBLE); + return type_prim(p->pool, TY_DOUBLE); +} + +static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi, + SrcLoc loc) { + size_t i = *pi; + i64 v; + int c; + if (i >= len) compiler_panic(p->c, loc, "truncated character literal"); + if (s[i] != '\\') { + v = (unsigned char)s[i++]; + *pi = i; + return v; + } + i++; + if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal"); + c = (unsigned char)s[i++]; + switch (c) { + case 'n': v = '\n'; break; + case 't': v = '\t'; break; + case 'r': v = '\r'; break; + case 'b': v = '\b'; break; + case 'f': v = '\f'; break; + case 'v': v = '\v'; break; + case 'a': v = '\a'; break; + case '\\': v = '\\'; break; + case '\'': v = '\''; break; + case '"': v = '"'; break; + case '?': v = '?'; break; + case 'x': { + i64 hex = 0; + int any = 0; + while (i < len) { + int d = (unsigned char)s[i]; + int dv; + if (d >= '0' && d <= '9') dv = d - '0'; + else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10; + else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10; + else break; + hex = hex * 16 + dv; + any = 1; + i++; + } + if (!any) compiler_panic(p->c, loc, "\\x with no hex digits"); + v = hex & 0xff; + break; + } + default: + if (c >= '0' && c <= '7') { + i64 oct = c - '0'; + int n = 1; + while (n < 3 && i < len && s[i] >= '0' && s[i] <= '7') { + oct = oct * 8 + (s[i] - '0'); + i++; + n++; + } + v = oct & 0xff; + } else { + v = c; + } + break; + } + *pi = i; + return v; +} + +i64 decode_char_literal(Parser* p, const 
Tok* t) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + i64 v; + if (!s) perr(p, "bad char literal"); + if (t->flags & TF_STR_U8) i = 2; + else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; + if (i >= len || s[i] != '\'') perr(p, "malformed character literal"); + i++; + if (i >= len || s[i] == '\'') perr(p, "empty character literal"); + v = decode_one_char(p, s, len, &i, t->loc); + if (i >= len || s[i] != '\'') { + perr(p, "multi-character constants are not supported"); + } + return v; +} + +u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) { + size_t len = 0; + const char* s = pool_str(p->pool, t->spelling, &len); + size_t i = 0; + Heap* h = p->c->env->heap; + u8* buf; + size_t k = 0; + if (!s) perr(p, "bad string literal"); + if (t->flags & TF_STR_U8) i = 2; + else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; + if (i >= len || s[i] != '"') perr(p, "malformed string literal"); + i++; + buf = (u8*)h->alloc(h, len + 1, 1); + if (!buf) perr(p, "out of memory in string literal"); + while (i < len && s[i] != '"') { + i64 ch = decode_one_char(p, s, len, &i, t->loc); + buf[k++] = (u8)ch; + } + buf[k++] = 0; + *nlen_out = k; + return buf; +} + +CfreeCgSym emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) { + const Type* arr_ty = type_array(p->pool, type_prim(p->pool, TY_CHAR), (u32)n, 0); + return cfree_cg_const_data(p->cg, bytes, n, 1u, pcg_tid(p->c, arr_ty)); +} + +/* ============================================================ + * Constant expression evaluator (cexpr_*) + * ============================================================ */ + +static i64 cexpr_unary(Parser* p, SrcLoc loc); +static const Type* offsetof_designator(Parser* p, const Type* base, u32* off); + +static i64 cexpr_mul(Parser* p, SrcLoc loc) { + i64 v = cexpr_unary(p, loc); + for (;;) { + if (accept_punct(p, '*')) v = v * cexpr_unary(p, loc); + else if (accept_punct(p, '/')) { + i64 r = 
cexpr_unary(p, loc); + if (r == 0) compiler_panic(p->c, loc, "division by zero in constant"); + v = v / r; + } else if (accept_punct(p, '%')) { + i64 r = cexpr_unary(p, loc); + if (r == 0) compiler_panic(p->c, loc, "modulo by zero in constant"); + v = v % r; + } else break; + } + return v; +} +static i64 cexpr_add(Parser* p, SrcLoc loc) { + i64 v = cexpr_mul(p, loc); + for (;;) { + if (accept_punct(p, '+')) v = v + cexpr_mul(p, loc); + else if (accept_punct(p, '-')) v = v - cexpr_mul(p, loc); + else break; + } + return v; +} +static i64 cexpr_shift(Parser* p, SrcLoc loc) { + i64 v = cexpr_add(p, loc); + for (;;) { + if (accept_punct(p, P_SHL)) v = v << cexpr_add(p, loc); + else if (accept_punct(p, P_SHR)) v = v >> cexpr_add(p, loc); + else break; + } + return v; +} +static i64 cexpr_rel(Parser* p, SrcLoc loc) { + i64 v = cexpr_shift(p, loc); + for (;;) { + if (accept_punct(p, P_LE)) v = v <= cexpr_shift(p, loc); + else if (accept_punct(p, P_GE)) v = v >= cexpr_shift(p, loc); + else if (is_punct(&p->cur, '<')) { + advance(p); v = v < cexpr_shift(p, loc); + } else if (is_punct(&p->cur, '>')) { + advance(p); v = v > cexpr_shift(p, loc); + } else break; + } + return v; +} +static i64 cexpr_eq(Parser* p, SrcLoc loc) { + i64 v = cexpr_rel(p, loc); + for (;;) { + if (accept_punct(p, P_EQ)) v = (v == cexpr_rel(p, loc)); + else if (accept_punct(p, P_NE)) v = (v != cexpr_rel(p, loc)); + else break; + } + return v; +} +static i64 cexpr_band(Parser* p, SrcLoc loc) { + i64 v = cexpr_eq(p, loc); + while (is_punct(&p->cur, '&') && !is_punct(&p->cur, P_AND)) { + advance(p); + v = v & cexpr_eq(p, loc); + } + return v; +} +static i64 cexpr_bxor(Parser* p, SrcLoc loc) { + i64 v = cexpr_band(p, loc); + while (accept_punct(p, '^')) v = v ^ cexpr_band(p, loc); + return v; +} +static i64 cexpr_bor(Parser* p, SrcLoc loc) { + i64 v = cexpr_bxor(p, loc); + while (is_punct(&p->cur, '|') && !is_punct(&p->cur, P_OR)) { + advance(p); + v = v | cexpr_bxor(p, loc); + } + return v; +} + +static 
i64 cexpr_unary(Parser* p, SrcLoc loc) { + if (accept_punct(p, '+')) return cexpr_unary(p, loc); + if (accept_punct(p, '-')) return -cexpr_unary(p, loc); + if (accept_punct(p, '~')) return ~cexpr_unary(p, loc); + if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 0 : 1; + if (accept_kw(p, KW_SIZEOF)) { + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after sizeof type-name"); + return (i64)abi_sizeof(p->abi, t); + } + } + } + parse_unary(p); + { + const Type* ty = cg_top_type(p->cg); + i64 sz = (i64)abi_sizeof(p->abi, ty); + cg_drop(p->cg); + return sz; + } + } + if (accept_kw(p, KW_ALIGNOF)) { + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after _Alignof type-name"); + return (i64)abi_alignof(p->abi, t); + } + } + } + parse_unary(p); + { + const Type* ty = cg_top_type(p->cg); + i64 al = (i64)abi_alignof(p->abi, ty); + cg_drop(p->cg); + return al; + } + } + if (accept_punct(p, '(')) { + if (starts_type_name(p, &p->cur)) { + const Type* t = parse_type_name(p); + expect_punct(p, ')', "')' after cast type-name"); + { + i64 v = cexpr_unary(p, loc); + u32 sz = abi_sizeof(p->abi, t); + int is_signed = abi_type_info(p->abi, t).signed_; + if (sz < 8) { + u64 mask = (1ull << (sz * 8u)) - 1ull; + u64 uv = (u64)v & mask; + if (is_signed) { + u64 sign = 1ull << (sz * 8u - 1u); + v = (i64)((uv ^ sign) - sign); + } else { + v = (i64)uv; + } + } + return v; + } + } + { + i64 v = cexpr_bor(p, loc); + expect_punct(p, ')', "')' in constant expression"); + return v; + } + } + if (p->cur.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &p->cur); + advance(p); + return v; + } + if (p->cur.kind == TOK_CHR) { + i64 v = decode_char_literal(p, &p->cur); + advance(p); + return v; + } + if (p->cur.kind == TOK_IDENT) { + Sym name = p->cur.v.ident; + 
if (name == p->sym_b_offsetof) { + u32 off = 0; + const Type* root; + advance(p); /* IDENT */ + expect_punct(p, '(', "'(' after __builtin_offsetof"); + root = parse_type_name(p); + expect_punct(p, ',', "',' in __builtin_offsetof"); + (void)offsetof_designator(p, root, &off); + expect_punct(p, ')', "')' after __builtin_offsetof"); + return (i64)off; + } + { + SymEntry* e = scope_lookup(p, name); + if (e && e->kind == SEK_ENUM_CST) { + advance(p); + return e->v.enum_value; + } + } + compiler_panic(p->c, loc, "non-constant identifier in constant expression"); + } + compiler_panic(p->c, loc, "expected constant expression"); +} + +i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } + +/* ============================================================ + * to_rvalue + * ============================================================ */ + +void to_rvalue(Parser* p) { + const Type* t = cg_top_type(p->cg); + if (t) { + if (t->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem)); + return; + } + if (t->kind == TY_FUNC) { + cg_addr(p->cg); + return; + } + if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; + } + cg_load(p->cg); +} + +/* ============================================================ + * coerce_top_to_lvalue (used by assignment / initializers) + * ============================================================ */ + +void coerce_top_to_lvalue(Parser* p) { + const Type* src = cg_top_type(p->cg); + const Type* dst = cg_top2_type(p->cg); + if (!src || !dst || src == dst) return; + if (type_is_arith(src) && type_is_arith(dst)) { + cg_convert(p->cg, dst); + } +} + +/* ============================================================ + * Builtin call handling + * ============================================================ */ + +static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) { + const Type* cur = base; + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, 
"expected member name in __builtin_offsetof"); + } + for (;;) { + if (cur->kind == TY_STRUCT || cur->kind == TY_UNION) { + Sym mname = p->cur.v.ident; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + /* find_field is static in parse_type.c; we need it here. + * We call abi_record_layout directly inline. */ + const ABIRecordLayout* L = abi_record_layout(p->abi, cur); + if (!L) perr(p, "no such member in __builtin_offsetof"); + int found = 0; + for (u16 i = 0; i < cur->rec.nfields; ++i) { + const Field* f = &cur->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + } + (void)mf; + if (!found) perr(p, "no such member in __builtin_offsetof"); + advance(p); + *off += moff; + cur = mty; + } else if (cur->kind == TY_ARRAY) { + /* fall through to bracket branch */ + } else { + perr(p, "__builtin_offsetof step into non-aggregate"); + } + if (is_punct(&p->cur, '.')) { + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '.'"); + } + continue; + } + if (is_punct(&p->cur, '[')) { + advance(p); + i64 idx = eval_const_int(p, p->cur.loc); + expect_punct(p, ']', "']' in __builtin_offsetof"); + if (cur->kind != TY_ARRAY) { + perr(p, "__builtin_offsetof '[' on non-array"); + } + *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx); + cur = cur->arr.elem; + continue; + } + break; + } + return cur; +} + +static int try_parse_builtin_call(Parser* p) { + Sym name = p->cur.v.ident; + SrcLoc loc = p->cur.loc; + + /* `__builtin_mem{cpy,move,cmp,set}` are GCC/Clang's compiler-inlinable + * aliases for the libc functions. cfree's INTRIN_MEMCPY/MEMMOVE + * backend paths only handle constant byte counts, but the rt code + * calls them with runtime sizes. Rewrite each builtin into a plain + * call and let the normal function-call path handle it. 
The caller + * (parse_primary) reports a clean "undeclared identifier" if the TU + * forgot to declare the underlying libc function. */ + if (name == p->sym_b_memcpy || name == p->sym_b_memmove || + name == p->sym_b_memcmp || name == p->sym_b_memset) { + const char* libname = (name == p->sym_b_memcpy) ? "memcpy" + : (name == p->sym_b_memmove) ? "memmove" + : (name == p->sym_b_memcmp) ? "memcmp" + : "memset"; + p->cur.v.ident = pool_intern_cstr(p->pool, libname); + return 0; + } + + if (name != p->sym_b_alloca && name != p->sym_b_ctz && + name != p->sym_b_clz && name != p->sym_b_clzl && + name != p->sym_b_clzll && name != p->sym_b_trap && + name != p->sym_b_unreachable && + name != p->sym_b_expect && + name != p->sym_b_offsetof && name != p->sym_b_va_start && + name != p->sym_b_va_arg && name != p->sym_b_va_end && + name != p->sym_b_va_copy && name != p->sym_a_load_n && + name != p->sym_a_store_n && name != p->sym_a_exchange_n && + name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub && + name != p->sym_a_fetch_and && name != p->sym_a_fetch_or && + name != p->sym_a_fetch_xor && name != p->sym_a_cas_n && + name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) { + return 0; + } + advance(p); /* IDENT */ + expect_punct(p, '(', "'(' after builtin"); + + if (name == p->sym_b_offsetof) { + const Type* root = parse_type_name(p); + expect_punct(p, ',', "',' in __builtin_offsetof"); + u32 off = 0; + (void)offsetof_designator(p, root, &off); + expect_punct(p, ')', "')' after __builtin_offsetof"); + cg_push_int(p->cg, (i64)off, ty_size_t(p)); + return 1; + } + + if (name == p->sym_b_expect) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __builtin_expect"); + parse_assign_expr(p); + cg_drop(p->cg); + expect_punct(p, ')', "')' after __builtin_expect"); + return 1; + } + + if (name == p->sym_b_alloca) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after __builtin_alloca"); + cg_set_loc(p->cg, loc); + cg_alloca(p->cg); 
+ return 1; + } + + if (name == p->sym_b_ctz) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after __builtin_ctz"); + cg_set_loc(p->cg, loc); + cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ); + return 1; + } + + if (name == p->sym_b_clz || name == p->sym_b_clzl || + name == p->sym_b_clzll) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after __builtin_clz"); + cg_set_loc(p->cg, loc); + /* The operand carries its own type, which drives the sf bit on + * aarch64 / REX.W on x64 / sf on rv64. Whether the caller used the + * `l` / `ll` suffix only changes the C-level type the user wrote; + * cfree picks the instruction width from the value type. */ + cg_intrinsic_unary_to_int(p->cg, INTRIN_CLZ); + return 1; + } + + if (name == p->sym_b_trap || name == p->sym_b_unreachable) { + expect_punct(p, ')', "')' after __builtin_trap/unreachable"); + cg_set_loc(p->cg, loc); + cg_intrinsic_void(p->cg, + name == p->sym_b_trap ? INTRIN_TRAP : INTRIN_UNREACHABLE); + /* Both are noreturn at the C level. Push a dummy `int 0` so callers + * that consume an expression value (e.g. ternary, comma) don't see + * an empty stack — the dead value will be folded out. 
*/ + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_start) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_start"); + parse_assign_expr(p); + cg_drop(p->cg); + expect_punct(p, ')', "')' after __builtin_va_start"); + cg_set_loc(p->cg, loc); + cg_va_start_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_end) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ')', "')' after __builtin_va_end"); + cg_set_loc(p->cg, loc); + cg_va_end_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_copy) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_copy"); + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ')', "')' after __builtin_va_copy"); + cg_set_loc(p->cg, loc); + cg_va_copy_(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_b_va_arg) { + parse_assign_expr(p); + cg_addr(p->cg); + expect_punct(p, ',', "',' in __builtin_va_arg"); + const Type* ty = parse_type_name(p); + expect_punct(p, ')', "')' after __builtin_va_arg"); + cg_set_loc(p->cg, loc); + cg_va_arg_(p->cg, ty); + return 1; + } + + if (name == p->sym_a_load_n) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_load_n"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_load_n"); + cg_set_loc(p->cg, loc); + cg_atomic_load(p->cg, (MemOrder)ord); + return 1; + } + + if (name == p->sym_a_store_n) { + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_store_n"); + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in __atomic_store_n"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_store_n"); + cg_set_loc(p->cg, loc); + cg_atomic_store(p->cg, (MemOrder)ord); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == 
p->sym_a_thread_fence || name == p->sym_a_signal_fence) { + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after atomic fence"); + cg_set_loc(p->cg, loc); + cg_fence(p->cg, (MemOrder)ord); + cg_push_int(p->cg, 0, ty_int(p)); + return 1; + } + + if (name == p->sym_a_cas_n) { + parse_assign_expr(p); to_rvalue(p); /* ptr */ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + + parse_assign_expr(p); to_rvalue(p); /* &expected */ + const Type* eptr_ty = cg_top_type(p->cg); + if (!eptr_ty || eptr_ty->kind != TY_PTR) { + perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer"); + } + const Type* val_ty = eptr_ty->ptr.pointee; + + FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd); + fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL; + FrameSlot eslot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_swap(p->cg); + cg_store(p->cg); cg_drop(p->cg); + + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_load(p->cg); + cg_deref(p->cg, val_ty); + cg_load(p->cg); + + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + parse_assign_expr(p); to_rvalue(p); /* desired */ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + + (void)eval_const_int(p, p->cur.loc); /* weak */ + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + i64 succ = eval_const_int(p, p->cur.loc); + expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); + i64 fail = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after __atomic_compare_exchange_n"); + + cg_set_loc(p->cg, loc); + cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail); + + const Type* ok_ty = cg_top_type(p->cg); + FrameSlotDesc okd; memset(&okd, 0, sizeof okd); + okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL; + FrameSlot okslot = cg_local(p->cg, &okd); + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); + + FrameSlotDesc pd; memset(&pd, 0, sizeof pd); + pd.type 
= val_ty; + pd.size = abi_sizeof(p->abi, val_ty); + pd.align = abi_alignof(p->abi, val_ty); + pd.kind = FS_LOCAL; + FrameSlot pslot = cg_local(p->cg, &pd); + cg_push_local_typed(p->cg, pslot, val_ty); + cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); + + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_load(p->cg); + CGLabel L_done = cg_label_new(p->cg); + cg_branch_true(p->cg, L_done); + cg_push_local_typed(p->cg, eslot, eptr_ty); + cg_load(p->cg); + cg_deref(p->cg, val_ty); + cg_push_local_typed(p->cg, pslot, val_ty); + cg_load(p->cg); + cg_store(p->cg); cg_drop(p->cg); + cg_label_place(p->cg, L_done); + + cg_push_local_typed(p->cg, okslot, ok_ty); + cg_load(p->cg); + return 1; + } + + AtomicOp op; + if (name == p->sym_a_exchange_n) op = AO_XCHG; + else if (name == p->sym_a_fetch_add) op = AO_ADD; + else if (name == p->sym_a_fetch_sub) op = AO_SUB; + else if (name == p->sym_a_fetch_and) op = AO_AND; + else if (name == p->sym_a_fetch_or) op = AO_OR; + else if (name == p->sym_a_fetch_xor) op = AO_XOR; + else { perr(p, "internal: unhandled builtin"); } + + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in atomic builtin"); + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ',', "',' in atomic builtin"); + i64 ord = eval_const_int(p, p->cur.loc); + expect_punct(p, ')', "')' after atomic builtin"); + cg_set_loc(p->cg, loc); + cg_atomic_rmw(p->cg, op, (MemOrder)ord); + return 1; +} + +/* ============================================================ + * parse_primary, parse_postfix, parse_unary + * ============================================================ */ + +static void parse_primary(Parser* p) { + Tok t = p->cur; + if (t.kind == TOK_NUM) { + i64 v = parse_int_literal(p, &t); + const Type* lty = int_literal_type(p, &t); + advance(p); + cg_push_int(p->cg, v, lty); + return; + } + if (t.kind == TOK_FLT) { + double v = parse_float_literal(p, &t); + const Type* lty = float_literal_type(p, &t); + advance(p); + cg_push_float(p->cg, v, lty); + 
return; + } + if (is_punct(&t, '(')) { + advance(p); + parse_expr(p); + expect_punct(p, ')', "')'"); + return; + } + if (t.kind == TOK_IDENT) { + SymEntry* e; + if (ident_kw(p, t.v.ident) != KW_NONE) { + perr(p, "unexpected keyword in expression"); + } + { + Tok n = peek1(p); + if (is_punct(&n, '(') && try_parse_builtin_call(p)) return; + } + /* try_parse_builtin_call may rewrite the current ident in-place + * (e.g. __builtin_memcpy → memcpy) and return 0, asking us to + * resume normal lookup with the rewritten name. */ + t = p->cur; + /* C99 §6.4.2.2: `__func__` inside a function-body acts as + * static const char __func__[] = "<function-name>"; + * GCC also exposes `__FUNCTION__` and `__PRETTY_FUNCTION__` with + * the same value. We synthesize the string lazily — the symbol + * lives in .rodata and the resulting type is `char[N+1]` (with the + * trailing NUL). */ + if (t.v.ident == p->sym_func || t.v.ident == p->sym_func_gcc || + t.v.ident == p->sym_pretty_func_gcc) { + if (p->cur_func_name == 0) { + compiler_panic(p->c, t.loc, "'%s' used outside a function", + t.v.ident == p->sym_func ? "__func__" + : t.v.ident == p->sym_func_gcc ? "__FUNCTION__" + : "__PRETTY_FUNCTION__"); + } + size_t nlen = 0; + const char* fn_name = pool_str(p->pool, p->cur_func_name, &nlen); + Heap* h = p->c->env->heap; + u8* bytes = (u8*)h->alloc(h, nlen + 1u, 1u); + for (size_t i = 0; i < nlen; ++i) bytes[i] = (u8)fn_name[i]; + bytes[nlen] = 0; + ObjSymId sym = emit_string_to_rodata(p, bytes, nlen + 1u); + h->free(h, bytes, 0); + advance(p); + const Type* char_ty = type_prim(p->pool, TY_CHAR); + const Type* arr_ty = type_array(p->pool, char_ty, (u32)(nlen + 1u), 0); + cg_push_global(p->cg, sym, arr_ty); + return; + } + e = scope_lookup(p, t.v.ident); + if (!e) { + size_t nlen = 0; + const char* nm = pool_str(p->pool, t.v.ident, &nlen); + compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen, + nm ? 
nm : "?"); + } + advance(p); + switch (e->kind) { + case SEK_LOCAL: + cg_push_local_typed(p->cg, e->v.slot, e->type); + if (e->vla_byte_slot != FRAME_SLOT_NONE) { + p->last_pushed_vla_slot = e->vla_byte_slot; + } + return; + case SEK_GLOBAL: + case SEK_FUNC: + cg_push_global(p->cg, e->v.sym, e->type); + return; + case SEK_ENUM_CST: + cg_push_int(p->cg, e->v.enum_value, e->type); + return; + case SEK_TYPEDEF: + default: + perr(p, "identifier is not a value"); + } + } + if (t.kind == TOK_CHR) { + i64 v = decode_char_literal(p, &t); + advance(p); + cg_push_int(p->cg, v, ty_int(p)); + return; + } + if (t.kind == TOK_STR) { + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + ObjSymId sym = emit_string_to_rodata(p, bytes, n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); + { + const Type* char_ty = type_prim(p->pool, TY_CHAR); + const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); + cg_push_global(p->cg, sym, arr_ty); + } + return; + } + perr(p, "expected expression"); +} + +static void parse_postfix(Parser* p) { + parse_primary(p); + for (;;) { + Tok t = p->cur; + if (is_punct(&t, P_INC)) { + advance(p); + cg_inc_dec(p->cg, BO_IADD, /*post=*/1); + continue; + } + if (is_punct(&t, P_DEC)) { + advance(p); + cg_inc_dec(p->cg, BO_ISUB, /*post=*/1); + continue; + } + if (is_punct(&t, '(')) { + const Type* top = cg_top_type(p->cg); + const Type* fn_type; + if (top && top->kind == TY_FUNC) { + fn_type = top; + } else if (top && top->kind == TY_PTR && top->ptr.pointee && + top->ptr.pointee->kind == TY_FUNC) { + fn_type = top->ptr.pointee; + cg_load(p->cg); + } else { + perr(p, "called object is not a function"); + } + advance(p); /* '(' */ + u32 nargs = 0; + if (!is_punct(&p->cur, ')')) { + for (;;) { + parse_assign_expr(p); + to_rvalue(p); + ++nargs; + if (!accept_punct(p, ',')) break; + } + } + expect_punct(p, ')', "')' after argument list"); + if (fn_type->fn.nparams != nargs && !fn_type->fn.variadic) { + perr(p, "wrong number of 
arguments"); + } + if (fn_type->fn.variadic && nargs < fn_type->fn.nparams) { + perr(p, "too few arguments to variadic function"); + } + cg_call(p->cg, nargs, fn_type); + if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) { + cg_push_int(p->cg, 0, ty_int(p)); + } + continue; + } + if (is_punct(&t, '[')) { + const Type* lt0 = cg_top_type(p->cg); + advance(p); /* '[' */ + if (lt0 && lt0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem)); + } else if (lt0 && lt0->kind == TY_PTR) { + cg_load(p->cg); + } + parse_expr(p); + { + const Type* it0 = cg_top_type(p->cg); + if (it0 && it0->kind == TY_ARRAY) { + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem)); + } else { + to_rvalue(p); + } + } + expect_punct(p, ']', "']' after subscript"); + { + const Type* lt = cg_top2_type(p->cg); + const Type* it = cg_top_type(p->cg); + const Type* elem; + if (lt && lt->kind == TY_PTR && type_is_int(it)) { + elem = lt->ptr.pointee; + } else if (it && it->kind == TY_PTR && type_is_int(lt)) { + cg_swap(p->cg); + elem = it->ptr.pointee; + } else { + perr(p, "invalid subscript: needs one pointer and one integer"); + } + if (!elem) perr(p, "subscript on incomplete pointee"); + u32 esz = abi_sizeof(p->abi, elem); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + cg_deref(p->cg, elem); + } + continue; + } + if (is_punct(&t, '.')) { + const Type* lt = cg_top_type(p->cg); + Sym mname; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + advance(p); /* '.' 
*/ + if (!lt || (lt->kind != TY_STRUCT && lt->kind != TY_UNION)) { + perr(p, "request for member in something that is not a struct or union"); + } + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '.'"); + } + mname = p->cur.v.ident; + advance(p); + { + const ABIRecordLayout* L = abi_record_layout(p->abi, lt); + if (!L) perr(p, "no such member"); + int found = 0; + for (u16 i = 0; i < lt->rec.nfields; ++i) { + const Field* f = &lt->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + /* anonymous member flattening */ + if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || + f->type->kind == TY_UNION)) { + const Type* inner_ty = NULL; + u32 inner_off = 0; + const Field* inner_f = NULL; + const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); + if (IL) { + for (u16 j = 0; j < f->type->rec.nfields; ++j) { + const Field* ff = &f->type->rec.fields[j]; + if (ff->name == mname && mname != 0) { + inner_ty = ff->type; + inner_off = IL->fields[j].offset; + inner_f = ff; + break; + } + } + } + if (inner_ty) { + mty = inner_ty; + moff = L->fields[i].offset + inner_off; + mf = inner_f; + found = 1; + break; + } + } + } + if (!found) perr(p, "no such member"); + } + (void)mf; + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, mty)); + if (moff > 0) { + cg_push_int(p->cg, (i64)moff, ty_size_t(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, mty); + continue; + } + if (is_punct(&t, P_ARROW)) { + const Type* lt0; + const Type* rec_ty; + Sym mname; + const Type* mty = NULL; + u32 moff = 0; + const Field* mf = NULL; + advance(p); /* `->` */ + to_rvalue(p); + lt0 = cg_top_type(p->cg); + if (!lt0 || lt0->kind != TY_PTR) { + perr(p, "'->' requires a pointer operand"); + } + rec_ty = lt0->ptr.pointee; + if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) { + perr(p, "'->' on pointer to 
non-struct/union"); + } + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected member name after '->'"); + } + mname = p->cur.v.ident; + advance(p); + { + const ABIRecordLayout* L = abi_record_layout(p->abi, rec_ty); + if (!L) perr(p, "no such member"); + int found = 0; + for (u16 i = 0; i < rec_ty->rec.nfields; ++i) { + const Field* f = &rec_ty->rec.fields[i]; + if (f->name == mname && mname != 0) { + mty = f->type; + moff = L->fields[i].offset; + mf = f; + found = 1; + break; + } + if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || + f->type->kind == TY_UNION)) { + const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); + if (IL) { + for (u16 j = 0; j < f->type->rec.nfields; ++j) { + const Field* ff = &f->type->rec.fields[j]; + if (ff->name == mname && mname != 0) { + mty = ff->type; + moff = L->fields[i].offset + IL->fields[j].offset; + mf = ff; + found = 1; + break; + } + } + } + if (found) break; + } + } + if (!found) perr(p, "no such member"); + } + (void)mf; + if (moff > 0) { + cg_push_int(p->cg, (i64)moff, ty_size_t(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, mty); + continue; + } + break; + } +} + +void parse_unary(Parser* p) { + Tok t = p->cur; + if (is_punct(&t, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + const Type* dst; + const Type* src; + advance(p); /* '(' */ + dst = parse_type_name(p); + expect_punct(p, ')', "')' after type-name"); + if (is_punct(&p->cur, '{')) { + FrameSlotDesc fsd; + FrameSlot slot; + const Type* lit_ty = dst; + if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) { + lit_ty = complete_incomplete_array(p, lit_ty); + } + memset(&fsd, 0, sizeof fsd); + fsd.type = lit_ty; + fsd.size = abi_sizeof(p->abi, lit_ty); + fsd.align = abi_alignof(p->abi, lit_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + slot = cg_local(p->cg, &fsd); + if (lit_ty && (lit_ty->kind == TY_ARRAY || lit_ty->kind == TY_STRUCT || + lit_ty->kind == 
TY_UNION)) { + init_at(p, slot, lit_ty, 0, lit_ty); + } else { + init_at(p, slot, lit_ty, 0, lit_ty); + } + cg_push_local_typed(p->cg, slot, lit_ty); + return; + } + parse_unary(p); + to_rvalue(p); + if (dst && dst->kind == TY_VOID) { + cg_drop(p->cg); + cg_push_int(p->cg, 0, ty_int(p)); + return; + } + src = cg_top_type(p->cg); + if (src && src->kind == TY_PTR && dst->kind == TY_PTR) { + cg_convert(p->cg, dst); + return; + } + cg_convert(p->cg, dst); + return; + } + } + if (is_punct(&t, '+')) { + advance(p); + parse_unary(p); + to_rvalue(p); + return; + } + if (is_punct(&t, '-')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_NEG); + return; + } + if (is_punct(&t, '!')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_push_int(p->cg, 0, ty_int(p)); + cg_cmp(p->cg, CMP_EQ); + return; + } + if (is_punct(&t, '~')) { + advance(p); + parse_unary(p); + to_rvalue(p); + cg_unop(p->cg, UO_BNOT); + return; + } + if (is_punct(&t, '&')) { + advance(p); + parse_unary(p); + cg_addr(p->cg); + return; + } + if (is_punct(&t, '*')) { + const Type* pty; + const Type* pointee; + advance(p); + parse_unary(p); + to_rvalue(p); + pty = cg_top_type(p->cg); + if (!pty || pty->kind != TY_PTR) { + perr(p, "indirection requires pointer operand"); + } + pointee = pty->ptr.pointee; + if (pointee && pointee->kind == TY_VOID) { + perr(p, "dereferencing pointer to incomplete type"); + } + cg_deref(p->cg, pointee); + return; + } + if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) { + BinOp bop = is_punct(&t, P_INC) ? 
BO_IADD : BO_ISUB; + advance(p); + parse_unary(p); + cg_inc_dec(p->cg, bop, /*post=*/0); + return; + } + if (is_kw(p, &t, KW_SIZEOF)) { + const Type* ty = NULL; + FrameSlot vla_slot = FRAME_SLOT_NONE; + advance(p); + if (is_punct(&p->cur, '(')) { + Tok n = peek1(p); + if (starts_type_name(p, &n)) { + advance(p); + ty = parse_type_name(p); + expect_punct(p, ')', "')'"); + } else { + p->last_pushed_vla_slot = FRAME_SLOT_NONE; + parse_unary(p); + ty = cg_top_type(p->cg); + vla_slot = p->last_pushed_vla_slot; + cg_drop(p->cg); + } + } else { + p->last_pushed_vla_slot = FRAME_SLOT_NONE; + parse_unary(p); + ty = cg_top_type(p->cg); + vla_slot = p->last_pushed_vla_slot; + cg_drop(p->cg); + } + if (vla_slot != FRAME_SLOT_NONE) { + cg_push_local_typed(p->cg, vla_slot, ty_size_t(p)); + cg_load(p->cg); + } else { + cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p)); + } + return; + } + if (is_kw(p, &t, KW_GENERIC)) { + advance(p); + expect_punct(p, '(', "'('"); + parse_assign_expr(p); + to_rvalue(p); + const Type* ctl_ty = cg_top_type(p->cg); + cg_drop(p->cg); + expect_punct(p, ',', "','"); + int emitted = 0; + Tok* default_buf = NULL; + u32 default_len = 0; + for (;;) { + const Type* assoc_ty = NULL; + int is_default = 0; + if (is_kw(p, &p->cur, KW_DEFAULT)) { + advance(p); + is_default = 1; + } else { + assoc_ty = parse_type_name(p); + } + expect_punct(p, ':', "':' in _Generic association"); + int take = 0; + if (!emitted && !is_default && ctl_ty && assoc_ty && + ctl_ty->kind == assoc_ty->kind) { + take = 1; + } + if (take) { + parse_assign_expr(p); + emitted = 1; + } else if (is_default && !default_buf) { + u32 cap = 16; + Tok* buf = arena_array(p->c->tu, Tok, cap); + u32 len = 0; + int paren_depth = 0, brack_depth = 0, brace_depth = 0; + while (p->cur.kind != TOK_EOF) { + if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { + if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; + } + if (len == cap) { + u32 new_cap = cap * 2; + Tok* nv = 
arena_array(p->c->tu, Tok, new_cap); + if (!nv) perr(p, "out of memory recording _Generic default"); + memcpy(nv, buf, len * sizeof(Tok)); + buf = nv; + cap = new_cap; + } + buf[len++] = p->cur; + if (is_punct(&p->cur, '(')) ++paren_depth; + else if (is_punct(&p->cur, ')')) --paren_depth; + else if (is_punct(&p->cur, '[')) ++brack_depth; + else if (is_punct(&p->cur, ']')) --brack_depth; + else if (is_punct(&p->cur, '{')) ++brace_depth; + else if (is_punct(&p->cur, '}')) --brace_depth; + advance(p); + } + if (len == cap) { + u32 new_cap = cap + 1; + Tok* nv = arena_array(p->c->tu, Tok, new_cap); + if (!nv) perr(p, "out of memory recording _Generic default"); + memcpy(nv, buf, len * sizeof(Tok)); + buf = nv; + cap = new_cap; + } + memset(&buf[len], 0, sizeof(Tok)); + buf[len].kind = TOK_PUNCT; + buf[len].v.punct = ','; + ++len; + default_buf = buf; + default_len = len; + } else { + int paren_depth = 0; + int brack_depth = 0; + int brace_depth = 0; + while (p->cur.kind != TOK_EOF) { + if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { + if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; + } + if (is_punct(&p->cur, '(')) ++paren_depth; + else if (is_punct(&p->cur, ')')) --paren_depth; + else if (is_punct(&p->cur, '[')) ++brack_depth; + else if (is_punct(&p->cur, ']')) --brack_depth; + else if (is_punct(&p->cur, '{')) ++brace_depth; + else if (is_punct(&p->cur, '}')) --brace_depth; + advance(p); + } + } + if (!accept_punct(p, ',')) break; + } + if (!emitted && default_buf) { + Tok* save_replay = p->replay; + u32 save_cap = p->replay_cap; + u32 save_len = p->replay_len; + u32 save_pos = p->replay_pos; + u8 save_active = p->replay_active; + Tok save_cur = p->cur; + int save_has_next = p->has_next; + p->replay = default_buf; + p->replay_cap = default_len; + p->replay_len = default_len; + p->replay_pos = 1; + p->replay_active = 1; + p->cur = default_buf[0]; + p->has_next = 0; + parse_assign_expr(p); + emitted = 1; + p->replay = save_replay; + 
p->replay_cap = save_cap; + p->replay_len = save_len; + p->replay_pos = save_pos; + p->replay_active = save_active; + p->cur = save_cur; + p->has_next = save_has_next; + } + expect_punct(p, ')', "')' after _Generic"); + if (!emitted) { + perr(p, "_Generic: no association matched and no default present"); + } + return; + } + if (is_kw(p, &t, KW_ALIGNOF)) { + const Type* ty; + advance(p); + expect_punct(p, '(', "'('"); + if (starts_type_name(p, &p->cur)) { + ty = parse_type_name(p); + } else { + parse_unary(p); + ty = cg_top_type(p->cg); + cg_drop(p->cg); + } + expect_punct(p, ')', "')'"); + cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p)); + return; + } + parse_postfix(p); +} + +/* ============================================================ + * Binary operator levels + * ============================================================ */ + +static int type_is_fp(const Type* t) { + return t && (t->kind == TY_FLOAT || t->kind == TY_DOUBLE || + t->kind == TY_LDOUBLE); +} + +static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) { + if (!type_is_fp(a) && !type_is_fp(b)) return NULL; + if ((a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE)) { + return type_prim(p->pool, TY_LDOUBLE); + } + if ((a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE)) { + return type_prim(p->pool, TY_DOUBLE); + } + return type_prim(p->pool, TY_FLOAT); +} + +static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) { + if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); + cg_swap(p->cg); + if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); + cg_swap(p->cg); + BinOp fop; + switch (bop) { + case BO_IADD: fop = BO_FADD; break; + case BO_ISUB: fop = BO_FSUB; break; + case BO_IMUL: fop = BO_FMUL; break; + case BO_SDIV: fop = BO_FDIV; break; + default: + perr(p, "operator does not apply to floating types"); + return; + } + cg_binop(p->cg, fop); +} + +static void parse_mul(Parser* p) { + parse_unary(p); + for (;;) { + Tok t = 
p->cur; + BinOp bop; + if (is_punct(&t, '*')) { + bop = BO_IMUL; + } else if (is_punct(&t, '/')) { + bop = BO_SDIV; + } else if (is_punct(&t, '%')) { + bop = BO_SREM; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_unary(p); + to_rvalue(p); + const Type* lt = cg_top2_type(p->cg); + const Type* rt = cg_top_type(p->cg); + const Type* common = common_fp_type(p, lt, rt); + if (common) { + emit_fp_binop(p, bop, common); + } else { + cg_binop(p->cg, bop); + } + } +} + +static void emit_add_or_sub(Parser* p, BinOp bop) { + const Type* lt = cg_top2_type(p->cg); + const Type* rt = cg_top_type(p->cg); + int l_is_ptr = lt && lt->kind == TY_PTR; + int r_is_ptr = rt && rt->kind == TY_PTR; + if (bop == BO_IADD) { + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + if (r_is_ptr && type_is_int(lt)) { + cg_swap(p->cg); + u32 esz = abi_sizeof(p->abi, rt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_IADD); + return; + } + } else { /* BO_ISUB */ + if (l_is_ptr && type_is_int(rt)) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_IMUL); + } + cg_binop(p->cg, BO_ISUB); + return; + } + if (l_is_ptr && r_is_ptr) { + u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); + cg_binop(p->cg, BO_ISUB); + if (esz != 1) { + cg_push_int(p->cg, (i64)esz, ty_size_t(p)); + cg_binop(p->cg, BO_SDIV); + } + return; + } + } + const Type* common = common_fp_type(p, lt, rt); + if (common) { + emit_fp_binop(p, bop, common); + return; + } + cg_binop(p->cg, bop); +} + +static void parse_add(Parser* p) { + parse_mul(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, '+')) { + bop = BO_IADD; + } else if (is_punct(&t, '-')) { + bop = BO_ISUB; + } 
else { + break; + } + advance(p); + to_rvalue(p); + parse_mul(p); + to_rvalue(p); + emit_add_or_sub(p, bop); + } +} + +static void parse_shift(Parser* p) { + parse_add(p); + for (;;) { + Tok t = p->cur; + BinOp bop; + if (is_punct(&t, P_SHL)) { + bop = BO_SHL; + } else if (is_punct(&t, P_SHR)) { + bop = BO_SHR_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_add(p); + to_rvalue(p); + cg_binop(p->cg, bop); + } +} + +static void parse_rel(Parser* p) { + parse_shift(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, '<')) { + cop = CMP_LT_S; + } else if (is_punct(&t, '>')) { + cop = CMP_GT_S; + } else if (is_punct(&t, P_LE)) { + cop = CMP_LE_S; + } else if (is_punct(&t, P_GE)) { + cop = CMP_GE_S; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_shift(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_eq(Parser* p) { + parse_rel(p); + for (;;) { + Tok t = p->cur; + CmpOp cop; + if (is_punct(&t, P_EQ)) { + cop = CMP_EQ; + } else if (is_punct(&t, P_NE)) { + cop = CMP_NE; + } else { + break; + } + advance(p); + to_rvalue(p); + parse_rel(p); + to_rvalue(p); + cg_cmp(p->cg, cop); + } +} + +static void parse_band(Parser* p) { + parse_eq(p); + while (is_punct(&p->cur, '&')) { + advance(p); + to_rvalue(p); + parse_eq(p); + to_rvalue(p); + cg_binop(p->cg, BO_AND); + } +} + +static void parse_bxor(Parser* p) { + parse_band(p); + while (is_punct(&p->cur, '^')) { + advance(p); + to_rvalue(p); + parse_band(p); + to_rvalue(p); + cg_binop(p->cg, BO_XOR); + } +} + +static void parse_bor(Parser* p) { + parse_bxor(p); + while (is_punct(&p->cur, '|')) { + advance(p); + to_rvalue(p); + parse_bxor(p); + to_rvalue(p); + cg_binop(p->cg, BO_OR); + } +} + +static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ty; + fsd.size = abi_sizeof(p->abi, ty); + fsd.align = abi_alignof(p->abi, ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + return cg_local(p->cg, 
&fsd); +} + +static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) { + cg_push_local_typed(p->cg, tmp, ty); + cg_push_int(p->cg, v, ty); + cg_store(p->cg); + cg_drop(p->cg); +} + +static void parse_land(Parser* p) { + parse_bor(p); + while (is_punct(&p->cur, P_AND)) { + CGLabel L_false = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp = ll_tmp_slot(p, result_ty); + advance(p); + to_rvalue(p); + cg_branch_false(p->cg, L_false); + parse_bor(p); + to_rvalue(p); + cg_branch_false(p->cg, L_false); + ll_store_const(p, tmp, result_ty, 1); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_false); + ll_store_const(p, tmp, result_ty, 0); + cg_label_place(p->cg, L_end); + cg_push_local_typed(p->cg, tmp, result_ty); + } +} + +static void parse_lor(Parser* p) { + parse_land(p); + while (is_punct(&p->cur, P_OR)) { + CGLabel L_true = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp = ll_tmp_slot(p, result_ty); + advance(p); + to_rvalue(p); + cg_branch_true(p->cg, L_true); + parse_land(p); + to_rvalue(p); + cg_branch_true(p->cg, L_true); + ll_store_const(p, tmp, result_ty, 0); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_true); + ll_store_const(p, tmp, result_ty, 1); + cg_label_place(p->cg, L_end); + cg_push_local_typed(p->cg, tmp, result_ty); + } +} + +static const Type* common_fp_type(Parser* p, const Type* a, const Type* b); + +static void parse_ternary(Parser* p) { + parse_lor(p); + if (!is_punct(&p->cur, '?')) return; + CGLabel L_else = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + const Type* result_ty = ty_int(p); + FrameSlot tmp; + FrameSlotDesc fsd; + advance(p); /* '?' 
*/ + to_rvalue(p); + cg_branch_false(p->cg, L_else); + parse_assign_expr(p); + to_rvalue(p); + result_ty = cg_top_type(p->cg); + if (!result_ty) result_ty = ty_int(p); + memset(&fsd, 0, sizeof fsd); + fsd.type = result_ty; + fsd.size = abi_sizeof(p->abi, result_ty); + fsd.align = abi_alignof(p->abi, result_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + tmp = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, tmp, result_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_jump(p->cg, L_end); + cg_label_place(p->cg, L_else); + expect_punct(p, ':', "':' in ternary"); + parse_assign_expr(p); + to_rvalue(p); + const Type* else_ty = cg_top_type(p->cg); + const Type* common = common_fp_type(p, result_ty, else_ty); + if (cg_top_type(p->cg) != result_ty) { + cg_convert(p->cg, result_ty); + } + cg_push_local_typed(p->cg, tmp, result_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_label_place(p->cg, L_end); + if (common && common != result_ty) { + FrameSlotDesc cfsd; + FrameSlot ctmp; + memset(&cfsd, 0, sizeof cfsd); + cfsd.type = common; + cfsd.size = abi_sizeof(p->abi, common); + cfsd.align = abi_alignof(p->abi, common); + cfsd.kind = FS_LOCAL; + cfsd.flags = FSF_NONE; + ctmp = cg_local(p->cg, &cfsd); + cg_push_local_typed(p->cg, tmp, result_ty); + cg_load(p->cg); + cg_convert(p->cg, common); + cg_push_local_typed(p->cg, ctmp, common); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + cg_push_local_typed(p->cg, ctmp, common); + return; + } + cg_push_local_typed(p->cg, tmp, result_ty); +} + +void parse_assign_expr(Parser* p) { + parse_ternary(p); + Tok t = p->cur; + BinOp compound; + int is_simple_assign; + if (is_punct(&t, '=')) { + is_simple_assign = 1; + compound = (BinOp)0; + } else if (is_punct(&t, P_ADD_ASSIGN)) { + is_simple_assign = 0; compound = BO_IADD; + } else if (is_punct(&t, P_SUB_ASSIGN)) { + is_simple_assign = 0; compound = BO_ISUB; + } else if (is_punct(&t, P_MUL_ASSIGN)) { + is_simple_assign = 0; compound = 
BO_IMUL; + } else if (is_punct(&t, P_DIV_ASSIGN)) { + is_simple_assign = 0; compound = BO_SDIV; + } else if (is_punct(&t, P_MOD_ASSIGN)) { + is_simple_assign = 0; compound = BO_SREM; + } else if (is_punct(&t, P_AND_ASSIGN)) { + is_simple_assign = 0; compound = BO_AND; + } else if (is_punct(&t, P_OR_ASSIGN)) { + is_simple_assign = 0; compound = BO_OR; + } else if (is_punct(&t, P_XOR_ASSIGN)) { + is_simple_assign = 0; compound = BO_XOR; + } else if (is_punct(&t, P_SHL_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHL; + } else if (is_punct(&t, P_SHR_ASSIGN)) { + is_simple_assign = 0; compound = BO_SHR_S; + } else { + return; + } + advance(p); + if (is_simple_assign) { + parse_assign_expr(p); + to_rvalue(p); + coerce_top_to_lvalue(p); + cg_store(p->cg); + return; + } + cg_dup(p->cg); + cg_load(p->cg); + parse_assign_expr(p); + to_rvalue(p); + if (compound == BO_IADD || compound == BO_ISUB) { + emit_add_or_sub(p, compound); + } else { + cg_binop(p->cg, compound); + } + cg_store(p->cg); +} + +void parse_expr(Parser* p) { + parse_assign_expr(p); + while (is_punct(&p->cur, ',')) { + advance(p); + cg_drop(p->cg); + parse_assign_expr(p); + } +} + +/* parse_cond_expr is the ternary level, provided for completeness */ +void parse_cond_expr(Parser* p) { + parse_ternary(p); +} diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c @@ -0,0 +1,775 @@ +/* parse_init.c — runtime and static-storage initializers. 
+ * + * Covers §6.7.9 (initializers): + * - Runtime aggregate/scalar initializers (init_at, init_elided, + * init_struct_fields, init_string_at, parse_designator_chain, + * push_subobject_lv, emit_copy_leaf, emit_walk_copy, + * emit_struct_copy_into_slot, zero_init_at) + * - Static-storage object definition (parse_static_init_at, + * parse_static_string_at, try_parse_addr_const, encode_int_le, + * pick_object_section, define_static_object, srl_push) + */ + +#include "parse/parse_priv.h" + +/* ============================================================ + * File-local helpers + * ============================================================ */ + +static const Type* ty_size_t_init(Parser* p) { + return abi_size_type(p->abi, p->pool); +} + +static SrcLoc tok_loc_init(const Tok* t) { return t->loc; } + +static CKw ident_kw_init(const Parser* p, Sym name) { + return ident_kw_inline(p, name); +} + +/* True if `ty` is char/signed char/unsigned char. */ +int is_char_kind(const Type* ty) { + if (!ty) return 0; + return ty->kind == TY_CHAR || ty->kind == TY_SCHAR || ty->kind == TY_UCHAR; +} + +/* Decode the string token at p->cur without advancing. Returns a heap- + * allocated byte buffer (caller frees) and writes length (including NUL) + * to *nlen_out. */ +static u8* peek_string_bytes(Parser* p, size_t* nlen_out) { + Tok t = p->cur; + if (t.kind != TOK_STR) perr(p, "internal: peek_string_bytes on non-string"); + return decode_string_literal(p, &t, nlen_out); +} + +/* ============================================================ + * Runtime initializers + * ============================================================ */ + +/* Forward declaration for mutual recursion. 
*/ +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, + const Type* ty); +static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty); + +/* Push the lvalue of a sub-object at byte offset `offset` within the array + * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. */ +void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty) { + cg_push_local_typed(p->cg, slot, arr_ty); + cg_addr(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, elem_ty)); + if (offset > 0) { + cg_push_int(p->cg, (i64)offset, ty_size_t_init(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, elem_ty); +} + +/* Emit a load+store for one scalar leaf. */ +static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty, + u32 dst_off, FrameSlot src_ptr_slot, + const Type* src_ptr_ty, u32 src_off, + const Type* leaf_ty) { + push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty); + cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty); + cg_load(p->cg); + cg_retag_top(p->cg, type_ptr(p->pool, leaf_ty)); + if (src_off > 0) { + cg_push_int(p->cg, (i64)src_off, ty_size_t_init(p)); + cg_binop(p->cg, BO_IADD); + } + cg_deref(p->cg, leaf_ty); + cg_load(p->cg); + cg_store(p->cg); + cg_drop(p->cg); +} + +/* Walk a (possibly nested) aggregate, emitting a leaf load+store for each + * scalar member. 
*/
/* Details:
 *   - arrays recurse element by element;
 *   - structs without bit-fields recurse field by field at the offsets
 *     reported by abi_record_layout;
 *   - unions, and structs that contain a bit-field, are copied byte by byte
 *     over their whole abi_sizeof. Bit-field members used to be skipped
 *     outright, which left their storage uncopied in the destination;
 *   - anything else is a single leaf copy of type `ty`.
 * `dst_off` / `src_off` are byte offsets of the sub-object being copied
 * within the destination slot and behind the source pointer. */
static void emit_walk_copy(Parser* p, FrameSlot dst_slot,
                           const Type* dst_arr_ty, u32 dst_off,
                           FrameSlot src_ptr_slot, const Type* src_ptr_ty,
                           u32 src_off, const Type* ty) {
  if (ty->kind == TY_ARRAY) {
    u32 esz = abi_sizeof(p->abi, ty->arr.elem);
    for (u32 i = 0; i < ty->arr.count; ++i) {
      emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + i * esz,
                     src_ptr_slot, src_ptr_ty, src_off + i * esz,
                     ty->arr.elem);
    }
    return;
  }
  if (ty->kind == TY_STRUCT) {
    int has_bitfield = 0;
    for (u16 i = 0; i < ty->rec.nfields; ++i) {
      if (ty->rec.fields[i].flags & FIELD_BITFIELD) {
        has_bitfield = 1;
        break;
      }
    }
    if (!has_bitfield) {
      const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
      if (!L) {
        perr(p, "cannot copy struct of incomplete type");
        return;
      }
      for (u16 i = 0; i < ty->rec.nfields; ++i) {
        const Field* f = &ty->rec.fields[i];
        u32 foff = L->fields[i].offset;
        emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + foff,
                       src_ptr_slot, src_ptr_ty, src_off + foff, f->type);
      }
      return;
    }
    /* Struct with bit-fields: fall through to the byte-wise copy. */
  }
  if (ty->kind == TY_UNION || ty->kind == TY_STRUCT) {
    u32 sz = abi_sizeof(p->abi, ty);
    const Type* uchar_ty = type_prim(p->pool, TY_UCHAR);
    for (u32 i = 0; i < sz; ++i) {
      emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + i,
                     src_ptr_slot, src_ptr_ty, src_off + i, uchar_ty);
    }
    return;
  }
  emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, src_ptr_ty,
                 src_off, ty);
}

/* Source struct/union value is on top of the cg stack as an lvalue.
 * Spill its address into a fresh pointer slot, then walk the type and
 * copy each scalar leaf into the destination sub-object. 
*/ +void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, + const Type* dst_arr_ty, u32 dst_off, + const Type* ty) { + const Type* ptr_ty = type_ptr(p->pool, ty); + FrameSlotDesc fsd; + FrameSlot src_ptr_slot; + cg_addr(p->cg); + memset(&fsd, 0, sizeof fsd); + fsd.type = ptr_ty; + fsd.size = abi_sizeof(p->abi, ptr_ty); + fsd.align = abi_alignof(p->abi, ptr_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_NONE; + src_ptr_slot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, src_ptr_slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, ptr_ty, 0, ty); +} + +/* Recursively zero-initialize the sub-object at `offset` of type `ty`. */ +static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + if (ty->kind == TY_ARRAY) { + u32 esz = abi_sizeof(p->abi, ty->arr.elem); + for (u32 i = 0; i < ty->arr.count; ++i) { + zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem); + } + return; + } + if (ty->kind == TY_STRUCT) { + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + for (u16 i = 0; i < ty->rec.nfields; ++i) { + const Field* f = &ty->rec.fields[i]; + zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type); + } + return; + } + if (ty->kind == TY_UNION) { + if (ty->rec.nfields > 0) { + const Field* f = &ty->rec.fields[0]; + if (!(f->flags & FIELD_BITFIELD)) { + zero_init_at(p, slot, arr_ty, offset, f->type); + } + } + return; + } + push_subobject_lv(p, slot, arr_ty, offset, ty); + cg_push_int(p->cg, 0, ty); + cg_store(p->cg); + cg_drop(p->cg); +} + +/* Emit byte stores for a string literal initializing a char-array sub-object. 
*/ +static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty, u32 count) { + size_t n = 0; + u8* bytes = peek_string_bytes(p, &n); + size_t copy = n; + size_t i; + if (copy > count) copy = count; + for (i = 0; i < copy; ++i) { + push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); + cg_push_int(p->cg, (i64)bytes[i], elem_ty); + cg_store(p->cg); + cg_drop(p->cg); + } + for (; i < count; ++i) { + push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); + cg_push_int(p->cg, 0, elem_ty); + cg_store(p->cg); + cg_drop(p->cg); + } + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); /* consume TOK_STR */ +} + +/* Parse a designator chain (`[const]` and `.ident` repeats) ending at `=`. */ +static void parse_designator_chain(Parser* p, const Type* outer_ty, + u32 outer_offset, const Type** sub_ty_out, + u32* sub_offset_out, u32* top_index_out) { + const Type* cur_ty = outer_ty; + u32 cur_off = outer_offset; + int first = 1; + for (;;) { + if (is_punct(&p->cur, '[')) { + i64 idx; + u32 esz; + SrcLoc cloc = tok_loc_init(&p->cur); + advance(p); + idx = eval_const_int(p, cloc); + expect_punct(p, ']', "']' after designator index"); + if (!cur_ty || cur_ty->kind != TY_ARRAY) { + perr(p, "array designator on non-array"); + } + if (idx < 0 || (u32)idx >= cur_ty->arr.count) { + perr(p, "array designator index out of range"); + } + esz = abi_sizeof(p->abi, cur_ty->arr.elem); + cur_off += (u32)idx * esz; + cur_ty = cur_ty->arr.elem; + if (first) *top_index_out = (u32)idx; + first = 0; + } else if (is_punct(&p->cur, '.')) { + Sym fname; + const Type* fty; + u32 foff; + const Field* ff; + u16 fi; + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected field name after '.'"); + } + fname = p->cur.v.ident; + advance(p); + if (!cur_ty || + (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) { + perr(p, "field designator on non-record type"); + } + if 
(!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) { + perr(p, "no such field in designator"); + } + cur_off += foff; + if (first) { + for (fi = 0; fi < cur_ty->rec.nfields; ++fi) { + const Field* g = &cur_ty->rec.fields[fi]; + if (g->name == fname && fname != 0) { + *top_index_out = fi; + break; + } + if ((g->flags & FIELD_ANON) && + (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) { + const Type* tmp_ty; + u32 tmp_off; + const Field* tmp_f; + if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off, + &tmp_f)) { + *top_index_out = fi; + break; + } + } + } + } + cur_ty = fty; + first = 0; + } else { + break; + } + } + if (first) perr(p, "internal: empty designator chain"); + expect_punct(p, '=', "'=' after designator"); + *sub_ty_out = cur_ty; + *sub_offset_out = cur_off; +} + +static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty, u32 start_field, + int braced) { + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + u32 i = start_field; + u32 zero_lo = start_field; + for (; i < ty->rec.nfields; ++i) { + const Field* f = &ty->rec.fields[i]; + u32 foff = offset + L->fields[i].offset; + if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break; + if (braced && is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + while (zero_lo < top_idx) { + const Field* zf = &ty->rec.fields[zero_lo]; + u32 zoff = offset + L->fields[zero_lo].offset; + zero_init_at(p, slot, arr_ty, zoff, zf->type); + ++zero_lo; + } + init_at(p, slot, arr_ty, sub_off, sub_ty); + i = top_idx; + if (zero_lo <= top_idx) zero_lo = top_idx + 1; + goto next_item_struct; + } + init_at(p, slot, arr_ty, foff, f->type); + if (zero_lo <= i) zero_lo = i + 1; + if (!braced) { + ++i; + break; + } + next_item_struct: + if (!accept_punct(p, ',')) { + ++i; + break; + } + if (is_punct(&p->cur, '}')) { + ++i; + break; + } + } + if 
(braced) { + u32 j; + for (j = zero_lo; j < ty->rec.nfields; ++j) { + const Field* f = &ty->rec.fields[j]; + u32 foff = offset + L->fields[j].offset; + zero_init_at(p, slot, arr_ty, foff, f->type); + } + } + return i; +} + +static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* ty) { + if (ty->kind == TY_ARRAY) { + u32 esz = abi_sizeof(p->abi, ty->arr.elem); + init_at(p, slot, arr_ty, offset, ty->arr.elem); + (void)esz; + return 1; + } + if (ty->kind == TY_STRUCT) { + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); + return 1; + } + /* Scalar / pointer / union: consume one assignment-expr. */ + int had_brace = accept_punct(p, '{'); + push_subobject_lv(p, slot, arr_ty, offset, ty); + parse_assign_expr(p); + to_rvalue(p); + cg_store(p->cg); + cg_drop(p->cg); + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } + return 1; +} + +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + const Type* elem_ty = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem_ty); + if (is_char_kind(elem_ty)) { + if (p->cur.kind == TOK_STR) { + init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); + return; + } + if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { + advance(p); + init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); + accept_punct(p, ','); + expect_punct(p, '}', "'}' after string initializer"); + return; + } + } + if (!is_punct(&p->cur, '{')) { + init_elided(p, slot, arr_ty, offset, elem_ty); + return; + } + advance(p); /* '{' */ + { + u32 i = 0; + u32 zero_lo = 0; + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (is_punct(&p->cur, '[')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, + &top_idx); + while (zero_lo < top_idx) { + zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty); + 
++zero_lo; + } + init_at(p, slot, arr_ty, sub_off, sub_ty); + i = top_idx + 1; + if (zero_lo < i) zero_lo = i; + } else { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + init_at(p, slot, arr_ty, offset + i * esz, elem_ty); + ++i; + if (zero_lo < i) zero_lo = i; + } + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + } + expect_punct(p, '}', "'}' after array initializer"); + { + u32 j; + for (j = zero_lo; j < ty->arr.count; ++j) { + zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty); + } + } + } + return; + } + if (ty->kind == TY_STRUCT) { + if (!is_punct(&p->cur, '{')) { + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); + return; + } + advance(p); /* '{' */ + init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1); + expect_punct(p, '}', "'}' after struct initializer"); + return; + } + if (ty->kind == TY_UNION) { + int had_brace = accept_punct(p, '{'); + if (ty->rec.nfields == 0) { + if (had_brace) expect_punct(p, '}', "'}'"); + return; + } + if (had_brace && is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + init_at(p, slot, arr_ty, sub_off, sub_ty); + } else { + const Field* f = &ty->rec.fields[0]; + if (!(f->flags & FIELD_BITFIELD)) { + init_at(p, slot, arr_ty, offset, f->type); + } + } + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after union initializer"); + } + return; + } + /* Scalar (incl. pointer). 
*/ + int had_brace = accept_punct(p, '{'); + push_subobject_lv(p, slot, arr_ty, offset, ty); + parse_assign_expr(p); + to_rvalue(p); + coerce_top_to_lvalue(p); + cg_store(p->cg); + cg_drop(p->cg); + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } +} + +/* ============================================================ + * Static-storage initializers + * ============================================================ */ + +void encode_int_le(u8* dst, u32 size, i64 v) { + for (u32 i = 0; i < size; ++i) { + dst[i] = (u8)((v >> (8u * i)) & 0xffu); + } +} + +/* Encode a string literal at *buf+offset for a char-array sub-object. */ +static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset, + u32 count) { + size_t n = 0; + u8* bytes = peek_string_bytes(p, &n); + size_t copy = n; + if (copy > count) copy = count; + if (offset + (u32)copy > buflen) perr(p, "string initializer overflows object"); + memcpy(buf + offset, bytes, copy); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); +} + +/* Append one pending relocation to the parser-side list. */ +void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend) { + if (p->static_relocs_len == p->static_relocs_cap) { + u32 nc = p->static_relocs_cap ? p->static_relocs_cap * 2u : 4u; + void* nb = arena_array(p->c->tu, char, + nc * sizeof(*p->static_relocs)); + if (!nb) perr(p, "out of memory recording static relocs"); + if (p->static_relocs && p->static_relocs_len) { + memcpy(nb, p->static_relocs, + p->static_relocs_len * sizeof(*p->static_relocs)); + } + p->static_relocs = nb; + p->static_relocs_cap = nc; + } + p->static_relocs[p->static_relocs_len].offset = offset; + p->static_relocs[p->static_relocs_len].size = size; + p->static_relocs[p->static_relocs_len].target = target; + p->static_relocs[p->static_relocs_len].addend = addend; + ++p->static_relocs_len; +} + +/* Try to parse the current expression as an address constant. 
*/ +static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf, + u32 offset, u32 sz) { + Tok t = p->cur; + Sym name = 0; + SrcLoc nloc = tok_loc_init(&p->cur); + int saw_amp = 0; + i64 element_addend = 0; + i64 byte_addend = 0; + SymEntry* e; + const Type* tgt_ty; + ObjSymId tgt; + if (t.kind == TOK_STR) { + size_t n = 0; + u8* bytes = decode_string_literal(p, &t, &n); + ObjSymId str_sym = emit_string_to_rodata(p, bytes, n); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + advance(p); + (void)ty; + (void)buf; + srl_push(p, offset, sz, str_sym, 0); + return 1; + } + if (is_punct(&t, '&')) { + saw_amp = 1; + advance(p); + if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected identifier after '&' in static initializer"); + } + name = p->cur.v.ident; + nloc = tok_loc_init(&p->cur); + advance(p); + } else if (t.kind == TOK_IDENT && ident_kw_init(p, t.v.ident) == KW_NONE) { + name = t.v.ident; + advance(p); + } else { + return 0; + } + e = scope_lookup(p, name); + if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) { + perr(p, "static initializer is not a constant address expression"); + } + tgt = e->v.sym; + tgt_ty = e->type; + if (saw_amp && is_punct(&p->cur, '[')) { + SrcLoc cloc; + advance(p); + cloc = tok_loc_init(&p->cur); + element_addend = eval_const_int(p, cloc); + expect_punct(p, ']', "']' after array-subscript constant"); + if (tgt_ty && tgt_ty->kind == TY_ARRAY) { + byte_addend += + element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); + } else { + byte_addend += element_addend; + } + } + while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) { + int neg = is_punct(&p->cur, '-'); + SrcLoc cloc; + i64 v; + advance(p); + cloc = tok_loc_init(&p->cur); + v = eval_const_int(p, cloc); + if (neg) v = -v; + if (tgt_ty && tgt_ty->kind == TY_ARRAY) { + byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); + } else if (tgt_ty && tgt_ty->kind == TY_PTR) { + byte_addend += v * 
(i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee); + } else if (saw_amp) { + byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty); + } else { + byte_addend += v; + } + } + (void)nloc; + (void)ty; + (void)buf; + srl_push(p, offset, sz, tgt, byte_addend); + return 1; +} + +void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, + const Type* ty) { + if (ty->kind == TY_ARRAY) { + const Type* elem = ty->arr.elem; + u32 esz = abi_sizeof(p->abi, elem); + u32 i = 0; + int had_brace; + if (is_char_kind(elem)) { + if (p->cur.kind == TOK_STR) { + parse_static_string_at(p, buf, buflen, offset, ty->arr.count); + return; + } + if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { + advance(p); + parse_static_string_at(p, buf, buflen, offset, ty->arr.count); + accept_punct(p, ','); + expect_punct(p, '}', "'}' after string initializer"); + return; + } + } + had_brace = accept_punct(p, '{'); + if (!had_brace) { + perr(p, "expected '{' for static-storage array initializer"); + } + if (!is_punct(&p->cur, '}')) { + for (;;) { + if (is_punct(&p->cur, '[')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + parse_static_init_at(p, buf, buflen, sub_off, sub_ty); + i = top_idx + 1; + } else { + if (i >= ty->arr.count) { + perr(p, "too many initializers for array"); + } + parse_static_init_at(p, buf, buflen, offset + i * esz, elem); + ++i; + } + if (!accept_punct(p, ',')) break; + if (is_punct(&p->cur, '}')) break; + } + } + expect_punct(p, '}', "'}' after array initializer"); + return; + } + if (ty->kind == TY_STRUCT) { + int had_brace = accept_punct(p, '{'); + const ABIRecordLayout* L = abi_record_layout(p->abi, ty); + u32 i = 0; + if (!had_brace) { + perr(p, "expected '{' for static-storage struct initializer"); + } + while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { + const Field* f = &ty->rec.fields[i]; + if (is_punct(&p->cur, '.')) { + const Type* sub_ty; + u32 sub_off; + u32 top_idx = 
0; + parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); + parse_static_init_at(p, buf, buflen, sub_off, sub_ty); + i = top_idx + 1; + if (!accept_punct(p, ',')) break; + continue; + } + parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset, + f->type); + ++i; + if (!accept_punct(p, ',')) break; + } + expect_punct(p, '}', "'}' after struct initializer"); + return; + } + if (ty->kind == TY_UNION) { + perr(p, "static-storage union initializer not supported in Phase 4"); + } + /* Scalar / pointer. */ + { + int had_brace = accept_punct(p, '{'); + SrcLoc cloc = tok_loc_init(&p->cur); + u32 sz = abi_sizeof(p->abi, ty); + if (offset + sz > buflen) perr(p, "initializer overflows object"); + if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) { + /* Address constant recorded as a reloc. */ + } else { + i64 v = eval_const_int(p, cloc); + encode_int_le(buf + offset, sz, v); + } + if (had_brace) { + accept_punct(p, ','); + expect_punct(p, '}', "'}' after scalar initializer"); + } + } +} + +static void emit_static_data(Parser* p, const u8* buf, u32 size) { + u32 pos = 0; + u32 emitted_relocs = 0; + + while (emitted_relocs < p->static_relocs_len) { + u32 best = p->static_relocs_len; + u32 best_off = 0xffffffffu; + for (u32 i = 0; i < p->static_relocs_len; ++i) { + if (p->static_relocs[i].offset < pos) continue; + if (p->static_relocs[i].offset < best_off) { + best = i; + best_off = p->static_relocs[i].offset; + } + } + if (best == p->static_relocs_len) break; + if (best_off > size || p->static_relocs[best].size > size - best_off) { + perr(p, "static initializer relocation overflows object"); + } + if (best_off > pos) { + if (buf) { + cfree_cg_data_bytes(p->cg, buf + pos, best_off - pos); + } else { + cfree_cg_data_zero(p->cg, best_off - pos); + } + } + cfree_cg_data_addr(p->cg, p->static_relocs[best].target, + p->static_relocs[best].addend, + p->static_relocs[best].size, 0); + pos = best_off + p->static_relocs[best].size; + 
++emitted_relocs; + } + + if (pos < size) { + if (buf) { + cfree_cg_data_bytes(p->cg, buf + pos, size - pos); + } else { + cfree_cg_data_zero(p->cg, size - pos); + } + } +} + +/* Define a static-storage object. */ +void define_static_object(Parser* p, ObjSymId sym, ObjSecId section_id, + const Type* var_ty, u16 quals, int has_init, + SrcLoc loc, u32 align_override) { + u32 size = abi_sizeof(p->abi, var_ty); + u32 align = abi_alignof(p->abi, var_ty); + CfreeCgDataDefAttrs attrs; + if (align_override > align) align = align_override; + u8* buf = NULL; + int has_nonzero = 0; + + if (has_init) { + buf = (u8*)arena_array(p->c->tu, u8, size ? size : 1u); + memset(buf, 0, size); + p->static_relocs_len = 0; + parse_static_init_at(p, buf, size, 0, var_ty); + for (u32 i = 0; i < size; ++i) { + if (buf[i]) { has_nonzero = 1; break; } + } + if (p->static_relocs_len) has_nonzero = 1; + } + + memset(&attrs, 0, sizeof attrs); + attrs.section = section_id; + attrs.align = align ? align : 1u; + if ((quals & Q_CONST) != 0 && has_nonzero) { + attrs.flags |= CFREE_CG_DATADEF_READONLY; + } + if (!has_init || !has_nonzero) { + attrs.flags |= CFREE_CG_DATADEF_ZERO_FILL; + } + + cfree_cg_data_begin(p->cg, sym, attrs); + emit_static_data(p, buf, size); + cfree_cg_data_end(p->cg); + p->static_relocs_len = 0; + (void)loc; +} diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h @@ -0,0 +1,437 @@ +/* parse_priv.h — private header shared across parse_*.c modules. + * + * Declares: Parser struct, shared types (Scope, SymEntry, TagEntry, + * DeclSpecs, TypeSpecAccum, CKw, TagDeclKind), forward decls of + * cross-module functions, and inline/shared helpers. 
*/ + +#pragma once + +#include "parse/parse.h" + +#include <stdarg.h> +#include <string.h> + +#include "abi/abi.h" +#include "core/arena.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "decl/decl.h" +#include "decl/decl_attrs.h" +#include "lex/lex.h" +#include "parse/attr.h" +#include "pp/pp.h" +#include "type/type.h" + +/* ============================================================ + * Keywords + * ============================================================ */ /* CKw values index Parser.kw_sym[]: KW_NONE (0) means "not a keyword" and KW_COUNT is the array length, so new keywords must be added before KW_COUNT. */ +typedef enum CKw { + KW_NONE = 0, + KW_AUTO, + KW_BREAK, + KW_CASE, + KW_CHAR, + KW_CONST, + KW_CONTINUE, + KW_DEFAULT, + KW_DO, + KW_DOUBLE, + KW_ELSE, + KW_ENUM, + KW_EXTERN, + KW_FLOAT, + KW_FOR, + KW_GOTO, + KW_IF, + KW_INLINE, + KW_INT, + KW_LONG, + KW_REGISTER, + KW_RESTRICT, + KW_RETURN, + KW_SHORT, + KW_SIGNED, + KW_SIZEOF, + KW_STATIC, + KW_STRUCT, + KW_SWITCH, + KW_TYPEDEF, + KW_UNION, + KW_UNSIGNED, + KW_VOID, + KW_VOLATILE, + KW_WHILE, + KW_BOOL, /* _Bool */ + KW_COMPLEX, /* _Complex */ + KW_IMAGINARY, /* _Imaginary */ + KW_ALIGNAS, /* _Alignas */ + KW_ALIGNOF, /* _Alignof */ + KW_ATOMIC, /* _Atomic */ + KW_GENERIC, /* _Generic */ + KW_NORETURN, /* _Noreturn */ + KW_STATIC_ASSERT, /* _Static_assert */ + KW_THREAD_LOCAL, /* _Thread_local */ + KW_ASM, /* GNU `asm` */ + KW_BUILTIN_ASM, /* GNU `__asm__` */ + KW_COUNT +} CKw; + +/* ============================================================ + * Scope stack types + * ============================================================ */ + +typedef enum SymEntryKind { + SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */ + SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */ + SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */ + SEK_TYPEDEF, /* typedef name */ + SEK_ENUM_CST, /* enumeration constant */ +} SymEntryKind; + +typedef struct SymEntry SymEntry; +struct SymEntry { + Sym name; + u8 kind; /* SymEntryKind */ + u8 pad[3]; + const Type* type; /* The union below is the payload selected by kind: slot for SEK_LOCAL, sym for SEK_GLOBAL / SEK_FUNC (see SymEntryKind); enum_value is presumably the value of a SEK_ENUM_CST entry. */ + union { + FrameSlot slot; + ObjSymId sym; + i64 enum_value; + 
} v; + FrameSlot vla_byte_slot; + struct Attr* attrs; + SymEntry* next; +}; + +typedef struct TagEntry TagEntry; +struct TagEntry { + Sym name; + u8 kind; /* TagDeclKind */ + u8 complete; + u16 pad; + Type* type; + struct Attr* attrs; + TagEntry* next; +}; + +typedef struct Scope Scope; +struct Scope { + SymEntry* entries; /* LIFO */ + TagEntry* tags; /* LIFO */ + Scope* parent; +}; + +/* ============================================================ + * Switch/goto control-flow types + * ============================================================ */ + +typedef struct CaseEntry CaseEntry; +struct CaseEntry { + i64 value; + CGLabel label; + CaseEntry* next; +}; + +typedef struct SwitchCtx SwitchCtx; +struct SwitchCtx { + CaseEntry* cases; + CGLabel default_label; + FrameSlot value_slot; + const Type* value_type; + SwitchCtx* parent; +}; + +typedef struct GotoLabel GotoLabel; +struct GotoLabel { + Sym name; + CGLabel label; + u8 placed; + u8 pad[3]; + SrcLoc first_use; + GotoLabel* next; +}; + +/* ============================================================ + * Parser context + * ============================================================ */ /* Parser state shared by the parse_*.c modules; this header is their private interface. */ + +typedef struct Parser { + Compiler* c; + Pp* pp; + DeclTable* decls; + CG* cg; + Debug* debug; + TargetABI* abi; + Pool* pool; + + const Type** cg_type_stack; + u32 cg_type_sp; + u32 cg_type_cap; + + Tok cur; + Tok next; + int has_next; + + Tok pending; + int has_pending; + + Sym kw_sym[KW_COUNT]; /* indexed by CKw; slot KW_NONE is never matched by the lookup helpers below */ + + Sym sym_b_alloca; + Sym sym_b_ctz; + Sym sym_b_clz; + Sym sym_b_clzl; + Sym sym_b_clzll; + Sym sym_b_trap; + Sym sym_b_unreachable; + Sym sym_b_memcpy; + Sym sym_b_memmove; + Sym sym_b_memcmp; + Sym sym_b_memset; + Sym sym_func; /* __func__ */ + Sym sym_func_gcc; /* __FUNCTION__ */ + Sym sym_pretty_func_gcc; /* __PRETTY_FUNCTION__ */ + Sym cur_func_name; /* name of the function whose body we're in, + * 0 at file scope */ + Sym sym_b_expect; + Sym sym_b_offsetof; + Sym sym_b_va_list; + Sym sym_b_va_start; + Sym sym_b_va_arg; + Sym 
sym_b_va_end; + Sym sym_b_va_copy; + Sym sym_attribute; + Sym sym_volatile_alias; + Sym sym_alignof_alias; + Sym sym_int128; /* __int128 */ + Sym sym_int128_t; /* __int128_t */ + Sym sym_uint128_t; /* __uint128_t */ + Sym sym_a_load_n; + Sym sym_a_store_n; + Sym sym_a_exchange_n; + Sym sym_a_fetch_add; + Sym sym_a_fetch_sub; + Sym sym_a_fetch_and; + Sym sym_a_fetch_or; + Sym sym_a_fetch_xor; + Sym sym_a_cas_n; + Sym sym_a_thread_fence; + Sym sym_a_signal_fence; + + Scope* scope; + + CGLabel cur_break; + CGLabel cur_continue; + + SwitchCtx* cur_switch; + + GotoLabel* goto_labels; + + u8 vla_pending; + FrameSlot vla_pending_count_slot; + + FrameSlot last_pushed_vla_slot; + + u8 in_param_decl; + + u32 static_local_counter; + + u32 compound_literal_counter; + + Tok* replay; + u32 replay_cap; + u32 replay_len; + u32 replay_pos; + u8 replay_active; + /* Growable list of pending static-initializer relocations; srl_push() takes the same four values (offset, size, target, addend). static_relocs_len is the number in use, static_relocs_cap the allocated capacity. */ + struct { + u32 offset; + u32 size; + ObjSymId target; + i64 addend; + } *static_relocs; + u32 static_relocs_len; + u32 static_relocs_cap; +} Parser; + +/* ============================================================ + * DeclSpecs and TypeSpecAccum + * ============================================================ */ + +typedef struct DeclSpecs { + const Type* type; + DeclStorage storage; + u32 flags; /* DeclFlag */ + u16 quals; + u32 align; + FrameSlot vla_byte_slot; + Attr* attrs; +} DeclSpecs; + +typedef struct TypeSpecAccum { + u8 saw_void; + u8 saw_char; + u8 saw_int; + u8 saw_short; + u8 long_count; + u8 saw_signed; + u8 saw_unsigned; + u8 saw_bool; + u8 saw_float; + u8 saw_double; + u8 saw_int128; /* __int128 / __int128_t / __uint128_t */ + u8 saw_explicit_type; +} TypeSpecAccum; + +/* ============================================================ + * Shared token/diagnostic helpers (defined in parse.c) + * ============================================================ */ + +_Noreturn void perr(Parser* p, const char* fmt, ...); +void advance(Parser* p); +Tok peek1(Parser* p); +void expect_punct(Parser* p, u32 punct, const 
char* what); +int accept_punct(Parser* p, u32 punct); + +/* ============================================================ + * Scope/tag ops (defined in parse.c) + * ============================================================ */ + +Scope* scope_new(Parser* p, Scope* parent); +void scope_push(Parser* p); +void scope_pop(Parser* p); +SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, const Type* type); +SymEntry* scope_lookup(Parser* p, Sym name); +TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, int complete); +TagEntry* tag_lookup(Parser* p, Sym name); +TagEntry* tag_lookup_local(Parser* p, Sym name); + +/* ============================================================ + * Token predicate helpers (defined in parse.c — file-scope static, + * exposed here as inline equivalents; each .c file sees its own copy) + * ============================================================ */ /* is_punct: nonzero when t is a TOK_PUNCT token whose punctuator equals `punct`. */ + +static inline int is_punct(const Tok* t, u32 punct) { + return t->kind == TOK_PUNCT && t->v.punct == punct; +} + /* is_pp_hash: nonzero when t is a TOK_PP_HASH token. */ +static inline int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; } + /* ident_kw_inline: map an identifier Sym to its CKw by a linear scan of kw_sym[1..KW_COUNT); sym_alignof_alias also maps to KW_ALIGNOF. Returns KW_NONE when nothing matches. */ +static inline CKw ident_kw_inline(const Parser* p, Sym name) { + CKw i; + for (i = (CKw)1; i < KW_COUNT; ++i) { + if (p->kw_sym[i] == name) return i; + } + if (name == p->sym_alignof_alias) return KW_ALIGNOF; + return KW_NONE; +} + /* is_kw: nonzero when t is an identifier token spelling keyword k; KW_ALIGNOF additionally accepts sym_alignof_alias. */ +static inline int is_kw(const Parser* p, const Tok* t, CKw k) { + if (t->kind != TOK_IDENT) return 0; + if (t->v.ident == p->kw_sym[k]) return 1; + if (k == KW_ALIGNOF && t->v.ident == p->sym_alignof_alias) return 1; + return 0; +} + +/* ============================================================ + * Shared types (needed across multiple modules) + * ============================================================ */ + +typedef struct ParamInfo { + Sym name; + const Type* type; + SrcLoc loc; +} ParamInfo; + +/* ============================================================ + * Declarator suffix types (defined in parse_type.c, shared here) + * 
============================================================ */ + +typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind; +typedef struct DeclSuffix { + u8 kind; /* DSuffKind */ + u32 count; /* element count; meaningful when !vla and !incomplete */ + u8 incomplete; /* true for `[]` (no size given) */ + u8 vla; /* true for `[expr]` with a non-constant size */ + FrameSlot vla_count_slot; + ParamInfo* params; + u16 nparams; + u8 variadic; +} DeclSuffix; + +/* ============================================================ + * Cross-module forward declarations + * ============================================================ */ + +/* parse_type.c */ + +int parse_decl_specs(Parser* p, DeclSpecs* out); +const Type* parse_struct_or_union(Parser* p, TypeKind kind, Attr** anon_attrs_out); +const Type* parse_enum(Parser* p, Attr** anon_attrs_out); +const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, SrcLoc loc); +const Type* parse_type_name(Parser* p); +const Type* parse_pointer_layer(Parser* p, const Type* base); +const Type* parse_declarator_full(Parser* p, const Type* base, int allow_abstract, + Sym* name_out, SrcLoc* loc_out); +const Type* parse_declarator_full_ex(Parser* p, const Type* base, int allow_abstract, + Sym* name_out, SrcLoc* loc_out, Attr** attrs_out); +const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, SrcLoc* loc_out); +const Type* complete_incomplete_array(Parser* p, const Type* ty); +int starts_type_name(const Parser* p, const Tok* t); +int starts_attr(const Parser* p); +Attr* parse_attribute_spec_list(Parser* p); +void parse_and_discard_attributes(Parser* p); +int find_field(TargetABI* abi, const Type* rec, Sym name, + const Type** out_type, u32* out_offset, const Field** out_field); +u32 attrs_pick_aligned(const Attr* a); +void attr_list_append(Attr** head, Attr* add); +void parse_attrs_into(Parser* p, Attr** sink); +int parse_decl_suffix(Parser* p, DeclSuffix* out); +const Type* apply_decl_suffix(Parser* p, const Type* 
base, const DeclSuffix* s); + +/* parse_expr.c */ +void parse_expr(Parser* p); +void parse_assign_expr(Parser* p); +void parse_cond_expr(Parser* p); +void parse_unary(Parser* p); +i64 eval_const_int(Parser* p, SrcLoc loc); +i64 parse_int_literal(Parser* p, const Tok* t); +i64 decode_char_literal(Parser* p, const Tok* t); +u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out); +void to_rvalue(Parser* p); +void coerce_top_to_lvalue(Parser* p); +CfreeCgSym emit_string_to_rodata(Parser* p, const u8* bytes, size_t n); + +/* parse_init.c */ +void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* ty); +void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, const Type* ty); +void define_static_object(Parser* p, ObjSymId sym, ObjSecId section_id, + const Type* var_ty, u16 quals, int has_init, + SrcLoc loc, u32 align_override); /* srl_push appends one entry to Parser.static_relocs. */ +void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend); /* encode_int_le writes `size` bytes of v to dst, least-significant byte first. */ +void encode_int_le(u8* dst, u32 size, i64 v); +void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, + u32 offset, const Type* elem_ty); +void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, + const Type* dst_arr_ty, u32 dst_off, + const Type* ty); +int is_char_kind(const Type* ty); + +/* parse_stmt.c */ +void parse_stmt(Parser* p); +void parse_compound_stmt(Parser* p); +void parse_static_assert(Parser* p); + +/* parse.c (residual — TU driver) */ +void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, + u8* variadic_out); +void parse_local_decl(Parser* p, const DeclSpecs* specs); +FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc); +FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, SrcLoc loc, + u32 align_override); +Sym mint_static_local_sym(Parser* p, Sym orig); +void record_braced_block(Parser* p); +void replay_rewind(Parser* p); +u32 count_recorded_top_level_items(const Tok* vec, u32 len); diff --git a/src/parse/parse_stmt.c 
b/lang/c/parse/parse_stmt.c diff --git a/src/parse/parse_type.c b/lang/c/parse/parse_type.c diff --git a/src/pp/pp.c b/lang/c/pp/pp.c diff --git a/src/pp/pp.h b/lang/c/pp/pp.h diff --git a/src/pp/pp_directive.c b/lang/c/pp/pp_directive.c diff --git a/src/pp/pp_expand.c b/lang/c/pp/pp_expand.c diff --git a/src/pp/pp_priv.h b/lang/c/pp/pp_priv.h diff --git a/lang/c/type/type.c b/lang/c/type/type.c @@ -0,0 +1,448 @@ +/* C type construction. + * + * Types are interned per-Pool: a single `type_void(pool)` returns the same + * Type* on every call against the same pool, and structurally-equal calls + * to type_prim/type_ptr/type_func collapse to the same Type*. The cache is + * a small open structure stored through Pool.type_cache (opaque to other + * consumers). + * + * Storage: every Type and every supporting array (TY_FUNC param vectors, + * TY_STRUCT field arrays) is allocated from the Pool's arena, so pointers + * are stable for the Pool's lifetime. + * + * v1 covers what the cg test harness drives: + * void / scalars / pointer / function / struct / union + * Other constructors (array, qualified, enum) and predicates have minimal + * implementations sufficient for the cg test surface; they will grow with + * the parser. */ + +#include "type/type.h" + +#include <stdint.h> +#include <string.h> + +#include "core/arena.h" +#include "core/pool.h" + +#define NUM_PRIM_KINDS ((unsigned)TY_LDOUBLE + 1u) + +typedef struct TypeListNode TypeListNode; +struct TypeListNode { + TypeListNode* next; + Type ty; +}; + +typedef struct PoolTypeCache { + /* Direct slots for void + primitive kinds (TY_VOID..TY_LDOUBLE). */ + const Type* prim[NUM_PRIM_KINDS]; + /* Linked list of every other type allocated through this pool. */ + TypeListNode* derived; + /* Tag id allocator (1-based; TAG_NONE = 0). 
*/ + u32 next_tag; +} PoolTypeCache; + +static PoolTypeCache* cache_get(Pool* p) { + PoolTypeCache* c = (PoolTypeCache*)p->type_cache; + if (c) return c; + c = arena_new(&p->arena, PoolTypeCache); + if (!c) return NULL; + memset(c, 0, sizeof *c); + c->next_tag = 1; + p->type_cache = c; + return c; +} + +static Type* alloc_type_node(Pool* p, PoolTypeCache* c) { + TypeListNode* n = arena_new(&p->arena, TypeListNode); + if (!n) return NULL; + memset(n, 0, sizeof *n); + n->next = c->derived; + c->derived = n; + return &n->ty; +} + +const Type* type_void(Pool* p) { return type_prim(p, TY_VOID); } + +const Type* type_prim(Pool* p, TypeKind kind) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + if ((unsigned)kind >= NUM_PRIM_KINDS) return NULL; + if (c->prim[kind]) return c->prim[kind]; + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = (u16)kind; + t->qual = 0; + c->prim[kind] = t; + return t; +} + +const Type* type_ptr(Pool* p, const Type* pointee) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + /* Linear search; small N in practice. */ + for (TypeListNode* n = c->derived; n; n = n->next) { + if (n->ty.kind == TY_PTR && n->ty.qual == 0 && + n->ty.ptr.pointee == pointee) { + return &n->ty; + } + } + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = TY_PTR; + t->qual = 0; + t->ptr.pointee = pointee; + return t; +} + +const Type* type_array(Pool* p, const Type* elem, u32 count, int incomplete) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + for (TypeListNode* n = c->derived; n; n = n->next) { + if (n->ty.kind == TY_ARRAY && n->ty.qual == 0 && n->ty.arr.elem == elem && + n->ty.arr.count == count && + n->ty.arr.incomplete == (u8)(incomplete ? 1 : 0)) { + return &n->ty; + } + } + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = TY_ARRAY; + t->qual = 0; + t->arr.elem = elem; + t->arr.count = count; + t->arr.incomplete = (u8)(incomplete ? 
1 : 0); + return t; +} + +static int param_arrays_eq(const Type* const* a, const Type* const* b, u16 n) { + for (u16 i = 0; i < n; ++i) + if (a[i] != b[i]) return 0; + return 1; +} + +const Type* type_func(Pool* p, const Type* ret, const Type** params, u16 n, + int variadic) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + for (TypeListNode* nd = c->derived; nd; nd = nd->next) { + if (nd->ty.kind == TY_FUNC && nd->ty.qual == 0 && nd->ty.fn.ret == ret && + nd->ty.fn.nparams == n && + nd->ty.fn.variadic == (u8)(variadic ? 1 : 0) && + param_arrays_eq(nd->ty.fn.params, params, n)) { + return &nd->ty; + } + } + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = TY_FUNC; + t->qual = 0; + t->fn.ret = ret; + t->fn.nparams = n; + t->fn.variadic = (u8)(variadic ? 1 : 0); + if (n) { + const Type** dst = arena_array(&p->arena, const Type*, n); + if (!dst) return NULL; + for (u16 i = 0; i < n; ++i) dst[i] = params[i]; + t->fn.params = dst; + } else { + t->fn.params = NULL; + } + return t; +} + +const Type* type_qualified(Pool* p, const Type* base, u16 qual) { + if (!base || qual == 0) return base; + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + for (TypeListNode* n = c->derived; n; n = n->next) { + if (n->ty.kind == base->kind && n->ty.qual == qual) { + /* Compare body bytes other than qual. Cheap: types are POD. 
*/ + Type tmpl = *base; + tmpl.qual = qual; + if (memcmp(&n->ty, &tmpl, sizeof(Type)) == 0) return &n->ty; + } + } + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + *t = *base; + t->qual = qual; + return t; +} + +/* ---- aggregates ---- */ + +struct TypeRecordBuilder { + Pool* pool; + TypeKind kind; /* TY_STRUCT or TY_UNION */ + TagId tag_id; + Sym tag; + Field* fields; + u32 nfields; + u32 cap; + TypeRecordOpts opts; +}; + +TagId type_tag_new(Pool* p, TagDeclKind kind, Sym spelling, SrcLoc loc) { + PoolTypeCache* c = cache_get(p); + if (!c) return TAG_NONE; + (void)kind; + (void)spelling; + (void)loc; + return (TagId)(c->next_tag++); +} + +const TagDecl* type_tag_get(Pool* p, TagId id) { + (void)p; + (void)id; + /* TagDecl table is parser-territory; not modeled in v1. */ + return NULL; +} + +TypeRecordBuilder* type_record_begin(Pool* p, TypeKind kind, TagId tag_id, + Sym tag) { + TypeRecordOpts opts; + memset(&opts, 0, sizeof opts); + return type_record_begin_ex(p, kind, tag_id, tag, opts); +} + +TypeRecordBuilder* type_record_begin_ex(Pool* p, TypeKind kind, TagId tag_id, + Sym tag, TypeRecordOpts opts) { + TypeRecordBuilder* b = arena_new(&p->arena, TypeRecordBuilder); + if (!b) return NULL; + memset(b, 0, sizeof *b); + b->pool = p; + b->kind = kind; + b->tag_id = tag_id; + b->tag = tag; + b->opts = opts; + return b; +} + +void type_record_field(TypeRecordBuilder* b, Field f) { + if (b->nfields == b->cap) { + u32 nc = b->cap ? 
b->cap * 2 : 4; + Field* nf = arena_array(&b->pool->arena, Field, nc); + if (!nf) return; + if (b->fields) memcpy(nf, b->fields, sizeof(Field) * b->nfields); + b->fields = nf; + b->cap = nc; + } + b->fields[b->nfields++] = f; +} + +const Type* type_record_end(Pool* p, TypeRecordBuilder* b) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = (u16)b->kind; + t->qual = 0; + t->rec.tag_id = b->tag_id; + t->rec.tag = b->tag; + t->rec.fields = b->fields; + t->rec.nfields = (u16)b->nfields; + t->rec.incomplete = 0; + t->rec.packed = b->opts.packed; + t->rec.align_override = b->opts.align_override; + return t; +} + +Type* type_record_forward(Pool* p, TypeKind kind, TagId tag_id, Sym tag) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = (u16)kind; + t->qual = 0; + t->rec.tag_id = tag_id; + t->rec.tag = tag; + t->rec.fields = NULL; + t->rec.nfields = 0; + t->rec.incomplete = 1; + t->rec.packed = 0; + t->rec.align_override = 0; + return t; +} + +void type_record_install(Type* forward, const Field* fields, u16 nfields) { + if (!forward) return; + forward->rec.fields = fields; + forward->rec.nfields = nfields; + forward->rec.incomplete = 0; +} + +const Type* type_enum(Pool* p, TagId tag_id, Sym tag, const Type* base) { + PoolTypeCache* c = cache_get(p); + if (!c) return NULL; + Type* t = alloc_type_node(p, c); + if (!t) return NULL; + t->kind = TY_ENUM; + t->qual = 0; + t->enm.tag_id = tag_id; + t->enm.tag = tag; + t->enm.base = base; + return t; +} + +/* ---- predicates / utilities ---- */ + +const Type* type_unqual(Pool* p, const Type* t) { + if (!t || t->qual == 0) return t; + return type_qualified(p, t, 0); +} + +const Type* type_promoted(Pool* p, const Type* t) { + if (!t) return t; + switch (t->kind) { + case TY_BOOL: + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + case TY_SHORT: + case TY_USHORT: + return 
type_prim(p, TY_INT); + default: + return t; + } +} + +int type_compatible(const Type* a, const Type* b) { + if (a == b) return 1; + if (!a || !b) return 0; + if (a->kind != b->kind) return 0; + /* Strict structural compatibility past identity is parser territory; v1 + * relies on interning for the common cases. */ + return 0; +} + +int type_is_int(const Type* t) { + if (!t) return 0; + switch (t->kind) { + case TY_BOOL: + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + case TY_SHORT: + case TY_USHORT: + case TY_INT: + case TY_UINT: + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + case TY_INT128: + case TY_UINT128: + case TY_ENUM: + return 1; + default: + return 0; + } +} + +int type_is_arith(const Type* t) { + if (!t) return 0; + if (type_is_int(t)) return 1; + return t->kind == TY_FLOAT || t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE; +} + +int type_is_ptr(const Type* t) { return t && t->kind == TY_PTR; } + +static CfreeCgTypeId type_cg_builtin(CfreeCompiler* c, TypeKind kind) { + CfreeCgBuiltinTypes b = cfree_cg_builtin_types(c); + switch (kind) { + case TY_VOID: return b.id[CFREE_CG_BUILTIN_VOID]; + case TY_BOOL: return b.id[CFREE_CG_BUILTIN_BOOL]; + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + return b.id[CFREE_CG_BUILTIN_I8]; + case TY_SHORT: + case TY_USHORT: + return b.id[CFREE_CG_BUILTIN_I16]; + case TY_INT: + case TY_UINT: + return b.id[CFREE_CG_BUILTIN_I32]; + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + return b.id[CFREE_CG_BUILTIN_I64]; + case TY_INT128: + case TY_UINT128: + return b.id[CFREE_CG_BUILTIN_I128]; + case TY_FLOAT: return b.id[CFREE_CG_BUILTIN_F32]; + case TY_DOUBLE: + case TY_LDOUBLE: + return b.id[CFREE_CG_BUILTIN_F64]; + default: + break; + } + return CFREE_CG_TYPE_NONE; +} + +static CfreeCgTypeId type_cg_id_walk(CfreeCompiler* c, const Type* t, + const Type* pending_record) { + CfreeCgTypeId id; + if (!c || !t) return CFREE_CG_TYPE_NONE; + id = type_cg_builtin(c, (TypeKind)t->kind); + 
if (id != CFREE_CG_TYPE_NONE && !t->qual) return id; + switch ((TypeKind)t->kind) { + case TY_PTR: { + const Type* pointee = t->ptr.pointee; + if (pointee == pending_record) { + pointee = type_void(c->global); + } + return cfree_cg_type_ptr(c, type_cg_id_walk(c, pointee, pending_record), + 0); + } + case TY_ARRAY: + return cfree_cg_type_array(c, type_cg_id_walk(c, t->arr.elem, + pending_record), + t->arr.count); + case TY_FUNC: { + CfreeCgParam* params = NULL; + CfreeCgFuncSig sig; + memset(&sig, 0, sizeof sig); + sig.ret = type_cg_id_walk(c, t->fn.ret, pending_record); + sig.nparams = t->fn.nparams; + sig.abi_variadic = t->fn.variadic; + sig.call_conv = CFREE_CG_CC_TARGET_C; + if (t->fn.nparams) { + params = arena_zarray(c->tu, CfreeCgParam, t->fn.nparams); + for (u32 i = 0; i < t->fn.nparams; ++i) { + params[i].type = + type_cg_id_walk(c, t->fn.params[i], pending_record); + } + } + sig.params = params; + return cfree_cg_type_func(c, sig); + } + case TY_STRUCT: + case TY_UNION: { + CfreeCgField* fields = NULL; + if (t->rec.nfields) { + fields = arena_zarray(c->tu, CfreeCgField, t->rec.nfields); + for (u32 i = 0; i < t->rec.nfields; ++i) { + fields[i].name = t->rec.fields[i].name; + fields[i].type = type_cg_id_walk(c, t->rec.fields[i].type, t); + fields[i].align_override = t->rec.fields[i].align_override; + } + } + return cfree_cg_type_record(c, t->rec.tag, fields, t->rec.nfields); + } + case TY_ENUM: + return cfree_cg_type_enum(c, t->enm.tag, + type_cg_id_walk(c, t->enm.base, + pending_record), + NULL, 0); + default: + if (id != CFREE_CG_TYPE_NONE) { + return cfree_cg_type_alias(c, 0, id); + } + return CFREE_CG_TYPE_NONE; + } +} + +CfreeCgTypeId type_cg_id(CfreeCompiler* c, const Type* t) { + return type_cg_id_walk(c, t, NULL); +} diff --git a/lang/c/type/type.h b/lang/c/type/type.h @@ -0,0 +1,173 @@ +#ifndef CFREE_TYPE_H +#define CFREE_TYPE_H + +#include <cfree/cg.h> + +#include "core/core.h" +#include "core/pool.h" + +typedef enum TypeKind { + TY_VOID, + 
TY_BOOL, + TY_CHAR, + TY_SCHAR, + TY_UCHAR, + TY_SHORT, + TY_USHORT, + TY_INT, + TY_UINT, + TY_LONG, + TY_ULONG, + TY_LLONG, + TY_ULLONG, + TY_INT128, + TY_UINT128, + TY_FLOAT, + TY_DOUBLE, + TY_LDOUBLE, + TY_PTR, + TY_ARRAY, + TY_FUNC, + TY_STRUCT, + TY_UNION, + TY_ENUM, +} TypeKind; + +/* C tag identity is scoped declaration identity, not the spelling. `Sym tag` + * remains the diagnostic/debug name; TagId prevents two scoped `struct S` + * declarations from collapsing under global Type interning. */ +typedef u32 TagId; +#define TAG_NONE 0u + +typedef enum TagDeclKind { + TAG_STRUCT, + TAG_UNION, + TAG_ENUM, +} TagDeclKind; + +typedef struct TagDecl { + TagId id; + Sym spelling; + SrcLoc loc; + u8 kind; /* TagDeclKind */ + u8 complete; + u16 pad; +} TagDecl; + +typedef enum TypeQual { + Q_CONST = 1u << 0, + Q_VOLATILE = 1u << 1, + Q_RESTRICT = 1u << 2, + Q_ATOMIC = 1u << 3, +} TypeQual; + +typedef enum FieldFlag { + FIELD_NONE = 0, + FIELD_BITFIELD = 1u << 0, + FIELD_ZERO_WIDTH = 1u << 1, + FIELD_ANON = 1u << 2, + FIELD_FLEXIBLE_ARRAY = 1u << 3, +} FieldFlag; + +typedef struct Field { + Sym name; + const Type* type; + u16 bitfield_width; /* valid when FIELD_BITFIELD is set; may be 0 */ + u16 flags; /* FieldFlag */ + /* Phase 2 attribute carriers — populated by the parser when the member + * carries __attribute__((aligned(N))) / ((packed)). Zero means "no + * override"; abi_record_layout interprets them. */ + u16 align_override; + u8 packed; + u8 pad; +} Field; + +struct Type { + u16 kind; + u16 qual; + union { + struct { + const Type* pointee; + } ptr; + struct { + const Type* elem; + u32 count; + u8 incomplete; + } arr; + struct { + const Type* ret; + const Type** params; + u16 nparams; + u8 variadic; + } fn; + struct { + TagId tag_id; + Sym tag; + const Field* fields; + u16 nfields; + u8 incomplete; + /* Phase 2 attribute carriers — record-level + * __attribute__((packed)) / ((aligned(N))). Both zero means + * "natural layout". abi_record_layout honors them. 
*/ + u8 packed; + u16 align_override; + } rec; /* struct / union */ + struct { + TagId tag_id; + Sym tag; + const Type* base; + } enm; + }; +}; + +const Type* type_void(Pool*); +const Type* type_prim(Pool*, TypeKind); +const Type* type_ptr(Pool*, const Type*); +const Type* type_array(Pool*, const Type* elem, u32 count, int incomplete); +const Type* type_func(Pool*, const Type* ret, const Type** params, u16 n, + int variadic); +const Type* type_qualified(Pool*, const Type*, u16 qual); + +/* Aggregate construction is mutable only through TypeRecordBuilder. The + * committed Type is immutable and interned; field offsets, record + * size/alignment, and bitfield storage are target ABI facts. */ +typedef struct TypeRecordBuilder TypeRecordBuilder; +TagId type_tag_new(Pool*, TagDeclKind, Sym spelling, SrcLoc); +const TagDecl* type_tag_get(Pool*, TagId); +TypeRecordBuilder* type_record_begin(Pool*, TypeKind kind, TagId, + Sym tag); /* TY_STRUCT or TY_UNION */ + +/* Phase 2 record options carried from __attribute__((packed))/aligned(N)). + * Zero-initialized = natural layout. Fields kept as a struct so future + * options (e.g. transparent_union) don't churn the call sites. */ +typedef struct TypeRecordOpts { + u8 packed; + u16 align_override; +} TypeRecordOpts; + +/* Variant of type_record_begin that records record-level attribute + * options on the builder; type_record_end copies them to Type.rec. The + * plain type_record_begin is equivalent to passing a zeroed + * TypeRecordOpts. */ +TypeRecordBuilder* type_record_begin_ex(Pool*, TypeKind kind, TagId, + Sym tag, TypeRecordOpts); +void type_record_field(TypeRecordBuilder*, Field); +const Type* type_record_end(Pool*, TypeRecordBuilder*); +/* Forward-declared struct/union: returns a mutable, incomplete Type with the + * given tag identity but no fields. Pointers to it are valid; sizeof/member + * access are not until type_record_install is called. 
The same Type* identity + * survives completion, so any TY_PTR(forward) pointer types remain valid. */ +Type* type_record_forward(Pool*, TypeKind kind, TagId, Sym tag); +void type_record_install(Type* forward, const Field* fields, u16 nfields); +const Type* type_enum(Pool*, TagId, Sym tag, const Type* base); + +const Type* type_unqual(Pool*, const Type*); +const Type* type_promoted(Pool*, const Type*); +int type_compatible(const Type*, const Type*); +int type_is_arith(const Type*); +int type_is_int(const Type*); +int type_is_ptr(const Type*); + +CfreeCgTypeId type_cg_id(CfreeCompiler*, const Type*); + +#endif diff --git a/src/api/cg.c b/src/api/cg.c @@ -426,6 +426,89 @@ CfreeCgTypeId cfree_cg_type_func(CfreeCompiler* c, CfreeCgFuncSig sig) { return e->type ? id : CFREE_CG_TYPE_NONE; } +CfreeCgTypeId cg_api_type_import(Compiler* c, const Type* ty) { + CgApiState* s; + CfreeCgTypeId id; + CgApiType* e; + if (!c || !ty) return CFREE_CG_TYPE_NONE; + if (!ty->qual) { + switch (ty->kind) { + case TY_VOID: + return builtin_id(CFREE_CG_BUILTIN_VOID); + case TY_BOOL: + return builtin_id(CFREE_CG_BUILTIN_BOOL); + case TY_CHAR: + case TY_SCHAR: + case TY_UCHAR: + return builtin_id(CFREE_CG_BUILTIN_I8); + case TY_SHORT: + case TY_USHORT: + return builtin_id(CFREE_CG_BUILTIN_I16); + case TY_INT: + case TY_UINT: + return builtin_id(CFREE_CG_BUILTIN_I32); + case TY_LONG: + case TY_ULONG: + case TY_LLONG: + case TY_ULLONG: + return builtin_id(CFREE_CG_BUILTIN_I64); + case TY_INT128: + case TY_UINT128: + return builtin_id(CFREE_CG_BUILTIN_I128); + case TY_FLOAT: + return builtin_id(CFREE_CG_BUILTIN_F32); + case TY_DOUBLE: + return builtin_id(CFREE_CG_BUILTIN_F64); + default: + break; + } + } + + s = cg_api_get(c); + if (!s) return CFREE_CG_TYPE_NONE; + for (u32 i = 0, n = CgApiTypes_count(&s->types); i < n; ++i) { + e = CgApiTypes_at(&s->types, i); + if (e && e->type == ty) return type_id_for_user_index(i); + } + + e = type_alloc(c, &id); + if (!e) return CFREE_CG_TYPE_NONE; + 
e->type = ty; + switch (ty->kind) { + case TY_PTR: + e->base = cg_api_type_import(c, ty->ptr.pointee); + e->kind = CG_API_TYPE_PTR; + break; + case TY_ARRAY: + e->base = cg_api_type_import(c, ty->arr.elem); + e->array_count = ty->arr.count; + e->kind = CG_API_TYPE_ARRAY; + break; + case TY_FUNC: + e->base = cg_api_type_import(c, ty->fn.ret); + e->count = ty->fn.nparams; + e->abi_variadic = ty->fn.variadic; + e->call_conv = CFREE_CG_CC_TARGET_C; + e->kind = CG_API_TYPE_FUNC; + break; + case TY_STRUCT: + case TY_UNION: + e->name = ty->rec.tag; + e->count = ty->rec.nfields; + e->kind = CG_API_TYPE_RECORD; + break; + case TY_ENUM: + e->name = ty->enm.tag; + e->base = cg_api_type_import(c, ty->enm.base); + e->kind = CG_API_TYPE_ENUM; + break; + default: + e->kind = CG_API_TYPE_ALIAS; + break; + } + return id; +} + const Type* cg_api_type_resolve(Compiler* c, CfreeCgTypeId id) { return resolve_type(c, id); } @@ -1452,6 +1535,81 @@ void cfree_cg_free(CfreeCg* g) { h->free(h, g, sizeof *g); } +Compiler* cfree_cg_internal_compiler(CfreeCg* g) { + return g ? g->c : NULL; +} + +CGTarget* cfree_cg_internal_target(CfreeCg* g) { + return g ? g->target : NULL; +} + +MCEmitter* cfree_cg_internal_mc(CfreeCg* g) { + return g ? 
g->mc : NULL; +} + +const Type* cfree_cg_internal_top_type(CfreeCg* g) { + if (!g || g->sp == 0) return NULL; + return api_sv_type(&g->stack[g->sp - 1u]); +} + +const Type* cfree_cg_internal_top2_type(CfreeCg* g) { + if (!g || g->sp < 2) return NULL; + return api_sv_type(&g->stack[g->sp - 2u]); +} + +void cfree_cg_internal_retag_top(CfreeCg* g, const Type* ty) { + if (!g || g->sp == 0) return; + g->stack[g->sp - 1u].type = ty; + g->stack[g->sp - 1u].op.type = ty; +} + +void cfree_cg_internal_push_local_typed(CfreeCg* g, CfreeCgSlot slot, + const Type* ty) { + if (!g) return; + api_remember_slot_type(g, (FrameSlot)slot, ty); + cfree_cg_push_local(g, slot); +} + +void cfree_cg_internal_bind_sym(CfreeCg* g, ObjSymId sym, const Type* ty, + CfreeCgDeclKind kind) { + CfreeCgDecl decl; + const ObjSym* os; + if (!g || sym == OBJ_SYM_NONE || !ty) return; + memset(&decl, 0, sizeof decl); + os = obj_symbol_get(g->obj, sym); + decl.kind = kind; + if (os) { + decl.linkage_name = os->name; + decl.sym.bind = os->bind == SB_LOCAL ? CFREE_SB_LOCAL + : os->bind == SB_WEAK ? CFREE_SB_WEAK + : CFREE_SB_GLOBAL; + decl.sym.visibility = + (os->vis == SV_HIDDEN || os->vis == SV_INTERNAL) + ? CFREE_CG_VIS_HIDDEN + : (os->vis == SV_PROTECTED ? 
CFREE_CG_VIS_PROTECTED + : CFREE_CG_VIS_DEFAULT); + } + api_remember_sym(g, sym, ty, decl); +} + +void cfree_cg_internal_param_slot_existing(CfreeCg* g, uint32_t index, + CfreeCgSlot slot, + const Type* ty, CfreeSym name) { + CGParamDesc pd; + if (!g || !ty || slot == CFREE_CG_SLOT_NONE) return; + api_remember_slot_type(g, (FrameSlot)slot, ty); + memset(&pd, 0, sizeof pd); + pd.index = index; + pd.name = (Sym)name; + pd.type = ty; + pd.slot = (FrameSlot)slot; + if (g->fn_abi && index < g->fn_abi->nparams) { + pd.abi = &g->fn_abi->params[index]; + } + pd.loc = g->cur_loc; + g->target->param(g->target, &pd); +} + /* ============================================================ * Source location * ============================================================ */ @@ -2042,6 +2200,13 @@ static void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, api_push(g, v); return; } + if (ck == CV_BITCAST && abi_sizeof(g->c->abi, sty) == abi_sizeof(g->c->abi, dty) && + api_type_class(sty) == api_type_class(dty)) { + v.type = dty; + v.op.type = dty; + api_push(g, v); + return; + } src = api_force_reg(g, &v, sty); rr = api_alloc_reg_or_spill(g, api_type_class(dty), dty); @@ -3134,8 +3299,8 @@ void cfree_cg_continue_false(CfreeCg* g, CfreeCgScope scope) { * Dynamic stack allocation / variadics (stubs) * ============================================================ */ -void cfree_cg_alloca(CfreeCg* g, CfreeCgTypeId result_ptr_type, - uint32_t align) { +void cfree_cg_alloca(CfreeCg* g, uint32_t align, + CfreeCgTypeId result_ptr_type) { ApiSValue sz; CGTarget* T; const Type* pty; @@ -3750,11 +3915,25 @@ void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, if (!ty) return; decl_attrs = api_sym_attrs(g, cg_sym); align = attrs.align ? 
attrs.align : (u32)abi_alignof(c->abi, ty); - if (attrs.section) { - sec_name_sym = (Sym)attrs.section; - sec_kind = SEC_OTHER; + if (!attrs.section && decl_attrs.as.object.section) { + attrs.section = decl_attrs.as.object.section; + } + if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { + sec_kind = SEC_BSS; sec_flags = SF_ALLOC | SF_WRITE; - } else if (decl_attrs.as.object.flags & CFREE_CG_OBJ_READONLY) { + sec_name_sym = attrs.section ? (Sym)attrs.section + : pool_intern_cstr(c->global, ".bss"); + } else if (attrs.section) { + sec_name_sym = (Sym)attrs.section; + if (attrs.flags & CFREE_CG_DATADEF_READONLY) { + sec_kind = SEC_RODATA; + sec_flags = SF_ALLOC; + } else { + sec_kind = SEC_OTHER; + sec_flags = SF_ALLOC | SF_WRITE; + } + } else if ((attrs.flags & CFREE_CG_DATADEF_READONLY) || + (decl_attrs.as.object.flags & CFREE_CG_OBJ_READONLY)) { sec_kind = SEC_RODATA; sec_flags = SF_ALLOC; sec_name_sym = pool_intern_cstr(c->global, ".rodata"); @@ -3770,7 +3949,12 @@ void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, if (attrs.flags & CFREE_CG_DATADEF_RETAIN) sec_flags |= SF_RETAIN; if (attrs.flags & CFREE_CG_DATADEF_MERGE) sec_flags |= SF_MERGE; if (attrs.flags & CFREE_CG_DATADEF_STRINGS) sec_flags |= SF_STRINGS; - sec = obj_section(ob, sec_name_sym, sec_kind, sec_flags, align); + if (attrs.flags & CFREE_CG_DATADEF_ZERO_FILL) { + sec = obj_section_ex(ob, sec_name_sym, sec_kind, SSEM_NOBITS, sec_flags, + align, 0, OBJ_SEC_NONE, 0); + } else { + sec = obj_section(ob, sec_name_sym, sec_kind, sec_flags, align); + } g->data_sec = sec; g->data_sym = sym; g->data_base = obj_align_to(ob, sec, align); @@ -3877,7 +4061,15 @@ void cfree_cg_data_bytes(CfreeCg* g, const uint8_t* data, size_t len) { } void cfree_cg_data_zero(CfreeCg* g, uint64_t size) { + const Section* sec; if (!g || !size) return; + sec = obj_section_get(g->obj, g->data_sec); + if (sec && (sec->kind == SEC_BSS || sec->sem == SSEM_NOBITS)) { + obj_reserve_bss(g->obj, g->data_sec, g->data_base + 
(u32)(g->data_size + size), + 0); + g->data_size += size; + return; + } { u8 pad[64]; memset(pad, 0, sizeof pad); diff --git a/src/api/cg_api.h b/src/api/cg_api.h @@ -6,6 +6,10 @@ #include "core/core.h" #include "type/type.h" +typedef struct CGTarget CGTarget; +typedef struct MCEmitter MCEmitter; +typedef uint32_t ObjSymId; + enum { CG_API_TYPE_SEG_SHIFT = 6, CG_API_TYPE_SEG_SIZE = 1u << CG_API_TYPE_SEG_SHIFT, @@ -15,6 +19,19 @@ enum { }; const Type* cg_api_type_resolve(Compiler*, CfreeCgTypeId); +CfreeCgTypeId cg_api_type_import(Compiler*, const Type*); +Compiler* cfree_cg_internal_compiler(CfreeCg*); +CGTarget* cfree_cg_internal_target(CfreeCg*); +MCEmitter* cfree_cg_internal_mc(CfreeCg*); +const Type* cfree_cg_internal_top_type(CfreeCg*); +const Type* cfree_cg_internal_top2_type(CfreeCg*); +void cfree_cg_internal_retag_top(CfreeCg*, const Type*); +void cfree_cg_internal_push_local_typed(CfreeCg*, CfreeCgSlot, const Type*); +void cfree_cg_internal_bind_sym(CfreeCg*, ObjSymId, const Type*, + CfreeCgDeclKind); +void cfree_cg_internal_param_slot_existing(CfreeCg*, uint32_t index, + CfreeCgSlot slot, + const Type* type, CfreeSym name); void cg_api_fini(Compiler*); #endif diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -6,19 +6,13 @@ #include <cfree.h> +#include "../../lang/c/c.h" #include "arch/arch.h" -#include "cg/cg.h" #include "core/arena.h" #include "core/heap.h" -#include "core/pool.h" -#include "debug/debug.h" -#include "decl/decl.h" -#include "lex/lex.h" #include "link/link.h" #include "obj/obj.h" -#include "opt/opt.h" #include "parse/parse.h" -#include "pp/pp.h" /* CfreeCompiler lifecycle (cfree_compiler_new / cfree_compiler_free) * lives in src/api/lifecycle.c so consumers that only need lifecycle @@ -46,24 +40,6 @@ static _Noreturn void panic_bad_options(Compiler* c, const char* msg) { compiler_panic(c, no_loc(), "bad cfree options: %s", msg); } -static void apply_pp_options(Pp* pp, const CfreePpOptions* opts) { - u32 i; - - for (i = 0; i < 
opts->ninclude_dirs; ++i) { - pp_add_include_dir(pp, opts->include_dirs[i], 0); - } - for (i = 0; i < opts->nsystem_include_dirs; ++i) { - pp_add_include_dir(pp, opts->system_include_dirs[i], 1); - } - for (i = 0; i < opts->ndefines; ++i) { - const char* body = opts->defines[i].body ? opts->defines[i].body : "1"; - pp_define(pp, opts->defines[i].name, body); - } - for (i = 0; i < opts->nundefines; ++i) { - pp_undef(pp, opts->undefines[i]); - } -} - /* ============================================================ * Preprocess one TU * ============================================================ */ @@ -71,8 +47,6 @@ static void apply_pp_options(Pp* pp, const CfreePpOptions* opts) { int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts, const CfreeBytesInput* input, CfreeWriter* out) { PanicSave saved; - Pp* pp; - Lexer* lex; compiler_panic_save(c, &saved); if (setjmp(c->panic)) { @@ -88,12 +62,10 @@ int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts, panic_bad_options(c, "input data is NULL but len > 0"); } - pp = pp_new(c); - lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); - apply_pp_options(pp, pp_opts); - pp_push_input(pp, lex); /* PP owns the lexer from here on */ - pp_emit_text(pp, out); - pp_free(pp); + if (cfree_c_preprocess(c, pp_opts, input, out) != 0) { + compiler_panic(c, no_loc(), "C preprocessor failed for input: %s", + input->name); + } compiler_panic_restore(c, &saved); return 0; @@ -103,66 +75,9 @@ int cfree_preprocess(CfreeCompiler* c, const CfreePpOptions* pp_opts, * Dump tokens (lex-only) * ============================================================ */ -static void dt_write_str(Writer* w, const char* s) { - size_t n = 0; - while (s[n]) ++n; - w->write(w, s, n); -} - -static void dt_write_sym(Writer* w, Pool* p, Sym sym) { - size_t len = 0; - const char* s = sym ? 
pool_str(p, sym, &len) : NULL; - if (s && len) w->write(w, s, len); -} - -static void dt_emit(Writer* w, Pool* p, const Tok* t) { - switch (t->kind) { - case TOK_EOF: - dt_write_str(w, "(eof)\n"); - return; - case TOK_NEWLINE: - dt_write_str(w, "(newline)\n"); - return; - case TOK_PP_HASH: - dt_write_str(w, "(pp-hash)\n"); - return; - case TOK_PP_PASTE: - dt_write_str(w, "(pp-paste)\n"); - return; - case TOK_HEADER: - dt_write_str(w, "(header "); - break; - case TOK_IDENT: - dt_write_str(w, "(ident "); - break; - case TOK_NUM: - dt_write_str(w, "(num "); - break; - case TOK_FLT: - dt_write_str(w, "(flt "); - break; - case TOK_STR: - dt_write_str(w, "(str "); - break; - case TOK_CHR: - dt_write_str(w, "(chr "); - break; - case TOK_PUNCT: - dt_write_str(w, "(punct "); - break; - default: - dt_write_str(w, "(unknown "); - break; - } - dt_write_sym(w, p, t->spelling); - dt_write_str(w, ")\n"); -} - int cfree_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input, CfreeWriter* out) { PanicSave saved; - Lexer* lex; - Tok t; compiler_panic_save(c, &saved); if (setjmp(c->panic)) { @@ -178,13 +93,9 @@ int cfree_dump_tokens(CfreeCompiler* c, const CfreeBytesInput* input, panic_bad_options(c, "input data is NULL but len > 0"); } - lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); - for (;;) { - t = lex_next(lex); - dt_emit(out, c->global, &t); - if (t.kind == TOK_EOF) break; + if (cfree_c_dump_tokens(c, input, out) != 0) { + compiler_panic(c, no_loc(), "C lexer failed for input: %s", input->name); } - lex_close(lex); compiler_panic_restore(c, &saved); return 0; @@ -216,6 +127,15 @@ static void compile_into(Compiler* c, const CfreeCompileOptions* opts, return; } + if (input->lang == CFREE_LANG_C) { + if (cfree_c_compile(c, opts, input, ob) != 0) { + compiler_panic(c, no_loc(), "C frontend failed for input: %s", + input->name); + } + obj_finalize(ob); + return; + } + lex = lex_open_mem(c, input->name, (const char*)input->data, input->len); mc = 
mc_new(c, ob); @@ -227,41 +147,6 @@ static void compile_into(Compiler* c, const CfreeCompileOptions* opts, /* The assembler owns the lexer it was handed; no pp_free release. */ return; } - - { - Pp* pp = pp_new(c); - DeclTable* decls = decl_new(c, ob); - CGTarget* target = cgtarget_new(c, ob, mc); - Debug* debug = NULL; - CG* cg = NULL; - - apply_pp_options(pp, &opts->pp); - pp_push_input(pp, lex); /* PP owns the lexer from here on */ - - if (opts->opt_level > 0) { - target = opt_cgtarget_new(c, target, opts->opt_level); - } - if (opts->debug_info) { - debug = debug_new(c, ob); - } - cg = cg_new(c, target, debug); - - parse_c(c, pp, decls, cg, debug); - cgtarget_finalize(target); - if (debug) { - debug_emit(debug); - } - obj_finalize(ob); - - cg_free(cg); - if (debug) { - debug_free(debug); - } - cgtarget_free(target); /* opt_cgtarget cascades to wrapped target */ - mc_free(mc); - decl_free(decls); - pp_free(pp); /* releases the pushed lexer */ - } } /* Suffix-based language inference. See header. */ diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -15,13 +15,11 @@ #include <cfree.h> #include "arch/arch.h" -#include "cg/cg.h" #include "debug/debug.h" #include "decl/decl.h" #include "lex/lex.h" #include "link/link.h" #include "obj/obj.h" -#include "opt/opt.h" #include "parse/parse.h" #include "pp/pp.h" diff --git a/src/decl/decl.c b/src/decl/decl.c @@ -1,311 +0,0 @@ -/* DeclTable — C declaration semantics above ObjBuilder. - * - * Maps DeclId → Decl record. Allocates an ObjSymId for any non-typedef, - * non-auto/register decl with linkage; the parser may also pre-create a - * symbol (forward reference) and pass it in via Decl.obj_sym. - * - * Identifier *lookup* is not handled here — that lives on the parser's - * scope stack so block scopes and shadowing fall out naturally. DeclTable - * is just the C-language layer above ObjBuilder: storage class, linkage, - * static-locals, tentative defs, and global initializers. 
- * - * v1 surface is intentionally minimal: just enough for the spine corpus - * (functions; ints; static locals) plus the hooks DESIGN.md §5.3.1 - * commits to. Tentative-definition coalescing, COMDAT, and aliases are - * stubs at the API edge; their full semantics arrive with the multi-TU - * corpus. */ - -#include "decl/decl.h" - -#include <string.h> - -#include "core/arena.h" -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "obj/obj.h" - -struct DeclTable { - Compiler* c; - ObjBuilder* ob; - Decl* slots; /* index 0 reserved as DECL_NONE */ - u32 nslots; - u32 cap; -}; - -#define DECL_INITIAL_CAP 16u - -static void decls_grow(DeclTable* t, u32 want) { - Heap* h = t->c->env->heap; - u32 cap = t->cap; - Decl* nb; - if (cap >= want) return; - while (cap < want) cap = cap ? cap * 2u : DECL_INITIAL_CAP; - nb = (Decl*)h->alloc(h, sizeof(Decl) * cap, _Alignof(Decl)); - if (t->slots) { - memcpy(nb, t->slots, sizeof(Decl) * t->nslots); - h->free(h, t->slots, sizeof(Decl) * t->cap); - } - t->slots = nb; - t->cap = cap; -} - -DeclTable* decl_new(Compiler* c, ObjBuilder* ob) { - Heap* h = c->env->heap; - DeclTable* t = - (DeclTable*)h->alloc(h, sizeof(DeclTable), _Alignof(DeclTable)); - memset(t, 0, sizeof *t); - t->c = c; - t->ob = ob; - decls_grow(t, 1); - memset(&t->slots[0], 0, sizeof(Decl)); - t->nslots = 1; - return t; -} - -void decl_free(DeclTable* t) { - Heap* h; - if (!t) return; - h = t->c->env->heap; - if (t->slots) h->free(h, t->slots, sizeof(Decl) * t->cap); - h->free(h, t, sizeof(*t)); -} - -ObjBuilder* decl_obj(DeclTable* t) { return t ? t->ob : NULL; } - -DeclId decl_declare(DeclTable* t, const Decl* in) { - DeclId id; - Decl* slot; - decls_grow(t, t->nslots + 1); - id = (DeclId)t->nslots++; - slot = &t->slots[id]; - *slot = *in; - slot->id = id; - /* Mint an ObjSymId for any decl that needs one and lacks one. The parser - * may pre-create a symbol for forward references (e.g. 
a function - * called before its definition) and bind it through `obj_sym` here. */ - if (slot->obj_sym == OBJ_SYM_NONE && slot->name && - slot->storage != DS_TYPEDEF && slot->storage != DS_AUTO && - slot->storage != DS_REGISTER) { - SymBind bind = (slot->linkage == DL_EXTERNAL) ? SB_GLOBAL : SB_LOCAL; - SymKind k = (slot->type && slot->type->kind == TY_FUNC) ? SK_FUNC : SK_OBJ; - if (slot->flags & DF_THREAD) k = SK_TLS; - if (slot->flags & DF_WEAK) { - if (slot->linkage != DL_EXTERNAL) - compiler_panic(t->c, slot->loc, - "weak attribute requires external linkage"); - bind = SB_WEAK; - } - Sym onwire = slot->name; - /* Mach-O C-symbol convention: every C identifier carries a leading - * `_` on disk (so `int main()` is exposed as `_main`). The cgtarget - * never sees the unmangled form again — emit_macho writes ObjSym - * names verbatim, and the linker's link_intern_c_name rewrites - * caller-supplied source-level names back to this on-disk form - * when matching entry symbols / JIT lookups. */ - if (t->c->target.obj == CFREE_OBJ_MACHO && - slot->linkage != DL_NONE) { - /* Apple's C-symbol convention is unconditional: every C - * identifier with linkage gets a leading `_` on disk, regardless - * of whether the source name already started with one - * (`extern int __init_array_start[];` becomes - * `___init_array_start`). The boundary_name helper in - * link_layout.c mirrors the same rule for linker-synthesized - * boundary symbols. 
*/ - size_t nl; - const char* nm = pool_str(t->c->global, slot->name, &nl); - if (nm) { - Heap* h = t->c->env->heap; - char* buf = (char*)h->alloc(h, nl + 2u, 1); - if (buf) { - buf[0] = '_'; - if (nl) memcpy(buf + 1, nm, nl); - buf[nl + 1] = 0; - onwire = pool_intern(t->c->global, buf, (u32)(nl + 1u)); - h->free(h, buf, nl + 2u); - } - } - } - slot->obj_sym = obj_symbol_find(t->ob, onwire); - if (slot->obj_sym == OBJ_SYM_NONE) { - slot->obj_sym = obj_symbol_ex(t->ob, onwire, bind, - (SymVis)slot->visibility, k, - OBJ_SEC_NONE, 0, 0, 0); - } - } - return id; -} - -const Decl* decl_get(const DeclTable* t, DeclId id) { - if (!t || id == DECL_NONE || id >= t->nslots) return NULL; - return &t->slots[id]; -} - -ObjSymId decl_obj_sym(const DeclTable* t, DeclId id) { - const Decl* d = decl_get(t, id); - return d ? d->obj_sym : OBJ_SYM_NONE; -} - -void decl_define_function(DeclTable* t, DeclId id, ObjSecId text_section_id, - u64 value, u64 size) { - /* Backends call obj_symbol_define from func_end already; this hook is for - * callers that want explicit decl-side definition (e.g. asm-defined - * functions where no CGTarget func_end runs). */ - const Decl* d = decl_get(t, id); - ObjSecId sec_id; - if (!d || d->obj_sym == OBJ_SYM_NONE) return; - /* Caller's section wins when supplied; otherwise fall back to the Decl's - * attribute-driven section. */ - sec_id = (text_section_id != OBJ_SEC_NONE) ? 
text_section_id : d->section_id; - obj_symbol_define(t->ob, d->obj_sym, sec_id, value, size); - if ((d->flags & DF_USED) && sec_id != OBJ_SEC_NONE) { - const Section* s = obj_section_get(t->ob, sec_id); - if (s) obj_section_set_flags(t->ob, sec_id, (u16)(s->flags | SF_RETAIN)); - } -} - -void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align, - const InitItem* init, u32 ninit) { - const Decl* d = decl_get(t, id); - ObjSecId sec_id; - Sym sec_name; - int has_nonzero; - u32 i; - u32 base; - if (!d || d->obj_sym == OBJ_SYM_NONE) return; - has_nonzero = 0; - for (i = 0; i < ninit; ++i) { - if (init[i].kind != INIT_ZERO) { - has_nonzero = 1; - break; - } - } - if (d->section_id != OBJ_SEC_NONE) { - /* Attribute-pinned section: use it for both BSS-style (all-zero) and - * initialized layouts. The section's kind was set at creation time. - * Multiple decls can target the same attribute section, so place - * each one at the aligned tail rather than at offset 0. */ - sec_id = d->section_id; - if (!has_nonzero) { - base = obj_align_to(t->ob, sec_id, align ? align : 1u); - obj_reserve_bss(t->ob, sec_id, base + (u32)size, align ? align : 1u); - obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size); - } else { - base = obj_align_to(t->ob, sec_id, align ? 
align : 1u); - obj_reserve(t->ob, sec_id, size); - obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size); - for (i = 0; i < ninit; ++i) { - const InitItem* it = &init[i]; - switch (it->kind) { - case INIT_BYTES: - obj_patch(t->ob, sec_id, base + it->offset, it->v.bytes.bytes, - it->v.bytes.size); - break; - case INIT_FILL: { - u32 j; - u8 b = it->v.fill.byte; - for (j = 0; j < it->size; ++j) { - obj_patch(t->ob, sec_id, base + it->offset + j, &b, 1); - } - break; - } - case INIT_RELOC: - obj_reloc(t->ob, sec_id, base + it->offset, it->v.reloc.kind, - it->v.reloc.target, it->v.reloc.addend); - break; - case INIT_ZERO: - default: - break; - } - } - } - if (d->flags & DF_USED) { - const Section* s = obj_section_get(t->ob, sec_id); - if (s) obj_section_set_flags(t->ob, sec_id, (u16)(s->flags | SF_RETAIN)); - } - return; - } - if (!has_nonzero) { - sec_name = pool_intern_cstr(t->c->global, ".bss"); - sec_id = obj_section(t->ob, sec_name, SEC_BSS, SF_ALLOC | SF_WRITE, - align ? align : 1u); - base = obj_align_to(t->ob, sec_id, align ? align : 1u); - obj_reserve_bss(t->ob, sec_id, base + (u32)size, align ? align : 1u); - obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size); - if (d->flags & DF_USED) { - const Section* s = obj_section_get(t->ob, sec_id); - if (s) obj_section_set_flags(t->ob, sec_id, (u16)(s->flags | SF_RETAIN)); - } - return; - } - sec_name = pool_intern_cstr(t->c->global, ".data"); - sec_id = obj_section(t->ob, sec_name, SEC_DATA, SF_ALLOC | SF_WRITE, - align ? align : 1u); - { - base = obj_align_to(t->ob, sec_id, align ? 
align : 1u); - obj_reserve(t->ob, sec_id, size); - obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size); - for (i = 0; i < ninit; ++i) { - const InitItem* it = &init[i]; - switch (it->kind) { - case INIT_BYTES: - obj_patch(t->ob, sec_id, base + it->offset, it->v.bytes.bytes, - it->v.bytes.size); - break; - case INIT_FILL: { - u32 j; - u8 b = it->v.fill.byte; - for (j = 0; j < it->size; ++j) { - obj_patch(t->ob, sec_id, base + it->offset + j, &b, 1); - } - break; - } - case INIT_RELOC: - obj_reloc(t->ob, sec_id, base + it->offset, it->v.reloc.kind, - it->v.reloc.target, it->v.reloc.addend); - break; - case INIT_ZERO: - default: - break; - } - } - } - if (d->flags & DF_USED) { - const Section* s = obj_section_get(t->ob, sec_id); - if (s) obj_section_set_flags(t->ob, sec_id, (u16)(s->flags | SF_RETAIN)); - } -} - -void decl_define_tentative(DeclTable* t, DeclId id, u64 size, u32 align) { - Decl* slot; - if (id == DECL_NONE || id >= t->nslots) return; - slot = &t->slots[id]; - slot->flags |= DF_TENTATIVE; - decl_define_object(t, id, size, align, NULL, 0); -} - -void decl_define_alias(DeclTable* t, DeclId self, DeclId target) { - const Decl* sd = decl_get(t, self); - const Decl* td = decl_get(t, target); - ObjSymId tsym; - const ObjSym* ts; - if (!sd || sd->obj_sym == OBJ_SYM_NONE) { - compiler_panic(t->c, sd ? sd->loc : (SrcLoc){0}, - "alias self invalid"); - } - if (!td || target == DECL_NONE) { - compiler_panic(t->c, sd->loc, "alias target invalid"); - } - tsym = td->obj_sym; - if (tsym == OBJ_SYM_NONE) { - compiler_panic(t->c, sd->loc, "alias target has no symbol"); - } - ts = obj_symbol_get(t->ob, tsym); - if (!ts || ts->kind == SK_UNDEF) { - size_t nl = 0; - const char* nm = td->name ? pool_str(t->c->global, td->name, &nl) : NULL; - compiler_panic(t->c, sd->loc, "alias target '%s' is undefined", - nm ? 
nm : "?"); - } - obj_symbol_define(t->ob, sd->obj_sym, ts->section_id, ts->value, ts->size); -} diff --git a/src/decl/decl.h b/src/decl/decl.h @@ -1,117 +1,6 @@ -#ifndef CFREE_DECL_H -#define CFREE_DECL_H +#ifndef CFREE_SRC_DECL_COMPAT_H +#define CFREE_SRC_DECL_COMPAT_H -#include "arch/arch.h" - -/* C declaration semantics. This layer is deliberately above ObjBuilder: - * ObjBuilder stores object-format facts, while DeclTable owns C linkage, - * storage duration, tentative-definition, static-local, and initializer rules. - */ -typedef struct DeclTable DeclTable; - -typedef u32 DeclId; -#define DECL_NONE 0u - -typedef enum DeclStorage { - DS_EXTERN, - DS_STATIC, - DS_AUTO, - DS_REGISTER, - DS_TYPEDEF, -} DeclStorage; - -typedef enum DeclLinkage { - DL_NONE, - DL_INTERNAL, - DL_EXTERNAL, -} DeclLinkage; - -typedef enum DeclFlag { - DF_NONE = 0, - DF_THREAD = 1u << 0, - DF_INLINE = 1u << 1, - DF_TENTATIVE = 1u << 2, - DF_USED = 1u << 3, - DF_WEAK = 1u << 4, - DF_STATIC_LOCAL = 1u << 5, - /* Phase 2 attribute-honoring flags. DF_NORETURN is the unified bit for - * _Noreturn and __attribute__((noreturn)); the inline-policy flags are - * recorded but not yet consulted (cfree has no inliner). */ - DF_NORETURN = 1u << 6, - DF_ALWAYS_INLINE = 1u << 7, - DF_NOINLINE = 1u << 8, - DF_GNU_INLINE = 1u << 9, -} DeclFlag; - -typedef struct Decl { - DeclId id; - Sym name; - const Type* type; - ObjSymId obj_sym; - ObjSecId section_id; /* optional explicit section; OBJ_SEC_NONE => default */ - SrcLoc loc; - u8 storage; /* DeclStorage */ - u8 linkage; /* DeclLinkage */ - u8 visibility; /* SymVis */ - u8 pad; - u32 flags; /* DeclFlag */ - /* Phase 2 attribute carriers — populated by attr_list_to_decl. 
*/ - u32 align; /* explicit alignment from _Alignas or aligned(N); 0=natural */ - Sym alias_target; /* target name for __attribute__((alias("..."))); 0=none */ -} Decl; - -typedef enum InitKind { - INIT_ZERO, - INIT_BYTES, - INIT_RELOC, - INIT_FILL, -} InitKind; - -typedef struct InitReloc { - RelocKind kind; - ObjSymId target; - i64 addend; - u32 width; -} InitReloc; - -typedef struct InitItem { - u32 offset; /* byte offset inside the initialized object */ - u32 size; - u8 kind; /* InitKind */ - u8 pad[3]; - union { - ConstBytes bytes; - InitReloc reloc; - struct { - u8 byte; - } fill; - } v; -} InitItem; - -DeclTable* decl_new(Compiler*, ObjBuilder*); -void decl_free(DeclTable*); - -/* The ObjBuilder a DeclTable was created against. Useful for the parser - * (which receives a DeclTable, not the builder) when it needs to create - * sections or look up object-level facts. */ -ObjBuilder* decl_obj(DeclTable*); - -DeclId decl_declare(DeclTable*, const Decl*); -const Decl* decl_get(const DeclTable*, DeclId); -ObjSymId decl_obj_sym(const DeclTable*, DeclId); - -void decl_define_function(DeclTable*, DeclId, ObjSecId text_section_id, - u64 value, u64 size); -void decl_define_object(DeclTable*, DeclId, u64 size, u32 align, - const InitItem* init, u32 ninit); -void decl_define_tentative(DeclTable*, DeclId, u64 size, u32 align); - -/* Defines `self` as an alias of `target`: self's ObjSym shares - * (section_id, value, size) with target's symbol. Bind/visibility come from - * self's Decl; DF_WEAK on self produces a weak alias (weakref-equivalent). - * Aliasing an undefined or still-tentative target at finalize time is a fatal - * diagnostic. Object-format mapping: ELF same (st_shndx, st_value); COFF - * redirect; Mach-O indirect symbol. 
*/ -void decl_define_alias(DeclTable*, DeclId self, DeclId target); +#include "../../lang/c/decl/decl.h" #endif diff --git a/src/decl/decl_attrs.c b/src/decl/decl_attrs.c @@ -1,101 +0,0 @@ -#include "decl/decl_attrs.h" - -#include <string.h> - -#include "core/pool.h" -#include "obj/obj.h" - -/* Bare `__attribute__((aligned))` (no argument) means "biggest scalar - * alignment". cfree's targets all have `_Alignof(long double) == 16` - * (x86_64 SysV, AArch64 AAPCS, RISC-V LP64D), so 16 is a valid v1 - * stand-in across the board. */ -#define ATTR_ALIGNED_DEFAULT 16u - -static void apply_section(Compiler* c, DeclTable* t, const Attr* a, Decl* out) { - if (!t || a->v.sym == 0) return; - ObjBuilder* ob = decl_obj(t); - size_t nlen = 0; - const char* name = pool_str(c->global, a->v.sym, &nlen); - if (!name) return; - - SecKind kind; - u16 flags; - if (strstr(name, "text") != NULL) { - kind = SEC_TEXT; - flags = SF_ALLOC | SF_EXEC; - } else if (strstr(name, "rodata") != NULL) { - kind = SEC_RODATA; - flags = SF_ALLOC; - } else if (strstr(name, "bss") != NULL) { - kind = SEC_BSS; - flags = SF_ALLOC | SF_WRITE; - } else { - kind = SEC_DATA; - flags = SF_ALLOC | SF_WRITE; - } - out->section_id = obj_section(ob, a->v.sym, kind, flags, 1u); -} - -static void apply_visibility(Compiler* c, const Attr* a, Decl* out) { - if (a->v.sym == 0) { - compiler_panic(c, a->loc, "visibility attribute missing argument"); - } - size_t n = 0; - const char* s = pool_str(c->global, a->v.sym, &n); - if (s && strcmp(s, "default") == 0) { - out->visibility = SV_DEFAULT; - } else if (s && strcmp(s, "hidden") == 0) { - out->visibility = SV_HIDDEN; - } else if (s && strcmp(s, "protected") == 0) { - out->visibility = SV_PROTECTED; - } else if (s && strcmp(s, "internal") == 0) { - out->visibility = SV_INTERNAL; - } else { - compiler_panic(c, a->loc, - "unknown visibility '%s' (expected default|hidden|" - "protected|internal)", - s ? 
s : ""); - } -} - -void attr_list_to_decl(Compiler* c, DeclTable* t, const Attr* attrs, - Decl* out) { - for (const Attr* a = attrs; a; a = a->next) { - switch ((AttrKind)a->kind) { - case ATTR_ALIGNED: { - u32 v = (a->nargs == 0) ? ATTR_ALIGNED_DEFAULT : (u32)a->v.i; - if (v > out->align) out->align = v; - break; - } - case ATTR_SECTION: - apply_section(c, t, a, out); - break; - case ATTR_USED: - out->flags |= DF_USED; - break; - case ATTR_WEAK: - out->flags |= DF_WEAK; - break; - case ATTR_NORETURN: - out->flags |= DF_NORETURN; - break; - case ATTR_ALWAYS_INLINE: - out->flags |= DF_ALWAYS_INLINE; - break; - case ATTR_NOINLINE: - out->flags |= DF_NOINLINE; - break; - case ATTR_GNU_INLINE: - out->flags |= DF_GNU_INLINE; - break; - case ATTR_VISIBILITY: - apply_visibility(c, a, out); - break; - case ATTR_ALIAS: - out->alias_target = a->v.sym; - break; - default: - break; - } - } -} diff --git a/src/decl/decl_attrs.h b/src/decl/decl_attrs.h @@ -1,32 +1,6 @@ -#ifndef CFREE_DECL_ATTRS_H -#define CFREE_DECL_ATTRS_H +#ifndef CFREE_SRC_DECL_ATTRS_COMPAT_H +#define CFREE_SRC_DECL_ATTRS_COMPAT_H -#include "core/core.h" -#include "decl/decl.h" -#include "parse/attr.h" - -/* Decodes a parser-produced Attr* list onto a Decl. Walks the chain and - * applies every honored attribute (see doc/ATTRIBUTE.md "Phase 2"): - * - * packed — N/A here (record-level; see Type.rec.packed) - * aligned(N) — Decl.align = max(Decl.align, N) - * section("s") — interns/creates ObjSecId, stores Decl.section_id - * used — Decl.flags |= DF_USED - * noreturn — Decl.flags |= DF_NORETURN - * alias("t") — Decl.alias_target = intern("t") - * weak — Decl.flags |= DF_WEAK - * visibility(s)— Decl.visibility = SV_* - * always_inline / noinline / gnu_inline — Decl.flags |= DF_* - * - * Unknown / non-honored attributes (deprecated, format, nonnull, ...) - * are silently skipped — they were validated for argument shape during - * parsing and have no Decl-side effect in Phase 2. 
- * - * `attrs` may be NULL; `out` must be non-NULL. Idempotent: applying a - * list twice produces the same Decl state. Phase 2 callers invoke this - * once, between filling out the bulk Decl fields and decl_declare(). - * The DeclTable* is used to reach the underlying ObjBuilder when - * `__attribute__((section("...")))` requires creating an ObjSecId. */ -void attr_list_to_decl(Compiler*, DeclTable*, const Attr* attrs, Decl* out); +#include "../../lang/c/decl/decl_attrs.h" #endif diff --git a/src/lex/lex.h b/src/lex/lex.h @@ -1,127 +1,6 @@ -#ifndef CFREE_LEX_H -#define CFREE_LEX_H +#ifndef CFREE_SRC_LEX_COMPAT_H +#define CFREE_SRC_LEX_COMPAT_H -#include "core/core.h" - -typedef enum TokKind { - TOK_EOF = 0, - TOK_IDENT, /* v.ident */ - TOK_NUM, /* lit */ - TOK_FLT, /* lit */ - TOK_STR, /* lit; v.str is decoded bytes if target-independent */ - TOK_CHR, /* lit */ - TOK_PUNCT, /* v.punct */ - TOK_PP_HASH, /* # */ - TOK_PP_PASTE, /* ## */ - TOK_HEADER, /* header-name in #include / #embed */ - TOK_NEWLINE, /* visible to PP only */ - TOK_KW_FIRST, - /* C11 keywords are inserted into this range by parse_c via pool */ - TOK_KW_LAST = 0x1000, -} TokKind; - -typedef enum TokFlag { - TF_AT_BOL = 1u << 0, - TF_HAS_SPACE = 1u << 1, - TF_NO_EXPAND = 1u << 2, - TF_INT_U = 1u << 3, - TF_INT_L = 1u << 4, - TF_INT_LL = 1u << 5, - TF_FLT_F = 1u << 6, - TF_FLT_L = 1u << 7, - TF_STR_WIDE = 1u << 8, - TF_STR_U8 = 1u << 9, - TF_STR_U16 = 1u << 10, - TF_STR_U32 = 1u << 11, - TF_LITERAL_BAD = 1u << 12, -} TokFlag; - -typedef enum Punct { - P_NONE = 0, - /* Single-char punctuators reuse their ASCII codepoint here. 
*/ - P_ARROW = 256, - P_INC, - P_DEC, - P_SHL, - P_SHR, - P_LE, - P_GE, - P_EQ, - P_NE, - P_AND, - P_OR, - P_ADD_ASSIGN, - P_SUB_ASSIGN, - P_MUL_ASSIGN, - P_DIV_ASSIGN, - P_MOD_ASSIGN, - P_AND_ASSIGN, - P_OR_ASSIGN, - P_XOR_ASSIGN, - P_SHL_ASSIGN, - P_SHR_ASSIGN, - P_ELLIPSIS, - P_HASH_HASH, -} Punct; - -typedef u32 LitId; -#define LIT_NONE 0u - -typedef enum LitKind { - LIT_INT, - LIT_FLOAT, - LIT_STRING, - LIT_CHAR, -} LitKind; - -typedef enum LitEnc { - LENC_ORDINARY, - LENC_UTF8, - LENC_WIDE, - LENC_UTF16, - LENC_UTF32, -} LitEnc; - -typedef struct LitInfo { - u8 kind; /* LitKind */ - u8 enc; /* LitEnc for strings/chars */ - u16 flags; /* TokFlag suffix/encoding bits */ - Sym spelling; /* exact source spelling */ - BytesId bytes; /* decoded bytes/code units, if already decoded */ -} LitInfo; - -typedef struct Tok { - u16 kind; - u16 flags; - SrcLoc loc; - Sym spelling; /* exact token spelling for diagnostics/#/## */ - LitId lit; /* literal-table handle; LIT_NONE otherwise */ - union { - Sym ident; - Sym str; - u32 punct; - } v; -} Tok; - -typedef struct Lexer Lexer; - -/* lex_open_mem borrows (src, len). The lexer does not copy source bytes; - * tokens carry SrcLoc + Sym spellings into the global pool, but diagnostics - * and the preprocessor's directive scanner read from the borrowed buffer. - * - * Ownership: a Lexer that has been handed to pp_push_input is owned by PP - * thereafter — PP closes it on EOF-pop or in pp_free. Callers only call - * lex_close on a Lexer they have not pushed (e.g. standalone .s assembly). - * - * The borrowed (src, len) buffer must outlive the Lexer, which for a pushed - * Lexer means outliving pp_free. */ -Lexer* lex_open_mem(Compiler*, const char* name, const char* src, size_t len); -void lex_close(Lexer*); - -/* Streaming. Returns TOK_EOF repeatedly at end of input. 
*/ -Tok lex_next(Lexer*); -SrcLoc lex_loc(const Lexer*); -u32 lex_file_id(const Lexer*); -const LitInfo* lex_lit(const Lexer*, LitId); +#include "../../lang/c/lex/lex.h" #endif diff --git a/src/parse/cg_public_compat.h b/src/parse/cg_public_compat.h @@ -0,0 +1,6 @@ +#ifndef CFREE_SRC_PARSE_CG_PUBLIC_COMPAT_H +#define CFREE_SRC_PARSE_CG_PUBLIC_COMPAT_H + +#include "../../lang/c/parse/cg_public_compat.h" + +#endif diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -1,1117 +0,0 @@ -/* parse.c — residual C11 parser core. - * - * Contains: - * - kw_names[] table (used by parse_c to intern keywords) - * - Diagnostics/token helpers (perr, advance, peek1, fetch_tok, ...) - * - Scope/tag operations - * - Type helpers (ty_int, ty_size_t) - * - Local-variable slot allocation (make_local, make_local_aligned) - * - Static-local symbol naming (mint_static_local_sym) - * - Declaration driver (parse_init_declarator, parse_local_decl) - * - TU-level driver (parse_param_list, declare_function, - * parse_function_body, parse_external_decl, parse_translation_unit, - * parse_c) - * - * All expression, type, initializer, and statement code lives in - * parse_expr.c, parse_type.c, parse_init.c, and parse_stmt.c. 
*/ - -#include "parse/parse_priv.h" - -#include <stdarg.h> -#include <string.h> - -/* ============================================================ - * Keywords - * ============================================================ */ - -static const char* const kw_names[KW_COUNT] = { - NULL, "auto", "break", "case", "char", - "const", "continue", "default", "do", "double", - "else", "enum", "extern", "float", "for", - "goto", "if", "inline", "int", "long", - "register", "restrict", "return", "short", "signed", - "sizeof", "static", "struct", "switch", "typedef", - "union", "unsigned", "void", "volatile", "while", - "_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof", - "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local", - "asm", "__asm__", -}; - -/* ============================================================ - * Diagnostics - * ============================================================ */ - -static SrcLoc tok_loc(const Tok* t) { return t->loc; } - -_Noreturn void perr(Parser* p, const char* fmt, ...) { - va_list ap; - SrcLoc loc = tok_loc(&p->cur); - va_start(ap, fmt); - compiler_panicv(p->c, loc, fmt, ap); -} - -/* ============================================================ - * Token helpers - * ============================================================ */ - -/* Width of an encoding prefix on a string-literal spelling: 0 for ordinary, - * 1 for L/u/U, 2 for u8. */ -static size_t str_prefix_len(u16 flags) { - if (flags & TF_STR_U8) return 2; - if (flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) return 1; - return 0; -} - -#define STR_ENC_MASK \ - (TF_STR_WIDE | TF_STR_U8 | TF_STR_U16 | TF_STR_U32) - -/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5. 
*/ -static Tok fuse_string_lits(Parser* p, Tok a, Tok b) { - u16 ae = (u16)(a.flags & STR_ENC_MASK); - u16 be = (u16)(b.flags & STR_ENC_MASK); - u16 fused_enc; - size_t alen = 0, blen = 0; - const char* as = pool_str(p->pool, a.spelling, &alen); - const char* bs = pool_str(p->pool, b.spelling, &blen); - size_t apfx, bpfx; - size_t a_content_len, b_content_len; - size_t out_pfx_len; - size_t out_len; - Heap* h = p->c->env->heap; - char* buf; - size_t k = 0; - Tok out; - if (!as || !bs) perr(p, "bad string literal in concatenation"); - if (ae != 0 && be != 0 && ae != be) { - perr(p, "concatenating string literals with incompatible " - "encoding prefixes"); - } - fused_enc = ae ? ae : be; - apfx = str_prefix_len(a.flags); - bpfx = str_prefix_len(b.flags); - if (alen < apfx + 2 || as[apfx] != '"' || as[alen - 1] != '"' || - blen < bpfx + 2 || bs[bpfx] != '"' || bs[blen - 1] != '"') { - perr(p, "malformed string literal in concatenation"); - } - a_content_len = alen - apfx - 2; - b_content_len = blen - bpfx - 2; - out_pfx_len = ae ? apfx : bpfx; - out_len = out_pfx_len + 1 + a_content_len + b_content_len + 1; - buf = (char*)h->alloc(h, out_len, 1); - if (!buf) perr(p, "out of memory fusing string literals"); - if (out_pfx_len) { - const char* src = ae ? as : bs; - memcpy(buf + k, src, out_pfx_len); - k += out_pfx_len; - } - buf[k++] = '"'; - if (a_content_len) { - memcpy(buf + k, as + apfx + 1, a_content_len); - k += a_content_len; - } - if (b_content_len) { - memcpy(buf + k, bs + bpfx + 1, b_content_len); - k += b_content_len; - } - buf[k++] = '"'; - out = a; - out.spelling = pool_intern(p->pool, buf, k); - out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc); - out.lit = LIT_NONE; - h->free(h, buf, 0); - return out; -} - -/* Pull one logical token from pp, collapsing adjacent TOK_STR runs. 
*/ -static Tok fetch_tok(Parser* p) { - Tok t; - if (p->has_pending) { - t = p->pending; - p->has_pending = 0; - } else { - t = pp_next(p->pp); - } - if (t.kind != TOK_STR) return t; - for (;;) { - Tok n = pp_next(p->pp); - if (n.kind != TOK_STR) { - p->pending = n; - p->has_pending = 1; - return t; - } - t = fuse_string_lits(p, t, n); - } -} - -void advance(Parser* p) { - if (p->replay_active) { - if (p->replay_pos < p->replay_len) { - p->cur = p->replay[p->replay_pos++]; - return; - } - p->replay_active = 0; - } - if (p->has_next) { - p->cur = p->next; - p->has_next = 0; - } else { - p->cur = fetch_tok(p); - } -} - -Tok peek1(Parser* p) { - if (p->replay_active && p->replay_pos < p->replay_len) { - return p->replay[p->replay_pos]; - } - if (!p->has_next) { - p->next = fetch_tok(p); - p->has_next = 1; - } - return p->next; -} - -void expect_punct(Parser* p, u32 punct, const char* what) { - if (!accept_punct(p, punct)) { - perr(p, "expected %s", what); - } -} - -int accept_punct(Parser* p, u32 punct) { - if (is_punct(&p->cur, punct)) { - advance(p); - return 1; - } - return 0; -} - -/* Record tokens from the current `{` through the matching `}` into the - * parser's replay buffer. */ -void record_braced_block(Parser* p) { - int depth = 0; - if (!is_punct(&p->cur, '{')) perr(p, "internal: record on non-'{'"); - p->replay_len = 0; - for (;;) { - if (p->replay_len == p->replay_cap) { - u32 new_cap = p->replay_cap ? 
p->replay_cap * 2 : 32; - Tok* nv = arena_array(p->c->tu, Tok, new_cap); - if (!nv) perr(p, "out of memory in record_braced_block"); - if (p->replay && p->replay_len) { - memcpy(nv, p->replay, p->replay_len * sizeof(Tok)); - } - p->replay = nv; - p->replay_cap = new_cap; - } - p->replay[p->replay_len++] = p->cur; - if (is_punct(&p->cur, '{')) { - ++depth; - } else if (is_punct(&p->cur, '}')) { - --depth; - if (depth == 0) break; - } else if (p->cur.kind == TOK_EOF) { - perr(p, "unexpected end of file in initializer"); - } - advance(p); - } -} - -/* After record_braced_block, rewind to replay from the start. */ -void replay_rewind(Parser* p) { - if (p->replay_len == 0) perr(p, "internal: replay_rewind with empty buffer"); - p->cur = p->replay[0]; - p->replay_pos = 1; - p->replay_active = 1; - p->has_next = 0; -} - -/* Count top-level items in a recorded brace list. */ -u32 count_recorded_top_level_items(const Tok* vec, u32 len) { - u32 count; - u32 i; - int depth = 0; - if (len < 2) return 0; - if (len == 2) return 0; /* `{}` */ - count = 1; - for (i = 1; i < len - 1; ++i) { - const Tok* t = &vec[i]; - if (is_punct(t, '{') || is_punct(t, '(') || is_punct(t, '[')) ++depth; - else if (is_punct(t, '}') || is_punct(t, ')') || is_punct(t, ']')) --depth; - else if (depth == 0 && is_punct(t, ',')) ++count; - } - if (is_punct(&vec[len - 2], ',')) --count; - return count; -} - -/* ============================================================ - * Scopes - * ============================================================ */ - -Scope* scope_new(Parser* p, Scope* parent) { - Scope* s = arena_new(p->c->tu, Scope); - if (!s) perr(p, "out of memory in scope_new"); - s->entries = NULL; - s->tags = NULL; - s->parent = parent; - return s; -} - -void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); } - -void scope_pop(Parser* p) { - if (p->scope) p->scope = p->scope->parent; -} - -SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, - const Type* type) { - SymEntry* e = 
arena_new(p->c->tu, SymEntry); - if (!e) perr(p, "out of memory in scope_define"); - memset(e, 0, sizeof *e); - e->name = name; - e->kind = (u8)kind; - e->type = type; - e->next = p->scope->entries; - p->scope->entries = e; - return e; -} - -SymEntry* scope_lookup(Parser* p, Sym name) { - Scope* s; - for (s = p->scope; s; s = s->parent) { - SymEntry* e; - for (e = s->entries; e; e = e->next) { - if (e->name == name) return e; - } - } - return NULL; -} - -TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, - int complete) { - TagEntry* e = arena_new(p->c->tu, TagEntry); - if (!e) perr(p, "out of memory in tag_define"); - memset(e, 0, sizeof *e); - e->name = name; - e->kind = (u8)kind; - e->complete = (u8)(complete ? 1 : 0); - e->type = type; - e->next = p->scope->tags; - p->scope->tags = e; - return e; -} - -TagEntry* tag_lookup(Parser* p, Sym name) { - Scope* s; - for (s = p->scope; s; s = s->parent) { - TagEntry* e; - for (e = s->tags; e; e = e->next) { - if (e->name == name) return e; - } - } - return NULL; -} - -TagEntry* tag_lookup_local(Parser* p, Sym name) { - TagEntry* e; - for (e = p->scope->tags; e; e = e->next) { - if (e->name == name) return e; - } - return NULL; -} - -/* ============================================================ - * Type helpers - * ============================================================ */ - -static const Type* ty_size_t(Parser* p) { - return abi_size_type(p->abi, p->pool); -} - -/* ============================================================ - * Local-variable slot allocation - * ============================================================ */ - -FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, - SrcLoc loc, u32 align_override) { - FrameSlotDesc fsd; - FrameSlot s; - SymEntry* e; - u32 nat = abi_alignof(p->abi, type); - memset(&fsd, 0, sizeof fsd); - fsd.type = type; - fsd.name = name; - fsd.loc = loc; - fsd.size = abi_sizeof(p->abi, type); - fsd.align = (align_override > nat) ? 
align_override : nat; - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - s = cg_local(p->cg, &fsd); - e = scope_define(p, name, SEK_LOCAL, type); - e->v.slot = s; - return s; -} - -FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) { - return make_local_aligned(p, name, type, loc, 0); -} - -/* ============================================================ - * Static-local symbol naming - * ============================================================ */ - -/* Mint a unique linker name for a static local: `<orig>.<counter>`. */ -Sym mint_static_local_sym(Parser* p, Sym orig) { - size_t olen = 0; - const char* on = pool_str(p->pool, orig, &olen); - char buf[128]; - u32 wlen = 0; - u32 id = ++p->static_local_counter; - if (olen > 100) olen = 100; - for (size_t i = 0; i < olen && wlen < sizeof buf - 1; ++i) { - buf[wlen++] = on[i]; - } - if (wlen < sizeof buf - 1) buf[wlen++] = '.'; - { - char digits[12]; - int dn = 0; - if (id == 0) digits[dn++] = '0'; - while (id) { - digits[dn++] = (char)('0' + (id % 10)); - id /= 10; - } - while (dn && wlen < sizeof buf - 1) buf[wlen++] = digits[--dn]; - } - return pool_intern(p->pool, buf, wlen); -} - -/* ============================================================ - * Declarations - * ============================================================ */ - -/* Parse a single init-declarator after the decl-specs have been consumed. 
*/ -static void parse_init_declarator(Parser* p, const DeclSpecs* specs) { - SrcLoc loc; - Sym name; - const Type* var_ty = parse_declarator(p, specs->type, &name, &loc); - - if (specs->storage == DS_TYPEDEF) { - if (is_punct(&p->cur, '=')) { - perr(p, "typedef declarator cannot have initializer"); - } - { - SymEntry* e = scope_define(p, name, SEK_TYPEDEF, var_ty); - if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) { - FrameSlot count_slot = p->vla_pending_count_slot; - const Type* elem_ty = var_ty->arr.elem; - u32 esz = abi_sizeof(p->abi, elem_ty); - FrameSlotDesc bsd; - FrameSlot byte_slot; - memset(&bsd, 0, sizeof bsd); - bsd.type = ty_size_t(p); - bsd.size = abi_sizeof(p->abi, bsd.type); - bsd.align = abi_alignof(p->abi, bsd.type); - bsd.kind = FS_LOCAL; - byte_slot = cg_local(p->cg, &bsd); - cg_set_loc(p->cg, loc); - cg_push_local_typed(p->cg, count_slot, ty_size_t(p)); - to_rvalue(p); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - e->vla_byte_slot = byte_slot; - p->vla_pending = 0; - p->vla_pending_count_slot = FRAME_SLOT_NONE; - } else if (specs->vla_byte_slot != FRAME_SLOT_NONE) { - e->vla_byte_slot = specs->vla_byte_slot; - } - } - (void)loc; - return; - } - - if (specs->storage == DS_STATIC) { - Decl decl_in; - DeclId did; - ObjSymId sym; - SymEntry* e; - Sym lname = mint_static_local_sym(p, name); - int has_init; - u32 align_eff; - memset(&decl_in, 0, sizeof decl_in); - decl_in.name = lname; - decl_in.type = var_ty; - decl_in.loc = loc; - decl_in.storage = DS_STATIC; - decl_in.linkage = DL_INTERNAL; - decl_in.visibility = SV_DEFAULT; - decl_in.flags = DF_STATIC_LOCAL | (specs->flags & DF_THREAD); - attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); - did = decl_declare(p->decls, &decl_in); - sym = decl_obj_sym(p->decls, did); - e = scope_define(p, name, SEK_GLOBAL, var_ty); - 
e->v.sym = sym; - has_init = accept_punct(p, '='); - if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { - const Type* completed = complete_incomplete_array(p, var_ty); - if (completed != var_ty) { - var_ty = completed; - e->type = var_ty; - } - } - align_eff = (specs->align > decl_in.align) ? specs->align : decl_in.align; - define_static_object(p, sym, var_ty, specs->quals, has_init, loc, - align_eff); - return; - } - - if (specs->storage == DS_EXTERN) { - Decl decl_in; - DeclId did; - ObjSymId sym; - SymEntry* e; - SymEntry* prior; - if (accept_punct(p, '=')) { - perr(p, "block-scope extern with initializer not supported"); - } - prior = scope_lookup(p, name); - if (prior && prior->kind == SEK_GLOBAL) { - e = scope_define(p, name, SEK_GLOBAL, var_ty); - e->v.sym = prior->v.sym; - return; - } - memset(&decl_in, 0, sizeof decl_in); - decl_in.name = name; - decl_in.type = var_ty; - decl_in.loc = loc; - decl_in.storage = DS_EXTERN; - decl_in.linkage = DL_EXTERNAL; - decl_in.visibility = SV_DEFAULT; - decl_in.flags = specs->flags & DF_THREAD; - attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); - did = decl_declare(p->decls, &decl_in); - sym = decl_obj_sym(p->decls, did); - e = scope_define(p, name, SEK_GLOBAL, var_ty); - e->v.sym = sym; - return; - } - - if (var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete && - (p->vla_pending || specs->vla_byte_slot != FRAME_SLOT_NONE)) { - const Type* elem_ty = var_ty->arr.elem; - const Type* ptr_ty = type_ptr(p->pool, elem_ty); - FrameSlot byte_slot; - FrameSlot ptr_slot; - SymEntry* sym_entry; - if (p->vla_pending) { - FrameSlot count_slot = p->vla_pending_count_slot; - u32 esz = abi_sizeof(p->abi, elem_ty); - FrameSlotDesc bsd; - memset(&bsd, 0, sizeof bsd); - bsd.type = ty_size_t(p); - bsd.size = abi_sizeof(p->abi, bsd.type); - bsd.align = abi_alignof(p->abi, bsd.type); - bsd.kind = FS_LOCAL; - byte_slot = cg_local(p->cg, &bsd); - p->vla_pending = 0; - p->vla_pending_count_slot = 
FRAME_SLOT_NONE; - cg_set_loc(p->cg, loc); - cg_push_local_typed(p->cg, count_slot, ty_size_t(p)); - to_rvalue(p); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - } else { - byte_slot = specs->vla_byte_slot; - } - ptr_slot = make_local(p, name, ptr_ty, loc); - cg_set_loc(p->cg, loc); - cg_push_local_typed(p->cg, byte_slot, ty_size_t(p)); - cg_load(p->cg); - cg_alloca(p->cg); - cg_push_local_typed(p->cg, ptr_slot, ptr_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - sym_entry = scope_lookup(p, name); - if (sym_entry && sym_entry->kind == SEK_LOCAL) { - sym_entry->vla_byte_slot = byte_slot; - } - if (accept_punct(p, '=')) { - perr(p, "VLA initializers are not allowed (§6.7.9 ¶3)"); - } - return; - } - /* Non-VLA local. */ - { - int has_init = is_punct(&p->cur, '='); - FrameSlot s; - if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) { - advance(p); /* '=' */ - var_ty = complete_incomplete_array(p, var_ty); - s = make_local_aligned(p, name, var_ty, loc, specs->align); - cg_set_loc(p->cg, loc); - init_at(p, s, var_ty, 0, var_ty); - return; - } - s = make_local_aligned(p, name, var_ty, loc, specs->align); - if (accept_punct(p, '=')) { - cg_set_loc(p->cg, loc); - if ((var_ty->kind == TY_STRUCT || var_ty->kind == TY_UNION) && - !is_punct(&p->cur, '{')) { - parse_assign_expr(p); - emit_struct_copy_into_slot(p, s, var_ty, 0, var_ty); - } else if (var_ty->kind == TY_ARRAY || var_ty->kind == TY_STRUCT || - var_ty->kind == TY_UNION) { - init_at(p, s, var_ty, 0, var_ty); - } else { - cg_push_local_typed(p->cg, s, var_ty); - parse_assign_expr(p); - to_rvalue(p); - coerce_top_to_lvalue(p); - cg_store(p->cg); - cg_drop(p->cg); - } - } - } -} - -void parse_local_decl(Parser* p, const DeclSpecs* specs) { - if (accept_punct(p, ';')) return; - parse_init_declarator(p, specs); - while 
(accept_punct(p, ',')) { - parse_init_declarator(p, specs); - } - expect_punct(p, ';', "';' after declaration"); -} - -/* ============================================================ - * External (top-level) declarations - * ============================================================ */ - -void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, - u8* variadic_out) { - ParamInfo* infos; - u32 cap = 4; - u32 n = 0; - *variadic_out = 0; - *infos_out = NULL; - *nparams_out = 0; - - if (is_punct(&p->cur, ')')) { - return; - } - if (is_kw(p, &p->cur, KW_VOID)) { - Tok n2 = peek1(p); - if (is_punct(&n2, ')')) { - advance(p); /* `void` */ - return; /* `(void)` */ - } - } - - infos = (ParamInfo*)arena_array(p->c->tu, ParamInfo, cap); - for (;;) { - DeclSpecs specs; - Sym pname = 0; - SrcLoc ploc = {0, 0, 0}; - const Type* pty; - if (accept_punct(p, P_ELLIPSIS)) { - *variadic_out = 1; - break; - } - if (!parse_decl_specs(p, &specs)) { - perr(p, "expected parameter type"); - } - p->in_param_decl++; - pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname, - &ploc); - p->in_param_decl--; - if (pty && pty->kind == TY_ARRAY) { - pty = type_ptr(p->pool, pty->arr.elem); - } else if (pty && pty->kind == TY_FUNC) { - pty = type_ptr(p->pool, pty); - } - if (n == cap) { - cap *= 2; - ParamInfo* nbuf = (ParamInfo*)arena_array(p->c->tu, ParamInfo, cap); - memcpy(nbuf, infos, sizeof(ParamInfo) * n); - infos = nbuf; - } - infos[n].name = pname; - infos[n].type = pty; - infos[n].loc = ploc; - ++n; - if (!accept_punct(p, ',')) break; - } - *infos_out = infos; - *nparams_out = (u16)n; -} - -static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty, - const DeclSpecs* specs, SrcLoc fname_loc, - const Attr* dattrs, - ObjSecId* out_section_id, - u32* out_decl_flags, - Sym* out_alias_target) { - if (out_section_id) *out_section_id = OBJ_SEC_NONE; - if (out_decl_flags) *out_decl_flags = 0; - if (out_alias_target) *out_alias_target = 0; - 
SymEntry* existing = scope_lookup(p, fname); - if (existing && existing->kind == SEK_FUNC) { - Decl tmp; - memset(&tmp, 0, sizeof tmp); - attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp); - attr_list_to_decl(p->c, p->decls, dattrs, &tmp); - if (out_section_id) *out_section_id = tmp.section_id; - if (out_decl_flags) *out_decl_flags = tmp.flags; - if (out_alias_target) *out_alias_target = tmp.alias_target; - return existing; - } - { - Decl decl_in; - DeclId did; - ObjSymId fsym; - SymEntry* e; - memset(&decl_in, 0, sizeof decl_in); - decl_in.name = fname; - decl_in.type = fn_ty; - decl_in.loc = fname_loc; - decl_in.storage = (specs->storage == DS_STATIC) ? DS_STATIC : DS_EXTERN; - decl_in.linkage = - (specs->storage == DS_STATIC) ? DL_INTERNAL : DL_EXTERNAL; - decl_in.visibility = SV_DEFAULT; - attr_list_to_decl(p->c, p->decls, specs->attrs, &decl_in); - attr_list_to_decl(p->c, p->decls, dattrs, &decl_in); - did = decl_declare(p->decls, &decl_in); - fsym = decl_obj_sym(p->decls, did); - e = scope_define(p, fname, SEK_FUNC, fn_ty); - e->v.sym = fsym; - if (out_section_id) *out_section_id = decl_in.section_id; - if (out_decl_flags) *out_decl_flags = decl_in.flags; - if (out_alias_target) *out_alias_target = decl_in.alias_target; - return e; - } -} - -static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, - const ABIFuncInfo* abi, const ParamInfo* infos, - u16 nparams, SrcLoc fname_loc, - ObjSecId section_id, u32 decl_flags) { - CGFuncDesc fd; - CGParamDesc* pds = NULL; - - memset(&fd, 0, sizeof fd); - fd.sym = fsym; - fd.text_section_id = - (section_id != OBJ_SEC_NONE) ? 
section_id : p->text_sec; - fd.group_id = OBJ_GROUP_NONE; - fd.fn_type = fn_ty; - fd.abi = abi; - fd.params = NULL; - fd.nparams = nparams; - fd.loc = fname_loc; - if (decl_flags & DF_NORETURN) fd.flags |= CGFD_NORETURN; - - if (nparams) { - pds = (CGParamDesc*)arena_array(p->c->tu, CGParamDesc, nparams); - memset(pds, 0, sizeof(CGParamDesc) * nparams); - for (u16 i = 0; i < nparams; ++i) { - pds[i].index = i; - pds[i].name = infos[i].name; - pds[i].type = infos[i].type; - pds[i].slot = FRAME_SLOT_NONE; - pds[i].abi = &abi->params[i]; - pds[i].incoming = NULL; - pds[i].nincoming = 0; - pds[i].loc = infos[i].loc; - } - fd.params = pds; - } - - scope_push(p); /* parameter scope */ - GotoLabel* saved_goto_labels = p->goto_labels; - SwitchCtx* saved_switch = p->cur_switch; - p->goto_labels = NULL; - p->cur_switch = NULL; - cg_set_loc(p->cg, fname_loc); - cg_func_begin(p->cg, &fd); - - for (u16 i = 0; i < nparams; ++i) { - FrameSlotDesc fsd; - FrameSlot s; - SymEntry* e; - memset(&fsd, 0, sizeof fsd); - fsd.type = infos[i].type; - fsd.name = infos[i].name; - fsd.loc = infos[i].loc; - fsd.size = abi_sizeof(p->abi, infos[i].type); - fsd.align = abi_alignof(p->abi, infos[i].type); - fsd.kind = FS_PARAM; - fsd.flags = FSF_NONE; - s = cg_local(p->cg, &fsd); - pds[i].slot = s; - cg_param(p->cg, &pds[i]); - if (infos[i].name) { - e = scope_define(p, infos[i].name, SEK_LOCAL, infos[i].type); - e->v.slot = s; - } - } - - parse_compound_stmt(p); - if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID && - fn_ty->fn.ret->kind != TY_STRUCT && fn_ty->fn.ret->kind != TY_UNION) { - cg_push_int(p->cg, 0, fn_ty->fn.ret); - cg_ret(p->cg, 1); - } else { - cg_ret(p->cg, 0); - } - for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) { - if (!gl->placed) { - compiler_panic(p->c, gl->first_use, "goto to undefined label"); - } - } - p->goto_labels = saved_goto_labels; - p->cur_switch = saved_switch; - cg_func_end(p->cg); - scope_pop(p); -} - -/* Parse one external declaration. 
*/ -static void parse_external_decl(Parser* p) { - DeclSpecs specs; - Sym name; - SrcLoc loc; - const Type* base_ty; - - if (!parse_decl_specs(p, &specs)) { - perr(p, "expected declaration"); - } - - if (accept_punct(p, ';')) return; - - if (specs.storage == DS_TYPEDEF) { - for (;;) { - Sym tname = 0; - SrcLoc tloc = {0, 0, 0}; - const Type* tty = parse_declarator_full(p, specs.type, - /*allow_abstract=*/0, - &tname, &tloc); - if (is_punct(&p->cur, '=')) { - perr(p, "typedef declarator cannot have initializer"); - } - scope_define(p, tname, SEK_TYPEDEF, tty); - (void)tloc; - if (!accept_punct(p, ',')) break; - } - expect_punct(p, ';', "';' after typedef declaration"); - return; - } - - base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected declarator"); - } - name = p->cur.v.ident; - loc = tok_loc(&p->cur); - advance(p); - - Attr* dattrs = NULL; - parse_attrs_into(p, &dattrs); - - while (is_punct(&p->cur, '[')) { - DeclSuffix s; - if (!parse_decl_suffix(p, &s)) break; - if (s.kind != DS_ARRAY) break; - base_ty = apply_decl_suffix(p, base_ty, &s); - } - parse_attrs_into(p, &dattrs); - - if (is_punct(&p->cur, '(')) { - ParamInfo* infos = NULL; - u16 nparams = 0; - u8 variadic = 0; - const Type** ptypes = NULL; - const Type* fn_ty; - const ABIFuncInfo* abi; - SymEntry* fent; - - advance(p); /* '(' */ - parse_param_list(p, &infos, &nparams, &variadic); - expect_punct(p, ')', "')' after parameter list"); - parse_attrs_into(p, &dattrs); - - if (nparams) { - ptypes = (const Type**)arena_array(p->c->tu, const Type*, nparams); - for (u16 i = 0; i < nparams; ++i) ptypes[i] = infos[i].type; - } - fn_ty = type_func(p->pool, base_ty, ptypes, nparams, (int)variadic); - abi = abi_func_info(p->abi, fn_ty); - - ObjSecId fn_section_id; - u32 fn_decl_flags; - Sym fn_alias_target; - fent = declare_function(p, name, fn_ty, &specs, loc, dattrs, - &fn_section_id, &fn_decl_flags, - 
&fn_alias_target); - attr_list_append(&fent->attrs, dattrs); - - if (is_punct(&p->cur, '{')) { - Sym saved_func_name = p->cur_func_name; - p->cur_func_name = name; - parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc, - fn_section_id, fn_decl_flags); - p->cur_func_name = saved_func_name; - return; - } - if (accept_punct(p, ';')) { - if (fn_alias_target != 0) { - SymEntry* te = scope_lookup(p, fn_alias_target); - if (!te) { - size_t nl = 0; - const char* nm = pool_str(p->pool, fn_alias_target, &nl); - compiler_panic(p->c, loc, - "alias target '%s' is undefined", - nm ? nm : "?"); - } - ObjBuilder* ob = decl_obj(p->decls); - const ObjSym* ts = obj_symbol_get(ob, te->v.sym); - if (!ts || ts->kind == SK_UNDEF) { - size_t nl = 0; - const char* nm = pool_str(p->pool, fn_alias_target, &nl); - compiler_panic(p->c, loc, - "alias target '%s' is undefined", - nm ? nm : "?"); - } - obj_symbol_define(ob, fent->v.sym, ts->section_id, ts->value, - ts->size); - } - return; - } - perr(p, "expected '{' or ';' after function declarator"); - } - - /* Global object declaration. 
*/ - for (;;) { - int has_init = is_punct(&p->cur, '='); - int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init; - SymEntry* existing = scope_lookup(p, name); - ObjSymId sym = OBJ_SYM_NONE; - SymEntry* e = NULL; - - if (existing && existing->kind == SEK_GLOBAL) { - sym = existing->v.sym; - e = existing; - if (e->type && base_ty && e->type->kind == TY_ARRAY && - base_ty->kind == TY_ARRAY) { - if (e->type->arr.incomplete && !base_ty->arr.incomplete) { - e->type = base_ty; - } - } - } else { - Decl decl_in; - DeclId did; - memset(&decl_in, 0, sizeof decl_in); - decl_in.name = name; - decl_in.type = base_ty; - decl_in.loc = loc; - if (specs.storage == DS_STATIC) { - decl_in.storage = DS_STATIC; - decl_in.linkage = DL_INTERNAL; - } else { - decl_in.storage = DS_EXTERN; - decl_in.linkage = DL_EXTERNAL; - } - decl_in.visibility = SV_DEFAULT; - decl_in.flags = specs.flags & DF_THREAD; - attr_list_to_decl(p->c, p->decls, specs.attrs, &decl_in); - attr_list_to_decl(p->c, p->decls, dattrs, &decl_in); - did = decl_declare(p->decls, &decl_in); - sym = decl_obj_sym(p->decls, did); - e = scope_define(p, name, SEK_GLOBAL, base_ty); - e->v.sym = sym; - } - attr_list_append(&e->attrs, dattrs); - - u32 attr_align = attrs_pick_aligned(specs.attrs); - { - u32 a2 = attrs_pick_aligned(dattrs); - if (a2 > attr_align) attr_align = a2; - } - u32 align_eff = (specs.align > attr_align) ? 
specs.align : attr_align; - - if (has_init) { - advance(p); /* '=' */ - if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) { - const Type* completed = complete_incomplete_array(p, base_ty); - if (completed != base_ty) { - base_ty = completed; - if (e) e->type = base_ty; - } - } - define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/1, - loc, align_eff); - } else if (!is_pure_extern) { - define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/0, - loc, align_eff); - } - - if (!accept_punct(p, ',')) break; - base_ty = parse_pointer_layer(p, specs.type); - if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected declarator after ','"); - } - name = p->cur.v.ident; - loc = tok_loc(&p->cur); - advance(p); - dattrs = NULL; - parse_attrs_into(p, &dattrs); - while (is_punct(&p->cur, '[')) { - DeclSuffix s; - if (!parse_decl_suffix(p, &s)) break; - base_ty = apply_decl_suffix(p, base_ty, &s); - } - parse_attrs_into(p, &dattrs); - } - expect_punct(p, ';', "';' after global declaration"); -} - -static void parse_file_scope_asm(Parser* p) { - SrcLoc loc = tok_loc(&p->cur); - u8* bytes; - size_t nlen = 0; - Lexer* lex; - CGTarget* target; - - advance(p); /* asm / __asm__ */ - for (;;) { - if (is_kw(p, &p->cur, KW_VOLATILE)) { - advance(p); - continue; - } - if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) { - advance(p); - continue; - } - break; - } - expect_punct(p, '(', "'(' after file-scope asm"); - if (p->cur.kind != TOK_STR) { - perr(p, "expected string literal in file-scope asm"); - } - { - Tok t = p->cur; - advance(p); - bytes = decode_string_literal(p, &t, &nlen); - } - if (nlen > 0) nlen -= 1; /* drop C string terminator */ - expect_punct(p, ')', "')' after file-scope asm"); - expect_punct(p, ';', "';' after file-scope asm"); - - target = cg_target(p->cg); - if (!target || !target->mc) { - perr(p, "file-scope asm requires an object-code target"); - } - 
cg_set_loc(p->cg, loc); - if (target->mc->set_loc) target->mc->set_loc(target->mc, loc); - lex = lex_open_mem(p->c, "<file-scope-asm>", (const char*)bytes, nlen); - parse_asm(p->c, lex, target->mc); - lex_close(lex); - p->c->env->heap->free(p->c->env->heap, bytes, 0); -} - -static void parse_translation_unit(Parser* p) { - while (p->cur.kind != TOK_EOF) { - if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) { - advance(p); - continue; - } - if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) { - parse_static_assert(p); - continue; - } - if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) { - parse_file_scope_asm(p); - continue; - } - parse_external_decl(p); - } -} - -/* ============================================================ - * Entry point - * ============================================================ */ - -void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { - Parser p; - Sym text_name; - CKw i; - - memset(&p, 0, sizeof p); - p.c = c; - p.pp = pp; - p.decls = decls; - p.cg = cg; - p.debug = debug; - p.abi = c->abi; - p.pool = c->global; - - for (i = (CKw)1; i < KW_COUNT; ++i) { - p.kw_sym[i] = pool_intern_cstr(p.pool, kw_names[i]); - } - - p.sym_b_alloca = pool_intern_cstr(p.pool, "__builtin_alloca"); - p.sym_b_ctz = pool_intern_cstr(p.pool, "__builtin_ctz"); - p.sym_b_clz = pool_intern_cstr(p.pool, "__builtin_clz"); - p.sym_b_clzl = pool_intern_cstr(p.pool, "__builtin_clzl"); - p.sym_b_clzll = pool_intern_cstr(p.pool, "__builtin_clzll"); - p.sym_b_trap = pool_intern_cstr(p.pool, "__builtin_trap"); - p.sym_b_unreachable = pool_intern_cstr(p.pool, "__builtin_unreachable"); - p.sym_b_memcpy = pool_intern_cstr(p.pool, "__builtin_memcpy"); - p.sym_b_memmove = pool_intern_cstr(p.pool, "__builtin_memmove"); - p.sym_b_memcmp = pool_intern_cstr(p.pool, "__builtin_memcmp"); - p.sym_b_memset = pool_intern_cstr(p.pool, "__builtin_memset"); - p.sym_func = pool_intern_cstr(p.pool, "__func__"); - p.sym_func_gcc = pool_intern_cstr(p.pool, 
"__FUNCTION__"); - p.sym_pretty_func_gcc = pool_intern_cstr(p.pool, "__PRETTY_FUNCTION__"); - p.sym_b_expect = pool_intern_cstr(p.pool, "__builtin_expect"); - p.sym_b_offsetof = pool_intern_cstr(p.pool, "__builtin_offsetof"); - p.sym_b_va_list = pool_intern_cstr(p.pool, "__builtin_va_list"); - p.sym_b_va_start = pool_intern_cstr(p.pool, "__builtin_va_start"); - p.sym_b_va_arg = pool_intern_cstr(p.pool, "__builtin_va_arg"); - p.sym_b_va_end = pool_intern_cstr(p.pool, "__builtin_va_end"); - p.sym_b_va_copy = pool_intern_cstr(p.pool, "__builtin_va_copy"); - p.sym_attribute = pool_intern_cstr(p.pool, "__attribute__"); - p.sym_volatile_alias = pool_intern_cstr(p.pool, "__volatile__"); - p.sym_alignof_alias = pool_intern_cstr(p.pool, "__alignof__"); - p.sym_int128 = pool_intern_cstr(p.pool, "__int128"); - p.sym_int128_t = pool_intern_cstr(p.pool, "__int128_t"); - p.sym_uint128_t = pool_intern_cstr(p.pool, "__uint128_t"); - p.sym_a_load_n = pool_intern_cstr(p.pool, "__atomic_load_n"); - p.sym_a_store_n = pool_intern_cstr(p.pool, "__atomic_store_n"); - p.sym_a_exchange_n = pool_intern_cstr(p.pool, "__atomic_exchange_n"); - p.sym_a_fetch_add = pool_intern_cstr(p.pool, "__atomic_fetch_add"); - p.sym_a_fetch_sub = pool_intern_cstr(p.pool, "__atomic_fetch_sub"); - p.sym_a_fetch_and = pool_intern_cstr(p.pool, "__atomic_fetch_and"); - p.sym_a_fetch_or = pool_intern_cstr(p.pool, "__atomic_fetch_or"); - p.sym_a_fetch_xor = pool_intern_cstr(p.pool, "__atomic_fetch_xor"); - p.sym_a_cas_n = pool_intern_cstr(p.pool, "__atomic_compare_exchange_n"); - p.sym_a_thread_fence = pool_intern_cstr(p.pool, "__atomic_thread_fence"); - p.sym_a_signal_fence = pool_intern_cstr(p.pool, "__atomic_signal_fence"); - - p.scope = scope_new(&p, NULL); - - text_name = pool_intern_cstr(p.pool, ".text"); - p.text_sec = obj_section(decl_obj(decls), text_name, SEC_TEXT, - SF_ALLOC | SF_EXEC, 4u); - - p.cur = fetch_tok(&p); - - parse_translation_unit(&p); -} diff --git a/src/parse/parse.h b/src/parse/parse.h @@ 
-2,21 +2,7 @@ #define CFREE_PARSE_H #include "arch/arch.h" -#include "cg/cg.h" -#include "decl/decl.h" -#include "pp/pp.h" - -/* C11 frontend. Reads tokens from `pp`, records C declarations in DeclTable, - * and drives `cg` for executable code. - * - * Per doc/DWARF.md §3.1 the parser is the driver for Class-1 DWARF events - * (decl-time things: function/scope/type/param/local). When `debug` is - * non-NULL the parser must call debug_func_begin / debug_param / - * debug_local / debug_scope_begin / debug_scope_end at the matching - * decl/scope sites. Class-2 (line rows) goes through cg_set_loc. Class-3 - * (func_pc_range) is CG's responsibility in cg_func_end. NULL means -g - * is off and the parser skips all Debug fanout. */ -void parse_c(Compiler*, Pp*, DeclTable*, CG*, Debug*); +#include "lex/lex.h" /* Standalone assembler. Reads tokens directly from a Lexer; emits via * MCEmitter. */ diff --git a/src/parse/parse_expr.c b/src/parse/parse_expr.c @@ -1,1888 +0,0 @@ -/* parse_expr.c — precedence climbing, unary/primary, literal decoding, - * constant evaluation. 
*/ - -#include "parse/parse_priv.h" - -static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } -static const Type* ty_size_t(Parser* p) { - return abi_size_type(p->abi, p->pool); -} - - -static CKw ident_kw(const Parser* p, Sym name) { - return ident_kw_inline(p, name); -} - -static int accept_kw(Parser* p, CKw k) { - if (is_kw(p, &p->cur, k)) { - advance(p); - return 1; - } - return 0; -} - -/* ============================================================ - * Literal parsing - * ============================================================ */ - -i64 parse_int_literal(Parser* p, const Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - i64 base = 10; - i64 acc = 0; - if (!s) perr(p, "bad numeric literal"); - if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - base = 16; - i = 2; - } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { - base = 2; - i = 2; - } else if (len >= 1 && s[0] == '0') { - base = 8; - i = 1; - } - for (; i < len; ++i) { - int c = (unsigned char)s[i]; - int dv; - if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break; - if (c >= '0' && c <= '9') - dv = c - '0'; - else if (c >= 'a' && c <= 'f') - dv = c - 'a' + 10; - else if (c >= 'A' && c <= 'F') - dv = c - 'A' + 10; - else - perr(p, "bad digit in numeric literal"); - if (dv >= base) perr(p, "digit out of range for base"); - acc = acc * base + dv; - } - return acc; -} - -static const Type* int_literal_type(Parser* p, const Tok* t) { - int u = (t->flags & TF_INT_U) != 0; - int l = (t->flags & TF_INT_L) != 0; - int ll = (t->flags & TF_INT_LL) != 0; - TypeKind k; - if (ll) k = u ? TY_ULLONG : TY_LLONG; - else if (l) k = u ? 
TY_ULONG : TY_LONG; - else if (u) k = TY_UINT; - else k = TY_INT; - return type_prim(p->pool, k); -} - -static double parse_float_literal(Parser* p, const Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - int is_hex = 0; - double v = 0.0; - int exp = 0; - int dec_exp = 0; - int frac_seen = 0; - if (!s) perr(p, "bad float literal"); - if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { - is_hex = 1; - i = 2; - } - while (i < len) { - int c = (unsigned char)s[i]; - int dv; - if (c == '.' || c == 'e' || c == 'E' || c == 'p' || c == 'P' || - c == 'f' || c == 'F' || c == 'l' || c == 'L') - break; - if (c >= '0' && c <= '9') dv = c - '0'; - else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; - else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; - else perr(p, "bad digit in float literal"); - v = v * (is_hex ? 16.0 : 10.0) + (double)dv; - i++; - } - if (i < len && s[i] == '.') { - i++; - while (i < len) { - int c = (unsigned char)s[i]; - int dv; - if (c == 'e' || c == 'E' || c == 'p' || c == 'P' || - c == 'f' || c == 'F' || c == 'l' || c == 'L') - break; - if (c >= '0' && c <= '9') dv = c - '0'; - else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10; - else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10; - else perr(p, "bad digit in float literal"); - v = v * (is_hex ? 16.0 : 10.0) + (double)dv; - exp -= 1; - frac_seen = 1; - i++; - } - } - (void)frac_seen; - if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) { - int neg = 0; - int n = 0; - int hex_exp = (s[i] == 'p' || s[i] == 'P'); - i++; - if (i < len && (s[i] == '+' || s[i] == '-')) { - if (s[i] == '-') neg = 1; - i++; - } - while (i < len) { - int c = (unsigned char)s[i]; - if (c < '0' || c > '9') break; - n = n * 10 + (c - '0'); - i++; - } - dec_exp = neg ? -n : n; - if (hex_exp) { - dec_exp += exp * 4; - exp = 0; - } - } - while (exp < 0) { v /= (is_hex ? 
16.0 : 10.0); exp++; } - while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; } - if (is_hex) { - while (dec_exp < 0) { v /= 2.0; dec_exp++; } - while (dec_exp > 0) { v *= 2.0; dec_exp--; } - } else { - while (dec_exp < 0) { v /= 10.0; dec_exp++; } - while (dec_exp > 0) { v *= 10.0; dec_exp--; } - } - return v; -} - -static const Type* float_literal_type(Parser* p, const Tok* t) { - if (t->flags & TF_FLT_F) return type_prim(p->pool, TY_FLOAT); - if (t->flags & TF_FLT_L) return type_prim(p->pool, TY_LDOUBLE); - return type_prim(p->pool, TY_DOUBLE); -} - -static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi, - SrcLoc loc) { - size_t i = *pi; - i64 v; - int c; - if (i >= len) compiler_panic(p->c, loc, "truncated character literal"); - if (s[i] != '\\') { - v = (unsigned char)s[i++]; - *pi = i; - return v; - } - i++; - if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal"); - c = (unsigned char)s[i++]; - switch (c) { - case 'n': v = '\n'; break; - case 't': v = '\t'; break; - case 'r': v = '\r'; break; - case 'b': v = '\b'; break; - case 'f': v = '\f'; break; - case 'v': v = '\v'; break; - case 'a': v = '\a'; break; - case '\\': v = '\\'; break; - case '\'': v = '\''; break; - case '"': v = '"'; break; - case '?': v = '?'; break; - case 'x': { - i64 hex = 0; - int any = 0; - while (i < len) { - int d = (unsigned char)s[i]; - int dv; - if (d >= '0' && d <= '9') dv = d - '0'; - else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10; - else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10; - else break; - hex = hex * 16 + dv; - any = 1; - i++; - } - if (!any) compiler_panic(p->c, loc, "\\x with no hex digits"); - v = hex & 0xff; - break; - } - default: - if (c >= '0' && c <= '7') { - i64 oct = c - '0'; - int n = 1; - while (n < 3 && i < len && s[i] >= '0' && s[i] <= '7') { - oct = oct * 8 + (s[i] - '0'); - i++; - n++; - } - v = oct & 0xff; - } else { - v = c; - } - break; - } - *pi = i; - return v; -} - -i64 decode_char_literal(Parser* p, const 
Tok* t) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - i64 v; - if (!s) perr(p, "bad char literal"); - if (t->flags & TF_STR_U8) i = 2; - else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; - if (i >= len || s[i] != '\'') perr(p, "malformed character literal"); - i++; - if (i >= len || s[i] == '\'') perr(p, "empty character literal"); - v = decode_one_char(p, s, len, &i, t->loc); - if (i >= len || s[i] != '\'') { - perr(p, "multi-character constants are not supported"); - } - return v; -} - -u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) { - size_t len = 0; - const char* s = pool_str(p->pool, t->spelling, &len); - size_t i = 0; - Heap* h = p->c->env->heap; - u8* buf; - size_t k = 0; - if (!s) perr(p, "bad string literal"); - if (t->flags & TF_STR_U8) i = 2; - else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) i = 1; - if (i >= len || s[i] != '"') perr(p, "malformed string literal"); - i++; - buf = (u8*)h->alloc(h, len + 1, 1); - if (!buf) perr(p, "out of memory in string literal"); - while (i < len && s[i] != '"') { - i64 ch = decode_one_char(p, s, len, &i, t->loc); - buf[k++] = (u8)ch; - } - buf[k++] = 0; - *nlen_out = k; - return buf; -} - -ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) { - ObjBuilder* ob = decl_obj(p->decls); - Sym secname = pool_intern_cstr(p->pool, ".rodata"); - ObjSecId sec = obj_section(ob, secname, SEC_RODATA, SF_ALLOC, 1u); - u32 base = obj_pos(ob, sec); - Sym lname; - ObjSymId sym; - char namebuf[32]; - static u32 counter; - int wlen = 0; - u32 id = ++counter; - namebuf[wlen++] = '.'; - namebuf[wlen++] = 'L'; - namebuf[wlen++] = 'C'; - { - char digits[12]; - int dn = 0; - if (id == 0) digits[dn++] = '0'; - while (id) { - digits[dn++] = (char)('0' + (id % 10)); - id /= 10; - } - while (dn) namebuf[wlen++] = digits[--dn]; - } - namebuf[wlen] = 0; - lname = pool_intern(p->pool, namebuf, (size_t)wlen); - sym = obj_symbol(ob, 
lname, SB_LOCAL, SK_OBJ, sec, base, n); - { - u8* dst = obj_reserve(ob, sec, n); - if (dst) memcpy(dst, bytes, n); - } - return sym; -} - -/* ============================================================ - * Constant expression evaluator (cexpr_*) - * ============================================================ */ - -static i64 cexpr_unary(Parser* p, SrcLoc loc); -static const Type* offsetof_designator(Parser* p, const Type* base, u32* off); - -static i64 cexpr_mul(Parser* p, SrcLoc loc) { - i64 v = cexpr_unary(p, loc); - for (;;) { - if (accept_punct(p, '*')) v = v * cexpr_unary(p, loc); - else if (accept_punct(p, '/')) { - i64 r = cexpr_unary(p, loc); - if (r == 0) compiler_panic(p->c, loc, "division by zero in constant"); - v = v / r; - } else if (accept_punct(p, '%')) { - i64 r = cexpr_unary(p, loc); - if (r == 0) compiler_panic(p->c, loc, "modulo by zero in constant"); - v = v % r; - } else break; - } - return v; -} -static i64 cexpr_add(Parser* p, SrcLoc loc) { - i64 v = cexpr_mul(p, loc); - for (;;) { - if (accept_punct(p, '+')) v = v + cexpr_mul(p, loc); - else if (accept_punct(p, '-')) v = v - cexpr_mul(p, loc); - else break; - } - return v; -} -static i64 cexpr_shift(Parser* p, SrcLoc loc) { - i64 v = cexpr_add(p, loc); - for (;;) { - if (accept_punct(p, P_SHL)) v = v << cexpr_add(p, loc); - else if (accept_punct(p, P_SHR)) v = v >> cexpr_add(p, loc); - else break; - } - return v; -} -static i64 cexpr_rel(Parser* p, SrcLoc loc) { - i64 v = cexpr_shift(p, loc); - for (;;) { - if (accept_punct(p, P_LE)) v = v <= cexpr_shift(p, loc); - else if (accept_punct(p, P_GE)) v = v >= cexpr_shift(p, loc); - else if (is_punct(&p->cur, '<')) { - advance(p); v = v < cexpr_shift(p, loc); - } else if (is_punct(&p->cur, '>')) { - advance(p); v = v > cexpr_shift(p, loc); - } else break; - } - return v; -} -static i64 cexpr_eq(Parser* p, SrcLoc loc) { - i64 v = cexpr_rel(p, loc); - for (;;) { - if (accept_punct(p, P_EQ)) v = (v == cexpr_rel(p, loc)); - else if 
(accept_punct(p, P_NE)) v = (v != cexpr_rel(p, loc)); - else break; - } - return v; -} -static i64 cexpr_band(Parser* p, SrcLoc loc) { - i64 v = cexpr_eq(p, loc); - while (is_punct(&p->cur, '&') && !is_punct(&p->cur, P_AND)) { - advance(p); - v = v & cexpr_eq(p, loc); - } - return v; -} -static i64 cexpr_bxor(Parser* p, SrcLoc loc) { - i64 v = cexpr_band(p, loc); - while (accept_punct(p, '^')) v = v ^ cexpr_band(p, loc); - return v; -} -static i64 cexpr_bor(Parser* p, SrcLoc loc) { - i64 v = cexpr_bxor(p, loc); - while (is_punct(&p->cur, '|') && !is_punct(&p->cur, P_OR)) { - advance(p); - v = v | cexpr_bxor(p, loc); - } - return v; -} - -static i64 cexpr_unary(Parser* p, SrcLoc loc) { - if (accept_punct(p, '+')) return cexpr_unary(p, loc); - if (accept_punct(p, '-')) return -cexpr_unary(p, loc); - if (accept_punct(p, '~')) return ~cexpr_unary(p, loc); - if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 0 : 1; - if (accept_kw(p, KW_SIZEOF)) { - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after sizeof type-name"); - return (i64)abi_sizeof(p->abi, t); - } - } - } - parse_unary(p); - { - const Type* ty = cg_top_type(p->cg); - i64 sz = (i64)abi_sizeof(p->abi, ty); - cg_drop(p->cg); - return sz; - } - } - if (accept_kw(p, KW_ALIGNOF)) { - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after _Alignof type-name"); - return (i64)abi_alignof(p->abi, t); - } - } - } - parse_unary(p); - { - const Type* ty = cg_top_type(p->cg); - i64 al = (i64)abi_alignof(p->abi, ty); - cg_drop(p->cg); - return al; - } - } - if (accept_punct(p, '(')) { - if (starts_type_name(p, &p->cur)) { - const Type* t = parse_type_name(p); - expect_punct(p, ')', "')' after cast type-name"); - { - i64 v = cexpr_unary(p, loc); - u32 sz = abi_sizeof(p->abi, t); - int 
is_signed = abi_type_info(p->abi, t).signed_; - if (sz < 8) { - u64 mask = (1ull << (sz * 8u)) - 1ull; - u64 uv = (u64)v & mask; - if (is_signed) { - u64 sign = 1ull << (sz * 8u - 1u); - v = (i64)((uv ^ sign) - sign); - } else { - v = (i64)uv; - } - } - return v; - } - } - { - i64 v = cexpr_bor(p, loc); - expect_punct(p, ')', "')' in constant expression"); - return v; - } - } - if (p->cur.kind == TOK_NUM) { - i64 v = parse_int_literal(p, &p->cur); - advance(p); - return v; - } - if (p->cur.kind == TOK_CHR) { - i64 v = decode_char_literal(p, &p->cur); - advance(p); - return v; - } - if (p->cur.kind == TOK_IDENT) { - Sym name = p->cur.v.ident; - if (name == p->sym_b_offsetof) { - u32 off = 0; - const Type* root; - advance(p); /* IDENT */ - expect_punct(p, '(', "'(' after __builtin_offsetof"); - root = parse_type_name(p); - expect_punct(p, ',', "',' in __builtin_offsetof"); - (void)offsetof_designator(p, root, &off); - expect_punct(p, ')', "')' after __builtin_offsetof"); - return (i64)off; - } - { - SymEntry* e = scope_lookup(p, name); - if (e && e->kind == SEK_ENUM_CST) { - advance(p); - return e->v.enum_value; - } - } - compiler_panic(p->c, loc, "non-constant identifier in constant expression"); - } - compiler_panic(p->c, loc, "expected constant expression"); -} - -i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } - -/* ============================================================ - * to_rvalue - * ============================================================ */ - -void to_rvalue(Parser* p) { - const Type* t = cg_top_type(p->cg); - if (t) { - if (t->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, t->arr.elem)); - return; - } - if (t->kind == TY_FUNC) { - cg_addr(p->cg); - return; - } - if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; - } - cg_load(p->cg); -} - -/* ============================================================ - * coerce_top_to_lvalue (used by assignment / initializers) - * 
============================================================ */ - -void coerce_top_to_lvalue(Parser* p) { - const Type* src = cg_top_type(p->cg); - const Type* dst = cg_top2_type(p->cg); - if (!src || !dst || src == dst) return; - if (type_is_arith(src) && type_is_arith(dst)) { - cg_convert(p->cg, dst); - } -} - -/* ============================================================ - * Builtin call handling - * ============================================================ */ - -static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) { - const Type* cur = base; - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name in __builtin_offsetof"); - } - for (;;) { - if (cur->kind == TY_STRUCT || cur->kind == TY_UNION) { - Sym mname = p->cur.v.ident; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - /* find_field is static in parse_type.c; we need it here. - * We call abi_record_layout directly inline. */ - const ABIRecordLayout* L = abi_record_layout(p->abi, cur); - if (!L) perr(p, "no such member in __builtin_offsetof"); - int found = 0; - for (u16 i = 0; i < cur->rec.nfields; ++i) { - const Field* f = &cur->rec.fields[i]; - if (f->name == mname && mname != 0) { - mty = f->type; - moff = L->fields[i].offset; - mf = f; - found = 1; - break; - } - } - (void)mf; - if (!found) perr(p, "no such member in __builtin_offsetof"); - advance(p); - *off += moff; - cur = mty; - } else if (cur->kind == TY_ARRAY) { - /* fall through to bracket branch */ - } else { - perr(p, "__builtin_offsetof step into non-aggregate"); - } - if (is_punct(&p->cur, '.')) { - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '.'"); - } - continue; - } - if (is_punct(&p->cur, '[')) { - advance(p); - i64 idx = eval_const_int(p, p->cur.loc); - expect_punct(p, ']', "']' in __builtin_offsetof"); - if (cur->kind != TY_ARRAY) { - perr(p, 
"__builtin_offsetof '[' on non-array"); - } - *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx); - cur = cur->arr.elem; - continue; - } - break; - } - return cur; -} - -static int try_parse_builtin_call(Parser* p) { - Sym name = p->cur.v.ident; - SrcLoc loc = p->cur.loc; - - /* `__builtin_mem{cpy,move,cmp,set}` are GCC/Clang's compiler-inlinable - * aliases for the libc functions. cfree's INTRIN_MEMCPY/MEMMOVE - * backend paths only handle constant byte counts, but the rt code - * calls them with runtime sizes. Rewrite each builtin into a plain - * call and let the normal function-call path handle it. The caller - * (parse_primary) reports a clean "undeclared identifier" if the TU - * forgot to declare the underlying libc function. */ - if (name == p->sym_b_memcpy || name == p->sym_b_memmove || - name == p->sym_b_memcmp || name == p->sym_b_memset) { - const char* libname = (name == p->sym_b_memcpy) ? "memcpy" - : (name == p->sym_b_memmove) ? "memmove" - : (name == p->sym_b_memcmp) ? 
"memcmp" - : "memset"; - p->cur.v.ident = pool_intern_cstr(p->pool, libname); - return 0; - } - - if (name != p->sym_b_alloca && name != p->sym_b_ctz && - name != p->sym_b_clz && name != p->sym_b_clzl && - name != p->sym_b_clzll && name != p->sym_b_trap && - name != p->sym_b_unreachable && - name != p->sym_b_expect && - name != p->sym_b_offsetof && name != p->sym_b_va_start && - name != p->sym_b_va_arg && name != p->sym_b_va_end && - name != p->sym_b_va_copy && name != p->sym_a_load_n && - name != p->sym_a_store_n && name != p->sym_a_exchange_n && - name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub && - name != p->sym_a_fetch_and && name != p->sym_a_fetch_or && - name != p->sym_a_fetch_xor && name != p->sym_a_cas_n && - name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) { - return 0; - } - advance(p); /* IDENT */ - expect_punct(p, '(', "'(' after builtin"); - - if (name == p->sym_b_offsetof) { - const Type* root = parse_type_name(p); - expect_punct(p, ',', "',' in __builtin_offsetof"); - u32 off = 0; - (void)offsetof_designator(p, root, &off); - expect_punct(p, ')', "')' after __builtin_offsetof"); - cg_push_int(p->cg, (i64)off, ty_size_t(p)); - return 1; - } - - if (name == p->sym_b_expect) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __builtin_expect"); - parse_assign_expr(p); - cg_drop(p->cg); - expect_punct(p, ')', "')' after __builtin_expect"); - return 1; - } - - if (name == p->sym_b_alloca) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after __builtin_alloca"); - cg_set_loc(p->cg, loc); - cg_alloca(p->cg); - return 1; - } - - if (name == p->sym_b_ctz) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after __builtin_ctz"); - cg_set_loc(p->cg, loc); - cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ); - return 1; - } - - if (name == p->sym_b_clz || name == p->sym_b_clzl || - name == p->sym_b_clzll) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ')', "')' after 
__builtin_clz"); - cg_set_loc(p->cg, loc); - /* The operand carries its own type, which drives the sf bit on - * aarch64 / REX.W on x64 / sf on rv64. Whether the caller used the - * `l` / `ll` suffix only changes the C-level type the user wrote; - * cfree picks the instruction width from the value type. */ - cg_intrinsic_unary_to_int(p->cg, INTRIN_CLZ); - return 1; - } - - if (name == p->sym_b_trap || name == p->sym_b_unreachable) { - expect_punct(p, ')', "')' after __builtin_trap/unreachable"); - cg_set_loc(p->cg, loc); - cg_intrinsic_void(p->cg, - name == p->sym_b_trap ? INTRIN_TRAP : INTRIN_UNREACHABLE); - /* Both are noreturn at the C level. Push a dummy `int 0` so callers - * that consume an expression value (e.g. ternary, comma) don't see - * an empty stack — the dead value will be folded out. */ - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_start) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_start"); - parse_assign_expr(p); - cg_drop(p->cg); - expect_punct(p, ')', "')' after __builtin_va_start"); - cg_set_loc(p->cg, loc); - cg_va_start_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_end) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ')', "')' after __builtin_va_end"); - cg_set_loc(p->cg, loc); - cg_va_end_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_copy) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_copy"); - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ')', "')' after __builtin_va_copy"); - cg_set_loc(p->cg, loc); - cg_va_copy_(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_b_va_arg) { - parse_assign_expr(p); - cg_addr(p->cg); - expect_punct(p, ',', "',' in __builtin_va_arg"); - const Type* ty = parse_type_name(p); - expect_punct(p, ')', "')' after __builtin_va_arg"); - cg_set_loc(p->cg, 
loc); - cg_va_arg_(p->cg, ty); - return 1; - } - - if (name == p->sym_a_load_n) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_load_n"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_load_n"); - cg_set_loc(p->cg, loc); - cg_atomic_load(p->cg, (MemOrder)ord); - return 1; - } - - if (name == p->sym_a_store_n) { - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_store_n"); - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in __atomic_store_n"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_store_n"); - cg_set_loc(p->cg, loc); - cg_atomic_store(p->cg, (MemOrder)ord); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_a_thread_fence || name == p->sym_a_signal_fence) { - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after atomic fence"); - cg_set_loc(p->cg, loc); - cg_fence(p->cg, (MemOrder)ord); - cg_push_int(p->cg, 0, ty_int(p)); - return 1; - } - - if (name == p->sym_a_cas_n) { - parse_assign_expr(p); to_rvalue(p); /* ptr */ - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - - parse_assign_expr(p); to_rvalue(p); /* &expected */ - const Type* eptr_ty = cg_top_type(p->cg); - if (!eptr_ty || eptr_ty->kind != TY_PTR) { - perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer"); - } - const Type* val_ty = eptr_ty->ptr.pointee; - - FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd); - fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL; - FrameSlot eslot = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_swap(p->cg); - cg_store(p->cg); cg_drop(p->cg); - - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_load(p->cg); - cg_deref(p->cg, val_ty); - cg_load(p->cg); - - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - parse_assign_expr(p); to_rvalue(p); /* desired */ - expect_punct(p, ',', "',' in 
__atomic_compare_exchange_n"); - - (void)eval_const_int(p, p->cur.loc); /* weak */ - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - i64 succ = eval_const_int(p, p->cur.loc); - expect_punct(p, ',', "',' in __atomic_compare_exchange_n"); - i64 fail = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after __atomic_compare_exchange_n"); - - cg_set_loc(p->cg, loc); - cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail); - - const Type* ok_ty = cg_top_type(p->cg); - FrameSlotDesc okd; memset(&okd, 0, sizeof okd); - okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL; - FrameSlot okslot = cg_local(p->cg, &okd); - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); - - FrameSlotDesc pd; memset(&pd, 0, sizeof pd); - pd.type = val_ty; - pd.size = abi_sizeof(p->abi, val_ty); - pd.align = abi_alignof(p->abi, val_ty); - pd.kind = FS_LOCAL; - FrameSlot pslot = cg_local(p->cg, &pd); - cg_push_local_typed(p->cg, pslot, val_ty); - cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); - - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_load(p->cg); - CGLabel L_done = cg_label_new(p->cg); - cg_branch_true(p->cg, L_done); - cg_push_local_typed(p->cg, eslot, eptr_ty); - cg_load(p->cg); - cg_deref(p->cg, val_ty); - cg_push_local_typed(p->cg, pslot, val_ty); - cg_load(p->cg); - cg_store(p->cg); cg_drop(p->cg); - cg_label_place(p->cg, L_done); - - cg_push_local_typed(p->cg, okslot, ok_ty); - cg_load(p->cg); - return 1; - } - - AtomicOp op; - if (name == p->sym_a_exchange_n) op = AO_XCHG; - else if (name == p->sym_a_fetch_add) op = AO_ADD; - else if (name == p->sym_a_fetch_sub) op = AO_SUB; - else if (name == p->sym_a_fetch_and) op = AO_AND; - else if (name == p->sym_a_fetch_or) op = AO_OR; - else if (name == p->sym_a_fetch_xor) op = AO_XOR; - else { perr(p, "internal: unhandled builtin"); } - - parse_assign_expr(p); - to_rvalue(p); - expect_punct(p, ',', "',' in atomic builtin"); - parse_assign_expr(p); - 
to_rvalue(p); - expect_punct(p, ',', "',' in atomic builtin"); - i64 ord = eval_const_int(p, p->cur.loc); - expect_punct(p, ')', "')' after atomic builtin"); - cg_set_loc(p->cg, loc); - cg_atomic_rmw(p->cg, op, (MemOrder)ord); - return 1; -} - -/* ============================================================ - * parse_primary, parse_postfix, parse_unary - * ============================================================ */ - -static void parse_primary(Parser* p) { - Tok t = p->cur; - if (t.kind == TOK_NUM) { - i64 v = parse_int_literal(p, &t); - const Type* lty = int_literal_type(p, &t); - advance(p); - cg_push_int(p->cg, v, lty); - return; - } - if (t.kind == TOK_FLT) { - double v = parse_float_literal(p, &t); - const Type* lty = float_literal_type(p, &t); - advance(p); - cg_push_float(p->cg, v, lty); - return; - } - if (is_punct(&t, '(')) { - advance(p); - parse_expr(p); - expect_punct(p, ')', "')'"); - return; - } - if (t.kind == TOK_IDENT) { - SymEntry* e; - if (ident_kw(p, t.v.ident) != KW_NONE) { - perr(p, "unexpected keyword in expression"); - } - { - Tok n = peek1(p); - if (is_punct(&n, '(') && try_parse_builtin_call(p)) return; - } - /* try_parse_builtin_call may rewrite the current ident in-place - * (e.g. __builtin_memcpy → memcpy) and return 0, asking us to - * resume normal lookup with the rewritten name. */ - t = p->cur; - /* C99 §6.4.2.2: `__func__` inside a function-body acts as - * static const char __func__[] = "<function-name>"; - * GCC also exposes `__FUNCTION__` and `__PRETTY_FUNCTION__` with - * the same value. We synthesize the string lazily — the symbol - * lives in .rodata and the resulting type is `char[N+1]` (with the - * trailing NUL). */ - if (t.v.ident == p->sym_func || t.v.ident == p->sym_func_gcc || - t.v.ident == p->sym_pretty_func_gcc) { - if (p->cur_func_name == 0) { - compiler_panic(p->c, t.loc, "'%s' used outside a function", - t.v.ident == p->sym_func ? "__func__" - : t.v.ident == p->sym_func_gcc ? 
"__FUNCTION__" - : "__PRETTY_FUNCTION__"); - } - size_t nlen = 0; - const char* fn_name = pool_str(p->pool, p->cur_func_name, &nlen); - Heap* h = p->c->env->heap; - u8* bytes = (u8*)h->alloc(h, nlen + 1u, 1u); - for (size_t i = 0; i < nlen; ++i) bytes[i] = (u8)fn_name[i]; - bytes[nlen] = 0; - ObjSymId sym = emit_string_to_rodata(p, bytes, nlen + 1u); - h->free(h, bytes, 0); - advance(p); - const Type* char_ty = type_prim(p->pool, TY_CHAR); - const Type* arr_ty = type_array(p->pool, char_ty, (u32)(nlen + 1u), 0); - cg_push_global(p->cg, sym, arr_ty); - return; - } - e = scope_lookup(p, t.v.ident); - if (!e) { - size_t nlen = 0; - const char* nm = pool_str(p->pool, t.v.ident, &nlen); - compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen, - nm ? nm : "?"); - } - advance(p); - switch (e->kind) { - case SEK_LOCAL: - cg_push_local_typed(p->cg, e->v.slot, e->type); - if (e->vla_byte_slot != FRAME_SLOT_NONE) { - p->last_pushed_vla_slot = e->vla_byte_slot; - } - return; - case SEK_GLOBAL: - case SEK_FUNC: - cg_push_global(p->cg, e->v.sym, e->type); - return; - case SEK_ENUM_CST: - cg_push_int(p->cg, e->v.enum_value, e->type); - return; - case SEK_TYPEDEF: - default: - perr(p, "identifier is not a value"); - } - } - if (t.kind == TOK_CHR) { - i64 v = decode_char_literal(p, &t); - advance(p); - cg_push_int(p->cg, v, ty_int(p)); - return; - } - if (t.kind == TOK_STR) { - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - ObjSymId sym = emit_string_to_rodata(p, bytes, n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); - { - const Type* char_ty = type_prim(p->pool, TY_CHAR); - const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); - cg_push_global(p->cg, sym, arr_ty); - } - return; - } - perr(p, "expected expression"); -} - -static void parse_postfix(Parser* p) { - parse_primary(p); - for (;;) { - Tok t = p->cur; - if (is_punct(&t, P_INC)) { - advance(p); - cg_inc_dec(p->cg, BO_IADD, /*post=*/1); - continue; - } - if 
(is_punct(&t, P_DEC)) { - advance(p); - cg_inc_dec(p->cg, BO_ISUB, /*post=*/1); - continue; - } - if (is_punct(&t, '(')) { - const Type* top = cg_top_type(p->cg); - const Type* fn_type; - if (top && top->kind == TY_FUNC) { - fn_type = top; - } else if (top && top->kind == TY_PTR && top->ptr.pointee && - top->ptr.pointee->kind == TY_FUNC) { - fn_type = top->ptr.pointee; - cg_load(p->cg); - } else { - perr(p, "called object is not a function"); - } - advance(p); /* '(' */ - u32 nargs = 0; - if (!is_punct(&p->cur, ')')) { - for (;;) { - parse_assign_expr(p); - to_rvalue(p); - ++nargs; - if (!accept_punct(p, ',')) break; - } - } - expect_punct(p, ')', "')' after argument list"); - if (fn_type->fn.nparams != nargs && !fn_type->fn.variadic) { - perr(p, "wrong number of arguments"); - } - if (fn_type->fn.variadic && nargs < fn_type->fn.nparams) { - perr(p, "too few arguments to variadic function"); - } - cg_call(p->cg, nargs, fn_type); - if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) { - cg_push_int(p->cg, 0, ty_int(p)); - } - continue; - } - if (is_punct(&t, '[')) { - const Type* lt0 = cg_top_type(p->cg); - advance(p); /* '[' */ - if (lt0 && lt0->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem)); - } else if (lt0 && lt0->kind == TY_PTR) { - cg_load(p->cg); - } - parse_expr(p); - { - const Type* it0 = cg_top_type(p->cg); - if (it0 && it0->kind == TY_ARRAY) { - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem)); - } else { - to_rvalue(p); - } - } - expect_punct(p, ']', "']' after subscript"); - { - const Type* lt = cg_top2_type(p->cg); - const Type* it = cg_top_type(p->cg); - const Type* elem; - if (lt && lt->kind == TY_PTR && type_is_int(it)) { - elem = lt->ptr.pointee; - } else if (it && it->kind == TY_PTR && type_is_int(lt)) { - cg_swap(p->cg); - elem = it->ptr.pointee; - } else { - perr(p, "invalid subscript: needs one pointer and one integer"); - } - if (!elem) perr(p, "subscript on incomplete 
pointee"); - u32 esz = abi_sizeof(p->abi, elem); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - cg_deref(p->cg, elem); - } - continue; - } - if (is_punct(&t, '.')) { - const Type* lt = cg_top_type(p->cg); - Sym mname; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - advance(p); /* '.' */ - if (!lt || (lt->kind != TY_STRUCT && lt->kind != TY_UNION)) { - perr(p, "request for member in something that is not a struct or union"); - } - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '.'"); - } - mname = p->cur.v.ident; - advance(p); - { - const ABIRecordLayout* L = abi_record_layout(p->abi, lt); - if (!L) perr(p, "no such member"); - int found = 0; - for (u16 i = 0; i < lt->rec.nfields; ++i) { - const Field* f = &lt->rec.fields[i]; - if (f->name == mname && mname != 0) { - mty = f->type; - moff = L->fields[i].offset; - mf = f; - found = 1; - break; - } - /* anonymous member flattening */ - if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || - f->type->kind == TY_UNION)) { - const Type* inner_ty = NULL; - u32 inner_off = 0; - const Field* inner_f = NULL; - const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); - if (IL) { - for (u16 j = 0; j < f->type->rec.nfields; ++j) { - const Field* ff = &f->type->rec.fields[j]; - if (ff->name == mname && mname != 0) { - inner_ty = ff->type; - inner_off = IL->fields[j].offset; - inner_f = ff; - break; - } - } - } - if (inner_ty) { - mty = inner_ty; - moff = L->fields[i].offset + inner_off; - mf = inner_f; - found = 1; - break; - } - } - } - if (!found) perr(p, "no such member"); - } - (void)mf; - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, mty)); - if (moff > 0) { - cg_push_int(p->cg, (i64)moff, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, mty); - continue; - } - if (is_punct(&t, P_ARROW)) { - const Type* lt0; - const 
Type* rec_ty; - Sym mname; - const Type* mty = NULL; - u32 moff = 0; - const Field* mf = NULL; - advance(p); /* `->` */ - to_rvalue(p); - lt0 = cg_top_type(p->cg); - if (!lt0 || lt0->kind != TY_PTR) { - perr(p, "'->' requires a pointer operand"); - } - rec_ty = lt0->ptr.pointee; - if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) { - perr(p, "'->' on pointer to non-struct/union"); - } - if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected member name after '->'"); - } - mname = p->cur.v.ident; - advance(p); - { - const ABIRecordLayout* L = abi_record_layout(p->abi, rec_ty); - if (!L) perr(p, "no such member"); - int found = 0; - for (u16 i = 0; i < rec_ty->rec.nfields; ++i) { - const Field* f = &rec_ty->rec.fields[i]; - if (f->name == mname && mname != 0) { - mty = f->type; - moff = L->fields[i].offset; - mf = f; - found = 1; - break; - } - if ((f->flags & FIELD_ANON) && (f->type->kind == TY_STRUCT || - f->type->kind == TY_UNION)) { - const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type); - if (IL) { - for (u16 j = 0; j < f->type->rec.nfields; ++j) { - const Field* ff = &f->type->rec.fields[j]; - if (ff->name == mname && mname != 0) { - mty = ff->type; - moff = L->fields[i].offset + IL->fields[j].offset; - mf = ff; - found = 1; - break; - } - } - } - if (found) break; - } - } - if (!found) perr(p, "no such member"); - } - (void)mf; - if (moff > 0) { - cg_push_int(p->cg, (i64)moff, ty_size_t(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, mty); - continue; - } - break; - } -} - -void parse_unary(Parser* p) { - Tok t = p->cur; - if (is_punct(&t, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - const Type* dst; - const Type* src; - advance(p); /* '(' */ - dst = parse_type_name(p); - expect_punct(p, ')', "')' after type-name"); - if (is_punct(&p->cur, '{')) { - FrameSlotDesc fsd; - FrameSlot slot; - const Type* lit_ty = dst; - if (lit_ty && lit_ty->kind == TY_ARRAY && 
lit_ty->arr.incomplete) { - lit_ty = complete_incomplete_array(p, lit_ty); - } - memset(&fsd, 0, sizeof fsd); - fsd.type = lit_ty; - fsd.size = abi_sizeof(p->abi, lit_ty); - fsd.align = abi_alignof(p->abi, lit_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - slot = cg_local(p->cg, &fsd); - if (lit_ty && (lit_ty->kind == TY_ARRAY || lit_ty->kind == TY_STRUCT || - lit_ty->kind == TY_UNION)) { - init_at(p, slot, lit_ty, 0, lit_ty); - } else { - init_at(p, slot, lit_ty, 0, lit_ty); - } - cg_push_local_typed(p->cg, slot, lit_ty); - return; - } - parse_unary(p); - to_rvalue(p); - if (dst && dst->kind == TY_VOID) { - cg_drop(p->cg); - cg_push_int(p->cg, 0, ty_int(p)); - return; - } - src = cg_top_type(p->cg); - if (src && src->kind == TY_PTR && dst->kind == TY_PTR) { - cg_retag_top(p->cg, dst); - return; - } - cg_convert(p->cg, dst); - return; - } - } - if (is_punct(&t, '+')) { - advance(p); - parse_unary(p); - to_rvalue(p); - return; - } - if (is_punct(&t, '-')) { - advance(p); - parse_unary(p); - to_rvalue(p); - cg_unop(p->cg, UO_NEG); - return; - } - if (is_punct(&t, '!')) { - advance(p); - parse_unary(p); - to_rvalue(p); - cg_push_int(p->cg, 0, ty_int(p)); - cg_cmp(p->cg, CMP_EQ); - return; - } - if (is_punct(&t, '~')) { - advance(p); - parse_unary(p); - to_rvalue(p); - cg_unop(p->cg, UO_BNOT); - return; - } - if (is_punct(&t, '&')) { - advance(p); - parse_unary(p); - cg_addr(p->cg); - return; - } - if (is_punct(&t, '*')) { - const Type* pty; - const Type* pointee; - advance(p); - parse_unary(p); - to_rvalue(p); - pty = cg_top_type(p->cg); - if (!pty || pty->kind != TY_PTR) { - perr(p, "indirection requires pointer operand"); - } - pointee = pty->ptr.pointee; - if (pointee && pointee->kind == TY_VOID) { - perr(p, "dereferencing pointer to incomplete type"); - } - cg_deref(p->cg, pointee); - return; - } - if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) { - BinOp bop = is_punct(&t, P_INC) ? 
BO_IADD : BO_ISUB; - advance(p); - parse_unary(p); - cg_inc_dec(p->cg, bop, /*post=*/0); - return; - } - if (is_kw(p, &t, KW_SIZEOF)) { - const Type* ty = NULL; - FrameSlot vla_slot = FRAME_SLOT_NONE; - advance(p); - if (is_punct(&p->cur, '(')) { - Tok n = peek1(p); - if (starts_type_name(p, &n)) { - advance(p); - ty = parse_type_name(p); - expect_punct(p, ')', "')'"); - } else { - p->last_pushed_vla_slot = FRAME_SLOT_NONE; - parse_unary(p); - ty = cg_top_type(p->cg); - vla_slot = p->last_pushed_vla_slot; - cg_drop(p->cg); - } - } else { - p->last_pushed_vla_slot = FRAME_SLOT_NONE; - parse_unary(p); - ty = cg_top_type(p->cg); - vla_slot = p->last_pushed_vla_slot; - cg_drop(p->cg); - } - if (vla_slot != FRAME_SLOT_NONE) { - cg_push_local_typed(p->cg, vla_slot, ty_size_t(p)); - cg_load(p->cg); - } else { - cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p)); - } - return; - } - if (is_kw(p, &t, KW_GENERIC)) { - advance(p); - expect_punct(p, '(', "'('"); - parse_assign_expr(p); - to_rvalue(p); - const Type* ctl_ty = cg_top_type(p->cg); - cg_drop(p->cg); - expect_punct(p, ',', "','"); - int emitted = 0; - Tok* default_buf = NULL; - u32 default_len = 0; - for (;;) { - const Type* assoc_ty = NULL; - int is_default = 0; - if (is_kw(p, &p->cur, KW_DEFAULT)) { - advance(p); - is_default = 1; - } else { - assoc_ty = parse_type_name(p); - } - expect_punct(p, ':', "':' in _Generic association"); - int take = 0; - if (!emitted && !is_default && ctl_ty && assoc_ty && - ctl_ty->kind == assoc_ty->kind) { - take = 1; - } - if (take) { - parse_assign_expr(p); - emitted = 1; - } else if (is_default && !default_buf) { - u32 cap = 16; - Tok* buf = arena_array(p->c->tu, Tok, cap); - u32 len = 0; - int paren_depth = 0, brack_depth = 0, brace_depth = 0; - while (p->cur.kind != TOK_EOF) { - if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { - if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; - } - if (len == cap) { - u32 new_cap = cap * 2; - Tok* nv = 
arena_array(p->c->tu, Tok, new_cap); - if (!nv) perr(p, "out of memory recording _Generic default"); - memcpy(nv, buf, len * sizeof(Tok)); - buf = nv; - cap = new_cap; - } - buf[len++] = p->cur; - if (is_punct(&p->cur, '(')) ++paren_depth; - else if (is_punct(&p->cur, ')')) --paren_depth; - else if (is_punct(&p->cur, '[')) ++brack_depth; - else if (is_punct(&p->cur, ']')) --brack_depth; - else if (is_punct(&p->cur, '{')) ++brace_depth; - else if (is_punct(&p->cur, '}')) --brace_depth; - advance(p); - } - if (len == cap) { - u32 new_cap = cap + 1; - Tok* nv = arena_array(p->c->tu, Tok, new_cap); - if (!nv) perr(p, "out of memory recording _Generic default"); - memcpy(nv, buf, len * sizeof(Tok)); - buf = nv; - cap = new_cap; - } - memset(&buf[len], 0, sizeof(Tok)); - buf[len].kind = TOK_PUNCT; - buf[len].v.punct = ','; - ++len; - default_buf = buf; - default_len = len; - } else { - int paren_depth = 0; - int brack_depth = 0; - int brace_depth = 0; - while (p->cur.kind != TOK_EOF) { - if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) { - if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break; - } - if (is_punct(&p->cur, '(')) ++paren_depth; - else if (is_punct(&p->cur, ')')) --paren_depth; - else if (is_punct(&p->cur, '[')) ++brack_depth; - else if (is_punct(&p->cur, ']')) --brack_depth; - else if (is_punct(&p->cur, '{')) ++brace_depth; - else if (is_punct(&p->cur, '}')) --brace_depth; - advance(p); - } - } - if (!accept_punct(p, ',')) break; - } - if (!emitted && default_buf) { - Tok* save_replay = p->replay; - u32 save_cap = p->replay_cap; - u32 save_len = p->replay_len; - u32 save_pos = p->replay_pos; - u8 save_active = p->replay_active; - Tok save_cur = p->cur; - int save_has_next = p->has_next; - p->replay = default_buf; - p->replay_cap = default_len; - p->replay_len = default_len; - p->replay_pos = 1; - p->replay_active = 1; - p->cur = default_buf[0]; - p->has_next = 0; - parse_assign_expr(p); - emitted = 1; - p->replay = save_replay; - 
p->replay_cap = save_cap; - p->replay_len = save_len; - p->replay_pos = save_pos; - p->replay_active = save_active; - p->cur = save_cur; - p->has_next = save_has_next; - } - expect_punct(p, ')', "')' after _Generic"); - if (!emitted) { - perr(p, "_Generic: no association matched and no default present"); - } - return; - } - if (is_kw(p, &t, KW_ALIGNOF)) { - const Type* ty; - advance(p); - expect_punct(p, '(', "'('"); - if (starts_type_name(p, &p->cur)) { - ty = parse_type_name(p); - } else { - parse_unary(p); - ty = cg_top_type(p->cg); - cg_drop(p->cg); - } - expect_punct(p, ')', "')'"); - cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p)); - return; - } - parse_postfix(p); -} - -/* ============================================================ - * Binary operator levels - * ============================================================ */ - -static int type_is_fp(const Type* t) { - return t && (t->kind == TY_FLOAT || t->kind == TY_DOUBLE || - t->kind == TY_LDOUBLE); -} - -static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) { - if (!type_is_fp(a) && !type_is_fp(b)) return NULL; - if ((a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE)) { - return type_prim(p->pool, TY_LDOUBLE); - } - if ((a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE)) { - return type_prim(p->pool, TY_DOUBLE); - } - return type_prim(p->pool, TY_FLOAT); -} - -static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) { - if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); - cg_swap(p->cg); - if (cg_top_type(p->cg) != common) cg_convert(p->cg, common); - cg_swap(p->cg); - BinOp fop; - switch (bop) { - case BO_IADD: fop = BO_FADD; break; - case BO_ISUB: fop = BO_FSUB; break; - case BO_IMUL: fop = BO_FMUL; break; - case BO_SDIV: fop = BO_FDIV; break; - default: - perr(p, "operator does not apply to floating types"); - return; - } - cg_binop(p->cg, fop); -} - -static void parse_mul(Parser* p) { - parse_unary(p); - for (;;) { - Tok t = 
p->cur; - BinOp bop; - if (is_punct(&t, '*')) { - bop = BO_IMUL; - } else if (is_punct(&t, '/')) { - bop = BO_SDIV; - } else if (is_punct(&t, '%')) { - bop = BO_SREM; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_unary(p); - to_rvalue(p); - const Type* lt = cg_top2_type(p->cg); - const Type* rt = cg_top_type(p->cg); - const Type* common = common_fp_type(p, lt, rt); - if (common) { - emit_fp_binop(p, bop, common); - } else { - cg_binop(p->cg, bop); - } - } -} - -static void emit_add_or_sub(Parser* p, BinOp bop) { - const Type* lt = cg_top2_type(p->cg); - const Type* rt = cg_top_type(p->cg); - int l_is_ptr = lt && lt->kind == TY_PTR; - int r_is_ptr = rt && rt->kind == TY_PTR; - if (bop == BO_IADD) { - if (l_is_ptr && type_is_int(rt)) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - return; - } - if (r_is_ptr && type_is_int(lt)) { - cg_swap(p->cg); - u32 esz = abi_sizeof(p->abi, rt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_IADD); - return; - } - } else { /* BO_ISUB */ - if (l_is_ptr && type_is_int(rt)) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_IMUL); - } - cg_binop(p->cg, BO_ISUB); - return; - } - if (l_is_ptr && r_is_ptr) { - u32 esz = abi_sizeof(p->abi, lt->ptr.pointee); - cg_binop(p->cg, BO_ISUB); - if (esz != 1) { - cg_push_int(p->cg, (i64)esz, ty_size_t(p)); - cg_binop(p->cg, BO_SDIV); - } - return; - } - } - const Type* common = common_fp_type(p, lt, rt); - if (common) { - emit_fp_binop(p, bop, common); - return; - } - cg_binop(p->cg, bop); -} - -static void parse_add(Parser* p) { - parse_mul(p); - for (;;) { - Tok t = p->cur; - BinOp bop; - if (is_punct(&t, '+')) { - bop = BO_IADD; - } else if (is_punct(&t, '-')) { - bop = BO_ISUB; - } 
else { - break; - } - advance(p); - to_rvalue(p); - parse_mul(p); - to_rvalue(p); - emit_add_or_sub(p, bop); - } -} - -static void parse_shift(Parser* p) { - parse_add(p); - for (;;) { - Tok t = p->cur; - BinOp bop; - if (is_punct(&t, P_SHL)) { - bop = BO_SHL; - } else if (is_punct(&t, P_SHR)) { - bop = BO_SHR_S; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_add(p); - to_rvalue(p); - cg_binop(p->cg, bop); - } -} - -static void parse_rel(Parser* p) { - parse_shift(p); - for (;;) { - Tok t = p->cur; - CmpOp cop; - if (is_punct(&t, '<')) { - cop = CMP_LT_S; - } else if (is_punct(&t, '>')) { - cop = CMP_GT_S; - } else if (is_punct(&t, P_LE)) { - cop = CMP_LE_S; - } else if (is_punct(&t, P_GE)) { - cop = CMP_GE_S; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_shift(p); - to_rvalue(p); - cg_cmp(p->cg, cop); - } -} - -static void parse_eq(Parser* p) { - parse_rel(p); - for (;;) { - Tok t = p->cur; - CmpOp cop; - if (is_punct(&t, P_EQ)) { - cop = CMP_EQ; - } else if (is_punct(&t, P_NE)) { - cop = CMP_NE; - } else { - break; - } - advance(p); - to_rvalue(p); - parse_rel(p); - to_rvalue(p); - cg_cmp(p->cg, cop); - } -} - -static void parse_band(Parser* p) { - parse_eq(p); - while (is_punct(&p->cur, '&')) { - advance(p); - to_rvalue(p); - parse_eq(p); - to_rvalue(p); - cg_binop(p->cg, BO_AND); - } -} - -static void parse_bxor(Parser* p) { - parse_band(p); - while (is_punct(&p->cur, '^')) { - advance(p); - to_rvalue(p); - parse_band(p); - to_rvalue(p); - cg_binop(p->cg, BO_XOR); - } -} - -static void parse_bor(Parser* p) { - parse_bxor(p); - while (is_punct(&p->cur, '|')) { - advance(p); - to_rvalue(p); - parse_bxor(p); - to_rvalue(p); - cg_binop(p->cg, BO_OR); - } -} - -static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty; - fsd.size = abi_sizeof(p->abi, ty); - fsd.align = abi_alignof(p->abi, ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - return cg_local(p->cg, 
&fsd); -} - -static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) { - cg_push_local_typed(p->cg, tmp, ty); - cg_push_int(p->cg, v, ty); - cg_store(p->cg); - cg_drop(p->cg); -} - -static void parse_land(Parser* p) { - parse_bor(p); - while (is_punct(&p->cur, P_AND)) { - CGLabel L_false = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp = ll_tmp_slot(p, result_ty); - advance(p); - to_rvalue(p); - cg_branch_false(p->cg, L_false); - parse_bor(p); - to_rvalue(p); - cg_branch_false(p->cg, L_false); - ll_store_const(p, tmp, result_ty, 1); - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_false); - ll_store_const(p, tmp, result_ty, 0); - cg_label_place(p->cg, L_end); - cg_push_local_typed(p->cg, tmp, result_ty); - } -} - -static void parse_lor(Parser* p) { - parse_land(p); - while (is_punct(&p->cur, P_OR)) { - CGLabel L_true = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp = ll_tmp_slot(p, result_ty); - advance(p); - to_rvalue(p); - cg_branch_true(p->cg, L_true); - parse_land(p); - to_rvalue(p); - cg_branch_true(p->cg, L_true); - ll_store_const(p, tmp, result_ty, 0); - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_true); - ll_store_const(p, tmp, result_ty, 1); - cg_label_place(p->cg, L_end); - cg_push_local_typed(p->cg, tmp, result_ty); - } -} - -static void parse_ternary(Parser* p) { - parse_lor(p); - if (!is_punct(&p->cur, '?')) return; - CGLabel L_else = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); - const Type* result_ty = ty_int(p); - FrameSlot tmp; - FrameSlotDesc fsd; - advance(p); /* '?' 
*/ - to_rvalue(p); - cg_branch_false(p->cg, L_else); - parse_assign_expr(p); - to_rvalue(p); - result_ty = cg_top_type(p->cg); - if (!result_ty) result_ty = ty_int(p); - memset(&fsd, 0, sizeof fsd); - fsd.type = result_ty; - fsd.size = abi_sizeof(p->abi, result_ty); - fsd.align = abi_alignof(p->abi, result_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - tmp = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, tmp, result_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - cg_jump(p->cg, L_end); - cg_label_place(p->cg, L_else); - expect_punct(p, ':', "':' in ternary"); - parse_assign_expr(p); - to_rvalue(p); - if (cg_top_type(p->cg) != result_ty) { - cg_convert(p->cg, result_ty); - } - cg_push_local_typed(p->cg, tmp, result_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - cg_label_place(p->cg, L_end); - cg_push_local_typed(p->cg, tmp, result_ty); -} - -void parse_assign_expr(Parser* p) { - parse_ternary(p); - Tok t = p->cur; - BinOp compound; - int is_simple_assign; - if (is_punct(&t, '=')) { - is_simple_assign = 1; - compound = (BinOp)0; - } else if (is_punct(&t, P_ADD_ASSIGN)) { - is_simple_assign = 0; compound = BO_IADD; - } else if (is_punct(&t, P_SUB_ASSIGN)) { - is_simple_assign = 0; compound = BO_ISUB; - } else if (is_punct(&t, P_MUL_ASSIGN)) { - is_simple_assign = 0; compound = BO_IMUL; - } else if (is_punct(&t, P_DIV_ASSIGN)) { - is_simple_assign = 0; compound = BO_SDIV; - } else if (is_punct(&t, P_MOD_ASSIGN)) { - is_simple_assign = 0; compound = BO_SREM; - } else if (is_punct(&t, P_AND_ASSIGN)) { - is_simple_assign = 0; compound = BO_AND; - } else if (is_punct(&t, P_OR_ASSIGN)) { - is_simple_assign = 0; compound = BO_OR; - } else if (is_punct(&t, P_XOR_ASSIGN)) { - is_simple_assign = 0; compound = BO_XOR; - } else if (is_punct(&t, P_SHL_ASSIGN)) { - is_simple_assign = 0; compound = BO_SHL; - } else if (is_punct(&t, P_SHR_ASSIGN)) { - is_simple_assign = 0; compound = BO_SHR_S; - } else { - return; - } - advance(p); - if 
(is_simple_assign) { - parse_assign_expr(p); - to_rvalue(p); - coerce_top_to_lvalue(p); - cg_store(p->cg); - return; - } - cg_dup(p->cg); - cg_load(p->cg); - parse_assign_expr(p); - to_rvalue(p); - if (compound == BO_IADD || compound == BO_ISUB) { - emit_add_or_sub(p, compound); - } else { - cg_binop(p->cg, compound); - } - cg_store(p->cg); -} - -void parse_expr(Parser* p) { - parse_assign_expr(p); - while (is_punct(&p->cur, ',')) { - advance(p); - cg_drop(p->cg); - parse_assign_expr(p); - } -} - -/* parse_cond_expr is the ternary level, provided for completeness */ -void parse_cond_expr(Parser* p) { - parse_ternary(p); -} diff --git a/src/parse/parse_init.c b/src/parse/parse_init.c @@ -1,799 +0,0 @@ -/* parse_init.c — runtime and static-storage initializers. - * - * Covers §6.7.9 (initializers): - * - Runtime aggregate/scalar initializers (init_at, init_elided, - * init_struct_fields, init_string_at, parse_designator_chain, - * push_subobject_lv, emit_copy_leaf, emit_walk_copy, - * emit_struct_copy_into_slot, zero_init_at) - * - Static-storage object definition (parse_static_init_at, - * parse_static_string_at, try_parse_addr_const, encode_int_le, - * pick_object_section, define_static_object, srl_push) - */ - -#include "parse/parse_priv.h" - -/* ============================================================ - * File-local helpers - * ============================================================ */ - -static const Type* ty_size_t_init(Parser* p) { - return abi_size_type(p->abi, p->pool); -} - -static SrcLoc tok_loc_init(const Tok* t) { return t->loc; } - -static CKw ident_kw_init(const Parser* p, Sym name) { - return ident_kw_inline(p, name); -} - -/* True if `ty` is char/signed char/unsigned char. */ -int is_char_kind(const Type* ty) { - if (!ty) return 0; - return ty->kind == TY_CHAR || ty->kind == TY_SCHAR || ty->kind == TY_UCHAR; -} - -/* Decode the string token at p->cur without advancing. 
Returns a heap- - * allocated byte buffer (caller frees) and writes length (including NUL) - * to *nlen_out. */ -static u8* peek_string_bytes(Parser* p, size_t* nlen_out) { - Tok t = p->cur; - if (t.kind != TOK_STR) perr(p, "internal: peek_string_bytes on non-string"); - return decode_string_literal(p, &t, nlen_out); -} - -/* ============================================================ - * Runtime initializers - * ============================================================ */ - -/* Forward declaration for mutual recursion. */ -void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, - const Type* ty); -static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty); - -/* Push the lvalue of a sub-object at byte offset `offset` within the array - * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. */ -void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* elem_ty) { - cg_push_local_typed(p->cg, slot, arr_ty); - cg_addr(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, elem_ty)); - if (offset > 0) { - cg_push_int(p->cg, (i64)offset, ty_size_t_init(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, elem_ty); -} - -/* Emit a load+store for one scalar leaf. */ -static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty, - u32 dst_off, FrameSlot src_ptr_slot, - const Type* src_ptr_ty, u32 src_off, - const Type* leaf_ty) { - push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty); - cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty); - cg_load(p->cg); - cg_retag_top(p->cg, type_ptr(p->pool, leaf_ty)); - if (src_off > 0) { - cg_push_int(p->cg, (i64)src_off, ty_size_t_init(p)); - cg_binop(p->cg, BO_IADD); - } - cg_deref(p->cg, leaf_ty); - cg_load(p->cg); - cg_store(p->cg); - cg_drop(p->cg); -} - -/* Walk a (possibly nested) aggregate, emitting a leaf load+store for each - * scalar member. 
*/ -static void emit_walk_copy(Parser* p, FrameSlot dst_slot, - const Type* dst_arr_ty, u32 dst_off, - FrameSlot src_ptr_slot, const Type* src_ptr_ty, - u32 src_off, const Type* ty) { - if (ty->kind == TY_STRUCT) { - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - for (u16 i = 0; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - if (f->flags & FIELD_BITFIELD) continue; - u32 foff = L->fields[i].offset; - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + foff, - src_ptr_slot, src_ptr_ty, src_off + foff, f->type); - } - return; - } - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - for (u32 i = 0; i < ty->arr.count; ++i) { - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + i * esz, - src_ptr_slot, src_ptr_ty, src_off + i * esz, - ty->arr.elem); - } - return; - } - if (ty->kind == TY_UNION) { - u32 sz = abi_sizeof(p->abi, ty); - const Type* uchar_ty = type_prim(p->pool, TY_UCHAR); - for (u32 i = 0; i < sz; ++i) { - emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + i, - src_ptr_slot, src_ptr_ty, src_off + i, uchar_ty); - } - return; - } - emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, src_ptr_ty, - src_off, ty); -} - -/* Source struct/union value is on top of the cg stack as an lvalue. - * Spill its address into a fresh pointer slot, then walk the type and - * copy each scalar leaf into the destination sub-object. 
*/ -void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, - const Type* dst_arr_ty, u32 dst_off, - const Type* ty) { - const Type* ptr_ty = type_ptr(p->pool, ty); - FrameSlotDesc fsd; - FrameSlot src_ptr_slot; - cg_addr(p->cg); - memset(&fsd, 0, sizeof fsd); - fsd.type = ptr_ty; - fsd.size = abi_sizeof(p->abi, ptr_ty); - fsd.align = abi_alignof(p->abi, ptr_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_NONE; - src_ptr_slot = cg_local(p->cg, &fsd); - cg_push_local_typed(p->cg, src_ptr_slot, ptr_ty); - cg_swap(p->cg); - cg_store(p->cg); - cg_drop(p->cg); - emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, ptr_ty, 0, ty); -} - -/* Recursively zero-initialize the sub-object at `offset` of type `ty`. */ -static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty) { - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - for (u32 i = 0; i < ty->arr.count; ++i) { - zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem); - } - return; - } - if (ty->kind == TY_STRUCT) { - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - for (u16 i = 0; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type); - } - return; - } - if (ty->kind == TY_UNION) { - if (ty->rec.nfields > 0) { - const Field* f = &ty->rec.fields[0]; - if (!(f->flags & FIELD_BITFIELD)) { - zero_init_at(p, slot, arr_ty, offset, f->type); - } - } - return; - } - push_subobject_lv(p, slot, arr_ty, offset, ty); - cg_push_int(p->cg, 0, ty); - cg_store(p->cg); - cg_drop(p->cg); -} - -/* Emit byte stores for a string literal initializing a char-array sub-object. 
*/ -static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* elem_ty, u32 count) { - size_t n = 0; - u8* bytes = peek_string_bytes(p, &n); - size_t copy = n; - size_t i; - if (copy > count) copy = count; - for (i = 0; i < copy; ++i) { - push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); - cg_push_int(p->cg, (i64)bytes[i], elem_ty); - cg_store(p->cg); - cg_drop(p->cg); - } - for (; i < count; ++i) { - push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty); - cg_push_int(p->cg, 0, elem_ty); - cg_store(p->cg); - cg_drop(p->cg); - } - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); /* consume TOK_STR */ -} - -/* Parse a designator chain (`[const]` and `.ident` repeats) ending at `=`. */ -static void parse_designator_chain(Parser* p, const Type* outer_ty, - u32 outer_offset, const Type** sub_ty_out, - u32* sub_offset_out, u32* top_index_out) { - const Type* cur_ty = outer_ty; - u32 cur_off = outer_offset; - int first = 1; - for (;;) { - if (is_punct(&p->cur, '[')) { - i64 idx; - u32 esz; - SrcLoc cloc = tok_loc_init(&p->cur); - advance(p); - idx = eval_const_int(p, cloc); - expect_punct(p, ']', "']' after designator index"); - if (!cur_ty || cur_ty->kind != TY_ARRAY) { - perr(p, "array designator on non-array"); - } - if (idx < 0 || (u32)idx >= cur_ty->arr.count) { - perr(p, "array designator index out of range"); - } - esz = abi_sizeof(p->abi, cur_ty->arr.elem); - cur_off += (u32)idx * esz; - cur_ty = cur_ty->arr.elem; - if (first) *top_index_out = (u32)idx; - first = 0; - } else if (is_punct(&p->cur, '.')) { - Sym fname; - const Type* fty; - u32 foff; - const Field* ff; - u16 fi; - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected field name after '.'"); - } - fname = p->cur.v.ident; - advance(p); - if (!cur_ty || - (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) { - perr(p, "field designator on non-record type"); - } - if 
(!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) { - perr(p, "no such field in designator"); - } - cur_off += foff; - if (first) { - for (fi = 0; fi < cur_ty->rec.nfields; ++fi) { - const Field* g = &cur_ty->rec.fields[fi]; - if (g->name == fname && fname != 0) { - *top_index_out = fi; - break; - } - if ((g->flags & FIELD_ANON) && - (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) { - const Type* tmp_ty; - u32 tmp_off; - const Field* tmp_f; - if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off, - &tmp_f)) { - *top_index_out = fi; - break; - } - } - } - } - cur_ty = fty; - first = 0; - } else { - break; - } - } - if (first) perr(p, "internal: empty designator chain"); - expect_punct(p, '=', "'=' after designator"); - *sub_ty_out = cur_ty; - *sub_offset_out = cur_off; -} - -static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty, u32 start_field, - int braced) { - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - u32 i = start_field; - u32 zero_lo = start_field; - for (; i < ty->rec.nfields; ++i) { - const Field* f = &ty->rec.fields[i]; - u32 foff = offset + L->fields[i].offset; - if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break; - if (braced && is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - while (zero_lo < top_idx) { - const Field* zf = &ty->rec.fields[zero_lo]; - u32 zoff = offset + L->fields[zero_lo].offset; - zero_init_at(p, slot, arr_ty, zoff, zf->type); - ++zero_lo; - } - init_at(p, slot, arr_ty, sub_off, sub_ty); - i = top_idx; - if (zero_lo <= top_idx) zero_lo = top_idx + 1; - goto next_item_struct; - } - init_at(p, slot, arr_ty, foff, f->type); - if (zero_lo <= i) zero_lo = i + 1; - if (!braced) { - ++i; - break; - } - next_item_struct: - if (!accept_punct(p, ',')) { - ++i; - break; - } - if (is_punct(&p->cur, '}')) { - ++i; - break; - } - } - if 
(braced) { - u32 j; - for (j = zero_lo; j < ty->rec.nfields; ++j) { - const Field* f = &ty->rec.fields[j]; - u32 foff = offset + L->fields[j].offset; - zero_init_at(p, slot, arr_ty, foff, f->type); - } - } - return i; -} - -static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* ty) { - if (ty->kind == TY_ARRAY) { - u32 esz = abi_sizeof(p->abi, ty->arr.elem); - init_at(p, slot, arr_ty, offset, ty->arr.elem); - (void)esz; - return 1; - } - if (ty->kind == TY_STRUCT) { - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); - return 1; - } - /* Scalar / pointer / union: consume one assignment-expr. */ - int had_brace = accept_punct(p, '{'); - push_subobject_lv(p, slot, arr_ty, offset, ty); - parse_assign_expr(p); - to_rvalue(p); - cg_store(p->cg); - cg_drop(p->cg); - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } - return 1; -} - -void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, - const Type* ty) { - if (ty->kind == TY_ARRAY) { - const Type* elem_ty = ty->arr.elem; - u32 esz = abi_sizeof(p->abi, elem_ty); - if (is_char_kind(elem_ty)) { - if (p->cur.kind == TOK_STR) { - init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); - return; - } - if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { - advance(p); - init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count); - accept_punct(p, ','); - expect_punct(p, '}', "'}' after string initializer"); - return; - } - } - if (!is_punct(&p->cur, '{')) { - init_elided(p, slot, arr_ty, offset, elem_ty); - return; - } - advance(p); /* '{' */ - { - u32 i = 0; - u32 zero_lo = 0; - if (!is_punct(&p->cur, '}')) { - for (;;) { - if (is_punct(&p->cur, '[')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, - &top_idx); - while (zero_lo < top_idx) { - zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty); - 
++zero_lo; - } - init_at(p, slot, arr_ty, sub_off, sub_ty); - i = top_idx + 1; - if (zero_lo < i) zero_lo = i; - } else { - if (i >= ty->arr.count) { - perr(p, "too many initializers for array"); - } - init_at(p, slot, arr_ty, offset + i * esz, elem_ty); - ++i; - if (zero_lo < i) zero_lo = i; - } - if (!accept_punct(p, ',')) break; - if (is_punct(&p->cur, '}')) break; - } - } - expect_punct(p, '}', "'}' after array initializer"); - { - u32 j; - for (j = zero_lo; j < ty->arr.count; ++j) { - zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty); - } - } - } - return; - } - if (ty->kind == TY_STRUCT) { - if (!is_punct(&p->cur, '{')) { - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0); - return; - } - advance(p); /* '{' */ - init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1); - expect_punct(p, '}', "'}' after struct initializer"); - return; - } - if (ty->kind == TY_UNION) { - int had_brace = accept_punct(p, '{'); - if (ty->rec.nfields == 0) { - if (had_brace) expect_punct(p, '}', "'}'"); - return; - } - if (had_brace && is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - init_at(p, slot, arr_ty, sub_off, sub_ty); - } else { - const Field* f = &ty->rec.fields[0]; - if (!(f->flags & FIELD_BITFIELD)) { - init_at(p, slot, arr_ty, offset, f->type); - } - } - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after union initializer"); - } - return; - } - /* Scalar (incl. pointer). 
*/ - int had_brace = accept_punct(p, '{'); - push_subobject_lv(p, slot, arr_ty, offset, ty); - parse_assign_expr(p); - to_rvalue(p); - coerce_top_to_lvalue(p); - cg_store(p->cg); - cg_drop(p->cg); - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } -} - -/* ============================================================ - * Static-storage initializers - * ============================================================ */ - -void encode_int_le(u8* dst, u32 size, i64 v) { - for (u32 i = 0; i < size; ++i) { - dst[i] = (u8)((v >> (8u * i)) & 0xffu); - } -} - -/* Encode a string literal at *buf+offset for a char-array sub-object. */ -static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset, - u32 count) { - size_t n = 0; - u8* bytes = peek_string_bytes(p, &n); - size_t copy = n; - if (copy > count) copy = count; - if (offset + (u32)copy > buflen) perr(p, "string initializer overflows object"); - memcpy(buf + offset, bytes, copy); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); -} - -/* Append one pending relocation to the parser-side list. */ -void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend) { - if (p->static_relocs_len == p->static_relocs_cap) { - u32 nc = p->static_relocs_cap ? p->static_relocs_cap * 2u : 4u; - void* nb = arena_array(p->c->tu, char, - nc * sizeof(*p->static_relocs)); - if (!nb) perr(p, "out of memory recording static relocs"); - if (p->static_relocs && p->static_relocs_len) { - memcpy(nb, p->static_relocs, - p->static_relocs_len * sizeof(*p->static_relocs)); - } - p->static_relocs = nb; - p->static_relocs_cap = nc; - } - p->static_relocs[p->static_relocs_len].offset = offset; - p->static_relocs[p->static_relocs_len].size = size; - p->static_relocs[p->static_relocs_len].target = target; - p->static_relocs[p->static_relocs_len].addend = addend; - ++p->static_relocs_len; -} - -/* Try to parse the current expression as an address constant. 
*/ -static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf, - u32 offset, u32 sz) { - Tok t = p->cur; - Sym name = 0; - SrcLoc nloc = tok_loc_init(&p->cur); - int saw_amp = 0; - i64 element_addend = 0; - i64 byte_addend = 0; - SymEntry* e; - const Type* tgt_ty; - ObjSymId tgt; - if (t.kind == TOK_STR) { - size_t n = 0; - u8* bytes = decode_string_literal(p, &t, &n); - ObjSymId str_sym = emit_string_to_rodata(p, bytes, n); - p->c->env->heap->free(p->c->env->heap, bytes, 0); - advance(p); - (void)ty; - (void)buf; - srl_push(p, offset, sz, str_sym, 0); - return 1; - } - if (is_punct(&t, '&')) { - saw_amp = 1; - advance(p); - if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) { - perr(p, "expected identifier after '&' in static initializer"); - } - name = p->cur.v.ident; - nloc = tok_loc_init(&p->cur); - advance(p); - } else if (t.kind == TOK_IDENT && ident_kw_init(p, t.v.ident) == KW_NONE) { - name = t.v.ident; - advance(p); - } else { - return 0; - } - e = scope_lookup(p, name); - if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) { - perr(p, "static initializer is not a constant address expression"); - } - tgt = e->v.sym; - tgt_ty = e->type; - if (saw_amp && is_punct(&p->cur, '[')) { - SrcLoc cloc; - advance(p); - cloc = tok_loc_init(&p->cur); - element_addend = eval_const_int(p, cloc); - expect_punct(p, ']', "']' after array-subscript constant"); - if (tgt_ty && tgt_ty->kind == TY_ARRAY) { - byte_addend += - element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); - } else { - byte_addend += element_addend; - } - } - while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) { - int neg = is_punct(&p->cur, '-'); - SrcLoc cloc; - i64 v; - advance(p); - cloc = tok_loc_init(&p->cur); - v = eval_const_int(p, cloc); - if (neg) v = -v; - if (tgt_ty && tgt_ty->kind == TY_ARRAY) { - byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem); - } else if (tgt_ty && tgt_ty->kind == TY_PTR) { - byte_addend += v * 
(i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee); - } else if (saw_amp) { - byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty); - } else { - byte_addend += v; - } - } - (void)nloc; - (void)ty; - (void)buf; - srl_push(p, offset, sz, tgt, byte_addend); - return 1; -} - -void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, - const Type* ty) { - if (ty->kind == TY_ARRAY) { - const Type* elem = ty->arr.elem; - u32 esz = abi_sizeof(p->abi, elem); - u32 i = 0; - int had_brace; - if (is_char_kind(elem)) { - if (p->cur.kind == TOK_STR) { - parse_static_string_at(p, buf, buflen, offset, ty->arr.count); - return; - } - if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) { - advance(p); - parse_static_string_at(p, buf, buflen, offset, ty->arr.count); - accept_punct(p, ','); - expect_punct(p, '}', "'}' after string initializer"); - return; - } - } - had_brace = accept_punct(p, '{'); - if (!had_brace) { - perr(p, "expected '{' for static-storage array initializer"); - } - if (!is_punct(&p->cur, '}')) { - for (;;) { - if (is_punct(&p->cur, '[')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - parse_static_init_at(p, buf, buflen, sub_off, sub_ty); - i = top_idx + 1; - } else { - if (i >= ty->arr.count) { - perr(p, "too many initializers for array"); - } - parse_static_init_at(p, buf, buflen, offset + i * esz, elem); - ++i; - } - if (!accept_punct(p, ',')) break; - if (is_punct(&p->cur, '}')) break; - } - } - expect_punct(p, '}', "'}' after array initializer"); - return; - } - if (ty->kind == TY_STRUCT) { - int had_brace = accept_punct(p, '{'); - const ABIRecordLayout* L = abi_record_layout(p->abi, ty); - u32 i = 0; - if (!had_brace) { - perr(p, "expected '{' for static-storage struct initializer"); - } - while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) { - const Field* f = &ty->rec.fields[i]; - if (is_punct(&p->cur, '.')) { - const Type* sub_ty; - u32 sub_off; - u32 top_idx = 
0; - parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx); - parse_static_init_at(p, buf, buflen, sub_off, sub_ty); - i = top_idx + 1; - if (!accept_punct(p, ',')) break; - continue; - } - parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset, - f->type); - ++i; - if (!accept_punct(p, ',')) break; - } - expect_punct(p, '}', "'}' after struct initializer"); - return; - } - if (ty->kind == TY_UNION) { - perr(p, "static-storage union initializer not supported in Phase 4"); - } - /* Scalar / pointer. */ - { - int had_brace = accept_punct(p, '{'); - SrcLoc cloc = tok_loc_init(&p->cur); - u32 sz = abi_sizeof(p->abi, ty); - if (offset + sz > buflen) perr(p, "initializer overflows object"); - if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) { - /* Address constant recorded as a reloc. */ - } else { - i64 v = eval_const_int(p, cloc); - encode_int_le(buf + offset, sz, v); - } - if (had_brace) { - accept_punct(p, ','); - expect_punct(p, '}', "'}' after scalar initializer"); - } - } -} - -/* Choose the section a defining object decl should land in. */ -static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) { - if ((quals & Q_CONST) != 0 && has_nonzero) { - Sym secname = pool_intern_cstr(p->pool, ".rodata"); - return obj_section(decl_obj(p->decls), secname, SEC_RODATA, SF_ALLOC, 1u); - } - return OBJ_SEC_NONE; -} - -/* Define a static-storage object. */ -void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, - u16 quals, int has_init, SrcLoc loc, - u32 align_override) { - ObjBuilder* ob = decl_obj(p->decls); - u32 size = abi_sizeof(p->abi, var_ty); - u32 align = abi_alignof(p->abi, var_ty); - if (align_override > align) align = align_override; - u8* buf = NULL; - int has_nonzero = 0; - ObjSecId override_sec; - const ObjSym* os = obj_symbol_get(ob, sym); - int is_tls = (os && os->kind == SK_TLS); - - if (has_init) { - buf = (u8*)arena_array(p->c->tu, u8, size ? 
size : 1u); - memset(buf, 0, size); - p->static_relocs_len = 0; - parse_static_init_at(p, buf, size, 0, var_ty); - for (u32 i = 0; i < size; ++i) { - if (buf[i]) { has_nonzero = 1; break; } - } - if (p->static_relocs_len) has_nonzero = 1; - } - - if (is_tls) { - /* Format split (ELF .tdata/.tbss vs Mach-O TLV descriptor) lives in - * obj_define_tls. Translate the parser's static_relocs[] (size=4|8) - * into the obj-layer's RelocKind so the caller stays format-agnostic. */ - ObjTlsReloc* tr = NULL; - u32 nr = p->static_relocs_len; - if (nr) { - tr = arena_array(p->c->tu, ObjTlsReloc, nr); - for (u32 i = 0; i < nr; ++i) { - tr[i].offset = p->static_relocs[i].offset; - tr[i].kind = - (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; - tr[i].target = p->static_relocs[i].target; - tr[i].addend = p->static_relocs[i].addend; - } - } - obj_define_tls(p->c, ob, sym, buf, size, has_nonzero, - align ? align : 1u, tr, nr); - p->static_relocs_len = 0; - (void)loc; - return; - } - - override_sec = pick_object_section(p, quals, has_nonzero); - if (override_sec != OBJ_SEC_NONE) { - u32 base = obj_align_to(ob, override_sec, align > 1u ? align : 1u); - { - u8* dst = obj_reserve(ob, override_sec, size); - if (dst && buf) memcpy(dst, buf, size); - } - obj_symbol_define(ob, sym, override_sec, base, size); - for (u32 i = 0; i < p->static_relocs_len; ++i) { - RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; - obj_reloc(ob, override_sec, base + p->static_relocs[i].offset, rk, - p->static_relocs[i].target, p->static_relocs[i].addend); - } - p->static_relocs_len = 0; - (void)loc; - return; - } - - if (!has_init || !has_nonzero) { - Sym sname = pool_intern_cstr(p->pool, ".bss"); - ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS, - SF_ALLOC | SF_WRITE, - align ? align : 1u, 0, OBJ_SEC_NONE, 0); - u32 a = align ? 
align : 1u; - u32 base = obj_align_to(ob, sec, a); - obj_reserve_bss(ob, sec, base + size, a); - obj_symbol_define(ob, sym, sec, base, size); - return; - } - /* .data path. */ - { - Sym sname = pool_intern_cstr(p->pool, ".data"); - ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE, - align ? align : 1u); - u32 base = obj_align_to(ob, sec, align ? align : 1u); - u8* dst = obj_reserve(ob, sec, size); - if (dst) memcpy(dst, buf, size); - obj_symbol_define(ob, sym, sec, base, size); - for (u32 i = 0; i < p->static_relocs_len; ++i) { - RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32; - obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk, - p->static_relocs[i].target, p->static_relocs[i].addend); - } - p->static_relocs_len = 0; - } -} diff --git a/src/parse/parse_priv.h b/src/parse/parse_priv.h @@ -1,449 +0,0 @@ -/* parse_priv.h — private header shared across parse_*.c modules. - * - * Declares: Parser struct, shared types (Scope, SymEntry, TagEntry, - * DeclSpecs, TypeSpecAccum, CKw, TagDeclKind), forward decls of - * cross-module functions, and inline/shared helpers. */ - -#pragma once - -#include "parse/parse.h" - -#include <stdarg.h> -#include <string.h> - -#include "abi/abi.h" -#include "arch/arch.h" -#include "cg/cg.h" -#include "core/arena.h" -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "debug/debug.h" -#include "decl/decl.h" -#include "decl/decl_attrs.h" -#include "lex/lex.h" -#include "obj/obj.h" -#include "parse/attr.h" -#include "pp/pp.h" -#include "type/type.h" - -/* Type-aware push for locals — exposed by cg.c, not in cg.h. */ -extern void cg_push_local_typed(CG*, FrameSlot, const Type*); -/* Pop pointer rvalue, push INDIRECT lvalue of given pointee. */ -extern void cg_deref(CG*, const Type* pointee); -/* Read SValue.type at top of stack without popping. */ -extern const Type* cg_top_type(CG*); -/* Read SValue.type at second-from-top. 
*/ -extern const Type* cg_top2_type(CG*); -/* Replace the type tag on the top SValue without emitting code. */ -extern void cg_retag_top(CG*, const Type*); - -/* ============================================================ - * Keywords - * ============================================================ */ -typedef enum CKw { - KW_NONE = 0, - KW_AUTO, - KW_BREAK, - KW_CASE, - KW_CHAR, - KW_CONST, - KW_CONTINUE, - KW_DEFAULT, - KW_DO, - KW_DOUBLE, - KW_ELSE, - KW_ENUM, - KW_EXTERN, - KW_FLOAT, - KW_FOR, - KW_GOTO, - KW_IF, - KW_INLINE, - KW_INT, - KW_LONG, - KW_REGISTER, - KW_RESTRICT, - KW_RETURN, - KW_SHORT, - KW_SIGNED, - KW_SIZEOF, - KW_STATIC, - KW_STRUCT, - KW_SWITCH, - KW_TYPEDEF, - KW_UNION, - KW_UNSIGNED, - KW_VOID, - KW_VOLATILE, - KW_WHILE, - KW_BOOL, /* _Bool */ - KW_COMPLEX, /* _Complex */ - KW_IMAGINARY, /* _Imaginary */ - KW_ALIGNAS, /* _Alignas */ - KW_ALIGNOF, /* _Alignof */ - KW_ATOMIC, /* _Atomic */ - KW_GENERIC, /* _Generic */ - KW_NORETURN, /* _Noreturn */ - KW_STATIC_ASSERT, /* _Static_assert */ - KW_THREAD_LOCAL, /* _Thread_local */ - KW_ASM, /* GNU `asm` */ - KW_BUILTIN_ASM, /* GNU `__asm__` */ - KW_COUNT -} CKw; - -/* ============================================================ - * Scope stack types - * ============================================================ */ - -typedef enum SymEntryKind { - SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */ - SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */ - SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */ - SEK_TYPEDEF, /* typedef name */ - SEK_ENUM_CST, /* enumeration constant */ -} SymEntryKind; - -typedef struct SymEntry SymEntry; -struct SymEntry { - Sym name; - u8 kind; /* SymEntryKind */ - u8 pad[3]; - const Type* type; - union { - FrameSlot slot; - ObjSymId sym; - i64 enum_value; - } v; - FrameSlot vla_byte_slot; - struct Attr* attrs; - SymEntry* next; -}; - -typedef struct TagEntry TagEntry; -struct TagEntry { - Sym name; - u8 kind; /* TagDeclKind */ - u8 complete; - u16 pad; - 
Type* type; - struct Attr* attrs; - TagEntry* next; -}; - -typedef struct Scope Scope; -struct Scope { - SymEntry* entries; /* LIFO */ - TagEntry* tags; /* LIFO */ - Scope* parent; -}; - -/* ============================================================ - * Switch/goto control-flow types - * ============================================================ */ - -typedef struct CaseEntry CaseEntry; -struct CaseEntry { - i64 value; - CGLabel label; - CaseEntry* next; -}; - -typedef struct SwitchCtx SwitchCtx; -struct SwitchCtx { - CaseEntry* cases; - CGLabel default_label; - FrameSlot value_slot; - const Type* value_type; - SwitchCtx* parent; -}; - -typedef struct GotoLabel GotoLabel; -struct GotoLabel { - Sym name; - CGLabel label; - u8 placed; - u8 pad[3]; - SrcLoc first_use; - GotoLabel* next; -}; - -/* ============================================================ - * Parser context - * ============================================================ */ - -typedef struct Parser { - Compiler* c; - Pp* pp; - DeclTable* decls; - CG* cg; - Debug* debug; - TargetABI* abi; - Pool* pool; - - Tok cur; - Tok next; - int has_next; - - Tok pending; - int has_pending; - - Sym kw_sym[KW_COUNT]; - - Sym sym_b_alloca; - Sym sym_b_ctz; - Sym sym_b_clz; - Sym sym_b_clzl; - Sym sym_b_clzll; - Sym sym_b_trap; - Sym sym_b_unreachable; - Sym sym_b_memcpy; - Sym sym_b_memmove; - Sym sym_b_memcmp; - Sym sym_b_memset; - Sym sym_func; /* __func__ */ - Sym sym_func_gcc; /* __FUNCTION__ */ - Sym sym_pretty_func_gcc; /* __PRETTY_FUNCTION__ */ - Sym cur_func_name; /* name of the function whose body we're in, - * 0 at file scope */ - Sym sym_b_expect; - Sym sym_b_offsetof; - Sym sym_b_va_list; - Sym sym_b_va_start; - Sym sym_b_va_arg; - Sym sym_b_va_end; - Sym sym_b_va_copy; - Sym sym_attribute; - Sym sym_volatile_alias; - Sym sym_alignof_alias; - Sym sym_int128; /* __int128 */ - Sym sym_int128_t; /* __int128_t */ - Sym sym_uint128_t; /* __uint128_t */ - Sym sym_a_load_n; - Sym sym_a_store_n; - Sym 
sym_a_exchange_n; - Sym sym_a_fetch_add; - Sym sym_a_fetch_sub; - Sym sym_a_fetch_and; - Sym sym_a_fetch_or; - Sym sym_a_fetch_xor; - Sym sym_a_cas_n; - Sym sym_a_thread_fence; - Sym sym_a_signal_fence; - - Scope* scope; - - ObjSecId text_sec; - - CGLabel cur_break; - CGLabel cur_continue; - - SwitchCtx* cur_switch; - - GotoLabel* goto_labels; - - u8 vla_pending; - FrameSlot vla_pending_count_slot; - - FrameSlot last_pushed_vla_slot; - - u8 in_param_decl; - - u32 static_local_counter; - - u32 compound_literal_counter; - - Tok* replay; - u32 replay_cap; - u32 replay_len; - u32 replay_pos; - u8 replay_active; - - struct { - u32 offset; - u32 size; - ObjSymId target; - i64 addend; - } *static_relocs; - u32 static_relocs_len; - u32 static_relocs_cap; -} Parser; - -/* ============================================================ - * DeclSpecs and TypeSpecAccum - * ============================================================ */ - -typedef struct DeclSpecs { - const Type* type; - DeclStorage storage; - u32 flags; /* DeclFlag */ - u16 quals; - u32 align; - FrameSlot vla_byte_slot; - Attr* attrs; -} DeclSpecs; - -typedef struct TypeSpecAccum { - u8 saw_void; - u8 saw_char; - u8 saw_int; - u8 saw_short; - u8 long_count; - u8 saw_signed; - u8 saw_unsigned; - u8 saw_bool; - u8 saw_float; - u8 saw_double; - u8 saw_int128; /* __int128 / __int128_t / __uint128_t */ - u8 saw_explicit_type; -} TypeSpecAccum; - -/* ============================================================ - * Shared token/diagnostic helpers (defined in parse.c) - * ============================================================ */ - -_Noreturn void perr(Parser* p, const char* fmt, ...); -void advance(Parser* p); -Tok peek1(Parser* p); -void expect_punct(Parser* p, u32 punct, const char* what); -int accept_punct(Parser* p, u32 punct); - -/* ============================================================ - * Scope/tag ops (defined in parse.c) - * ============================================================ */ - -Scope* 
scope_new(Parser* p, Scope* parent); -void scope_push(Parser* p); -void scope_pop(Parser* p); -SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, const Type* type); -SymEntry* scope_lookup(Parser* p, Sym name); -TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, int complete); -TagEntry* tag_lookup(Parser* p, Sym name); -TagEntry* tag_lookup_local(Parser* p, Sym name); - -/* ============================================================ - * Token predicate helpers (defined in parse.c — file-scope static, - * exposed here as inline equivalents; each .c file sees its own copy) - * ============================================================ */ - -static inline int is_punct(const Tok* t, u32 punct) { - return t->kind == TOK_PUNCT && t->v.punct == punct; -} - -static inline int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; } - -static inline CKw ident_kw_inline(const Parser* p, Sym name) { - CKw i; - for (i = (CKw)1; i < KW_COUNT; ++i) { - if (p->kw_sym[i] == name) return i; - } - if (name == p->sym_alignof_alias) return KW_ALIGNOF; - return KW_NONE; -} - -static inline int is_kw(const Parser* p, const Tok* t, CKw k) { - if (t->kind != TOK_IDENT) return 0; - if (t->v.ident == p->kw_sym[k]) return 1; - if (k == KW_ALIGNOF && t->v.ident == p->sym_alignof_alias) return 1; - return 0; -} - -/* ============================================================ - * Shared types (needed across multiple modules) - * ============================================================ */ - -typedef struct ParamInfo { - Sym name; - const Type* type; - SrcLoc loc; -} ParamInfo; - -/* ============================================================ - * Declarator suffix types (defined in parse_type.c, shared here) - * ============================================================ */ - -typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind; -typedef struct DeclSuffix { - u8 kind; /* DSuffKind */ - u32 count; /* element count; meaningful when !vla and 
!incomplete */ - u8 incomplete; /* true for `[]` (no size given) */ - u8 vla; /* true for `[expr]` with a non-constant size */ - FrameSlot vla_count_slot; - ParamInfo* params; - u16 nparams; - u8 variadic; -} DeclSuffix; - -/* ============================================================ - * Cross-module forward declarations - * ============================================================ */ - -/* parse_type.c */ - -int parse_decl_specs(Parser* p, DeclSpecs* out); -const Type* parse_struct_or_union(Parser* p, TypeKind kind, Attr** anon_attrs_out); -const Type* parse_enum(Parser* p, Attr** anon_attrs_out); -const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, SrcLoc loc); -const Type* parse_type_name(Parser* p); -const Type* parse_pointer_layer(Parser* p, const Type* base); -const Type* parse_declarator_full(Parser* p, const Type* base, int allow_abstract, - Sym* name_out, SrcLoc* loc_out); -const Type* parse_declarator_full_ex(Parser* p, const Type* base, int allow_abstract, - Sym* name_out, SrcLoc* loc_out, Attr** attrs_out); -const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, SrcLoc* loc_out); -const Type* complete_incomplete_array(Parser* p, const Type* ty); -int starts_type_name(const Parser* p, const Tok* t); -int starts_attr(const Parser* p); -Attr* parse_attribute_spec_list(Parser* p); -void parse_and_discard_attributes(Parser* p); -int find_field(TargetABI* abi, const Type* rec, Sym name, - const Type** out_type, u32* out_offset, const Field** out_field); -u32 attrs_pick_aligned(const Attr* a); -void attr_list_append(Attr** head, Attr* add); -void parse_attrs_into(Parser* p, Attr** sink); -int parse_decl_suffix(Parser* p, DeclSuffix* out); -const Type* apply_decl_suffix(Parser* p, const Type* base, const DeclSuffix* s); - -/* parse_expr.c */ -void parse_expr(Parser* p); -void parse_assign_expr(Parser* p); -void parse_cond_expr(Parser* p); -void parse_unary(Parser* p); -i64 eval_const_int(Parser* p, SrcLoc loc); -i64 
parse_int_literal(Parser* p, const Tok* t); -i64 decode_char_literal(Parser* p, const Tok* t); -u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out); -void to_rvalue(Parser* p); -void coerce_top_to_lvalue(Parser* p); -ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n); - -/* parse_init.c */ -void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* ty); -void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, const Type* ty); -void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, u16 quals, - int has_init, SrcLoc loc, u32 align_override); -void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend); -void encode_int_le(u8* dst, u32 size, i64 v); -void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty, - u32 offset, const Type* elem_ty); -void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot, - const Type* dst_arr_ty, u32 dst_off, - const Type* ty); -int is_char_kind(const Type* ty); - -/* parse_stmt.c */ -void parse_stmt(Parser* p); -void parse_compound_stmt(Parser* p); -void parse_static_assert(Parser* p); - -/* parse.c (residual — TU driver) */ -void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out, - u8* variadic_out); -void parse_local_decl(Parser* p, const DeclSpecs* specs); -FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc); -FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, SrcLoc loc, - u32 align_override); -Sym mint_static_local_sym(Parser* p, Sym orig); -void record_braced_block(Parser* p); -void replay_rewind(Parser* p); -u32 count_recorded_top_level_items(const Tok* vec, u32 len); diff --git a/src/pp/pp.h b/src/pp/pp.h @@ -1,36 +1,6 @@ -#ifndef CFREE_PP_H -#define CFREE_PP_H +#ifndef CFREE_SRC_PP_COMPAT_H +#define CFREE_SRC_PP_COMPAT_H -#include "lex/lex.h" - -typedef struct Pp Pp; - -/* PP reads file_io from c->env for include search. 
If include search is - * configured but c->env->file_io is missing, include resolution panics. */ -Pp* pp_new(Compiler*); -void pp_free(Pp*); - -void pp_add_include_dir(Pp*, const char* dir, int system); -void pp_define(Pp*, const char* name, const char* body); /* -D */ -void pp_undef(Pp*, const char* name); /* -U */ - -/* Pushes a Lexer onto the include stack. PP takes ownership of the Lexer: - * it is closed when the input hits EOF and is popped, or in pp_free if it - * is still on the stack. Callers must not call lex_close on a pushed - * Lexer. The borrowed source buffer (see lex_open_mem) must outlive - * pp_free. */ -void pp_push_input(Pp*, Lexer*); -void pp_add_include_edge(Pp*, u32 includer_file_id, u32 included_file_id, - SrcLoc include_loc, int system); - -/* Streaming. Yields preprocessed tokens (macro-expanded, directives consumed). - */ -Tok pp_next(Pp*); -const LitInfo* pp_lit(const Pp*, LitId); - -/* Drains pp_next into `out` as preprocessed C source text: token spellings - * separated by single spaces where TF_HAS_SPACE is set, with newlines for - * TF_AT_BOL transitions. Stops on TOK_EOF. Used by cfree_preprocess. */ -void pp_emit_text(Pp*, Writer* out); +#include "../../lang/c/pp/pp.h" #endif diff --git a/src/type/type.c b/src/type/type.c @@ -1,348 +0,0 @@ -/* C type construction. - * - * Types are interned per-Pool: a single `type_void(pool)` returns the same - * Type* on every call against the same pool, and structurally-equal calls - * to type_prim/type_ptr/type_func collapse to the same Type*. The cache is - * a small open structure stored through Pool.type_cache (opaque to other - * consumers). - * - * Storage: every Type and every supporting array (TY_FUNC param vectors, - * TY_STRUCT field arrays) is allocated from the Pool's arena, so pointers - * are stable for the Pool's lifetime. 
- * - * v1 covers what the cg test harness drives: - * void / scalars / pointer / function / struct / union - * Other constructors (array, qualified, enum) and predicates have minimal - * implementations sufficient for the cg test surface; they will grow with - * the parser. */ - -#include "type/type.h" - -#include <stdint.h> -#include <string.h> - -#include "core/arena.h" -#include "core/pool.h" - -#define NUM_PRIM_KINDS ((unsigned)TY_LDOUBLE + 1u) - -typedef struct TypeListNode TypeListNode; -struct TypeListNode { - TypeListNode* next; - Type ty; -}; - -typedef struct PoolTypeCache { - /* Direct slots for void + primitive kinds (TY_VOID..TY_LDOUBLE). */ - const Type* prim[NUM_PRIM_KINDS]; - /* Linked list of every other type allocated through this pool. */ - TypeListNode* derived; - /* Tag id allocator (1-based; TAG_NONE = 0). */ - u32 next_tag; -} PoolTypeCache; - -static PoolTypeCache* cache_get(Pool* p) { - PoolTypeCache* c = (PoolTypeCache*)p->type_cache; - if (c) return c; - c = arena_new(&p->arena, PoolTypeCache); - if (!c) return NULL; - memset(c, 0, sizeof *c); - c->next_tag = 1; - p->type_cache = c; - return c; -} - -static Type* alloc_type_node(Pool* p, PoolTypeCache* c) { - TypeListNode* n = arena_new(&p->arena, TypeListNode); - if (!n) return NULL; - memset(n, 0, sizeof *n); - n->next = c->derived; - c->derived = n; - return &n->ty; -} - -const Type* type_void(Pool* p) { return type_prim(p, TY_VOID); } - -const Type* type_prim(Pool* p, TypeKind kind) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - if ((unsigned)kind >= NUM_PRIM_KINDS) return NULL; - if (c->prim[kind]) return c->prim[kind]; - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = (u16)kind; - t->qual = 0; - c->prim[kind] = t; - return t; -} - -const Type* type_ptr(Pool* p, const Type* pointee) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - /* Linear search; small N in practice. 
*/ - for (TypeListNode* n = c->derived; n; n = n->next) { - if (n->ty.kind == TY_PTR && n->ty.qual == 0 && - n->ty.ptr.pointee == pointee) { - return &n->ty; - } - } - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = TY_PTR; - t->qual = 0; - t->ptr.pointee = pointee; - return t; -} - -const Type* type_array(Pool* p, const Type* elem, u32 count, int incomplete) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - for (TypeListNode* n = c->derived; n; n = n->next) { - if (n->ty.kind == TY_ARRAY && n->ty.qual == 0 && n->ty.arr.elem == elem && - n->ty.arr.count == count && - n->ty.arr.incomplete == (u8)(incomplete ? 1 : 0)) { - return &n->ty; - } - } - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = TY_ARRAY; - t->qual = 0; - t->arr.elem = elem; - t->arr.count = count; - t->arr.incomplete = (u8)(incomplete ? 1 : 0); - return t; -} - -static int param_arrays_eq(const Type* const* a, const Type* const* b, u16 n) { - for (u16 i = 0; i < n; ++i) - if (a[i] != b[i]) return 0; - return 1; -} - -const Type* type_func(Pool* p, const Type* ret, const Type** params, u16 n, - int variadic) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - for (TypeListNode* nd = c->derived; nd; nd = nd->next) { - if (nd->ty.kind == TY_FUNC && nd->ty.qual == 0 && nd->ty.fn.ret == ret && - nd->ty.fn.nparams == n && - nd->ty.fn.variadic == (u8)(variadic ? 1 : 0) && - param_arrays_eq(nd->ty.fn.params, params, n)) { - return &nd->ty; - } - } - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = TY_FUNC; - t->qual = 0; - t->fn.ret = ret; - t->fn.nparams = n; - t->fn.variadic = (u8)(variadic ? 
1 : 0); - if (n) { - const Type** dst = arena_array(&p->arena, const Type*, n); - if (!dst) return NULL; - for (u16 i = 0; i < n; ++i) dst[i] = params[i]; - t->fn.params = dst; - } else { - t->fn.params = NULL; - } - return t; -} - -const Type* type_qualified(Pool* p, const Type* base, u16 qual) { - if (!base || qual == 0) return base; - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - for (TypeListNode* n = c->derived; n; n = n->next) { - if (n->ty.kind == base->kind && n->ty.qual == qual) { - /* Compare body bytes other than qual. Cheap: types are POD. */ - Type tmpl = *base; - tmpl.qual = qual; - if (memcmp(&n->ty, &tmpl, sizeof(Type)) == 0) return &n->ty; - } - } - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - *t = *base; - t->qual = qual; - return t; -} - -/* ---- aggregates ---- */ - -struct TypeRecordBuilder { - Pool* pool; - TypeKind kind; /* TY_STRUCT or TY_UNION */ - TagId tag_id; - Sym tag; - Field* fields; - u32 nfields; - u32 cap; - TypeRecordOpts opts; -}; - -TagId type_tag_new(Pool* p, TagDeclKind kind, Sym spelling, SrcLoc loc) { - PoolTypeCache* c = cache_get(p); - if (!c) return TAG_NONE; - (void)kind; - (void)spelling; - (void)loc; - return (TagId)(c->next_tag++); -} - -const TagDecl* type_tag_get(Pool* p, TagId id) { - (void)p; - (void)id; - /* TagDecl table is parser-territory; not modeled in v1. 
*/ - return NULL; -} - -TypeRecordBuilder* type_record_begin(Pool* p, TypeKind kind, TagId tag_id, - Sym tag) { - TypeRecordOpts opts; - memset(&opts, 0, sizeof opts); - return type_record_begin_ex(p, kind, tag_id, tag, opts); -} - -TypeRecordBuilder* type_record_begin_ex(Pool* p, TypeKind kind, TagId tag_id, - Sym tag, TypeRecordOpts opts) { - TypeRecordBuilder* b = arena_new(&p->arena, TypeRecordBuilder); - if (!b) return NULL; - memset(b, 0, sizeof *b); - b->pool = p; - b->kind = kind; - b->tag_id = tag_id; - b->tag = tag; - b->opts = opts; - return b; -} - -void type_record_field(TypeRecordBuilder* b, Field f) { - if (b->nfields == b->cap) { - u32 nc = b->cap ? b->cap * 2 : 4; - Field* nf = arena_array(&b->pool->arena, Field, nc); - if (!nf) return; - if (b->fields) memcpy(nf, b->fields, sizeof(Field) * b->nfields); - b->fields = nf; - b->cap = nc; - } - b->fields[b->nfields++] = f; -} - -const Type* type_record_end(Pool* p, TypeRecordBuilder* b) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = (u16)b->kind; - t->qual = 0; - t->rec.tag_id = b->tag_id; - t->rec.tag = b->tag; - t->rec.fields = b->fields; - t->rec.nfields = (u16)b->nfields; - t->rec.incomplete = 0; - t->rec.packed = b->opts.packed; - t->rec.align_override = b->opts.align_override; - return t; -} - -Type* type_record_forward(Pool* p, TypeKind kind, TagId tag_id, Sym tag) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = (u16)kind; - t->qual = 0; - t->rec.tag_id = tag_id; - t->rec.tag = tag; - t->rec.fields = NULL; - t->rec.nfields = 0; - t->rec.incomplete = 1; - t->rec.packed = 0; - t->rec.align_override = 0; - return t; -} - -void type_record_install(Type* forward, const Field* fields, u16 nfields) { - if (!forward) return; - forward->rec.fields = fields; - forward->rec.nfields = nfields; - forward->rec.incomplete = 0; -} - -const Type* 
type_enum(Pool* p, TagId tag_id, Sym tag, const Type* base) { - PoolTypeCache* c = cache_get(p); - if (!c) return NULL; - Type* t = alloc_type_node(p, c); - if (!t) return NULL; - t->kind = TY_ENUM; - t->qual = 0; - t->enm.tag_id = tag_id; - t->enm.tag = tag; - t->enm.base = base; - return t; -} - -/* ---- predicates / utilities ---- */ - -const Type* type_unqual(Pool* p, const Type* t) { - if (!t || t->qual == 0) return t; - return type_qualified(p, t, 0); -} - -const Type* type_promoted(Pool* p, const Type* t) { - if (!t) return t; - switch (t->kind) { - case TY_BOOL: - case TY_CHAR: - case TY_SCHAR: - case TY_UCHAR: - case TY_SHORT: - case TY_USHORT: - return type_prim(p, TY_INT); - default: - return t; - } -} - -int type_compatible(const Type* a, const Type* b) { - if (a == b) return 1; - if (!a || !b) return 0; - if (a->kind != b->kind) return 0; - /* Strict structural compatibility past identity is parser territory; v1 - * relies on interning for the common cases. */ - return 0; -} - -int type_is_int(const Type* t) { - if (!t) return 0; - switch (t->kind) { - case TY_BOOL: - case TY_CHAR: - case TY_SCHAR: - case TY_UCHAR: - case TY_SHORT: - case TY_USHORT: - case TY_INT: - case TY_UINT: - case TY_LONG: - case TY_ULONG: - case TY_LLONG: - case TY_ULLONG: - case TY_INT128: - case TY_UINT128: - case TY_ENUM: - return 1; - default: - return 0; - } -} - -int type_is_arith(const Type* t) { - if (!t) return 0; - if (type_is_int(t)) return 1; - return t->kind == TY_FLOAT || t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE; -} - -int type_is_ptr(const Type* t) { return t && t->kind == TY_PTR; } diff --git a/src/type/type.h b/src/type/type.h @@ -1,169 +1,6 @@ -#ifndef CFREE_TYPE_H -#define CFREE_TYPE_H +#ifndef CFREE_SRC_TYPE_COMPAT_H +#define CFREE_SRC_TYPE_COMPAT_H -#include "core/core.h" -#include "core/pool.h" - -typedef enum TypeKind { - TY_VOID, - TY_BOOL, - TY_CHAR, - TY_SCHAR, - TY_UCHAR, - TY_SHORT, - TY_USHORT, - TY_INT, - TY_UINT, - TY_LONG, - TY_ULONG, - 
TY_LLONG, - TY_ULLONG, - TY_INT128, - TY_UINT128, - TY_FLOAT, - TY_DOUBLE, - TY_LDOUBLE, - TY_PTR, - TY_ARRAY, - TY_FUNC, - TY_STRUCT, - TY_UNION, - TY_ENUM, -} TypeKind; - -/* C tag identity is scoped declaration identity, not the spelling. `Sym tag` - * remains the diagnostic/debug name; TagId prevents two scoped `struct S` - * declarations from collapsing under global Type interning. */ -typedef u32 TagId; -#define TAG_NONE 0u - -typedef enum TagDeclKind { - TAG_STRUCT, - TAG_UNION, - TAG_ENUM, -} TagDeclKind; - -typedef struct TagDecl { - TagId id; - Sym spelling; - SrcLoc loc; - u8 kind; /* TagDeclKind */ - u8 complete; - u16 pad; -} TagDecl; - -typedef enum TypeQual { - Q_CONST = 1u << 0, - Q_VOLATILE = 1u << 1, - Q_RESTRICT = 1u << 2, - Q_ATOMIC = 1u << 3, -} TypeQual; - -typedef enum FieldFlag { - FIELD_NONE = 0, - FIELD_BITFIELD = 1u << 0, - FIELD_ZERO_WIDTH = 1u << 1, - FIELD_ANON = 1u << 2, - FIELD_FLEXIBLE_ARRAY = 1u << 3, -} FieldFlag; - -typedef struct Field { - Sym name; - const Type* type; - u16 bitfield_width; /* valid when FIELD_BITFIELD is set; may be 0 */ - u16 flags; /* FieldFlag */ - /* Phase 2 attribute carriers — populated by the parser when the member - * carries __attribute__((aligned(N))) / ((packed)). Zero means "no - * override"; abi_record_layout interprets them. */ - u16 align_override; - u8 packed; - u8 pad; -} Field; - -struct Type { - u16 kind; - u16 qual; - union { - struct { - const Type* pointee; - } ptr; - struct { - const Type* elem; - u32 count; - u8 incomplete; - } arr; - struct { - const Type* ret; - const Type** params; - u16 nparams; - u8 variadic; - } fn; - struct { - TagId tag_id; - Sym tag; - const Field* fields; - u16 nfields; - u8 incomplete; - /* Phase 2 attribute carriers — record-level - * __attribute__((packed)) / ((aligned(N))). Both zero means - * "natural layout". abi_record_layout honors them. 
*/ - u8 packed; - u16 align_override; - } rec; /* struct / union */ - struct { - TagId tag_id; - Sym tag; - const Type* base; - } enm; - }; -}; - -const Type* type_void(Pool*); -const Type* type_prim(Pool*, TypeKind); -const Type* type_ptr(Pool*, const Type*); -const Type* type_array(Pool*, const Type* elem, u32 count, int incomplete); -const Type* type_func(Pool*, const Type* ret, const Type** params, u16 n, - int variadic); -const Type* type_qualified(Pool*, const Type*, u16 qual); - -/* Aggregate construction is mutable only through TypeRecordBuilder. The - * committed Type is immutable and interned; field offsets, record - * size/alignment, and bitfield storage are target ABI facts. */ -typedef struct TypeRecordBuilder TypeRecordBuilder; -TagId type_tag_new(Pool*, TagDeclKind, Sym spelling, SrcLoc); -const TagDecl* type_tag_get(Pool*, TagId); -TypeRecordBuilder* type_record_begin(Pool*, TypeKind kind, TagId, - Sym tag); /* TY_STRUCT or TY_UNION */ - -/* Phase 2 record options carried from __attribute__((packed))/aligned(N)). - * Zero-initialized = natural layout. Fields kept as a struct so future - * options (e.g. transparent_union) don't churn the call sites. */ -typedef struct TypeRecordOpts { - u8 packed; - u16 align_override; -} TypeRecordOpts; - -/* Variant of type_record_begin that records record-level attribute - * options on the builder; type_record_end copies them to Type.rec. The - * plain type_record_begin is equivalent to passing a zeroed - * TypeRecordOpts. */ -TypeRecordBuilder* type_record_begin_ex(Pool*, TypeKind kind, TagId, - Sym tag, TypeRecordOpts); -void type_record_field(TypeRecordBuilder*, Field); -const Type* type_record_end(Pool*, TypeRecordBuilder*); -/* Forward-declared struct/union: returns a mutable, incomplete Type with the - * given tag identity but no fields. Pointers to it are valid; sizeof/member - * access are not until type_record_install is called. 
The same Type* identity - * survives completion, so any TY_PTR(forward) pointer types remain valid. */ -Type* type_record_forward(Pool*, TypeKind kind, TagId, Sym tag); -void type_record_install(Type* forward, const Field* fields, u16 nfields); -const Type* type_enum(Pool*, TagId, Sym tag, const Type* base); - -const Type* type_unqual(Pool*, const Type*); -const Type* type_promoted(Pool*, const Type*); -int type_compatible(const Type*, const Type*); -int type_is_arith(const Type*); -int type_is_int(const Type*); -int type_is_ptr(const Type*); +#include "../../lang/c/type/type.h" #endif diff --git a/test/parse/cases/asm_02_file_scope.skip b/test/parse/cases/asm_02_file_scope.skip @@ -0,0 +1 @@ +file-scope asm is disabled while the C frontend is isolated from assembler internals