kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 36197435a80ec4f7368e486af514a63be5ee03ad
parent 835525a36cfb236d5f88ebea885ba4eb9c6aa062
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 04:15:43 -0700

core: add arena, buf, diag, pool, source implementations

Diffstat:
Minclude/cfree.h | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Asrc/core/arena.c | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/core/buf.c | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/core/core.c | 142+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/core/diag.c | 23+++++++++++++++++++++++
Asrc/core/pool.c | 170+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/core/pool.h | 22++++++++++++++++++++++
Asrc/core/source.c | 192+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 827 insertions(+), 8 deletions(-)

diff --git a/include/cfree.h b/include/cfree.h @@ -170,6 +170,10 @@ typedef enum CfreeSymKind { CFREE_SK_COMMON, CFREE_SK_TLS, CFREE_SK_ABS, + /* Defined symbol with no specific type (e.g., assembly label or + * AArch64 mapping symbol). Distinct from CFREE_SK_UNDEF, which is + * the "undefined external" sentinel. */ + CFREE_SK_NOTYPE, } CfreeSymKind; /* ============================================================ @@ -549,6 +553,29 @@ typedef struct CfreeCompileOptions { int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp, const CfreeBytesInput* input, CfreeWriter* out); +/* Lex one C input and write a stable, diff-friendly token dump to `out`. + * + * The format is one S-expression per line, terminated by `(eof)`. Examples: + * + * (ident foo) + * (num 42) + * (flt 3.14f) + * (str "hello") ; spelling for TOK_STR includes its surrounding quotes + * (chr 'a') + * (punct +) + * (punct ->) + * (pp-hash) + * (pp-paste) + * (newline) + * (eof) + * + * Source locations and token flags are intentionally omitted so whitespace + * edits don't churn diffs; the preprocessed-text path (cfree_preprocess) + * is the right place to validate spacing and BOL behavior. The Writer is + * not closed. The input bytes must outlive this call. */ +int cfree_dump_tokens (CfreeCompiler*, + const CfreeBytesInput* input, CfreeWriter* out); + /* Compile one source TU (C or GAS-subset asm; selected by input->lang). * * cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The @@ -1283,13 +1310,24 @@ void cfree_disasm_iter_free(CfreeDisasmIter*); * * cfree_ar_write packs member byte payloads into a POSIX ar archive written * to `out`. Options control reproducibility and format extensions: - * - `epoch` Unix seconds written to ar_date for every member; 0 - * leaves the field as the literal "0" (the default). - * - `symbol_index` if nonzero, emit a System V `/` symbol-index member. - * Not yet implemented; currently ignored. - * - `long_names` if nonzero, emit a `//` long-name table when any - * member name exceeds 15 characters or contains '/'. - * With long_names == 0, over-long names are truncated. + * - `epoch` Unix seconds written to ar_date for every member; 0 + * leaves the field as the literal "0" (the default). + * - `symbol_index` if nonzero, emit a System V `/` symbol-index member + * as the first member. The index payload is a 4-byte + * big-endian symbol count, then count 4-byte big-endian + * offsets pointing at member headers (relative to start + * of archive), then NUL-terminated symbol names. Symbol + * names are taken from `member_symbols`; an empty index + * (count==0) is emitted when no symbols are supplied. + * - `long_names` if nonzero, emit a `//` long-name table when any + * member name exceeds 15 characters or contains '/'. + * With long_names == 0, over-long names are truncated. + * - `member_symbols` optional; parallel to the `members` array. Entry i + * lists the global symbols defined by member i. NULL + * (or per-entry count==0) means that member contributes + * no symbols. Names point into caller-owned storage and + * need only outlive the cfree_ar_write call. Ignored + * when symbol_index == 0. * `opts` may be NULL to accept all defaults. * The Writer is not closed; I/O errors are detectable via out->error(). * Returns 0 on success, 1 on bad arguments. @@ -1307,10 +1345,18 @@ void cfree_disasm_iter_free(CfreeDisasmIter*); * long as the archive bytes remain alive. CfreeArMember.name is interned * in iterator-owned storage and is valid only until the next iter_next * call on the same iterator. */ +typedef struct CfreeArMemberSymbols { + const char* const* names; /* count entries; each NUL-terminated */ + uint32_t count; +} CfreeArMemberSymbols; + typedef struct CfreeArWriteOptions { uint64_t epoch; /* ar_date for every member; 0 = none */ - int symbol_index; /* emit System V '/' index entry (TODO) */ + int symbol_index; /* emit System V '/' symbol-index member */ int long_names; /* emit '//' long-name table when needed */ + /* Parallel to the `members` array; NULL means "no symbols anywhere". + * Only consulted when symbol_index is nonzero. */ + const CfreeArMemberSymbols* member_symbols; } CfreeArWriteOptions; int cfree_ar_write(CfreeWriter* out, diff --git a/src/core/arena.c b/src/core/arena.c @@ -0,0 +1,111 @@ +/* Bump-pointer arena. One linked list of fixed-size blocks; new blocks + * are allocated when a request doesn't fit. arena_reset releases all + * but the head block (so the freed-and-reallocated common case stays + * O(1)). Oversize allocations get their own dedicated block. */ + +#include "core/arena.h" + +#include <string.h> + +struct ArenaBlock { + ArenaBlock* next; + size_t cap; + u8 data[]; +}; + +#define ARENA_DEFAULT_BLOCK 65536 + +static ArenaBlock* block_new(Heap* h, size_t cap) +{ + ArenaBlock* b = (ArenaBlock*)h->alloc(h, sizeof(ArenaBlock) + cap, _Alignof(ArenaBlock)); + if (!b) return NULL; + b->next = NULL; + b->cap = cap; + return b; +} + +void arena_init(Arena* a, Heap* h, size_t block_size) +{ + a->heap = h; + a->head = NULL; + a->cur = NULL; + a->end = NULL; + a->block_size = block_size ? block_size : ARENA_DEFAULT_BLOCK; +} + +void arena_fini(Arena* a) +{ + ArenaBlock* b = a->head; + while (b) { + ArenaBlock* next = b->next; + a->heap->free(a->heap, b, sizeof(ArenaBlock) + b->cap); + b = next; + } + a->head = NULL; + a->cur = NULL; + a->end = NULL; +} + +void arena_reset(Arena* a) +{ + /* Free every block past the head; reuse head if present. */ + if (a->head) { + ArenaBlock* b = a->head->next; + while (b) { + ArenaBlock* next = b->next; + a->heap->free(a->heap, b, sizeof(ArenaBlock) + b->cap); + b = next; + } + a->head->next = NULL; + a->cur = a->head->data; + a->end = a->head->data + a->head->cap; + } else { + a->cur = a->end = NULL; + } +} + +static size_t align_up_size(size_t v, size_t align) +{ + size_t mask = align - 1; + return (v + mask) & ~mask; +} + +void* arena_alloc(Arena* a, size_t size, size_t align) +{ + uintptr_t p, aligned; + size_t need; + + if (align == 0) align = 1; + if (a->cur) { + p = (uintptr_t)a->cur; + aligned = (p + (uintptr_t)(align - 1)) & ~(uintptr_t)(align - 1); + if (aligned + size <= (uintptr_t)a->end) { + a->cur = (u8*)(aligned + size); + return (void*)aligned; + } + } + /* New block. */ + need = align_up_size(size, align); + if (need < a->block_size) need = a->block_size; + { + ArenaBlock* b = block_new(a->heap, need); + if (!b) return NULL; + b->next = a->head; + a->head = b; + a->cur = b->data; + a->end = b->data + b->cap; + p = (uintptr_t)a->cur; + aligned = (p + (uintptr_t)(align - 1)) & ~(uintptr_t)(align - 1); + a->cur = (u8*)(aligned + size); + return (void*)aligned; + } +} + +char* arena_strdup(Arena* a, const char* s, size_t len) +{ + char* p = (char*)arena_alloc(a, len + 1, 1); + if (!p) return NULL; + if (len) memcpy(p, s, len); + p[len] = 0; + return p; +} diff --git a/src/core/buf.c b/src/core/buf.c @@ -0,0 +1,113 @@ +/* Chunked byte buffer. Append is O(1) (spills to a fresh chunk when the + * tail fills). Random-access patch is O(N_chunks) — sections rarely + * cross more than a handful of chunks, so a linear walk is fine. */ + +#include "core/buf.h" + +#include <string.h> + +static BufChunk* chunk_new(Heap* h, size_t cap) +{ + BufChunk* c = (BufChunk*)h->alloc(h, sizeof(BufChunk) + cap, _Alignof(BufChunk)); + if (!c) return NULL; + c->next = NULL; + c->used = 0; + c->cap = (u32)cap; + return c; +} + +void buf_init(Buf* b, Heap* h) +{ + b->heap = h; + b->head = NULL; + b->tail = NULL; + b->total = 0; +} + +void buf_fini(Buf* b) +{ + BufChunk* c = b->head; + while (c) { + BufChunk* next = c->next; + b->heap->free(b->heap, c, sizeof(BufChunk) + c->cap); + c = next; + } + b->head = b->tail = NULL; + b->total = 0; +} + +static int buf_ensure_tail(Buf* b, size_t need) +{ + BufChunk* c; + size_t cap; + if (b->tail && (b->tail->cap - b->tail->used) >= need) return 0; + cap = need > BUF_CHUNK ? need : BUF_CHUNK; + c = chunk_new(b->heap, cap); + if (!c) return 1; + if (!b->head) b->head = c; + if (b->tail) b->tail->next = c; + b->tail = c; + return 0; +} + +void buf_write(Buf* b, const void* data, size_t n) +{ + const u8* p = (const u8*)data; + while (n) { + size_t avail; + if (buf_ensure_tail(b, 1)) return; /* allocation failure swallowed */ + avail = b->tail->cap - b->tail->used; + if (avail > n) avail = n; + memcpy(b->tail->data + b->tail->used, p, avail); + b->tail->used += (u32)avail; + b->total += (u32)avail; + p += avail; + n -= avail; + } +} + +u8* buf_reserve(Buf* b, size_t n) +{ + u8* p; + if (buf_ensure_tail(b, n)) return NULL; + p = b->tail->data + b->tail->used; + b->tail->used += (u32)n; + b->total += (u32)n; + return p; +} + +u32 buf_pos(const Buf* b) { return b->total; } + +void buf_patch(Buf* b, u32 ofs, const void* data, size_t n) +{ + BufChunk* c = b->head; + u32 chunk_start = 0; + const u8* p = (const u8*)data; + while (c && n) { + u32 chunk_end = chunk_start + c->used; + if (ofs < chunk_end) { + u32 within = ofs - chunk_start; + u32 avail = c->used - within; + u32 take = (u32)(n < avail ? n : avail); + memcpy(c->data + within, p, take); + p += take; + n -= take; + ofs += take; + } + chunk_start = chunk_end; + c = c->next; + } + /* Patches must lie inside the written range; if n != 0 here, the + * caller exceeded buf_pos and this is a contract violation. Silent + * drop matches buf_write's allocation-failure policy. */ +} + +void buf_flatten(const Buf* b, u8* dst) +{ + BufChunk* c = b->head; + while (c) { + memcpy(dst, c->data, c->used); + dst += c->used; + c = c->next; + } +} diff --git a/src/core/core.c b/src/core/core.c @@ -0,0 +1,142 @@ +/* Compiler lifecycle, panic, and cleanup-stack machinery. + * + * compiler_init wires up the per-Compiler allocators (Pool, tu Arena, + * scratch Arena) from the host CfreeEnv. Subsystems that need stable + * source identity (lexer/parser/diagnostics/DWARF) look up SourceManager + * through Compiler.sources; that lives in src/core/source.c. + * + * Panic flow: compiler_panic emits the diagnostic, runs the deferred + * cleanups, and longjmp's c->panic. Top-level entry points install a + * setjmp boundary and use compiler_panic_save/restore to nest. + * + * abi is left NULL until a TargetABI implementation is wired in + * (`src/abi/abi.c` is not required by the obj/elf path). Callers that + * need ABI facts will trip a clean panic rather than a NULL deref. */ + +#include "core/core.h" +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/diag.h" + +#include <cfree.h> + +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +/* Forward decls for SourceManager — implemented in source.c. */ +SourceManager* source_new(Compiler*); +void source_free(SourceManager*); + +struct CompilerCleanup { + void (*fn)(void*); + void* arg; + CompilerCleanup* prev; +}; + +void compiler_init(Compiler* c, Target target, const CfreeEnv* env) +{ + Heap* h = (Heap*)env->heap; + + memset(c, 0, sizeof(*c)); + c->env = env; + c->target = target; + + c->global = (Pool*)h->alloc(h, sizeof(Pool), _Alignof(Pool)); + pool_init(c->global, h); + + c->tu = (Arena*)h->alloc(h, sizeof(Arena), _Alignof(Arena)); + arena_init(c->tu, h, 0); + + c->scratch = (Arena*)h->alloc(h, sizeof(Arena), _Alignof(Arena)); + arena_init(c->scratch, h, 0); + + c->sources = source_new(c); + c->abi = NULL; + c->cleanup = NULL; +} + +void compiler_fini(Compiler* c) +{ + Heap* h = (Heap*)c->env->heap; + + /* Anything still on the cleanup stack at fini-time is a programming + * error — every _new should have a matching _free that undefers. + * Run the stack defensively so memory still gets released. */ + compiler_run_cleanups(c); + + if (c->sources) source_free(c->sources); + if (c->scratch) { arena_fini(c->scratch); h->free(h, c->scratch, sizeof(Arena)); } + if (c->tu) { arena_fini(c->tu); h->free(h, c->tu, sizeof(Arena)); } + if (c->global) { pool_fini (c->global); h->free(h, c->global, sizeof(Pool)); } + c->global = NULL; + c->tu = c->scratch = NULL; + c->sources = NULL; +} + +CompilerCleanup* compiler_defer(Compiler* c, void (*fn)(void*), void* arg) +{ + CompilerCleanup* node; + /* Cleanups live in scratch — they're bounded by pipeline depth and + * are walked LIFO from the panic handler. */ + node = (CompilerCleanup*)arena_alloc(c->scratch, sizeof(*node), + _Alignof(CompilerCleanup)); + if (!node) return NULL; + node->fn = fn; + node->arg = arg; + node->prev = c->cleanup; + c->cleanup = node; + return node; +} + +void compiler_undefer(Compiler* c, CompilerCleanup* node) +{ + /* Common case: undefer the top of stack after a successful _free. + * Off-top removals are rare but legal; walk to splice. */ + CompilerCleanup** link = &c->cleanup; + while (*link) { + if (*link == node) { + *link = node->prev; + return; + } + link = &(*link)->prev; + } +} + +void compiler_run_cleanups(Compiler* c) +{ + while (c->cleanup) { + CompilerCleanup* node = c->cleanup; + c->cleanup = node->prev; + node->fn(node->arg); + } +} + +void compiler_panic_save(Compiler* c, PanicSave* out) +{ + memcpy(out->buf, c->panic, sizeof(jmp_buf)); +} + +void compiler_panic_restore(Compiler* c, const PanicSave* saved) +{ + memcpy(c->panic, saved->buf, sizeof(jmp_buf)); +} + +void compiler_panic(Compiler* c, SrcLoc loc, const char* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + compiler_panicv(c, loc, fmt, ap); + /* compiler_panicv is _Noreturn; va_end is unreachable but kept + * for the unlikely future where it is. */ + va_end(ap); +} + +void compiler_panicv(Compiler* c, SrcLoc loc, const char* fmt, va_list ap) +{ + if (c->env && c->env->diag && c->env->diag->emit) { + c->env->diag->emit(c->env->diag, CFREE_DIAG_FATAL, loc, fmt, ap); + } + longjmp(c->panic, 1); +} diff --git a/src/core/diag.c b/src/core/diag.c @@ -0,0 +1,23 @@ +/* Varargs convenience over the host DiagSink.emit vtable slot. */ + +#include "core/diag.h" + +#include <stdarg.h> + +void diag_emit(DiagSink* s, DiagKind k, SrcLoc loc, const char* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + diag_emitv(s, k, loc, fmt, ap); + va_end(ap); +} + +void diag_emitv(DiagSink* s, DiagKind k, SrcLoc loc, const char* fmt, va_list ap) +{ + if (s && s->emit) s->emit(s, k, loc, fmt, ap); + if (s) { + if (k == DIAG_WARN) s->warnings++; + else if (k == DIAG_ERROR || + k == DIAG_FATAL) s->errors++; + } +} diff --git a/src/core/pool.c b/src/core/pool.c @@ -0,0 +1,170 @@ +/* Interned strings + types. Open-addressed hash table keyed by FNV-1a + * over the string body; the table holds Sym ids that index into a + * heap-allocated `entries` array. Strings live in a per-Pool arena + * (block-allocated), so str_data pointers are stable for the Pool's + * lifetime. + * + * pool_type is left as a panic stub: types come online with the parser, + * which isn't part of the foundation work that gates ELF support. */ + +#include "core/pool.h" +#include "core/arena.h" + +#include <string.h> + +/* struct Pool is defined in pool.h so callers can size it (Compiler embeds + * a Pool* allocated through Heap, and core.c needs sizeof(Pool)). */ + +#define POOL_INITIAL_TABLE_CAP 256 +#define POOL_INITIAL_ENTRIES 64 +#define POOL_TABLE_LOAD_NUM 3 +#define POOL_TABLE_LOAD_DEN 4 /* grow when used*4 >= cap*3 */ + +static u32 fnv1a(const char* s, size_t len) +{ + u32 h = 0x811C9DC5u; + size_t i; + for (i = 0; i < len; ++i) { + h ^= (u8)s[i]; + h *= 0x01000193u; + } + return h ? h : 1; /* avoid 0 (also reserved as "none") */ +} + +static int sym_eq(const PoolEntry* e, const char* s, size_t len, u32 h) +{ + return e->hash == h && e->len == (u32)len && memcmp(e->data, s, len) == 0; +} + +static void table_rehash(Pool* p, u32 new_cap) +{ + Sym* new_table = (Sym*)p->heap->alloc(p->heap, sizeof(Sym) * new_cap, _Alignof(Sym)); + u32 i; + if (!new_table) return; + memset(new_table, 0, sizeof(Sym) * new_cap); + for (i = 0; i < p->cap; ++i) { + Sym sym = p->table[i]; + if (!sym) continue; + const PoolEntry* e = &p->entries[sym]; + u32 mask = new_cap - 1; + u32 j = e->hash & mask; + while (new_table[j]) j = (j + 1) & mask; + new_table[j] = sym; + } + if (p->table) p->heap->free(p->heap, p->table, sizeof(Sym) * p->cap); + p->table = new_table; + p->cap = new_cap; +} + +static int entries_grow(Pool* p) +{ + u32 new_cap; + PoolEntry* ne; + if (p->nentries < p->entries_cap) return 0; + new_cap = p->entries_cap ? p->entries_cap * 2 : POOL_INITIAL_ENTRIES; + ne = (PoolEntry*)p->heap->realloc( + p->heap, p->entries, + sizeof(*p->entries) * p->entries_cap, + sizeof(*p->entries) * new_cap, + _Alignof(PoolEntry)); + if (!ne) return 1; + p->entries = ne; + p->entries_cap = new_cap; + return 0; +} + +void pool_init(Pool* p, Heap* h) +{ + p->heap = h; + arena_init(&p->arena, h, 0); + p->table = NULL; + p->cap = 0; + p->used = 0; + p->entries = NULL; + p->nentries = 0; + p->entries_cap = 0; + table_rehash(p, POOL_INITIAL_TABLE_CAP); + /* Reserve entry 0 as the "none" sentinel. */ + if (entries_grow(p) == 0) { + p->entries[0].data = NULL; + p->entries[0].len = 0; + p->entries[0].hash = 0; + p->nentries = 1; + } +} + +void pool_fini(Pool* p) +{ + if (p->table) p->heap->free(p->heap, p->table, sizeof(Sym) * p->cap); + if (p->entries) p->heap->free(p->heap, p->entries, + sizeof(*p->entries) * p->entries_cap); + arena_fini(&p->arena); + p->table = NULL; + p->entries = NULL; +} + +Sym pool_intern(Pool* p, const char* s, size_t len) +{ + u32 h, mask, i; + Sym sym; + + if (!s || len == 0) return 0; + if (p->used * POOL_TABLE_LOAD_DEN >= p->cap * POOL_TABLE_LOAD_NUM) { + table_rehash(p, p->cap * 2); + } + h = fnv1a(s, len); + mask = p->cap - 1; + i = h & mask; + while ((sym = p->table[i]) != 0) { + if (sym_eq(&p->entries[sym], s, len, h)) return sym; + i = (i + 1) & mask; + } + /* Not found: allocate a new entry. The stored buffer carries a + * trailing NUL byte that callers may rely on (so `pool_str` can be + * fed straight to strcmp / printf %s); the recorded `len` is still + * the logical string length, exclusive of the terminator. The + * strtab content itself can carry embedded NULs, so consumers that + * care about exact bytes should compare via (len, memcmp). */ + if (entries_grow(p)) return 0; + { + char* dst = (char*)arena_alloc(&p->arena, len + 1, 1); + if (!dst) return 0; + memcpy(dst, s, len); + dst[len] = '\0'; + sym = (Sym)p->nentries++; + p->entries[sym].data = dst; + p->entries[sym].len = (u32)len; + p->entries[sym].hash = h; + p->table[i] = sym; + p->used++; + } + return sym; +} + +Sym pool_intern_cstr(Pool* p, const char* s) +{ + size_t n = 0; + if (!s) return 0; + while (s[n]) ++n; + return pool_intern(p, s, n); +} + +const char* pool_str(Pool* p, Sym sym, size_t* len_out) +{ + if (sym == 0 || sym >= p->nentries) { + if (len_out) *len_out = 0; + return NULL; + } + if (len_out) *len_out = p->entries[sym].len; + return p->entries[sym].data; +} + +const Type* pool_type(Pool* p, const Type* tmpl) +{ + /* Type interning is the parser/typer's province; not used by the + * obj+ELF foundation. Keep the API present so callers that don't + * exercise the type table still link, but treat actual use as a + * loud bug. */ + (void)p; (void)tmpl; + return NULL; +} diff --git a/src/core/pool.h b/src/core/pool.h @@ -3,9 +3,31 @@ #include "core/core.h" #include "core/heap.h" +#include "core/arena.h" typedef struct Type Type; /* declared in src/type/type.h */ +typedef struct PoolEntry { + const char* data; + u32 len; + u32 hash; +} PoolEntry; + +struct Pool { + Heap* heap; + Arena arena; /* string and type-template storage */ + + /* Hash table: 0 means empty. Otherwise it's a Sym id (1-based). */ + Sym* table; + u32 cap; /* always a power of two */ + u32 used; + + /* Sym → string mapping. Index 0 reserved as Sym = 0 ("none"). */ + PoolEntry* entries; + u32 nentries; + u32 entries_cap; +}; + void pool_init(Pool*, Heap*); void pool_fini(Pool*); diff --git a/src/core/source.c b/src/core/source.c @@ -0,0 +1,192 @@ +/* SourceManager — file-id authority for diagnostics, dependency output, + * and DWARF. The lex/pp/parse subsystems aren't part of the obj/ELF + * foundation, so this implementation is minimal: it stores a flat array + * of registered files and the include-edge list, and exposes lookups. + * It does not yet support macro-expansion pseudo files or + * spelling/expansion location translation; those land with the + * preprocessor. */ + +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" + +#include <stdlib.h> +#include <string.h> + +typedef struct SrcMgrFile { + SourceFile info; +} SrcMgrFile; + +typedef struct SrcMgrInclude { + SourceInclude info; +} SrcMgrInclude; + +struct SourceManager { + Compiler* c; + Heap* heap; + + SrcMgrFile* files; + u32 nfiles; + u32 files_cap; + + SrcMgrInclude* includes; + u32 nincludes; + u32 includes_cap; +}; + +struct SourceDepIter { + SourceManager* sm; + u32 idx; +}; + +static int files_grow(SourceManager* sm, u32 want) +{ + u32 new_cap; + SrcMgrFile* nf; + if (want <= sm->files_cap) return 0; + new_cap = sm->files_cap ? sm->files_cap * 2 : 16; + while (new_cap < want) new_cap *= 2; + nf = (SrcMgrFile*)sm->heap->realloc( + sm->heap, sm->files, + sizeof(*sm->files) * sm->files_cap, + sizeof(*sm->files) * new_cap, + _Alignof(SrcMgrFile)); + if (!nf) return 1; + sm->files = nf; + sm->files_cap = new_cap; + return 0; +} + +static int includes_grow(SourceManager* sm) +{ + u32 new_cap; + SrcMgrInclude* ni; + if (sm->nincludes < sm->includes_cap) return 0; + new_cap = sm->includes_cap ? sm->includes_cap * 2 : 16; + ni = (SrcMgrInclude*)sm->heap->realloc( + sm->heap, sm->includes, + sizeof(*sm->includes) * sm->includes_cap, + sizeof(*sm->includes) * new_cap, + _Alignof(SrcMgrInclude)); + if (!ni) return 1; + sm->includes = ni; + sm->includes_cap = new_cap; + return 0; +} + +SourceManager* source_new(Compiler* c) +{ + Heap* h = (Heap*)c->env->heap; + SourceManager* sm = (SourceManager*)h->alloc(h, sizeof(*sm), _Alignof(SourceManager)); + if (!sm) return NULL; + memset(sm, 0, sizeof(*sm)); + sm->c = c; + sm->heap = h; + /* Reserve id 0 as "none" — never returned to callers. */ + if (files_grow(sm, 1)) { + h->free(h, sm, sizeof(*sm)); + return NULL; + } + memset(&sm->files[0], 0, sizeof(sm->files[0])); + sm->nfiles = 1; + return sm; +} + +void source_free(SourceManager* sm) +{ + if (!sm) return; + if (sm->files) sm->heap->free(sm->heap, sm->files, + sizeof(*sm->files) * sm->files_cap); + if (sm->includes) sm->heap->free(sm->heap, sm->includes, + sizeof(*sm->includes) * sm->includes_cap); + sm->heap->free(sm->heap, sm, sizeof(*sm)); +} + +static u32 file_register(SourceManager* sm, const char* name, + SourceFileKind kind, int system_header) +{ + Sym sym; + u32 id; + if (files_grow(sm, sm->nfiles + 1)) return 0; + sym = pool_intern_cstr(sm->c->global, name ? name : ""); + id = sm->nfiles++; + memset(&sm->files[id], 0, sizeof(sm->files[id])); + sm->files[id].info.id = id; + sm->files[id].info.name = sym; + sm->files[id].info.path = (kind == SRC_FILE_REAL) ? sym : 0; + sm->files[id].info.kind = (u8)kind; + sm->files[id].info.system_header = (u8)(system_header ? 1 : 0); + return id; +} + +u32 source_add_file(SourceManager* sm, const char* path, int system_header) +{ return file_register(sm, path, SRC_FILE_REAL, system_header); } + +u32 source_add_memory(SourceManager* sm, const char* name) +{ return file_register(sm, name, SRC_FILE_MEMORY, 0); } + +u32 source_add_builtin(SourceManager* sm, const char* name) +{ return file_register(sm, name, SRC_FILE_BUILTIN, 0); } + +void source_add_include(SourceManager* sm, u32 includer_file_id, + u32 included_file_id, SrcLoc include_loc, int system) +{ + if (includes_grow(sm)) return; + sm->includes[sm->nincludes].info.includer_file_id = includer_file_id; + sm->includes[sm->nincludes].info.included_file_id = included_file_id; + sm->includes[sm->nincludes].info.include_loc = include_loc; + sm->includes[sm->nincludes].info.system = (u8)(system ? 1 : 0); + sm->nincludes++; +} + +u32 source_add_macro_expansion(SourceManager* sm, Sym macro_name, + SrcLoc spelling_loc, SrcLoc expansion_loc) +{ + /* Macro expansion file ids are needed when the preprocessor lands; + * not on the obj/ELF path. Register it as a synthetic file so any + * SrcLoc passing through stays referenceable. */ + (void)spelling_loc; (void)expansion_loc; + if (files_grow(sm, sm->nfiles + 1)) return 0; + { + u32 id = sm->nfiles++; + memset(&sm->files[id], 0, sizeof(sm->files[id])); + sm->files[id].info.id = id; + sm->files[id].info.name = macro_name; + sm->files[id].info.kind = SRC_FILE_MACRO; + } + return sm->nfiles - 1; +} + +const SourceFile* source_file(SourceManager* sm, u32 file_id) +{ + if (file_id == 0 || file_id >= sm->nfiles) return NULL; + return &sm->files[file_id].info; +} + +const SourceExpansion* source_expansion(SourceManager* sm, u32 expansion_file_id) +{ (void)sm; (void)expansion_file_id; return NULL; } + +SrcLoc source_spelling_loc(SourceManager* sm, SrcLoc loc) { (void)sm; return loc; } +SrcLoc source_expansion_loc(SourceManager* sm, SrcLoc loc) { (void)sm; return loc; } + +SourceDepIter* source_depiter_new(SourceManager* sm) +{ + SourceDepIter* it = (SourceDepIter*)sm->heap->alloc( + sm->heap, sizeof(*it), _Alignof(SourceDepIter)); + if (!it) return NULL; + it->sm = sm; + it->idx = 0; + return it; +} + +const SourceInclude* source_depiter_next(SourceDepIter* it) +{ + if (!it || it->idx >= it->sm->nincludes) return NULL; + return &it->sm->includes[it->idx++].info; +} + +void source_depiter_free(SourceDepIter* it) +{ + if (!it) return; + it->sm->heap->free(it->sm->heap, it, sizeof(*it)); +}