commit 36197435a80ec4f7368e486af514a63be5ee03ad
parent 835525a36cfb236d5f88ebea885ba4eb9c6aa062
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 04:15:43 -0700
core: add arena, buf, diag, pool, source implementations
Diffstat:
| M | include/cfree.h | | | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- |
| A | src/core/arena.c | | | 111 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/core/buf.c | | | 113 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/core/core.c | | | 142 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/core/diag.c | | | 23 | +++++++++++++++++++++++ |
| A | src/core/pool.c | | | 170 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/core/pool.h | | | 22 | ++++++++++++++++++++++ |
| A | src/core/source.c | | | 192 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
8 files changed, 827 insertions(+), 8 deletions(-)
diff --git a/include/cfree.h b/include/cfree.h
@@ -170,6 +170,10 @@ typedef enum CfreeSymKind {
CFREE_SK_COMMON,
CFREE_SK_TLS,
CFREE_SK_ABS,
+ /* Defined symbol with no specific type (e.g., assembly label or
+ * AArch64 mapping symbol). Distinct from CFREE_SK_UNDEF, which is
+ * the "undefined external" sentinel. */
+ CFREE_SK_NOTYPE,
} CfreeSymKind;
/* ============================================================
@@ -549,6 +553,29 @@ typedef struct CfreeCompileOptions {
int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp,
const CfreeBytesInput* input, CfreeWriter* out);
+/* Lex one C input and write a stable, diff-friendly token dump to `out`.
+ *
+ * The format is one S-expression per line, terminated by `(eof)`. Examples:
+ *
+ * (ident foo)
+ * (num 42)
+ * (flt 3.14f)
+ * (str "hello") ; spelling for TOK_STR includes its surrounding quotes
+ * (chr 'a')
+ * (punct +)
+ * (punct ->)
+ * (pp-hash)
+ * (pp-paste)
+ * (newline)
+ * (eof)
+ *
+ * Source locations and token flags are intentionally omitted so whitespace
+ * edits don't churn diffs; the preprocessed-text path (cfree_preprocess)
+ * is the right place to validate spacing and BOL behavior. The Writer is
+ * not closed. The input bytes must outlive this call. */
+int cfree_dump_tokens (CfreeCompiler*,
+ const CfreeBytesInput* input, CfreeWriter* out);
+
/* Compile one source TU (C or GAS-subset asm; selected by input->lang).
*
* cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The
@@ -1283,13 +1310,24 @@ void cfree_disasm_iter_free(CfreeDisasmIter*);
*
* cfree_ar_write packs member byte payloads into a POSIX ar archive written
* to `out`. Options control reproducibility and format extensions:
- * - `epoch` Unix seconds written to ar_date for every member; 0
- * leaves the field as the literal "0" (the default).
- * - `symbol_index` if nonzero, emit a System V `/` symbol-index member.
- * Not yet implemented; currently ignored.
- * - `long_names` if nonzero, emit a `//` long-name table when any
- * member name exceeds 15 characters or contains '/'.
- * With long_names == 0, over-long names are truncated.
+ * - `epoch` Unix seconds written to ar_date for every member; 0
+ * leaves the field as the literal "0" (the default).
+ * - `symbol_index` if nonzero, emit a System V `/` symbol-index member
+ * as the first member. The index payload is a 4-byte
+ * big-endian symbol count, then count 4-byte big-endian
+ * offsets pointing at member headers (relative to start
+ * of archive), then NUL-terminated symbol names. Symbol
+ * names are taken from `member_symbols`; an empty index
+ * (count==0) is emitted when no symbols are supplied.
+ * - `long_names` if nonzero, emit a `//` long-name table when any
+ * member name exceeds 15 characters or contains '/'.
+ * With long_names == 0, over-long names are truncated.
+ * - `member_symbols` optional; parallel to the `members` array. Entry i
+ * lists the global symbols defined by member i. NULL
+ * (or per-entry count==0) means that member contributes
+ * no symbols. Names point into caller-owned storage and
+ * need only outlive the cfree_ar_write call. Ignored
+ * when symbol_index == 0.
* `opts` may be NULL to accept all defaults.
* The Writer is not closed; I/O errors are detectable via out->error().
* Returns 0 on success, 1 on bad arguments.
@@ -1307,10 +1345,18 @@ void cfree_disasm_iter_free(CfreeDisasmIter*);
* long as the archive bytes remain alive. CfreeArMember.name is interned
* in iterator-owned storage and is valid only until the next iter_next
* call on the same iterator. */
+/* Symbol names contributed by one archive member: `names` holds `count`
+ * NUL-terminated strings. Referenced from
+ * CfreeArWriteOptions.member_symbols. */
+typedef struct CfreeArMemberSymbols {
+ const char* const* names; /* count entries; each NUL-terminated */
+ uint32_t count;
+} CfreeArMemberSymbols;
+
typedef struct CfreeArWriteOptions {
uint64_t epoch; /* ar_date for every member; 0 = none */
- int symbol_index; /* emit System V '/' index entry (TODO) */
+ int symbol_index; /* emit System V '/' symbol-index member */
int long_names; /* emit '//' long-name table when needed */
+ /* Parallel to the `members` array; NULL means "no symbols anywhere".
+ * Only consulted when symbol_index is nonzero. */
+ const CfreeArMemberSymbols* member_symbols;
} CfreeArWriteOptions;
int cfree_ar_write(CfreeWriter* out,
diff --git a/src/core/arena.c b/src/core/arena.c
@@ -0,0 +1,111 @@
+/* Bump-pointer arena. One linked list of fixed-size blocks; new blocks
+ * are allocated when a request doesn't fit. arena_reset releases all
+ * but the head block (so the freed-and-reallocated common case stays
+ * O(1)). Oversize allocations get their own dedicated block. */
+
+#include "core/arena.h"
+
+#include <string.h>
+
+/* One heap allocation backing the arena: this header followed directly
+ * by `cap` payload bytes in the flexible array member. */
+struct ArenaBlock {
+ ArenaBlock* next; /* next block in the arena's singly linked list */
+ size_t cap; /* payload capacity in bytes (header not included) */
+ u8 data[];
+};
+
+#define ARENA_DEFAULT_BLOCK 65536
+
+/* Allocate an unlinked block (next == NULL) with `cap` payload bytes
+ * from `h`; the caller chains it into the arena. Returns NULL when the
+ * heap fails or when header + payload would overflow size_t. */
+static ArenaBlock* block_new(Heap* h, size_t cap)
+{
+    ArenaBlock* b;
+
+    /* sizeof(ArenaBlock) + cap must not wrap: a wrapped size would
+     * yield a tiny allocation while `cap` still claims the full size. */
+    if (cap > (size_t)-1 - sizeof(ArenaBlock)) return NULL;
+
+    b = (ArenaBlock*)h->alloc(h, sizeof(ArenaBlock) + cap, _Alignof(ArenaBlock));
+    if (!b) return NULL;
+    b->next = NULL;
+    b->cap  = cap;
+    return b;
+}
+
+/* Set up an empty arena backed by `h`. Nothing is allocated here; the
+ * first block is created on demand by arena_alloc. A block_size of 0
+ * selects ARENA_DEFAULT_BLOCK. */
+void arena_init(Arena* a, Heap* h, size_t block_size)
+{
+    if (block_size == 0) block_size = ARENA_DEFAULT_BLOCK;
+
+    a->heap       = h;
+    a->block_size = block_size;
+    a->head       = NULL;
+    a->cur = a->end = NULL;
+}
+
+/* Return every block to the heap and leave the arena empty. */
+void arena_fini(Arena* a)
+{
+    ArenaBlock* blk;
+    ArenaBlock* following;
+
+    for (blk = a->head; blk != NULL; blk = following) {
+        following = blk->next; /* read the link before releasing blk */
+        a->heap->free(a->heap, blk, sizeof(ArenaBlock) + blk->cap);
+    }
+    a->head = NULL;
+    a->cur = a->end = NULL;
+}
+
+/* Discard all allocations. The head block (if any) is kept and becomes
+ * the current bump region again; every block behind it is freed. */
+void arena_reset(Arena* a)
+{
+    ArenaBlock* keep = a->head;
+    ArenaBlock* blk;
+
+    if (keep == NULL) {
+        a->cur = a->end = NULL;
+        return;
+    }
+
+    blk = keep->next;
+    while (blk != NULL) {
+        ArenaBlock* following = blk->next;
+        a->heap->free(a->heap, blk, sizeof(ArenaBlock) + blk->cap);
+        blk = following;
+    }
+    keep->next = NULL;
+    a->cur = keep->data;
+    a->end = keep->data + keep->cap;
+}
+
+/* Round address `v` up to the next multiple of `align`, which must be a
+ * power of two. */
+static uintptr_t align_up_ptr(uintptr_t v, size_t align)
+{
+    uintptr_t mask = (uintptr_t)align - 1;
+    return (v + mask) & ~mask;
+}
+
+/* Bump-allocate `size` bytes aligned to `align` (a power of two; 0 is
+ * treated as 1). Falls back to a fresh block when the current one
+ * cannot hold the request. Returns NULL if the heap fails or the
+ * request size overflows. The memory is not zeroed. */
+void* arena_alloc(Arena* a, size_t size, size_t align)
+{
+    uintptr_t aligned;
+    size_t need;
+    ArenaBlock* b;
+
+    if (align == 0) align = 1;
+    if (a->cur) {
+        aligned = align_up_ptr((uintptr_t)a->cur, align);
+        /* Compare against the space that is left instead of forming
+         * aligned + size, which can wrap for very large requests. The
+         * first test also rejects a wrapped align_up_ptr result. */
+        if (aligned >= (uintptr_t)a->cur &&
+            aligned <= (uintptr_t)a->end &&
+            size <= (size_t)((uintptr_t)a->end - aligned)) {
+            a->cur = (u8*)(aligned + size);
+            return (void*)aligned;
+        }
+    }
+
+    /* New block. Block payloads are only guaranteed to be aligned for
+     * ArenaBlock itself, so up to align-1 bytes of padding may be
+     * needed in front of the allocation; reserve them, otherwise an
+     * over-aligned request that fills the block would run past its
+     * end. */
+    if (size > (size_t)-1 - (align - 1)) return NULL;
+    need = size + (align - 1);
+    if (need < a->block_size) need = a->block_size;
+
+    b = block_new(a->heap, need);
+    if (!b) return NULL;
+    b->next = a->head;
+    a->head = b;
+    a->end  = b->data + b->cap;
+    aligned = align_up_ptr((uintptr_t)b->data, align);
+    a->cur  = (u8*)(aligned + size);
+    return (void*)aligned;
+}
+
+/* Copy `len` bytes of `s` into the arena and append a NUL terminator.
+ * `s` is not read when len is 0. Returns NULL if the arena cannot
+ * satisfy the allocation. */
+char* arena_strdup(Arena* a, const char* s, size_t len)
+{
+    char* copy = (char*)arena_alloc(a, len + 1, 1);
+
+    if (copy == NULL) return NULL;
+    if (len != 0) memcpy(copy, s, len);
+    copy[len] = 0;
+    return copy;
+}
diff --git a/src/core/buf.c b/src/core/buf.c
@@ -0,0 +1,113 @@
+/* Chunked byte buffer. Append is O(1) (spills to a fresh chunk when the
+ * tail fills). Random-access patch is O(N_chunks) — sections rarely
+ * cross more than a handful of chunks, so a linear walk is fine. */
+
+#include "core/buf.h"
+
+#include <string.h>
+
+/* Allocate an empty, unlinked chunk with `cap` payload bytes. Returns
+ * NULL when the heap fails or when `cap` does not fit the u32 capacity
+ * field. */
+static BufChunk* chunk_new(Heap* h, size_t cap)
+{
+    BufChunk* c;
+
+    /* The capacity is stored as u32. A silently truncated value would
+     * make the recorded capacity (and the size later handed to free)
+     * disagree with what was actually allocated. */
+    if ((size_t)(u32)cap != cap) return NULL;
+
+    c = (BufChunk*)h->alloc(h, sizeof(BufChunk) + cap, _Alignof(BufChunk));
+    if (!c) return NULL;
+    c->next = NULL;
+    c->used = 0;
+    c->cap  = (u32)cap;
+    return c;
+}
+
+/* Start an empty buffer that allocates its chunks from `h`. */
+void buf_init(Buf* b, Heap* h)
+{
+    b->heap  = h;
+    b->total = 0;
+    b->head = b->tail = NULL;
+}
+
+/* Free every chunk and reset the buffer to the empty state. */
+void buf_fini(Buf* b)
+{
+    BufChunk* chunk;
+    BufChunk* following;
+
+    for (chunk = b->head; chunk != NULL; chunk = following) {
+        following = chunk->next; /* read the link before releasing */
+        b->heap->free(b->heap, chunk, sizeof(BufChunk) + chunk->cap);
+    }
+    b->head = b->tail = NULL;
+    b->total = 0;
+}
+
+/* Make sure the tail chunk has at least `need` free bytes, appending a
+ * new chunk of max(need, BUF_CHUNK) bytes when it does not. Returns 0
+ * on success, 1 on allocation failure. */
+static int buf_ensure_tail(Buf* b, size_t need)
+{
+    BufChunk* last = b->tail;
+    BufChunk* fresh;
+    size_t want;
+
+    if (last != NULL && (last->cap - last->used) >= need) return 0;
+
+    want  = need > BUF_CHUNK ? need : BUF_CHUNK;
+    fresh = chunk_new(b->heap, want);
+    if (fresh == NULL) return 1;
+
+    if (b->head == NULL) b->head = fresh;
+    if (last != NULL) last->next = fresh;
+    b->tail = fresh;
+    return 0;
+}
+
+/* Append `n` bytes from `data`, filling the tail chunk and spilling
+ * into new chunks as needed. If a chunk allocation fails the remaining
+ * bytes are dropped silently. */
+void buf_write(Buf* b, const void* data, size_t n)
+{
+    const u8* src = (const u8*)data;
+
+    while (n > 0) {
+        BufChunk* t;
+        size_t room;
+        size_t step;
+
+        if (buf_ensure_tail(b, 1) != 0) return; /* allocation failure swallowed */
+        t    = b->tail;
+        room = t->cap - t->used;
+        step = room < n ? room : n;
+        memcpy(t->data + t->used, src, step);
+        t->used  += (u32)step;
+        b->total += (u32)step;
+        src += step;
+        n   -= step;
+    }
+}
+
+/* Claim `n` contiguous bytes at the end of the buffer and return a
+ * pointer to them for the caller to fill. The bytes count as written
+ * immediately. Returns NULL on allocation failure. */
+u8* buf_reserve(Buf* b, size_t n)
+{
+    BufChunk* t;
+    u8* out;
+
+    if (buf_ensure_tail(b, n) != 0) return NULL;
+    t   = b->tail;
+    out = t->data + t->used;
+    t->used  += (u32)n;
+    b->total += (u32)n;
+    return out;
+}
+
+/* Number of bytes appended so far (written plus reserved). */
+u32 buf_pos(const Buf* b)
+{
+    return b->total;
+}
+
+/* Overwrite `n` already-written bytes starting at logical offset `ofs`
+ * with `data`. The walk goes chunk by chunk, so a patch may straddle
+ * chunk boundaries. */
+void buf_patch(Buf* b, u32 ofs, const void* data, size_t n)
+{
+    const u8* src = (const u8*)data;
+    BufChunk* c;
+    u32 base  = 0; /* logical offset of the first byte of `c` */
+    u32 limit = 0; /* logical offset one past the last used byte of `c` */
+
+    for (c = b->head; c != NULL && n != 0; base = limit, c = c->next) {
+        u32 within, avail, take;
+
+        limit = base + c->used;
+        if (ofs >= limit) continue; /* patch starts in a later chunk */
+
+        within = ofs - base;
+        avail  = c->used - within;
+        take   = (u32)(n < avail ? n : avail);
+        memcpy(c->data + within, src, take);
+        src += take;
+        n   -= take;
+        ofs += take;
+    }
+    /* Patches must lie inside the written range; if n != 0 here, the
+     * caller exceeded buf_pos and this is a contract violation. Silent
+     * drop matches buf_write's allocation-failure policy. */
+}
+
+/* Copy the used bytes of every chunk, in order, into `dst`. The caller
+ * provides room for buf_pos(b) bytes. */
+void buf_flatten(const Buf* b, u8* dst)
+{
+    const BufChunk* c;
+
+    for (c = b->head; c != NULL; c = c->next) {
+        memcpy(dst, c->data, c->used);
+        dst += c->used;
+    }
+}
diff --git a/src/core/core.c b/src/core/core.c
@@ -0,0 +1,142 @@
+/* Compiler lifecycle, panic, and cleanup-stack machinery.
+ *
+ * compiler_init wires up the per-Compiler allocators (Pool, tu Arena,
+ * scratch Arena) from the host CfreeEnv. Subsystems that need stable
+ * source identity (lexer/parser/diagnostics/DWARF) look up SourceManager
+ * through Compiler.sources; that lives in src/core/source.c.
+ *
+ * Panic flow: compiler_panic emits the diagnostic, runs the deferred
+ * cleanups, and longjmp's c->panic. Top-level entry points install a
+ * setjmp boundary and use compiler_panic_save/restore to nest.
+ *
+ * abi is left NULL until a TargetABI implementation is wired in
+ * (`src/abi/abi.c` is not required by the obj/elf path). Callers that
+ * need ABI facts will trip a clean panic rather than a NULL deref. */
+
+#include "core/core.h"
+#include "core/arena.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/diag.h"
+
+#include <cfree.h>
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Forward decls for SourceManager — implemented in source.c. */
+SourceManager* source_new(Compiler*);
+void source_free(SourceManager*);
+
+/* One entry on the Compiler's LIFO cleanup stack. compiler_run_cleanups
+ * pops entries and calls fn(arg) on each. */
+struct CompilerCleanup {
+ void (*fn)(void*);
+ void* arg;
+ CompilerCleanup* prev; /* entry registered before this one, or NULL */
+};
+
+/* Zero `c` and wire up its allocators (global Pool, tu Arena, scratch
+ * Arena) and SourceManager from the heap in `env`.
+ *
+ * There is no error return. If an allocation fails, the corresponding
+ * field is left NULL rather than being initialised through a NULL
+ * pointer; compiler_fini tolerates NULL fields. Callers that must
+ * detect out-of-memory at startup should check the fields afterwards. */
+void compiler_init(Compiler* c, Target target, const CfreeEnv* env)
+{
+    Heap* h = (Heap*)env->heap;
+
+    memset(c, 0, sizeof(*c));
+    c->env    = env;
+    c->target = target;
+
+    c->global = (Pool*)h->alloc(h, sizeof(Pool), _Alignof(Pool));
+    if (c->global) pool_init(c->global, h);
+
+    c->tu = (Arena*)h->alloc(h, sizeof(Arena), _Alignof(Arena));
+    if (c->tu) arena_init(c->tu, h, 0);
+
+    c->scratch = (Arena*)h->alloc(h, sizeof(Arena), _Alignof(Arena));
+    if (c->scratch) arena_init(c->scratch, h, 0);
+
+    c->sources = source_new(c); /* may be NULL on allocation failure */
+    c->abi     = NULL;
+    c->cleanup = NULL;
+}
+
+/* Tear down everything compiler_init created: pending cleanups first,
+ * then the SourceManager, then scratch, tu and the global pool. Fields
+ * that are NULL are skipped. */
+void compiler_fini(Compiler* c)
+{
+    Heap* h = (Heap*)c->env->heap;
+
+    /* Anything still on the cleanup stack at fini-time is a programming
+     * error — every _new should have a matching _free that undefers.
+     * Run the stack defensively so memory still gets released. */
+    compiler_run_cleanups(c);
+
+    if (c->sources != NULL) {
+        source_free(c->sources);
+    }
+    if (c->scratch != NULL) {
+        arena_fini(c->scratch);
+        h->free(h, c->scratch, sizeof(Arena));
+    }
+    if (c->tu != NULL) {
+        arena_fini(c->tu);
+        h->free(h, c->tu, sizeof(Arena));
+    }
+    if (c->global != NULL) {
+        pool_fini(c->global);
+        h->free(h, c->global, sizeof(Pool));
+    }
+
+    c->sources = NULL;
+    c->scratch = NULL;
+    c->tu      = NULL;
+    c->global  = NULL;
+}
+
+/* Push fn(arg) onto the cleanup stack and return its handle (for
+ * compiler_undefer). Returns NULL if the node cannot be allocated, in
+ * which case nothing is registered. */
+CompilerCleanup* compiler_defer(Compiler* c, void (*fn)(void*), void* arg)
+{
+    /* Cleanups live in scratch — they're bounded by pipeline depth and
+     * are walked LIFO from the panic handler. */
+    CompilerCleanup* entry = (CompilerCleanup*)arena_alloc(
+        c->scratch, sizeof(*entry), _Alignof(CompilerCleanup));
+
+    if (entry == NULL) return NULL;
+
+    entry->fn   = fn;
+    entry->arg  = arg;
+    entry->prev = c->cleanup;
+    c->cleanup  = entry;
+    return entry;
+}
+
+/* Unlink `node` from the cleanup stack without running it. A node that
+ * is not on the stack is ignored. */
+void compiler_undefer(Compiler* c, CompilerCleanup* node)
+{
+    /* Common case: undefer the top of stack after a successful _free.
+     * Off-top removals are rare but legal; walk to splice. */
+    CompilerCleanup** slot;
+
+    for (slot = &c->cleanup; *slot != NULL; slot = &(*slot)->prev) {
+        if (*slot != node) continue;
+        *slot = node->prev;
+        return;
+    }
+}
+
+/* Pop and invoke every registered cleanup, most recent first. Each
+ * entry is unlinked before its callback runs. */
+void compiler_run_cleanups(Compiler* c)
+{
+    CompilerCleanup* top;
+
+    while ((top = c->cleanup) != NULL) {
+        c->cleanup = top->prev;
+        top->fn(top->arg);
+    }
+}
+
+/* Copy the compiler's current panic jmp_buf into `out`, so a nested
+ * setjmp boundary can later put it back with compiler_panic_restore. */
+void compiler_panic_save(Compiler* c, PanicSave* out)
+{
+ memcpy(out->buf, c->panic, sizeof(jmp_buf));
+}
+
+/* Reinstall a panic jmp_buf previously captured by compiler_panic_save. */
+void compiler_panic_restore(Compiler* c, const PanicSave* saved)
+{
+ memcpy(c->panic, saved->buf, sizeof(jmp_buf));
+}
+
+/* printf-style front end for compiler_panicv: packages the variadic
+ * arguments into a va_list and forwards them. Control does not come
+ * back here, because compiler_panicv ends in longjmp. */
+void compiler_panic(Compiler* c, SrcLoc loc, const char* fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ compiler_panicv(c, loc, fmt, ap);
+ /* compiler_panicv is _Noreturn; va_end is unreachable but kept
+ * for the unlikely future where it is. */
+ va_end(ap);
+}
+
+/* Abort the current compilation: emit a fatal diagnostic through the
+ * host sink (when one is installed), run the deferred cleanups, and
+ * longjmp to the active panic boundary with value 1. Never returns. */
+void compiler_panicv(Compiler* c, SrcLoc loc, const char* fmt, va_list ap)
+{
+    if (c->env && c->env->diag && c->env->diag->emit) {
+        c->env->diag->emit(c->env->diag, CFREE_DIAG_FATAL, loc, fmt, ap);
+    }
+    /* Release everything registered with compiler_defer before
+     * unwinding; the longjmp skips the _free calls that would normally
+     * undefer them. compiler_run_cleanups empties the stack as it goes,
+     * so a panic boundary that runs the cleanups again is a no-op. */
+    compiler_run_cleanups(c);
+    longjmp(c->panic, 1);
+}
diff --git a/src/core/diag.c b/src/core/diag.c
@@ -0,0 +1,23 @@
+/* Varargs convenience over the host DiagSink.emit vtable slot. */
+
+#include "core/diag.h"
+
+#include <stdarg.h>
+
+/* printf-style front end for diag_emitv: packages the variadic
+ * arguments into a va_list and forwards them. */
+void diag_emit(DiagSink* s, DiagKind k, SrcLoc loc, const char* fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ diag_emitv(s, k, loc, fmt, ap);
+ va_end(ap);
+}
+
+/* Forward one diagnostic to the sink's emit callback (if set) and bump
+ * the sink's warning or error counter. A NULL sink is ignored. The
+ * counters are updated even when no emit callback is installed. */
+void diag_emitv(DiagSink* s, DiagKind k, SrcLoc loc, const char* fmt, va_list ap)
+{
+    if (s == NULL) return;
+
+    if (s->emit != NULL) {
+        s->emit(s, k, loc, fmt, ap);
+    }
+
+    if (k == DIAG_WARN) {
+        s->warnings++;
+        return;
+    }
+    if (k == DIAG_ERROR || k == DIAG_FATAL) {
+        s->errors++;
+    }
+}
diff --git a/src/core/pool.c b/src/core/pool.c
@@ -0,0 +1,170 @@
+/* Interned strings + types. Open-addressed hash table keyed by FNV-1a
+ * over the string body; the table holds Sym ids that index into a
+ * heap-allocated `entries` array. Strings live in a per-Pool arena
+ * (block-allocated), so str_data pointers are stable for the Pool's
+ * lifetime.
+ *
+ * pool_type is left as a stub that returns NULL: types come online with
+ * the parser, which isn't part of the foundation work that gates ELF
+ * support. */
+
+#include "core/pool.h"
+#include "core/arena.h"
+
+#include <string.h>
+
+/* struct Pool is defined in pool.h so callers can size it (Compiler embeds
+ * a Pool* allocated through Heap, and core.c needs sizeof(Pool)). */
+
+#define POOL_INITIAL_TABLE_CAP 256
+#define POOL_INITIAL_ENTRIES 64
+#define POOL_TABLE_LOAD_NUM 3
+#define POOL_TABLE_LOAD_DEN 4 /* grow when used*4 >= cap*3 */
+
+/* 32-bit FNV-1a over `len` bytes of `s`. A result of 0 is remapped to 1
+ * because 0 is reserved as the "none" hash. */
+static u32 fnv1a(const char* s, size_t len)
+{
+    const u8* bytes = (const u8*)s;
+    u32 h = 0x811C9DC5u;
+    size_t left;
+
+    for (left = len; left != 0; --left) {
+        h = (h ^ *bytes++) * 0x01000193u;
+    }
+    return h != 0 ? h : 1; /* avoid 0 (also reserved as "none") */
+}
+
+/* Nonzero when entry `e` holds exactly the string (s, len) whose hash
+ * is `h`. Hash and length are compared first so memcmp only runs on
+ * likely matches. */
+static int sym_eq(const PoolEntry* e, const char* s, size_t len, u32 h)
+{
+    if (e->hash != h) return 0;
+    if (e->len != (u32)len) return 0;
+    return memcmp(e->data, s, len) == 0;
+}
+
+/* Replace the hash table with a zeroed one of `new_cap` slots (a power
+ * of two) and reinsert every live Sym by linear probing. On allocation
+ * failure the existing table is left untouched and nothing is
+ * reported. */
+static void table_rehash(Pool* p, u32 new_cap)
+{
+    const size_t bytes = sizeof(Sym) * new_cap;
+    const u32 mask = new_cap - 1;
+    Sym* fresh = (Sym*)p->heap->alloc(p->heap, bytes, _Alignof(Sym));
+    u32 slot;
+
+    if (fresh == NULL) return;
+    memset(fresh, 0, bytes);
+
+    for (slot = 0; slot < p->cap; ++slot) {
+        Sym sym = p->table[slot];
+        u32 j;
+
+        if (sym == 0) continue; /* empty slot */
+        j = p->entries[sym].hash & mask;
+        while (fresh[j] != 0) {
+            j = (j + 1) & mask;
+        }
+        fresh[j] = sym;
+    }
+
+    if (p->table != NULL) {
+        p->heap->free(p->heap, p->table, sizeof(Sym) * p->cap);
+    }
+    p->table = fresh;
+    p->cap   = new_cap;
+}
+
+/* Guarantee room for one more PoolEntry, doubling the array (or
+ * starting at POOL_INITIAL_ENTRIES) when it is full. Returns 0 on
+ * success, 1 if the reallocation fails. */
+static int entries_grow(Pool* p)
+{
+    const u32 old_cap = p->entries_cap;
+    u32 grown_cap;
+    PoolEntry* grown;
+
+    if (p->nentries < old_cap) return 0;
+
+    grown_cap = old_cap != 0 ? old_cap * 2 : POOL_INITIAL_ENTRIES;
+    grown = (PoolEntry*)p->heap->realloc(
+        p->heap, p->entries,
+        sizeof(*p->entries) * old_cap,
+        sizeof(*p->entries) * grown_cap,
+        _Alignof(PoolEntry));
+    if (grown == NULL) return 1;
+
+    p->entries     = grown;
+    p->entries_cap = grown_cap;
+    return 0;
+}
+
+/* Initialise an empty pool on heap `h`: set up the string arena,
+ * allocate the initial hash table and reserve entry 0 as the "none"
+ * sentinel. Allocation failures are not reported; the affected part is
+ * simply left unallocated. */
+void pool_init(Pool* p, Heap* h)
+{
+    PoolEntry* none;
+
+    p->heap = h;
+    arena_init(&p->arena, h, 0);
+
+    p->table = NULL;
+    p->cap   = 0;
+    p->used  = 0;
+
+    p->entries     = NULL;
+    p->nentries    = 0;
+    p->entries_cap = 0;
+
+    table_rehash(p, POOL_INITIAL_TABLE_CAP);
+
+    /* Reserve entry 0 as the "none" sentinel. */
+    if (entries_grow(p) != 0) return;
+    none = &p->entries[0];
+    none->data = NULL;
+    none->len  = 0;
+    none->hash = 0;
+    p->nentries = 1;
+}
+
+/* Release the hash table, the entries array and the string arena. */
+void pool_fini(Pool* p)
+{
+    Heap* h = p->heap;
+
+    if (p->table != NULL) {
+        h->free(h, p->table, sizeof(Sym) * p->cap);
+    }
+    if (p->entries != NULL) {
+        h->free(h, p->entries, sizeof(*p->entries) * p->entries_cap);
+    }
+    arena_fini(&p->arena);
+
+    p->table   = NULL;
+    p->entries = NULL;
+}
+
+/* Intern the `len`-byte string at `s` and return its Sym. Interning the
+ * same bytes again returns the same Sym. Returns 0 ("none") for a NULL
+ * or empty string, for a string too long for the u32 length field, and
+ * whenever memory for the table, the entry or the string copy cannot
+ * be obtained. */
+Sym pool_intern(Pool* p, const char* s, size_t len)
+{
+    u32 h, mask, i;
+    Sym sym;
+    char* dst;
+
+    if (!s || len == 0) return 0;
+    /* PoolEntry.len is u32; a truncated length would make two different
+     * strings compare equal on (hash, len) prefixes. */
+    if ((size_t)(u32)len != len) return 0;
+
+    if (p->used * POOL_TABLE_LOAD_DEN >= p->cap * POOL_TABLE_LOAD_NUM) {
+        /* cap == 0 means the initial allocation in pool_init failed;
+         * retry with the initial size instead of "doubling" zero. */
+        table_rehash(p, p->cap ? p->cap * 2 : POOL_INITIAL_TABLE_CAP);
+    }
+    /* table_rehash fails silently, so the table may still be missing. */
+    if (!p->table || p->cap == 0) return 0;
+
+    h = fnv1a(s, len);
+    mask = p->cap - 1;
+    i = h & mask;
+    while ((sym = p->table[i]) != 0) {
+        if (sym_eq(&p->entries[sym], s, len, h)) return sym;
+        i = (i + 1) & mask;
+    }
+
+    /* Always keep one slot empty. If growth keeps failing the table
+     * would otherwise fill up and the probe loop above would never
+     * find a terminating empty slot. */
+    if (p->used + 1 >= p->cap) return 0;
+
+    /* Not found: allocate a new entry. The stored buffer carries a
+     * trailing NUL byte that callers may rely on (so `pool_str` can be
+     * fed straight to strcmp / printf %s); the recorded `len` is still
+     * the logical string length, exclusive of the terminator. The
+     * strtab content itself can carry embedded NULs, so consumers that
+     * care about exact bytes should compare via (len, memcmp). */
+    if (entries_grow(p)) return 0;
+    if (p->nentries == 0) {
+        /* pool_init could not reserve entry 0. Do it now, otherwise the
+         * first real string would be handed Sym 0, which means "none"
+         * to callers and "empty slot" to the hash table. */
+        p->entries[0].data = NULL;
+        p->entries[0].len  = 0;
+        p->entries[0].hash = 0;
+        p->nentries = 1;
+        if (entries_grow(p)) return 0;
+    }
+
+    dst = (char*)arena_alloc(&p->arena, len + 1, 1);
+    if (!dst) return 0;
+    memcpy(dst, s, len);
+    dst[len] = '\0';
+
+    sym = (Sym)p->nentries++;
+    p->entries[sym].data = dst;
+    p->entries[sym].len  = (u32)len;
+    p->entries[sym].hash = h;
+    p->table[i] = sym;
+    p->used++;
+    return sym;
+}
+
+/* Intern a NUL-terminated string. NULL yields Sym 0, as does the empty
+ * string (via pool_intern). */
+Sym pool_intern_cstr(Pool* p, const char* s)
+{
+    if (s == NULL) return 0;
+    return pool_intern(p, s, strlen(s));
+}
+
+/* Look up the bytes of an interned string. Stores the length in
+ * *len_out when len_out is non-NULL. Sym 0 and out-of-range ids yield
+ * NULL with a length of 0. */
+const char* pool_str(Pool* p, Sym sym, size_t* len_out)
+{
+    const PoolEntry* e;
+
+    if (sym != 0 && sym < p->nentries) {
+        e = &p->entries[sym];
+        if (len_out != NULL) *len_out = e->len;
+        return e->data;
+    }
+    if (len_out != NULL) *len_out = 0;
+    return NULL;
+}
+
+/* Placeholder for type interning. Ignores both arguments and always
+ * returns NULL; nothing is interned and no diagnostic or panic is
+ * raised here. */
+const Type* pool_type(Pool* p, const Type* tmpl)
+{
+ /* Type interning is the parser/typer's province; not used by the
+ * obj+ELF foundation. Keep the API present so callers that don't
+ * exercise the type table still link, but treat actual use as a
+ * loud bug. NOTE(review): this stub itself is silent; a caller has
+ * to check for the NULL result to notice. */
+ (void)p; (void)tmpl;
+ return NULL;
+}
diff --git a/src/core/pool.h b/src/core/pool.h
@@ -3,9 +3,31 @@
#include "core/core.h"
#include "core/heap.h"
+#include "core/arena.h"
typedef struct Type Type; /* declared in src/type/type.h */
+/* One interned string: pointer to its bytes, its length, and the cached
+ * hash used for table lookups. */
+typedef struct PoolEntry {
+ const char* data;
+ u32 len;
+ u32 hash;
+} PoolEntry;
+
+/* String interner state: a hash table of Sym ids plus the entries
+ * array those ids index into. Defined in the header so that other
+ * translation units can take sizeof(Pool). */
+struct Pool {
+ Heap* heap;
+ Arena arena; /* string and type-template storage */
+
+ /* Hash table: 0 means empty. Otherwise it's a Sym id (1-based). */
+ Sym* table;
+ u32 cap; /* always a power of two */
+ u32 used; /* number of occupied table slots */
+
+ /* Sym → string mapping. Index 0 reserved as Sym = 0 ("none"). */
+ PoolEntry* entries;
+ u32 nentries; /* entries in use, including the reserved slot 0 */
+ u32 entries_cap; /* allocated length of `entries` */
+};
+
void pool_init(Pool*, Heap*);
void pool_fini(Pool*);
diff --git a/src/core/source.c b/src/core/source.c
@@ -0,0 +1,192 @@
+/* SourceManager — file-id authority for diagnostics, dependency output,
+ * and DWARF. The lex/pp/parse subsystems aren't part of the obj/ELF
+ * foundation, so this implementation is minimal: it stores a flat array
+ * of registered files and the include-edge list, and exposes lookups.
+ * It does not yet support macro-expansion pseudo files or
+ * spelling/expansion location translation; those land with the
+ * preprocessor. */
+
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Storage slot for one registered file; currently just wraps the
+ * SourceFile record handed out by source_file. */
+typedef struct SrcMgrFile {
+ SourceFile info;
+} SrcMgrFile;
+
+/* Storage slot for one include edge; currently just wraps the
+ * SourceInclude record handed out by source_depiter_next. */
+typedef struct SrcMgrInclude {
+ SourceInclude info;
+} SrcMgrInclude;
+
+/* Registry of source files and include edges. Both are kept in flat,
+ * heap-grown arrays; a file's id is its index in `files`, with index 0
+ * reserved as "none". */
+struct SourceManager {
+ Compiler* c; /* owner; its global pool interns file names */
+ Heap* heap; /* allocator for this struct and both arrays */
+
+ SrcMgrFile* files;
+ u32 nfiles; /* slots in use, including reserved slot 0 */
+ u32 files_cap; /* allocated length of `files` */
+
+ SrcMgrInclude* includes;
+ u32 nincludes; /* recorded include edges */
+ u32 includes_cap; /* allocated length of `includes` */
+};
+
+/* Cursor over a SourceManager's include edges, in insertion order. */
+struct SourceDepIter {
+ SourceManager* sm;
+ u32 idx; /* index of the next edge to return */
+};
+
+/* Make sure `files` can hold at least `want` entries, growing by
+ * doubling (starting from 16). Returns 0 on success, 1 if the
+ * reallocation fails. */
+static int files_grow(SourceManager* sm, u32 want)
+{
+    const u32 old_cap = sm->files_cap;
+    u32 grown_cap;
+    SrcMgrFile* grown;
+
+    if (want <= old_cap) return 0;
+
+    grown_cap = old_cap != 0 ? old_cap * 2 : 16;
+    while (grown_cap < want) {
+        grown_cap *= 2;
+    }
+
+    grown = (SrcMgrFile*)sm->heap->realloc(
+        sm->heap, sm->files,
+        sizeof(*sm->files) * old_cap,
+        sizeof(*sm->files) * grown_cap,
+        _Alignof(SrcMgrFile));
+    if (grown == NULL) return 1;
+
+    sm->files     = grown;
+    sm->files_cap = grown_cap;
+    return 0;
+}
+
+/* Guarantee room for one more include edge, doubling the array (or
+ * starting at 16) when it is full. Returns 0 on success, 1 if the
+ * reallocation fails. */
+static int includes_grow(SourceManager* sm)
+{
+    const u32 old_cap = sm->includes_cap;
+    u32 grown_cap;
+    SrcMgrInclude* grown;
+
+    if (sm->nincludes < old_cap) return 0;
+
+    grown_cap = old_cap != 0 ? old_cap * 2 : 16;
+    grown = (SrcMgrInclude*)sm->heap->realloc(
+        sm->heap, sm->includes,
+        sizeof(*sm->includes) * old_cap,
+        sizeof(*sm->includes) * grown_cap,
+        _Alignof(SrcMgrInclude));
+    if (grown == NULL) return 1;
+
+    sm->includes     = grown;
+    sm->includes_cap = grown_cap;
+    return 0;
+}
+
+/* Create a SourceManager on the compiler's heap with file id 0 already
+ * reserved. Returns NULL if either allocation fails. */
+SourceManager* source_new(Compiler* c)
+{
+    Heap* h = (Heap*)c->env->heap;
+    SourceManager* sm;
+
+    sm = (SourceManager*)h->alloc(h, sizeof(*sm), _Alignof(SourceManager));
+    if (sm == NULL) return NULL;
+
+    memset(sm, 0, sizeof(*sm));
+    sm->c    = c;
+    sm->heap = h;
+
+    /* Reserve id 0 as "none" — never returned to callers. */
+    if (files_grow(sm, 1) != 0) {
+        h->free(h, sm, sizeof(*sm));
+        return NULL;
+    }
+    memset(&sm->files[0], 0, sizeof(sm->files[0]));
+    sm->nfiles = 1;
+    return sm;
+}
+
+/* Release both arrays and the manager itself. NULL is accepted. */
+void source_free(SourceManager* sm)
+{
+    Heap* h;
+
+    if (sm == NULL) return;
+    h = sm->heap;
+
+    if (sm->files != NULL) {
+        h->free(h, sm->files, sizeof(*sm->files) * sm->files_cap);
+    }
+    if (sm->includes != NULL) {
+        h->free(h, sm->includes, sizeof(*sm->includes) * sm->includes_cap);
+    }
+    h->free(h, sm, sizeof(*sm));
+}
+
+/* Append a file record of the given kind and return its id, or 0 if the
+ * files array cannot grow. The name (NULL is treated as "") is interned
+ * in the compiler's global pool; `path` is set to the same Sym only for
+ * SRC_FILE_REAL. */
+static u32 file_register(SourceManager* sm, const char* name,
+                         SourceFileKind kind, int system_header)
+{
+    SourceFile* f;
+    Sym sym;
+    u32 id;
+
+    if (files_grow(sm, sm->nfiles + 1) != 0) return 0;
+
+    sym = pool_intern_cstr(sm->c->global, name ? name : "");
+    id  = sm->nfiles++;
+
+    memset(&sm->files[id], 0, sizeof(sm->files[id]));
+    f = &sm->files[id].info;
+    f->id            = id;
+    f->name          = sym;
+    f->path          = (kind == SRC_FILE_REAL) ? sym : 0;
+    f->kind          = (u8)kind;
+    f->system_header = (u8)(system_header ? 1 : 0);
+    return id;
+}
+
+/* Register an on-disk file; returns its id, or 0 on allocation failure. */
+u32 source_add_file(SourceManager* sm, const char* path, int system_header)
+{
+    return file_register(sm, path, SRC_FILE_REAL, system_header);
+}
+
+/* Register an in-memory input; returns its id, or 0 on allocation
+ * failure. */
+u32 source_add_memory(SourceManager* sm, const char* name)
+{
+    return file_register(sm, name, SRC_FILE_MEMORY, 0);
+}
+
+/* Register a builtin pseudo-file; returns its id, or 0 on allocation
+ * failure. */
+u32 source_add_builtin(SourceManager* sm, const char* name)
+{
+    return file_register(sm, name, SRC_FILE_BUILTIN, 0);
+}
+
+/* Record that `includer_file_id` included `included_file_id` at
+ * `include_loc`. The edge is dropped silently if the includes array
+ * cannot grow. */
+void source_add_include(SourceManager* sm, u32 includer_file_id,
+                        u32 included_file_id, SrcLoc include_loc, int system)
+{
+    SourceInclude* edge;
+
+    if (includes_grow(sm) != 0) return;
+
+    edge = &sm->includes[sm->nincludes].info;
+    edge->includer_file_id = includer_file_id;
+    edge->included_file_id = included_file_id;
+    edge->include_loc      = include_loc;
+    edge->system           = (u8)(system ? 1 : 0);
+    sm->nincludes++;
+}
+
+/* Register a synthetic SRC_FILE_MACRO file named after `macro_name` and
+ * return its id, or 0 if the files array cannot grow. The two
+ * locations are currently ignored. */
+u32 source_add_macro_expansion(SourceManager* sm, Sym macro_name,
+                               SrcLoc spelling_loc, SrcLoc expansion_loc)
+{
+    SourceFile* f;
+    u32 id;
+
+    /* Macro expansion file ids are needed when the preprocessor lands;
+     * not on the obj/ELF path. Register it as a synthetic file so any
+     * SrcLoc passing through stays referenceable. */
+    (void)spelling_loc;
+    (void)expansion_loc;
+
+    if (files_grow(sm, sm->nfiles + 1) != 0) return 0;
+
+    id = sm->nfiles++;
+    memset(&sm->files[id], 0, sizeof(sm->files[id]));
+    f = &sm->files[id].info;
+    f->id   = id;
+    f->name = macro_name;
+    f->kind = SRC_FILE_MACRO;
+    return id;
+}
+
+/* Return the record for `file_id`, or NULL for id 0 and ids that were
+ * never handed out. */
+const SourceFile* source_file(SourceManager* sm, u32 file_id)
+{
+    if (file_id != 0 && file_id < sm->nfiles) {
+        return &sm->files[file_id].info;
+    }
+    return NULL;
+}
+
+/* Stub: expansion records are not tracked yet, so this always returns
+ * NULL. */
+const SourceExpansion* source_expansion(SourceManager* sm, u32 expansion_file_id)
+{
+    (void)sm;
+    (void)expansion_file_id;
+    return NULL;
+}
+
+/* Stub: no location translation yet; returns `loc` unchanged. */
+SrcLoc source_spelling_loc(SourceManager* sm, SrcLoc loc)
+{
+    (void)sm;
+    return loc;
+}
+/* Stub: no location translation yet; returns `loc` unchanged. */
+SrcLoc source_expansion_loc(SourceManager* sm, SrcLoc loc)
+{
+    (void)sm;
+    return loc;
+}
+
+/* Allocate an iterator positioned at the first include edge. Returns
+ * NULL on allocation failure. Release with source_depiter_free. */
+SourceDepIter* source_depiter_new(SourceManager* sm)
+{
+    Heap* h = sm->heap;
+    SourceDepIter* it;
+
+    it = (SourceDepIter*)h->alloc(h, sizeof(*it), _Alignof(SourceDepIter));
+    if (it == NULL) return NULL;
+
+    it->sm  = sm;
+    it->idx = 0;
+    return it;
+}
+
+/* Return the next include edge and advance, or NULL when the iterator
+ * is NULL or exhausted. */
+const SourceInclude* source_depiter_next(SourceDepIter* it)
+{
+    const SourceManager* sm;
+
+    if (it == NULL) return NULL;
+    sm = it->sm;
+    if (it->idx >= sm->nincludes) return NULL;
+    return &sm->includes[it->idx++].info;
+}
+
+/* Return the iterator to the heap of its SourceManager. NULL is
+ * accepted. */
+void source_depiter_free(SourceDepIter* it)
+{
+    Heap* h;
+
+    if (it == NULL) return;
+    h = it->sm->heap;
+    h->free(h, it, sizeof(*it));
+}