commit 7b9993d49daf2887b1c1b9ba652806821d1ef0fe
parent 0a1b3558e46400ad6b0da26fa98aa0ffeb4f97f0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 13:10:31 -0700
cg: bring up type/abi spine and pass Group A on AArch64
Adds the minimum type system, AAPCS64 classification, and AArch64
backend touch-ups needed for the test/cg Group A corpus to pass on
all four paths (in-process JIT, ELF roundtrip, qemu/podman exec,
jit-via-file). Group B still requires frame_slot/param/call/load/
store on the backend; c05_div_mod still needs BO_SREM.
- src/type/type.c: per-Pool interned type_void/prim/ptr/func plus
minimal record/enum/qualified support and type_is_* predicates.
- src/abi/abi.c: scalar+aggregate sizing/alignment, struct/union
layout, and AAPCS64 abi_func_info covering scalar/pointer/float
returns and small/large aggregate classification.
- src/core: bring up c->abi via abi_new in compiler_init.
- src/arch/aarch64.c: aa_load_imm now handles arbitrary 32/64-bit
constants via single MOVZ/MOVN or a MOVZ + MOVK chain (a06).
- src/core/pool.{h,c}: add a void* type_cache field on Pool for
type.c to hang its interning structure off.
Diffstat:
7 files changed, 783 insertions(+), 17 deletions(-)
diff --git a/src/abi/abi.c b/src/abi/abi.c
@@ -0,0 +1,373 @@
+/* TargetABI for AArch64 SysV (AAPCS64).
+ *
+ * Single authority for target-dependent C layout and calling convention
+ * decisions. Type stays structural and ABI-neutral; sizes, alignments,
+ * record layouts, and argument/return classification are derived here
+ * from Compiler.target.
+ *
+ * v1 implements only AArch64 SysV at the level needed for the cg test
+ * harness (Group A scalars + Group C arithmetic + the lowering surface
+ * for Group B). Other arches will land alongside their backends. */
+
+#include "abi/abi.h"
+#include "core/core.h"
+#include "core/arena.h"
+
+#include <cfree.h>
+
+#include <string.h>
+
+typedef struct FuncInfoCacheEntry FuncInfoCacheEntry;
+struct FuncInfoCacheEntry {
+ const Type* fn;
+ ABIFuncInfo* info;
+ FuncInfoCacheEntry* next;
+};
+
+typedef struct RecordLayoutCacheEntry RecordLayoutCacheEntry;
+struct RecordLayoutCacheEntry {
+ const Type* ty;
+ ABIRecordLayout* layout;
+ RecordLayoutCacheEntry* next;
+};
+
+struct TargetABI {
+ Compiler* c;
+ /* Per-TU cached lookups. */
+ FuncInfoCacheEntry* fn_cache;
+ RecordLayoutCacheEntry* rec_cache;
+};
+
+/* ---- scalar profile ---- */
+
+static ABITypeInfo prim_info(TargetABI* a, TypeKind k)
+{
+ ABITypeInfo r = { 0, 0, ABI_SC_INT, 0, 0, 0 };
+ switch (k) {
+ case TY_VOID:
+ r.size = 0; r.align = 1; r.scalar_kind = ABI_SC_VOID; return r;
+ case TY_BOOL:
+ r.size = 1; r.align = 1; r.scalar_kind = ABI_SC_BOOL; return r;
+ case TY_CHAR:
+ r.size = 1; r.align = 1; r.signed_ = 1; return r;
+ case TY_SCHAR:
+ r.size = 1; r.align = 1; r.signed_ = 1; return r;
+ case TY_UCHAR:
+ r.size = 1; r.align = 1; r.signed_ = 0; return r;
+ case TY_SHORT:
+ r.size = 2; r.align = 2; r.signed_ = 1; return r;
+ case TY_USHORT:
+ r.size = 2; r.align = 2; r.signed_ = 0; return r;
+ case TY_INT:
+ r.size = 4; r.align = 4; r.signed_ = 1; return r;
+ case TY_UINT:
+ r.size = 4; r.align = 4; r.signed_ = 0; return r;
+ case TY_LONG:
+ r.size = (a->c->target.arch == CFREE_ARCH_ARM_64 ||
+ a->c->target.arch == CFREE_ARCH_X86_64 ||
+ a->c->target.arch == CFREE_ARCH_RV64) ? 8 : 4;
+ r.align = r.size; r.signed_ = 1; return r;
+ case TY_ULONG:
+ r.size = (a->c->target.arch == CFREE_ARCH_ARM_64 ||
+ a->c->target.arch == CFREE_ARCH_X86_64 ||
+ a->c->target.arch == CFREE_ARCH_RV64) ? 8 : 4;
+ r.align = r.size; r.signed_ = 0; return r;
+ case TY_LLONG:
+ r.size = 8; r.align = 8; r.signed_ = 1; return r;
+ case TY_ULLONG:
+ r.size = 8; r.align = 8; r.signed_ = 0; return r;
+ case TY_FLOAT:
+ r.size = 4; r.align = 4; r.scalar_kind = ABI_SC_FLOAT; return r;
+ case TY_DOUBLE:
+ r.size = 8; r.align = 8; r.scalar_kind = ABI_SC_FLOAT; return r;
+ case TY_LDOUBLE:
+ r.size = 16; r.align = 16; r.scalar_kind = ABI_SC_FLOAT; return r;
+ default:
+ return r;
+ }
+}
+
+ABITypeInfo abi_type_info(TargetABI* a, const Type* t)
+{
+ ABITypeInfo r = { 0, 0, ABI_SC_VOID, 0, 0, 0 };
+ if (!t) return r;
+ switch (t->kind) {
+ case TY_PTR:
+ r.size = a->c->target.ptr_size ? a->c->target.ptr_size : 8;
+ r.align = a->c->target.ptr_align ? a->c->target.ptr_align : 8;
+ r.scalar_kind = ABI_SC_PTR;
+ return r;
+ case TY_ARRAY: {
+ ABITypeInfo e = abi_type_info(a, t->arr.elem);
+ r.size = e.size * t->arr.count;
+ r.align = e.align;
+ return r;
+ }
+ case TY_STRUCT: case TY_UNION: {
+ const ABIRecordLayout* L = abi_record_layout(a, t);
+ if (L) { r.size = L->size; r.align = L->align; }
+ return r;
+ }
+ case TY_ENUM:
+ return abi_type_info(a, t->enm.base ? t->enm.base
+ : type_prim(a->c->global, TY_INT));
+ case TY_FUNC:
+ /* sizeof(function) is undefined in C; AAPCS uses 1 for arithmetic. */
+ r.size = 1; r.align = 1; return r;
+ default:
+ return prim_info(a, (TypeKind)t->kind);
+ }
+}
+
+u32 abi_sizeof (TargetABI* a, const Type* t) { return abi_type_info(a, t).size; }
+u32 abi_alignof(TargetABI* a, const Type* t) { return abi_type_info(a, t).align; }
+
+/* ---- record layout (struct/union) ---- */
+
+static ABIRecordLayout* compute_record_layout(TargetABI* a, const Type* t)
+{
+ ABIRecordLayout* L = arena_new(a->c->tu, ABIRecordLayout);
+ if (!L) return NULL;
+ memset(L, 0, sizeof *L);
+ ABIFieldLayout* fl = NULL;
+ if (t->rec.nfields) {
+ fl = arena_array(a->c->tu, ABIFieldLayout, t->rec.nfields);
+ memset(fl, 0, sizeof(ABIFieldLayout) * t->rec.nfields);
+ }
+
+ u32 max_align = 1;
+ if (t->kind == TY_STRUCT) {
+ u32 off = 0;
+ for (u16 i = 0; i < t->rec.nfields; ++i) {
+ const Field* f = &t->rec.fields[i];
+ ABITypeInfo fi = abi_type_info(a, f->type);
+ if (fi.align > max_align) max_align = fi.align;
+ u32 mask = fi.align ? fi.align - 1 : 0;
+ off = (off + mask) & ~mask;
+ fl[i].offset = off;
+ fl[i].bit_offset = 0;
+ fl[i].bit_width = 0;
+ fl[i].storage_size = fi.size;
+ off += fi.size;
+ }
+ u32 mask = max_align - 1;
+ L->size = (off + mask) & ~mask;
+ } else { /* TY_UNION */
+ u32 mx = 0;
+ for (u16 i = 0; i < t->rec.nfields; ++i) {
+ const Field* f = &t->rec.fields[i];
+ ABITypeInfo fi = abi_type_info(a, f->type);
+ if (fi.align > max_align) max_align = fi.align;
+ if (fi.size > mx) mx = fi.size;
+ fl[i].offset = 0;
+ fl[i].storage_size = fi.size;
+ }
+ u32 mask = max_align - 1;
+ L->size = (mx + mask) & ~mask;
+ }
+ L->align = max_align;
+ L->nfields = t->rec.nfields;
+ L->fields = fl;
+ return L;
+}
+
+const ABIRecordLayout* abi_record_layout(TargetABI* a, const Type* t)
+{
+ if (!t || (t->kind != TY_STRUCT && t->kind != TY_UNION)) return NULL;
+ for (RecordLayoutCacheEntry* e = a->rec_cache; e; e = e->next) {
+ if (e->ty == t) return e->layout;
+ }
+ ABIRecordLayout* L = compute_record_layout(a, t);
+ if (!L) return NULL;
+ RecordLayoutCacheEntry* e = arena_new(a->c->tu, RecordLayoutCacheEntry);
+ e->ty = t; e->layout = L; e->next = a->rec_cache;
+ a->rec_cache = e;
+ return L;
+}
+
+/* ---- function classification (AArch64 SysV / AAPCS64) ----
+ *
+ * v1 covers the cases the cg test harness exercises:
+ * void -> IGNORE
+ * integer ≤ 8B -> DIRECT, one INT part in a register
+ * integer 16B -> DIRECT, two INT parts (X0+X1)
+ * pointer -> DIRECT, one INT part in a register
+ * float/double -> DIRECT, one FP part in a register
+ * small struct -> DIRECT, INT parts (HFA/HVA refinement: TODO)
+ * large struct -> INDIRECT (sret for return; passed by reference)
+ * Variadics, HFA classification, and split GPR+stack tail still
+ * land with the parser. */
+
+static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out)
+{
+ ABITypeInfo ti = abi_type_info(a, t);
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->indirect_align = 0;
+
+ ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+ memset(parts, 0, sizeof *parts);
+ parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ parts->loc = ABI_LOC_REG;
+ parts->size = ti.size;
+ parts->align = ti.align;
+ parts->src_offset = 0;
+
+ out->parts = parts;
+ out->nparts = 1;
+}
+
+static void classify_void(ABIArgInfo* out)
+{
+ memset(out, 0, sizeof *out);
+ out->kind = ABI_ARG_IGNORE;
+}
+
+static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out,
+ int is_return)
+{
+ ABITypeInfo ti = abi_type_info(a, t);
+ if (ti.size == 0) { classify_void(out); return; }
+ /* AAPCS64: aggregates ≤ 16 bytes pass in up to 2 GPRs (or HFA in FP regs;
+ * v1 ignores HFA). Larger aggregates pass by reference (caller copy for
+ * args, sret pointer for return). */
+ if (ti.size <= 16) {
+ u32 nparts = (ti.size + 7) / 8;
+ ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts);
+ memset(parts, 0, sizeof(ABIArgPart) * nparts);
+ u32 off = 0;
+ for (u32 i = 0; i < nparts; ++i) {
+ u32 chunk = (ti.size - off > 8) ? 8 : (ti.size - off);
+ parts[i].cls = ABI_CLASS_INT;
+ parts[i].loc = ABI_LOC_REG;
+ parts[i].size = chunk;
+ parts[i].align = 8;
+ parts[i].src_offset = off;
+ off += chunk;
+ }
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = (u16)nparts;
+ out->indirect_align = 0;
+ } else {
+ out->kind = ABI_ARG_INDIRECT;
+ out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
+ out->indirect_align = ti.align;
+ out->parts = NULL;
+ out->nparts = 0;
+ }
+}
+
+static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out, int is_return)
+{
+ if (!t || t->kind == TY_VOID) { classify_void(out); return; }
+ switch (t->kind) {
+ case TY_STRUCT: case TY_UNION:
+ classify_aggregate(a, t, out, is_return);
+ return;
+ default:
+ classify_scalar(a, t, out);
+ return;
+ }
+}
+
+static ABIFuncInfo* compute_func_info(TargetABI* a, const Type* fn)
+{
+ ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
+ memset(info, 0, sizeof *info);
+
+ classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1);
+ info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
+ info->variadic = fn->fn.variadic;
+
+ info->nparams = fn->fn.nparams;
+ if (fn->fn.nparams) {
+ ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams);
+ memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams);
+ for (u16 i = 0; i < fn->fn.nparams; ++i) {
+ classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
+ }
+ info->params = arr;
+ } else {
+ info->params = NULL;
+ }
+ return info;
+}
+
+const ABIFuncInfo* abi_func_info(TargetABI* a, const Type* fn_type)
+{
+ if (!fn_type || fn_type->kind != TY_FUNC) return NULL;
+ for (FuncInfoCacheEntry* e = a->fn_cache; e; e = e->next) {
+ if (e->fn == fn_type) return e->info;
+ }
+ ABIFuncInfo* info = compute_func_info(a, fn_type);
+ if (!info) return NULL;
+ FuncInfoCacheEntry* e = arena_new(a->c->tu, FuncInfoCacheEntry);
+ e->fn = fn_type; e->info = info; e->next = a->fn_cache;
+ a->fn_cache = e;
+ return info;
+}
+
+/* ---- target-defined library types ---- */
+
+static const Type* size_or_uintptr(TargetABI* a, Pool* p)
+{
+ return a->c->target.ptr_size == 8 ? type_prim(p, TY_ULLONG)
+ : type_prim(p, TY_UINT);
+}
+
+const Type* abi_size_type (TargetABI* a, Pool* p) { return size_or_uintptr(a, p); }
+const Type* abi_ptrdiff_type(TargetABI* a, Pool* p)
+{
+ return a->c->target.ptr_size == 8 ? type_prim(p, TY_LLONG)
+ : type_prim(p, TY_INT);
+}
+const Type* abi_intptr_type (TargetABI* a, Pool* p)
+{
+ return a->c->target.ptr_size == 8 ? type_prim(p, TY_LLONG)
+ : type_prim(p, TY_INT);
+}
+const Type* abi_uintptr_type(TargetABI* a, Pool* p) { return size_or_uintptr(a, p); }
+const Type* abi_va_list_type(TargetABI* a, Pool* p)
+{
+ /* AAPCS64: __va_list is a struct of three pointers + two ints. v1 returns
+ * a placeholder pointer; this is exercised only by the parser/builtin
+ * substitution path, which Group A does not reach. */
+ (void)a;
+ return type_ptr(p, type_void(p));
+}
+
+/* ---- lifecycle ---- */
+
+void abi_init(TargetABI* a, Compiler* c)
+{
+ memset(a, 0, sizeof *a);
+ a->c = c;
+}
+
+void abi_fini(TargetABI* a)
+{
+ /* Arena-backed; nothing to release. */
+ if (!a) return;
+ a->fn_cache = NULL;
+ a->rec_cache = NULL;
+ a->c = NULL;
+}
+
+TargetABI* abi_new(Compiler* c)
+{
+ Heap* h = (Heap*)c->env->heap;
+ TargetABI* a = (TargetABI*)h->alloc(h, sizeof(TargetABI), _Alignof(TargetABI));
+ if (!a) return NULL;
+ abi_init(a, c);
+ return a;
+}
+
+void abi_free(TargetABI* a)
+{
+ if (!a) return;
+ Heap* h = (Heap*)a->c->env->heap;
+ abi_fini(a);
+ h->free(h, a, sizeof(TargetABI));
+}
diff --git a/src/abi/abi.h b/src/abi/abi.h
@@ -105,6 +105,11 @@ typedef struct ABIFuncInfo {
void abi_init(TargetABI*, Compiler*);
void abi_fini(TargetABI*);
+/* Heap-allocating wrappers around abi_init/abi_fini, used by compiler_init.
+ * The returned pointer is valid until abi_free returns. */
+TargetABI* abi_new (Compiler*);
+void abi_free(TargetABI*);
+
/* Builtin scalar profiles and general type layout. */
ABITypeInfo abi_type_info(TargetABI*, const Type*);
u32 abi_sizeof (TargetABI*, const Type*);
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -138,24 +138,86 @@ static void aa_continue_to(CGTarget* t, CGScope s) { (void)s; aa_
static void aa_load_imm(CGTarget* t, Operand dst, i64 imm)
{
- AAImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
- if (imm >= 0 && imm <= 0xffff) {
- /* MOVZ Wd|Xd, #imm16, lsl #0 */
- u32 word = (sf << 31) | 0x52800000u | (((u32)imm & 0xffff) << 5) | rd;
- emit32(mc, word);
- } else if (imm < 0 && imm >= -0x10000) {
- /* MOVN Wd|Xd, #(~imm)16 */
- u32 inv = (u32)(~(u64)imm) & 0xffff;
- u32 word = (sf << 31) | 0x12800000u | (inv << 5) | rd;
+ /* Effective bit-width: 32 unless we're materializing into Xd. The 32-bit
+ * encoding zero-extends the result, so we mask the value to 32 bits when
+ * sf == 0 so a "negative" int constant materializes its low 32 bits
+ * exactly (caller's responsibility to keep that in range). */
+ const u32 nslots = sf ? 4u : 2u;
+ u64 v = sf ? (u64)imm : ((u64)imm & 0xffffffffu);
+
+ /* Single MOVZ — fits in one 16-bit slot. */
+ for (u32 i = 0; i < nslots; ++i) {
+ u64 slot = (v >> (i * 16)) & 0xffffu;
+ u64 cleared = v & ~((u64)0xffffu << (i * 16));
+ if (slot != 0 && cleared == 0) {
+ u32 word = (sf << 31) | 0x52800000u
+ | ((u32)i << 21)
+ | (((u32)slot & 0xffff) << 5) | rd;
+ emit32(mc, word);
+ return;
+ }
+ if (cleared == ~(u64)0 || (sf == 0 && cleared == 0xffffffff00000000ull
+ /* unreachable for sf==0 */)) {
+ /* MOVN: ~imm fits in this slot, all other slots are 1s. */
+ u64 inv_slot = (~v >> (i * 16)) & 0xffffu;
+ u32 word = (sf << 31) | 0x12800000u
+ | ((u32)i << 21)
+ | (((u32)inv_slot & 0xffff) << 5) | rd;
+ emit32(mc, word);
+ return;
+ }
+ }
+
+ /* Special-case for sf==0: any 32-bit value can be expressed as MOVN of
+ * the inverted slot when one slot is all-ones in the inverse. We already
+ * handled cleared==0; the sf==0 MOVN case above didn't trigger (its
+ * "all other slots are 1s" check uses 64-bit ones). Try MOVN explicitly. */
+ if (sf == 0) {
+ u64 nv = (~v) & 0xffffffffu;
+ for (u32 i = 0; i < 2; ++i) {
+ u64 slot = (nv >> (i * 16)) & 0xffffu;
+ u64 cleared = nv & ~((u64)0xffffu << (i * 16));
+ if (cleared == 0) {
+ u32 word = 0x12800000u | ((u32)i << 21)
+ | (((u32)slot & 0xffff) << 5) | rd;
+ emit32(mc, word);
+ return;
+ }
+ }
+ }
+
+ /* General path: MOVZ for the lowest non-zero slot, then MOVK for any
+ * other non-zero slot. For sf==0 with imm having both halves nonzero
+ * (e.g. 0xABCD0001), this is MOVZ + MOVK. For sf==1, up to 4 insns. */
+ int placed = 0;
+ for (u32 i = 0; i < nslots; ++i) {
+ u64 slot = (v >> (i * 16)) & 0xffffu;
+ if (!placed) {
+ /* First MOVZ (always emit even if slot==0 when v==0; that path
+ * was caught by the single-MOVZ branch above, so here v!=0 and
+ * we skip zero slots until the first nonzero one). */
+ if (slot == 0) continue;
+ u32 word = (sf << 31) | 0x52800000u
+ | ((u32)i << 21)
+ | (((u32)slot & 0xffff) << 5) | rd;
+ emit32(mc, word);
+ placed = 1;
+ } else if (slot != 0) {
+ /* MOVK Wd|Xd, #imm16, lsl #(16*i) */
+ u32 word = (sf << 31) | 0x72800000u
+ | ((u32)i << 21)
+ | (((u32)slot & 0xffff) << 5) | rd;
+ emit32(mc, word);
+ }
+ }
+ if (!placed) {
+ /* v == 0 path, already handled above; defensive emit MOVZ #0. */
+ u32 word = (sf << 31) | 0x52800000u | rd;
emit32(mc, word);
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 load_imm: imm %lld out of 16-bit range",
- (long long)imm);
}
}
diff --git a/src/core/core.c b/src/core/core.c
@@ -9,15 +9,15 @@
* cleanups, and longjmp's c->panic. Top-level entry points install a
* setjmp boundary and use compiler_panic_save/restore to nest.
*
- * abi is left NULL until a TargetABI implementation is wired in
- * (`src/abi/abi.c` is not required by the obj/elf path). Callers that
- * need ABI facts will trip a clean panic rather than a NULL deref. */
+ * abi is brought up here via abi_init using c->target; sizes, alignments,
+ * record layouts, and call classification go through it. */
#include "core/core.h"
#include "core/arena.h"
#include "core/heap.h"
#include "core/pool.h"
#include "core/diag.h"
+#include "abi/abi.h"
#include <cfree.h>
@@ -53,7 +53,9 @@ void compiler_init(Compiler* c, Target target, const CfreeEnv* env)
arena_init(c->scratch, h, 0);
c->sources = source_new(c);
- c->abi = NULL;
+
+ c->abi = abi_new(c);
+
c->cleanup = NULL;
}
@@ -66,6 +68,7 @@ void compiler_fini(Compiler* c)
* Run the stack defensively so memory still gets released. */
compiler_run_cleanups(c);
+ if (c->abi) { abi_free(c->abi); c->abi = NULL; }
if (c->sources) source_free(c->sources);
if (c->scratch) { arena_fini(c->scratch); h->free(h, c->scratch, sizeof(Arena)); }
if (c->tu) { arena_fini(c->tu); h->free(h, c->tu, sizeof(Arena)); }
diff --git a/src/core/pool.c b/src/core/pool.c
@@ -83,6 +83,7 @@ void pool_init(Pool* p, Heap* h)
p->entries = NULL;
p->nentries = 0;
p->entries_cap = 0;
+ p->type_cache = NULL;
table_rehash(p, POOL_INITIAL_TABLE_CAP);
/* Reserve entry 0 as the "none" sentinel. */
if (entries_grow(p) == 0) {
diff --git a/src/core/pool.h b/src/core/pool.h
@@ -26,6 +26,10 @@ struct Pool {
PoolEntry* entries;
u32 nentries;
u32 entries_cap;
+
+ /* Lazily-initialized type interning cache (defined in src/type/type.c).
+ * Opaque to other consumers; type.c casts as needed. */
+ void* type_cache;
};
void pool_init(Pool*, Heap*);
diff --git a/src/type/type.c b/src/type/type.c
@@ -0,0 +1,318 @@
+/* C type construction.
+ *
+ * Types are interned per-Pool: a single `type_void(pool)` returns the same
+ * Type* on every call against the same pool, and structurally-equal calls
+ * to type_prim/type_ptr/type_func collapse to the same Type*. The cache is
+ * a small open structure stored through Pool.type_cache (opaque to other
+ * consumers).
+ *
+ * Storage: every Type and every supporting array (TY_FUNC param vectors,
+ * TY_STRUCT field arrays) is allocated from the Pool's arena, so pointers
+ * are stable for the Pool's lifetime.
+ *
+ * v1 covers what the cg test harness drives:
+ * void / scalars / pointer / function / struct / union
+ * Other constructors (array, qualified, enum) and predicates have minimal
+ * implementations sufficient for the cg test surface; they will grow with
+ * the parser. */
+
+#include "type/type.h"
+#include "core/pool.h"
+#include "core/arena.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#define NUM_PRIM_KINDS ((unsigned)TY_LDOUBLE + 1u)
+
+typedef struct TypeListNode TypeListNode;
+struct TypeListNode {
+ TypeListNode* next;
+ Type ty;
+};
+
+typedef struct PoolTypeCache {
+ /* Direct slots for void + primitive kinds (TY_VOID..TY_LDOUBLE). */
+ const Type* prim[NUM_PRIM_KINDS];
+ /* Linked list of every other type allocated through this pool. */
+ TypeListNode* derived;
+ /* Tag id allocator (1-based; TAG_NONE = 0). */
+ u32 next_tag;
+} PoolTypeCache;
+
+static PoolTypeCache* cache_get(Pool* p)
+{
+ PoolTypeCache* c = (PoolTypeCache*)p->type_cache;
+ if (c) return c;
+ c = arena_new(&p->arena, PoolTypeCache);
+ if (!c) return NULL;
+ memset(c, 0, sizeof *c);
+ c->next_tag = 1;
+ p->type_cache = c;
+ return c;
+}
+
+static Type* alloc_type_node(Pool* p, PoolTypeCache* c)
+{
+ TypeListNode* n = arena_new(&p->arena, TypeListNode);
+ if (!n) return NULL;
+ memset(n, 0, sizeof *n);
+ n->next = c->derived;
+ c->derived = n;
+ return &n->ty;
+}
+
+const Type* type_void(Pool* p)
+{
+ return type_prim(p, TY_VOID);
+}
+
+const Type* type_prim(Pool* p, TypeKind kind)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ if ((unsigned)kind >= NUM_PRIM_KINDS) return NULL;
+ if (c->prim[kind]) return c->prim[kind];
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = (u16)kind;
+ t->qual = 0;
+ c->prim[kind] = t;
+ return t;
+}
+
+const Type* type_ptr(Pool* p, const Type* pointee)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ /* Linear search; small N in practice. */
+ for (TypeListNode* n = c->derived; n; n = n->next) {
+ if (n->ty.kind == TY_PTR && n->ty.qual == 0 &&
+ n->ty.ptr.pointee == pointee) {
+ return &n->ty;
+ }
+ }
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = TY_PTR;
+ t->qual = 0;
+ t->ptr.pointee = pointee;
+ return t;
+}
+
+const Type* type_array(Pool* p, const Type* elem, u32 count, int incomplete)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ for (TypeListNode* n = c->derived; n; n = n->next) {
+ if (n->ty.kind == TY_ARRAY && n->ty.qual == 0 &&
+ n->ty.arr.elem == elem &&
+ n->ty.arr.count == count &&
+ n->ty.arr.incomplete == (u8)(incomplete ? 1 : 0)) {
+ return &n->ty;
+ }
+ }
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = TY_ARRAY;
+ t->qual = 0;
+ t->arr.elem = elem;
+ t->arr.count = count;
+ t->arr.incomplete = (u8)(incomplete ? 1 : 0);
+ return t;
+}
+
+static int param_arrays_eq(const Type* const* a, const Type* const* b, u16 n)
+{
+ for (u16 i = 0; i < n; ++i) if (a[i] != b[i]) return 0;
+ return 1;
+}
+
+const Type* type_func(Pool* p, const Type* ret, const Type** params, u16 n,
+ int variadic)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ for (TypeListNode* nd = c->derived; nd; nd = nd->next) {
+ if (nd->ty.kind == TY_FUNC && nd->ty.qual == 0 &&
+ nd->ty.fn.ret == ret &&
+ nd->ty.fn.nparams == n &&
+ nd->ty.fn.variadic == (u8)(variadic ? 1 : 0) &&
+ param_arrays_eq(nd->ty.fn.params, params, n)) {
+ return &nd->ty;
+ }
+ }
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = TY_FUNC;
+ t->qual = 0;
+ t->fn.ret = ret;
+ t->fn.nparams = n;
+ t->fn.variadic = (u8)(variadic ? 1 : 0);
+ if (n) {
+ const Type** dst = arena_array(&p->arena, const Type*, n);
+ if (!dst) return NULL;
+ for (u16 i = 0; i < n; ++i) dst[i] = params[i];
+ t->fn.params = dst;
+ } else {
+ t->fn.params = NULL;
+ }
+ return t;
+}
+
+const Type* type_qualified(Pool* p, const Type* base, u16 qual)
+{
+ if (!base || qual == 0) return base;
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ for (TypeListNode* n = c->derived; n; n = n->next) {
+ if (n->ty.kind == base->kind && n->ty.qual == qual) {
+ /* Compare body bytes other than qual. Cheap: types are POD. */
+ Type tmpl = *base;
+ tmpl.qual = qual;
+ if (memcmp(&n->ty, &tmpl, sizeof(Type)) == 0) return &n->ty;
+ }
+ }
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ *t = *base;
+ t->qual = qual;
+ return t;
+}
+
+/* ---- aggregates ---- */
+
+struct TypeRecordBuilder {
+ Pool* pool;
+ TypeKind kind; /* TY_STRUCT or TY_UNION */
+ TagId tag_id;
+ Sym tag;
+ Field* fields;
+ u32 nfields;
+ u32 cap;
+};
+
+TagId type_tag_new(Pool* p, TagDeclKind kind, Sym spelling, SrcLoc loc)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return TAG_NONE;
+ (void)kind; (void)spelling; (void)loc;
+ return (TagId)(c->next_tag++);
+}
+
+const TagDecl* type_tag_get(Pool* p, TagId id)
+{
+ (void)p; (void)id;
+ /* TagDecl table is parser-territory; not modeled in v1. */
+ return NULL;
+}
+
+TypeRecordBuilder* type_record_begin(Pool* p, TypeKind kind, TagId tag_id, Sym tag)
+{
+ TypeRecordBuilder* b = arena_new(&p->arena, TypeRecordBuilder);
+ if (!b) return NULL;
+ memset(b, 0, sizeof *b);
+ b->pool = p;
+ b->kind = kind;
+ b->tag_id = tag_id;
+ b->tag = tag;
+ return b;
+}
+
+void type_record_field(TypeRecordBuilder* b, Field f)
+{
+ if (b->nfields == b->cap) {
+ u32 nc = b->cap ? b->cap * 2 : 4;
+ Field* nf = arena_array(&b->pool->arena, Field, nc);
+ if (!nf) return;
+ if (b->fields) memcpy(nf, b->fields, sizeof(Field) * b->nfields);
+ b->fields = nf;
+ b->cap = nc;
+ }
+ b->fields[b->nfields++] = f;
+}
+
+const Type* type_record_end(Pool* p, TypeRecordBuilder* b)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = (u16)b->kind;
+ t->qual = 0;
+ t->rec.tag_id = b->tag_id;
+ t->rec.tag = b->tag;
+ t->rec.fields = b->fields;
+ t->rec.nfields = (u16)b->nfields;
+ t->rec.incomplete= 0;
+ return t;
+}
+
+const Type* type_enum(Pool* p, TagId tag_id, Sym tag, const Type* base)
+{
+ PoolTypeCache* c = cache_get(p);
+ if (!c) return NULL;
+ Type* t = alloc_type_node(p, c);
+ if (!t) return NULL;
+ t->kind = TY_ENUM;
+ t->qual = 0;
+ t->enm.tag_id = tag_id;
+ t->enm.tag = tag;
+ t->enm.base = base;
+ return t;
+}
+
+/* ---- predicates / utilities ---- */
+
+const Type* type_unqual(Pool* p, const Type* t)
+{
+ if (!t || t->qual == 0) return t;
+ return type_qualified(p, t, 0);
+}
+
+const Type* type_promoted(Pool* p, const Type* t)
+{
+ if (!t) return t;
+ switch (t->kind) {
+ case TY_BOOL: case TY_CHAR: case TY_SCHAR: case TY_UCHAR:
+ case TY_SHORT: case TY_USHORT:
+ return type_prim(p, TY_INT);
+ default:
+ return t;
+ }
+}
+
+int type_compatible(const Type* a, const Type* b)
+{
+ if (a == b) return 1;
+ if (!a || !b) return 0;
+ if (a->kind != b->kind) return 0;
+ /* Strict structural compatibility past identity is parser territory; v1
+ * relies on interning for the common cases. */
+ return 0;
+}
+
+int type_is_int(const Type* t)
+{
+ if (!t) return 0;
+ switch (t->kind) {
+ case TY_BOOL: case TY_CHAR: case TY_SCHAR: case TY_UCHAR:
+ case TY_SHORT: case TY_USHORT:
+ case TY_INT: case TY_UINT:
+ case TY_LONG: case TY_ULONG:
+ case TY_LLONG: case TY_ULLONG:
+ case TY_ENUM:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int type_is_arith(const Type* t)
+{
+ if (!t) return 0;
+ if (type_is_int(t)) return 1;
+ return t->kind == TY_FLOAT || t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE;
+}
+
+int type_is_ptr(const Type* t) { return t && t->kind == TY_PTR; }