kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 07644c78ebe68b366f0516109ee887de254b9c6e
parent f4e8879166746eed53379bf53e7012989dda8736
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 19:25:09 -0700

cg: support C local static label tables

Diffstat:
M include/cfree/cg.h | 3 +++
M lang/toy/parser.c | 2 ++
M src/arch/arch.h | 19 +++++++++++++++++++
M src/arch/c_target/emit.c | 233 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/c_target/internal.h | 28 ++++++++++++++++++++++++++++
M src/arch/c_target/target.c | 8 ++++++++
M src/cg/data.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cg/internal.h | 2 ++
D test/toy/cases/119_static_labeladdr_data.cbackend.skip | 1 -
D test/toy/cases/123_spec_demo.cbackend.skip | 1 -
10 files changed, 380 insertions(+), 2 deletions(-)

diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -919,6 +919,9 @@ typedef enum CfreeCgDataDefFlag { CFREE_CG_DATADEF_STRINGS = 1u << 2, CFREE_CG_DATADEF_READONLY = 1u << 3, CFREE_CG_DATADEF_ZERO_FILL = 1u << 4, + /* Static storage with function/block scope for source backends. Native + * object backends may still emit ordinary private data. */ + CFREE_CG_DATADEF_FUNCTION_LOCAL = 1u << 5, } CfreeCgDataDefFlag; typedef struct CfreeCgDataDefAttrs { diff --git a/lang/toy/parser.c b/lang/toy/parser.c @@ -743,6 +743,8 @@ static int toy_parse_let_stmt(ToyParser* p) { if (!toy_add_static_local_typed(p, name, ty, toy_ty, sym, is_var)) return 0; memset(&data_attrs, 0, sizeof data_attrs); + data_attrs.flags |= CFREE_CG_DATADEF_FUNCTION_LOCAL; + if (!is_var) data_attrs.flags |= CFREE_CG_DATADEF_READONLY; cfree_cg_data_begin(p->cg, sym, data_attrs); if (array_data_init) { CfreeCgTypeId elem_ty = cfree_cg_type_array_elem(p->c, ty); diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -632,6 +632,13 @@ typedef struct CGSwitchDesc { u8 pad[3]; } CGSwitchDesc; +typedef struct CGLocalStaticDataDesc { + ObjSymId sym; + CfreeCgTypeId type; + CfreeCgDataDefAttrs attrs; + u32 align; +} CGLocalStaticDataDesc; + typedef struct CGTarget CGTarget; struct CGTarget { /* Typed IR lowering context. Subclasses extend. */ @@ -788,6 +795,18 @@ struct CGTarget { * by indirect_branch inside the defining function's activation. */ void (*load_label_addr)(CGTarget*, Operand dst_reg, Label label); + /* Optional source-backend hook for function-local static data definitions + * that need function label scope, currently used for C `&&label` + * dispatch-table initializers. Returning non-zero from begin means the + * target consumes bytes/zeros/label addresses until end; ordinary object + * data emission is skipped for that definition. */ + int (*local_static_data_begin)(CGTarget*, const CGLocalStaticDataDesc*); + /* data == NULL means append len zero bytes. 
*/ + void (*local_static_data_write)(CGTarget*, const u8* data, u64 len); + void (*local_static_data_label_addr)(CGTarget*, Label target, i64 addend, + u32 width, u32 address_space); + void (*local_static_data_end)(CGTarget*); + /* ---- structured control flow ---- * Mirrors CG's scope ops. CG passes explicit break/continue targets so C * `for` continues can land on the increment expression rather than the loop diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -1637,6 +1637,223 @@ void c_indirect_branch(CGTarget* T, Operand addr, const Label* valid_targets, t->last_was_terminator = 1; } +/* ===== function-local static label-address data ===== */ + +static int c_is_local_static_sym(CTarget* t, ObjSymId sym) { + for (u32 i = 0; i < t->local_static_nsyms; ++i) { + if (t->local_static_syms[i] == sym) return 1; + } + return 0; +} + +static void c_mark_local_static_sym(CTarget* t, ObjSymId sym) { + Heap* h = t->c->ctx->heap; + if (sym == OBJ_SYM_NONE || c_is_local_static_sym(t, sym)) return; + if (t->local_static_nsyms + 1u > t->local_static_syms_cap) { + u32 oldcap = t->local_static_syms_cap; + u32 newcap = oldcap ? oldcap * 2u : 16u; + ObjSymId* ns = (ObjSymId*)h->realloc( + h, t->local_static_syms, oldcap * sizeof(*t->local_static_syms), + newcap * sizeof(*t->local_static_syms), _Alignof(ObjSymId)); + if (!ns) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: out of memory"); + } + t->local_static_syms = ns; + t->local_static_syms_cap = newcap; + } + t->local_static_syms[t->local_static_nsyms++] = sym; +} + +static void c_grow_local_static_entries(CTarget* t, u32 want) { + Heap* h = t->c->ctx->heap; + if (want <= t->local_static_entries_cap) return; + u32 oldcap = t->local_static_entries_cap; + u32 newcap = oldcap ? 
oldcap * 2u : 8u; + while (newcap < want) newcap *= 2u; + CLocalStaticLabelEntry* ne = (CLocalStaticLabelEntry*)h->realloc( + h, t->local_static_entries, + oldcap * sizeof(*t->local_static_entries), + newcap * sizeof(*t->local_static_entries), + _Alignof(CLocalStaticLabelEntry)); + if (!ne) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: out of memory"); + } + t->local_static_entries = ne; + t->local_static_entries_cap = newcap; +} + +int c_local_static_data_begin(CGTarget* T, + const CGLocalStaticDataDesc* desc) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (!t->cur_fn) { + compiler_panic(t->c, loc, + "C target: function-local static data outside function"); + } + if (t->local_static_active) { + compiler_panic(t->c, loc, + "C target: nested function-local static data definition"); + } + const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, desc->type)); + if (!ty) { + compiler_panic(t->c, loc, "C target: unknown local static type %u", + (unsigned)desc->type); + } + + u64 count = 1; + int is_array = 0; + CfreeCgTypeId elem = desc->type; + if (ty->kind == CFREE_CG_TYPE_ARRAY) { + is_array = 1; + count = ty->array.count; + elem = ty->array.elem; + ty = cg_type_get(t->c, api_unalias_type(t->c, elem)); + } + if (!ty || ty->kind != CFREE_CG_TYPE_PTR) { + return 0; + } + if (count > UINT32_MAX) { + compiler_panic(t->c, loc, + "C target: local static pointer table too large"); + } + + c_grow_local_static_entries(t, (u32)count); + for (u32 i = 0; i < (u32)count; ++i) { + t->local_static_entries[i].label = LABEL_NONE; + t->local_static_entries[i].addend = 0; + t->local_static_entries[i].has_label = 0; + } + t->local_static_nentries = (u32)count; + t->local_static_sym = desc->sym; + t->local_static_type = desc->type; + t->local_static_count = count; + t->local_static_offset = 0; + t->local_static_ptr_width = (u32)cg_type_size(t->c, elem); + t->local_static_align = desc->align ? 
desc->align : cg_type_align(t->c, + desc->type); + t->local_static_active = 1; + t->local_static_is_array = (u8)is_array; + t->local_static_readonly = + (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? 1u : 0u; + c_mark_local_static_sym(t, desc->sym); + return 1; +} + +void c_local_static_data_write(CGTarget* T, const u8* data, u64 len) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (!t->local_static_active || !len) return; + if (data) { + for (u64 i = 0; i < len; ++i) { + if (data[i] != 0) { + compiler_panic(t->c, loc, + "C target: function-local static label table supports " + "only zero bytes and label addresses"); + } + } + } + t->local_static_offset += len; +} + +void c_local_static_data_label_addr(CGTarget* T, Label target, i64 addend, + u32 width, u32 address_space) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + (void)address_space; + if (!t->local_static_active) { + compiler_panic(t->c, loc, + "C target: label address outside local static data"); + } + if (width != t->local_static_ptr_width) { + compiler_panic(t->c, loc, + "C target: label address width %u does not match pointer " + "width %u", + (unsigned)width, (unsigned)t->local_static_ptr_width); + } + if ((t->local_static_offset % t->local_static_ptr_width) != 0) { + compiler_panic(t->c, loc, + "C target: unaligned label address in local static data"); + } + u64 idx = t->local_static_offset / t->local_static_ptr_width; + if (idx >= t->local_static_count) { + compiler_panic(t->c, loc, + "C target: too many local static label table entries"); + } + CLocalStaticLabelEntry* e = &t->local_static_entries[(u32)idx]; + if (e->has_label) { + compiler_panic(t->c, loc, + "C target: duplicate local static label table entry"); + } + e->label = target; + e->addend = addend; + e->has_label = 1; + t->local_static_offset += width; +} + +static void c_emit_local_static_label_expr(CTarget* t, + const 
CLocalStaticLabelEntry* e) { + char lbuf[24]; + if (!e->has_label) { + cbuf_puts(&t->decls, "(void*)0"); + return; + } + if (e->addend == 0) { + cbuf_puts(&t->decls, "&&"); + c_label_name(e->label, lbuf, sizeof lbuf); + cbuf_puts(&t->decls, lbuf); + return; + } + cbuf_puts(&t->decls, "(void*)((char*)&&"); + c_label_name(e->label, lbuf, sizeof lbuf); + cbuf_puts(&t->decls, lbuf); + cbuf_puts(&t->decls, " + "); + cbuf_put_i64(&t->decls, e->addend); + cbuf_puts(&t->decls, ")"); +} + +void c_local_static_data_end(CGTarget* T) { + CTarget* t = (CTarget*)T; + SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; + if (!t->local_static_active) return; + u64 total_size = t->local_static_count * t->local_static_ptr_width; + if (t->local_static_offset > total_size) { + compiler_panic(t->c, loc, + "C target: local static initializer exceeds object size"); + } + const char* nm = c_sym_name(t, t->local_static_sym); + cbuf_puts(&t->decls, " static __attribute__((unused)) "); + cbuf_puts(&t->decls, "_Alignas("); + cbuf_put_u64(&t->decls, t->local_static_align ? 
t->local_static_align : 1); + cbuf_puts(&t->decls, ") void* "); + if (t->local_static_readonly) cbuf_puts(&t->decls, "const "); + cbuf_puts(&t->decls, nm); + if (t->local_static_is_array) { + cbuf_puts(&t->decls, "["); + cbuf_put_u64(&t->decls, t->local_static_count); + cbuf_puts(&t->decls, "]"); + } + cbuf_puts(&t->decls, " = {"); + for (u32 i = 0; i < t->local_static_nentries; ++i) { + if (i > 0) cbuf_putc(&t->decls, ','); + if ((i & 3u) == 0) cbuf_puts(&t->decls, "\n "); + c_emit_local_static_label_expr(t, &t->local_static_entries[i]); + } + cbuf_puts(&t->decls, "\n };\n"); + + t->local_static_active = 0; + t->local_static_sym = OBJ_SYM_NONE; + t->local_static_type = CFREE_CG_TYPE_NONE; + t->local_static_count = 0; + t->local_static_offset = 0; + t->local_static_ptr_width = 0; + t->local_static_align = 0; + t->local_static_nentries = 0; + t->local_static_is_array = 0; + t->local_static_readonly = 0; +} + /* ===== local, local_addr ===== */ CGLocalStorage c_local(CGTarget* T, const CGLocalDesc* d) { @@ -3040,6 +3257,7 @@ static void c_emit_sym_relocs_fixup(CTarget* t, const char* nm, * definition with bytes. Function symbols are skipped — those go through the * forwards path. */ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) { + if (c_is_local_static_sym(t, id)) return; if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return; if (os->kind == SK_SECTION || os->kind == SK_FILE) return; /* On Mach-O, obj_tls.c synthesizes `__tlv_bootstrap` as an SK_UNDEF @@ -3235,6 +3453,7 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) { * any relocation target later in the symbol order. 
*/ static void c_emit_data_symbol_fixups(CTarget* t, ObjSymId id, const ObjSym* os) { + if (c_is_local_static_sym(t, id)) return; if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return; if (os->kind == SK_SECTION || os->kind == SK_FILE) return; if (os->kind == SK_UNDEF || os->kind == SK_COMMON) return; @@ -3313,6 +3532,14 @@ void c_destroy(CGTarget* T) { if (t->sym_forwarded) h->free(h, t->sym_forwarded, t->sym_forwarded_cap); t->sym_forwarded = NULL; t->sym_forwarded_cap = 0; + if (t->local_static_syms) { + h->free(h, t->local_static_syms, + t->local_static_syms_cap * sizeof(*t->local_static_syms)); + } + if (t->local_static_entries) { + h->free(h, t->local_static_entries, + t->local_static_entries_cap * sizeof(*t->local_static_entries)); + } if (t->reg_declared) h->free(h, t->reg_declared, t->reg_cap); if (t->reg_type) h->free(h, t->reg_type, t->reg_cap * sizeof(CfreeCgTypeId)); @@ -3324,7 +3551,13 @@ void c_destroy(CGTarget* T) { t->reg_type = NULL; t->slot_type = NULL; t->scopes = NULL; + t->local_static_syms = NULL; + t->local_static_entries = NULL; t->reg_cap = 0; t->slot_cap = 0; t->scopes_cap = 0; + t->local_static_syms_cap = 0; + t->local_static_entries_cap = 0; + t->local_static_nsyms = 0; + t->local_static_nentries = 0; } diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h @@ -32,6 +32,13 @@ void cbuf_putn(CBuf* b, const char* s, size_t n); void cbuf_put_i64(CBuf* b, i64 v); void cbuf_put_u64(CBuf* b, u64 v); +typedef struct CLocalStaticLabelEntry { + Label label; + i64 addend; + u8 has_label; + u8 pad[3]; +} CLocalStaticLabelEntry; + typedef struct CTarget { CGTarget base; @@ -76,6 +83,27 @@ typedef struct CTarget { CBuf decls; CBuf body; + /* Function-local static data consumed from CG's narrow source-backend data + * hook. These symbols are emitted inside the owning function and skipped by + * the TU-wide object-data walker. 
*/ + ObjSymId* local_static_syms; + u32 local_static_nsyms; + u32 local_static_syms_cap; + + CLocalStaticLabelEntry* local_static_entries; + u32 local_static_nentries; + u32 local_static_entries_cap; + ObjSymId local_static_sym; + CfreeCgTypeId local_static_type; + u64 local_static_count; + u64 local_static_offset; + u32 local_static_ptr_width; + u32 local_static_align; + u8 local_static_active; + u8 local_static_is_array; + u8 local_static_readonly; + u8 pad_local_static; + /* Per-function regdecl tracking: for each Reg id seen, mark whether we * have already emitted a declaration into `decls`. Sized by reg_cap. * Grown lazily as new reg ids appear. */ diff --git a/src/arch/c_target/target.c b/src/arch/c_target/target.c @@ -39,6 +39,10 @@ void c_cmp_branch(CGTarget*, CmpOp, Operand, Operand, Label); void c_switch_(CGTarget*, const CGSwitchDesc*); void c_indirect_branch(CGTarget*, Operand, const Label*, u32); void c_load_label_addr(CGTarget*, Operand, Label); +int c_local_static_data_begin(CGTarget*, const CGLocalStaticDataDesc*); +void c_local_static_data_write(CGTarget*, const u8*, u64); +void c_local_static_data_label_addr(CGTarget*, Label, i64, u32, u32); +void c_local_static_data_end(CGTarget*); CGScope c_scope_begin(CGTarget*, const CGScopeDesc*); void c_scope_else(CGTarget*, CGScope); void c_scope_end(CGTarget*, CGScope); @@ -218,6 +222,10 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { t->switch_ = c_switch_; t->indirect_branch = c_indirect_branch; t->load_label_addr = c_load_label_addr; + t->local_static_data_begin = c_local_static_data_begin; + t->local_static_data_write = c_local_static_data_write; + t->local_static_data_label_addr = c_local_static_data_label_addr; + t->local_static_data_end = c_local_static_data_end; t->scope_begin = c_scope_begin; t->scope_else = c_scope_else; t->scope_end = c_scope_end; diff --git a/src/cg/data.c b/src/cg/data.c @@ -78,6 +78,24 @@ void cfree_cg_data_begin(CfreeCg* g, CfreeCgSym cg_sym, align 
= attrs.align ? attrs.align : (u32)abi_cg_alignof(c->abi, decl_attrs.type); + if ((attrs.flags & CFREE_CG_DATADEF_FUNCTION_LOCAL) && + g->target && g->target->local_static_data_begin) { + CGLocalStaticDataDesc desc; + memset(&desc, 0, sizeof desc); + desc.sym = sym; + desc.type = ty; + desc.attrs = attrs; + desc.align = align; + if (g->target->local_static_data_begin(g->target, &desc)) { + g->data_sec = OBJ_SEC_NONE; + g->data_sym = sym; + g->data_base = 0; + g->data_size = 0; + g->data_local_static_target = 1; + return; + } + } + if ((decl_attrs.as.object.flags & CFREE_CG_OBJ_TLS) && obj_format_tls_via_descriptor(c)) { g->data_sec = OBJ_SEC_NONE; @@ -171,6 +189,13 @@ void cfree_cg_data_common(CfreeCg* g, CfreeCgSym cg_sym, uint64_t size, } void cfree_cg_data_align(CfreeCg* g, uint32_t align) { + if (g && g->data_local_static_target) { + u32 a = align ? align : 1u; + u64 base = (g->data_size + (a - 1u)) & ~(u64)(a - 1u); + u64 pad = base - g->data_size; + if (pad) cfree_cg_data_zero(g, pad); + return; + } if (g && g->data_tls_collect) { u32 a = align ? 
align : 1u; u64 base = (g->data_size + (a - 1u)) & ~(u64)(a - 1u); @@ -192,6 +217,23 @@ void cfree_cg_data_align(CfreeCg* g, uint32_t align) { void cfree_cg_data_pad(CfreeCg* g, uint64_t size, uint8_t value) { u8 pad[64]; if (!g || !size) return; + if (g->data_local_static_target) { + if (value == 0) { + cfree_cg_data_zero(g, size); + return; + } + memset(pad, value, sizeof pad); + while (size >= sizeof pad) { + g->target->local_static_data_write(g->target, pad, sizeof pad); + size -= sizeof pad; + g->data_size += sizeof pad; + } + if (size) { + g->target->local_static_data_write(g->target, pad, size); + g->data_size += size; + } + return; + } if (g->data_tls_collect) { if (value == 0 && g->data_tls_zero_fill) { g->data_size += size; @@ -280,6 +322,11 @@ void cfree_cg_data_float(CfreeCg* g, double value, CfreeCgTypeId type) { void cfree_cg_data_bytes(CfreeCg* g, const uint8_t* data, size_t len) { if (!g || !len) return; + if (g->data_local_static_target) { + g->target->local_static_data_write(g->target, data, (u64)len); + g->data_size += len; + return; + } if (g->data_tls_collect) { api_data_tls_write(g, data, len); return; @@ -291,6 +338,11 @@ void cfree_cg_data_bytes(CfreeCg* g, const uint8_t* data, size_t len) { void cfree_cg_data_zero(CfreeCg* g, uint64_t size) { const Section* sec; if (!g || !size) return; + if (g->data_local_static_target) { + g->target->local_static_data_write(g->target, NULL, size); + g->data_size += size; + return; + } if (g->data_tls_collect) { if (g->data_tls_zero_fill) { g->data_size += size; @@ -345,6 +397,12 @@ void api_cg_data_reloc(CfreeCg* g, CfreeCgSym target, int64_t addend, void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend, uint32_t width, uint32_t address_space) { (void)address_space; + if (g && g->data_local_static_target) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_addr: function-local static data address " + "relocations are not yet supported by this target"); + return; + } api_cg_data_reloc(g, 
target, addend, width, 0); } @@ -362,6 +420,12 @@ void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, (unsigned)sizeof(pad), (unsigned)width); return; } + if (g->data_local_static_target) { + g->target->local_static_data_label_addr(g->target, (Label)target, addend, + width, address_space); + g->data_size += width; + return; + } if (!g->mc) { /* The C-source target has no MCEmitter and can't emit a relocation * that resolves to an intra-function label address: GCC's `&&L` @@ -417,6 +481,12 @@ void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend, uint32_t width) { + if (g && g->data_local_static_target) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_pcrel: function-local static pcrel data is " + "not yet supported by this target"); + return; + } api_cg_data_reloc(g, target, addend, width, 1); } @@ -426,6 +496,12 @@ void cfree_cg_data_symdiff(CfreeCg* g, CfreeCgSym lhs, CfreeCgSym rhs, RelocKind add_kind; RelocKind sub_kind; if (!g || width > sizeof(pad)) return; + if (g->data_local_static_target) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_symdiff: function-local static symdiff data " + "is not yet supported by this target"); + return; + } switch (width) { case 1: add_kind = R_RV_ADD8; @@ -465,6 +541,15 @@ void cfree_cg_data_end(CfreeCg* g) { Heap* h; u8* flat; if (!g) return; + if (g->data_local_static_target) { + g->target->local_static_data_end(g->target); + g->data_sec = OBJ_SEC_NONE; + g->data_sym = OBJ_SYM_NONE; + g->data_base = 0; + g->data_size = 0; + g->data_local_static_target = 0; + return; + } if (g->data_tls_collect) { h = (Heap*)g->c->ctx->heap; flat = NULL; diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -160,6 +160,8 @@ struct CfreeCg { ObjSymId data_sym; u32 data_base; u64 data_size; + u8 data_local_static_target; + u8 data_local_static_pad0[3]; u8 data_tls_collect; u8 data_tls_zero_fill; u8 
data_tls_pad[2]; diff --git a/test/toy/cases/119_static_labeladdr_data.cbackend.skip b/test/toy/cases/119_static_labeladdr_data.cbackend.skip @@ -1 +0,0 @@ -C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers diff --git a/test/toy/cases/123_spec_demo.cbackend.skip b/test/toy/cases/123_spec_demo.cbackend.skip @@ -1 +0,0 @@ -C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers