kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 7a985cce08172385f81d3fa7e74540ffc3ed037a
parent 81d88fcf49dea5f3f1d389ac360bf7c588c9d3ff
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  5 Jun 2026 16:30:15 -0700

Defer O1 local static data symbols

Diffstat:
M doc/plan/TODO.md | 36 ++++++++++++++++++++----------------
M src/cg/data.c | 4 ++--
M src/cg/memory.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
M src/cg/native_direct_target.c | 4 ++--
M src/obj/obj.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++--------------
M src/obj/obj.h | 7 +++++++
M src/opt/pass_native_emit.c | 5 +++--
M test/opt/run.sh | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 196 insertions(+), 42 deletions(-)

diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -34,27 +34,31 @@ undefined `.Lcfblk.N` (visible via `kit nm`). O0 is fine (the block is kept). The plain `goto *p; done:` form is also fine because `done` is a real goto target, so its block is retained. +This is distinct from the fixed `.Lkit_jt.*` / `.Lkit_ro.*` DCE leak class: +those were unreferenced orphan data symbols minted before O1 replay. `.Lcfblk.N` +is a referenced MC/code-label symbol; the object has live text relocations to +it, so the linker is correctly rejecting the undefined symbol. + Minimal repro: ```c -static int ext(void) { return 7; } -int main(void) { - int x; -before:; - x = ext(); -after:; - void* lo = &&before; /* address-taken, never a goto target */ - void* hi = &&after; - return (lo != hi) ? 42 : x; -} +int p(void) { L: return &&L != 0; } +``` + +This emits a live text relocation against an undefined `.Lcfblk.N` at O1 and +`kit run -O1` fails to link. The same construct inside a dead inline is clean: + +```c +static inline void *q(void) { L: return &&L; } +int p(void) { return 0; } ``` -`kit run -O1` on the above fails to link; `kit run -O0` returns 42. Likely in -the O1 CFG/block-merge passes (`src/opt/pass_cfg.c` / block dedup): a block that -is only referenced by a label-address relocation must be pinned (not merged or -dropped), or the relocation retargeted to the surviving block. Found while -writing the backtrace anchor test (doc/plan/BACKTRACE.md); worked around there -by anchoring on a function address instead of `&&label`. +Likely in the O1 CFG/block-merge passes (`src/opt/pass_cfg.c` / block dedup): +the target block of `IR_LOAD_LABEL_ADDR` must be treated as address-taken/live +through CFG cleanup and native emission, or any CFG merge/elision must retarget +the label-address relocation to the surviving block label. Found while writing +the backtrace anchor test (doc/plan/BACKTRACE.md); worked around there by +anchoring on a function address instead of `&&label`. 
## `__kit_syscallN` (`rt/include/kit/syscall.h`) is declared/documented but unimplemented diff --git a/src/cg/data.c b/src/cg/data.c @@ -794,8 +794,8 @@ ObjSymId api_emit_label_table(KitCg* g, const Label* labels, u32 n) { strbuf_put_u64(&name_sb, g->rodata_counter++); name = pool_intern_slice(c->global, (Slice){.s = strbuf_cstr(&name_sb), .len = strbuf_len(&name_sb)}); - sym = obj_symbol(g->obj, name, SB_LOCAL, SK_OBJ, OBJ_SEC_NONE, 0, - (u64)n * (u64)c->target.ptr_size); + sym = obj_symbol_defer(g->obj, name, SB_LOCAL, SV_DEFAULT, SK_OBJ, + (u64)n * (u64)c->target.ptr_size); if (sym == OBJ_SYM_NONE) compiler_panic(c, g->cur_loc, "api_emit_label_table: symbol failed"); diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -73,6 +73,39 @@ void kit_cg_push_null(KitCg* g, KitCgTypeId ptr_type) { api_push(g, api_make_sv(api_op_imm(0, ty), ty)); } +static int api_const_data_can_defer(const KitCg* g) { + if (!g || g->opt_level < 1) return 0; + if (g->fn_ret_type == KIT_CG_TYPE_NONE) return 0; + if (g->data_sym != OBJ_SYM_NONE || g->data_sec != OBJ_SEC_NONE) return 0; + if (g->data_local_static_target || g->data_discard || + g->data_tls_collect) { + return 0; + } + return g->target && g->target->local_static_data_begin && + g->target->local_static_data_write && + g->target->local_static_data_end; +} + +static int api_const_data_emit_deferred(KitCg* g, ObjSymId sym, + KitCgTypeId type, const uint8_t* data, + size_t len, uint32_t align) { + CGLocalStaticDataDesc desc; + KitCgDataDefAttrs data_attrs; + if (!api_const_data_can_defer(g)) return 0; + memset(&data_attrs, 0, sizeof data_attrs); + data_attrs.flags = KIT_CG_DATADEF_FUNCTION_LOCAL | KIT_CG_DATADEF_READONLY; + data_attrs.align = align; + memset(&desc, 0, sizeof desc); + desc.sym = sym; + desc.type = type; + desc.attrs = data_attrs; + desc.align = align; + if (!g->target->local_static_data_begin(g->target, &desc)) return 0; + if (len) g->target->local_static_data_write(g->target, data, (u64)len); + 
g->target->local_static_data_end(g->target); + return 1; +} + KitCgSym kit_cg_const_data(KitCg* g, const uint8_t* data, size_t len, uint32_t align, KitCgTypeId pointee_type) { Compiler* c; @@ -86,30 +119,39 @@ KitCgSym kit_cg_const_data(KitCg* g, const uint8_t* data, size_t len, Sym anon_name; ObjSymId sym; KitCgDecl attrs; + int defer; if (!g) return KIT_CG_SYM_NONE; c = g->c; ob = g->obj; pty = resolve_type(c, pointee_type); if (!pty) return KIT_CG_SYM_NONE; + align = align ? align : (u32)abi_cg_alignof(c->abi, pointee_type); sec_name = pool_intern_slice(c->global, SLICE_LIT(".rodata")); - sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, - align ? align : (u32)abi_cg_alignof(c->abi, pointee_type)); - base = obj_align_to( - ob, sec, align ? align : (u32)abi_cg_alignof(c->abi, pointee_type)); - obj_write(ob, sec, data, len); strbuf_init(&name_sb, name_buf, sizeof(name_buf)); strbuf_put_slice(&name_sb, SLICE_LIT(".Lkit_ro.")); strbuf_put_u64(&name_sb, g->rodata_counter++); anon_name = pool_intern_slice( c->global, (Slice){.s = strbuf_cstr(&name_sb), .len = strbuf_len(&name_sb)}); - sym = obj_symbol(ob, anon_name, SB_LOCAL, SK_OBJ, sec, base, (u64)len); + defer = api_const_data_can_defer(g); + sym = defer ? 
obj_symbol_defer(ob, anon_name, SB_LOCAL, SV_DEFAULT, SK_OBJ, + (u64)len) + : obj_symbol(ob, anon_name, SB_LOCAL, SK_OBJ, OBJ_SEC_NONE, 0, + (u64)len); + if (sym == OBJ_SYM_NONE) + compiler_panic(c, g->cur_loc, "kit_cg_const_data: symbol failed"); memset(&attrs, 0, sizeof(attrs)); attrs.kind = KIT_CG_DECL_OBJECT; attrs.sym.bind = KIT_SB_LOCAL; attrs.sym.visibility = KIT_CG_VIS_DEFAULT; attrs.as.object.flags = KIT_CG_OBJ_READONLY; api_remember_sym(g, sym, pty, attrs); + if (defer && api_const_data_emit_deferred(g, sym, pty, data, len, align)) + return (KitCgSym)sym; + sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, align); + base = obj_align_to(ob, sec, align); + obj_write(ob, sec, data, len); + obj_symbol_define_live(ob, sym, sec, base, (u64)len); return (KitCgSym)sym; } diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c @@ -1179,8 +1179,8 @@ static void nd_local_static_data_label_addr(CgTarget* t, Label target, static void nd_local_static_data_end(CgTarget* t) { NativeDirectTarget* d = nd_of(t); if (!d->local_static_active) return; - obj_symbol_define(t->obj, d->local_static_sym, d->local_static_sec, - d->local_static_base, d->local_static_size); + obj_symbol_define_live(t->obj, d->local_static_sym, d->local_static_sec, + d->local_static_base, d->local_static_size); d->local_static_active = 0; d->local_static_sec = OBJ_SEC_NONE; d->local_static_sym = OBJ_SYM_NONE; diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -519,15 +519,10 @@ void obj_patch(ObjBuilder* ob, ObjSecId id, u32 ofs, const void* data, if (s) buf_patch(&s->bytes, ofs, data, n); } -ObjSymId obj_symbol(ObjBuilder* ob, Sym name, SymBind bind, SymKind kind, - ObjSecId section_id, u64 value, u64 size) { - return obj_symbol_ex(ob, name, bind, SV_DEFAULT, kind, section_id, value, - size, 0); -} - -ObjSymId obj_symbol_ex(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, - SymKind kind, ObjSecId section_id, u64 value, u64 size, - u64 common_align) { +static ObjSymId 
obj_symbol_make(ObjBuilder* ob, Sym name, SymBind bind, + SymVis vis, SymKind kind, + ObjSecId section_id, u64 value, u64 size, + u64 common_align, int index_name) { u32 id; ObjSym* s = Symbols_push(&ob->symbols, &id); if (!s) return OBJ_SYM_NONE; @@ -543,16 +538,40 @@ ObjSymId obj_symbol_ex(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, /* First-wins: record the lowest id for this name so obj_symbol_find returns * the same symbol the linear scan would. Later same-name symbols (legal for * STB_LOCAL) do not overwrite. */ - if (name && !SymNameIndex_get(&ob->sym_by_name, name)) + if (index_name && name && !SymNameIndex_get(&ob->sym_by_name, name)) (void)SymNameIndex_set(&ob->sym_by_name, name, (ObjSymId)id); return (ObjSymId)id; } +ObjSymId obj_symbol(ObjBuilder* ob, Sym name, SymBind bind, SymKind kind, + ObjSecId section_id, u64 value, u64 size) { + return obj_symbol_ex(ob, name, bind, SV_DEFAULT, kind, section_id, value, + size, 0); +} + +ObjSymId obj_symbol_ex(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, + SymKind kind, ObjSecId section_id, u64 value, u64 size, + u64 common_align) { + return obj_symbol_make(ob, name, bind, vis, kind, section_id, value, size, + common_align, 1); +} + +ObjSymId obj_symbol_defer(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, + SymKind kind, u64 size) { + ObjSymId id; + ObjSym* s; + id = obj_symbol_make(ob, name, bind, vis, kind, OBJ_SEC_NONE, 0, size, 0, 0); + if (id == OBJ_SYM_NONE) return OBJ_SYM_NONE; + s = Symbols_at(&ob->symbols, id); + if (s) s->removed = 1; + return id; +} + ObjSymId obj_symbol_find(ObjBuilder* ob, Sym name) { - /* Authoritative O(1) lookup — never a linear scan. Every symbol is created - * through obj_symbol_ex (the only Symbols_push besides the id-0 sentinel), - * which indexes it, and obj_symbol_rename keeps the index exact, so the map - * always holds the first id for a live name. */ + /* Authoritative O(1) lookup — never a linear scan. 
Normal/live symbols are + * indexed when created or published, and obj_symbol_rename keeps the index + * exact. Deferred symbols deliberately stay out of this map until published. + */ ObjSymId* hit; if (!ob || !name) return OBJ_SYM_NONE; hit = SymNameIndex_get(&ob->sym_by_name, name); @@ -571,6 +590,22 @@ void obj_symbol_define(ObjBuilder* ob, ObjSymId id, ObjSecId section_id, if (s->kind == SK_UNDEF) s->kind = SK_OBJ; } +void obj_symbol_define_live(ObjBuilder* ob, ObjSymId id, ObjSecId section_id, + u64 value, u64 size) { + ObjSym* s; + ObjSymId* slot; + obj_symbol_define(ob, id, section_id, value, size); + if (!ob || id == OBJ_SYM_NONE) return; + s = Symbols_at(&ob->symbols, id); + if (!s) return; + s->removed = 0; + if (s->name) { + slot = SymNameIndex_get(&ob->sym_by_name, s->name); + if (!slot || *slot > id) + (void)SymNameIndex_set(&ob->sym_by_name, s->name, id); + } +} + void obj_symbol_set_flags(ObjBuilder* ob, ObjSymId id, u16 flags) { ObjSym* s; if (id == OBJ_SYM_NONE) return; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -448,6 +448,11 @@ ObjSymId obj_symbol(ObjBuilder*, Sym name, SymBind, SymKind, ObjSymId obj_symbol_ex(ObjBuilder*, Sym name, SymBind, SymVis, SymKind, ObjSecId section_id, u64 value, u64 size, u64 common_align); +/* Allocate a stable symbol id for data that may be discarded before emission. + * The returned symbol is tombstoned and not entered in the name index; callers + * must publish it with obj_symbol_define_live if the data is actually emitted. */ +ObjSymId obj_symbol_defer(ObjBuilder*, Sym name, SymBind, SymVis, SymKind, + u64 size); ObjSymId obj_symbol_find(ObjBuilder*, Sym name); /* obj_symbol_ex creates a symbol; obj_symbol_define fills in the * (section_id, value, size) fields of an already-created symbol. The pair @@ -455,6 +460,8 @@ ObjSymId obj_symbol_find(ObjBuilder*, Sym name); * needed for a relocation, and defined later when its definition is emitted. 
*/ void obj_symbol_define(ObjBuilder*, ObjSymId, ObjSecId section_id, u64 value, u64 size); +void obj_symbol_define_live(ObjBuilder*, ObjSymId, ObjSecId section_id, + u64 value, u64 size); void obj_reloc(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind, ObjSymId sym, i64 addend); diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c @@ -120,8 +120,9 @@ static void emit_local_static_label_addr(NativeEmitCtx* e, MCLabel target, static void emit_local_static_end(NativeEmitCtx* e, SrcLoc loc) { if (!e->local_static_active) emit_panic(e, loc, "local static data inactive"); - obj_symbol_define(e->target->obj, e->local_static_sym, e->local_static_sec, - e->local_static_base, e->local_static_size); + obj_symbol_define_live(e->target->obj, e->local_static_sym, + e->local_static_sec, e->local_static_base, + e->local_static_size); e->local_static_active = 0; e->local_static_sec = OBJ_SEC_NONE; e->local_static_sym = OBJ_SYM_NONE; diff --git a/test/opt/run.sh b/test/opt/run.sh @@ -52,4 +52,69 @@ for opt in -O0 -O1; do fi done +cat > "$WORK/dead_jt.c" <<'EOF' +#include <kit/cg.h> +int p(void) { return 0; } +EOF +"$KIT" cc -O1 -I "$ROOT/include" -c "$WORK/dead_jt.c" \ + -o "$WORK/dead_jt.o" > "$WORK/dead_jt.cc.out" 2>&1 +"$KIT" nm "$WORK/dead_jt.o" > "$WORK/dead_jt.nm" 2>&1 +if grep -q '\.Lkit_jt' "$WORK/dead_jt.nm"; then + printf 'O1 deferred jump-table check FAILED: dead inline leaked .Lkit_jt:\n' >&2 + sed 's/^/ | /' "$WORK/dead_jt.nm" >&2 + exit 1 +fi + +cat > "$WORK/live_jt.c" <<'EOF' +int p(int x) { + switch (x) { + case 0: return 3; + case 1: return 4; + case 2: return 5; + case 3: return 6; + case 4: return 7; + case 5: return 8; + case 6: return 9; + case 7: return 10; + default: return 11; + } +} +EOF +"$KIT" cc -O1 -c "$WORK/live_jt.c" \ + -o "$WORK/live_jt.o" > "$WORK/live_jt.cc.out" 2>&1 +"$KIT" nm "$WORK/live_jt.o" > "$WORK/live_jt.nm" 2>&1 +if grep -Eq '[[:space:]][uU][[:space:]]+\.Lkit_jt' "$WORK/live_jt.nm" || + ! 
grep -Eq '[[:space:]][dDrR][[:space:]]+\.Lkit_jt' "$WORK/live_jt.nm"; then + printf 'O1 deferred jump-table check FAILED: live switch table not defined:\n' >&2 + sed 's/^/ | /' "$WORK/live_jt.nm" >&2 + exit 1 +fi + +cat > "$WORK/dead_ro.c" <<'EOF' +static inline const char *q(void) { return "abc"; } +int p(void) { return 0; } +EOF +"$KIT" cc -O1 -c "$WORK/dead_ro.c" \ + -o "$WORK/dead_ro.o" > "$WORK/dead_ro.cc.out" 2>&1 +"$KIT" nm "$WORK/dead_ro.o" > "$WORK/dead_ro.nm" 2>&1 +if grep -q '\.Lkit_ro' "$WORK/dead_ro.nm"; then + printf 'O1 deferred const-data check FAILED: dead inline leaked .Lkit_ro:\n' >&2 + sed 's/^/ | /' "$WORK/dead_ro.nm" >&2 + exit 1 +fi + +cat > "$WORK/live_ro.c" <<'EOF' +extern int sink(const char *); +int p(void) { return sink("abc"); } +EOF +"$KIT" cc -O1 -c "$WORK/live_ro.c" \ + -o "$WORK/live_ro.o" > "$WORK/live_ro.cc.out" 2>&1 +"$KIT" nm "$WORK/live_ro.o" > "$WORK/live_ro.nm" 2>&1 +if grep -Eq '[[:space:]][uU][[:space:]]+\.Lkit_ro' "$WORK/live_ro.nm" || + ! grep -Eq '[[:space:]][dDrR][[:space:]]+\.Lkit_ro' "$WORK/live_ro.nm"; then + printf 'O1 deferred const-data check FAILED: live const data not defined:\n' >&2 + sed 's/^/ | /' "$WORK/live_ro.nm" >&2 + exit 1 +fi + printf 'tiny-inline: ok\n'