kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 69fac96d4e100e83ff5c3f79b043c4aad3d327fd
parent 7adf047d2fff8c690dbee32572964fb1dd63c8fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 10:35:48 -0700

debug: unify DWARF form decoders behind a section-parameterized reader

The line-program decoder carried its own copy of the form switch
(read_lp_form in dwarf_line.c) that read inline bytes from .debug_line
instead of .debug_info, duplicating the real decoder in dwarf_open.c and
silently dropping the strx/addr/ref forms. Factor the real decoder body
into dw_read_form_in(d, cu, sec, ...), make dw_read_form a thin wrapper
that passes &d->info, and route the line program through the same core
with &d->line and its owning CU. The section becomes data, not a second
control-flow copy; strx/addr/ref now resolve through the real CU.

Also drop genuinely dead code uncovered alongside: the no-op abbrev_fini
(teardown is abbrev_fini_heap) and the unused DebugStrTab struct.

No behavior change for DIE attribute decoding (dw_read_form still reads from .debug_info); `make lib` is green, and test-dwarf and test-debug pass.

(cherry picked from commit 7640ec0fd283141fc103fa7d4be8311c6b0b2f37)

Diffstat:
M src/debug/debug_abbrev.c | 27 +++------------------------
M src/debug/debug_internal.h | 23 ++++-------------------
M src/debug/dwarf_internal.h | 10 +++++++++-
M src/debug/dwarf_line.c | 71 ++---------------------------------------------------------------------
M src/debug/dwarf_open.c | 64 +++++++++++++++++++++++++++++++++++++---------------------------
5 files changed, 55 insertions(+), 140 deletions(-)

diff --git a/src/debug/debug_abbrev.c b/src/debug/debug_abbrev.c @@ -18,30 +18,9 @@ void abbrev_init(DebugAbbrevPool* p, Heap* h) { p->cap = 0; } -void abbrev_fini(DebugAbbrevPool* p) { - /* DebugAbbrev.attrs are heap-owned. Free them. */ - u32 i; - if (!p->items) return; - /* We need a heap pointer; we stash one in the first attr's address? - * Simpler: callers pass heap on intern; we keep heap here too. */ - (void)i; - /* Attrs are freed in abbrev_intern's parent state when abbrev_pool's - * heap is known. We'll rely on the call-site freeing through their - * heap. Since this fini doesn't have a heap, we leak the attrs unless - * callers explicitly free. To keep things simple, we do free here via - * a known heap stored on the parent debug — but that's wrong. Punt: - * abbrev_fini is called with the same heap that abbrev_init received, - * and in our codebase the only consumer is Debug whose heap is also - * the one we used. Use a side struct... actually, easier: leak; the - * abbrev pool lifetime is the Debug object which is per-TU, and Debug - * already manages all its own allocations. We document leakage of the - * attr arrays here, but since debug_free is the death point and the - * underlying heap is the host's, whose policy may release at compiler - * close anyway, we instead store heap in the pool. */ - /* Left for debug.c to call abbrev_fini_with_heap. */ -} - -/* Variant that does free attrs given a heap. */ +/* Teardown frees the per-abbrev attr arrays, which requires a heap — so the + * sole entry point is abbrev_fini_heap below. Callers (debug.c) hold the + * heap that abbrev_init received and pass it back here. */ static void abbrev_free_attrs(DebugAbbrevPool* p, Heap* h) { u32 i; for (i = 0; i < p->n; ++i) { diff --git a/src/debug/debug_internal.h b/src/debug/debug_internal.h @@ -159,29 +159,14 @@ typedef struct DebugFile { Sym base; /* interned remapped basename */ } DebugFile; -/* String table for .debug_str / .debug_line_str. 
- * Maps Sym → offset in section. We just key off Sym; the string content - * is whatever pool_str gives us. - * - * Both .debug_str and .debug_line_str use the same shape (separate - * instances). */ +/* Shared Sym/u32 hashmaps. SymToU32 backs the .debug_str / .debug_line_str + * string tables (Sym → byte offset; see StrTab in debug_emit.c). */ #include "core/hashmap.h" HASHMAP_DEFINE(SymToU32, Sym, u32, hash_u32); HASHMAP_DEFINE(U32ToU32, u32, u32, hash_u32); HASHMAP_DEFINE(PtrToU32, u64, u32, hash_u64); -typedef struct DebugStrTab { - Buf buf; /* raw bytes */ - SymToU32 by_sym; /* Sym → offset */ - /* Index ordering for .debug_str_offsets — only used by .debug_str. */ - u32* sym_seq; - u32 sym_seq_n; - u32 sym_seq_cap; - /* For non-Sym strings (e.g. composed paths), we use append_raw and the - * caller stores the returned offset themselves. */ -} DebugStrTab; - /* Loclist entry (Phase 5 placeholder; we register the storage but do not * yet emit .debug_loclists). */ typedef struct DebugLocListEntry { @@ -268,9 +253,9 @@ void form_sleb(Buf*, i64); size_t form_uleb_size(u64); size_t form_sleb_size(i64); -/* Abbrev pool ops (debug_abbrev.c) */ +/* Abbrev pool ops (debug_abbrev.c). Teardown is abbrev_fini_heap (declared + * in debug_abbrev.c) — it needs the heap to free the per-abbrev attr arrays. */ void abbrev_init(DebugAbbrevPool*, Heap*); -void abbrev_fini(DebugAbbrevPool*); /* Find or insert; attrs are copied. Returns 1-based code. */ u32 abbrev_intern(DebugAbbrevPool*, Heap*, u16 tag, u8 has_children, const DebugAbbrevAttr* attrs, u32 nattrs); diff --git a/src/debug/dwarf_internal.h b/src/debug/dwarf_internal.h @@ -314,9 +314,17 @@ typedef struct DwAttrValue { u32 block_len; } DwAttrValue; -/* Read attr value at *off using `form`. Updates *off. */ +/* Read attr value at *off using `form`. Updates *off. + * + * Reads inline form bytes from .debug_info (the DIE stream). 
The + * section-parameterized core dw_read_form_in lets other consumers (the + * line-number program in dwarf_line.c) decode the same forms out of a + * different section while resolving strp/line_strp/strx into the shared + * string sections exactly as the DIE reader does. */ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, u32* off, DwAttrValue* out); +void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec, + u32 form, i64 implicit_const, u32* off, DwAttrValue* out); /* DIE iteration helpers. */ typedef struct DwDie { diff --git a/src/debug/dwarf_line.c b/src/debug/dwarf_line.c @@ -101,73 +101,6 @@ static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt, return n; } -/* Wrapper around dw_read_form that reads from .debug_line bytes via a - * pseudo-CU configured with the line-program address size. */ -static void read_lp_form(KitDebugInfo* d, u32 form, u8 addr_size, u32* off, - DwAttrValue* out) { - /* This reads from .debug_line, not .debug_info — we duplicate the - * minimal subset we need (line_strp, strp, udata, data1/2/4/8, string). 
*/ - out->form = form; - out->u = 0; - out->str = ""; - out->block = NULL; - out->block_len = 0; - switch (form) { - case DW_FORM_string: - out->str = dw_cstr(d->line.data, d->line.size, off); - break; - case DW_FORM_strp: - out->u = dw_u32(d->line.data, d->line.size, off); - out->str = dw_str(d, (u32)out->u); - break; - case DW_FORM_line_strp: - out->u = dw_u32(d->line.data, d->line.size, off); - out->str = dw_line_str(d, (u32)out->u); - break; - case DW_FORM_data1: - out->u = dw_u8(d->line.data, d->line.size, off); - break; - case DW_FORM_data2: - out->u = dw_u16(d->line.data, d->line.size, off); - break; - case DW_FORM_data4: - out->u = dw_u32(d->line.data, d->line.size, off); - break; - case DW_FORM_data8: - out->u = dw_u64(d->line.data, d->line.size, off); - break; - case DW_FORM_udata: - out->u = dw_uleb(d->line.data, d->line.size, off); - break; - case DW_FORM_sdata: - (void)dw_sleb(d->line.data, d->line.size, off); - break; - case DW_FORM_data16: - *off += 16; - break; - case DW_FORM_block: - case DW_FORM_exprloc: { - u32 n = (u32)dw_uleb(d->line.data, d->line.size, off); - out->block = d->line.data + *off; - out->block_len = n; - *off += n; - } break; - case DW_FORM_block1: { - u32 n = dw_u8(d->line.data, d->line.size, off); - out->block = d->line.data + *off; - out->block_len = n; - *off += n; - } break; - case DW_FORM_flag: - out->u = dw_u8(d->line.data, d->line.size, off); - break; - default: - /* Unknown form — heuristic: skip 0 bytes. Caller may read garbage. */ - (void)addr_size; - break; - } -} - /* Build a fully-qualified path for file_index in lp. 
*/ static const char* build_file_norm(KitDebugInfo* d, DwLineProgram* lp, u32 idx) { @@ -273,7 +206,7 @@ void dw_build_line(KitDebugInfo* d, u32 cu_idx) { DwAttrValue v; const char* path = ""; for (j = 0; j < ndir_fmt; ++j) { - read_lp_form(d, dir_fmt[j].form, h.address_size, &off, &v); + dw_read_form_in(d, cu, &d->line, dir_fmt[j].form, 0, &off, &v); if (dir_fmt[j].content_type == DW_LNCT_path) { path = v.str ? v.str : ""; } @@ -298,7 +231,7 @@ void dw_build_line(KitDebugInfo* d, u32 cu_idx) { const char* path = ""; u32 dir_index = 0; for (j = 0; j < nfile_fmt; ++j) { - read_lp_form(d, file_fmt[j].form, h.address_size, &off, &v); + dw_read_form_in(d, cu, &d->line, file_fmt[j].form, 0, &off, &v); if (file_fmt[j].content_type == DW_LNCT_path) path = v.str ? v.str : ""; else if (file_fmt[j].content_type == DW_LNCT_directory_index) diff --git a/src/debug/dwarf_open.c b/src/debug/dwarf_open.c @@ -460,8 +460,13 @@ DwCu* dw_cu_at_die_offset(KitDebugInfo* d, u32 die_offset) { /* ---- form decoding ---------------------------------------------------- */ -void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, - u32* off, DwAttrValue* out) { +/* Section-parameterized form decoder. Inline form bytes are pulled from + * `sec` (.debug_info for DIE attributes, .debug_line for line-program + * file/dir entry-format values); strp/line_strp/strx still resolve into the + * shared string sections via the CU. This is the single source of truth — + * dw_read_form wires it to .debug_info; the line decoder passes &d->line. 
*/ +void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec, + u32 form, i64 implicit_const, u32* off, DwAttrValue* out) { out->form = form; out->u = 0; out->s = 0; @@ -471,16 +476,16 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, switch (form) { case DW_FORM_addr: if (cu->address_size == 8) - out->u = dw_u64(d->info.data, d->info.size, off); + out->u = dw_u64(sec->data, sec->size, off); else - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); break; case DW_FORM_data1: case DW_FORM_ref1: case DW_FORM_flag: case DW_FORM_strx1: case DW_FORM_addrx1: - out->u = dw_u8(d->info.data, d->info.size, off); + out->u = dw_u8(sec->data, sec->size, off); out->s = (i64)(i8)out->u; if (form == DW_FORM_strx1) out->str = dw_strx(d, cu, out->u); break; @@ -488,20 +493,20 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, case DW_FORM_ref2: case DW_FORM_strx2: case DW_FORM_addrx2: - out->u = dw_u16(d->info.data, d->info.size, off); + out->u = dw_u16(sec->data, sec->size, off); out->s = (i64)(i16)out->u; if (form == DW_FORM_strx2) out->str = dw_strx(d, cu, out->u); break; case DW_FORM_strx3: case DW_FORM_addrx3: - out->u = dw_u24(d->info.data, d->info.size, off); + out->u = dw_u24(sec->data, sec->size, off); if (form == DW_FORM_strx3) out->str = dw_strx(d, cu, out->u); break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_strx4: case DW_FORM_addrx4: - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); out->s = (i64)(i32)out->u; if (form == DW_FORM_strx4) out->str = dw_strx(d, cu, out->u); break; @@ -509,7 +514,7 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, case DW_FORM_ref8: case DW_FORM_ref_sig8: case DW_FORM_ref_sup8: - out->u = dw_u64(d->info.data, d->info.size, off); + out->u = dw_u64(sec->data, sec->size, off); out->s = (i64)out->u; break; case DW_FORM_data16: 
@@ -517,7 +522,7 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, *off += 16; break; case DW_FORM_sdata: - out->s = dw_sleb(d->info.data, d->info.size, off); + out->s = dw_sleb(sec->data, sec->size, off); out->u = (u64)out->s; break; case DW_FORM_udata: @@ -526,30 +531,30 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, case DW_FORM_addrx: case DW_FORM_loclistx: case DW_FORM_rnglistx: - out->u = dw_uleb(d->info.data, d->info.size, off); + out->u = dw_uleb(sec->data, sec->size, off); if (form == DW_FORM_strx) out->str = dw_strx(d, cu, out->u); break; case DW_FORM_string: - out->str = dw_cstr(d->info.data, d->info.size, off); + out->str = dw_cstr(sec->data, sec->size, off); break; case DW_FORM_strp: - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); out->str = dw_str(d, (u32)out->u); break; case DW_FORM_line_strp: - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); out->str = dw_line_str(d, (u32)out->u); break; case DW_FORM_strp_sup: case DW_FORM_ref_sup4: - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); break; case DW_FORM_sec_offset: - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); break; case DW_FORM_ref_addr: /* DWARF 5: 4 bytes for 32-bit DWARF (we don't support DWARF64). 
*/ - out->u = dw_u32(d->info.data, d->info.size, off); + out->u = dw_u32(sec->data, sec->size, off); break; case DW_FORM_flag_present: out->u = 1; @@ -559,37 +564,37 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, out->u = (u64)implicit_const; break; case DW_FORM_block1: { - u32 n = dw_u8(d->info.data, d->info.size, off); - out->block = d->info.data + *off; + u32 n = dw_u8(sec->data, sec->size, off); + out->block = sec->data + *off; out->block_len = n; out->u = n; *off += n; } break; case DW_FORM_block2: { - u32 n = dw_u16(d->info.data, d->info.size, off); - out->block = d->info.data + *off; + u32 n = dw_u16(sec->data, sec->size, off); + out->block = sec->data + *off; out->block_len = n; out->u = n; *off += n; } break; case DW_FORM_block4: { - u32 n = dw_u32(d->info.data, d->info.size, off); - out->block = d->info.data + *off; + u32 n = dw_u32(sec->data, sec->size, off); + out->block = sec->data + *off; out->block_len = n; out->u = n; *off += n; } break; case DW_FORM_block: case DW_FORM_exprloc: { - u32 n = (u32)dw_uleb(d->info.data, d->info.size, off); - out->block = d->info.data + *off; + u32 n = (u32)dw_uleb(sec->data, sec->size, off); + out->block = sec->data + *off; out->block_len = n; out->u = n; *off += n; } break; case DW_FORM_indirect: { - u32 ifrm = (u32)dw_uleb(d->info.data, d->info.size, off); - dw_read_form(d, cu, ifrm, 0, off, out); + u32 ifrm = (u32)dw_uleb(sec->data, sec->size, off); + dw_read_form_in(d, cu, sec, ifrm, 0, off, out); } break; default: /* Unknown form — best effort: skip nothing. */ @@ -597,6 +602,11 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, } } +void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, + u32* off, DwAttrValue* out) { + dw_read_form_in(d, cu, &d->info, form, implicit_const, off, out); +} + void dw_skip_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, u32* off) { DwAttrValue tmp;