commit 69fac96d4e100e83ff5c3f79b043c4aad3d327fd
parent 7adf047d2fff8c690dbee32572964fb1dd63c8fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 3 Jun 2026 10:35:48 -0700
debug: unify DWARF form decoders behind a section-parameterized reader
The line-program decoder carried its own copy of the form switch
(read_lp_form in dwarf_line.c) that read inline bytes from .debug_line
instead of .debug_info, duplicating the real decoder in dwarf_open.c and
silently dropping the strx/addr/ref forms. Factor the real decoder body
into dw_read_form_in(d, cu, sec, ...), make dw_read_form a thin wrapper
that passes &d->info, and route the line program through the same core
with &d->line and its owning CU. The section becomes data, not a second
control-flow copy; strx/addr/ref now resolve through the real CU.
Also drop genuinely dead code uncovered alongside: the no-op abbrev_fini
(teardown is abbrev_fini_heap) and the unused DebugStrTab struct.
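No behavior change for DIE attribute decoding (dw_read_form still reads
from .debug_info). Line-program dir/file entries now also decode the forms
read_lp_form fell through on (strx*, addr*, ref*, block2/4, indirect, ...).
make lib green, test-dwarf and test-debug pass.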
(cherry picked from commit 7640ec0fd283141fc103fa7d4be8311c6b0b2f37)
Diffstat:
5 files changed, 55 insertions(+), 140 deletions(-)
diff --git a/src/debug/debug_abbrev.c b/src/debug/debug_abbrev.c
@@ -18,30 +18,9 @@ void abbrev_init(DebugAbbrevPool* p, Heap* h) {
p->cap = 0;
}
-void abbrev_fini(DebugAbbrevPool* p) {
- /* DebugAbbrev.attrs are heap-owned. Free them. */
- u32 i;
- if (!p->items) return;
- /* We need a heap pointer; we stash one in the first attr's address?
- * Simpler: callers pass heap on intern; we keep heap here too. */
- (void)i;
- /* Attrs are freed in abbrev_intern's parent state when abbrev_pool's
- * heap is known. We'll rely on the call-site freeing through their
- * heap. Since this fini doesn't have a heap, we leak the attrs unless
- * callers explicitly free. To keep things simple, we do free here via
- * a known heap stored on the parent debug — but that's wrong. Punt:
- * abbrev_fini is called with the same heap that abbrev_init received,
- * and in our codebase the only consumer is Debug whose heap is also
- * the one we used. Use a side struct... actually, easier: leak; the
- * abbrev pool lifetime is the Debug object which is per-TU, and Debug
- * already manages all its own allocations. We document leakage of the
- * attr arrays here, but since debug_free is the death point and the
- * underlying heap is the host's, whose policy may release at compiler
- * close anyway, we instead store heap in the pool. */
- /* Left for debug.c to call abbrev_fini_with_heap. */
-}
-
-/* Variant that does free attrs given a heap. */
+/* Teardown frees the per-abbrev attr arrays, which requires a heap — so the
+ * sole entry point is abbrev_fini_heap below. Callers (debug.c) hold the
+ * heap that abbrev_init received and pass it back here. */
static void abbrev_free_attrs(DebugAbbrevPool* p, Heap* h) {
u32 i;
for (i = 0; i < p->n; ++i) {
diff --git a/src/debug/debug_internal.h b/src/debug/debug_internal.h
@@ -159,29 +159,14 @@ typedef struct DebugFile {
Sym base; /* interned remapped basename */
} DebugFile;
-/* String table for .debug_str / .debug_line_str.
- * Maps Sym → offset in section. We just key off Sym; the string content
- * is whatever pool_str gives us.
- *
- * Both .debug_str and .debug_line_str use the same shape (separate
- * instances). */
+/* Shared Sym/u32 hashmaps. SymToU32 backs the .debug_str / .debug_line_str
+ * string tables (Sym → byte offset; see StrTab in debug_emit.c). */
#include "core/hashmap.h"
HASHMAP_DEFINE(SymToU32, Sym, u32, hash_u32);
HASHMAP_DEFINE(U32ToU32, u32, u32, hash_u32);
HASHMAP_DEFINE(PtrToU32, u64, u32, hash_u64);
-typedef struct DebugStrTab {
- Buf buf; /* raw bytes */
- SymToU32 by_sym; /* Sym → offset */
- /* Index ordering for .debug_str_offsets — only used by .debug_str. */
- u32* sym_seq;
- u32 sym_seq_n;
- u32 sym_seq_cap;
- /* For non-Sym strings (e.g. composed paths), we use append_raw and the
- * caller stores the returned offset themselves. */
-} DebugStrTab;
-
/* Loclist entry (Phase 5 placeholder; we register the storage but do not
* yet emit .debug_loclists). */
typedef struct DebugLocListEntry {
@@ -268,9 +253,9 @@ void form_sleb(Buf*, i64);
size_t form_uleb_size(u64);
size_t form_sleb_size(i64);
-/* Abbrev pool ops (debug_abbrev.c) */
+/* Abbrev pool ops (debug_abbrev.c). Teardown is abbrev_fini_heap (declared
+ * in debug_abbrev.c) — it needs the heap to free the per-abbrev attr arrays. */
void abbrev_init(DebugAbbrevPool*, Heap*);
-void abbrev_fini(DebugAbbrevPool*);
/* Find or insert; attrs are copied. Returns 1-based code. */
u32 abbrev_intern(DebugAbbrevPool*, Heap*, u16 tag, u8 has_children,
const DebugAbbrevAttr* attrs, u32 nattrs);
diff --git a/src/debug/dwarf_internal.h b/src/debug/dwarf_internal.h
@@ -314,9 +314,17 @@ typedef struct DwAttrValue {
u32 block_len;
} DwAttrValue;
-/* Read attr value at *off using `form`. Updates *off. */
+/* Read attr value at *off using `form`. Updates *off.
+ *
+ * Reads inline form bytes from .debug_info (the DIE stream). The
+ * section-parameterized core dw_read_form_in lets other consumers (the
+ * line-number program in dwarf_line.c) decode the same forms out of a
+ * different section while resolving strp/line_strp/strx into the shared
+ * string sections exactly as the DIE reader does. */
void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
u32* off, DwAttrValue* out);
+void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec,
+ u32 form, i64 implicit_const, u32* off, DwAttrValue* out);
/* DIE iteration helpers. */
typedef struct DwDie {
diff --git a/src/debug/dwarf_line.c b/src/debug/dwarf_line.c
@@ -101,73 +101,6 @@ static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt,
return n;
}
-/* Wrapper around dw_read_form that reads from .debug_line bytes via a
- * pseudo-CU configured with the line-program address size. */
-static void read_lp_form(KitDebugInfo* d, u32 form, u8 addr_size, u32* off,
- DwAttrValue* out) {
- /* This reads from .debug_line, not .debug_info — we duplicate the
- * minimal subset we need (line_strp, strp, udata, data1/2/4/8, string). */
- out->form = form;
- out->u = 0;
- out->str = "";
- out->block = NULL;
- out->block_len = 0;
- switch (form) {
- case DW_FORM_string:
- out->str = dw_cstr(d->line.data, d->line.size, off);
- break;
- case DW_FORM_strp:
- out->u = dw_u32(d->line.data, d->line.size, off);
- out->str = dw_str(d, (u32)out->u);
- break;
- case DW_FORM_line_strp:
- out->u = dw_u32(d->line.data, d->line.size, off);
- out->str = dw_line_str(d, (u32)out->u);
- break;
- case DW_FORM_data1:
- out->u = dw_u8(d->line.data, d->line.size, off);
- break;
- case DW_FORM_data2:
- out->u = dw_u16(d->line.data, d->line.size, off);
- break;
- case DW_FORM_data4:
- out->u = dw_u32(d->line.data, d->line.size, off);
- break;
- case DW_FORM_data8:
- out->u = dw_u64(d->line.data, d->line.size, off);
- break;
- case DW_FORM_udata:
- out->u = dw_uleb(d->line.data, d->line.size, off);
- break;
- case DW_FORM_sdata:
- (void)dw_sleb(d->line.data, d->line.size, off);
- break;
- case DW_FORM_data16:
- *off += 16;
- break;
- case DW_FORM_block:
- case DW_FORM_exprloc: {
- u32 n = (u32)dw_uleb(d->line.data, d->line.size, off);
- out->block = d->line.data + *off;
- out->block_len = n;
- *off += n;
- } break;
- case DW_FORM_block1: {
- u32 n = dw_u8(d->line.data, d->line.size, off);
- out->block = d->line.data + *off;
- out->block_len = n;
- *off += n;
- } break;
- case DW_FORM_flag:
- out->u = dw_u8(d->line.data, d->line.size, off);
- break;
- default:
- /* Unknown form — heuristic: skip 0 bytes. Caller may read garbage. */
- (void)addr_size;
- break;
- }
-}
-
/* Build a fully-qualified path for file_index in lp. */
static const char* build_file_norm(KitDebugInfo* d, DwLineProgram* lp,
u32 idx) {
@@ -273,7 +206,7 @@ void dw_build_line(KitDebugInfo* d, u32 cu_idx) {
DwAttrValue v;
const char* path = "";
for (j = 0; j < ndir_fmt; ++j) {
- read_lp_form(d, dir_fmt[j].form, h.address_size, &off, &v);
+ dw_read_form_in(d, cu, &d->line, dir_fmt[j].form, 0, &off, &v);
if (dir_fmt[j].content_type == DW_LNCT_path) {
path = v.str ? v.str : "";
}
@@ -298,7 +231,7 @@ void dw_build_line(KitDebugInfo* d, u32 cu_idx) {
const char* path = "";
u32 dir_index = 0;
for (j = 0; j < nfile_fmt; ++j) {
- read_lp_form(d, file_fmt[j].form, h.address_size, &off, &v);
+ dw_read_form_in(d, cu, &d->line, file_fmt[j].form, 0, &off, &v);
if (file_fmt[j].content_type == DW_LNCT_path)
path = v.str ? v.str : "";
else if (file_fmt[j].content_type == DW_LNCT_directory_index)
diff --git a/src/debug/dwarf_open.c b/src/debug/dwarf_open.c
@@ -460,8 +460,13 @@ DwCu* dw_cu_at_die_offset(KitDebugInfo* d, u32 die_offset) {
/* ---- form decoding ---------------------------------------------------- */
-void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
- u32* off, DwAttrValue* out) {
+/* Section-parameterized form decoder. Inline form bytes are pulled from
+ * `sec` (.debug_info for DIE attributes, .debug_line for line-program
+ * file/dir entry-format values); strp/line_strp/strx still resolve into the
+ * shared string sections via the CU. This is the single source of truth —
+ * dw_read_form wires it to .debug_info; the line decoder passes &d->line. */
+void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec,
+ u32 form, i64 implicit_const, u32* off, DwAttrValue* out) {
out->form = form;
out->u = 0;
out->s = 0;
@@ -471,16 +476,16 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
switch (form) {
case DW_FORM_addr:
if (cu->address_size == 8)
- out->u = dw_u64(d->info.data, d->info.size, off);
+ out->u = dw_u64(sec->data, sec->size, off);
else
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
break;
case DW_FORM_data1:
case DW_FORM_ref1:
case DW_FORM_flag:
case DW_FORM_strx1:
case DW_FORM_addrx1:
- out->u = dw_u8(d->info.data, d->info.size, off);
+ out->u = dw_u8(sec->data, sec->size, off);
out->s = (i64)(i8)out->u;
if (form == DW_FORM_strx1) out->str = dw_strx(d, cu, out->u);
break;
@@ -488,20 +493,20 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
case DW_FORM_ref2:
case DW_FORM_strx2:
case DW_FORM_addrx2:
- out->u = dw_u16(d->info.data, d->info.size, off);
+ out->u = dw_u16(sec->data, sec->size, off);
out->s = (i64)(i16)out->u;
if (form == DW_FORM_strx2) out->str = dw_strx(d, cu, out->u);
break;
case DW_FORM_strx3:
case DW_FORM_addrx3:
- out->u = dw_u24(d->info.data, d->info.size, off);
+ out->u = dw_u24(sec->data, sec->size, off);
if (form == DW_FORM_strx3) out->str = dw_strx(d, cu, out->u);
break;
case DW_FORM_data4:
case DW_FORM_ref4:
case DW_FORM_strx4:
case DW_FORM_addrx4:
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
out->s = (i64)(i32)out->u;
if (form == DW_FORM_strx4) out->str = dw_strx(d, cu, out->u);
break;
@@ -509,7 +514,7 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
case DW_FORM_ref8:
case DW_FORM_ref_sig8:
case DW_FORM_ref_sup8:
- out->u = dw_u64(d->info.data, d->info.size, off);
+ out->u = dw_u64(sec->data, sec->size, off);
out->s = (i64)out->u;
break;
case DW_FORM_data16:
@@ -517,7 +522,7 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
*off += 16;
break;
case DW_FORM_sdata:
- out->s = dw_sleb(d->info.data, d->info.size, off);
+ out->s = dw_sleb(sec->data, sec->size, off);
out->u = (u64)out->s;
break;
case DW_FORM_udata:
@@ -526,30 +531,30 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
case DW_FORM_addrx:
case DW_FORM_loclistx:
case DW_FORM_rnglistx:
- out->u = dw_uleb(d->info.data, d->info.size, off);
+ out->u = dw_uleb(sec->data, sec->size, off);
if (form == DW_FORM_strx) out->str = dw_strx(d, cu, out->u);
break;
case DW_FORM_string:
- out->str = dw_cstr(d->info.data, d->info.size, off);
+ out->str = dw_cstr(sec->data, sec->size, off);
break;
case DW_FORM_strp:
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
out->str = dw_str(d, (u32)out->u);
break;
case DW_FORM_line_strp:
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
out->str = dw_line_str(d, (u32)out->u);
break;
case DW_FORM_strp_sup:
case DW_FORM_ref_sup4:
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
break;
case DW_FORM_sec_offset:
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
break;
case DW_FORM_ref_addr:
/* DWARF 5: 4 bytes for 32-bit DWARF (we don't support DWARF64). */
- out->u = dw_u32(d->info.data, d->info.size, off);
+ out->u = dw_u32(sec->data, sec->size, off);
break;
case DW_FORM_flag_present:
out->u = 1;
@@ -559,37 +564,37 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
out->u = (u64)implicit_const;
break;
case DW_FORM_block1: {
- u32 n = dw_u8(d->info.data, d->info.size, off);
- out->block = d->info.data + *off;
+ u32 n = dw_u8(sec->data, sec->size, off);
+ out->block = sec->data + *off;
out->block_len = n;
out->u = n;
*off += n;
} break;
case DW_FORM_block2: {
- u32 n = dw_u16(d->info.data, d->info.size, off);
- out->block = d->info.data + *off;
+ u32 n = dw_u16(sec->data, sec->size, off);
+ out->block = sec->data + *off;
out->block_len = n;
out->u = n;
*off += n;
} break;
case DW_FORM_block4: {
- u32 n = dw_u32(d->info.data, d->info.size, off);
- out->block = d->info.data + *off;
+ u32 n = dw_u32(sec->data, sec->size, off);
+ out->block = sec->data + *off;
out->block_len = n;
out->u = n;
*off += n;
} break;
case DW_FORM_block:
case DW_FORM_exprloc: {
- u32 n = (u32)dw_uleb(d->info.data, d->info.size, off);
- out->block = d->info.data + *off;
+ u32 n = (u32)dw_uleb(sec->data, sec->size, off);
+ out->block = sec->data + *off;
out->block_len = n;
out->u = n;
*off += n;
} break;
case DW_FORM_indirect: {
- u32 ifrm = (u32)dw_uleb(d->info.data, d->info.size, off);
- dw_read_form(d, cu, ifrm, 0, off, out);
+ u32 ifrm = (u32)dw_uleb(sec->data, sec->size, off);
+ dw_read_form_in(d, cu, sec, ifrm, 0, off, out);
} break;
default:
/* Unknown form — best effort: skip nothing. */
@@ -597,6 +602,11 @@ void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
}
}
+void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
+ u32* off, DwAttrValue* out) {
+ dw_read_form_in(d, cu, &d->info, form, implicit_const, off, out);
+}
+
void dw_skip_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
u32* off) {
DwAttrValue tmp;