dwarf_internal.h (13756B)
1 #ifndef KIT_DWARF_INTERNAL_H 2 #define KIT_DWARF_INTERNAL_H 3 4 /* DWARF 5 consumer — internal types. 5 * 6 * This module reads DWARF bytes out of a KitObjFile and answers the 7 * kit_dwarf_* queries. It is colocated with the producer implementation 8 * but does not include debug/debug.h or share producer state; the public 9 * DWARF wire format is the only contract between producer and consumer. 10 */ 11 12 #include <kit/arch.h> 13 #include <kit/dwarf.h> 14 #include <kit/object.h> 15 16 #include "core/core.h" 17 #include "core/heap.h" 18 #include "debug/dwarf_defs.h" 19 20 /* ---- Section & byte slice helpers ------------------------------------- */ 21 22 typedef struct DwSection { 23 const u8* data; 24 u32 size; 25 u32 sec_idx; /* 0-based section index, or UINT32_MAX if missing */ 26 } DwSection; 27 28 /* ---- Abbrev table ---- */ 29 30 typedef struct DwAbbrevAttr { 31 u32 attr; /* DW_AT_* */ 32 u32 form; /* DW_FORM_* */ 33 i64 implicit_const; /* for DW_FORM_implicit_const */ 34 } DwAbbrevAttr; 35 36 typedef struct DwAbbrev { 37 u64 code; /* abbrev code; 0 if unused slot */ 38 u32 tag; /* DW_TAG_* */ 39 u8 has_children; 40 u32 nattrs; 41 DwAbbrevAttr* attrs; /* heap-allocated */ 42 } DwAbbrev; 43 44 typedef struct DwAbbrevTable { 45 u32 cu_abbrev_offset; /* offset into .debug_abbrev */ 46 /* Dense map: code → index (or 0 if absent). For typical small tables we 47 * keep them in a sorted array searched linearly. */ 48 DwAbbrev* abbrevs; 49 u32 nabbrevs; 50 u32 cap; 51 } DwAbbrevTable; 52 53 /* ---- Compilation unit ---- */ 54 55 typedef struct DwCu { 56 u32 hdr_offset; /* offset of CU header in .debug_info */ 57 u32 hdr_length; /* length of unit_length bytes (after the size field itself) 58 */ 59 u32 unit_total_size; /* hdr_length + length-field size (4 for 32-bit init) */ 60 u32 die_start_off; /* offset where the first DIE starts (in .debug_info) */ 61 u8 version; 62 u8 address_size; 63 u8 unit_type; 64 u8 is_64bit; /* DWARF64? */ 65 u32 abbrev_offset; /* into .debug_abbrev */ 66 u32 str_offsets_base; 67 u32 addr_base; 68 u32 loclists_base; 69 u32 rnglists_base; 70 u32 stmt_list; /* DW_AT_stmt_list value (offset into .debug_line) */ 71 u8 has_stmt_list; 72 const char* comp_dir; 73 const char* name; 74 /* Index of abbrev table in dbg->abbrevs */ 75 u32 abbrev_table_idx; 76 } DwCu; 77 78 /* ---- Materialized DIEs (we cache only what we need) ---- */ 79 80 /* A reference into .debug_info (compilation-unit relative). We store CU 81 * index plus offset-from-CU-header so we can resolve cross-CU later. */ 82 typedef struct DwDieRef { 83 u32 cu_idx; 84 u32 die_offset; /* absolute offset into .debug_info bytes */ 85 } DwDieRef; 86 87 /* ---- Type cache ---- */ 88 89 typedef enum DwTypeKind { 90 DTK_VOID, 91 DTK_BASE, /* maps to SINT/UINT/BOOL/FLOAT/CHAR by encoding */ 92 DTK_PTR, 93 DTK_ARRAY, 94 DTK_STRUCT, 95 DTK_UNION, 96 DTK_ENUM, 97 DTK_TYPEDEF, 98 DTK_FUNC, 99 DTK_CONST, /* alias to inner */ 100 DTK_VOLATILE, 101 DTK_RESTRICT, 102 } DwTypeKind; 103 104 typedef struct DwField { 105 const char* name; 106 u32 byte_offset; 107 u32 bit_offset; 108 u32 bit_size; 109 struct KitDwarfType* type; 110 } DwField; 111 112 typedef struct DwEnumVal { 113 const char* name; 114 i64 value; 115 } DwEnumVal; 116 117 struct KitDwarfType { 118 DwTypeKind kind; 119 u32 byte_size; 120 const char* name; 121 u32 element_count; 122 u32 die_offset; /* origin DIE for cycle-detection / dedup */ 123 /* DT_PTR/ARRAY/TYPEDEF/CONST/VOLATILE/RESTRICT/FUNC: inner type */ 124 struct KitDwarfType* inner; 125 /* Base type encoding (DW_ATE_*) — used to derive SINT/UINT/CHAR/BOOL/FLOAT */ 126 u32 base_encoding; 127 /* STRUCT/UNION fields */ 128 DwField* fields; 129 u32 nfields; 130 /* ENUM values */ 131 DwEnumVal* evals; 132 u32 nevals; 133 }; 134 135 /* ---- Line program decoded matrix ---- */ 136 137 typedef struct DwLineRow { 138 u64 address; 139 u32 file_index; 140 u32 line; 141 u32 column; 142 u8 is_stmt; 143 u8 end_sequence; 144 } DwLineRow; 145 146 typedef struct DwLineFile { 147 const char* path; /* interned in our string table */ 148 u32 dir_index; 149 } DwLineFile; 150 151 typedef struct DwLineProgram { 152 /* Per-CU line program decoding state. We materialize all rows into a 153 * single rows array for fast lookup. */ 154 DwLineRow* rows; 155 u32 nrows; 156 u32 cap; 157 /* File table (file_index 0 is the CU primary in DW5). */ 158 DwLineFile* files; 159 u32 nfiles; 160 const char** dirs; 161 u32 ndirs; 162 /* Cached fully-qualified path per file, lazily built. */ 163 const char** file_norm; 164 u32 nfile_norm; 165 } DwLineProgram; 166 167 /* ---- Subprogram descriptor (cached) ---- */ 168 169 typedef struct DwLocal { 170 const char* name; 171 u32 die_offset; 172 u32 type_die_offset; 173 u64 scope_lo; /* PCs at which the var is in scope. */ 174 u64 scope_hi; /* (low_pc, high_pc) of nearest enclosing block. */ 175 u32 scope_offset; /* offset of the lexical_block DIE; 0 = subprog scope */ 176 u8 has_scope; 177 /* Location form: either an exprloc or a loclistx index. */ 178 const u8* loc; 179 u32 loc_len; 180 u8 has_loclist; 181 u64 loclist_index; 182 /* Role: ARG vs LOCAL. */ 183 u8 is_param; 184 /* For globals only: the global variable role. */ 185 u8 is_global; 186 } DwLocal; 187 188 typedef struct DwSubprog { 189 const char* name; 190 u64 low_pc; 191 u64 high_pc; 192 const char* decl_file; 193 u32 decl_line; 194 u32 cu_idx; 195 u32 die_offset; /* offset of the subprogram DIE */ 196 u32 type_die_offset; 197 /* Frame base — DW_AT_frame_base exprloc bytes (or NULL). */ 198 const u8* frame_base; 199 u32 frame_base_len; 200 /* Cached params and locals (lazily). */ 201 DwLocal* params; 202 u32 nparams; 203 DwLocal* locals; 204 u32 nlocals; 205 u8 inlined; 206 u8 cached_locals; 207 } DwSubprog; 208 209 /* ---- The main consumer state ---- */ 210 211 typedef struct DwString { 212 Sym sym; /* interned in compiler->global pool */ 213 } DwString; 214 215 struct KitDebugInfo { 216 const KitContext* ctx; 217 Heap* h; 218 /* Local string pool for interned strings (file paths, etc). The 219 * consumer used to borrow the compiler's global pool, but the new 220 * kit_dwarf_open takes only a KitContext — no compiler. */ 221 struct Pool* strs; 222 const KitObjFile* obj; 223 224 /* Sections */ 225 DwSection abbrev; 226 DwSection info; 227 DwSection line; 228 DwSection str; 229 DwSection line_str; 230 DwSection str_offsets; 231 DwSection addr; 232 DwSection loclists; 233 DwSection rnglists; 234 DwSection eh_frame; 235 DwSection aranges; 236 237 /* Abbrev tables (one per unique abbrev_offset we've seen). */ 238 DwAbbrevTable* abbrevs; 239 u32 nabbrevs; 240 u32 abbrevs_cap; 241 242 /* CUs */ 243 DwCu* cus; 244 u32 ncus; 245 u32 cus_cap; 246 247 /* Line programs by CU index (parallel to cus). Each lazily built. */ 248 DwLineProgram* lines_by_cu; 249 u8* lines_built; /* parallel; 0 = not yet decoded */ 250 251 /* Subprograms (sorted by low_pc on first build). */ 252 DwSubprog* subs; 253 u32 nsubs; 254 u32 subs_cap; 255 u8 subs_built; 256 257 /* Type cache: DIE-offset → KitDwarfType*. */ 258 KitDwarfType** types_by_off; /* parallel arrays */ 259 u32* types_off; 260 u32 ntypes; 261 u32 types_cap; 262 263 /* Globals (top-level DW_TAG_variable in any CU). */ 264 DwLocal* globals; 265 u32 nglobals; 266 u32 globals_cap; 267 u8 globals_built; 268 }; 269 270 /* ---- API between the dwarf_*.c files ---------------------------------- */ 271 272 /* Section lookup by name. Sets out->data/size; sec_idx = UINT32_MAX if missing. 273 */ 274 void dw_find_section(KitDebugInfo* d, const char* name, DwSection* out); 275 276 /* Read primitives. Each returns the new offset on success and panics on EOF. */ 277 u8 dw_u8(const u8* base, u32 size, u32* off); 278 u16 dw_u16(const u8* base, u32 size, u32* off); 279 u32 dw_u24(const u8* base, u32 size, u32* off); 280 u32 dw_u32(const u8* base, u32 size, u32* off); 281 u64 dw_u64(const u8* base, u32 size, u32* off); 282 u64 dw_uleb(const u8* base, u32 size, u32* off); 283 i64 dw_sleb(const u8* base, u32 size, u32* off); 284 const char* dw_cstr(const u8* base, u32 size, u32* off); 285 286 /* Abbrev parsing: ensure (and return) the abbrev table for `offset`. */ 287 DwAbbrevTable* dw_abbrev_get(KitDebugInfo* d, u32 offset); 288 DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code); 289 290 /* Parse the CU header at offset `off` in .debug_info into `cu`. 291 * Returns the offset of the next CU header. */ 292 u32 dw_cu_parse_header(KitDebugInfo* d, u32 off, DwCu* cu); 293 294 /* Skim every CU and populate dbg->cus. */ 295 void dw_parse_all_cus(KitDebugInfo* d); 296 297 /* Open the .debug_str_offsets table indexed by str_offsets_base. */ 298 const char* dw_str(KitDebugInfo* d, u32 offset); 299 const char* dw_line_str(KitDebugInfo* d, u32 offset); 300 const char* dw_strx(KitDebugInfo* d, const DwCu* cu, u64 idx); 301 302 /* Skip one attribute value of `form` size. *off is updated. */ 303 void dw_skip_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, 304 u32* off); 305 306 /* Read attribute value into a typed accumulator. Caller picks which getter. */ 307 typedef struct DwAttrValue { 308 u32 form; 309 /* Values for various forms — only one slot is meaningful per form. */ 310 u64 u; /* udata, addr, ref (CU-relative offset for local refs) */ 311 i64 s; /* sdata */ 312 const char* str; /* strp/string/strx/line_strp resolved cstring */ 313 const u8* block; /* exprloc/block bytes */ 314 u32 block_len; 315 } DwAttrValue; 316 317 /* Read attr value at *off using `form`. Updates *off. 318 * 319 * Reads inline form bytes from .debug_info (the DIE stream). The 320 * section-parameterized core dw_read_form_in lets other consumers (the 321 * line-number program in dwarf_line.c) decode the same forms out of a 322 * different section while resolving strp/line_strp/strx into the shared 323 * string sections exactly as the DIE reader does. */ 324 void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const, 325 u32* off, DwAttrValue* out); 326 void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec, 327 u32 form, i64 implicit_const, u32* off, DwAttrValue* out); 328 329 /* DIE iteration helpers. */ 330 typedef struct DwDie { 331 u64 abbrev_code; 332 DwAbbrev* abbrev; /* NULL if abbrev_code==0 (null entry) */ 333 u32 die_off; /* offset of this DIE itself in .debug_info */ 334 u32 attrs_off; /* where attribute encodings start */ 335 u32 next_sibling_off; /* lazily computed */ 336 } DwDie; 337 338 /* Read one DIE header at *off. Updates *off to point past the abbrev code, 339 * to the start of the attribute area. Returns 1 on success, 0 if this is a 340 * null-entry (terminates a sibling chain). */ 341 int dw_read_die(KitDebugInfo* d, const DwCu* cu, u32* off, DwDie* out); 342 343 /* Skip a DIE's attribute area, advancing *off past it. */ 344 void dw_skip_die_attrs(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32* off); 345 346 /* Skip an entire DIE subtree (including children), starting at attrs_off. 347 * On entry, *off == die->attrs_off. On exit, *off is past the children 348 * terminator (if has_children) or just past the attrs (if no children). */ 349 void dw_skip_die_subtree(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32* off); 350 351 /* Lookup an attribute on `die` by attr code. Returns 1 if found and fills 352 * *out; 0 otherwise. Restartable (rewinds the cursor). */ 353 int dw_die_attr(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr, 354 DwAttrValue* out); 355 356 /* String interning into the compiler's global pool. */ 357 const char* dw_intern(KitDebugInfo* d, const char* s, size_t len); 358 359 /* Inline strcmp/strlen — libkit avoids a runtime libc dep beyond the 360 * tightly-controlled allowlist (test/lib_deps.allowlist). */ 361 static inline int dw_streq(const char* a, const char* b) { 362 if (!a || !b) return 0; 363 while (*a && *b && *a == *b) { 364 a++; 365 b++; 366 } 367 return *a == 0 && *b == 0; 368 } 369 static inline size_t dw_strlen(const char* s) { 370 size_t n = 0; 371 if (!s) return 0; 372 while (s[n]) n++; 373 return n; 374 } 375 376 /* DIE attribute pack — shared between dwarf_die.c and dwarf_type.c. */ 377 typedef struct DieAttrPack { 378 const char* name; 379 u64 low_pc; 380 u64 high_pc_value; 381 u32 high_pc_form; 382 u8 has_low_pc; 383 u8 has_high_pc; 384 u32 type_die_offset; 385 u8 has_type; 386 u32 decl_file; 387 u32 decl_line; 388 const u8* loc_block; 389 u32 loc_block_len; 390 u8 has_loclist; 391 u64 loclist_index; 392 const u8* fb_block; 393 u32 fb_block_len; 394 i64 const_value; 395 u8 has_const_value; 396 u32 byte_offset; 397 u8 has_byte_offset; 398 u32 byte_size; 399 u8 has_byte_size; 400 u32 bit_size; 401 u8 has_bit_size; 402 u32 bit_offset; 403 u8 has_bit_offset; 404 u32 base_encoding; 405 u8 has_encoding; 406 u32 array_count; 407 u8 has_array_count; 408 u8 inlined; 409 } DieAttrPack; 410 411 void dw_die_pack(KitDebugInfo* d, const DwCu* cu, DwDie* die, DieAttrPack* p); 412 413 /* Subprograms */ 414 void dw_build_subs(KitDebugInfo* d); 415 DwSubprog* dw_find_subprog(KitDebugInfo* d, u64 pc); 416 void dw_build_locals(KitDebugInfo* d, DwSubprog* sp); 417 418 /* Globals */ 419 void dw_build_globals(KitDebugInfo* d); 420 421 /* Line program */ 422 void dw_build_line(KitDebugInfo* d, u32 cu_idx); 423 424 /* Type DIE → KitDwarfType*. die_offset is absolute offset in .debug_info. */ 425 KitDwarfType* dw_type_from_die(KitDebugInfo* d, u32 cu_idx, u32 die_offset); 426 KitDwarfType* dw_void_type(KitDebugInfo* d); 427 428 /* Loc-expr evaluator. Evaluates `expr` of length `len` in the context of 429 * `frame` (regs, cfa) and `frame_base_expr` (the subprog's DW_AT_frame_base 430 * expression — typically just DW_OP_call_frame_cfa). Returns 0 on success; 431 * fills *result with the location kind plus value. */ 432 typedef struct DwExprResult { 433 /* result_kind: 0 = address (memory), 1 = value-on-stack (DW_OP_stack_value), 434 * 2 = register, 3 = unsupported. */ 435 int kind; 436 u64 value; /* address if kind=0; literal if kind=1; reg# if kind=2 */ 437 } DwExprResult; 438 439 int dw_eval_expr(KitDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr, 440 u32 fb_len, const KitUnwindFrame* frame, DwExprResult* out); 441 442 /* CU lookup helpers. */ 443 DwCu* dw_cu_at_die_offset(KitDebugInfo* d, u32 die_offset); 444 445 /* Resolve a DW_FORM_loclistx into the matching location list entry for 446 * `pc`. Returns 1 and fills bytes/len on success; 0 if the section is 447 * absent, the index is bad, or no entry covers `pc`. */ 448 int dw_loclist_resolve(KitDebugInfo* d, const DwCu* cu, u64 idx, u64 pc, 449 const u8** bytes, u32* len); 450 451 #endif