kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

dwarf_internal.h (13756B)


      1 #ifndef KIT_DWARF_INTERNAL_H
      2 #define KIT_DWARF_INTERNAL_H
      3 
      4 /* DWARF 5 consumer — internal types.
      5  *
      6  * This module reads DWARF bytes out of a KitObjFile and answers the
      7  * kit_dwarf_* queries. It is colocated with the producer implementation
      8  * but does not include debug/debug.h or share producer state; the public
      9  * DWARF wire format is the only contract between producer and consumer.
     10  */
     11 
     12 #include <kit/arch.h>
     13 #include <kit/dwarf.h>
     14 #include <kit/object.h>
     15 
     16 #include "core/core.h"
     17 #include "core/heap.h"
     18 #include "debug/dwarf_defs.h"
     19 
     20 /* ---- Section & byte slice helpers ------------------------------------- */
     21 
     22 typedef struct DwSection { /* one object-file section viewed as a byte slice */
     23   const u8* data; /* section bytes */
     24   u32 size;       /* byte count of data */
     25   u32 sec_idx; /* 0-based section index, or UINT32_MAX if missing */
     26 } DwSection;
     27 
     28 /* ---- Abbrev table ---- */
     29 
     30 typedef struct DwAbbrevAttr { /* one attribute spec of an abbreviation */
     31   u32 attr;           /* DW_AT_* */
     32   u32 form;           /* DW_FORM_* */
     33   i64 implicit_const; /* for DW_FORM_implicit_const (DWARF 5 keeps that value in the abbrev) */
     34 } DwAbbrevAttr;
     35 
     36 typedef struct DwAbbrev { /* one abbreviation declaration: tag plus attr specs */
     37   u64 code; /* abbrev code; 0 if unused slot */
     38   u32 tag;  /* DW_TAG_* */
     39   u8 has_children; /* whether DIEs using this abbrev have children (see dw_skip_die_subtree) */
     40   u32 nattrs; /* presumably the element count of attrs */
     41   DwAbbrevAttr* attrs; /* heap-allocated */
     42 } DwAbbrev;
     43 
     44 typedef struct DwAbbrevTable { /* abbrevs parsed from one .debug_abbrev offset */
     45   u32 cu_abbrev_offset; /* offset into .debug_abbrev */
     46   /* Dense map: code → index (or 0 if absent). For typical small tables we
     47    * keep them in a sorted array searched linearly.
           * NOTE(review): this struct has no code → index map field, only the
           * array below, so the "dense map" sentence may be stale — confirm
           * against dw_abbrev_lookup. */
     48   DwAbbrev* abbrevs;
     49   u32 nabbrevs; /* presumably the number of used entries in abbrevs */
     50   u32 cap;      /* presumably the allocated capacity of abbrevs */
     51 } DwAbbrevTable;
     52 
     53 /* ---- Compilation unit ---- */
     54 
     55 typedef struct DwCu { /* one unit of .debug_info: header fields plus per-CU bases */
     56   u32 hdr_offset; /* offset of CU header in .debug_info */
     57   u32 hdr_length; /* unit_length value: bytes that follow the length field itself */
     59   u32 unit_total_size; /* hdr_length + length-field size (4 for the 32-bit initial length) */
     60   u32 die_start_off;   /* offset where the first DIE starts (in .debug_info) */
     61   u8 version;
     62   u8 address_size;
     63   u8 unit_type;
     64   u8 is_64bit;       /* DWARF64? NOTE(review): the offsets in this struct are u32, so DWARF64 offsets above 4 GiB cannot be stored — confirm intent */
     65   u32 abbrev_offset; /* into .debug_abbrev */
     66   u32 str_offsets_base; /* used by dw_strx lookups — presumably DW_AT_str_offsets_base */
     67   u32 addr_base;
     68   u32 loclists_base;
     69   u32 rnglists_base;
     70   u32 stmt_list; /* DW_AT_stmt_list value (offset into .debug_line) */
     71   u8 has_stmt_list; /* presumably nonzero when stmt_list is valid */
     72   const char* comp_dir;
     73   const char* name;
     74   /* Index of this CU's abbrev table in KitDebugInfo.abbrevs */
     75   u32 abbrev_table_idx;
     76 } DwCu;
     77 
     78 /* ---- Materialized DIEs (we cache only what we need) ---- */
     79 
     80 /* A reference into .debug_info. We store the CU index alongside the DIE
     81  * offset so we can resolve cross-CU later.
         * NOTE(review): it is unclear from this header whether die_offset is
         * CU-header-relative or absolute; the field comment below says absolute —
         * confirm against the users of DwDieRef. */
     82 typedef struct DwDieRef {
     83   u32 cu_idx;     /* presumably an index into KitDebugInfo.cus */
     84   u32 die_offset; /* absolute offset into .debug_info bytes */
     85 } DwDieRef;
     86 
     87 /* ---- Type cache ---- */
     88 
     89 typedef enum DwTypeKind { /* discriminator stored in KitDwarfType.kind */
     90   DTK_VOID,
     91   DTK_BASE, /* maps to SINT/UINT/BOOL/FLOAT/CHAR by encoding (KitDwarfType.base_encoding) */
     92   DTK_PTR,
     93   DTK_ARRAY,
     94   DTK_STRUCT,
     95   DTK_UNION,
     96   DTK_ENUM,
     97   DTK_TYPEDEF,
     98   DTK_FUNC,
     99   DTK_CONST, /* alias to inner */
    100   DTK_VOLATILE, /* presumably an alias to inner as well, like DTK_CONST */
    101   DTK_RESTRICT, /* presumably an alias to inner as well, like DTK_CONST */
    102 } DwTypeKind;
    103 
    104 typedef struct DwField { /* one member of a struct/union type (KitDwarfType.fields) */
    105   const char* name;
    106   u32 byte_offset;
    107   u32 bit_offset; /* with bit_size: presumably describes a bit-field member */
    108   u32 bit_size;
    109   struct KitDwarfType* type; /* type of the member */
    110 } DwField;
    111 
    112 typedef struct DwEnumVal { /* one enumerator of an enum type (KitDwarfType.evals) */
    113   const char* name;
    114   i64 value;
    115 } DwEnumVal;
    116 
    117 struct KitDwarfType { /* cached description of one type DIE (see dw_type_from_die) */
    118   DwTypeKind kind;
    119   u32 byte_size;
    120   const char* name;
    121   u32 element_count; /* presumably meaningful for DTK_ARRAY only — confirm */
    122   u32 die_offset; /* origin DIE for cycle-detection / dedup */
    123   /* DTK_PTR/ARRAY/TYPEDEF/CONST/VOLATILE/RESTRICT/FUNC: inner type */
    124   struct KitDwarfType* inner;
    125   /* Base type encoding (DW_ATE_*) — used to derive SINT/UINT/CHAR/BOOL/FLOAT */
    126   u32 base_encoding;
    127   /* DTK_STRUCT/DTK_UNION fields */
    128   DwField* fields;
    129   u32 nfields;
    130   /* DTK_ENUM values */
    131   DwEnumVal* evals;
    132   u32 nevals;
    133 };
    134 
    135 /* ---- Line program decoded matrix ---- */
    136 
    137 typedef struct DwLineRow { /* one row of the decoded line-number matrix */
    138   u64 address;
    139   u32 file_index; /* presumably indexes DwLineProgram.files */
    140   u32 line;
    141   u32 column;
    142   u8 is_stmt;
    143   u8 end_sequence;
    144 } DwLineRow;
    145 
    146 typedef struct DwLineFile { /* one entry of a line program's file table */
    147   const char* path; /* interned in our string table */
    148   u32 dir_index; /* presumably indexes DwLineProgram.dirs */
    149 } DwLineFile;
    150 
    151 typedef struct DwLineProgram { /* decoded line table of one CU (see dw_build_line) */
    152   /* Per-CU line program decoding state. We materialize all rows into a
    153    * single rows array for fast lookup. */
    154   DwLineRow* rows;
    155   u32 nrows;
    156   u32 cap; /* presumably the allocated capacity of rows */
    157   /* File table (file_index 0 is the CU primary in DW5). */
    158   DwLineFile* files;
    159   u32 nfiles;
    160   const char** dirs; /* directory table; presumably what DwLineFile.dir_index refers to */
    161   u32 ndirs;
    162   /* Cached fully-qualified path per file, lazily built. */
    163   const char** file_norm;
    164   u32 nfile_norm;
    165 } DwLineProgram;
    166 
    167 /* ---- Subprogram descriptor (cached) ---- */
    168 
    169 typedef struct DwLocal { /* variable record: used for DwSubprog params/locals and KitDebugInfo globals */
    170   const char* name;
    171   u32 die_offset;
    172   u32 type_die_offset; /* offset of the variable's type DIE */
    173   u64 scope_lo;     /* PCs at which the var is in scope. */
    174   u64 scope_hi;     /* (low_pc, high_pc) of nearest enclosing block.    */
    175   u32 scope_offset; /* offset of the lexical_block DIE; 0 = subprog scope */
    176   u8 has_scope; /* presumably nonzero when scope_lo/scope_hi are valid */
    177   /* Location form: either an exprloc or a loclistx index. */
    178   const u8* loc; /* exprloc bytes */
    179   u32 loc_len;   /* length of loc in bytes */
    180   u8 has_loclist; /* presumably nonzero when loclist_index is the location (see dw_loclist_resolve) */
    181   u64 loclist_index;
    182   /* Role: ARG vs LOCAL. */
    183   u8 is_param;
    184   /* For globals only: the global variable role. */
    185   u8 is_global;
    186 } DwLocal;
    187 
    188 typedef struct DwSubprog { /* cached descriptor of one subprogram DIE */
    189   const char* name;
    190   u64 low_pc;
    191   u64 high_pc;
    192   const char* decl_file;
    193   u32 decl_line;
    194   u32 cu_idx; /* presumably an index into KitDebugInfo.cus */
    195   u32 die_offset; /* offset of the subprogram DIE */
    196   u32 type_die_offset;
    197   /* Frame base — DW_AT_frame_base exprloc bytes (or NULL). */
    198   const u8* frame_base;
    199   u32 frame_base_len;
    200   /* Cached params and locals (lazily). */
    201   DwLocal* params;
    202   u32 nparams;
    203   DwLocal* locals;
    204   u32 nlocals;
    205   u8 inlined;
    206   u8 cached_locals; /* presumably set once dw_build_locals has filled params/locals */
    207 } DwSubprog;
    208 
    209 /* ---- The main consumer state ---- */
    210 
    211 typedef struct DwString { /* wrapper around one interned symbol */
    212   Sym sym; /* interned in compiler->global pool
                  * NOTE(review): KitDebugInfo documents a local pool (strs) that
                  * replaced the compiler's global pool, so this wording may be
                  * stale; DwString is also not referenced elsewhere in this
                  * header — confirm it is still used. */
    213 } DwString;
    214 
    215 struct KitDebugInfo { /* all state of one DWARF consumer instance */
    216   const KitContext* ctx;
    217   Heap* h;
    218   /* Local string pool for interned strings (file paths, etc).  The
    219    * consumer used to borrow the compiler's global pool, but the new
    220    * kit_dwarf_open takes only a KitContext — no compiler. */
    221   struct Pool* strs;
    222   const KitObjFile* obj; /* object file the DWARF bytes are read from */
    223 
    224   /* Sections (field names presumably mirror the section each one holds;
            * see dw_find_section for how they are looked up) */
    225   DwSection abbrev;
    226   DwSection info;
    227   DwSection line;
    228   DwSection str;
    229   DwSection line_str;
    230   DwSection str_offsets;
    231   DwSection addr;
    232   DwSection loclists;
    233   DwSection rnglists;
    234   DwSection eh_frame;
    235   DwSection aranges;
    236 
    237   /* Abbrev tables (one per unique abbrev_offset we've seen). */
    238   DwAbbrevTable* abbrevs;
    239   u32 nabbrevs;
    240   u32 abbrevs_cap;
    241 
    242   /* CUs (populated by dw_parse_all_cus) */
    243   DwCu* cus;
    244   u32 ncus;
    245   u32 cus_cap;
    246 
    247   /* Line programs by CU index (parallel to cus). Each lazily built. */
    248   DwLineProgram* lines_by_cu;
    249   u8* lines_built; /* parallel; 0 = not yet decoded */
    250 
    251   /* Subprograms (sorted by low_pc on first build). */
    252   DwSubprog* subs;
    253   u32 nsubs;
    254   u32 subs_cap;
    255   u8 subs_built; /* presumably set once dw_build_subs has run */
    256 
    257   /* Type cache: DIE-offset → KitDwarfType*. */
    258   KitDwarfType** types_by_off; /* parallel arrays */
    259   u32* types_off;              /* DIE offsets, parallel to types_by_off */
    260   u32 ntypes;
    261   u32 types_cap;
    262 
    263   /* Globals (top-level DW_TAG_variable in any CU). */
    264   DwLocal* globals;
    265   u32 nglobals;
    266   u32 globals_cap;
    267   u8 globals_built; /* presumably set once dw_build_globals has run */
    268 };
    269 
    270 /* ---- API between the dwarf_*.c files ---------------------------------- */
    271 
    272 /* Section lookup by name. Sets out->data/size; sec_idx = UINT32_MAX if missing.
    273  */
    274 void dw_find_section(KitDebugInfo* d, const char* name, DwSection* out);
    275 
    276 /* Read primitives. Each returns the decoded value, advances *off past the
         * bytes it consumed, and panics on EOF. */
    277 u8 dw_u8(const u8* base, u32 size, u32* off);
    278 u16 dw_u16(const u8* base, u32 size, u32* off);
    279 u32 dw_u24(const u8* base, u32 size, u32* off);
    280 u32 dw_u32(const u8* base, u32 size, u32* off);
    281 u64 dw_u64(const u8* base, u32 size, u32* off);
    282 u64 dw_uleb(const u8* base, u32 size, u32* off);
    283 i64 dw_sleb(const u8* base, u32 size, u32* off);
    284 const char* dw_cstr(const u8* base, u32 size, u32* off);
    285 
    286 /* Abbrev parsing: ensure (and return) the abbrev table for `offset`. */
    287 DwAbbrevTable* dw_abbrev_get(KitDebugInfo* d, u32 offset);
    288 DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code);
    289 
    290 /* Parse the CU header at offset `off` in .debug_info into `cu`.
    291  * Returns the offset of the next CU header. */
    292 u32 dw_cu_parse_header(KitDebugInfo* d, u32 off, DwCu* cu);
    293 
    294 /* Skim every CU and populate dbg->cus. */
    295 void dw_parse_all_cus(KitDebugInfo* d);
    296 
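    297 /* String accessors. dw_str and dw_line_str take a byte offset (presumably
         * into .debug_str and .debug_line_str); dw_strx takes an index that is
         * resolved through the .debug_str_offsets table at cu->str_offsets_base. */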
    298 const char* dw_str(KitDebugInfo* d, u32 offset);
    299 const char* dw_line_str(KitDebugInfo* d, u32 offset);
    300 const char* dw_strx(KitDebugInfo* d, const DwCu* cu, u64 idx);
    301 
    302 /* Skip one attribute value encoded as `form`. *off is updated. */
    303 void dw_skip_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
    304                   u32* off);
    305 
    306 /* Read attribute value into a typed accumulator. Caller picks which getter. */
    307 typedef struct DwAttrValue { /* decoded value of one attribute (output of dw_read_form) */
    308   u32 form; /* presumably the DW_FORM_* the value was read with */
    309   /* Values for various forms — only one slot is meaningful per form. */
    310   u64 u;           /* udata, addr, ref (CU-relative offset for local refs) */
    311   i64 s;           /* sdata */
    312   const char* str; /* strp/string/strx/line_strp resolved cstring */
    313   const u8* block; /* exprloc/block bytes */
    314   u32 block_len;   /* length of block in bytes */
    315 } DwAttrValue;
    316 
    317 /* Read attr value at *off using `form`. Updates *off.
    318  *
    319  * Reads inline form bytes from .debug_info (the DIE stream). The
    320  * section-parameterized core dw_read_form_in lets other consumers (the
    321  * line-number program in dwarf_line.c) decode the same forms out of a
    322  * different section while resolving strp/line_strp/strx into the shared
    323  * string sections exactly as the DIE reader does. */
    324 void dw_read_form(KitDebugInfo* d, const DwCu* cu, u32 form, i64 implicit_const,
    325                   u32* off, DwAttrValue* out);
    326 void dw_read_form_in(KitDebugInfo* d, const DwCu* cu, const DwSection* sec,
    327                      u32 form, i64 implicit_const, u32* off, DwAttrValue* out);
    328 
    329 /* DIE iteration helpers. */
    330 typedef struct DwDie { /* header of one DIE as read by dw_read_die */
    331   u64 abbrev_code;
    332   DwAbbrev* abbrev;     /* NULL if abbrev_code==0 (null entry) */
    333   u32 die_off;          /* offset of this DIE itself in .debug_info */
    334   u32 attrs_off;        /* where attribute encodings start */
    335   u32 next_sibling_off; /* lazily computed */
    336 } DwDie;
    337 
    338 /* Read one DIE header at *off. Updates *off to point past the abbrev code,
    339  * to the start of the attribute area. Returns 1 on success, 0 if this is a
    340  * null-entry (terminates a sibling chain). */
    341 int dw_read_die(KitDebugInfo* d, const DwCu* cu, u32* off, DwDie* out);
    342 
    343 /* Skip a DIE's attribute area, advancing *off past it. */
    344 void dw_skip_die_attrs(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32* off);
    345 
    346 /* Skip an entire DIE subtree (including children), starting at attrs_off.
    347  * On entry, *off == die->attrs_off. On exit, *off is past the children
    348  * terminator (if has_children) or just past the attrs (if no children). */
    349 void dw_skip_die_subtree(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32* off);
    350 
    351 /* Lookup an attribute on `die` by attr code. Returns 1 if found and fills
    352  * *out; 0 otherwise. Restartable (rewinds the cursor). */
    353 int dw_die_attr(KitDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr,
    354                 DwAttrValue* out);
    355 
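    356 /* String interning. NOTE(review): KitDebugInfo documents a local pool
         * (strs) that replaced the compiler's global pool — confirm which pool
         * this interns into. */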
    357 const char* dw_intern(KitDebugInfo* d, const char* s, size_t len);
    358 
    359 /* Inline string-equality/strlen — libkit avoids a runtime libc dep beyond the
    360  * tightly-controlled allowlist (test/lib_deps.allowlist). */
    /* Null-safe string equality. Returns 1 only when both pointers are non-null
     * and the two strings match byte for byte through the terminator; returns 0
     * otherwise, including when either pointer is null. */
    static inline int dw_streq(const char* a, const char* b) {
      if (a && b) {
        /* Walk both strings in lockstep; leaving the loop means a mismatch. */
        for (; *a == *b; a++, b++) {
          if (*a == '\0') return 1;
        }
      }
      return 0;
    }
    /* Null-safe strlen: number of bytes before the terminator, or 0 when s is
     * a null pointer. */
    static inline size_t dw_strlen(const char* s) {
      const char* end;
      if (!s) return 0;
      for (end = s; *end; end++) {
      }
      return (size_t)(end - s);
    }
    375 
    376 /* DIE attribute pack — shared between dwarf_die.c and dwarf_type.c. */
    377 typedef struct DieAttrPack { /* attributes of one DIE gathered in one pass (see dw_die_pack); each has_X flag presumably marks X as present */
    378   const char* name;
    379   u64 low_pc;
    380   u64 high_pc_value;
    381   u32 high_pc_form; /* form the high_pc attribute was encoded with — presumably needed to interpret high_pc_value */
    382   u8 has_low_pc;
    383   u8 has_high_pc;
    384   u32 type_die_offset;
    385   u8 has_type;
    386   u32 decl_file;
    387   u32 decl_line;
    388   const u8* loc_block;
    389   u32 loc_block_len;
    390   u8 has_loclist;
    391   u64 loclist_index;
    392   const u8* fb_block; /* presumably the DW_AT_frame_base bytes (cf. DwSubprog.frame_base) */
    393   u32 fb_block_len;
    394   i64 const_value;
    395   u8 has_const_value;
    396   u32 byte_offset;
    397   u8 has_byte_offset;
    398   u32 byte_size;
    399   u8 has_byte_size;
    400   u32 bit_size;
    401   u8 has_bit_size;
    402   u32 bit_offset;
    403   u8 has_bit_offset;
    404   u32 base_encoding;
    405   u8 has_encoding;
    406   u32 array_count;
    407   u8 has_array_count;
    408   u8 inlined;
    409 } DieAttrPack;
    410 
    411 void dw_die_pack(KitDebugInfo* d, const DwCu* cu, DwDie* die, DieAttrPack* p);
    412 
    413 /* Subprograms */
    414 void dw_build_subs(KitDebugInfo* d);
    415 DwSubprog* dw_find_subprog(KitDebugInfo* d, u64 pc);
    416 void dw_build_locals(KitDebugInfo* d, DwSubprog* sp);
    417 
    418 /* Globals */
    419 void dw_build_globals(KitDebugInfo* d);
    420 
    421 /* Line program */
    422 void dw_build_line(KitDebugInfo* d, u32 cu_idx);
    423 
    424 /* Type DIE → KitDwarfType*. die_offset is absolute offset in .debug_info. */
    425 KitDwarfType* dw_type_from_die(KitDebugInfo* d, u32 cu_idx, u32 die_offset);
    426 KitDwarfType* dw_void_type(KitDebugInfo* d);
    427 
    428 /* Loc-expr evaluator. Evaluates `expr` of length `len` in the context of
    429  * `frame` (regs, cfa) and `fb_expr`/`fb_len` (the subprog's DW_AT_frame_base
    430  * expression — typically just DW_OP_call_frame_cfa). Returns 0 on success;
    431  * fills *out with the location kind plus value. */
    432 typedef struct DwExprResult { /* result written by dw_eval_expr */
    433   /* kind: 0 = address (memory), 1 = value-on-stack (DW_OP_stack_value),
    434    * 2 = register, 3 = unsupported. */
    435   int kind;
    436   u64 value; /* address if kind=0; literal if kind=1; reg# if kind=2 */
    437 } DwExprResult;
    438 
    439 int dw_eval_expr(KitDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr,
    440                  u32 fb_len, const KitUnwindFrame* frame, DwExprResult* out);
    441 
    442 /* CU lookup helpers. */
    443 DwCu* dw_cu_at_die_offset(KitDebugInfo* d, u32 die_offset);
    444 
    445 /* Resolve a DW_FORM_loclistx into the matching location list entry for
    446  * `pc`. Returns 1 and fills bytes/len on success; 0 if the section is
    447  * absent, the index is bad, or no entry covers `pc`. */
    448 int dw_loclist_resolve(KitDebugInfo* d, const DwCu* cu, u64 idx, u64 pc,
    449                        const u8** bytes, u32* len);
    450 
    451 #endif