kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

dwarf_line.c (18442B)


      1 /* dwarf_line.c — DWARF 5 line-number-program decoder.
      2  *
      3  * Per doc/DWARF.md §4.2: walk .debug_line for the CU's stmt_list, build
      4  * a row matrix, and index it for addr→line and (file, line)→addr lookup.
      5  */
      6 
      7 #include <kit/dwarf.h>
      8 #include <stddef.h>
      9 #include <stdint.h>
     10 #include <string.h>
     11 
     12 #include "core/core.h"
     13 #include "core/heap.h"
     14 #include "core/slice.h"
     15 #include "core/util.h"
     16 #include "debug/dwarf_internal.h"
     17 
     18 typedef struct LineState {
     19   u64 address;
     20   u32 op_index;
     21   u32 file;
     22   u32 line;
     23   u32 column;
     24   u8 is_stmt;
     25   u8 basic_block;
     26   u8 end_sequence;
     27   u8 prologue_end;
     28   u8 epilogue_begin;
     29   u32 isa;
     30   u32 discriminator;
     31 } LineState;
     32 
     33 typedef struct LineHdr {
     34   u32 unit_length;
     35   u8 version;
     36   u8 address_size;
     37   u8 segment_selector_size;
     38   u32 header_length;
     39   u8 min_inst_len;
     40   u8 max_ops_per_inst;
     41   u8 default_is_stmt;
     42   i8 line_base;
     43   u8 line_range;
     44   u8 opcode_base;
     45   u8 std_opcode_lengths[12]; /* version 5 has 12 standard opcodes */
     46 } LineHdr;
     47 
     48 static void rows_push(KitDebugInfo* d, DwLineProgram* lp, const LineState* st) {
     49   DwLineRow* r;
     50   if (lp->nrows == lp->cap) {
     51     u32 ncap = lp->cap ? lp->cap * 2 : 32;
     52     DwLineRow* na = (DwLineRow*)d->h->realloc(
     53         d->h, lp->rows, lp->cap * sizeof(*lp->rows), ncap * sizeof(*lp->rows),
     54         _Alignof(DwLineRow));
     55     if (!na) return;
     56     lp->rows = na;
     57     lp->cap = ncap;
     58   }
     59   r = &lp->rows[lp->nrows++];
     60   r->address = st->address;
     61   r->file_index = st->file;
     62   r->line = st->line;
     63   r->column = st->column;
     64   r->is_stmt = st->is_stmt;
     65   r->end_sequence = st->end_sequence;
     66 }
     67 
     68 static void state_init(LineState* st, u8 default_is_stmt) {
     69   st->address = 0;
     70   st->op_index = 0;
     71   st->file = 1;
     72   st->line = 1;
     73   st->column = 0;
     74   st->is_stmt = default_is_stmt;
     75   st->basic_block = 0;
     76   st->end_sequence = 0;
     77   st->prologue_end = 0;
     78   st->epilogue_begin = 0;
     79   st->isa = 0;
     80   st->discriminator = 0;
     81 }
     82 
     83 /* Read a DW5 file-or-dir entry-format header.
     84  * On entry: *off points at format_count.
     85  * Returns the number of (content_type, form) pairs. Caller must read
     86  * the format pairs before calling read_entries(). */
     87 typedef struct EntryFmt {
     88   u32 content_type;
     89   u32 form;
     90 } EntryFmt;
     91 
     92 static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt,
     93                        u32 max) {
     94   u32 n = dw_u8(base, size, off);
     95   u32 i;
     96   if (n > max) n = max;
     97   for (i = 0; i < n; ++i) {
     98     fmt[i].content_type = (u32)dw_uleb(base, size, off);
     99     fmt[i].form = (u32)dw_uleb(base, size, off);
    100   }
    101   return n;
    102 }
    103 
    104 /* Build a fully-qualified path for file_index in lp. */
    105 static const char* build_file_norm(KitDebugInfo* d, DwLineProgram* lp,
    106                                    u32 idx) {
    107   const char* path;
    108   const char* dir;
    109   u32 dir_idx;
    110   size_t plen, dlen;
    111   char buf[4096];
    112   size_t pos = 0;
    113   if (idx >= lp->nfiles) return "";
    114   path = lp->files[idx].path;
    115   if (!path) path = "";
    116   dir_idx = lp->files[idx].dir_index;
    117   dir = (dir_idx < lp->ndirs) ? lp->dirs[dir_idx] : "";
    118   plen = slice_from_cstr(path).len;
    119   dlen = slice_from_cstr(dir).len;
    120   /* If path is already absolute (starts with /), return as-is. */
    121   if (plen > 0 && path[0] == '/') return path;
    122   if (dlen > 0) {
    123     if (dlen >= sizeof(buf) - 2) return path; /* fallback */
    124     memcpy(buf, dir, dlen);
    125     pos = dlen;
    126     if (buf[pos - 1] != '/') buf[pos++] = '/';
    127   }
    128   if (pos + plen >= sizeof(buf)) return path;
    129   memcpy(buf + pos, path, plen);
    130   pos += plen;
    131   buf[pos] = 0;
    132   return dw_intern(d, buf, pos);
    133 }
    134 
    135 void dw_build_line(KitDebugInfo* d, u32 cu_idx) {
    136   DwCu* cu;
    137   DwLineProgram* lp;
    138   u32 off;
    139   u32 stmt_off;
    140   LineHdr h;
    141   u32 unit_end;
    142   u32 prog_start;
    143   EntryFmt dir_fmt[8];
    144   EntryFmt file_fmt[8];
    145   u32 ndir_fmt, nfile_fmt;
    146   u32 ndirs_count, nfiles_count;
    147   u32 i;
    148   LineState st;
    149 
    150   if (cu_idx >= d->ncus) return;
    151   if (d->lines_built[cu_idx]) return;
    152   d->lines_built[cu_idx] = 1;
    153 
    154   cu = &d->cus[cu_idx];
    155   lp = &d->lines_by_cu[cu_idx];
    156   if (!cu->has_stmt_list) return;
    157   stmt_off = cu->stmt_list;
    158   if (stmt_off >= d->line.size) return;
    159 
    160   off = stmt_off;
    161   h.unit_length = dw_u32(d->line.data, d->line.size, &off);
    162   if (h.unit_length == 0xffffffffu) return; /* DWARF64 not supported */
    163   unit_end = off + h.unit_length;
    164   h.version = (u8)dw_u16(d->line.data, d->line.size, &off);
    165   if (h.version != 5) {
    166     /* DW4/3 layout differs. We only support DW5. */
    167     return;
    168   }
    169   h.address_size = dw_u8(d->line.data, d->line.size, &off);
    170   h.segment_selector_size = dw_u8(d->line.data, d->line.size, &off);
    171   h.header_length = dw_u32(d->line.data, d->line.size, &off);
    172   prog_start = off + h.header_length;
    173   h.min_inst_len = dw_u8(d->line.data, d->line.size, &off);
    174   h.max_ops_per_inst = dw_u8(d->line.data, d->line.size, &off);
    175   h.default_is_stmt = dw_u8(d->line.data, d->line.size, &off);
    176   h.line_base = (i8)dw_u8(d->line.data, d->line.size, &off);
    177   h.line_range = dw_u8(d->line.data, d->line.size, &off);
    178   h.opcode_base = dw_u8(d->line.data, d->line.size, &off);
    179   if (h.line_range == 0) h.line_range = 1;
    180   /* Read standard opcode lengths (opcode_base - 1 of them). */
    181   {
    182     u32 j;
    183     u32 cnt = h.opcode_base ? h.opcode_base - 1u : 0u;
    184     if (cnt > sizeof(h.std_opcode_lengths)) cnt = sizeof(h.std_opcode_lengths);
    185     for (j = 0; j < cnt; ++j)
    186       h.std_opcode_lengths[j] = dw_u8(d->line.data, d->line.size, &off);
    187     /* Skip any extra opcode-length bytes the header claims. */
    188     if (h.opcode_base > 1u + sizeof(h.std_opcode_lengths)) {
    189       off += (h.opcode_base - 1u) - (u32)sizeof(h.std_opcode_lengths);
    190     }
    191   }
    192 
    193   /* directories[] */
    194   ndir_fmt = read_format(d->line.data, d->line.size, &off, dir_fmt, 8);
    195   ndirs_count = (u32)dw_uleb(d->line.data, d->line.size, &off);
    196   if (ndirs_count > 0) {
    197     lp->dirs = (const char**)d->h->alloc(
    198         d->h, ndirs_count * sizeof(const char*), _Alignof(const char*));
    199     if (lp->dirs) {
    200       lp->ndirs = ndirs_count;
    201       memset(lp->dirs, 0, ndirs_count * sizeof(const char*));
    202     }
    203   }
    204   for (i = 0; i < ndirs_count; ++i) {
    205     u32 j;
    206     DwAttrValue v;
    207     const char* path = "";
    208     for (j = 0; j < ndir_fmt; ++j) {
    209       dw_read_form_in(d, cu, &d->line, dir_fmt[j].form, 0, &off, &v);
    210       if (dir_fmt[j].content_type == DW_LNCT_path) {
    211         path = v.str ? v.str : "";
    212       }
    213     }
    214     if (lp->dirs && i < lp->ndirs) lp->dirs[i] = path;
    215   }
    216 
    217   /* file_names[] */
    218   nfile_fmt = read_format(d->line.data, d->line.size, &off, file_fmt, 8);
    219   nfiles_count = (u32)dw_uleb(d->line.data, d->line.size, &off);
    220   if (nfiles_count > 0) {
    221     lp->files = (DwLineFile*)d->h->alloc(
    222         d->h, nfiles_count * sizeof(DwLineFile), _Alignof(DwLineFile));
    223     if (lp->files) {
    224       lp->nfiles = nfiles_count;
    225       memset(lp->files, 0, nfiles_count * sizeof(DwLineFile));
    226     }
    227   }
    228   for (i = 0; i < nfiles_count; ++i) {
    229     u32 j;
    230     DwAttrValue v;
    231     const char* path = "";
    232     u32 dir_index = 0;
    233     for (j = 0; j < nfile_fmt; ++j) {
    234       dw_read_form_in(d, cu, &d->line, file_fmt[j].form, 0, &off, &v);
    235       if (file_fmt[j].content_type == DW_LNCT_path)
    236         path = v.str ? v.str : "";
    237       else if (file_fmt[j].content_type == DW_LNCT_directory_index)
    238         dir_index = (u32)v.u;
    239     }
    240     if (lp->files && i < lp->nfiles) {
    241       lp->files[i].path = path;
    242       lp->files[i].dir_index = dir_index;
    243     }
    244   }
    245 
    246   /* Build per-file normalized path cache lazily on first query. */
    247   if (lp->nfiles) {
    248     lp->file_norm = (const char**)d->h->alloc(
    249         d->h, lp->nfiles * sizeof(const char*), _Alignof(const char*));
    250     if (lp->file_norm) {
    251       lp->nfile_norm = lp->nfiles;
    252       for (i = 0; i < lp->nfiles; ++i) lp->file_norm[i] = NULL;
    253     }
    254   }
    255 
    256   /* program */
    257   off = prog_start;
    258   state_init(&st, h.default_is_stmt);
    259   while (off < unit_end) {
    260     u8 op = dw_u8(d->line.data, d->line.size, &off);
    261     if (op == 0) {
    262       /* extended opcode */
    263       u64 elen = dw_uleb(d->line.data, d->line.size, &off);
    264       u32 eop_off = off;
    265       u8 eop;
    266       if (elen == 0 || off + elen > d->line.size) break;
    267       eop = dw_u8(d->line.data, d->line.size, &off);
    268       switch (eop) {
    269         case DW_LNE_end_sequence:
    270           st.end_sequence = 1;
    271           rows_push(d, lp, &st);
    272           state_init(&st, h.default_is_stmt);
    273           break;
    274         case DW_LNE_set_address:
    275           if (h.address_size == 8)
    276             st.address = dw_u64(d->line.data, d->line.size, &off);
    277           else
    278             st.address = dw_u32(d->line.data, d->line.size, &off);
    279           st.op_index = 0;
    280           break;
    281         case DW_LNE_set_discriminator:
    282           st.discriminator = (u32)dw_uleb(d->line.data, d->line.size, &off);
    283           break;
    284         default:
    285           /* Skip unknown extended opcode body. */
    286           off = eop_off + (u32)elen;
    287           break;
    288       }
    289       /* Sync to the declared end of the extended opcode. */
    290       off = eop_off + (u32)elen;
    291     } else if (op < h.opcode_base) {
    292       /* standard opcode */
    293       switch (op) {
    294         case DW_LNS_copy:
    295           rows_push(d, lp, &st);
    296           st.basic_block = 0;
    297           st.prologue_end = 0;
    298           st.epilogue_begin = 0;
    299           st.discriminator = 0;
    300           break;
    301         case DW_LNS_advance_pc: {
    302           u64 adv = dw_uleb(d->line.data, d->line.size, &off);
    303           st.address += adv * h.min_inst_len;
    304         } break;
    305         case DW_LNS_advance_line: {
    306           i64 adv = dw_sleb(d->line.data, d->line.size, &off);
    307           st.line = (u32)((i64)st.line + adv);
    308         } break;
    309         case DW_LNS_set_file:
    310           st.file = (u32)dw_uleb(d->line.data, d->line.size, &off);
    311           break;
    312         case DW_LNS_set_column:
    313           st.column = (u32)dw_uleb(d->line.data, d->line.size, &off);
    314           break;
    315         case DW_LNS_negate_stmt:
    316           st.is_stmt = !st.is_stmt;
    317           break;
    318         case DW_LNS_set_basic_block:
    319           st.basic_block = 1;
    320           break;
    321         case DW_LNS_const_add_pc: {
    322           u8 adj = (u8)(255 - h.opcode_base);
    323           u8 op_adv = (u8)(adj / h.line_range);
    324           st.address += op_adv * h.min_inst_len;
    325         } break;
    326         case DW_LNS_fixed_advance_pc:
    327           st.address += dw_u16(d->line.data, d->line.size, &off);
    328           st.op_index = 0;
    329           break;
    330         case DW_LNS_set_prologue_end:
    331           st.prologue_end = 1;
    332           break;
    333         case DW_LNS_set_epilogue_begin:
    334           st.epilogue_begin = 1;
    335           break;
    336         case DW_LNS_set_isa:
    337           st.isa = (u32)dw_uleb(d->line.data, d->line.size, &off);
    338           break;
    339         default: {
    340           /* Unknown standard opcode: skip its operands per
    341            * std_opcode_lengths. */
    342           u32 nops = (op - 1u) < sizeof(h.std_opcode_lengths)
    343                          ? h.std_opcode_lengths[op - 1]
    344                          : 0;
    345           u32 j;
    346           for (j = 0; j < nops; ++j)
    347             (void)dw_uleb(d->line.data, d->line.size, &off);
    348         } break;
    349       }
    350     } else {
    351       /* special opcode */
    352       u32 adj = (u32)(op - h.opcode_base);
    353       u32 op_adv = adj / h.line_range;
    354       i32 line_inc = (i32)h.line_base + (i32)(adj % h.line_range);
    355       st.address += op_adv * h.min_inst_len;
    356       st.line = (u32)((i32)st.line + line_inc);
    357       rows_push(d, lp, &st);
    358       st.basic_block = 0;
    359       st.prologue_end = 0;
    360       st.epilogue_begin = 0;
    361       st.discriminator = 0;
    362     }
    363   }
    364 
    365   /* Build file_norm lazily. */
    366   if (lp->file_norm) {
    367     for (i = 0; i < lp->nfiles; ++i) {
    368       lp->file_norm[i] = build_file_norm(d, lp, i);
    369     }
    370   }
    371 }
    372 
    373 /* Lookup helpers. Build all CU line tables on demand, walk each. */
    374 
    375 KitStatus kit_dwarf_addr_to_line(KitDebugInfo* d, uint64_t pc,
    376                                  KitSlice* file_out, uint32_t* line_out,
    377                                  uint32_t* col_out) {
    378   /* Status codes:
    379    *   KIT_OK         — PC has a line entry; outputs filled.
    380    *   KIT_NOT_FOUND  — PC has no line entry (either inside a CU's
    381    *                      coverage with no row, or outside every CU). */
    382   u32 i;
    383   if (file_out) *file_out = KIT_SLICE_NULL;
    384   if (line_out) *line_out = 0;
    385   if (col_out) *col_out = 0;
    386   if (!d) return KIT_INVALID;
    387   for (i = 0; i < d->ncus; ++i) {
    388     DwLineProgram* lp;
    389     u32 j;
    390     DwLineRow* best = NULL;
    391     if (!d->lines_built[i]) dw_build_line(d, i);
    392     lp = &d->lines_by_cu[i];
    393     /* A line row covers [row.address, next_row.address); an end_sequence
    394      * row terminates a sequence and is not itself a coverage row. `pc`
    395      * is covered by `best` only if it falls before the end_sequence that
    396      * closes best's sequence — otherwise best is stale and pc lies in a
    397      * gap (or in another CU). Without this bound, a CU whose code sits
    398      * before pc would always claim it, so multi-CU images (one CU per
    399      * linked input) resolved every address to the first input's CU. */
    400     for (j = 0; j < lp->nrows; ++j) {
    401       DwLineRow* r = &lp->rows[j];
    402       if (r->end_sequence) {
    403         /* `best` covers pc when pc is strictly inside the sequence
    404          * (pc < end_sequence.address) or sits exactly on the last row's
    405          * address (a zero-length final row, which some producers emit with
    406          * end_sequence at the same address). The second clause must test
    407          * best->address, not r->address: when two CUs abut (one input's
    408          * code ends exactly where the next begins, common with a single
    409          * contiguous .text), end_sequence.address equals the next CU's
    410          * first pc, and `pc == end_sequence.address` alone would let the
    411          * earlier CU swallow an address that belongs to the later one. */
    412         if (best && (pc < r->address || pc == best->address)) {
    413           const char* f = "";
    414           if (best->file_index < lp->nfile_norm && lp->file_norm)
    415             f = lp->file_norm[best->file_index];
    416           if (file_out) *file_out = kit_slice_cstr(f);
    417           if (line_out) *line_out = best->line;
    418           if (col_out) *col_out = best->column;
    419           return KIT_OK;
    420         }
    421         best = NULL; /* sequence closed; pc not covered here */
    422         continue;
    423       }
    424       if (r->address <= pc) best = r;
    425     }
    426   }
    427   return KIT_NOT_FOUND;
    428 }
    429 
    430 /* file_norm matches user-typed `file` if either it is exactly equal, or it
    431  * ends with `/<file>`.  Suffix matching keeps `b util.c:42` working when
    432  * the DWARF file_norm is the absolute path the compiler saw. */
    433 static int dw_file_matches(const char* file_norm, const char* user,
    434                            size_t ulen) {
    435   size_t flen;
    436   if (!file_norm) return 0;
    437   if (dw_streq(file_norm, user)) return 1;
    438   flen = slice_from_cstr(file_norm).len;
    439   if (flen <= ulen) return 0;
    440   if (file_norm[flen - ulen - 1] != '/') return 0;
    441   return memcmp(file_norm + flen - ulen, user, ulen) == 0;
    442 }
    443 
    444 KitStatus kit_dwarf_line_to_addr(KitDebugInfo* d, KitSlice file, uint32_t line,
    445                                  uint64_t* pc_out) {
    446   /* Status:
    447    *   KIT_OK         — unique match; *pc_out filled.
    448    *   KIT_NOT_FOUND  — file unknown, or known but no row at `line`.
    449    *   KIT_AMBIGUOUS  — multiple distinct file paths match (file,line);
    450    *                      *pc_out is the first hit, use
    451    *                      kit_dwarf_line_to_addr_all to enumerate. */
    452   u32 i;
    453   size_t ulen;
    454   const char* first_path = NULL;
    455   uint64_t first_pc = 0;
    456   const char* alt_path = NULL;
    457   int line_hits = 0;
    458   if (pc_out) *pc_out = 0;
    459   if (!d || !file.s) return KIT_INVALID;
    460   ulen = file.len;
    461   if (ulen == 0) return KIT_INVALID;
    462   for (i = 0; i < d->ncus; ++i) {
    463     DwLineProgram* lp;
    464     u32 j;
    465     if (!d->lines_built[i]) dw_build_line(d, i);
    466     lp = &d->lines_by_cu[i];
    467     for (j = 0; j < lp->nrows; ++j) {
    468       DwLineRow* r = &lp->rows[j];
    469       const char* f;
    470       if (r->end_sequence) continue;
    471       if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue;
    472       f = lp->file_norm[r->file_index];
    473       if (!dw_file_matches(f, file.s, ulen)) continue;
    474       if (r->line != line) continue;
    475       ++line_hits;
    476       if (!first_path) {
    477         first_path = f;
    478         first_pc = r->address;
    479       } else if (!alt_path && f != first_path && !dw_streq(f, first_path)) {
    480         alt_path = f;
    481       }
    482     }
    483   }
    484   if (pc_out) *pc_out = first_pc;
    485   if (alt_path) return KIT_AMBIGUOUS;
    486   if (line_hits > 0) return KIT_OK;
    487   return KIT_NOT_FOUND;
    488 }
    489 
    490 /* Enumerate all distinct candidate (pc, file_norm) pairs for the given
    491  * (file, line) match.  Caller-supplied `out` array is filled up to `cap`;
    492  * `*n_out` receives the total candidate count (which may exceed cap, in
    493  * which case only the first `cap` are written).  Returns 0 on success
    494  * (including 0 candidates), 1 on invalid args.  Intended for REPL
    495  * disambiguation after kit_dwarf_line_to_addr returns 3. */
    496 KitStatus kit_dwarf_line_to_addr_all(KitDebugInfo* d, KitSlice file,
    497                                      uint32_t line, KitDwarfLineMatch* out,
    498                                      uint32_t cap, uint32_t* n_out) {
    499   /* One candidate per distinct file_norm path (not per PC).  PC is the
    500    * first matching row's address for that file_norm — i.e. the same PC
    501    * that kit_dwarf_line_to_addr would have returned for that file. */
    502   u32 i;
    503   size_t ulen;
    504   uint32_t total = 0;
    505   if (n_out) *n_out = 0;
    506   if (!d || !file.s) return KIT_INVALID;
    507   ulen = file.len;
    508   if (ulen == 0) return KIT_INVALID;
    509   for (i = 0; i < d->ncus; ++i) {
    510     DwLineProgram* lp;
    511     u32 j;
    512     if (!d->lines_built[i]) dw_build_line(d, i);
    513     lp = &d->lines_by_cu[i];
    514     for (j = 0; j < lp->nrows; ++j) {
    515       DwLineRow* r = &lp->rows[j];
    516       const char* f;
    517       uint32_t k;
    518       int dup = 0;
    519       if (r->end_sequence) continue;
    520       if (r->line != line) continue;
    521       if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue;
    522       f = lp->file_norm[r->file_index];
    523       if (!dw_file_matches(f, file.s, ulen)) continue;
    524       /* Dedupe by file_norm path so the candidate list is one entry per
    525        * source file even if the line has many per-instruction rows. */
    526       if (out) {
    527         uint32_t lim = total < cap ? total : cap;
    528         for (k = 0; k < lim; ++k) {
    529           if (out[k].file.s == f ||
    530               (out[k].file.s && dw_streq(out[k].file.s, f))) {
    531             dup = 1;
    532             break;
    533           }
    534         }
    535       }
    536       if (dup) continue;
    537       if (out && total < cap) {
    538         out[total].pc = r->address;
    539         out[total].file = kit_slice_cstr(f);
    540       }
    541       ++total;
    542     }
    543   }
    544   if (n_out) *n_out = total;
    545   return KIT_OK;
    546 }