dwarf_line.c (18442B)
1 /* dwarf_line.c — DWARF 5 line-number-program decoder. 2 * 3 * Per doc/DWARF.md §4.2: walk .debug_line for the CU's stmt_list, build 4 * a row matrix, and index it for addr→line and (file, line)→addr lookup. 5 */ 6 7 #include <kit/dwarf.h> 8 #include <stddef.h> 9 #include <stdint.h> 10 #include <string.h> 11 12 #include "core/core.h" 13 #include "core/heap.h" 14 #include "core/slice.h" 15 #include "core/util.h" 16 #include "debug/dwarf_internal.h" 17 18 typedef struct LineState { 19 u64 address; 20 u32 op_index; 21 u32 file; 22 u32 line; 23 u32 column; 24 u8 is_stmt; 25 u8 basic_block; 26 u8 end_sequence; 27 u8 prologue_end; 28 u8 epilogue_begin; 29 u32 isa; 30 u32 discriminator; 31 } LineState; 32 33 typedef struct LineHdr { 34 u32 unit_length; 35 u8 version; 36 u8 address_size; 37 u8 segment_selector_size; 38 u32 header_length; 39 u8 min_inst_len; 40 u8 max_ops_per_inst; 41 u8 default_is_stmt; 42 i8 line_base; 43 u8 line_range; 44 u8 opcode_base; 45 u8 std_opcode_lengths[12]; /* version 5 has 12 standard opcodes */ 46 } LineHdr; 47 48 static void rows_push(KitDebugInfo* d, DwLineProgram* lp, const LineState* st) { 49 DwLineRow* r; 50 if (lp->nrows == lp->cap) { 51 u32 ncap = lp->cap ? lp->cap * 2 : 32; 52 DwLineRow* na = (DwLineRow*)d->h->realloc( 53 d->h, lp->rows, lp->cap * sizeof(*lp->rows), ncap * sizeof(*lp->rows), 54 _Alignof(DwLineRow)); 55 if (!na) return; 56 lp->rows = na; 57 lp->cap = ncap; 58 } 59 r = &lp->rows[lp->nrows++]; 60 r->address = st->address; 61 r->file_index = st->file; 62 r->line = st->line; 63 r->column = st->column; 64 r->is_stmt = st->is_stmt; 65 r->end_sequence = st->end_sequence; 66 } 67 68 static void state_init(LineState* st, u8 default_is_stmt) { 69 st->address = 0; 70 st->op_index = 0; 71 st->file = 1; 72 st->line = 1; 73 st->column = 0; 74 st->is_stmt = default_is_stmt; 75 st->basic_block = 0; 76 st->end_sequence = 0; 77 st->prologue_end = 0; 78 st->epilogue_begin = 0; 79 st->isa = 0; 80 st->discriminator = 0; 81 } 82 83 /* Read a DW5 file-or-dir entry-format header. 84 * On entry: *off points at format_count. 85 * Returns the number of (content_type, form) pairs. Caller must read 86 * the format pairs before calling read_entries(). */ 87 typedef struct EntryFmt { 88 u32 content_type; 89 u32 form; 90 } EntryFmt; 91 92 static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt, 93 u32 max) { 94 u32 n = dw_u8(base, size, off); 95 u32 i; 96 if (n > max) n = max; 97 for (i = 0; i < n; ++i) { 98 fmt[i].content_type = (u32)dw_uleb(base, size, off); 99 fmt[i].form = (u32)dw_uleb(base, size, off); 100 } 101 return n; 102 } 103 104 /* Build a fully-qualified path for file_index in lp. */ 105 static const char* build_file_norm(KitDebugInfo* d, DwLineProgram* lp, 106 u32 idx) { 107 const char* path; 108 const char* dir; 109 u32 dir_idx; 110 size_t plen, dlen; 111 char buf[4096]; 112 size_t pos = 0; 113 if (idx >= lp->nfiles) return ""; 114 path = lp->files[idx].path; 115 if (!path) path = ""; 116 dir_idx = lp->files[idx].dir_index; 117 dir = (dir_idx < lp->ndirs) ? lp->dirs[dir_idx] : ""; 118 plen = slice_from_cstr(path).len; 119 dlen = slice_from_cstr(dir).len; 120 /* If path is already absolute (starts with /), return as-is. */ 121 if (plen > 0 && path[0] == '/') return path; 122 if (dlen > 0) { 123 if (dlen >= sizeof(buf) - 2) return path; /* fallback */ 124 memcpy(buf, dir, dlen); 125 pos = dlen; 126 if (buf[pos - 1] != '/') buf[pos++] = '/'; 127 } 128 if (pos + plen >= sizeof(buf)) return path; 129 memcpy(buf + pos, path, plen); 130 pos += plen; 131 buf[pos] = 0; 132 return dw_intern(d, buf, pos); 133 } 134 135 void dw_build_line(KitDebugInfo* d, u32 cu_idx) { 136 DwCu* cu; 137 DwLineProgram* lp; 138 u32 off; 139 u32 stmt_off; 140 LineHdr h; 141 u32 unit_end; 142 u32 prog_start; 143 EntryFmt dir_fmt[8]; 144 EntryFmt file_fmt[8]; 145 u32 ndir_fmt, nfile_fmt; 146 u32 ndirs_count, nfiles_count; 147 u32 i; 148 LineState st; 149 150 if (cu_idx >= d->ncus) return; 151 if (d->lines_built[cu_idx]) return; 152 d->lines_built[cu_idx] = 1; 153 154 cu = &d->cus[cu_idx]; 155 lp = &d->lines_by_cu[cu_idx]; 156 if (!cu->has_stmt_list) return; 157 stmt_off = cu->stmt_list; 158 if (stmt_off >= d->line.size) return; 159 160 off = stmt_off; 161 h.unit_length = dw_u32(d->line.data, d->line.size, &off); 162 if (h.unit_length == 0xffffffffu) return; /* DWARF64 not supported */ 163 unit_end = off + h.unit_length; 164 h.version = (u8)dw_u16(d->line.data, d->line.size, &off); 165 if (h.version != 5) { 166 /* DW4/3 layout differs. We only support DW5. */ 167 return; 168 } 169 h.address_size = dw_u8(d->line.data, d->line.size, &off); 170 h.segment_selector_size = dw_u8(d->line.data, d->line.size, &off); 171 h.header_length = dw_u32(d->line.data, d->line.size, &off); 172 prog_start = off + h.header_length; 173 h.min_inst_len = dw_u8(d->line.data, d->line.size, &off); 174 h.max_ops_per_inst = dw_u8(d->line.data, d->line.size, &off); 175 h.default_is_stmt = dw_u8(d->line.data, d->line.size, &off); 176 h.line_base = (i8)dw_u8(d->line.data, d->line.size, &off); 177 h.line_range = dw_u8(d->line.data, d->line.size, &off); 178 h.opcode_base = dw_u8(d->line.data, d->line.size, &off); 179 if (h.line_range == 0) h.line_range = 1; 180 /* Read standard opcode lengths (opcode_base - 1 of them). */ 181 { 182 u32 j; 183 u32 cnt = h.opcode_base ? h.opcode_base - 1u : 0u; 184 if (cnt > sizeof(h.std_opcode_lengths)) cnt = sizeof(h.std_opcode_lengths); 185 for (j = 0; j < cnt; ++j) 186 h.std_opcode_lengths[j] = dw_u8(d->line.data, d->line.size, &off); 187 /* Skip any extra opcode-length bytes the header claims. */ 188 if (h.opcode_base > 1u + sizeof(h.std_opcode_lengths)) { 189 off += (h.opcode_base - 1u) - (u32)sizeof(h.std_opcode_lengths); 190 } 191 } 192 193 /* directories[] */ 194 ndir_fmt = read_format(d->line.data, d->line.size, &off, dir_fmt, 8); 195 ndirs_count = (u32)dw_uleb(d->line.data, d->line.size, &off); 196 if (ndirs_count > 0) { 197 lp->dirs = (const char**)d->h->alloc( 198 d->h, ndirs_count * sizeof(const char*), _Alignof(const char*)); 199 if (lp->dirs) { 200 lp->ndirs = ndirs_count; 201 memset(lp->dirs, 0, ndirs_count * sizeof(const char*)); 202 } 203 } 204 for (i = 0; i < ndirs_count; ++i) { 205 u32 j; 206 DwAttrValue v; 207 const char* path = ""; 208 for (j = 0; j < ndir_fmt; ++j) { 209 dw_read_form_in(d, cu, &d->line, dir_fmt[j].form, 0, &off, &v); 210 if (dir_fmt[j].content_type == DW_LNCT_path) { 211 path = v.str ? v.str : ""; 212 } 213 } 214 if (lp->dirs && i < lp->ndirs) lp->dirs[i] = path; 215 } 216 217 /* file_names[] */ 218 nfile_fmt = read_format(d->line.data, d->line.size, &off, file_fmt, 8); 219 nfiles_count = (u32)dw_uleb(d->line.data, d->line.size, &off); 220 if (nfiles_count > 0) { 221 lp->files = (DwLineFile*)d->h->alloc( 222 d->h, nfiles_count * sizeof(DwLineFile), _Alignof(DwLineFile)); 223 if (lp->files) { 224 lp->nfiles = nfiles_count; 225 memset(lp->files, 0, nfiles_count * sizeof(DwLineFile)); 226 } 227 } 228 for (i = 0; i < nfiles_count; ++i) { 229 u32 j; 230 DwAttrValue v; 231 const char* path = ""; 232 u32 dir_index = 0; 233 for (j = 0; j < nfile_fmt; ++j) { 234 dw_read_form_in(d, cu, &d->line, file_fmt[j].form, 0, &off, &v); 235 if (file_fmt[j].content_type == DW_LNCT_path) 236 path = v.str ? v.str : ""; 237 else if (file_fmt[j].content_type == DW_LNCT_directory_index) 238 dir_index = (u32)v.u; 239 } 240 if (lp->files && i < lp->nfiles) { 241 lp->files[i].path = path; 242 lp->files[i].dir_index = dir_index; 243 } 244 } 245 246 /* Build per-file normalized path cache lazily on first query. */ 247 if (lp->nfiles) { 248 lp->file_norm = (const char**)d->h->alloc( 249 d->h, lp->nfiles * sizeof(const char*), _Alignof(const char*)); 250 if (lp->file_norm) { 251 lp->nfile_norm = lp->nfiles; 252 for (i = 0; i < lp->nfiles; ++i) lp->file_norm[i] = NULL; 253 } 254 } 255 256 /* program */ 257 off = prog_start; 258 state_init(&st, h.default_is_stmt); 259 while (off < unit_end) { 260 u8 op = dw_u8(d->line.data, d->line.size, &off); 261 if (op == 0) { 262 /* extended opcode */ 263 u64 elen = dw_uleb(d->line.data, d->line.size, &off); 264 u32 eop_off = off; 265 u8 eop; 266 if (elen == 0 || off + elen > d->line.size) break; 267 eop = dw_u8(d->line.data, d->line.size, &off); 268 switch (eop) { 269 case DW_LNE_end_sequence: 270 st.end_sequence = 1; 271 rows_push(d, lp, &st); 272 state_init(&st, h.default_is_stmt); 273 break; 274 case DW_LNE_set_address: 275 if (h.address_size == 8) 276 st.address = dw_u64(d->line.data, d->line.size, &off); 277 else 278 st.address = dw_u32(d->line.data, d->line.size, &off); 279 st.op_index = 0; 280 break; 281 case DW_LNE_set_discriminator: 282 st.discriminator = (u32)dw_uleb(d->line.data, d->line.size, &off); 283 break; 284 default: 285 /* Skip unknown extended opcode body. */ 286 off = eop_off + (u32)elen; 287 break; 288 } 289 /* Sync to the declared end of the extended opcode. */ 290 off = eop_off + (u32)elen; 291 } else if (op < h.opcode_base) { 292 /* standard opcode */ 293 switch (op) { 294 case DW_LNS_copy: 295 rows_push(d, lp, &st); 296 st.basic_block = 0; 297 st.prologue_end = 0; 298 st.epilogue_begin = 0; 299 st.discriminator = 0; 300 break; 301 case DW_LNS_advance_pc: { 302 u64 adv = dw_uleb(d->line.data, d->line.size, &off); 303 st.address += adv * h.min_inst_len; 304 } break; 305 case DW_LNS_advance_line: { 306 i64 adv = dw_sleb(d->line.data, d->line.size, &off); 307 st.line = (u32)((i64)st.line + adv); 308 } break; 309 case DW_LNS_set_file: 310 st.file = (u32)dw_uleb(d->line.data, d->line.size, &off); 311 break; 312 case DW_LNS_set_column: 313 st.column = (u32)dw_uleb(d->line.data, d->line.size, &off); 314 break; 315 case DW_LNS_negate_stmt: 316 st.is_stmt = !st.is_stmt; 317 break; 318 case DW_LNS_set_basic_block: 319 st.basic_block = 1; 320 break; 321 case DW_LNS_const_add_pc: { 322 u8 adj = (u8)(255 - h.opcode_base); 323 u8 op_adv = (u8)(adj / h.line_range); 324 st.address += op_adv * h.min_inst_len; 325 } break; 326 case DW_LNS_fixed_advance_pc: 327 st.address += dw_u16(d->line.data, d->line.size, &off); 328 st.op_index = 0; 329 break; 330 case DW_LNS_set_prologue_end: 331 st.prologue_end = 1; 332 break; 333 case DW_LNS_set_epilogue_begin: 334 st.epilogue_begin = 1; 335 break; 336 case DW_LNS_set_isa: 337 st.isa = (u32)dw_uleb(d->line.data, d->line.size, &off); 338 break; 339 default: { 340 /* Unknown standard opcode: skip its operands per 341 * std_opcode_lengths. */ 342 u32 nops = (op - 1u) < sizeof(h.std_opcode_lengths) 343 ? h.std_opcode_lengths[op - 1] 344 : 0; 345 u32 j; 346 for (j = 0; j < nops; ++j) 347 (void)dw_uleb(d->line.data, d->line.size, &off); 348 } break; 349 } 350 } else { 351 /* special opcode */ 352 u32 adj = (u32)(op - h.opcode_base); 353 u32 op_adv = adj / h.line_range; 354 i32 line_inc = (i32)h.line_base + (i32)(adj % h.line_range); 355 st.address += op_adv * h.min_inst_len; 356 st.line = (u32)((i32)st.line + line_inc); 357 rows_push(d, lp, &st); 358 st.basic_block = 0; 359 st.prologue_end = 0; 360 st.epilogue_begin = 0; 361 st.discriminator = 0; 362 } 363 } 364 365 /* Build file_norm lazily. */ 366 if (lp->file_norm) { 367 for (i = 0; i < lp->nfiles; ++i) { 368 lp->file_norm[i] = build_file_norm(d, lp, i); 369 } 370 } 371 } 372 373 /* Lookup helpers. Build all CU line tables on demand, walk each. */ 374 375 KitStatus kit_dwarf_addr_to_line(KitDebugInfo* d, uint64_t pc, 376 KitSlice* file_out, uint32_t* line_out, 377 uint32_t* col_out) { 378 /* Status codes: 379 * KIT_OK — PC has a line entry; outputs filled. 380 * KIT_NOT_FOUND — PC has no line entry (either inside a CU's 381 * coverage with no row, or outside every CU). */ 382 u32 i; 383 if (file_out) *file_out = KIT_SLICE_NULL; 384 if (line_out) *line_out = 0; 385 if (col_out) *col_out = 0; 386 if (!d) return KIT_INVALID; 387 for (i = 0; i < d->ncus; ++i) { 388 DwLineProgram* lp; 389 u32 j; 390 DwLineRow* best = NULL; 391 if (!d->lines_built[i]) dw_build_line(d, i); 392 lp = &d->lines_by_cu[i]; 393 /* A line row covers [row.address, next_row.address); an end_sequence 394 * row terminates a sequence and is not itself a coverage row. `pc` 395 * is covered by `best` only if it falls before the end_sequence that 396 * closes best's sequence — otherwise best is stale and pc lies in a 397 * gap (or in another CU). Without this bound, a CU whose code sits 398 * before pc would always claim it, so multi-CU images (one CU per 399 * linked input) resolved every address to the first input's CU. */ 400 for (j = 0; j < lp->nrows; ++j) { 401 DwLineRow* r = &lp->rows[j]; 402 if (r->end_sequence) { 403 /* `best` covers pc when pc is strictly inside the sequence 404 * (pc < end_sequence.address) or sits exactly on the last row's 405 * address (a zero-length final row, which some producers emit with 406 * end_sequence at the same address). The second clause must test 407 * best->address, not r->address: when two CUs abut (one input's 408 * code ends exactly where the next begins, common with a single 409 * contiguous .text), end_sequence.address equals the next CU's 410 * first pc, and `pc == end_sequence.address` alone would let the 411 * earlier CU swallow an address that belongs to the later one. */ 412 if (best && (pc < r->address || pc == best->address)) { 413 const char* f = ""; 414 if (best->file_index < lp->nfile_norm && lp->file_norm) 415 f = lp->file_norm[best->file_index]; 416 if (file_out) *file_out = kit_slice_cstr(f); 417 if (line_out) *line_out = best->line; 418 if (col_out) *col_out = best->column; 419 return KIT_OK; 420 } 421 best = NULL; /* sequence closed; pc not covered here */ 422 continue; 423 } 424 if (r->address <= pc) best = r; 425 } 426 } 427 return KIT_NOT_FOUND; 428 } 429 430 /* file_norm matches user-typed `file` if either it is exactly equal, or it 431 * ends with `/<file>`. Suffix matching keeps `b util.c:42` working when 432 * the DWARF file_norm is the absolute path the compiler saw. */ 433 static int dw_file_matches(const char* file_norm, const char* user, 434 size_t ulen) { 435 size_t flen; 436 if (!file_norm) return 0; 437 if (dw_streq(file_norm, user)) return 1; 438 flen = slice_from_cstr(file_norm).len; 439 if (flen <= ulen) return 0; 440 if (file_norm[flen - ulen - 1] != '/') return 0; 441 return memcmp(file_norm + flen - ulen, user, ulen) == 0; 442 } 443 444 KitStatus kit_dwarf_line_to_addr(KitDebugInfo* d, KitSlice file, uint32_t line, 445 uint64_t* pc_out) { 446 /* Status: 447 * KIT_OK — unique match; *pc_out filled. 448 * KIT_NOT_FOUND — file unknown, or known but no row at `line`. 449 * KIT_AMBIGUOUS — multiple distinct file paths match (file,line); 450 * *pc_out is the first hit, use 451 * kit_dwarf_line_to_addr_all to enumerate. */ 452 u32 i; 453 size_t ulen; 454 const char* first_path = NULL; 455 uint64_t first_pc = 0; 456 const char* alt_path = NULL; 457 int line_hits = 0; 458 if (pc_out) *pc_out = 0; 459 if (!d || !file.s) return KIT_INVALID; 460 ulen = file.len; 461 if (ulen == 0) return KIT_INVALID; 462 for (i = 0; i < d->ncus; ++i) { 463 DwLineProgram* lp; 464 u32 j; 465 if (!d->lines_built[i]) dw_build_line(d, i); 466 lp = &d->lines_by_cu[i]; 467 for (j = 0; j < lp->nrows; ++j) { 468 DwLineRow* r = &lp->rows[j]; 469 const char* f; 470 if (r->end_sequence) continue; 471 if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; 472 f = lp->file_norm[r->file_index]; 473 if (!dw_file_matches(f, file.s, ulen)) continue; 474 if (r->line != line) continue; 475 ++line_hits; 476 if (!first_path) { 477 first_path = f; 478 first_pc = r->address; 479 } else if (!alt_path && f != first_path && !dw_streq(f, first_path)) { 480 alt_path = f; 481 } 482 } 483 } 484 if (pc_out) *pc_out = first_pc; 485 if (alt_path) return KIT_AMBIGUOUS; 486 if (line_hits > 0) return KIT_OK; 487 return KIT_NOT_FOUND; 488 } 489 490 /* Enumerate all distinct candidate (pc, file_norm) pairs for the given 491 * (file, line) match. Caller-supplied `out` array is filled up to `cap`; 492 * `*n_out` receives the total candidate count (which may exceed cap, in 493 * which case only the first `cap` are written). Returns 0 on success 494 * (including 0 candidates), 1 on invalid args. Intended for REPL 495 * disambiguation after kit_dwarf_line_to_addr returns 3. */ 496 KitStatus kit_dwarf_line_to_addr_all(KitDebugInfo* d, KitSlice file, 497 uint32_t line, KitDwarfLineMatch* out, 498 uint32_t cap, uint32_t* n_out) { 499 /* One candidate per distinct file_norm path (not per PC). PC is the 500 * first matching row's address for that file_norm — i.e. the same PC 501 * that kit_dwarf_line_to_addr would have returned for that file. */ 502 u32 i; 503 size_t ulen; 504 uint32_t total = 0; 505 if (n_out) *n_out = 0; 506 if (!d || !file.s) return KIT_INVALID; 507 ulen = file.len; 508 if (ulen == 0) return KIT_INVALID; 509 for (i = 0; i < d->ncus; ++i) { 510 DwLineProgram* lp; 511 u32 j; 512 if (!d->lines_built[i]) dw_build_line(d, i); 513 lp = &d->lines_by_cu[i]; 514 for (j = 0; j < lp->nrows; ++j) { 515 DwLineRow* r = &lp->rows[j]; 516 const char* f; 517 uint32_t k; 518 int dup = 0; 519 if (r->end_sequence) continue; 520 if (r->line != line) continue; 521 if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; 522 f = lp->file_norm[r->file_index]; 523 if (!dw_file_matches(f, file.s, ulen)) continue; 524 /* Dedupe by file_norm path so the candidate list is one entry per 525 * source file even if the line has many per-instruction rows. */ 526 if (out) { 527 uint32_t lim = total < cap ? total : cap; 528 for (k = 0; k < lim; ++k) { 529 if (out[k].file.s == f || 530 (out[k].file.s && dw_streq(out[k].file.s, f))) { 531 dup = 1; 532 break; 533 } 534 } 535 } 536 if (dup) continue; 537 if (out && total < cap) { 538 out[total].pc = r->address; 539 out[total].file = kit_slice_cstr(f); 540 } 541 ++total; 542 } 543 } 544 if (n_out) *n_out = total; 545 return KIT_OK; 546 }