debug_emit.c (45997B)
1 /* Linearize accumulated Debug state into ObjBuilder .debug_* sections. 2 * 3 * Wire-format choices made here are documented in DWARF.md / the agent 4 * report. Highlights: 5 * 6 * - DWARF 5 only. 7 * - 32-bit (DWARF32) section length form. 8 * - DW_FORM_strx4 used uniformly for string refs from .debug_info. 9 * - DW_FORM_line_strp for line program file/dir paths. 10 * - DW_FORM_ref4 for intra-CU DIE refs (CU-relative offset). 11 * - DW_AT_low_pc encoded as DW_FORM_addr with R_ABS64 reloc against the 12 * function symbol; DW_AT_high_pc is DW_FORM_data4 holding func size. 13 * - DW_AT_frame_base is exprloc { DW_OP_call_frame_cfa }. 14 * - Abbrev codes are assigned in first-use order, starting at 1. 15 * - File 0 in .debug_line is the CU primary file (DW5 convention). */ 16 17 #include <string.h> 18 19 #include "arch/arch.h" 20 #include "core/buf.h" 21 #include "core/core.h" 22 #include "core/heap.h" 23 #include "core/pool.h" 24 #include "core/slice.h" 25 #include "core/vec.h" 26 #include "debug/debug_internal.h" 27 28 void abbrev_fini_heap(DebugAbbrevPool* p, Heap* h); 29 30 /* ---------------------------------------------------------------- */ 31 /* String tables. */ 32 33 typedef struct StrTab { 34 Buf buf; 35 SymToU32 by_sym; /* Sym → byte offset within buf */ 36 /* Insertion order — used to populate .debug_str_offsets. */ 37 Sym* syms; 38 u32 nsyms; 39 u32 syms_cap; 40 } StrTab; 41 42 static void str_init(StrTab* s, Heap* h) { 43 buf_init(&s->buf, h); 44 SymToU32_init(&s->by_sym, h); 45 s->syms = NULL; 46 s->nsyms = 0; 47 s->syms_cap = 0; 48 } 49 50 static void str_fini(StrTab* s, Heap* h) { 51 buf_fini(&s->buf); 52 SymToU32_fini(&s->by_sym); 53 if (s->syms) h->free(h, s->syms, sizeof(Sym) * s->syms_cap); 54 s->syms = NULL; 55 s->nsyms = 0; 56 s->syms_cap = 0; 57 } 58 59 static u32 str_intern(StrTab* s, Heap* h, Pool* pool, Sym sym) { 60 u32* found; 61 u32 ofs; 62 size_t len; 63 const char* str; 64 if (sym == 0) sym = pool_intern_slice(pool, SLICE_LIT("")); 65 found = SymToU32_get(&s->by_sym, sym); 66 if (found) return *found; 67 ofs = buf_pos(&s->buf); 68 { 69 Slice sl = pool_slice(pool, sym); 70 str = sl.s; 71 len = sl.len; 72 } 73 if (str && len) buf_write(&s->buf, str, len); 74 { 75 u8 nul = 0; 76 buf_write(&s->buf, &nul, 1); 77 } 78 SymToU32_set(&s->by_sym, sym, ofs); 79 if (VEC_GROW(h, s->syms, s->syms_cap, s->nsyms + 1)) return ofs; 80 s->syms[s->nsyms++] = sym; 81 return ofs; 82 } 83 84 static u32 str_index_of(StrTab* s, Sym sym) { 85 u32 i; 86 for (i = 0; i < s->nsyms; ++i) { 87 if (s->syms[i] == sym) return i; 88 } 89 return 0; 90 } 91 92 /* ---------------------------------------------------------------- */ 93 /* DIE forward refs and address relocs. */ 94 95 typedef struct DieFixup { 96 u32 buf_offset; /* offset within EmitCtx.info_body */ 97 DebugTypeId target; 98 } DieFixup; 99 100 typedef struct AddrReloc { 101 u32 buf_offset; /* offset within the section (assigned at flush) */ 102 ObjSymId sym; 103 ObjSecId section; /* set on flush */ 104 } AddrReloc; 105 106 typedef struct EmitCtx { 107 Debug* d; 108 Heap* heap; 109 Pool* pool; 110 ObjBuilder* ob; 111 112 StrTab str; /* .debug_str */ 113 StrTab line_str; /* .debug_line_str */ 114 115 DebugAbbrevPool abbr; 116 117 /* Pre-resolved abbrev codes */ 118 u32 abbr_cu; 119 u32 abbr_base; 120 u32 abbr_ptr; 121 u32 abbr_typedef; 122 u32 abbr_qual_const; 123 u32 abbr_qual_volatile; 124 u32 abbr_qual_restrict; 125 u32 abbr_array; 126 u32 abbr_array_subrange; 127 u32 abbr_array_subrange_unbounded; 128 u32 abbr_func_type; 129 u32 abbr_func_type_param; 130 u32 abbr_struct; 131 u32 abbr_union; 132 u32 abbr_member; 133 u32 abbr_enum; 134 u32 abbr_enum_val; 135 u32 abbr_subprogram; 136 u32 abbr_param; 137 u32 abbr_var; 138 u32 abbr_lexical_block; 139 140 /* CU body (post-CU-header DIE bytes). */ 141 Buf info_body; 142 143 /* Forward type-ref fixups (info_body-relative). */ 144 DieFixup* fixups; 145 u32 nfixups; 146 u32 fixups_cap; 147 148 /* low_pc relocs in .debug_info (info_body-relative offset). */ 149 AddrReloc* info_relocs; 150 u32 ninfo_relocs; 151 u32 info_relocs_cap; 152 153 /* line-program address relocs (.debug_line offset within program region). */ 154 AddrReloc* line_relocs; 155 u32 nline_relocs; 156 u32 line_relocs_cap; 157 158 /* aranges relocs (section-relative once we know offsets). */ 159 AddrReloc* aranges_relocs; 160 u32 naranges_relocs; 161 u32 aranges_relocs_cap; 162 163 /* rnglists relocs. */ 164 AddrReloc* rng_relocs; 165 u32 nrng_relocs; 166 u32 nrng_relocs_cap; 167 168 /* Section ids (pre-created up front so cross-section relocs can name 169 * their target before its bytes are written). */ 170 ObjSecId sec_str; 171 ObjSecId sec_line_str; 172 ObjSecId sec_str_off; 173 ObjSecId sec_abbrev; 174 ObjSecId sec_info; 175 ObjSecId sec_line; 176 ObjSecId sec_aranges; 177 ObjSecId sec_rnglists; 178 179 /* SK_SECTION ObjSyms over the same sections. They exist so the CU 180 * header + root DIE can encode cross-section offsets (debug_abbrev_offset, 181 * stmt_list, str_offsets_base, ranges) and the line / str-offsets 182 * payloads can encode their .debug_line_str / .debug_str references 183 * as relocations. The on-disk u32 stays zero; the relocation's addend 184 * carries the in-section offset. In a normal `.o` emit the linker 185 * applies R_ABS32 with S=section_vaddr=0 (debug sections are not laid 186 * out), so the written value equals the addend — byte-identical to the 187 * pre-reloc behaviour. In the JIT view, link_jit applies the same 188 * reloc against the section's accumulated prefix in the merged view, 189 * so concatenated multi-input debug bytes resolve to the right slot. */ 190 ObjSymId ssym_str; 191 ObjSymId ssym_line_str; 192 ObjSymId ssym_str_off; 193 ObjSymId ssym_abbrev; 194 ObjSymId ssym_line; 195 ObjSymId ssym_rnglists; 196 ObjSymId ssym_info; 197 198 /* Body-relative offsets of the three CU-root-DIE attributes whose 199 * payloads are cross-section offsets. Captured at the call sites in 200 * debug_emit() and consumed by emit_section_info() to emit R_ABS32 201 * relocs at cu_header_size + <at>. */ 202 u32 root_stmt_list_at; 203 u32 root_ranges_at; 204 u32 root_str_off_base_at; 205 } EmitCtx; 206 207 /* ---------------------------------------------------------------- */ 208 209 static void add_fixup(EmitCtx* e, u32 buf_offset, DebugTypeId target) { 210 DieFixup* fx; 211 if (VEC_GROW(e->heap, e->fixups, e->fixups_cap, e->nfixups + 1)) return; 212 fx = &e->fixups[e->nfixups++]; 213 fx->buf_offset = buf_offset; 214 fx->target = target; 215 } 216 217 static void add_info_reloc(EmitCtx* e, u32 buf_offset, ObjSymId sym) { 218 AddrReloc* r; 219 if (VEC_GROW(e->heap, e->info_relocs, e->info_relocs_cap, 220 e->ninfo_relocs + 1)) 221 return; 222 r = &e->info_relocs[e->ninfo_relocs++]; 223 r->buf_offset = buf_offset; 224 r->sym = sym; 225 r->section = OBJ_SEC_NONE; 226 } 227 228 static void add_line_reloc(EmitCtx* e, u32 buf_offset, ObjSymId sym) { 229 AddrReloc* r; 230 if (VEC_GROW(e->heap, e->line_relocs, e->line_relocs_cap, 231 e->nline_relocs + 1)) 232 return; 233 r = &e->line_relocs[e->nline_relocs++]; 234 r->buf_offset = buf_offset; 235 r->sym = sym; 236 r->section = OBJ_SEC_NONE; 237 } 238 239 static void add_aranges_reloc(EmitCtx* e, u32 buf_offset, ObjSymId sym) { 240 AddrReloc* r; 241 if (VEC_GROW(e->heap, e->aranges_relocs, e->aranges_relocs_cap, 242 e->naranges_relocs + 1)) 243 return; 244 r = &e->aranges_relocs[e->naranges_relocs++]; 245 r->buf_offset = buf_offset; 246 r->sym = sym; 247 r->section = OBJ_SEC_NONE; 248 } 249 250 static void add_rng_reloc(EmitCtx* e, u32 buf_offset, ObjSymId sym) { 251 AddrReloc* r; 252 if (VEC_GROW(e->heap, e->rng_relocs, e->nrng_relocs_cap, e->nrng_relocs + 1)) 253 return; 254 r = &e->rng_relocs[e->nrng_relocs++]; 255 r->buf_offset = buf_offset; 256 r->sym = sym; 257 r->section = OBJ_SEC_NONE; 258 } 259 260 /* ---------------------------------------------------------------- */ 261 /* String emit shortcuts. */ 262 263 static void emit_strx4(EmitCtx* e, Buf* b, Sym name) { 264 str_intern(&e->str, e->heap, e->pool, name); 265 { 266 Sym key = name ? name : pool_intern_slice(e->pool, SLICE_LIT("")); 267 u32 idx = str_index_of(&e->str, key); 268 form_u32(b, idx); 269 } 270 } 271 272 static u32 line_str_offset(EmitCtx* e, Sym sym) { 273 return str_intern(&e->line_str, e->heap, e->pool, sym); 274 } 275 276 /* ---------------------------------------------------------------- */ 277 /* Abbrev resolution. */ 278 279 static u32 abbr_intern(EmitCtx* e, u16 tag, u8 has_children, 280 const DebugAbbrevAttr* attrs, u32 nattrs) { 281 return abbrev_intern(&e->abbr, e->heap, tag, has_children, attrs, nattrs); 282 } 283 284 static void resolve_abbrevs(EmitCtx* e) { 285 /* Order of intern == order of code assignment. */ 286 { 287 DebugAbbrevAttr a[] = { 288 {DW_AT_producer, DW_FORM_strx4, 0}, 289 {DW_AT_language, DW_FORM_data2, 0}, 290 {DW_AT_name, DW_FORM_strx4, 0}, 291 {DW_AT_comp_dir, DW_FORM_strx4, 0}, 292 {DW_AT_stmt_list, DW_FORM_sec_offset, 0}, 293 {DW_AT_low_pc, DW_FORM_addr, 0}, 294 {DW_AT_ranges, DW_FORM_sec_offset, 0}, 295 {DW_AT_str_offsets_base, DW_FORM_sec_offset, 0}, 296 }; 297 e->abbr_cu = abbr_intern(e, DW_TAG_compile_unit, DW_CHILDREN_yes, a, 298 (u32)(sizeof(a) / sizeof(a[0]))); 299 } 300 { 301 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 302 {DW_AT_encoding, DW_FORM_data1, 0}, 303 {DW_AT_byte_size, DW_FORM_data1, 0}}; 304 e->abbr_base = abbr_intern(e, DW_TAG_base_type, DW_CHILDREN_no, a, 3); 305 } 306 { 307 DebugAbbrevAttr a[] = {{DW_AT_byte_size, DW_FORM_data1, 0}, 308 {DW_AT_type, DW_FORM_ref4, 0}}; 309 e->abbr_ptr = abbr_intern(e, DW_TAG_pointer_type, DW_CHILDREN_no, a, 2); 310 } 311 { 312 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 313 {DW_AT_type, DW_FORM_ref4, 0}}; 314 e->abbr_typedef = abbr_intern(e, DW_TAG_typedef, DW_CHILDREN_no, a, 2); 315 } 316 { 317 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}}; 318 e->abbr_qual_const = 319 abbr_intern(e, DW_TAG_const_type, DW_CHILDREN_no, a, 1); 320 } 321 { 322 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}}; 323 e->abbr_qual_volatile = 324 abbr_intern(e, DW_TAG_volatile_type, DW_CHILDREN_no, a, 1); 325 } 326 { 327 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}}; 328 e->abbr_qual_restrict = 329 abbr_intern(e, DW_TAG_restrict_type, DW_CHILDREN_no, a, 1); 330 } 331 { 332 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}}; 333 e->abbr_array = abbr_intern(e, DW_TAG_array_type, DW_CHILDREN_yes, a, 1); 334 } 335 { 336 DebugAbbrevAttr a[] = {{DW_AT_count, DW_FORM_udata, 0}}; 337 e->abbr_array_subrange = 338 abbr_intern(e, DW_TAG_subrange_type, DW_CHILDREN_no, a, 1); 339 } 340 { 341 e->abbr_array_subrange_unbounded = 342 abbr_intern(e, DW_TAG_subrange_type, DW_CHILDREN_no, NULL, 0); 343 } 344 { 345 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}, 346 {DW_AT_prototyped, DW_FORM_flag_present, 0}}; 347 e->abbr_func_type = 348 abbr_intern(e, DW_TAG_subroutine_type, DW_CHILDREN_yes, a, 2); 349 } 350 { 351 DebugAbbrevAttr a[] = {{DW_AT_type, DW_FORM_ref4, 0}}; 352 e->abbr_func_type_param = 353 abbr_intern(e, DW_TAG_formal_parameter, DW_CHILDREN_no, a, 1); 354 } 355 { 356 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 357 {DW_AT_byte_size, DW_FORM_udata, 0}}; 358 e->abbr_struct = 359 abbr_intern(e, DW_TAG_structure_type, DW_CHILDREN_yes, a, 2); 360 e->abbr_union = abbr_intern(e, DW_TAG_union_type, DW_CHILDREN_yes, a, 2); 361 } 362 { 363 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 364 {DW_AT_type, DW_FORM_ref4, 0}, 365 {DW_AT_data_member_location, DW_FORM_udata, 0}}; 366 e->abbr_member = abbr_intern(e, DW_TAG_member, DW_CHILDREN_no, a, 3); 367 } 368 { 369 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 370 {DW_AT_type, DW_FORM_ref4, 0}, 371 {DW_AT_byte_size, DW_FORM_udata, 0}}; 372 e->abbr_enum = 373 abbr_intern(e, DW_TAG_enumeration_type, DW_CHILDREN_yes, a, 3); 374 } 375 { 376 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 377 {DW_AT_const_value, DW_FORM_sdata, 0}}; 378 e->abbr_enum_val = abbr_intern(e, DW_TAG_enumerator, DW_CHILDREN_no, a, 2); 379 } 380 { 381 /* Subprogram. We use a single abbrev with DW_AT_type even when 382 * return is void; emit_subprogram_die emits ref4=0 in that case 383 * (which the consumer interprets as void). */ 384 DebugAbbrevAttr a[] = {{DW_AT_external, DW_FORM_flag_present, 0}, 385 {DW_AT_name, DW_FORM_strx4, 0}, 386 {DW_AT_decl_file, DW_FORM_udata, 0}, 387 {DW_AT_decl_line, DW_FORM_udata, 0}, 388 {DW_AT_type, DW_FORM_ref4, 0}, 389 {DW_AT_low_pc, DW_FORM_addr, 0}, 390 {DW_AT_high_pc, DW_FORM_data4, 0}, 391 {DW_AT_frame_base, DW_FORM_exprloc, 0}}; 392 e->abbr_subprogram = 393 abbr_intern(e, DW_TAG_subprogram, DW_CHILDREN_yes, a, 8); 394 } 395 { 396 DebugAbbrevAttr a[] = {{DW_AT_name, DW_FORM_strx4, 0}, 397 {DW_AT_decl_file, DW_FORM_udata, 0}, 398 {DW_AT_decl_line, DW_FORM_udata, 0}, 399 {DW_AT_type, DW_FORM_ref4, 0}, 400 {DW_AT_location, DW_FORM_exprloc, 0}}; 401 e->abbr_param = 402 abbr_intern(e, DW_TAG_formal_parameter, DW_CHILDREN_no, a, 5); 403 e->abbr_var = abbr_intern(e, DW_TAG_variable, DW_CHILDREN_no, a, 5); 404 } 405 { 406 e->abbr_lexical_block = 407 abbr_intern(e, DW_TAG_lexical_block, DW_CHILDREN_yes, NULL, 0); 408 } 409 } 410 411 /* ---------------------------------------------------------------- */ 412 /* Per-type DIE emission. */ 413 414 static void emit_type_die(EmitCtx* e, DebugTypeId id); 415 416 static void emit_type_ref(EmitCtx* e, DebugTypeId tid) { 417 u32 ofs = buf_pos(&e->info_body); 418 u32 placeholder = 0; 419 buf_write(&e->info_body, &placeholder, 4); 420 if (tid != DEBUG_TYPE_NONE) { 421 add_fixup(e, ofs, tid); 422 } 423 } 424 425 static u8 base_enc(DebugBaseEncoding enc) { 426 switch (enc) { 427 case DEBUG_BE_BOOL: 428 return DW_ATE_boolean; 429 case DEBUG_BE_SIGNED: 430 return DW_ATE_signed; 431 case DEBUG_BE_UNSIGNED: 432 return DW_ATE_unsigned; 433 case DEBUG_BE_SIGNED_CHAR: 434 return DW_ATE_signed_char; 435 case DEBUG_BE_UNSIGNED_CHAR: 436 return DW_ATE_unsigned_char; 437 case DEBUG_BE_FLOAT: 438 return DW_ATE_float; 439 case DEBUG_BE_UTF: 440 return DW_ATE_UTF; 441 case DEBUG_BE_ADDRESS: 442 return DW_ATE_address; 443 } 444 return DW_ATE_signed; 445 } 446 447 static void emit_type_die(EmitCtx* e, DebugTypeId id) { 448 DebugType* t; 449 Debug* d = e->d; 450 if (id == DEBUG_TYPE_NONE || id > d->ntypes) return; 451 t = &d->types[id - 1]; 452 if (t->die_offset != 0) return; 453 switch ((DebugTypeKind)t->kind) { 454 case DTK_VOID: 455 /* No DIE — t->die_offset stays 0; refs will encode as 0 (consumer 456 * interprets as void). */ 457 return; 458 case DTK_BASE: 459 t->die_offset = buf_pos(&e->info_body); 460 form_uleb(&e->info_body, e->abbr_base); 461 emit_strx4(e, &e->info_body, t->name); 462 form_u8(&e->info_body, base_enc((DebugBaseEncoding)t->base_encoding)); 463 form_u8(&e->info_body, (u8)t->byte_size); 464 return; 465 case DTK_PTR: 466 t->die_offset = buf_pos(&e->info_body); 467 form_uleb(&e->info_body, e->abbr_ptr); 468 form_u8(&e->info_body, (u8)t->byte_size); 469 emit_type_ref(e, t->inner); 470 return; 471 case DTK_TYPEDEF: 472 t->die_offset = buf_pos(&e->info_body); 473 form_uleb(&e->info_body, e->abbr_typedef); 474 emit_strx4(e, &e->info_body, t->name); 475 emit_type_ref(e, t->inner); 476 return; 477 case DTK_CONST: 478 t->die_offset = buf_pos(&e->info_body); 479 form_uleb(&e->info_body, e->abbr_qual_const); 480 emit_type_ref(e, t->inner); 481 return; 482 case DTK_VOLATILE: 483 t->die_offset = buf_pos(&e->info_body); 484 form_uleb(&e->info_body, e->abbr_qual_volatile); 485 emit_type_ref(e, t->inner); 486 return; 487 case DTK_RESTRICT: 488 t->die_offset = buf_pos(&e->info_body); 489 form_uleb(&e->info_body, e->abbr_qual_restrict); 490 emit_type_ref(e, t->inner); 491 return; 492 case DTK_ARRAY: 493 t->die_offset = buf_pos(&e->info_body); 494 form_uleb(&e->info_body, e->abbr_array); 495 emit_type_ref(e, t->inner); 496 if (t->array_count) { 497 form_uleb(&e->info_body, e->abbr_array_subrange); 498 form_uleb(&e->info_body, t->array_count); 499 } else { 500 form_uleb(&e->info_body, e->abbr_array_subrange_unbounded); 501 } 502 form_uleb(&e->info_body, 0); 503 return; 504 case DTK_FUNC: { 505 u32 i; 506 t->die_offset = buf_pos(&e->info_body); 507 form_uleb(&e->info_body, e->abbr_func_type); 508 emit_type_ref(e, t->inner); 509 /* DW_AT_prototyped flag_present has no body */ 510 for (i = 0; i < t->nparams; ++i) { 511 form_uleb(&e->info_body, e->abbr_func_type_param); 512 emit_type_ref(e, t->params[i]); 513 } 514 form_uleb(&e->info_body, 0); 515 return; 516 } 517 case DTK_RECORD: { 518 u32 i; 519 t->die_offset = buf_pos(&e->info_body); 520 form_uleb(&e->info_body, t->is_union ? e->abbr_union : e->abbr_struct); 521 emit_strx4(e, &e->info_body, t->name); 522 form_uleb(&e->info_body, t->byte_size); 523 for (i = 0; i < t->nfields; ++i) { 524 DebugRecField* f = &t->fields[i]; 525 form_uleb(&e->info_body, e->abbr_member); 526 emit_strx4(e, &e->info_body, f->name); 527 emit_type_ref(e, f->type); 528 form_uleb(&e->info_body, f->byte_offset); 529 } 530 form_uleb(&e->info_body, 0); 531 return; 532 } 533 case DTK_ENUM: { 534 u32 i; 535 DebugType* base; 536 t->die_offset = buf_pos(&e->info_body); 537 form_uleb(&e->info_body, e->abbr_enum); 538 emit_strx4(e, &e->info_body, t->name); 539 emit_type_ref(e, t->inner); 540 base = (t->inner != DEBUG_TYPE_NONE && t->inner <= e->d->ntypes) 541 ? &e->d->types[t->inner - 1] 542 : NULL; 543 form_uleb(&e->info_body, base ? base->byte_size : 4); 544 for (i = 0; i < t->nenums; ++i) { 545 form_uleb(&e->info_body, e->abbr_enum_val); 546 emit_strx4(e, &e->info_body, t->enum_vals[i].name); 547 form_sleb(&e->info_body, t->enum_vals[i].value); 548 } 549 form_uleb(&e->info_body, 0); 550 return; 551 } 552 } 553 } 554 555 /* ---------------------------------------------------------------- */ 556 /* Variable / scope emission. */ 557 558 static void emit_var_loc_exprloc(EmitCtx* e, Buf* b, DebugVarLoc loc) { 559 u8 expr[32]; 560 u32 n = 0; 561 switch ((DebugVarLocKind)loc.kind) { 562 case DVL_REG: 563 if (loc.v.reg < 32) { 564 expr[n++] = (u8)(DW_OP_reg0 + loc.v.reg); 565 } else { 566 u64 v = loc.v.reg; 567 expr[n++] = DW_OP_regx; 568 while (v >= 0x80) { 569 expr[n++] = (u8)((v & 0x7f) | 0x80); 570 v >>= 7; 571 } 572 expr[n++] = (u8)v; 573 } 574 break; 575 case DVL_FRAME: { 576 i64 v = loc.v.frame_ofs; 577 int more = 1; 578 expr[n++] = DW_OP_fbreg; 579 while (more) { 580 u8 byte = (u8)(v & 0x7f); 581 v >>= 7; 582 if ((v == 0 && (byte & 0x40) == 0) || (v == -1 && (byte & 0x40) != 0)) { 583 more = 0; 584 } else { 585 byte |= 0x80; 586 } 587 expr[n++] = byte; 588 } 589 break; 590 } 591 case DVL_GLOBAL: { 592 /* DW_OP_addr <ptr_size>: relocation against the symbol. We can't 593 * place a section reloc inside an exprloc body without computing 594 * its absolute info-section offset post-emit. For Phase 1 we emit 595 * the literal symbol value as zero and trust that DVL_GLOBAL is 596 * not yet exercised by any harness case. Documented in the 597 * agent report as a Phase-1 limitation. */ 598 u32 i; 599 expr[n++] = DW_OP_addr; 600 for (i = 0; i < e->d->c->target.ptr_size; ++i) expr[n++] = 0; 601 (void)loc.v.global; 602 break; 603 } 604 case DVL_LOCLIST: 605 /* Phase 5: emit as DW_FORM_loclistx. Phase 1: empty expr. */ 606 break; 607 } 608 form_uleb(b, n); 609 buf_write(b, expr, n); 610 } 611 612 static void emit_var_die(EmitCtx* e, DebugVarDIE* v) { 613 u32 abbrev = v->is_param ? e->abbr_param : e->abbr_var; 614 v->die_offset = buf_pos(&e->info_body); 615 form_uleb(&e->info_body, abbrev); 616 emit_strx4(e, &e->info_body, v->name); 617 form_uleb(&e->info_body, debug_file(e->d, v->decl.file_id)); 618 form_uleb(&e->info_body, v->decl.line); 619 emit_type_ref(e, v->type); 620 emit_var_loc_exprloc(e, &e->info_body, v->loc); 621 } 622 623 static void emit_scope_subtree(EmitCtx* e, DebugFunc* f, i32 scope_idx); 624 625 static void emit_vars_in_scope(EmitCtx* e, DebugFunc* f, i32 scope_idx) { 626 u32 i; 627 for (i = 0; i < f->nvars; ++i) { 628 DebugVarDIE* v = &f->vars[i]; 629 if (v->is_param) continue; 630 if (v->scope_idx == scope_idx) emit_var_die(e, v); 631 } 632 { 633 u32 s; 634 for (s = 0; s < f->nscopes; ++s) { 635 if (f->scopes[s].parent_idx == scope_idx) { 636 emit_scope_subtree(e, f, (i32)s); 637 } 638 } 639 } 640 } 641 642 static void emit_scope_subtree(EmitCtx* e, DebugFunc* f, i32 scope_idx) { 643 f->scopes[scope_idx].die_offset = buf_pos(&e->info_body); 644 form_uleb(&e->info_body, e->abbr_lexical_block); 645 emit_vars_in_scope(e, f, scope_idx); 646 form_uleb(&e->info_body, 0); 647 } 648 649 static void emit_subprogram_die(EmitCtx* e, DebugFunc* f) { 650 const ObjSym* osym = obj_symbol_get(e->ob, f->sym); 651 Sym name = osym ? osym->name : 0; 652 u32 reloc_off; 653 u32 fn_size; 654 DebugTypeId ret_type = DEBUG_TYPE_NONE; 655 if (f->fn_type != DEBUG_TYPE_NONE && f->fn_type <= e->d->ntypes) { 656 DebugType* tt = &e->d->types[f->fn_type - 1]; 657 if (tt->kind == DTK_FUNC) ret_type = tt->inner; 658 } 659 f->die_offset = buf_pos(&e->info_body); 660 form_uleb(&e->info_body, e->abbr_subprogram); 661 /* DW_AT_external (flag_present, no body) */ 662 emit_strx4(e, &e->info_body, name); 663 form_uleb(&e->info_body, debug_file(e->d, f->decl.file_id)); 664 form_uleb(&e->info_body, f->decl.line); 665 emit_type_ref(e, ret_type); 666 reloc_off = buf_pos(&e->info_body); 667 { 668 u8 zero8[8] = {0}; 669 buf_write(&e->info_body, zero8, e->d->c->target.ptr_size); 670 } 671 add_info_reloc(e, reloc_off, f->sym); 672 fn_size = f->has_pc_range ? (f->end_ofs - f->begin_ofs) : 0; 673 form_u32(&e->info_body, fn_size); 674 { 675 u8 frame_expr[1] = {DW_OP_call_frame_cfa}; 676 form_uleb(&e->info_body, sizeof(frame_expr)); 677 buf_write(&e->info_body, frame_expr, sizeof(frame_expr)); 678 } 679 /* Children: params first, then top-level locals/scopes. */ 680 { 681 u32 i; 682 u32 emitted_params = 0; 683 for (i = 0; i < f->nvars; ++i) { 684 if (f->vars[i].is_param) { 685 emit_var_die(e, &f->vars[i]); 686 emitted_params++; 687 } 688 } 689 if (!emitted_params && f->fn_type != DEBUG_TYPE_NONE && 690 f->fn_type <= e->d->ntypes) { 691 DebugType* tt = &e->d->types[f->fn_type - 1]; 692 if (tt->kind == DTK_FUNC) { 693 for (i = 0; i < tt->nparams; ++i) { 694 form_uleb(&e->info_body, e->abbr_func_type_param); 695 emit_type_ref(e, tt->params[i]); 696 } 697 } 698 } 699 emit_vars_in_scope(e, f, -1); 700 form_uleb(&e->info_body, 0); 701 } 702 } 703 704 /* ---------------------------------------------------------------- */ 705 /* Section flushing. */ 706 707 static ObjSecId mk_section(EmitCtx* e, const char* name) { 708 Sym n = pool_intern_slice(e->pool, slice_from_cstr(name)); 709 return obj_section(e->ob, n, SEC_DEBUG, 0, 1); 710 } 711 712 /* Pre-create one SK_SECTION ObjSym pointing at `sec`. Section symbols are 713 * nameless (Sym 0); identity is the section_id they reference. SB_LOCAL 714 * because section symbols are always local in ELF/Mach-O. */ 715 static ObjSymId mk_section_sym(EmitCtx* e, ObjSecId sec) { 716 return obj_symbol(e->ob, 0, SB_LOCAL, SK_SECTION, sec, 0, 0); 717 } 718 719 static void flatten_to_section(EmitCtx* e, ObjSecId sec, const Buf* src) { 720 u32 total = buf_pos(src); 721 if (total == 0) return; 722 { 723 u8* dst = obj_reserve(e->ob, sec, total); 724 if (!dst) return; 725 buf_flatten(src, dst); 726 } 727 } 728 729 static void emit_section_str(EmitCtx* e) { 730 flatten_to_section(e, e->sec_str, &e->str.buf); 731 } 732 733 static void emit_section_line_str(EmitCtx* e) { 734 flatten_to_section(e, e->sec_line_str, &e->line_str.buf); 735 } 736 737 static void emit_section_str_offsets(EmitCtx* e) { 738 Buf b; 739 u32 i; 740 u32 unit_length; 741 u32 entries_off; /* byte offset of first entry within the section */ 742 buf_init(&b, e->heap); 743 unit_length = 4 + e->str.nsyms * 4; /* version+pad + N*4 */ 744 form_u32(&b, unit_length); 745 form_u16(&b, 5); 746 form_u16(&b, 0); 747 entries_off = buf_pos(&b); 748 for (i = 0; i < e->str.nsyms; ++i) { 749 /* Write the literal offset (so a bare-`.o` reader that doesn't apply 750 * relocs still sees the correct value), and *also* emit an R_ABS32 751 * reloc against .debug_str with addend = same literal. When the 752 * linker or JIT view-builder applies the reloc it overwrites the 753 * slot with S + addend; for a normal `.o` (debug sections not laid 754 * out) S=0 so the value is unchanged, and for the concatenated JIT 755 * view S = view-prefix into .debug_str so the slot picks up the 756 * right per-input offset. */ 757 u32* ofs = SymToU32_get(&e->str.by_sym, e->str.syms[i]); 758 form_u32(&b, ofs ? *ofs : 0); 759 } 760 flatten_to_section(e, e->sec_str_off, &b); 761 for (i = 0; i < e->str.nsyms; ++i) { 762 u32* ofs = SymToU32_get(&e->str.by_sym, e->str.syms[i]); 763 obj_reloc(e->ob, e->sec_str_off, entries_off + i * 4u, R_ABS32, e->ssym_str, 764 (i64)(ofs ? *ofs : 0)); 765 } 766 buf_fini(&b); 767 } 768 769 static void emit_section_abbrev(EmitCtx* e) { 770 Buf b; 771 buf_init(&b, e->heap); 772 abbrev_encode(&e->abbr, &b); 773 flatten_to_section(e, e->sec_abbrev, &b); 774 buf_fini(&b); 775 } 776 777 static ArchDwarfOps debug_dwarf_ops(const Debug* d) { 778 const ArchImpl* arch = arch_for_compiler(d ? d->c : NULL); 779 ArchDwarfOps ops; 780 ops.min_inst_len = 1u; 781 ops.max_ops_per_inst = 1u; 782 ops.pad[0] = 0; 783 ops.pad[1] = 0; 784 if (arch && arch->dwarf) { 785 if (arch->dwarf->min_inst_len) ops.min_inst_len = arch->dwarf->min_inst_len; 786 if (arch->dwarf->max_ops_per_inst) 787 ops.max_ops_per_inst = arch->dwarf->max_ops_per_inst; 788 } 789 return ops; 790 } 791 792 static void line_advance_pc(Buf* prog, u32 byte_delta, u32 min_inst_len) { 793 if (byte_delta == 0) return; 794 if (min_inst_len == 0) min_inst_len = 1; 795 if ((byte_delta % min_inst_len) == 0) { 796 form_u8(prog, DW_LNS_advance_pc); 797 form_uleb(prog, byte_delta / min_inst_len); 798 return; 799 } 800 801 while (byte_delta > 0xffffu) { 802 form_u8(prog, DW_LNS_fixed_advance_pc); 803 form_u16(prog, 0xffffu); 804 byte_delta -= 0xffffu; 805 } 806 form_u8(prog, DW_LNS_fixed_advance_pc); 807 form_u16(prog, (u16)byte_delta); 808 } 809 810 /* .debug_line program emission. 811 * 812 * Header layout (32-bit DWARF5): 813 * unit_length u32 814 * version u16 = 5 815 * address_size u8 816 * segment_selector_sz u8 817 * header_length u32 (excludes itself + earlier header fields) 818 * ... 819 * 820 * We emit, then track the program-start byte offset within the section so 821 * we can place address relocations. */ 822 static void emit_section_line(EmitCtx* e) { 823 Buf prog; 824 Buf hdr_body; /* header from min_inst_length onward */ 825 Buf out; 826 Pool* pool = e->pool; 827 u32 i, j; 828 u32 dir_count; 829 Sym* dirs = NULL; 830 u32 ndirs = 0, dirs_cap = 0; 831 ArchDwarfOps dwarf_ops = debug_dwarf_ops(e->d); 832 const u32 min_inst_len = dwarf_ops.min_inst_len ? dwarf_ops.min_inst_len : 1u; 833 const u32 max_ops_per_inst = 834 dwarf_ops.max_ops_per_inst ? dwarf_ops.max_ops_per_inst : 1u; 835 /* Pending line_strp relocs. Each slot is a u32 in hdr_body at 836 * `slot[k].at` with addend `slot[k].ofs` (the resolved .debug_line_str 837 * offset). Translated to section offsets and turned into R_ABS32 838 * relocs against e->ssym_line_str after we know hdr_body's location 839 * within the section. */ 840 struct LineStrpSlot { 841 u32 at; 842 u32 ofs; 843 }* lsp_slots = NULL; 844 u32 nlsp = 0, lsp_cap = 0; 845 846 buf_init(&prog, e->heap); 847 buf_init(&hdr_body, e->heap); 848 buf_init(&out, e->heap); 849 850 /* Build the program first (so we know its length). */ 851 for (i = 0; i < e->d->nfuncs; ++i) { 852 DebugFunc* f = &e->d->funcs[i]; 853 LineRow* prev = NULL; 854 u8 addr_size; 855 if (!f->has_pc_range) continue; 856 addr_size = e->d->c->target.ptr_size; 857 /* DW_LNE_set_address */ 858 form_u8(&prog, 0); 859 form_uleb(&prog, 1 + addr_size); 860 form_u8(&prog, DW_LNE_set_address); 861 { 862 u32 buf_ofs = buf_pos(&prog); 863 u8 zeros[8] = {0}; 864 buf_write(&prog, zeros, addr_size); 865 add_line_reloc(e, buf_ofs, f->sym); 866 } 867 for (j = 0; j < f->nrows; ++j) { 868 LineRow* r = &f->rows[j]; 869 u32 dwfile = debug_file(e->d, r->loc.file_id); 870 i64 prev_line = prev ? prev->loc.line : 1; 871 u32 prev_offset = prev ? prev->offset : f->begin_ofs; 872 u32 pc_delta = r->offset - prev_offset; 873 i64 line_delta; 874 if (!prev || prev->loc.file_id != r->loc.file_id) { 875 form_u8(&prog, DW_LNS_set_file); 876 form_uleb(&prog, dwfile); 877 } 878 if (r->loc.col != (prev ? prev->loc.col : 0)) { 879 form_u8(&prog, DW_LNS_set_column); 880 form_uleb(&prog, r->loc.col); 881 } 882 if (pc_delta != 0) { 883 line_advance_pc(&prog, pc_delta, min_inst_len); 884 } 885 line_delta = (i64)r->loc.line - prev_line; 886 if (line_delta != 0) { 887 form_u8(&prog, DW_LNS_advance_line); 888 form_sleb(&prog, line_delta); 889 } 890 form_u8(&prog, DW_LNS_copy); 891 prev = r; 892 } 893 /* advance to function end before end_sequence */ 894 { 895 u32 last = prev ? prev->offset : f->begin_ofs; 896 u32 delta = f->end_ofs - last; 897 if (delta != 0) { 898 line_advance_pc(&prog, delta, min_inst_len); 899 } 900 } 901 form_u8(&prog, 0); 902 form_uleb(&prog, 1); 903 form_u8(&prog, DW_LNE_end_sequence); 904 } 905 906 /* Build header body (from min_inst_length onward). */ 907 form_u8(&hdr_body, (u8)min_inst_len); /* min_inst_length */ 908 form_u8(&hdr_body, (u8)max_ops_per_inst); /* max_ops_per_inst */ 909 form_u8(&hdr_body, 1); /* default_is_stmt = 1 */ 910 form_u8(&hdr_body, (u8)(i8)-5); /* line_base */ 911 form_u8(&hdr_body, 14); /* line_range */ 912 form_u8(&hdr_body, 13); /* opcode_base = #standard ops + 1 */ 913 /* DWARF 5 standard_opcode_lengths for opcodes 1..12 */ 914 { 915 u8 lens[12]; 916 lens[0] = 0; /* copy */ 917 lens[1] = 1; /* advance_pc */ 918 lens[2] = 1; /* advance_line */ 919 lens[3] = 1; /* set_file */ 920 lens[4] = 1; /* set_column */ 921 lens[5] = 0; /* negate_stmt */ 922 lens[6] = 0; /* set_basic_block */ 923 lens[7] = 0; /* const_add_pc */ 924 lens[8] = 1; /* fixed_advance_pc */ 925 lens[9] = 0; /* set_prologue_end */ 926 lens[10] = 0; /* set_epilogue_begin */ 927 lens[11] = 1; /* set_isa */ 928 buf_write(&hdr_body, lens, 12); 929 } 930 /* directories */ 931 form_u8(&hdr_body, 1); 932 form_uleb(&hdr_body, DW_LNCT_path); 933 form_uleb(&hdr_body, DW_FORM_line_strp); 934 /* dedup directories; index 0 is primary file's dir. */ 935 if (e->d->nfiles > 0) { 936 if (!VEC_GROW(e->heap, dirs, dirs_cap, ndirs + 1)) 937 dirs[ndirs++] = e->d->files[0].dir; 938 } else { 939 if (!VEC_GROW(e->heap, dirs, dirs_cap, ndirs + 1)) 940 dirs[ndirs++] = pool_intern_slice(pool, SLICE_LIT("")); 941 } 942 for (i = 1; i < e->d->nfiles; ++i) { 943 Sym dir = e->d->files[i].dir; 944 u32 di; 945 int found = 0; 946 for (di = 0; di < ndirs; ++di) { 947 if (dirs[di] == dir) { 948 found = 1; 949 break; 950 } 951 } 952 if (!found) { 953 if (!VEC_GROW(e->heap, dirs, dirs_cap, ndirs + 1)) dirs[ndirs++] = dir; 954 } 955 } 956 dir_count = ndirs; 957 form_uleb(&hdr_body, dir_count); 958 for (i = 0; i < dir_count; ++i) { 959 u32 at = buf_pos(&hdr_body); 960 u32 ofs = line_str_offset(e, dirs[i]); 961 form_u32(&hdr_body, ofs); /* literal; also bound to a reloc below */ 962 if (!VEC_GROW(e->heap, lsp_slots, lsp_cap, nlsp + 1)) { 963 lsp_slots[nlsp].at = at; 964 lsp_slots[nlsp].ofs = ofs; 965 nlsp++; 966 } 967 } 968 969 /* file_name_entry_format: 2 entries */ 970 form_u8(&hdr_body, 2); 971 form_uleb(&hdr_body, DW_LNCT_path); 972 form_uleb(&hdr_body, DW_FORM_line_strp); 973 form_uleb(&hdr_body, DW_LNCT_directory_index); 974 form_uleb(&hdr_body, DW_FORM_udata); 975 976 if (e->d->nfiles == 0) { 977 u32 at; 978 u32 ofs; 979 form_uleb(&hdr_body, 1); 980 at = buf_pos(&hdr_body); 981 ofs = line_str_offset(e, pool_intern_slice(pool, SLICE_LIT(""))); 982 form_u32(&hdr_body, ofs); 983 if (!VEC_GROW(e->heap, lsp_slots, lsp_cap, nlsp + 1)) { 984 lsp_slots[nlsp].at = at; 985 lsp_slots[nlsp].ofs = ofs; 986 nlsp++; 987 } 988 form_uleb(&hdr_body, 0); 989 } else { 990 form_uleb(&hdr_body, e->d->nfiles); 991 for (i = 0; i < e->d->nfiles; ++i) { 992 DebugFile* df = &e->d->files[i]; 993 u32 di; 994 u32 at = buf_pos(&hdr_body); 995 u32 ofs = line_str_offset(e, df->base); 996 form_u32(&hdr_body, ofs); 997 if (!VEC_GROW(e->heap, lsp_slots, lsp_cap, nlsp + 1)) { 998 lsp_slots[nlsp].at = at; 999 lsp_slots[nlsp].ofs = ofs; 1000 nlsp++; 1001 } 1002 for (di = 0; di < ndirs; ++di) { 1003 if (dirs[di] == df->dir) break; 1004 } 1005 form_uleb(&hdr_body, di < ndirs ? di : 0); 1006 } 1007 } 1008 1009 if (dirs) e->heap->free(e->heap, dirs, sizeof(Sym) * dirs_cap); 1010 1011 /* Compose final section bytes: unit-length header + hdr_body + program. */ 1012 { 1013 u32 hl = buf_pos(&hdr_body); 1014 u32 plen = buf_pos(&prog); 1015 /* unit_length = (everything after the unit_length field itself) */ 1016 u32 unit_length = 2 + 1 + 1 + 4 + hl + plen; 1017 u8 addr_size = e->d->c->target.ptr_size; 1018 form_u32(&out, unit_length); 1019 form_u16(&out, 5); 1020 form_u8(&out, addr_size); 1021 form_u8(&out, 0); 1022 form_u32(&out, hl); 1023 /* Append hdr_body bytes */ 1024 { 1025 u8* tmp = (u8*)e->heap->alloc(e->heap, hl ? hl : 1, 1); 1026 if (tmp && hl) { 1027 buf_flatten(&hdr_body, tmp); 1028 buf_write(&out, tmp, hl); 1029 } 1030 if (tmp) e->heap->free(e->heap, tmp, hl ? hl : 1); 1031 } 1032 /* Append program bytes */ 1033 { 1034 u8* tmp = (u8*)e->heap->alloc(e->heap, plen ? plen : 1, 1); 1035 if (tmp && plen) { 1036 buf_flatten(&prog, tmp); 1037 buf_write(&out, tmp, plen); 1038 } 1039 if (tmp) e->heap->free(e->heap, tmp, plen ? plen : 1); 1040 } 1041 flatten_to_section(e, e->sec_line, &out); 1042 /* program-start in section bytes = 12 (unit_length+ver+addr+seg+hl) + hl. 1043 * hdr_body sits at section offset 12 (right after the unit header), 1044 * so a line_strp slot at hdr_body offset `at` is at section offset 1045 * `12 + at`. */ 1046 { 1047 u32 prog_start = 12 + hl; 1048 u32 hdr_start = 12; 1049 u32 k; 1050 for (k = 0; k < e->nline_relocs; ++k) { 1051 obj_reloc(e->ob, e->sec_line, prog_start + e->line_relocs[k].buf_offset, 1052 R_ABS64, e->line_relocs[k].sym, 0); 1053 } 1054 for (k = 0; k < nlsp; ++k) { 1055 obj_reloc(e->ob, e->sec_line, hdr_start + lsp_slots[k].at, R_ABS32, 1056 e->ssym_line_str, (i64)lsp_slots[k].ofs); 1057 } 1058 } 1059 } 1060 if (lsp_slots) 1061 e->heap->free(e->heap, lsp_slots, sizeof(*lsp_slots) * lsp_cap); 1062 buf_fini(&prog); 1063 buf_fini(&hdr_body); 1064 buf_fini(&out); 1065 } 1066 1067 /* .debug_aranges */ 1068 static void emit_section_aranges(EmitCtx* e) { 1069 Buf b; 1070 u32 i; 1071 u32 unit_length; 1072 u8 addr_size = e->d->c->target.ptr_size; 1073 u32 body_start; 1074 u32 padding; 1075 buf_init(&b, e->heap); 1076 form_u32(&b, 0); /* unit_length placeholder */ 1077 form_u16(&b, 2); /* aranges version */ 1078 form_u32(&b, 0); /* debug_info_offset — filled by R_ABS32 reloc below */ 1079 form_u8(&b, addr_size); 1080 form_u8(&b, 0); 1081 body_start = buf_pos(&b); 1082 /* Tuples are aligned to 2*addr_size from the section start. */ 1083 { 1084 u32 align = (u32)addr_size * 2; 1085 u32 mod = body_start % align; 1086 padding = mod ? (align - mod) : 0; 1087 while (padding--) { 1088 u8 z = 0; 1089 buf_write(&b, &z, 1); 1090 } 1091 } 1092 for (i = 0; i < e->d->nfuncs; ++i) { 1093 DebugFunc* f = &e->d->funcs[i]; 1094 if (!f->has_pc_range) continue; 1095 { 1096 u32 reloc_at = buf_pos(&b); 1097 u8 zeros[8] = {0}; 1098 buf_write(&b, zeros, addr_size); 1099 add_aranges_reloc(e, reloc_at, f->sym); 1100 } 1101 { 1102 u32 fn_size = f->end_ofs - f->begin_ofs; 1103 if (addr_size == 8) 1104 form_u64(&b, fn_size); 1105 else 1106 form_u32(&b, fn_size); 1107 } 1108 } 1109 /* Terminator (zero, zero) */ 1110 { 1111 u8 zeros[16] = {0}; 1112 buf_write(&b, zeros, addr_size * 2); 1113 } 1114 unit_length = buf_pos(&b) - 4; 1115 { 1116 u8 le[4]; 1117 le[0] = (u8)(unit_length & 0xff); 1118 le[1] = (u8)((unit_length >> 8) & 0xff); 1119 le[2] = (u8)((unit_length >> 16) & 0xff); 1120 le[3] = (u8)((unit_length >> 24) & 0xff); 1121 buf_patch(&b, 0, le, 4); 1122 } 1123 flatten_to_section(e, e->sec_aranges, &b); 1124 /* debug_info_offset (header byte 6) points at this CU within 1125 * .debug_info. Emit it as a section-relative R_ABS32 against the 1126 * .debug_info section symbol (addend 0 — one CU per object at 1127 * offset 0) so that when the linker / JIT view concatenate multiple 1128 * inputs, each aranges unit is rebased to its CU's merged offset. 1129 * Without this every unit would keep offset 0 and addr2line would 1130 * map all addresses to the first input's CU. */ 1131 obj_reloc(e->ob, e->sec_aranges, 6u, R_ABS32, e->ssym_info, 0); 1132 for (i = 0; i < e->naranges_relocs; ++i) { 1133 obj_reloc(e->ob, e->sec_aranges, e->aranges_relocs[i].buf_offset, R_ABS64, 1134 e->aranges_relocs[i].sym, 0); 1135 } 1136 buf_fini(&b); 1137 } 1138 1139 /* .debug_rnglists */ 1140 static void emit_section_rnglists(EmitCtx* e) { 1141 Buf b; 1142 u32 unit_length; 1143 u32 i; 1144 u8 addr_size = e->d->c->target.ptr_size; 1145 buf_init(&b, e->heap); 1146 form_u32(&b, 0); /* placeholder unit_length */ 1147 form_u16(&b, 5); 1148 form_u8(&b, addr_size); 1149 form_u8(&b, 0); 1150 form_u32(&b, 0); /* offset_entry_count */ 1151 for (i = 0; i < e->d->nfuncs; ++i) { 1152 DebugFunc* f = &e->d->funcs[i]; 1153 if (!f->has_pc_range) continue; 1154 form_u8(&b, DW_RLE_start_length); 1155 { 1156 u32 reloc_at = buf_pos(&b); 1157 u8 zeros[8] = {0}; 1158 buf_write(&b, zeros, addr_size); 1159 add_rng_reloc(e, reloc_at, f->sym); 1160 } 1161 form_uleb(&b, f->end_ofs - f->begin_ofs); 1162 } 1163 form_u8(&b, DW_RLE_end_of_list); 1164 unit_length = buf_pos(&b) - 4; 1165 { 1166 u8 le[4]; 1167 le[0] = (u8)(unit_length & 0xff); 1168 le[1] = (u8)((unit_length >> 8) & 0xff); 1169 le[2] = (u8)((unit_length >> 16) & 0xff); 1170 le[3] = (u8)((unit_length >> 24) & 0xff); 1171 buf_patch(&b, 0, le, 4); 1172 } 1173 flatten_to_section(e, e->sec_rnglists, &b); 1174 for (i = 0; i < e->nrng_relocs; ++i) { 1175 obj_reloc(e->ob, e->sec_rnglists, e->rng_relocs[i].buf_offset, R_ABS64, 1176 e->rng_relocs[i].sym, 0); 1177 } 1178 buf_fini(&b); 1179 } 1180 1181 /* .debug_info: prepend CU header, append body, apply relocs and fixups. */ 1182 static void emit_section_info(EmitCtx* e) { 1183 Buf out; 1184 u32 cu_header_size = 12; 1185 u32 body_size = buf_pos(&e->info_body); 1186 u32 unit_length = cu_header_size - 4 + body_size; 1187 buf_init(&out, e->heap); 1188 form_u32(&out, unit_length); 1189 form_u16(&out, 5); 1190 form_u8(&out, DW_UT_compile); 1191 form_u8(&out, e->d->c->target.ptr_size); 1192 form_u32(&out, 0); /* debug_abbrev_offset — filled by R_ABS32 reloc below */ 1193 /* Append body */ 1194 { 1195 u32 plen = body_size; 1196 u8* tmp = (u8*)e->heap->alloc(e->heap, plen ? plen : 1, 1); 1197 if (tmp && plen) { 1198 buf_flatten(&e->info_body, tmp); 1199 buf_write(&out, tmp, plen); 1200 } 1201 if (tmp) e->heap->free(e->heap, tmp, plen ? plen : 1); 1202 } 1203 flatten_to_section(e, e->sec_info, &out); 1204 /* CU header cross-section refs: debug_abbrev_offset at byte 8. Root 1205 * DIE cross-section refs (stmt_list / ranges / str_offsets_base) live 1206 * at body offsets captured during CU-body construction; section offset 1207 * is cu_header_size + body offset. Addend carries the in-target 1208 * offset (0 for abbrev/stmt_list, 12 for rnglists past its header, 8 1209 * for str_offsets past its header). */ 1210 obj_reloc(e->ob, e->sec_info, 8u, R_ABS32, e->ssym_abbrev, 0); 1211 obj_reloc(e->ob, e->sec_info, cu_header_size + e->root_stmt_list_at, R_ABS32, 1212 e->ssym_line, 0); 1213 obj_reloc(e->ob, e->sec_info, cu_header_size + e->root_ranges_at, R_ABS32, 1214 e->ssym_rnglists, 12); 1215 obj_reloc(e->ob, e->sec_info, cu_header_size + e->root_str_off_base_at, 1216 R_ABS32, e->ssym_str_off, 8); 1217 /* Apply forward DIE refs (DW_FORM_ref4 = CU-relative, where the CU 1218 * starts at the unit_length field. body offset 0 is at section 1219 * offset cu_header_size = 12 (post-header, post-unit_length). DW5 1220 * ref4 is unit-relative, i.e. distance from the start of the unit 1221 * (i.e. the unit_length field itself), so the on-disk u32 stored is 1222 * cu_header_size + target_body_offset. */ 1223 { 1224 u32 i; 1225 for (i = 0; i < e->nfixups; ++i) { 1226 DieFixup* fx = &e->fixups[i]; 1227 DebugType* tt = 1228 (fx->target != DEBUG_TYPE_NONE && fx->target <= e->d->ntypes) 1229 ? &e->d->types[fx->target - 1] 1230 : NULL; 1231 u32 target_body_ofs = (tt && tt->die_offset) ? tt->die_offset : 0; 1232 u32 cu_relative = 1233 target_body_ofs ? (cu_header_size + target_body_ofs) : 0; 1234 u8 le[4]; 1235 le[0] = (u8)(cu_relative & 0xff); 1236 le[1] = (u8)((cu_relative >> 8) & 0xff); 1237 le[2] = (u8)((cu_relative >> 16) & 0xff); 1238 le[3] = (u8)((cu_relative >> 24) & 0xff); 1239 obj_patch(e->ob, e->sec_info, cu_header_size + fx->buf_offset, le, 4); 1240 } 1241 for (i = 0; i < e->ninfo_relocs; ++i) { 1242 obj_reloc(e->ob, e->sec_info, 1243 cu_header_size + e->info_relocs[i].buf_offset, R_ABS64, 1244 e->info_relocs[i].sym, 0); 1245 } 1246 } 1247 buf_fini(&out); 1248 } 1249 1250 /* ---------------------------------------------------------------- */ 1251 1252 void debug_emit(Debug* d) { 1253 EmitCtx ec; 1254 Pool* pool = d->c->global; 1255 Sym producer_sym; 1256 Sym primary_dir = 0, primary_base = 0; 1257 u32 i; 1258 1259 /* Zero out via memset on a sized chunk. Avoid forms that clang lowers 1260 * to bzero on this size. We zero with an explicit byte-loop fallback 1261 * to match the lib_deps allowlist (which forbids _bzero). */ 1262 { 1263 u8* p = (u8*)&ec; 1264 size_t k; 1265 for (k = 0; k < sizeof(ec); ++k) p[k] = 0; 1266 } 1267 ec.d = d; 1268 ec.heap = d->heap; 1269 ec.pool = pool; 1270 ec.ob = d->ob; 1271 buf_init(&ec.info_body, d->heap); 1272 str_init(&ec.str, d->heap); 1273 str_init(&ec.line_str, d->heap); 1274 abbrev_init(&ec.abbr, d->heap); 1275 1276 resolve_abbrevs(&ec); 1277 1278 /* Pre-create every debug section + a paired SK_SECTION ObjSym, before 1279 * any DIE/program payload is emitted. Cross-section relocations 1280 * (CU-header debug_abbrev_offset, root-DIE stmt_list / ranges / 1281 * str_offsets_base, .debug_line line_strp slots, .debug_str_offsets 1282 * entries) name these symbols, so they must exist by the time the 1283 * relocs are recorded. Section order in the output `.o` is fixed by 1284 * obj_section call order and matches the previous emission. */ 1285 ec.sec_abbrev = mk_section(&ec, ".debug_abbrev"); 1286 ec.sec_line = mk_section(&ec, ".debug_line"); 1287 ec.sec_aranges = mk_section(&ec, ".debug_aranges"); 1288 ec.sec_rnglists = mk_section(&ec, ".debug_rnglists"); 1289 ec.sec_info = mk_section(&ec, ".debug_info"); 1290 ec.sec_str = mk_section(&ec, ".debug_str"); 1291 ec.sec_line_str = mk_section(&ec, ".debug_line_str"); 1292 ec.sec_str_off = mk_section(&ec, ".debug_str_offsets"); 1293 ec.ssym_abbrev = mk_section_sym(&ec, ec.sec_abbrev); 1294 ec.ssym_line = mk_section_sym(&ec, ec.sec_line); 1295 ec.ssym_rnglists = mk_section_sym(&ec, ec.sec_rnglists); 1296 ec.ssym_str = mk_section_sym(&ec, ec.sec_str); 1297 ec.ssym_line_str = mk_section_sym(&ec, ec.sec_line_str); 1298 ec.ssym_str_off = mk_section_sym(&ec, ec.sec_str_off); 1299 ec.ssym_info = mk_section_sym(&ec, ec.sec_info); 1300 1301 producer_sym = pool_intern_slice(pool, SLICE_LIT("kit 0.1")); 1302 /* Ensure the CU's primary source file occupies file-table slot 0 before 1303 * we read it for DW_AT_name/comp_dir. debug_file() is otherwise first 1304 * invoked later (child-DIE decl_file / line program), so without this the 1305 * CU name and comp_dir come out empty. Seed it from the first function's 1306 * declaration site. */ 1307 if (d->nfiles == 0 && d->nfuncs > 0) { 1308 (void)debug_file(d, d->funcs[0].decl.file_id); 1309 } 1310 if (d->nfiles > 0) { 1311 primary_dir = d->files[0].dir; 1312 primary_base = d->files[0].base; 1313 } else { 1314 primary_dir = pool_intern_slice(pool, SLICE_LIT("")); 1315 primary_base = pool_intern_slice(pool, SLICE_LIT("")); 1316 } 1317 1318 /* CU root DIE */ 1319 form_uleb(&ec.info_body, ec.abbr_cu); 1320 emit_strx4(&ec, &ec.info_body, producer_sym); 1321 form_u16(&ec.info_body, DW_LANG_C11); 1322 emit_strx4(&ec, &ec.info_body, primary_base); 1323 emit_strx4(&ec, &ec.info_body, primary_dir); 1324 /* DW_AT_stmt_list → offset 0 in .debug_line. Write the literal so a 1325 * bare-`.o` reader still sees the correct value; the paired R_ABS32 1326 * reloc emitted in emit_section_info() overwrites the slot in the 1327 * JIT view path where multiple inputs' .debug_line sections are 1328 * concatenated. */ 1329 ec.root_stmt_list_at = buf_pos(&ec.info_body); 1330 form_u32(&ec.info_body, 0); 1331 { 1332 u8 z[8] = {0}; 1333 buf_write(&ec.info_body, z, d->c->target.ptr_size); 1334 } 1335 /* DW_AT_ranges → 12 bytes into .debug_rnglists, post header. */ 1336 ec.root_ranges_at = buf_pos(&ec.info_body); 1337 form_u32(&ec.info_body, 12); 1338 /* DW_AT_str_offsets_base → 8 bytes into .debug_str_offsets, post header. */ 1339 ec.root_str_off_base_at = buf_pos(&ec.info_body); 1340 form_u32(&ec.info_body, 8); 1341 1342 for (i = 0; i < d->ntypes; ++i) emit_type_die(&ec, (DebugTypeId)(i + 1)); 1343 for (i = 0; i < d->nfuncs; ++i) emit_subprogram_die(&ec, &d->funcs[i]); 1344 form_uleb(&ec.info_body, 0); /* end of CU children */ 1345 1346 /* Order: build sections that don't depend on later ones first. The str 1347 * tables are populated lazily during emission, so flush them last. */ 1348 emit_section_abbrev(&ec); 1349 emit_section_line(&ec); 1350 emit_section_aranges(&ec); 1351 emit_section_rnglists(&ec); 1352 emit_section_info(&ec); 1353 emit_section_str(&ec); 1354 emit_section_line_str(&ec); 1355 emit_section_str_offsets(&ec); 1356 1357 /* Cleanup */ 1358 buf_fini(&ec.info_body); 1359 str_fini(&ec.str, ec.heap); 1360 str_fini(&ec.line_str, ec.heap); 1361 abbrev_fini_heap(&ec.abbr, ec.heap); 1362 if (ec.fixups) 1363 ec.heap->free(ec.heap, ec.fixups, sizeof(DieFixup) * ec.fixups_cap); 1364 if (ec.info_relocs) 1365 ec.heap->free(ec.heap, ec.info_relocs, 1366 sizeof(AddrReloc) * ec.info_relocs_cap); 1367 if (ec.line_relocs) 1368 ec.heap->free(ec.heap, ec.line_relocs, 1369 sizeof(AddrReloc) * ec.line_relocs_cap); 1370 if (ec.aranges_relocs) 1371 ec.heap->free(ec.heap, ec.aranges_relocs, 1372 sizeof(AddrReloc) * ec.aranges_relocs_cap); 1373 if (ec.rng_relocs) 1374 ec.heap->free(ec.heap, ec.rng_relocs, 1375 sizeof(AddrReloc) * ec.nrng_relocs_cap); 1376 }