obj.c (44523B)
1 /* In-memory ObjBuilder. Section, symbol, group, and reloc storage all 2 * use segmented arrays (core/segvec.h) so the T* pointers obj_*_get 3 * returns stay valid as the table grows. Section bytes use the chunked 4 * Buf type. Index 0 of each id space is reserved as "none". 5 * 6 * obj_finalize is the read-side gate: post-finalize, write-side calls 7 * are still legal (the reader paths use them too) but consumers can 8 * count on the index spaces being stable. */ 9 10 #include "obj/obj.h" 11 12 #include <string.h> 13 14 #include "core/hashmap.h" 15 #include "core/heap.h" 16 #include "core/pool.h" 17 #include "core/segvec.h" 18 #include "core/vec.h" 19 20 SEGVEC_DEFINE(Sections, Section, 5); /* 32 entries per segment */ 21 SEGVEC_DEFINE(Symbols, ObjSym, 6); /* 64 entries per segment */ 22 23 /* name (interned Sym) -> first defining ObjSymId. A validated fast-path index 24 * for obj_symbol_find: the whole-program LTO builder holds every TU's symbols 25 * in one builder, so the historical linear scan is O(n^2) at decl time. The 26 * index stores the first id seen for a name (matching the scan's "first match" 27 * semantics); obj_symbol_find re-checks the hit's name and falls back to a 28 * linear scan if it is stale (after obj_symbol_rename), so it is always exact. */ 29 HASHMAP_DEFINE(SymNameIndex, Sym, ObjSymId, hash_u32); 30 SEGVEC_DEFINE(Relocs, Reloc, 6); /* 64 entries per segment */ 31 SEGVEC_DEFINE(Groups, ObjGroup, 3); /* 8 entries per segment */ 32 SEGVEC_DEFINE(Atoms, ObjAtom, 5); /* 32 entries per segment */ 33 34 /* COFF WEAK_EXTERNAL alias declaration: symbol `sym` is an alias for the 35 * symbol named `target`. Rare (only import-archive members and the like 36 * carry these), so a side-vector keyed by ObjSymId keeps ObjSym lean 37 * instead of growing every symbol. See obj_set_weak_alias. */ 38 typedef struct ObjWeakAlias { 39 ObjSymId sym; 40 Sym target; 41 } ObjWeakAlias; 42 SEGVEC_DEFINE(WeakAliases, ObjWeakAlias, 3); /* 8 entries per segment */ 43 44 #define OBJ_EXT_SLOT_COUNT 6 /* OBJ_EXT_NONE..OBJ_EXT_WASM_IMPORTS */ 45 46 typedef struct ObjExtSlot { 47 void* payload; 48 ObjExtFreeFn free_fn; 49 } ObjExtSlot; 50 51 struct KitObjBuilder { 52 Compiler* c; 53 Heap* heap; 54 Sections sections; /* index 0 reserved as "none" */ 55 Symbols symbols; /* index 0 reserved as "none" */ 56 Relocs relocs; /* flat across all sections; filtered on read */ 57 Groups groups; /* index 0 reserved as "none" */ 58 Atoms atoms; /* index 0 reserved as "none" */ 59 SymNameIndex sym_by_name; /* name -> first ObjSymId; accelerates find */ 60 /* Format-specific ELF e_flags. Set by read_elf to the input's 61 * e_flags (e.g. on RISC-V, EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE); 62 * consumed by emit_elf to round-trip. Zero when not set — emit_elf 63 * derives a sensible default by arch. */ 64 u32 elf_e_flags; 65 u8 elf_e_flags_set; 66 /* COFF short-import annotation. Carries the providing DLL name when 67 * the builder was synthesized by read_coff from a Microsoft short 68 * import record; zero / unset otherwise. See obj_set_coff_import_dll. */ 69 Sym coff_import_dll; 70 u8 coff_import_dll_set; 71 /* COFF short-import IMPORT NAME override. The Microsoft short-import 72 * NameType field can make the name the loader resolves in the DLL differ 73 * from the local symbol name (NOPREFIX/UNDECORATE strip decoration; 74 * EXPORTAS stores an explicit export name). Carries that resolved import 75 * name when it differs from the symbol name; zero / unset otherwise. The 76 * local symbol keeps its own name so references still resolve; only the PE 77 * hint/name-table entry uses this. See obj_set_coff_import_name. */ 78 Sym coff_import_name; 79 u8 coff_import_name_set; 80 /* COFF WEAK_EXTERNAL alias declarations read from the input (symbol -> 81 * fallback/default symbol name). Empty on builders that carry none. See 82 * obj_set_weak_alias / obj_get_weak_alias. */ 83 WeakAliases weak_aliases; 84 /* Cached undef extern `__tlv_bootstrap` (Mach-O on-disk name) used by 85 * obj_define_tls when emitting `_Thread_local` storage on Mach-O. 86 * Lazily materialized on the first TLV emission; OBJ_SYM_NONE otherwise. */ 87 ObjSymId tlv_bootstrap_sym; 88 /* Format-specific extension payloads keyed by ObjExtKind. */ 89 ObjExtSlot ext[OBJ_EXT_SLOT_COUNT]; 90 /* Linked-image view (segments + dynamic info). NULL on relocatable 91 * inputs; lazily created by obj_image_ensure. See obj.h. */ 92 ObjImage* image; 93 }; 94 95 struct ObjSymIter { 96 const ObjBuilder* ob; 97 u32 idx; /* next index to return */ 98 }; 99 100 static void obj_image_free_(ObjBuilder*); 101 102 /* ---- lifecycle ---- */ 103 104 ObjBuilder* obj_new(Compiler* c) { 105 Heap* h = (Heap*)c->ctx->heap; 106 ObjBuilder* ob = (ObjBuilder*)h->alloc(h, sizeof(*ob), _Alignof(ObjBuilder)); 107 if (!ob) return NULL; 108 memset(ob, 0, sizeof(*ob)); 109 ob->c = c; 110 ob->heap = h; 111 Sections_init(&ob->sections, h); 112 Symbols_init(&ob->symbols, h); 113 Relocs_init(&ob->relocs, h); 114 Groups_init(&ob->groups, h); 115 Atoms_init(&ob->atoms, h); 116 SymNameIndex_init(&ob->sym_by_name, h); 117 WeakAliases_init(&ob->weak_aliases, h); 118 119 /* Reserve index 0 in each id space as the "none" sentinel. SegVec 120 * pushes are zeroed, so the sentinel slots have all-zero fields. */ 121 if (!Sections_push(&ob->sections, NULL) || 122 !Symbols_push(&ob->symbols, NULL) || !Groups_push(&ob->groups, NULL) || 123 !Atoms_push(&ob->atoms, NULL)) { 124 obj_free(ob); 125 return NULL; 126 } 127 return ob; 128 } 129 130 Compiler* obj_compiler(const ObjBuilder* ob) { return ob ? ob->c : NULL; } 131 132 /* Private accessors for the `_tlv_bootstrap` cache used by obj_define_tls. 133 * Lives in obj.c so the ObjBuilder field doesn't leak through obj.h. */ 134 ObjSymId obj_tlv_bootstrap_get(const ObjBuilder* ob) { 135 return ob ? ob->tlv_bootstrap_sym : OBJ_SYM_NONE; 136 } 137 void obj_tlv_bootstrap_set(ObjBuilder* ob, ObjSymId id) { 138 if (ob) ob->tlv_bootstrap_sym = id; 139 } 140 141 void obj_free(ObjBuilder* ob) { 142 u32 i, n; 143 if (!ob) return; 144 for (i = 0; i < OBJ_EXT_SLOT_COUNT; ++i) { 145 if (ob->ext[i].payload && ob->ext[i].free_fn) { 146 ob->ext[i].free_fn(ob->c, ob->ext[i].payload); 147 } 148 ob->ext[i].payload = NULL; 149 ob->ext[i].free_fn = NULL; 150 } 151 n = Sections_count(&ob->sections); 152 for (i = 1; i < n; ++i) { 153 Section* s = Sections_at(&ob->sections, i); 154 if (s) buf_fini(&s->bytes); 155 } 156 n = Groups_count(&ob->groups); 157 for (i = 1; i < n; ++i) { 158 ObjGroup* g = Groups_at(&ob->groups, i); 159 if (g && g->sections) { 160 ob->heap->free(ob->heap, g->sections, sizeof(ObjSecId) * g->nsections); 161 } 162 } 163 Sections_fini(&ob->sections); 164 Symbols_fini(&ob->symbols); 165 Relocs_fini(&ob->relocs); 166 Groups_fini(&ob->groups); 167 Atoms_fini(&ob->atoms); 168 SymNameIndex_fini(&ob->sym_by_name); 169 WeakAliases_fini(&ob->weak_aliases); 170 obj_image_free_(ob); 171 ob->heap->free(ob->heap, ob, sizeof(*ob)); 172 } 173 174 void obj_set_elf_e_flags(ObjBuilder* ob, u32 e_flags) { 175 if (!ob) return; 176 ob->elf_e_flags = e_flags; 177 ob->elf_e_flags_set = 1; 178 } 179 180 int obj_get_elf_e_flags(const ObjBuilder* ob, u32* out) { 181 if (!ob || !ob->elf_e_flags_set) return 0; 182 if (out) *out = ob->elf_e_flags; 183 return 1; 184 } 185 186 void obj_set_coff_import_dll(ObjBuilder* ob, Sym dll_name) { 187 if (!ob) return; 188 ob->coff_import_dll = dll_name; 189 ob->coff_import_dll_set = 1; 190 } 191 192 int obj_get_coff_import_dll(const ObjBuilder* ob, Sym* out) { 193 if (!ob || !ob->coff_import_dll_set) return 0; 194 if (out) *out = ob->coff_import_dll; 195 return 1; 196 } 197 198 void obj_set_coff_import_name(ObjBuilder* ob, Sym import_name) { 199 if (!ob) return; 200 ob->coff_import_name = import_name; 201 ob->coff_import_name_set = 1; 202 } 203 204 int obj_get_coff_import_name(const ObjBuilder* ob, Sym* out) { 205 if (!ob || !ob->coff_import_name_set) return 0; 206 if (out) *out = ob->coff_import_name; 207 return 1; 208 } 209 210 void obj_set_weak_alias(ObjBuilder* ob, ObjSymId sym, Sym target) { 211 if (!ob || sym == OBJ_SYM_NONE || target == 0) return; 212 /* Overwrite an existing entry for this sym rather than duplicating. */ 213 u32 n = WeakAliases_count(&ob->weak_aliases); 214 for (u32 i = 0; i < n; ++i) { 215 ObjWeakAlias* a = WeakAliases_at(&ob->weak_aliases, i); 216 if (a->sym == sym) { 217 a->target = target; 218 return; 219 } 220 } 221 ObjWeakAlias* slot = WeakAliases_push(&ob->weak_aliases, NULL); 222 if (!slot) return; /* OOM: alias recovery falls back to the name heuristic */ 223 slot->sym = sym; 224 slot->target = target; 225 } 226 227 Sym obj_get_weak_alias(const ObjBuilder* ob, ObjSymId sym) { 228 if (!ob || sym == OBJ_SYM_NONE) return 0; 229 u32 n = WeakAliases_count(&ob->weak_aliases); 230 for (u32 i = 0; i < n; ++i) { 231 const ObjWeakAlias* a = WeakAliases_at(&ob->weak_aliases, i); 232 if (a->sym == sym) return a->target; 233 } 234 return 0; 235 } 236 237 u32 obj_weak_alias_count(const ObjBuilder* ob) { 238 return ob ? WeakAliases_count(&ob->weak_aliases) : 0; 239 } 240 241 int obj_weak_alias_at(const ObjBuilder* ob, u32 i, ObjSymId* sym_out, 242 Sym* target_out) { 243 if (!ob || i >= WeakAliases_count(&ob->weak_aliases)) return 0; 244 const ObjWeakAlias* a = WeakAliases_at(&ob->weak_aliases, i); 245 if (sym_out) *sym_out = a->sym; 246 if (target_out) *target_out = a->target; 247 return 1; 248 } 249 250 /* ---- linked-image view ---- */ 251 252 struct ObjImage { 253 Heap* heap; 254 ObjKind kind; 255 u64 entry; 256 u64 image_base; 257 Sym interp; 258 Sym soname; 259 ObjSegment* segs; 260 u32 nsegs, cap_segs; 261 ObjImageDep* deps; 262 u32 ndeps, cap_deps; 263 Sym* rpaths; 264 u32 nrpaths, cap_rpaths; 265 ObjImageSym* dynsyms; 266 u32 ndynsyms, cap_dynsyms; 267 ObjImageReloc* dynrelocs; 268 u32 ndynrelocs, cap_dynrelocs; 269 ObjImageRaw* raws; 270 u32 nraws, cap_raws; 271 /* Undefined symbol names a DSO references (interned). Used by the linker's 272 * --gc-sections pass to keep executable-defined symbols a shared library 273 * needs (e.g. libc.so.7's `environ` / `__progname`) from being collected. */ 274 Sym* undefs; 275 u32 nundefs, cap_undefs; 276 }; 277 278 static void obj_image_free_(ObjBuilder* ob) { 279 ObjImage* im; 280 if (!ob || !ob->image) return; 281 im = ob->image; 282 /* The image owns each dep's imports[] array container (allocated from 283 * im->heap by the PE reader); the Sym values inside stay interned in the 284 * global pool and are not freed here. ELF/Mach-O deps carry imports==NULL. */ 285 if (im->deps) { 286 for (u32 i = 0; i < im->ndeps; ++i) { 287 const ObjImageDep* d = &im->deps[i]; 288 if (d->imports) 289 im->heap->free(im->heap, (void*)d->imports, 290 sizeof(*d->imports) * d->nimports); 291 } 292 } 293 if (im->segs) 294 im->heap->free(im->heap, im->segs, sizeof(*im->segs) * im->cap_segs); 295 if (im->deps) 296 im->heap->free(im->heap, im->deps, sizeof(*im->deps) * im->cap_deps); 297 if (im->rpaths) 298 im->heap->free(im->heap, im->rpaths, sizeof(*im->rpaths) * im->cap_rpaths); 299 if (im->dynsyms) 300 im->heap->free(im->heap, im->dynsyms, 301 sizeof(*im->dynsyms) * im->cap_dynsyms); 302 if (im->dynrelocs) 303 im->heap->free(im->heap, im->dynrelocs, 304 sizeof(*im->dynrelocs) * im->cap_dynrelocs); 305 if (im->raws) 306 im->heap->free(im->heap, im->raws, sizeof(*im->raws) * im->cap_raws); 307 if (im->undefs) 308 im->heap->free(im->heap, im->undefs, sizeof(*im->undefs) * im->cap_undefs); 309 ob->heap->free(ob->heap, im, sizeof(*im)); 310 ob->image = NULL; 311 } 312 313 const ObjImage* obj_image(const ObjBuilder* ob) { 314 return ob ? ob->image : NULL; 315 } 316 317 ObjImage* obj_image_ensure(ObjBuilder* ob, ObjKind kind) { 318 ObjImage* im; 319 if (!ob) return NULL; 320 if (ob->image) { 321 ob->image->kind = kind; 322 return ob->image; 323 } 324 im = (ObjImage*)ob->heap->alloc(ob->heap, sizeof(*im), _Alignof(ObjImage)); 325 if (!im) return NULL; 326 memset(im, 0, sizeof(*im)); 327 im->heap = ob->heap; 328 im->kind = kind; 329 ob->image = im; 330 return im; 331 } 332 333 void obj_image_set_entry(ObjImage* im, u64 entry) { 334 if (im) im->entry = entry; 335 } 336 void obj_image_set_base(ObjImage* im, u64 image_base) { 337 if (im) im->image_base = image_base; 338 } 339 void obj_image_set_interp(ObjImage* im, Sym interp) { 340 if (im) im->interp = interp; 341 } 342 void obj_image_set_soname(ObjImage* im, Sym soname) { 343 if (im) im->soname = soname; 344 } 345 346 void obj_image_add_segment(ObjImage* im, const ObjSegment* seg) { 347 if (!im || !seg) return; 348 if (VEC_GROW(im->heap, im->segs, im->cap_segs, im->nsegs + 1)) return; 349 im->segs[im->nsegs++] = *seg; 350 } 351 void obj_image_add_dep(ObjImage* im, const ObjImageDep* dep) { 352 ObjImageDep d; 353 if (!im || !dep) return; 354 if (VEC_GROW(im->heap, im->deps, im->cap_deps, im->ndeps + 1)) return; 355 d = *dep; 356 /* Deep-copy the imports[] name array into image-heap-owned memory so the 357 * reader may pass a transient (scratch/arena) array; obj_image_free_ 358 * releases this copy. The Sym values inside are global-interned and not 359 * owned here. ELF/Mach-O deps carry imports==NULL (nimports==0). */ 360 if (d.nimports && dep->imports) { 361 Sym* copy = (Sym*)im->heap->alloc(im->heap, sizeof(Sym) * d.nimports, 362 _Alignof(Sym)); 363 if (!copy) { 364 d.imports = NULL; 365 d.nimports = 0; 366 } else { 367 memcpy(copy, dep->imports, sizeof(Sym) * d.nimports); 368 d.imports = copy; 369 } 370 } else { 371 d.imports = NULL; 372 d.nimports = 0; 373 } 374 im->deps[im->ndeps++] = d; 375 } 376 void obj_image_add_rpath(ObjImage* im, Sym rpath) { 377 if (!im) return; 378 if (VEC_GROW(im->heap, im->rpaths, im->cap_rpaths, im->nrpaths + 1)) return; 379 im->rpaths[im->nrpaths++] = rpath; 380 } 381 void obj_image_add_dynsym(ObjImage* im, const ObjImageSym* sym) { 382 if (!im || !sym) return; 383 if (VEC_GROW(im->heap, im->dynsyms, im->cap_dynsyms, im->ndynsyms + 1)) 384 return; 385 im->dynsyms[im->ndynsyms++] = *sym; 386 } 387 void obj_image_add_dynreloc(ObjImage* im, const ObjImageReloc* rel) { 388 if (!im || !rel) return; 389 if (VEC_GROW(im->heap, im->dynrelocs, im->cap_dynrelocs, im->ndynrelocs + 1)) 390 return; 391 im->dynrelocs[im->ndynrelocs++] = *rel; 392 } 393 void obj_image_add_undef(ObjImage* im, Sym name) { 394 if (!im || !name) return; 395 if (VEC_GROW(im->heap, im->undefs, im->cap_undefs, im->nundefs + 1)) return; 396 im->undefs[im->nundefs++] = name; 397 } 398 void obj_image_add_raw(ObjImage* im, const ObjImageRaw* raw) { 399 if (!im || !raw) return; 400 if (VEC_GROW(im->heap, im->raws, im->cap_raws, im->nraws + 1)) return; 401 im->raws[im->nraws++] = *raw; 402 } 403 404 ObjKind obj_image_kind(const ObjImage* im) { 405 return im ? im->kind : OBJ_KIND_REL; 406 } 407 u64 obj_image_entry(const ObjImage* im) { return im ? im->entry : 0; } 408 u64 obj_image_base(const ObjImage* im) { return im ? im->image_base : 0; } 409 Sym obj_image_interp(const ObjImage* im) { return im ? im->interp : 0; } 410 Sym obj_image_soname(const ObjImage* im) { return im ? im->soname : 0; } 411 412 u32 obj_image_nsegments(const ObjImage* im) { return im ? im->nsegs : 0; } 413 const ObjSegment* obj_image_segment(const ObjImage* im, u32 idx) { 414 return (im && idx < im->nsegs) ? &im->segs[idx] : NULL; 415 } 416 u32 obj_image_ndeps(const ObjImage* im) { return im ? im->ndeps : 0; } 417 const ObjImageDep* obj_image_dep(const ObjImage* im, u32 idx) { 418 return (im && idx < im->ndeps) ? &im->deps[idx] : NULL; 419 } 420 u32 obj_image_nrpaths(const ObjImage* im) { return im ? im->nrpaths : 0; } 421 Sym obj_image_rpath(const ObjImage* im, u32 idx) { 422 return (im && idx < im->nrpaths) ? im->rpaths[idx] : 0; 423 } 424 u32 obj_image_ndynsyms(const ObjImage* im) { return im ? im->ndynsyms : 0; } 425 const ObjImageSym* obj_image_dynsym(const ObjImage* im, u32 idx) { 426 return (im && idx < im->ndynsyms) ? &im->dynsyms[idx] : NULL; 427 } 428 u32 obj_image_ndynrelocs(const ObjImage* im) { return im ? im->ndynrelocs : 0; } 429 const ObjImageReloc* obj_image_dynreloc(const ObjImage* im, u32 idx) { 430 return (im && idx < im->ndynrelocs) ? &im->dynrelocs[idx] : NULL; 431 } 432 u32 obj_image_nundefs(const ObjImage* im) { return im ? im->nundefs : 0; } 433 Sym obj_image_undef(const ObjImage* im, u32 idx) { 434 return (im && idx < im->nundefs) ? im->undefs[idx] : 0; 435 } 436 u32 obj_image_nraws(const ObjImage* im) { return im ? im->nraws : 0; } 437 const ObjImageRaw* obj_image_raw(const ObjImage* im, u32 idx) { 438 return (im && idx < im->nraws) ? &im->raws[idx] : NULL; 439 } 440 441 void obj_ext_set(ObjBuilder* ob, ObjExtKind kind, void* payload, 442 ObjExtFreeFn free_fn) { 443 if (!ob || (u32)kind >= OBJ_EXT_SLOT_COUNT) return; 444 if (ob->ext[kind].payload && ob->ext[kind].free_fn && 445 ob->ext[kind].payload != payload) { 446 ob->ext[kind].free_fn(ob->c, ob->ext[kind].payload); 447 } 448 ob->ext[kind].payload = payload; 449 ob->ext[kind].free_fn = free_fn; 450 } 451 452 void* obj_ext_get(const ObjBuilder* ob, ObjExtKind kind) { 453 if (!ob || (u32)kind >= OBJ_EXT_SLOT_COUNT) return NULL; 454 return ob->ext[kind].payload; 455 } 456 457 void obj_ext_clear(ObjBuilder* ob, ObjExtKind kind) { 458 if (!ob || (u32)kind >= OBJ_EXT_SLOT_COUNT) return; 459 if (ob->ext[kind].payload && ob->ext[kind].free_fn) { 460 ob->ext[kind].free_fn(ob->c, ob->ext[kind].payload); 461 } 462 ob->ext[kind].payload = NULL; 463 ob->ext[kind].free_fn = NULL; 464 } 465 466 /* ---- write side ---- */ 467 468 ObjSecId obj_section(ObjBuilder* ob, Sym name, SecKind kind, u16 flags, 469 u32 align) { 470 /* Find-or-create by (name, kind, sem=PROGBITS). Repeated calls for the 471 * same logical section — e.g. one .rodata per FP/string literal, or one 472 * .data per static initializer — collapse onto a single Section and 473 * accumulate bytes into it instead of emitting a fan-out of identically- 474 * named output sections. Merge align (max) and flags (union) so a 475 * stricter requirement from a later caller wins. */ 476 u32 n = Sections_count(&ob->sections); 477 for (u32 i = 1; i < n; ++i) { 478 Section* s = Sections_at(&ob->sections, i); 479 if (s && s->name == name && s->kind == (u16)kind && 480 s->sem == SSEM_PROGBITS) { 481 if (align > s->align) s->align = align; 482 s->flags = (u16)(s->flags | flags); 483 /* Pad to align so the next obj_reserve / obj_write lands at an 484 * offset that satisfies this caller's alignment. Without this 485 * each contribution is laid out at whatever offset the prior 486 * write happened to leave, so a 4-byte global following a 6-byte 487 * string lands at .data+6 — and any LDST32 reloc against the 488 * containing section breaks at link time. */ 489 u32 a = align ? align : 1u; 490 if (a > 1u) { 491 u32 cur = buf_pos(&s->bytes); 492 u32 mis = cur & (a - 1u); 493 if (mis) { 494 u32 pad = a - mis; 495 u8* dst = buf_reserve(&s->bytes, pad); 496 if (dst) memset(dst, 0, pad); 497 } 498 } 499 return (ObjSecId)i; 500 } 501 } 502 return obj_section_ex(ob, name, kind, SSEM_PROGBITS, flags, align, 0, 503 OBJ_SEC_NONE, 0); 504 } 505 506 ObjSecId obj_section_ex(ObjBuilder* ob, Sym name, SecKind kind, SecSem sem, 507 u16 flags, u32 align, u32 entsize, u32 link, u32 info) { 508 u32 id; 509 Section* s = Sections_push(&ob->sections, &id); 510 if (!s) return OBJ_SEC_NONE; 511 s->name = name; 512 s->kind = (u16)kind; 513 s->flags = flags; 514 s->sem = (u16)sem; 515 s->ext_kind = OBJ_EXT_NONE; 516 s->align = align ? align : 1; 517 s->entsize = entsize; 518 s->link = (ObjSecId)link; 519 s->info = info; 520 s->group_id = OBJ_GROUP_NONE; 521 s->bss_size = 0; 522 s->addr = 0; 523 buf_init(&s->bytes, ob->heap); 524 return (ObjSecId)id; 525 } 526 527 void obj_section_set_addr(ObjBuilder* ob, ObjSecId id, u64 addr) { 528 Section* s = Sections_at(&ob->sections, id); 529 if (s && id != OBJ_SEC_NONE) s->addr = addr; 530 } 531 532 void obj_section_set_flags(ObjBuilder* ob, ObjSecId id, u16 flags) { 533 Section* s = Sections_at(&ob->sections, id); 534 if (s && id != OBJ_SEC_NONE) s->flags = flags; 535 } 536 537 void obj_section_set_entsize(ObjBuilder* ob, ObjSecId id, u32 entsize) { 538 Section* s = Sections_at(&ob->sections, id); 539 if (s && id != OBJ_SEC_NONE) s->entsize = entsize; 540 } 541 542 void obj_section_set_align(ObjBuilder* ob, ObjSecId id, u32 align) { 543 Section* s = Sections_at(&ob->sections, id); 544 if (s && id != OBJ_SEC_NONE) s->align = align ? align : 1; 545 } 546 547 void obj_section_set_group(ObjBuilder* ob, ObjSecId id, ObjGroupId gid) { 548 Section* s = Sections_at(&ob->sections, id); 549 if (s && id != OBJ_SEC_NONE) s->group_id = gid; 550 } 551 552 void obj_section_set_link_info(ObjBuilder* ob, ObjSecId id, ObjSecId link, 553 u32 info) { 554 Section* s; 555 if (id == OBJ_SEC_NONE) return; 556 s = Sections_at(&ob->sections, id); 557 if (!s) return; 558 s->link = link; 559 s->info = info; 560 } 561 562 void obj_section_set_ext(ObjBuilder* ob, ObjSecId id, ObjExtKind ek, 563 u32 ext_type, u32 ext_flags) { 564 Section* s; 565 if (id == OBJ_SEC_NONE) return; 566 s = Sections_at(&ob->sections, id); 567 if (!s) return; 568 s->ext_kind = (u16)ek; 569 s->ext_type = ext_type; 570 s->ext_flags = ext_flags; 571 } 572 573 /* A NOBITS section (.bss / .tbss) stores no bytes — only a size. decl.c and 574 * obj_align_to already treat SEC_BSS this way regardless of sem; obj_write and 575 * obj_pos must agree so the MCEmitter path (the standalone assembler's 576 * `.zero`/`.skip` fills and label positions) advances and reports the bss_size 577 * cursor instead of a byte buffer that the emitters then ignore. Codegen never 578 * writes/positions a BSS section through these (it uses obj_reserve_bss and its 579 * own counter), so this only affects the assembler's path. */ 580 static int sec_is_nobits(const Section* s) { 581 return s->sem == SSEM_NOBITS || s->kind == SEC_BSS; 582 } 583 584 void obj_write(ObjBuilder* ob, ObjSecId id, const void* data, size_t n) { 585 Section* s; 586 if (id == OBJ_SEC_NONE) return; 587 s = Sections_at(&ob->sections, id); 588 if (!s) return; 589 if (sec_is_nobits(s)) { 590 s->bss_size += (u32)n; /* reserve zero-fill space; store nothing */ 591 return; 592 } 593 buf_write(&s->bytes, data, n); 594 } 595 596 u8* obj_reserve(ObjBuilder* ob, ObjSecId id, size_t n) { 597 Section* s; 598 if (id == OBJ_SEC_NONE) return NULL; 599 s = Sections_at(&ob->sections, id); 600 return s ? buf_reserve(&s->bytes, n) : NULL; 601 } 602 603 void obj_reserve_bss(ObjBuilder* ob, ObjSecId id, u32 size, u32 align) { 604 Section* s; 605 if (id == OBJ_SEC_NONE) return; 606 s = Sections_at(&ob->sections, id); 607 if (!s) return; 608 s->bss_size = size; 609 if (align) s->align = align; 610 } 611 612 u32 obj_align_to(ObjBuilder* ob, ObjSecId id, u32 align) { 613 Section* s; 614 u32 a, cur, base, pad; 615 if (id == OBJ_SEC_NONE) return 0; 616 s = Sections_at(&ob->sections, id); 617 if (!s) return 0; 618 a = align ? align : 1u; 619 /* Treat SEC_BSS like NOBITS even when sem is the default PROGBITS — 620 * decl.c creates .bss via the simple obj_section, but emit_macho / 621 * emit_elf both route SEC_BSS through the zerofill path regardless 622 * of sem, so the byte buf is ignored on output and only bss_size 623 * matters. */ 624 if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { 625 base = (s->bss_size + (a - 1u)) & ~(a - 1u); 626 s->bss_size = base; 627 if (a > s->align) s->align = a; 628 return base; 629 } 630 cur = buf_pos(&s->bytes); 631 base = (cur + (a - 1u)) & ~(a - 1u); 632 pad = base - cur; 633 if (pad) { 634 u8* p = buf_reserve(&s->bytes, pad); 635 if (p) memset(p, 0, pad); 636 } 637 if (a > s->align) s->align = a; 638 return base; 639 } 640 641 u32 obj_pos(ObjBuilder* ob, ObjSecId id) { 642 Section* s; 643 if (id == OBJ_SEC_NONE) return 0; 644 s = Sections_at(&ob->sections, id); 645 if (!s) return 0; 646 return sec_is_nobits(s) ? s->bss_size : buf_pos(&s->bytes); 647 } 648 649 void obj_patch(ObjBuilder* ob, ObjSecId id, u32 ofs, const void* data, 650 size_t n) { 651 Section* s; 652 if (id == OBJ_SEC_NONE) return; 653 s = Sections_at(&ob->sections, id); 654 if (s) buf_patch(&s->bytes, ofs, data, n); 655 } 656 657 static ObjSymId obj_symbol_make(ObjBuilder* ob, Sym name, SymBind bind, 658 SymVis vis, SymKind kind, 659 ObjSecId section_id, u64 value, u64 size, 660 u64 common_align, int index_name) { 661 u32 id; 662 ObjSym* s = Symbols_push(&ob->symbols, &id); 663 if (!s) return OBJ_SYM_NONE; 664 s->name = name; 665 s->bind = (u16)bind; 666 s->kind = (u16)kind; 667 s->vis = (u8)vis; 668 s->ext_kind = OBJ_EXT_NONE; 669 s->section_id = section_id; 670 s->value = value; 671 s->size = size; 672 s->common_align = common_align; 673 /* First-wins: record the lowest id for this name so obj_symbol_find returns 674 * the same symbol the linear scan would. Later same-name symbols (legal for 675 * STB_LOCAL) do not overwrite. */ 676 if (index_name && name && !SymNameIndex_get(&ob->sym_by_name, name)) 677 (void)SymNameIndex_set(&ob->sym_by_name, name, (ObjSymId)id); 678 return (ObjSymId)id; 679 } 680 681 ObjSymId obj_symbol(ObjBuilder* ob, Sym name, SymBind bind, SymKind kind, 682 ObjSecId section_id, u64 value, u64 size) { 683 return obj_symbol_ex(ob, name, bind, SV_DEFAULT, kind, section_id, value, 684 size, 0); 685 } 686 687 ObjSymId obj_symbol_ex(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, 688 SymKind kind, ObjSecId section_id, u64 value, u64 size, 689 u64 common_align) { 690 return obj_symbol_make(ob, name, bind, vis, kind, section_id, value, size, 691 common_align, 1); 692 } 693 694 ObjSymId obj_symbol_defer(ObjBuilder* ob, Sym name, SymBind bind, SymVis vis, 695 SymKind kind, u64 size) { 696 ObjSymId id; 697 ObjSym* s; 698 id = obj_symbol_make(ob, name, bind, vis, kind, OBJ_SEC_NONE, 0, size, 0, 0); 699 if (id == OBJ_SYM_NONE) return OBJ_SYM_NONE; 700 s = Symbols_at(&ob->symbols, id); 701 if (s) s->removed = 1; 702 return id; 703 } 704 705 ObjSymId obj_symbol_find(ObjBuilder* ob, Sym name) { 706 /* Authoritative O(1) lookup — never a linear scan. Normal/live symbols are 707 * indexed when created or published, and obj_symbol_rename keeps the index 708 * exact. Deferred symbols deliberately stay out of this map until published. 709 */ 710 ObjSymId* hit; 711 if (!ob || !name) return OBJ_SYM_NONE; 712 hit = SymNameIndex_get(&ob->sym_by_name, name); 713 return hit ? *hit : OBJ_SYM_NONE; 714 } 715 716 void obj_symbol_define(ObjBuilder* ob, ObjSymId id, ObjSecId section_id, 717 u64 value, u64 size) { 718 ObjSym* s; 719 if (id == OBJ_SYM_NONE) return; 720 s = Symbols_at(&ob->symbols, id); 721 if (!s) return; 722 s->section_id = section_id; 723 s->value = value; 724 s->size = size; 725 if (s->kind == SK_UNDEF) s->kind = SK_OBJ; 726 } 727 728 void obj_symbol_define_live(ObjBuilder* ob, ObjSymId id, ObjSecId section_id, 729 u64 value, u64 size) { 730 ObjSym* s; 731 ObjSymId* slot; 732 obj_symbol_define(ob, id, section_id, value, size); 733 if (!ob || id == OBJ_SYM_NONE) return; 734 s = Symbols_at(&ob->symbols, id); 735 if (!s) return; 736 s->removed = 0; 737 if (s->name) { 738 slot = SymNameIndex_get(&ob->sym_by_name, s->name); 739 if (!slot || *slot > id) 740 (void)SymNameIndex_set(&ob->sym_by_name, s->name, id); 741 } 742 } 743 744 void obj_symbol_set_flags(ObjBuilder* ob, ObjSymId id, u16 flags) { 745 ObjSym* s; 746 if (id == OBJ_SYM_NONE) return; 747 s = Symbols_at(&ob->symbols, id); 748 if (!s) return; 749 s->flags = flags; 750 } 751 752 void obj_reloc(ObjBuilder* ob, ObjSecId section_id, u32 offset, RelocKind kind, 753 ObjSymId sym, i64 addend) { 754 obj_reloc_ex(ob, section_id, offset, kind, sym, addend, 1, 0); 755 } 756 757 void obj_reloc_ex(ObjBuilder* ob, ObjSecId section_id, u32 offset, 758 RelocKind kind, ObjSymId sym, i64 addend, int explicit_addend, 759 int pair) { 760 Reloc* r = Relocs_push(&ob->relocs, NULL); 761 if (!r) return; 762 r->section_id = section_id; 763 r->offset = offset; 764 r->kind = (u16)kind; 765 r->has_explicit_addend = (u8)(explicit_addend ? 1 : 0); 766 r->pair = (u8)pair; 767 r->sym = sym; 768 r->addend = addend; 769 /* Any reloc against this symbol is enough to retain it through the 770 * emit-time UNDEF prune. See ObjSym::referenced. */ 771 obj_sym_mark_referenced(ob, sym); 772 } 773 774 void obj_sym_mark_referenced(ObjBuilder* ob, ObjSymId id) { 775 ObjSym* s; 776 if (id == OBJ_SYM_NONE) return; 777 s = Symbols_at(&ob->symbols, id); 778 if (s) s->referenced = 1; 779 } 780 781 void obj_sym_set_referenced(ObjBuilder* ob, ObjSymId id, int referenced) { 782 ObjSym* s; 783 if (id == OBJ_SYM_NONE) return; 784 s = Symbols_at(&ob->symbols, id); 785 if (s) s->referenced = referenced ? 1u : 0u; 786 } 787 788 ObjAtomId obj_atom_define(ObjBuilder* ob, ObjSecId section_id, u32 offset, 789 u32 size, ObjSymId signature, u32 flags) { 790 u32 id; 791 ObjAtom* a; 792 if (!ob || section_id == OBJ_SEC_NONE) return OBJ_ATOM_NONE; 793 a = Atoms_push(&ob->atoms, &id); 794 if (!a) return OBJ_ATOM_NONE; 795 a->section_id = section_id; 796 a->offset = offset; 797 a->size = size; 798 a->signature = signature; 799 a->flags = flags; 800 return (ObjAtomId)id; 801 } 802 803 ObjGroupId obj_group(ObjBuilder* ob, Sym name, ObjSymId signature, u32 flags) { 804 u32 id; 805 ObjGroup* g = Groups_push(&ob->groups, &id); 806 if (!g) return OBJ_GROUP_NONE; 807 g->name = name; 808 g->signature = signature; 809 g->flags = flags; 810 return (ObjGroupId)id; 811 } 812 813 void obj_group_add_section(ObjBuilder* ob, ObjGroupId gid, ObjSecId sec) { 814 ObjGroup* g; 815 ObjSecId* p; 816 if (gid == OBJ_GROUP_NONE) return; 817 g = Groups_at(&ob->groups, gid); 818 if (!g) return; 819 /* Linear realloc — group section counts are tiny (handful per group). */ 820 p = (ObjSecId*)ob->heap->realloc( 821 ob->heap, g->sections, sizeof(ObjSecId) * g->nsections, 822 sizeof(ObjSecId) * (g->nsections + 1), _Alignof(ObjSecId)); 823 if (!p) return; 824 p[g->nsections++] = sec; 825 g->sections = p; 826 } 827 828 void obj_finalize(ObjBuilder* ob) { 829 /* No flat-offset patching needed yet — section bytes are read out via 830 * buf_flatten on demand by emitters. Keep this hook in place: when a 831 * future writer wants intra-section fixups (e.g. label-to-offset 832 * resolution after the full section is written), this is where they 833 * land. */ 834 (void)ob; 835 } 836 837 /* ---- mutators (strip / objcopy support) ---- */ 838 839 void obj_section_remove(ObjBuilder* ob, ObjSecId id) { 840 Section* s; 841 if (!ob || id == OBJ_SEC_NONE) return; 842 s = Sections_at(&ob->sections, id); 843 if (!s) return; 844 s->removed = 1; 845 } 846 847 void obj_symbol_remove(ObjBuilder* ob, ObjSymId id) { 848 ObjSym* s; 849 if (!ob || id == OBJ_SYM_NONE) return; 850 s = Symbols_at(&ob->symbols, id); 851 if (!s) return; 852 s->removed = 1; 853 } 854 855 void obj_group_remove(ObjBuilder* ob, ObjGroupId id) { 856 ObjGroup* g; 857 if (!ob || id == OBJ_GROUP_NONE) return; 858 g = Groups_at(&ob->groups, id); 859 if (!g) return; 860 g->removed = 1; 861 } 862 863 void obj_section_rename(ObjBuilder* ob, ObjSecId id, Sym new_name) { 864 Section* s; 865 if (!ob || id == OBJ_SEC_NONE) return; 866 s = Sections_at(&ob->sections, id); 867 if (!s) return; 868 s->name = new_name; 869 } 870 871 void obj_symbol_rename(ObjBuilder* ob, ObjSymId id, Sym new_name) { 872 ObjSym* s; 873 Sym old; 874 ObjSymId* slot; 875 if (!ob || id == OBJ_SYM_NONE) return; 876 s = Symbols_at(&ob->symbols, id); 877 if (!s) return; 878 old = s->name; 879 s->name = new_name; 880 if (old == new_name) return; 881 /* Keep the name index exact so obj_symbol_find stays a pure hash lookup. 882 * If this symbol was the indexed entry for its old name, hand the entry to 883 * the next-lowest symbol still carrying that name (duplicate STB_LOCAL names 884 * are legal), or drop it. This is the only scan in the symbol-index path and 885 * it is confined to obj_symbol_rename — a cold objcopy-style operation, never 886 * the codegen/find hot path. */ 887 if (old) { 888 slot = SymNameIndex_get(&ob->sym_by_name, old); 889 if (slot && *slot == id) { 890 ObjSymId repl = OBJ_SYM_NONE; 891 u32 n = Symbols_count(&ob->symbols); 892 for (u32 i = 1; i < n; ++i) { 893 ObjSym* t = Symbols_at(&ob->symbols, i); 894 if (t && (ObjSymId)i != id && t->name == old) { 895 repl = (ObjSymId)i; 896 break; 897 } 898 } 899 if (repl != OBJ_SYM_NONE) 900 (void)SymNameIndex_set(&ob->sym_by_name, old, repl); 901 else 902 SymNameIndex_del(&ob->sym_by_name, old); 903 } 904 } 905 /* new_name resolves to the lowest id that carries it (first-match order). A 906 * rename can give an existing lower-id symbol this name, so lower an existing 907 * entry when warranted. */ 908 if (new_name) { 909 slot = SymNameIndex_get(&ob->sym_by_name, new_name); 910 if (!slot || *slot > id) 911 (void)SymNameIndex_set(&ob->sym_by_name, new_name, id); 912 } 913 } 914 915 void obj_symbol_set_bind(ObjBuilder* ob, ObjSymId id, SymBind bind) { 916 ObjSym* s; 917 if (!ob || id == OBJ_SYM_NONE) return; 918 s = Symbols_at(&ob->symbols, id); 919 if (!s) return; 920 s->bind = (u16)bind; 921 } 922 923 void obj_symbol_set_vis(ObjBuilder* ob, ObjSymId id, SymVis vis) { 924 ObjSym* s; 925 if (!ob || id == OBJ_SYM_NONE) return; 926 s = Symbols_at(&ob->symbols, id); 927 if (!s) return; 928 s->vis = (u8)vis; 929 } 930 931 void obj_section_replace_bytes(ObjBuilder* ob, ObjSecId id, const u8* data, 932 size_t n) { 933 Section* s; 934 if (!ob || id == OBJ_SEC_NONE) return; 935 s = Sections_at(&ob->sections, id); 936 if (!s) return; 937 /* Drop the old chunked Buf and reinitialize empty, then write the new 938 * bytes. Cheaper than scanning + patching when the replacement is 939 * different-sized — which it usually is (objcopy --update-section). */ 940 buf_fini(&s->bytes); 941 buf_init(&s->bytes, ob->heap); 942 s->bss_size = 0; 943 if (data && n) buf_write(&s->bytes, data, n); 944 } 945 946 void obj_sweep_dead(ObjBuilder* ob) { 947 u32 nsec = Sections_count(&ob->sections); 948 u32 nsym = Symbols_count(&ob->symbols); 949 u32 nrel = Relocs_count(&ob->relocs); 950 u32 ngrp = Groups_count(&ob->groups); 951 u32 i; 952 953 /* Pass 1: cascade removed sections into their defining symbols. Also 954 * absorbs the historical UNDEF-prune predicate: any non-referenced 955 * global/weak symbol that lacks a defining section (and isn't an ABS 956 * or COMMON definition, both of which legitimately have section_id == 957 * OBJ_SEC_NONE) is a spurious extern from a header — drop it. 958 * 959 * The "no defining section" test matches macho_emit's sym_is_undef, 960 * which is stronger than `kind == SK_UNDEF`: frontends mint SK_OBJ / 961 * SK_TLS / SK_FUNC entries for extern decls and only set them to 962 * SK_UNDEF for true references, so checking section_id catches both. */ 963 for (i = 1; i < nsym; ++i) { 964 ObjSym* s = Symbols_at(&ob->symbols, i); 965 if (!s || s->removed) continue; 966 if (s->section_id != OBJ_SEC_NONE) { 967 const Section* sec = Sections_at(&ob->sections, s->section_id); 968 if (sec && sec->removed) { 969 s->removed = 1; 970 continue; 971 } 972 } 973 if (s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && 974 s->kind != SK_COMMON && !s->referenced && 975 (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { 976 s->removed = 1; 977 } 978 } 979 980 /* Pass 2: drop relocs that became dangling. A reloc is dead if its 981 * containing section, its target symbol, or the symbol's defining 982 * section is gone. */ 983 for (i = 0; i < nrel; ++i) { 984 Reloc* r = Relocs_at(&ob->relocs, i); 985 if (!r || r->removed) continue; 986 if (r->section_id != OBJ_SEC_NONE) { 987 const Section* sec = Sections_at(&ob->sections, r->section_id); 988 if (!sec || sec->removed) { 989 r->removed = 1; 990 continue; 991 } 992 } 993 if (r->sym != OBJ_SYM_NONE) { 994 const ObjSym* ts = Symbols_at(&ob->symbols, r->sym); 995 if (!ts || ts->removed) r->removed = 1; 996 } 997 } 998 999 { 1000 u32 natom = Atoms_count(&ob->atoms); 1001 for (i = 1; i < natom; ++i) { 1002 ObjAtom* a = Atoms_at(&ob->atoms, i); 1003 const Section* sec; 1004 const ObjSym* sig; 1005 if (!a || a->removed) continue; 1006 sec = Sections_at(&ob->sections, a->section_id); 1007 if (!sec || sec->removed) { 1008 a->removed = 1; 1009 continue; 1010 } 1011 if (a->signature != OBJ_SYM_NONE) { 1012 sig = Symbols_at(&ob->symbols, a->signature); 1013 if (!sig || sig->removed) a->removed = 1; 1014 } 1015 } 1016 } 1017 1018 /* Pass 3: compact each group's member list to drop removed sections; 1019 * tombstone the group if its list empties out or its signature symbol 1020 * is removed. Member list is rewritten in place — the storage stays 1021 * the same size, the trailing slots just become unused. */ 1022 for (i = 1; i < ngrp; ++i) { 1023 ObjGroup* g = Groups_at(&ob->groups, i); 1024 u32 w, r; 1025 if (!g || g->removed) continue; 1026 if (g->signature != OBJ_SYM_NONE) { 1027 const ObjSym* sig = Symbols_at(&ob->symbols, g->signature); 1028 if (!sig || sig->removed) { 1029 g->removed = 1; 1030 continue; 1031 } 1032 } 1033 w = 0; 1034 for (r = 0; r < g->nsections; ++r) { 1035 ObjSecId sid = g->sections[r]; 1036 const Section* sec = 1037 (sid != OBJ_SEC_NONE) ? Sections_at(&ob->sections, sid) : NULL; 1038 if (sec && !sec->removed) g->sections[w++] = sid; 1039 } 1040 g->nsections = w; 1041 if (w == 0) g->removed = 1; 1042 } 1043 1044 /* Pass 4: clear Section.link if it now points at a removed section. 1045 * (Section.info is type-dependent — leave it to the emitter, which 1046 * already inspects the sem to interpret it.) */ 1047 for (i = 1; i < nsec; ++i) { 1048 Section* s = Sections_at(&ob->sections, i); 1049 if (!s || s->removed) continue; 1050 if (s->link != OBJ_SEC_NONE) { 1051 const Section* lk = Sections_at(&ob->sections, s->link); 1052 if (!lk || lk->removed) s->link = OBJ_SEC_NONE; 1053 } 1054 } 1055 } 1056 1057 /* ---- read side ---- */ 1058 1059 u32 obj_section_count(const ObjBuilder* ob) { 1060 return Sections_count(&ob->sections); 1061 } 1062 1063 const Section* obj_section_get(const ObjBuilder* ob, ObjSecId id) { 1064 if (id == OBJ_SEC_NONE) return NULL; 1065 return Sections_at(&ob->sections, id); 1066 } 1067 1068 u32 obj_reloc_count(const ObjBuilder* ob, ObjSecId id) { 1069 u32 i, total = Relocs_count(&ob->relocs), n = 0; 1070 for (i = 0; i < total; ++i) { 1071 const Reloc* r = Relocs_at(&ob->relocs, i); 1072 if (r->removed) continue; 1073 if (r->section_id == id) ++n; 1074 } 1075 return n; 1076 } 1077 1078 u32 obj_reloc_total(const ObjBuilder* ob) { return Relocs_count(&ob->relocs); } 1079 1080 const Reloc* obj_reloc_at(const ObjBuilder* ob, u32 idx) { 1081 return Relocs_at(&ob->relocs, idx); 1082 } 1083 1084 const ObjSym* obj_symbol_get(const ObjBuilder* ob, ObjSymId id) { 1085 if (id == OBJ_SYM_NONE) return NULL; 1086 return Symbols_at(&ob->symbols, id); 1087 } 1088 1089 u32 obj_atom_count(const ObjBuilder* ob) { return Atoms_count(&ob->atoms); } 1090 1091 const ObjAtom* obj_atom_get(const ObjBuilder* ob, ObjAtomId id) { 1092 if (id == OBJ_ATOM_NONE) return NULL; 1093 return Atoms_at(&ob->atoms, id); 1094 } 1095 1096 int obj_section_has_atoms(const ObjBuilder* ob, ObjSecId sid) { 1097 u32 n; 1098 if (!ob || sid == OBJ_SEC_NONE) return 0; 1099 n = Atoms_count(&ob->atoms); 1100 for (u32 i = 1; i < n; ++i) { 1101 const ObjAtom* a = Atoms_at(&ob->atoms, i); 1102 if (a && !a->removed && a->section_id == sid) return 1; 1103 } 1104 return 0; 1105 } 1106 1107 ObjAtomId obj_atom_find(const ObjBuilder* ob, ObjSecId sid, u32 offset) { 1108 u32 n; 1109 if (!ob || sid == OBJ_SEC_NONE) return OBJ_ATOM_NONE; 1110 n = Atoms_count(&ob->atoms); 1111 for (u32 i = 1; i < n; ++i) { 1112 const ObjAtom* a = Atoms_at(&ob->atoms, i); 1113 u64 begin, end; 1114 if (!a || a->removed || a->section_id != sid) continue; 1115 begin = a->offset; 1116 end = begin + a->size; 1117 if (a->size != 0 && (u64)offset >= begin && (u64)offset < end) 1118 return (ObjAtomId)i; 1119 } 1120 for (u32 i = 1; i < n; ++i) { 1121 const ObjAtom* a = Atoms_at(&ob->atoms, i); 1122 if (!a || a->removed || a->section_id != sid) continue; 1123 if (a->size == 0 && offset == a->offset) return (ObjAtomId)i; 1124 } 1125 return OBJ_ATOM_NONE; 1126 } 1127 1128 ObjAtomId obj_atom_find_symbol(const ObjBuilder* ob, ObjSymId sym) { 1129 const ObjSym* s; 1130 ObjAtomId aid; 1131 u32 n; 1132 if (!ob || sym == OBJ_SYM_NONE) return OBJ_ATOM_NONE; 1133 s = obj_symbol_get(ob, sym); 1134 if (!s || s->section_id == OBJ_SEC_NONE) return OBJ_ATOM_NONE; 1135 aid = obj_atom_find(ob, s->section_id, (u32)s->value); 1136 if (aid != OBJ_ATOM_NONE) return aid; 1137 n = Atoms_count(&ob->atoms); 1138 for (u32 i = 1; i < n; ++i) { 1139 const ObjAtom* a = Atoms_at(&ob->atoms, i); 1140 if (a && !a->removed && a->signature == sym) return (ObjAtomId)i; 1141 } 1142 return OBJ_ATOM_NONE; 1143 } 1144 1145 u32 obj_group_count(const ObjBuilder* ob) { return Groups_count(&ob->groups); } 1146 1147 const ObjGroup* obj_group_get(const ObjBuilder* ob, ObjGroupId id) { 1148 if (id == OBJ_GROUP_NONE) return NULL; 1149 return Groups_at(&ob->groups, id); 1150 } 1151 1152 ObjSymIter* obj_symiter_new(const ObjBuilder* ob) { 1153 ObjSymIter* it = 1154 (ObjSymIter*)ob->heap->alloc(ob->heap, sizeof(*it), _Alignof(ObjSymIter)); 1155 if (!it) return NULL; 1156 it->ob = ob; 1157 it->idx = 1; /* skip the id-0 sentinel */ 1158 return it; 1159 } 1160 1161 int obj_symiter_next(ObjSymIter* it, ObjSymEntry* out) { 1162 const ObjSym* s; 1163 if (!it) return 0; 1164 s = Symbols_at(&it->ob->symbols, it->idx); 1165 if (!s) return 0; 1166 out->id = it->idx; 1167 out->sym = s; 1168 it->idx++; 1169 return 1; 1170 } 1171 1172 void obj_symiter_free(ObjSymIter* it) { 1173 if (!it) return; 1174 ((Heap*)it->ob->heap)->free((Heap*)it->ob->heap, it, sizeof(*it)); 1175 } 1176 1177 struct ObjGroupIter { 1178 const ObjBuilder* ob; 1179 u32 idx; /* next index to return */ 1180 }; 1181 1182 ObjGroupIter* obj_groupiter_new(const ObjBuilder* ob) { 1183 ObjGroupIter* it = (ObjGroupIter*)ob->heap->alloc(ob->heap, sizeof(*it), 1184 _Alignof(ObjGroupIter)); 1185 if (!it) return NULL; 1186 it->ob = ob; 1187 it->idx = 1; /* skip the id-0 sentinel */ 1188 return it; 1189 } 1190 1191 int obj_groupiter_next(ObjGroupIter* it, ObjGroupEntry* out) { 1192 const ObjGroup* g; 1193 if (!it) return 0; 1194 g = Groups_at(&it->ob->groups, it->idx); 1195 if (!g) return 0; 1196 out->id = it->idx; 1197 out->group = g; 1198 it->idx++; 1199 return 1; 1200 } 1201 1202 void obj_groupiter_free(ObjGroupIter* it) { 1203 if (!it) return; 1204 ((Heap*)it->ob->heap)->free((Heap*)it->ob->heap, it, sizeof(*it)); 1205 } 1206 1207 /* Diagnostic spelling for a RelocKind. Drops the leading R_ from the enum 1208 * spelling so output reads like "RV_CALL" / "AARCH64_CALL26" — the same 1209 * spelling GNU objdump uses minus its arch prefix. */ 1210 const char* reloc_kind_name(RelocKind k) { 1211 switch (k) { 1212 #define _CASE(name) \ 1213 case name: \ 1214 return &(#name)[2] /* strip "R_" */ 1215 _CASE(R_NONE); 1216 _CASE(R_ABS32); 1217 _CASE(R_ABS64); 1218 _CASE(R_REL32); 1219 _CASE(R_REL64); 1220 _CASE(R_PC32); 1221 _CASE(R_PC64); 1222 _CASE(R_GOT32); 1223 _CASE(R_PLT32); 1224 _CASE(R_ABS8); 1225 _CASE(R_ABS16); 1226 _CASE(R_PREL16); 1227 _CASE(R_TPOFF64); 1228 _CASE(R_AARCH64_ADR_GOT_PAGE); 1229 _CASE(R_AARCH64_LD64_GOT_LO12_NC); 1230 _CASE(R_ARM_CALL); 1231 _CASE(R_ARM_MOVW); 1232 _CASE(R_ARM_MOVT); 1233 _CASE(R_ARM_B26); 1234 _CASE(R_AARCH64_JUMP26); 1235 _CASE(R_AARCH64_CALL26); 1236 _CASE(R_AARCH64_CONDBR19); 1237 _CASE(R_AARCH64_TSTBR14); 1238 _CASE(R_AARCH64_LD_PREL_LO19); 1239 _CASE(R_AARCH64_ADR_PREL_LO21); 1240 _CASE(R_AARCH64_INTRA_LABEL_ADDR); 1241 _CASE(R_AARCH64_ADR_PREL_PG_HI21); 1242 _CASE(R_AARCH64_ADR_PREL_PG_HI21_NC); 1243 _CASE(R_AARCH64_ADD_ABS_LO12_NC); 1244 _CASE(R_AARCH64_LDST8_ABS_LO12_NC); 1245 _CASE(R_AARCH64_LDST16_ABS_LO12_NC); 1246 _CASE(R_AARCH64_LDST32_ABS_LO12_NC); 1247 _CASE(R_AARCH64_LDST64_ABS_LO12_NC); 1248 _CASE(R_AARCH64_LDST128_ABS_LO12_NC); 1249 _CASE(R_AARCH64_TLVP_LOAD_PAGE21); 1250 _CASE(R_AARCH64_TLVP_LOAD_PAGEOFF12); 1251 _CASE(R_AARCH64_TLSLE_ADD_TPREL_HI12); 1252 _CASE(R_AARCH64_TLSLE_ADD_TPREL_LO12); 1253 _CASE(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC); 1254 _CASE(R_AARCH64_TLSLE_LDST8_TPREL_LO12); 1255 _CASE(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC); 1256 _CASE(R_AARCH64_TLSLE_LDST16_TPREL_LO12); 1257 _CASE(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC); 1258 _CASE(R_AARCH64_TLSLE_LDST32_TPREL_LO12); 1259 _CASE(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC); 1260 _CASE(R_AARCH64_TLSLE_LDST64_TPREL_LO12); 1261 _CASE(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC); 1262 _CASE(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21); 1263 _CASE(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC); 1264 _CASE(R_COFF_ADDR32NB); 1265 _CASE(R_AARCH64_GLOB_DAT); 1266 _CASE(R_AARCH64_JUMP_SLOT); 1267 _CASE(R_AARCH64_RELATIVE); 1268 _CASE(R_AARCH64_COPY); 1269 _CASE(R_X64_PC8); 1270 _CASE(R_X64_32S); 1271 _CASE(R_X64_PLT32); 1272 _CASE(R_X64_GOTPCREL); 1273 _CASE(R_X64_GOTPCRELX); 1274 _CASE(R_X64_REX_GOTPCRELX); 1275 _CASE(R_X64_GOTPC32); 1276 _CASE(R_X64_GOTOFF64); 1277 _CASE(R_X64_TPOFF32); 1278 _CASE(R_X64_DTPOFF32); 1279 _CASE(R_X64_DTPMOD64); 1280 _CASE(R_X64_DTPOFF64); 1281 _CASE(R_X64_TLSGD); 1282 _CASE(R_X64_TLSLD); 1283 _CASE(R_X64_GOTTPOFF); 1284 _CASE(R_X64_GLOB_DAT); 1285 _CASE(R_X64_JUMP_SLOT); 1286 _CASE(R_X64_RELATIVE); 1287 _CASE(R_X64_COPY); 1288 _CASE(R_RV_HI20); 1289 _CASE(R_RV_LO12_I); 1290 _CASE(R_RV_LO12_S); 1291 _CASE(R_RV_BRANCH); 1292 _CASE(R_RV_JAL); 1293 _CASE(R_RV_CALL); 1294 _CASE(R_RV_PCREL_HI20); 1295 _CASE(R_RV_PCREL_LO12_I); 1296 _CASE(R_RV_PCREL_LO12_S); 1297 _CASE(R_RV_INTRA_AUIPC_ADDI); 1298 _CASE(R_RV_GOT_HI20); 1299 _CASE(R_RV_TLS_GOT_HI20); 1300 _CASE(R_RV_TPREL_HI20); 1301 _CASE(R_RV_TPREL_LO12_I); 1302 _CASE(R_RV_TPREL_LO12_S); 1303 _CASE(R_RV_TPREL_ADD); 1304 _CASE(R_ADD8); 1305 _CASE(R_ADD16); 1306 _CASE(R_ADD32); 1307 _CASE(R_ADD64); 1308 _CASE(R_SUB8); 1309 _CASE(R_SUB16); 1310 _CASE(R_SUB32); 1311 _CASE(R_SUB64); 1312 _CASE(R_RV_ALIGN); 1313 _CASE(R_RV_RVC_BRANCH); 1314 _CASE(R_RV_RVC_JUMP); 1315 _CASE(R_RV_RELAX); 1316 _CASE(R_SUB6); 1317 _CASE(R_SET6); 1318 _CASE(R_SET_ULEB128); 1319 _CASE(R_SUB_ULEB128); 1320 _CASE(R_WASM_FUNCIDX); 1321 _CASE(R_WASM_TABLEIDX); 1322 _CASE(R_WASM_MEMOFS); 1323 _CASE(R_WASM_TYPEIDX); 1324 _CASE(R_COFF_SECREL); 1325 _CASE(R_COFF_SECTION); 1326 _CASE(R_COFF_AARCH64_SECREL_LOW12A); 1327 _CASE(R_COFF_AARCH64_SECREL_HIGH12A); 1328 #undef _CASE 1329 } 1330 return "UNKNOWN"; 1331 }