obj.h (49860B)
1 #ifndef KIT_OBJ_H 2 #define KIT_OBJ_H 3 4 #include "core/buf.h" 5 #include "core/core.h" 6 7 /* Forward decl: the synthetic-input hook (obj_format_synth_inputs) takes a 8 * Linker but obj.h must not pull in the link subsystem. Defined in 9 * src/link; only used here as an opaque pointer. */ 10 typedef struct Linker Linker; 11 12 /* Coarse section category; this is the SecKind argument of obj_section / obj_section_ex. */ typedef enum SecKind { 13 SEC_TEXT, 14 SEC_RODATA, 15 SEC_DATA, 16 SEC_BSS, 17 SEC_DEBUG, 18 SEC_OTHER, 19 } SecKind; 20 21 /* Section attribute bits. Every enumerator is a distinct single-bit mask (1u << n), so values combine with bitwise OR; the highest bit used is bit 8, which still fits a u16. Presumably these are the bits carried by Section.flags and by the u16 `flags` parameters of the section API — confirm. */ typedef enum SecFlag { 22 SF_EXEC = 1u << 0, 23 SF_WRITE = 1u << 1, 24 SF_ALLOC = 1u << 2, 25 SF_TLS = 1u << 3, 26 SF_MERGE = 1u << 4, 27 SF_STRINGS = 1u << 5, 28 SF_GROUP = 1u << 6, 29 SF_LINK_ORDER = 1u << 7, 30 SF_RETAIN = 1u << 8, /* SHF_GNU_RETAIN: do not GC even if unreferenced */ 31 } SecFlag; 32 33 /* Section semantic type; Section.sem is annotated as holding a SecSem. */ typedef enum SecSem { 34 SSEM_PROGBITS, 35 SSEM_NOBITS, 36 SSEM_SYMTAB, 37 SSEM_STRTAB, 38 SSEM_RELA, 39 SSEM_REL, 40 SSEM_NOTE, 41 SSEM_INIT_ARRAY, 42 SSEM_FINI_ARRAY, 43 SSEM_PREINIT_ARRAY, 44 SSEM_GROUP, 45 SSEM_WASM_CUSTOM, 46 } SecSem; 47 48 /* Symbol binding; the SymBind argument of obj_symbol / obj_symbol_ex / obj_symbol_set_bind. */ typedef enum SymBind { 49 SB_LOCAL, 50 SB_GLOBAL, 51 SB_WEAK, 52 } SymBind; 53 54 /* Symbol visibility; the SymVis argument of obj_symbol_ex / obj_symbol_set_vis. */ typedef enum SymVis { 55 SV_DEFAULT, 56 SV_HIDDEN, 57 SV_PROTECTED, 58 SV_INTERNAL, 59 } SymVis; 60 61 /* Symbol type; the SymKind argument of obj_symbol / obj_symbol_ex. SK_UNDEF is the value the linker keys definedness on (see the SK_NOTYPE note inside). */ typedef enum SymKind { 62 SK_UNDEF, 63 SK_FUNC, 64 SK_OBJ, 65 SK_SECTION, 66 SK_FILE, 67 SK_COMMON, 68 SK_TLS, 69 SK_ABS, 70 /* Defined symbol with no specific type — assembly labels, AArch64 71 * mapping symbols (`$x`, `$d`). Distinct from SK_UNDEF (undefined 72 * external) so the linker keeps definedness keyed on SK_UNDEF. */ 73 SK_NOTYPE, 74 /* GNU IFUNC: a function whose implementation is selected at runtime 75 * by a resolver. Round-trips as STT_GNU_IFUNC (10); presence forces 76 * EI_OSABI=ELFOSABI_GNU on emit. */ 77 SK_IFUNC, 78 } SymKind; 79 80 /* Object-format tag; Section.ext_kind is annotated as holding an ObjExtKind, and obj_section_set_ext takes one. */ typedef enum ObjExtKind { 81 OBJ_EXT_NONE, 82 OBJ_EXT_ELF, 83 OBJ_EXT_COFF, 84 OBJ_EXT_MACHO, 85 OBJ_EXT_WASM, 86 /* Wasm-target frontend-supplied import descriptors keyed by symbol name.
87 * Populated by lang/c when an extern declaration carries 88 * __attribute__((import_module(...), import_name(...))); consumed by the 89 * wasm backend when promoting undefined function symbols to imports. */ 90 OBJ_EXT_WASM_IMPORTS, 91 } ObjExtKind; 92 93 /* Section handle. OBJ_SEC_NONE (0) is the "no section" value, e.g. ObjSym.section_id of an undefined symbol or an unset Section.link. */ typedef u32 ObjSecId; 94 #define OBJ_SEC_NONE 0u 95 96 /* Group handle. OBJ_GROUP_NONE (0) marks a section that is not in a COMDAT/group (see Section.group_id). */ typedef u32 ObjGroupId; 97 #define OBJ_GROUP_NONE 0u 98 99 /* Per-ObjBuilder symbol handle. Object files own their symbol namespace: 100 * local/static symbols, section symbols, file symbols, unnamed labels, common 101 * definitions, and external references are all represented by ObjSymId values 102 * scoped to one builder. 0 is reserved as "none". */ 103 typedef u32 ObjSymId; 104 #define OBJ_SYM_NONE 0u 105 106 /* Atom handle, as returned by obj_atom_define / obj_atom_find; OBJ_ATOM_NONE is 0 like the other "none" ids. */ typedef u32 ObjAtomId; 107 #define OBJ_ATOM_NONE 0u 108 109 /* Flag bits for atoms. Presumably these go in the u32 `flags` of obj_atom_define / ObjAtom.flags — confirm. */ typedef enum ObjAtomFlag { 110 OBJ_ATOM_RETAIN = 1u << 0, 111 } ObjAtomFlag; 112 113 /* Format-neutral relocation kinds. R_NONE is pinned to 0; every other value is implicit, so inserting an enumerator mid-list renumbers everything after it (the TLSIE comment near the tail notes that public KIT_RELOC_* values depend on this numbering). */ typedef enum RelocKind { 114 R_NONE = 0, 115 R_ABS32, 116 R_ABS64, 117 R_REL32, 118 R_REL64, 119 R_PC32, 120 R_PC64, 121 R_GOT32, 122 R_PLT32, 123 /* Neutral data-word kinds completing the ABS/PREL/TPOFF families. */ 124 R_ABS8, 125 R_ABS16, 126 R_PREL16, 127 /* Internal-only: a raw 64-bit local-exec tpoff written into a TLS GOT 128 * slot by link_emit_internal_tpoff64. Never appears on the wire. 129 * x86_64 stores variant-II (X - tls_memsz); AArch64 and RISC-V store 130 * variant-I ((X - tls_vaddr) + TCB). Byte encoding is identical on 131 * all three arches: a plain 64-bit little-endian write. */ 132 R_TPOFF64, 133 R_AARCH64_ADR_GOT_PAGE, 134 R_AARCH64_LD64_GOT_LO12_NC, 135 R_ARM_CALL, 136 R_ARM_MOVW, 137 R_ARM_MOVT, 138 R_ARM_B26, 139 R_AARCH64_JUMP26, 140 R_AARCH64_CALL26, 141 R_AARCH64_CONDBR19, 142 R_AARCH64_TSTBR14, 143 R_AARCH64_LD_PREL_LO19, 144 R_AARCH64_ADR_PREL_LO21, 145 /* MCEmitter-only function-local label address materialization. The fixup 146 * patches a fixed 16-byte sequence as either ADR+B+literal when in range, 147 * or LDR-literal+B+relocated-literal when the ADR range is exceeded.
*/ 148 R_AARCH64_INTRA_LABEL_ADDR, /* The next eight spellings match AArch64 ELF ABI relocation names (R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADD_ABS_LO12_NC, R_AARCH64_LDSTn_ABS_LO12_NC). NOTE(review): presumably each kind is applied with that relocation's semantics — confirm against the per-arch apply code. */ 149 R_AARCH64_ADR_PREL_PG_HI21, 150 R_AARCH64_ADR_PREL_PG_HI21_NC, 151 R_AARCH64_ADD_ABS_LO12_NC, 152 R_AARCH64_LDST8_ABS_LO12_NC, 153 R_AARCH64_LDST16_ABS_LO12_NC, 154 R_AARCH64_LDST32_ABS_LO12_NC, 155 R_AARCH64_LDST64_ABS_LO12_NC, 156 R_AARCH64_LDST128_ABS_LO12_NC, 157 /* AArch64 Mach-O TLV (thread-local variable) descriptor access. The 158 * compiler emits these to reference a TLV descriptor in 159 * __DATA,__thread_vars; the linker routes both through a synthetic 160 * __DATA,__thread_ptrs slot (analogous to __got for non-TLV externs). 161 * 162 * adrp x0, _var@TLVPPAGE ; TLVP_LOAD_PAGE21 163 * ldr x0, [x0, _var@TLVPPAGEOFF]; TLVP_LOAD_PAGEOFF12 -> descriptor 164 * ldr x1, [x0] ; thunk (filled by dyld) 165 * blr x1 ; thunk(x0=descriptor) -> x0 = TLV addr 166 * 167 * Encoding-wise PAGE21 is ADRP-form and PAGEOFF12 is a 64-bit-LDR 168 * lo12 (scale=3). The linker rewrites S to the matching __thread_ptrs 169 * slot's vaddr before applying. */ 170 R_AARCH64_TLVP_LOAD_PAGE21, 171 R_AARCH64_TLVP_LOAD_PAGEOFF12, 172 /* AArch64 TLS Local-Exec model. */ 173 R_AARCH64_TLSLE_ADD_TPREL_HI12, 174 R_AARCH64_TLSLE_ADD_TPREL_LO12, 175 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, 176 R_AARCH64_TLSLE_LDST8_TPREL_LO12, 177 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC, 178 R_AARCH64_TLSLE_LDST16_TPREL_LO12, 179 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC, 180 R_AARCH64_TLSLE_LDST32_TPREL_LO12, 181 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC, 182 R_AARCH64_TLSLE_LDST64_TPREL_LO12, 183 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 184 /* Dynamic-only relocs: emitted into .rela.dyn / .rela.plt of an 185 * ET_DYN/ET_EXEC output and processed by the runtime loader. They 186 * never appear in ET_REL inputs from a compiler; the linker may 187 * synthesize them during dynamic-exe / shared-lib emit, and the 188 * reader recognizes them when it walks an ET_DYN's .rela.* sections 189 * (currently only used for symbol-name extraction, not applied).
*/ 190 R_AARCH64_GLOB_DAT, 191 R_AARCH64_JUMP_SLOT, 192 R_AARCH64_RELATIVE, 193 R_AARCH64_COPY, 194 /* x86_64 reloc kinds. Most map directly to the existing R_ABS and 195 * R_PC entries; the few here are the x86_64-only encodings (8-bit 196 * displacements, GOT/PLT, dynamic linker-only entries). */ 197 R_X64_PC8, 198 R_X64_32S, 199 R_X64_PLT32, 200 R_X64_GOTPCREL, 201 R_X64_GOTPCRELX, 202 R_X64_REX_GOTPCRELX, 203 R_X64_GOTPC32, 204 R_X64_GOTOFF64, 205 R_X64_TPOFF32, 206 R_X64_DTPOFF32, 207 R_X64_DTPMOD64, 208 R_X64_DTPOFF64, 209 R_X64_TLSGD, 210 R_X64_TLSLD, 211 R_X64_GOTTPOFF, 212 R_X64_GLOB_DAT, 213 R_X64_JUMP_SLOT, 214 R_X64_RELATIVE, 215 R_X64_COPY, /* RISC-V (R_RV_*) reloc kinds start here. */ 216 R_RV_HI20, 217 R_RV_LO12_I, 218 R_RV_LO12_S, 219 R_RV_BRANCH, 220 R_RV_JAL, 221 R_RV_CALL, 222 R_RV_PCREL_HI20, 223 R_RV_PCREL_LO12_I, 224 R_RV_PCREL_LO12_S, 225 /* Intra-section label address materialization via an AUIPC+ADDI pair. 226 * Used only by MCEmitter intra-section label fixups (CGTarget 227 * load_label_addr). Width is 8 bytes, covering both instructions; the 228 * fixup site is the AUIPC and the disp is the label byte offset 229 * relative to the AUIPC site. */ 230 R_RV_INTRA_AUIPC_ADDI, 231 R_RV_GOT_HI20, 232 /* TLS Initial-Exec: %tls_ie_pcrel_hi(sym). Paired with R_RV_PCREL_LO12_I 233 * on the follow-on ld. The GOT entry holds (&sym - tp); the AUIPC/ld 234 * pair materializes that offset into a register so the caller adds tp. */ 235 R_RV_TLS_GOT_HI20, 236 R_RV_TPREL_HI20, 237 R_RV_TPREL_LO12_I, 238 R_RV_TPREL_LO12_S, 239 R_RV_TPREL_ADD, /* Unprefixed ADD / SUB / SET kinds (here and after R_RV_RELAX). NOTE(review): they sit among the R_RV_ entries and resemble the RISC-V ELF R_RISCV_ADDn / R_RISCV_SUBn / R_RISCV_SET6 relocations, but nothing in this header says whether other targets use them — confirm. */ 240 R_ADD8, 241 R_ADD16, 242 R_ADD32, 243 R_ADD64, 244 R_SUB8, 245 R_SUB16, 246 R_SUB32, 247 R_SUB64, 248 R_RV_ALIGN, 249 R_RV_RVC_BRANCH, 250 R_RV_RVC_JUMP, 251 R_RV_RELAX, 252 R_SUB6, 253 R_SET6, 254 R_SET_ULEB128, 255 R_SUB_ULEB128, /* WebAssembly (R_WASM_*) reloc kinds. */ 256 R_WASM_FUNCIDX, 257 R_WASM_TABLEIDX, 258 R_WASM_MEMOFS, 259 R_WASM_TYPEIDX, 260 /* COFF/PE-only reloc kinds — section-relative fixups used by Windows 261 * TLS Local-Exec lowering and debug info.
SECREL = 32-bit offset 262 * from the start of the containing section. SECTION = 16-bit section 263 * index (1-based). Both arch-independent on the kit side; the 264 * per-arch translators map to IMAGE_REL_{AMD64,ARM64}_SECREL/SECTION. */ 265 R_COFF_SECREL, 266 R_COFF_SECTION, 267 /* AArch64 Windows TLS access uses an ADD-imm12-pair to materialize a 268 * 24-bit SECREL value into a register: 269 * add xd, xd, #:secrel_hi12:sym, lsl #12 ; HIGH12A bits [23:12] 270 * add xd, xd, #:secrel_lo12:sym ; LOW12A bits [11:0] 271 * The instruction at the patch site already has sh=1 (HIGH) or sh=0 272 * (LOW) preset by the codegen; the linker only patches the imm12 273 * field at bits [21:10]. NC variants ("no carry / no overflow check" 274 * in PE terminology) mean the high bits of SECREL above 24 are 275 * discarded — fine for any .tls section under 16 MiB. NOTE(review): neither enumerator below has an NC-suffixed spelling, so presumably the discard behaviour described here applies to both of them — confirm. */ 276 R_COFF_AARCH64_SECREL_LOW12A, 277 R_COFF_AARCH64_SECREL_HIGH12A, 278 /* AArch64 TLS Initial-Exec. The ADRP/LDR pair loads the symbol's 279 * TP-relative offset from a GOT slot; the linker fills that slot with a 280 * 64-bit tpoff (R_TPOFF64) and redirects these to the slot, so they apply 281 * exactly like the regular ADR_GOT_PAGE / LD64_GOT_LO12_NC pair. 282 * Appended at the enum tail so the public KIT_RELOC_* values (object.h) 283 * keep their pinned numbering. New kinds must therefore keep being appended after the last enumerator, never inserted above. */ 284 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 285 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 286 /* COFF ADDR32NB: 32-bit image-relative RVA (S + A - ImageBase), used by 287 * PE exception tables and other image metadata.
*/ 288 R_COFF_ADDR32NB, 289 } RelocKind; 290 291 /* One section of the in-memory object: identity (name, kind, flags, sem), layout attributes (align, entsize, addr), cross-references (link, info, group_id) and the payload in `bytes` (or just `bss_size` for BSS). */ typedef struct Section { 292 Sym name; 293 u16 kind; 294 u16 flags; 295 u16 sem; /* SecSem */ 296 u16 ext_kind; /* ObjExtKind */ 297 u32 align; 298 u32 entsize; 299 ObjSecId link; /* section index or OBJ_SEC_NONE */ 300 u32 info; /* section-format dependent, typed by sem/ext_kind */ 301 ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a COMDAT/group */ 302 u32 bss_size; /* nonzero only for SEC_BSS */ 303 u64 addr; /* load vaddr (sh_addr); 0 for relocatable inputs */ 304 /* Format-specific raw section type (ELF sh_type, COFF Characteristics 305 * subfield, etc.). Set by .o readers when the canonical SecSem 306 * mapping is lossy — e.g., SHT_LLVM_ADDRSIG (0x6FFF4C03) and 307 * SHT_ARM_ATTRIBUTES (0x70000003) collapse to SSEM_PROGBITS but 308 * the emitter must write back the original value to round-trip. 309 * Zero means "no override; derive from sem". */ 310 u32 ext_type; 311 u32 ext_flags; /* same idea for format-specific sh_flags bits 312 not represented in SecFlag (e.g. SHF_EXCLUDE) */ 313 /* Tombstone for strip/objcopy-style mutations. Set by 314 * obj_section_remove; honored by obj_sweep_dead and the emitters. 315 * Iterators / direct ID-based access on the builder must consult this 316 * bit and skip removed entries. */ 317 u8 removed; 318 Buf bytes; 319 } Section; 320 321 /* One relocation record: the patch site (section_id, offset), the kind (obj_reloc takes it as a RelocKind), the target symbol and the addend. */ typedef struct Reloc { 322 ObjSecId section_id; 323 u32 offset; 324 u16 kind; 325 u8 has_explicit_addend; 326 u8 pair; /* paired/following relocation, format-specific */ 327 /* Tombstone set by obj_sweep_dead when the reloc points at a removed 328 * section or symbol. NOTE(review): this byte does not appear to occupy existing padding. Assuming u32/u16/u8/i64 are the usual fixed-width types, the five fields above total exactly 12 bytes (4+4+2+1+1), so `sym` would otherwise start at offset 12 with no slack before it; with this field present `sym` moves to offset 16 and `addend` to 24, 329 * i.e. sizeof(Reloc) likely grows from 24 to 32 with an 8-byte-aligned i64. Confirm with a sizeof/offsetof check if the record size matters.
*/ 330 u8 removed; 331 ObjSymId sym; 332 i64 addend; 333 } Reloc; 334 335 /* One symbol-table entry of a builder, addressed by ObjSymId. `bind`, `kind` and `vis` presumably hold SymBind / SymKind / SymVis values (the types obj_symbol_ex takes) — confirm. */ typedef struct ObjSym { 336 Sym name; 337 u16 bind; 338 u16 kind; 339 u8 vis; 340 u8 ext_kind; 341 u16 flags; 342 ObjSecId section_id; /* OBJ_SEC_NONE if undef */ 343 u64 value; /* offset within section, or absolute */ 344 u64 size; 345 u64 common_align; /* nonzero for SK_COMMON */ 346 /* Lifecycle gate for spurious-UNDEF pruning at .o emit time. 347 * 348 * The C frontend mints an ObjSym for every `extern` declaration it 349 * parses (so a header like <stdio.h> creates 50+ ObjSyms in one TU). 350 * Most of those are never the target of any relocation. `referenced` 351 * tracks that distinction: obj_reloc_ex sets it on the target, and 352 * the file emitters (elf_emit / macho_emit) drop entries that are 353 * still SK_UNDEF + (SB_GLOBAL|SB_WEAK) + !referenced from the output 354 * symbol table. 355 * 356 * Definitions never need the gate — kind != SK_UNDEF for those, so 357 * the filter never considers them. Readers (elf_read, macho_read) 358 * mark every read-in symbol referenced=1 so a roundtrip preserves 359 * UNDEFs that came from another tool's output. */ 360 u8 referenced; 361 /* Tombstone for strip/objcopy. Set by obj_symbol_remove or cascaded 362 * by obj_sweep_dead when this symbol is defined in a removed section. 363 * The UNDEF-prune predicate (was: !referenced && SK_UNDEF && global/weak) 364 * is also folded into the sweep, so emit-time symbol loops only need to 365 * check `removed`. */ 366 u8 removed; 367 } ObjSym; 368 369 /* A section group (COMDAT and friends): a name, a signature symbol, and the member section ids in `sections` / `nsections` (filtered in place by obj_sweep_dead, pass 4). */ typedef struct ObjGroup { 370 Sym name; 371 ObjSymId signature; 372 ObjSecId* sections; 373 u32 nsections; 374 u32 flags; 375 /* Tombstone — set by obj_group_remove, or cascaded by obj_sweep_dead 376 * when every member section has been removed (or the signature symbol 377 * has been removed).
*/ 378 u8 removed; 379 } ObjGroup; 380 381 /* A (section, offset, size) range with a signature symbol and flags; the fields mirror the parameters of obj_atom_define. `removed` is presumably a tombstone like the ones on Section / ObjSym / ObjGroup — confirm. */ typedef struct ObjAtom { 382 ObjSecId section_id; 383 u32 offset; 384 u32 size; 385 ObjSymId signature; 386 u32 flags; 387 u8 removed; 388 } ObjAtom; 389 390 /* The single concrete in-memory object representation. 391 * Written by MCEmitter/CGTarget (during compile) or by an .o reader (during 392 * link). Read by file emitters, the linker (file and JIT), and objdump. 393 * 394 * Invariant: post-finalize state is identical in shape to what an .o reader 395 * would produce from a written-out object — so consumers don't care which 396 * path produced it. 397 * 398 * Lifecycle gates: 399 * 1. MCEmitter/CGTarget (or a .o reader) issues writes. 400 * 2. cgtarget_finalize must be called before any debug_emit or read access on 401 * the builder. At -O2 it flushes lowered code into sections. 402 * 3. debug_emit (if -g) writes .debug_* sections. 403 * 4. obj_finalize closes the builder: computes flat section offsets, applies 404 * pending fixups within sections, and freezes the read-side view. 405 * No further writes are permitted afterward. 406 * 5. File emitters and the linker consume via the read API. 407 * 408 * The handle type itself is the public KitObjBuilder, aliased to ObjBuilder 409 * inside libkit (see src/core/core.h). */ 410 /* Constructor / destructor pair for a builder; the Compiler passed to obj_new is the owner that obj_compiler reports. NOTE(review): what obj_free releases (section bytes, group arrays) is not stated here — confirm. */ 411 ObjBuilder* obj_new(Compiler*); 412 void obj_free(ObjBuilder*); 413 414 /* The owning Compiler; needed by consumers (e.g. kit_disasm_iter_new) 415 * that take a bare ObjBuilder and still must pool_str() symbol names 416 * against the right pool.
*/ 417 Compiler* obj_compiler(const ObjBuilder*); 418 419 /* ---- write side (MCEmitter/CGTarget and .o readers) ---- */ /* Section handle creation; obj_section_ex additionally takes SecSem, entsize, link and info. NOTE(review): the obj_align_to comment further down refers to "dedup of obj_section", so presumably a repeated name returns the existing id rather than a new section — confirm. */ 420 ObjSecId obj_section(ObjBuilder*, Sym name, SecKind, u16 flags, u32 align); 421 ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags, 422 u32 align, u32 entsize, u32 link, u32 info); /* Per-field setters for an existing section; each takes the ObjSecId plus the new value(s) for the Section field named in the function. */ 423 void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags); 424 void obj_section_set_entsize(ObjBuilder*, ObjSecId, u32 entsize); 425 void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align); 426 void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId); 427 void obj_section_set_link_info(ObjBuilder*, ObjSecId, ObjSecId link, u32 info); 428 void obj_section_set_addr(ObjBuilder*, ObjSecId, u64 addr); 429 /* Set format-specific raw sh_type/sh_flags overrides (see Section.ext_type 430 * comment). Zero ext_type means "no override". */ 431 void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind, u32 ext_type, 432 u32 ext_flags); /* Byte-level writers. NOTE(review): presumably obj_write appends `n` bytes, obj_reserve returns a pointer to `n` newly reserved bytes, and obj_reserve_bss grows bss_size — none of this is stated here, and the lifetime of the obj_reserve pointer across later writes is undocumented; confirm. */ 433 void obj_write(ObjBuilder*, ObjSecId section_id, const void* data, size_t n); 434 u8* obj_reserve(ObjBuilder*, ObjSecId section_id, size_t n); 435 void obj_reserve_bss(ObjBuilder*, ObjSecId section_id, u32 size, u32 align); 436 /* Pad `section_id` to `align`, returning the resulting offset. For 437 * PROGBITS sections this writes zero bytes; for NOBITS it bumps 438 * bss_size. Callers that share a section across multiple symbols use 439 * this to ensure each placement starts at the symbol's required 440 * alignment, since dedup of obj_section means a placement isn't 441 * automatically aligned just because the section's own align is set.
*/ 442 u32 obj_align_to(ObjBuilder*, ObjSecId section_id, u32 align); /* NOTE(review): presumably obj_pos returns the section's current write offset and obj_patch overwrites `n` bytes at `ofs` — confirm; offsets are u32 while lengths are size_t. */ 443 u32 obj_pos(ObjBuilder*, ObjSecId section_id); 444 void obj_patch(ObjBuilder*, ObjSecId section_id, u32 ofs, const void* data, 445 size_t n); 446 /* Symbol creation. obj_symbol_ex takes the same arguments as obj_symbol plus SymVis and common_align. */ 447 ObjSymId obj_symbol(ObjBuilder*, Sym name, SymBind, SymKind, 448 ObjSecId section_id, u64 value, u64 size); 449 ObjSymId obj_symbol_ex(ObjBuilder*, Sym name, SymBind, SymVis, SymKind, 450 ObjSecId section_id, u64 value, u64 size, 451 u64 common_align); 452 /* Allocate a stable symbol id for data that may be discarded before emission. 453 * The returned symbol is tombstoned and not entered in the name index; callers 454 * must publish it with obj_symbol_define_live if the data is actually emitted. 455 */ 456 ObjSymId obj_symbol_defer(ObjBuilder*, Sym name, SymBind, SymVis, SymKind, 457 u64 size); /* Lookup by name. NOTE(review): presumably returns OBJ_SYM_NONE when the name is absent; deferred symbols are not in the name index (see obj_symbol_defer) — confirm. */ 458 ObjSymId obj_symbol_find(ObjBuilder*, Sym name); 459 /* obj_symbol_ex creates a symbol; obj_symbol_define fills in the 460 * (section_id, value, size) fields of an already-created symbol. The pair 461 * supports forward references: an undefined ObjSymId is created when first 462 * needed for a relocation, and defined later when its definition is emitted. */ 463 void obj_symbol_define(ObjBuilder*, ObjSymId, ObjSecId section_id, u64 value, 464 u64 size); /* Publishing counterpart of obj_symbol_defer (see its comment above): call it once the deferred symbol's data is actually emitted. */ 465 void obj_symbol_define_live(ObjBuilder*, ObjSymId, ObjSecId section_id, 466 u64 value, u64 size); 467 /* Record a relocation at (section_id, offset) against `sym`. obj_reloc_ex also marks the target symbol referenced (see obj_sym_mark_referenced). NOTE(review): presumably obj_reloc forwards to obj_reloc_ex with default explicit_addend / pair values — confirm. */ 468 void obj_reloc(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind, 469 ObjSymId sym, i64 addend); 470 void obj_reloc_ex(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind, 471 ObjSymId sym, i64 addend, int explicit_addend, int pair); 472 473 /* Force ObjSym::referenced = 1 on the named symbol. obj_reloc_ex calls this 474 * automatically; the readers (elf_read / macho_read) call it on every 475 * ingested symbol so a roundtrip preserves UNDEFs that another tool 476 * emitted into the input.
*/ 477 void obj_sym_mark_referenced(ObjBuilder*, ObjSymId); /* Explicit-value variant of the call above. NOTE(review): presumably lets a caller clear ObjSym::referenced as well as set it — confirm. */ 478 void obj_sym_set_referenced(ObjBuilder*, ObjSymId, int referenced); 479 /* Creates an atom; the parameters correspond one-to-one to the ObjAtom fields. */ 480 ObjAtomId obj_atom_define(ObjBuilder*, ObjSecId section_id, u32 offset, 481 u32 size, ObjSymId signature, u32 flags); 482 /* Creates a group and adds member sections to it (see ObjGroup). */ 483 ObjGroupId obj_group(ObjBuilder*, Sym name, ObjSymId signature, u32 flags); 484 void obj_group_add_section(ObjBuilder*, ObjGroupId group_id, 485 ObjSecId section_id); 486 /* Lifecycle step 4 of the ObjBuilder overview comment: closes the builder. */ 487 void obj_finalize(ObjBuilder*); 488 489 /* ---- post-finalize mutators (strip / objcopy support) ---- 490 * 491 * Mutators flip per-entry fields and / or `removed` tombstones. Cascading 492 * cleanup (drop relocs against removed sections, etc.) is deferred to 493 * obj_sweep_dead, which the emitters call automatically. Mutators are 494 * cheap individual field writes; they do not re-index or compact storage, 495 * so ObjSecId / ObjSymId / ObjGroupId remain stable. 496 * 497 * No-ops when given OBJ_SEC_NONE / OBJ_SYM_NONE / OBJ_GROUP_NONE, and 498 * silently ignore ids that are out of range or already removed (the 499 * driver tools call these in bulk and benefit from idempotency). NOTE(review): the ObjBuilder lifecycle overview says no further writes are permitted after obj_finalize; these mutators are evidently the exception, so presumably "writes" there means the write-side API — confirm and reconcile the two comments. */ 500 void obj_section_remove(ObjBuilder*, ObjSecId); 501 void obj_symbol_remove(ObjBuilder*, ObjSymId); 502 void obj_group_remove(ObjBuilder*, ObjGroupId); 503 void obj_section_rename(ObjBuilder*, ObjSecId, Sym new_name); 504 void obj_symbol_rename(ObjBuilder*, ObjSymId, Sym new_name); 505 void obj_symbol_set_bind(ObjBuilder*, ObjSymId, SymBind); 506 void obj_symbol_set_vis(ObjBuilder*, ObjSymId, SymVis); 507 /* Replace `section_id`'s contents wholesale with `n` bytes from `data`. 508 * Resets bss_size (so a former NOBITS section gains real bytes) and 509 * preserves the section's other attributes (name, kind, flags, align). 510 * Existing relocations against the section are kept — caller is 511 * responsible for issuing obj_symbol_remove on any defined symbols whose 512 * (value, size) no longer fits, etc.
*/ 513 void obj_section_replace_bytes(ObjBuilder*, ObjSecId, const u8* data, size_t n); 514 515 /* Tombstone-driven consistency sweep. Called by each file-format emitter 516 * at the top of emit; consumers that walk a builder by raw section/symbol/ 517 * reloc/group ID after sweep must respect the `removed` bit on each entry. 518 * 519 * Does the following passes: 520 * 1. Cascade: any symbol defined in a removed section becomes removed. 521 * 2. UNDEF prune: any non-referenced SK_UNDEF global/weak becomes removed 522 * (folds the historical "spurious extern from a header" filter). 523 * 3. Reloc cleanup: any reloc whose containing section, defining section, 524 * or target symbol is removed becomes removed. 525 * 4. Group compaction: each group's section list is filtered in place to 526 * drop removed members; a group whose list empties out (or whose 527 * signature symbol has been removed) is itself marked removed. 528 * 5. Section link cleanup: Section.link cleared if it points at a 529 * removed section. 530 * 531 * Idempotent — safe to call multiple times. On a never-mutated builder 532 * only pass 2 has any effect. The sweep only sets tombstones and filters group lists; per the mutator notes, ids stay stable because storage is not compacted. */ 533 void obj_sweep_dead(ObjBuilder*); 534 535 /* Format-specific ELF e_flags (per-arch ABI bits, e.g. EF_RISCV_RVC | 536 * EF_RISCV_FLOAT_ABI_DOUBLE on RV64). Set by read_elf during input 537 * parsing; consumed by emit_elf for round-trip. The setter records 538 * a presence bit so emit_elf can distinguish "preserve from input" 539 * from "no input — synthesize per-arch default". NOTE(review): the getter's int result is presumably that presence bit (0 when no e_flags were recorded, with *out left alone) — confirm. */ 540 void obj_set_elf_e_flags(ObjBuilder*, u32 e_flags); 541 int obj_get_elf_e_flags(const ObjBuilder*, u32* out); 542 543 /* COFF short-import shim annotation.
Set by read_coff when the input 544 * is a Microsoft "short import" record (Sig1=0, Sig2=0xFFFF) found 545 * inside a .lib archive member: the ObjBuilder synthesizes the 546 * imported symbol(s) the long-form import object would have provided, 547 * and stores the providing DLL name here so the archive-ingestion 548 * layer (Phase 4.3) can reclassify the resulting LinkInput as a 549 * DSO with this name as the soname. Unset (returns 0 from the 550 * getter) on every other input. The setter records a presence bit 551 * the same way obj_set_elf_e_flags does. */ 552 void obj_set_coff_import_dll(ObjBuilder*, Sym dll_name); 553 int obj_get_coff_import_dll(const ObjBuilder*, Sym* out); 554 /* COFF short-import IMPORT NAME override: the name the loader resolves in the 555 * DLL when the short-import NameType makes it differ from the local symbol 556 * name (NOPREFIX/UNDECORATE strip decoration; EXPORTAS carries an explicit 557 * export name). Set by read_coff_short_import; consumed by the COFF 558 * import-table synthesis for the PE hint/name-table entry. The local symbol 559 * keeps its own name so kit's references still resolve. Unset on inputs whose 560 * import name equals the symbol name. NOTE(review): presumably the getter returns 0 when unset, like obj_get_coff_import_dll — confirm. The annotation is per builder, not per symbol (neither function takes an ObjSymId). */ 561 void obj_set_coff_import_name(ObjBuilder*, Sym import_name); 562 int obj_get_coff_import_name(const ObjBuilder*, Sym* out); 563 564 /* COFF WEAK_EXTERNAL alias: symbol `sym` is an alias for the symbol named 565 * `target` (the aux record's fall-back/default symbol). Recorded by read_coff 566 * for genuine alias declarations (IMAGE_WEAK_EXTERN_SEARCH_ALIAS) so the linker 567 * can resolve the weak symbol to its target by name — e.g. mingw x86_64's 568 * `_setjmp` aliasing `__intrinsic_setjmp`, a redirection the single-underscore 569 * naming heuristic can't derive. The getter returns 0 when `sym` has no 570 * recorded alias (the common case). See src/link/link_resolve.c.
*/ 571 void obj_set_weak_alias(ObjBuilder*, ObjSymId sym, Sym target); 572 Sym obj_get_weak_alias(const ObjBuilder*, ObjSymId sym); 573 /* Enumerate the recorded weak-external aliases (for building a cross-input 574 * name->target map at link time). Count is 0 on inputs that carry none. NOTE(review): obj_weak_alias_at's int result is presumably 0 when `i` is out of range (>= obj_weak_alias_count) — confirm. */ 575 u32 obj_weak_alias_count(const ObjBuilder*); 576 int obj_weak_alias_at(const ObjBuilder*, u32 i, ObjSymId* sym_out, 577 Sym* target_out); 578 579 /* Per-symbol format-specific flag bits. ObjSym.flags is otherwise 580 * unused; readers stash format-specific attribute bits there so the 581 * matching emitter can re-apply them. Today this is Mach-O n_desc 582 * pass-through (N_NO_DEAD_STRIP, etc.) — bits the canonical 583 * ObjSym.bind/vis/kind triple doesn't model. ELF callers are free 584 * to use the same field for their own pass-through; the contract is 585 * "bits go in / same bits come out", not a shared semantic. */ 586 void obj_symbol_set_flags(ObjBuilder*, ObjSymId, u16 flags); 587 588 /* ---- read side (linker, file emitters, objdump) ---- */ /* These accessors hand back raw entries: per the Section.removed comment, direct ID-based access must check the `removed` tombstone itself. Note the indexing difference: obj_reloc_at takes a 0-based index (0..total-1), whereas section / symbol / group ids reserve 0 as "none". */ 589 u32 obj_section_count(const ObjBuilder*); 590 const Section* obj_section_get(const ObjBuilder*, ObjSecId id); 591 u32 obj_reloc_count(const ObjBuilder*, ObjSecId section_id); 592 u32 obj_reloc_total(const ObjBuilder*); 593 const Reloc* obj_reloc_at(const ObjBuilder*, u32 idx); /* 0..total-1 */ 594 595 /* Diagnostic spelling for a RelocKind. The returned pointer is a static 596 * literal that mirrors the enum identifier without the R_ prefix (e.g. 597 * R_RV_CALL -> "RV_CALL", R_AARCH64_CALL26 -> "AARCH64_CALL26"). NULL is 598 * never returned; unknown kinds collapse to "UNKNOWN".
*/ 599 const char* reloc_kind_name(RelocKind); 600 const ObjSym* obj_symbol_get(const ObjBuilder*, ObjSymId); /* Atom read API. NOTE(review): obj_atom_find / obj_atom_find_symbol return an ObjAtomId, presumably OBJ_ATOM_NONE when nothing matches; whether obj_atom_find matches any offset inside an atom or only its start is not stated — confirm. */ 601 u32 obj_atom_count(const ObjBuilder*); 602 const ObjAtom* obj_atom_get(const ObjBuilder*, ObjAtomId); 603 int obj_section_has_atoms(const ObjBuilder*, ObjSecId); 604 ObjAtomId obj_atom_find(const ObjBuilder*, ObjSecId section_id, u32 offset); 605 ObjAtomId obj_atom_find_symbol(const ObjBuilder*, ObjSymId); /* Group count / lookup by id; see the group iterator below for traversal. */ 606 u32 obj_group_count(const ObjBuilder*); 607 const ObjGroup* obj_group_get(const ObjBuilder*, ObjGroupId id); 608 609 /* Symbol iteration: ObjSymId is scoped to this builder, but callers should not 610 * assume dense contiguous ids or direct indexing. The builder may store symbols 611 * in segments internally; use the cursor. 612 * 613 * The iterator is raw — it visits every symbol slot including those whose 614 * `removed` tombstone is set. Callers that want post-sweep semantics must 615 * check ObjSym::removed themselves. (Consistent with Section.removed and 616 * Reloc.removed: tombstones live as a per-entry field, not behind the 617 * iterator.) 618 * 619 * Usage: obj_symiter_new, then call obj_symiter_next until it returns 0, then obj_symiter_free. Each successful call fills `out` with the id and a const pointer to the entry. */ 618 typedef struct ObjSymIter ObjSymIter; 619 typedef struct ObjSymEntry { 620 ObjSymId id; 621 const ObjSym* sym; 622 } ObjSymEntry; 623 ObjSymIter* obj_symiter_new(const ObjBuilder*); 624 int obj_symiter_next(ObjSymIter*, ObjSymEntry* out); /* returns 0 at end */ 625 void obj_symiter_free(ObjSymIter*); 626 627 /* Group iteration: peer of obj_symiter for groups (COMDAT and friends). 628 * Same segmented-storage caveat — use the cursor, don't index directly. 629 * Like obj_symiter, this is raw: tombstoned groups are still returned; 630 * callers consult ObjGroup::removed.
*/ 631 typedef struct ObjGroupIter ObjGroupIter; 632 typedef struct ObjGroupEntry { 633 ObjGroupId id; 634 const ObjGroup* group; 635 } ObjGroupEntry; /* Usage mirrors obj_symiter: new, call next until it returns 0, then free. */ 636 ObjGroupIter* obj_groupiter_new(const ObjBuilder*); 637 int obj_groupiter_next(ObjGroupIter*, ObjGroupEntry* out); /* 0 at end */ 638 void obj_groupiter_free(ObjGroupIter*); 639 640 /* Writer is the public KitWriter type aliased to Writer inside libkit 641 * (see src/core/core.h). The streaming API lives in <kit/core.h> as 642 * kit_writer_*. */ 643 644 /* ---- format-aware canonical section names ---- 645 * 646 * For sections the linker synthesizes (init/fini arrays, TLS template 647 * sections), the spelling diverges across object formats: ELF uses 648 * `.init_array` / `.tdata` / etc., Mach-O uses 649 * `__DATA,__mod_init_func` / `__DATA,__thread_data` / etc. These 650 * helpers pick the right name for the active target.obj so the linker 651 * doesn't carry per-format switches at every synthesis site. ELF 652 * returns the historical names; Mach-O / COFF panic until those 653 * writers land. NOTE(review): that last sentence may be stale — this same comment lists Mach-O spellings, and other comments in this header describe a Mach-O writer and Mach-O TLS sections as existing; confirm which formats still panic. */ 654 Sym obj_secname_init_array(Compiler*); 655 Sym obj_secname_fini_array(Compiler*); 656 Sym obj_secname_preinit_array(Compiler*); 657 Sym obj_secname_tdata(Compiler*); 658 Sym obj_secname_tbss(Compiler*); 659 660 /* DWARF debug-section name translation for Mach-O. 661 * 662 * kit carries DWARF sections under their ELF spelling (".debug_info") 663 * internally; on Mach-O they live in the __DWARF segment with "__"- 664 * prefixed section names ("__debug_info"). The transform drops the 665 * leading '.', prepends "__", and truncates to Mach-O's 16-byte 666 * `sectname` field — which reproduces the names Apple's toolchain uses 667 * (e.g. ".debug_str_offsets" -> "__debug_str_offs"). 668 * 669 * Writes the bare Mach-O section name (NUL-terminated, <=16 chars) into 670 * `out` (>=17 bytes) and returns 1 when (`name`,`len`) is a ".debug_*" 671 * section; returns 0 otherwise, leaving `out` untouched.
Shared by the 672 * Mach-O writer (emit) and the DWARF reader (section lookup) so the two 673 * agree on the truncated spelling. */ /* Language note: `char out[17]` in a parameter list is adjusted to `char*`, so the 17 only documents the minimum buffer size; the compiler does not enforce it. */ 674 int obj_macho_debug_sectname(const char* name, size_t len, char out[17]); 675 676 /* Canonical Mach-O "segname,sectname" spelling for a SecKind, as a 677 * NUL-terminated literal. The single source of truth shared by the Mach-O 678 * object writer (name_to_seg_sect) and the `cc -S` printer (asm_emit.c), so 679 * the textual `.section` directive and the binary section header never drift: 680 * SEC_RODATA -> "__TEXT,__const", SEC_DATA -> "__DATA,__data", 681 * SEC_BSS -> "__DATA,__bss", SEC_TEXT -> "__TEXT,__text". 682 * Returns NULL for kinds with no fixed canonical Mach-O home (SEC_OTHER / 683 * SEC_DEBUG), which callers spell from the section's own name. Callers must therefore NULL-check the result before using it as a string. */ 684 const char* obj_macho_canon_secname(SecKind kind); 685 686 /* Inverse of obj_macho_canon_secname: classify a Mach-O native 687 * "segname,sectname" spelling (e.g. "__TEXT,__text", "__DATA,__bss") 688 * into a SecKind. Used by a format-neutral reader / objdump path that 689 * holds the on-disk Mach-O section name and wants the canonical kit 690 * SecKind without re-deriving the per-segment rules at every call. 691 * `name` / `len` are the comma-joined spelling. Returns 1 and writes 692 * *kind on a recognized spelling; returns 0 (leaving *kind untouched) 693 * for an unrecognized name (caller treats as SEC_OTHER). Because *kind is left untouched on 0, a caller that ignores the result reads whatever it initialized *kind to. */ 694 int obj_macho_seckind_for_secname(const char* name, size_t len, SecKind* kind); 695 696 /* Translate a kit-internal (ELF-spelled) section name to its Mach-O 697 * native spelling. Generalizes obj_macho_debug_sectname: handles the 698 * ".debug_*" -> "__DWARF,__debug_*" DWARF case and ".eh_frame" -> 699 * "__TEXT,__eh_frame".
Writes the comma-joined "segname,sectname" 700 * (NUL-terminated) into `out` (>= 40 bytes covers seg(16)+','+sect(16)+ 701 * NUL) and returns 1 when `name` is one of the recognized 702 * format-divergent sections; returns 0 (leaving `out` untouched) 703 * otherwise, so the caller falls back to its own spelling. */ /* As with any array-typed parameter, `char out[40]` is adjusted to `char*`; the 40 is documentation, not a compiler-checked bound. */ 704 int obj_macho_native_secname(const char* name, size_t len, char out[40]); 705 706 /* ---- thread-local storage emission --------------------------------- 707 * 708 * The frontend collects a `_Thread_local` definition's bytes (or marks 709 * it BSS), alignment, and any pointer-init relocs, then calls 710 * obj_define_tls to materialize the storage and bind the user-visible 711 * symbol. The obj layer owns the format split: 712 * 713 * ELF : `sym` is defined directly in `.tdata` / `.tbss`; the 714 * supplied relocs are applied at the same section/offset. 715 * 716 * Mach-O: the data lives under a private `<name>$tlv$init` symbol in 717 * `__DATA,__thread_data` / `__DATA,__thread_bss`; `sym` is 718 * defined onto a 24-byte TLV *descriptor* in 719 * `__DATA,__thread_vars` whose three slots are 720 * [_tlv_bootstrap, 0, &init]. dyld rewrites slot[0] to a 721 * per-descriptor thunk and fills slot[1] with a pthread_key 722 * during image-load; the compiler's TLVP_LOAD_PAGE21 / 723 * PAGEOFF12 codegen sequence targets the descriptor. 724 * 725 * The `_tlv_bootstrap` undef extern is cached on the ObjBuilder so a 726 * second TLV var in the same TU shares one symbol entry. */ /* One pointer-init relocation inside a TLS initializer; `offset` is relative to the data buffer passed to obj_define_tls, not to the final section. */ 727 typedef struct ObjTlsReloc { 728 u32 offset; /* within the data buffer */ 729 RelocKind kind; 730 ObjSymId target; 731 i64 addend; 732 } ObjTlsReloc; 733 /* Parameters, per the overview above: `sym` is the user-visible symbol to bind, `data` / `size` the variable's bytes, `align` its alignment, and `relocs` / `nrelocs` the pointer-init relocs. NOTE(review): `has_nonzero_init` presumably selects the data flavour versus the BSS flavour, and `data` may presumably be NULL in the BSS case — confirm. */ 734 void obj_define_tls(Compiler*, ObjBuilder*, ObjSymId sym, const u8* data, 735 u32 size, int has_nonzero_init, u32 align, 736 const ObjTlsReloc* relocs, u32 nrelocs); 737 738 /* True when reads of `_Thread_local` storage go through a per-variable 739 * descriptor + thunk call rather than a direct TP-relative offset.
740 * Mach-O: yes (TLVP_LOAD_PAGE21 + thunk in descriptor[0]). 741 * ELF: no (Local-Exec / Initial-Exec: `mrs tpidr_el0` + tprel offset). */ 742 int obj_format_tls_via_descriptor(const Compiler*); 743 744 /* ---- format-aware codegen policy ---- 745 * 746 * Backends consult these predicates instead of branching on 747 * target.os / target.obj directly, so the OS/format knowledge stays 748 * concentrated in src/obj/ and a future format lands as one case here 749 * rather than fan-out in every CGTarget. */ 750 751 /* True when references to undefined external symbols must be 752 * materialized via an indirection slot (GOT / non-lazy pointer) 753 * rather than direct page+offset addressing. Mach-O: yes — dyld 754 * binds dylib imports through __DATA,__got at runtime, and the 755 * direct PAGE21/PAGEOFF12 fixups can't carry that binding. ELF 756 * static link: no — the linker resolves SK_UNDEFs at link time and 757 * patches the direct ADRP/ADD bytes in place. */ 758 int obj_format_extern_via_got(const Compiler*); 759 760 /* True when `sym` must be reached via the GOT at the current site: the 761 * format binds extern data through indirection 762 * (obj_format_extern_via_got) AND the symbol is undefined in this 763 * object (section_id == OBJ_SEC_NONE). Pure format/symbol policy with 764 * no per-arch behavior — shared by every backend that emits GOT loads. */ 765 int obj_symbol_extern_via_got(const Compiler*, ObjBuilder*, ObjSymId); 766 int obj_format_split_sections_as_atoms(const Compiler*); 767 768 /* Apply the active object format's C-symbol mangling to `name` (a 769 * NUL-terminated C string) and return the result interned in 770 * `c->global`. Mach-O prepends a single `_`; ELF / COFF / Wasm intern 771 * verbatim. Mirrors the on-disk policy that decl.c / cc.c emit, so 772 * link-time and JIT-time lookups by source-level name find the symbol 773 * regardless of target. Mach-O temp buffer is allocated from 774 * `c->ctx->heap`. 
*/ 775 Sym obj_format_c_mangle(Compiler*, const char* name); 776 777 /* Inverse of obj_format_c_mangle for diagnostic display: if `*name` 778 * carries the active format's leading C-mangle byte, advance the 779 * pointer past it and decrement `*len`. No-op for formats with no 780 * prefix. Lets diagnostics print the source-level symbol name across 781 * targets. */ 782 void obj_format_demangle_c(const Compiler*, const char** name, size_t* len); 783 784 /* Default entry symbol name for a freshly created Linker on the active 785 * object format: `_main` for Mach-O (LC_MAIN names main, dyld owns 786 * startup), `_start` for ELF / COFF / Wasm (set by crt1.o). Returned 787 * as a NUL-terminated literal; the caller interns. */ 788 const char* obj_format_default_entry_name(const Compiler*); 789 790 /* C source-level symbol prefix the active object format prepends on disk: 791 * "_" for Mach-O, "" for ELF / COFF / Wasm. The single source of truth 792 * read by obj_format_c_mangle / obj_format_demangle_c; never NULL (a 793 * format with no prefix returns ""). */ 794 const char* obj_format_c_label_prefix(const Compiler*); 795 796 /* ---- thread-local storage model ---- 797 * 798 * How compiled code reaches a `_Thread_local` on a given (format, OS): 799 * OBJ_TLS_ELF_LE : direct TP-relative offset (ELF Local-Exec / 800 * Initial-Exec): `mrs tpidr_el0` + tprel. 801 * OBJ_TLS_MACHO_DESCRIPTOR: per-variable descriptor + thunk call; the 802 * TLVP reloc pair targets the descriptor. 803 * OBJ_TLS_WINDOWS_TEB : Windows TEB-based access (SECREL into the 804 * per-thread TLS block via the TEB). */ 805 typedef enum ObjTlsModel { 806 OBJ_TLS_ELF_LE = 0, 807 OBJ_TLS_MACHO_DESCRIPTOR = 1, 808 OBJ_TLS_WINDOWS_TEB = 2, 809 } ObjTlsModel; 810 811 /* Returns how compiled code reaches a `_Thread_local` on the active 812 * (format, OS): OBJ_TLS_WINDOWS_TEB for COFF, OBJ_TLS_MACHO_DESCRIPTOR 813 * for Mach-O, OBJ_TLS_ELF_LE otherwise. 
The single source of truth for 814 * the TLS-access decision; obj_format_tls_via_descriptor is now a thin 815 * wrapper over (model == OBJ_TLS_MACHO_DESCRIPTOR). */ 816 ObjTlsModel obj_format_tls_model(const Compiler*); 817 818 /* In-process JIT: true when a reference to symbol `name` is dropped because the 819 * format's TLS access idiom that materializes it is relaxed to in-image 820 * addressing (COFF Windows `_tls_index`; none elsewhere). Beside 821 * obj_format_tls_model as the TLS-mechanism authority. */ 822 int obj_format_jit_drops_symbol_ref(const Compiler*, Sym name); 823 824 /* True when the active object format carries DWARF debug sections 825 * file-only (not mapped into a loadable segment): ELF / Mach-O yes, 826 * COFF no. */ 827 int obj_format_carries_file_only_debug(const Compiler*); 828 829 /* True when the active object format builds its own static GOT / 830 * non-lazy-pointer table at link time even for a static image: 831 * Mach-O yes, else no. */ 832 int obj_format_builds_own_static_got(const Compiler*); 833 834 /* True when the active object format can represent a KitCgSymFeat 835 * `symfeat`. Today this is the TLS-model axis: ELF / Mach-O can 836 * represent every modeled TLS feature, COFF cannot (Windows TEB TLS 837 * uses a different mechanism). Non-TLS features return 1 for every 838 * format. `symfeat` is a KitCgSymFeat value (cast to int at the 839 * boundary). */ 840 int obj_format_supports_symbol_feature(const Compiler*, int symfeat); 841 842 /* True when the active object format pulls an archive member to satisfy a 843 * *weak* undefined reference (PE/COFF COMDAT semantics). COFF yes, 844 * ELF / Mach-O no (they pull only for strong undefs). */ 845 int obj_format_weak_undef_pulls_archive_member(const Compiler*); 846 847 /* True when the active object format recovers weak-external / undefined 848 * references via the mingw single-underscore alias convention (e.g. 849 * `__set_app_type` <-> `_set_app_type`) during link symbol resolution. 
850 * COFF yes, ELF / Mach-O / Wasm no. */ 851 int obj_format_weak_extern_underscore_alias(const Compiler*); 852 853 /* True when static-IFUNC resolution on the active target goes through a 854 * `[__rela_iplt_start, __rela_iplt_end)` table of R_*_IRELATIVE relocs 855 * (walked by FreeBSD's crt before main) rather than kit's ctor-based 856 * __kit_ifunc_init path. The one place the (os == FREEBSD && obj == ELF) 857 * knowledge lives. */ 858 int obj_format_static_ifunc_via_rela_iplt(const Compiler*); 859 860 /* The R_*_IRELATIVE resolver reloc wire type for the active target's 861 * __rela_iplt table (paired with the predicate above), resolved through the 862 * target object format so the generic iplt pass names no format literal. 863 * Returns 0 when the format has no such reloc. */ 864 u32 obj_format_static_ifunc_irelative_type(const Compiler*); 865 866 /* Per-arch variant-I TP bias for the active target's ELF arch: distance 867 * from the TLS image start to where `tp` points in kit's freestanding 868 * layout (16 for AArch64/RISC-V, 0 for x86_64 variant-II). Returns 0 869 * for a non-ELF target or an arch with no ELF descriptor. The 870 * hosted-vs-freestanding RISC-V split is applied by the caller. */ 871 u32 obj_format_elf_tls_tp_bias(const Compiler*); 872 873 /* Format boundary-symbol classifier. Asks the active object format 874 * whether `name` is a symbol the format itself owns as a boundary / 875 * synthetic global, and if so what SymKind it carries. Returns 1 and 876 * writes *symkind (a SymKind value) when the format owns `name` 877 * (PE `__ImageBase` / `_tls_used` -> SK_ABS); returns 0 otherwise, 878 * leaving *symkind untouched. Lets generic link code classify boundary 879 * symbols without a per-format switch. */ 880 int obj_format_boundary_sym_kind(const Compiler*, KitSlice name, int* symkind); 881 882 /* Invoke the active object format's synthetic-input hook (if any) before 883 * symbol resolution. No-op for formats with no synthetic inputs. 
The 884 * hook builds and appends a synthetic LinkInput via Linker internals, so 885 * it takes the Linker; declared here as the obj-side dispatch point. 886 * (The COFF body is wired by T-LINK — see registry.c synth_inputs note.) */ 887 void obj_format_synth_inputs(const Compiler*, Linker*); 888 889 /* ---- format-specific extension payload ---- 890 * 891 * Generic object tables stay format-neutral. Format-specific module-level 892 * metadata (today: only the in-progress Wasm module model) hangs off the 893 * builder under an ObjExtKind tag. One payload per kind. ObjBuilder owns the 894 * pointer's lifetime — obj_free invokes the registered free function. */ 895 typedef void (*ObjExtFreeFn)(Compiler*, void*); 896 void obj_ext_set(ObjBuilder*, ObjExtKind, void* payload, ObjExtFreeFn); 897 void* obj_ext_get(const ObjBuilder*, ObjExtKind); 898 void obj_ext_clear(ObjBuilder*, ObjExtKind); 899 900 /* ============================================================ 901 * Linked-image view (executables / shared objects) 902 * 903 * Relocatable inputs (ET_REL / MH_OBJECT / COFF .obj) have no image: 904 * obj_image() returns NULL. The ET_EXEC / ET_DYN (and Mach-O / PE peer) 905 * readers attach an ObjImage carrying the segment + dynamic view that the 906 * section / symbol tables don't model. The section and symbol tables stay 907 * populated where the format still carries them; the image is the extra 908 * dimension. The builder owns the image; obj_free releases it. 
909 * ============================================================ */ 910 911 typedef enum ObjKind { 912 OBJ_KIND_REL, /* relocatable object — no image */ 913 OBJ_KIND_EXEC, /* executable */ 914 OBJ_KIND_DYN, /* shared object / dylib / DLL */ 915 OBJ_KIND_CORE, /* core dump — detected, not parsed (reserved) */ 916 } ObjKind; 917 918 enum { /* ObjSegment.perms bits */ 919 OBJ_SEG_X = 1u << 0, 920 OBJ_SEG_W = 1u << 1, 921 OBJ_SEG_R = 1u << 2 922 }; 923 924 typedef struct ObjSegment { 925 Sym name; /* PT_* spelling / Mach-O segname, or 0 */ 926 u64 vaddr; /* virtual address */ 927 u64 vsize; /* size in memory */ 928 u64 file_off; /* offset of segment contents in the file */ 929 u64 file_size; /* size on disk (< vsize when the segment carries bss) */ 930 u32 perms; /* OBJ_SEG_R | _W | _X */ 931 u32 align; /* power of two; 1 if none */ 932 } ObjSegment; 933 934 typedef struct ObjImageDep { 935 Sym name; /* DT_NEEDED / imported DLL / dylib install-name */ 936 const Sym* imports; /* imported symbol names (PE/Mach-O); NULL for ELF */ 937 u32 nimports; 938 } ObjImageDep; 939 940 /* Dynamic-table symbol. Distinct from the .symtab entries in the Symbols 941 * table — these come from .dynsym / dyld export trie / PE export table. */ 942 typedef struct ObjImageSym { 943 Sym name; 944 SymBind bind; 945 SymKind kind; 946 ObjSecId section; /* OBJ_SEC_NONE for undefined imports */ 947 u64 value; 948 u64 size; 949 /* ELF symbol-version name (interned), set only for a DSO export that is the 950 * *default* (non-hidden) version of `name` — e.g. libc.so.7's 951 * fstat@@FBSD_1.5. 0 when the input carries no versioning, or this entry is a 952 * hidden compatibility alias (fstat@FBSD_1.0). The linker uses it to emit a 953 * matching .gnu.version_r requirement so the runtime binds the right version 954 * (mandatory on FreeBSD, where the INO64 transition gave `fstat`/`stat` two 955 * incompatible struct-stat layouts behind FBSD_1.0 vs FBSD_1.5). 
*/ 956 Sym version; 957 } ObjImageSym; 958 959 /* Dynamic relocation (.rela.dyn / .rela.plt, dyld binds, PE base relocs). 960 * References the dynamic symbol by interned name; the sym index is implicit 961 * in the dynamic table and not preserved here. */ 962 typedef struct ObjImageReloc { 963 ObjSecId section; /* OBJ_SEC_NONE when the file has no section table */ 964 u64 offset; 965 Sym sym_name; /* 0 for symbol-less relative relocs */ 966 i64 addend; 967 RelocKind kind; 968 } ObjImageReloc; 969 970 /* Raw, format-specific image field that doesn't fit the neutral model. 971 * One flat triple list per image, in the spirit of the per-section 972 * kit_obj_section_format_flags escape hatch: a neutral container with 973 * per-format tag semantics (documented on the public KitObjImageRaw): 974 * PE : data dirs tag=0..15 (index), value=RVA, extra=size; 975 * subsystem tag=KIT_OBJ_RAW_PE_SUBSYSTEM, value=u16; 976 * dllchars tag=KIT_OBJ_RAW_PE_DLLCHARS, value=u16 977 * ELF : .dynamic tag=d_tag, value=d_val, extra=0 978 * Mach-O: load cmds tag=cmd, value=file offset, extra=cmdsize */ 979 typedef struct ObjImageRaw { 980 u32 tag; 981 u64 value; 982 u64 extra; 983 } ObjImageRaw; 984 985 typedef struct ObjImage ObjImage; /* defined in obj.c */ 986 987 /* Accessor — NULL on relocatable inputs. */ 988 const ObjImage* obj_image(const ObjBuilder*); 989 /* Lazily create (and return) the builder's image with the given kind. 990 * Readers call this once they know the input is EXEC/DYN. Idempotent; 991 * a second call updates the kind and returns the existing image. */ 992 ObjImage* obj_image_ensure(ObjBuilder*, ObjKind); 993 994 /* Image scalar setters (readers). */ 995 void obj_image_set_entry(ObjImage*, u64 entry); 996 void obj_image_set_base(ObjImage*, u64 image_base); 997 void obj_image_set_interp(ObjImage*, Sym interp); 998 void obj_image_set_soname(ObjImage*, Sym soname); 999 1000 /* Image table appenders (readers). 
Each copies its argument by value into a 1001 * builder-heap-owned vector. obj_image_add_dep additionally deep-copies the 1002 * ObjImageDep.imports[] name array into image-heap memory, so the reader may 1003 * pass a transient (scratch) array; the Sym values themselves must still be 1004 * interned in the compiler's global pool. */ 1005 void obj_image_add_segment(ObjImage*, const ObjSegment*); 1006 void obj_image_add_dep(ObjImage*, const ObjImageDep*); 1007 void obj_image_add_rpath(ObjImage*, Sym rpath); 1008 void obj_image_add_dynsym(ObjImage*, const ObjImageSym*); 1009 void obj_image_add_dynreloc(ObjImage*, const ObjImageReloc*); 1010 /* Raw format-specific image fields (see ObjImageRaw). Copied by value. */ 1011 void obj_image_add_raw(ObjImage*, const ObjImageRaw*); 1012 /* Undefined symbol names a DSO references (interned). The linker's 1013 * --gc-sections pass roots executable definitions of these so a shared 1014 * library's back-references (e.g. libc.so.7 → `environ` / `__progname`) 1015 * survive section GC. */ 1016 void obj_image_add_undef(ObjImage*, Sym name); 1017 1018 /* Image read-side queries (object_file.c glue, objdump). 
*/ 1019 ObjKind obj_image_kind(const ObjImage*); 1020 u64 obj_image_entry(const ObjImage*); 1021 u64 obj_image_base(const ObjImage*); 1022 Sym obj_image_interp(const ObjImage*); 1023 Sym obj_image_soname(const ObjImage*); 1024 u32 obj_image_nsegments(const ObjImage*); 1025 const ObjSegment* obj_image_segment(const ObjImage*, u32 idx); 1026 u32 obj_image_ndeps(const ObjImage*); 1027 const ObjImageDep* obj_image_dep(const ObjImage*, u32 idx); 1028 u32 obj_image_nrpaths(const ObjImage*); 1029 Sym obj_image_rpath(const ObjImage*, u32 idx); 1030 u32 obj_image_ndynsyms(const ObjImage*); 1031 const ObjImageSym* obj_image_dynsym(const ObjImage*, u32 idx); 1032 u32 obj_image_ndynrelocs(const ObjImage*); 1033 const ObjImageReloc* obj_image_dynreloc(const ObjImage*, u32 idx); 1034 u32 obj_image_nundefs(const ObjImage*); 1035 Sym obj_image_undef(const ObjImage*, u32 idx); 1036 u32 obj_image_nraws(const ObjImage*); 1037 const ObjImageRaw* obj_image_raw(const ObjImage*, u32 idx); 1038 1039 /* ---- file format emitters ---- */ 1040 void emit_elf(Compiler*, ObjBuilder*, Writer*); 1041 void emit_coff(Compiler*, ObjBuilder*, Writer*); 1042 void emit_macho(Compiler*, ObjBuilder*, Writer*); 1043 void emit_wasm(Compiler*, ObjBuilder*, Writer*); 1044 1045 /* ---- file format readers (for ld and objdump) ---- */ 1046 ObjBuilder* read_elf(Compiler*, const char* name, const u8* data, size_t len); 1047 /* ELF ET_DYN reader. Produces an ObjBuilder containing only the DSO's 1048 * exported (dynsym) symbols. Defined dynsym entries land as ObjSyms 1049 * with their original SymBind/SymKind so the linker's symbol-resolution 1050 * pass can match them by name. The DSO's sections, relocations, and 1051 * groups are all skipped — DSOs contribute no bytes to the output. 1052 * 1053 * If `soname_out` is non-NULL, *soname_out receives the DT_SONAME 1054 * interned into the compiler's global Sym pool, or 0 if the DSO has 1055 * no SONAME. 
*/ 1056 ObjBuilder* read_elf_dso(Compiler*, const char* name, const u8* data, 1057 size_t len, Sym* soname_out); 1058 ObjBuilder* read_coff(Compiler*, const char* name, const u8* data, size_t len); 1059 /* PE32+ DLL reader. Walks the IMAGE_DIRECTORY_ENTRY_EXPORT data 1060 * directory and produces an ObjBuilder containing one defined symbol 1061 * (OBJ_SEC_NONE, SB_GLOBAL, SK_FUNC) per name in the Export Name 1062 * Table — the peer of read_elf_dso / read_macho_dso. The DLL's 1063 * own Name string (the analogue of DT_SONAME / LC_ID_DYLIB) is 1064 * interned and returned via *soname_out, or 0 if missing. 1065 * 1066 * Scope: PE32+ images with IMAGE_FILE_DLL set, machine AMD64 or 1067 * ARM64. Ordinal-only exports (in the EAT but not the ENT) are not 1068 * synthesized in v1 — almost all real-world imports are by name. */ 1069 ObjBuilder* read_coff_dso(Compiler*, const char* name, const u8* data, 1070 size_t len, Sym* soname_out); 1071 /* PE32+ linked-image reader (peer of read_elf_image / read_macho_image). 1072 * Handles both executables (IMAGE_FILE_DLL clear -> OBJ_KIND_EXEC) and 1073 * DLLs (set -> OBJ_KIND_DYN), populating the neutral ObjImage: one 1074 * segment per PE section, exports -> dynsyms + soname, imports -> deps + 1075 * undefined dynsyms, base relocs -> RELATIVE dynrelocs, plus a full 1076 * section/symbol view via the ObjBuilder Section table, and the raw 1077 * data-directory / subsystem / dllchars escape-hatch entries. Lenient: 1078 * malformed sub-tables are skipped; truncated core headers panic. 1079 * Dispatched from read_coff on the DOS 'MZ' magic. */ 1080 ObjBuilder* read_coff_image(Compiler*, const char* name, const u8* data, 1081 size_t len); 1082 ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len); 1083 /* Mach-O MH_DYLIB reader. Produces an ObjBuilder containing only the 1084 * dylib's exported symbols (as defined OBJ_SEC_NONE entries — the 1085 * peer of read_elf_dso). 
LC_ID_DYLIB's install-name is interned and 1086 * returned via *install_name_out (the Mach-O analogue of DT_SONAME). 1087 * 1088 * arm64-only for v1; other cputypes panic. */ 1089 ObjBuilder* read_macho_dso(Compiler*, const char* name, const u8* data, 1090 size_t len, Sym* install_name_out); 1091 /* Apple `.tbd` (text-based stub) reader. Parses the YAML-shaped TAPI 1092 * format produced by Apple's SDKs (see /usr/lib/lib*.tbd in 1093 * `xcrun --show-sdk-path`). Extracts the umbrella install-name and the 1094 * union of every exported / re-exported symbol whose `targets:` block 1095 * names the active arch (e.g. arm64-macos). Symbols are emitted into 1096 * the ObjBuilder verbatim (they already include the leading `_` Apple 1097 * uses for C symbols), so resolve_undefs matches them against the 1098 * Mach-O on-disk symbol names directly. 1099 * 1100 * The arch string ("arm64" or "x86_64") comes from Compiler.target. */ 1101 ObjBuilder* read_tbd(Compiler*, const char* name, const u8* data, size_t len, 1102 Sym* install_name_out); 1103 1104 #endif