kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

obj.h (49860B)


      1 #ifndef KIT_OBJ_H
      2 #define KIT_OBJ_H
      3 
      4 #include "core/buf.h"
      5 #include "core/core.h"
      6 
      7 /* Forward decl: the synthetic-input hook (obj_format_synth_inputs) takes a
      8  * Linker but obj.h must not pull in the link subsystem. Defined in
      9  * src/link; only used here as an opaque pointer. */
     10 typedef struct Linker Linker;
     11 
     12 typedef enum SecKind { /* coarse section class; the SecKind argument of obj_section() */
     13   SEC_TEXT,   /* canonical Mach-O spelling: "__TEXT,__text" */
     14   SEC_RODATA, /* canonical Mach-O spelling: "__TEXT,__const" */
     15   SEC_DATA,   /* canonical Mach-O spelling: "__DATA,__data" */
     16   SEC_BSS,    /* canonical Mach-O spelling: "__DATA,__bss"; Section.bss_size is nonzero only here */
     17   SEC_DEBUG,  /* no fixed canonical Mach-O home (obj_macho_canon_secname returns NULL) */
     18   SEC_OTHER,  /* no fixed canonical Mach-O home (obj_macho_canon_secname returns NULL) */
     19 } SecKind;
     20 
     21 typedef enum SecFlag { /* bit flags; presumably the bits carried in the u16 `flags` of Section / obj_section() */
     22   SF_EXEC = 1u << 0,
     23   SF_WRITE = 1u << 1,
     24   SF_ALLOC = 1u << 2,
     25   SF_TLS = 1u << 3,
     26   SF_MERGE = 1u << 4,
     27   SF_STRINGS = 1u << 5,
     28   SF_GROUP = 1u << 6,
     29   SF_LINK_ORDER = 1u << 7,
     30   SF_RETAIN = 1u << 8, /* SHF_GNU_RETAIN: do not GC even if unreferenced */
     31 } SecFlag; /* highest bit in use is 8, so every flag fits a u16; bits above 15 would not */
     32 
     33 typedef enum SecSem { /* section semantics; stored in Section.sem and (with ext_kind) types Section.info */
     34   SSEM_PROGBITS, /* also the lossy fallback for raw types with no SecSem; see Section.ext_type */
     35   SSEM_NOBITS,   /* no file bytes: obj_align_to bumps bss_size instead of writing zeros */
     36   SSEM_SYMTAB,
     37   SSEM_STRTAB,
     38   SSEM_RELA,
     39   SSEM_REL,
     40   SSEM_NOTE,
     41   SSEM_INIT_ARRAY,    /* name chosen per format by obj_secname_init_array */
     42   SSEM_FINI_ARRAY,    /* name chosen per format by obj_secname_fini_array */
     43   SSEM_PREINIT_ARRAY, /* name chosen per format by obj_secname_preinit_array */
     44   SSEM_GROUP,
     45   SSEM_WASM_CUSTOM,
     46 } SecSem;
     47 
     48 typedef enum SymBind { /* symbol binding; values are sequential (0,1,2), not bit flags */
     49   SB_LOCAL,
     50   SB_GLOBAL, /* an unreferenced SK_UNDEF global is pruned by obj_sweep_dead */
     51   SB_WEAK,   /* an unreferenced SK_UNDEF weak is pruned by obj_sweep_dead */
     52 } SymBind;
     53 
     54 typedef enum SymVis { /* symbol visibility; presumably what the u8 ObjSym.vis holds */
     55   SV_DEFAULT,
     56   SV_HIDDEN,    /* NOTE(review): ELF numbers STV_INTERNAL=1, STV_HIDDEN=2, STV_PROTECTED=3, */
     57   SV_PROTECTED, /* so this order differs from st_other — emitters/readers must map, not cast; */
     58   SV_INTERNAL,  /* confirm the ELF reader/writer translate explicitly. */
     59 } SymVis;
     60 
     61 typedef enum SymKind { /* symbol type; presumably what the u16 ObjSym.kind holds */
     62   SK_UNDEF, /* undefined external; ObjSym.section_id is OBJ_SEC_NONE; the definedness key */
     63   SK_FUNC,
     64   SK_OBJ,
     65   SK_SECTION,
     66   SK_FILE,
     67   SK_COMMON, /* ObjSym.common_align is nonzero for these */
     68   SK_TLS,
     69   SK_ABS,
     70   /* Defined symbol with no specific type — assembly labels, AArch64
     71    * mapping symbols (`$x`, `$d`). Distinct from SK_UNDEF (undefined
     72    * external) so the linker keeps definedness keyed on SK_UNDEF. */
     73   SK_NOTYPE,
     74   /* GNU IFUNC: a function whose implementation is selected at runtime
     75    * by a resolver. Round-trips as STT_GNU_IFUNC (10); presence forces
     76    * EI_OSABI=ELFOSABI_GNU on emit. */
     77   SK_IFUNC,
     78 } SymKind;
     79 
     80 typedef enum ObjExtKind { /* object-format tag; stored in Section.ext_kind, set via obj_section_set_ext() */
     81   OBJ_EXT_NONE,
     82   OBJ_EXT_ELF,
     83   OBJ_EXT_COFF,
     84   OBJ_EXT_MACHO,
     85   OBJ_EXT_WASM,
     86   /* Wasm-target frontend-supplied import descriptors keyed by symbol name.
     87    * Populated by lang/c when an extern declaration carries
     88    * __attribute__((import_module(...), import_name(...))); consumed by the
     89    * wasm backend when promoting undefined function symbols to imports. */
     90   OBJ_EXT_WASM_IMPORTS,
     91 } ObjExtKind; /* ObjSym.ext_kind is only a u8: values here must stay below 256 if it holds this enum */
     92 
     93 typedef u32 ObjSecId; /* section handle; ids stay stable across the post-finalize mutators */
     94 #define OBJ_SEC_NONE 0u /* "no section", e.g. ObjSym.section_id of an undefined symbol */
     95 
     96 typedef u32 ObjGroupId; /* group handle; ids stay stable across the post-finalize mutators */
     97 #define OBJ_GROUP_NONE 0u /* Section.group_id of a section that is not in a COMDAT/group */
     98 
     99 /* Per-ObjBuilder symbol handle. Object files own their symbol namespace:
    100  * local/static symbols, section symbols, file symbols, unnamed labels, common
    101  * definitions, and external references are all represented by ObjSymId values
    102  * scoped to one builder. 0 is reserved as "none". */
    103 typedef u32 ObjSymId;
    104 #define OBJ_SYM_NONE 0u
    105 
    106 typedef u32 ObjAtomId; /* atom handle, as returned by obj_atom_define() */
    107 #define OBJ_ATOM_NONE 0u /* presumably the "not found" result of obj_atom_find*() — TODO confirm */
    108 
    109 typedef enum ObjAtomFlag { /* bit flags; presumably the u32 `flags` of ObjAtom / obj_atom_define() */
    110   OBJ_ATOM_RETAIN = 1u << 0, /* NOTE(review): looks like the atom-level analogue of SF_RETAIN — confirm */
    111 } ObjAtomFlag;
    112 
    113 typedef enum RelocKind { /* values are implicit and pinned by the public KIT_RELOC_* numbering: append new kinds at the tail only */
    114   R_NONE = 0,
    115   R_ABS32,
    116   R_ABS64,
    117   R_REL32,
    118   R_REL64,
    119   R_PC32,
    120   R_PC64,
    121   R_GOT32,
    122   R_PLT32,
    123   /* Neutral data-word kinds completing the ABS/PREL/TPOFF families. */
    124   R_ABS8,
    125   R_ABS16,
    126   R_PREL16,
    127   /* Internal-only: a raw 64-bit local-exec tpoff written into a TLS GOT
    128    * slot by link_emit_internal_tpoff64.  Never appears on the wire.
    129    * x86_64 stores variant-II (X - tls_memsz); AArch64 and RISC-V store
    130    * variant-I ((X - tls_vaddr) + TCB).  Byte encoding is identical on
    131    * all three arches: a plain 64-bit little-endian write. */
    132   R_TPOFF64,
    133   R_AARCH64_ADR_GOT_PAGE,
    134   R_AARCH64_LD64_GOT_LO12_NC,
    135   R_ARM_CALL,
    136   R_ARM_MOVW,
    137   R_ARM_MOVT,
    138   R_ARM_B26,
    139   R_AARCH64_JUMP26,
    140   R_AARCH64_CALL26,
    141   R_AARCH64_CONDBR19,
    142   R_AARCH64_TSTBR14,
    143   R_AARCH64_LD_PREL_LO19,
    144   R_AARCH64_ADR_PREL_LO21,
    145   /* MCEmitter-only function-local label address materialization. The fixup
    146    * patches a fixed 16-byte sequence as either ADR+B+literal when in range,
    147    * or LDR-literal+B+relocated-literal when the ADR range is exceeded. */
    148   R_AARCH64_INTRA_LABEL_ADDR,
    149   R_AARCH64_ADR_PREL_PG_HI21,
    150   R_AARCH64_ADR_PREL_PG_HI21_NC,
    151   R_AARCH64_ADD_ABS_LO12_NC,
    152   R_AARCH64_LDST8_ABS_LO12_NC,
    153   R_AARCH64_LDST16_ABS_LO12_NC,
    154   R_AARCH64_LDST32_ABS_LO12_NC,
    155   R_AARCH64_LDST64_ABS_LO12_NC,
    156   R_AARCH64_LDST128_ABS_LO12_NC,
    157   /* AArch64 Mach-O TLV (thread-local variable) descriptor access. The
    158    * compiler emits these to reference a TLV descriptor in
    159    * __DATA,__thread_vars; the linker routes both through a synthetic
    160    * __DATA,__thread_ptrs slot (analogous to __got for non-TLV externs).
    161    *
    162    *   adrp x0, _var@TLVPPAGE         ; TLVP_LOAD_PAGE21
    163    *   ldr  x0, [x0, _var@TLVPPAGEOFF]; TLVP_LOAD_PAGEOFF12  -> descriptor
    164    *   ldr  x1, [x0]                  ; thunk (filled by dyld)
    165    *   blr  x1                        ; thunk(x0=descriptor) -> x0 = TLV addr
    166    *
    167    * Encoding-wise PAGE21 is ADRP-form and PAGEOFF12 is a 64-bit-LDR
    168    * lo12 (scale=3). The linker rewrites S to the matching __thread_ptrs
    169    * slot's vaddr before applying. */
    170   R_AARCH64_TLVP_LOAD_PAGE21,
    171   R_AARCH64_TLVP_LOAD_PAGEOFF12,
    172   /* AArch64 TLS Local-Exec model. */
    173   R_AARCH64_TLSLE_ADD_TPREL_HI12,
    174   R_AARCH64_TLSLE_ADD_TPREL_LO12,
    175   R_AARCH64_TLSLE_ADD_TPREL_LO12_NC,
    176   R_AARCH64_TLSLE_LDST8_TPREL_LO12,
    177   R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC,
    178   R_AARCH64_TLSLE_LDST16_TPREL_LO12,
    179   R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC,
    180   R_AARCH64_TLSLE_LDST32_TPREL_LO12,
    181   R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC,
    182   R_AARCH64_TLSLE_LDST64_TPREL_LO12,
    183   R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC,
    184   /* Dynamic-only relocs: emitted into .rela.dyn / .rela.plt of an
    185    * ET_DYN/ET_EXEC output and processed by the runtime loader. They
    186    * never appear in ET_REL inputs from a compiler; the linker may
    187    * synthesize them during dynamic-exe / shared-lib emit, and the
    188    * reader recognizes them when it walks an ET_DYN's .rela.* sections
    189    * (currently only used for symbol-name extraction, not applied). */
    190   R_AARCH64_GLOB_DAT,
    191   R_AARCH64_JUMP_SLOT,
    192   R_AARCH64_RELATIVE,
    193   R_AARCH64_COPY,
    194   /* x86_64 reloc kinds. Most map directly to the existing R_ABS and
    195    * R_PC entries; the few here are the x86_64-only encodings (8-bit
    196    * displacements, GOT/PLT, dynamic linker-only entries). */
    197   R_X64_PC8,
    198   R_X64_32S,
    199   R_X64_PLT32,
    200   R_X64_GOTPCREL,
    201   R_X64_GOTPCRELX,
    202   R_X64_REX_GOTPCRELX,
    203   R_X64_GOTPC32,
    204   R_X64_GOTOFF64,
    205   R_X64_TPOFF32,
    206   R_X64_DTPOFF32,
    207   R_X64_DTPMOD64,
    208   R_X64_DTPOFF64,
    209   R_X64_TLSGD,
    210   R_X64_TLSLD,
    211   R_X64_GOTTPOFF,
    212   R_X64_GLOB_DAT,
    213   R_X64_JUMP_SLOT,
    214   R_X64_RELATIVE,
    215   R_X64_COPY,
    216   R_RV_HI20,
    217   R_RV_LO12_I,
    218   R_RV_LO12_S,
    219   R_RV_BRANCH,
    220   R_RV_JAL,
    221   R_RV_CALL,
    222   R_RV_PCREL_HI20,
    223   R_RV_PCREL_LO12_I,
    224   R_RV_PCREL_LO12_S,
    225   /* Intra-section label address materialization via an AUIPC+ADDI pair.
    226    * Used only by MCEmitter intra-section label fixups (CGTarget
    227    * load_label_addr). Width is 8 bytes, covering both instructions; the
    228    * fixup site is the AUIPC and the disp is the label byte offset
    229    * relative to the AUIPC site. */
    230   R_RV_INTRA_AUIPC_ADDI,
    231   R_RV_GOT_HI20,
    232   /* TLS Initial-Exec: %tls_ie_pcrel_hi(sym). Paired with R_RV_PCREL_LO12_I
    233    * on the follow-on ld. The GOT entry holds (&sym - tp); the AUIPC/ld
    234    * pair materializes that offset into a register so the caller adds tp. */
    235   R_RV_TLS_GOT_HI20,
    236   R_RV_TPREL_HI20,
    237   R_RV_TPREL_LO12_I,
    238   R_RV_TPREL_LO12_S,
    239   R_RV_TPREL_ADD,
    240   R_ADD8,
    241   R_ADD16,
    242   R_ADD32,
    243   R_ADD64,
    244   R_SUB8,
    245   R_SUB16,
    246   R_SUB32,
    247   R_SUB64,
    248   R_RV_ALIGN,
    249   R_RV_RVC_BRANCH,
    250   R_RV_RVC_JUMP,
    251   R_RV_RELAX,
    252   R_SUB6,
    253   R_SET6,
    254   R_SET_ULEB128,
    255   R_SUB_ULEB128,
    256   R_WASM_FUNCIDX,
    257   R_WASM_TABLEIDX,
    258   R_WASM_MEMOFS,
    259   R_WASM_TYPEIDX,
    260   /* COFF/PE-only reloc kinds — section-relative fixups used by Windows
    261    * TLS Local-Exec lowering and debug info. SECREL = 32-bit offset
    262    * from the start of the containing section. SECTION = 16-bit section
    263    * index (1-based). Both arch-independent on the kit side; the
    264    * per-arch translators map to IMAGE_REL_{AMD64,ARM64}_SECREL/SECTION. */
    265   R_COFF_SECREL,
    266   R_COFF_SECTION,
    267   /* AArch64 Windows TLS access uses an ADD-imm12-pair to materialize a
    268    * 24-bit SECREL value into a register:
    269    *   add  xd, xd, #:secrel_hi12:sym, lsl #12   ; HIGH12A bits [23:12]
    270    *   add  xd, xd, #:secrel_lo12:sym            ; LOW12A  bits [11:0]
    271    * The instruction at the patch site already has sh=1 (HIGH) or sh=0
    272    * (LOW) preset by the codegen; the linker only patches the imm12
    273    * field at bits [21:10]. NC variants ("no carry / no overflow check"
    274    * in PE terminology) mean the high bits of SECREL above 24 are
    275    * discarded — fine for any .tls section under 16 MiB. */
    276   R_COFF_AARCH64_SECREL_LOW12A,
    277   R_COFF_AARCH64_SECREL_HIGH12A,
    278   /* AArch64 TLS Initial-Exec. The ADRP/LDR pair loads the symbol's
    279    * TP-relative offset from a GOT slot; the linker fills that slot with a
    280    * 64-bit tpoff (R_TPOFF64) and redirects these to the slot, so they apply
    281    * exactly like the regular ADR_GOT_PAGE / LD64_GOT_LO12_NC pair.
    282    * Appended at the enum tail so the public KIT_RELOC_* values (object.h)
    283    * keep their pinned numbering. */
    284   R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21,
    285   R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC,
    286   /* COFF ADDR32NB: 32-bit image-relative RVA (S + A - ImageBase), used by
    287    * PE exception tables and other image metadata. */
    288   R_COFF_ADDR32NB,
    289 } RelocKind; /* presumably narrowed into the u16 Reloc.kind by obj_reloc(); reloc_kind_name() gives the diagnostic spelling */
    290 
    291 typedef struct Section { /* one section of an ObjBuilder; read via obj_section_get() */
    292   Sym name;     /* changed post-finalize by obj_section_rename */
    293   u16 kind;     /* presumably SecKind, as passed to obj_section() */
    294   u16 flags;    /* presumably SecFlag bits, as passed to obj_section() */
    295   u16 sem;      /* SecSem */
    296   u16 ext_kind; /* ObjExtKind */
    297   u32 align;
    298   u32 entsize;
    299   ObjSecId link;       /* section index or OBJ_SEC_NONE */
    300   u32 info;            /* section-format dependent, typed by sem/ext_kind */
    301   ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a COMDAT/group */
    302   u32 bss_size;        /* nonzero only for SEC_BSS */
    303   u64 addr;            /* load vaddr (sh_addr); 0 for relocatable inputs */
    304   /* Format-specific raw section type (ELF sh_type, COFF Characteristics
    305    * subfield, etc.).  Set by .o readers when the canonical SecSem
    306    * mapping is lossy — e.g., SHT_LLVM_ADDRSIG (0x6FFF4C03) and
    307    * SHT_ARM_ATTRIBUTES (0x70000003) collapse to SSEM_PROGBITS but
    308    * the emitter must write back the original value to round-trip.
    309    * Zero means "no override; derive from sem". */
    310   u32 ext_type;
    311   u32 ext_flags; /* same idea for format-specific sh_flags bits
    312                     not represented in SecFlag (e.g. SHF_EXCLUDE) */
    313   /* Tombstone for strip/objcopy-style mutations. Set by
    314    * obj_section_remove; honored by obj_sweep_dead and the emitters.
    315    * Iterators / direct ID-based access on the builder must consult this
    316    * bit and skip removed entries. */
    317   u8 removed;
    318   Buf bytes; /* section contents; replaced wholesale by obj_section_replace_bytes */
    319 } Section;
    320 
    321 typedef struct Reloc { /* one relocation record; read via obj_reloc_at() */
    322   ObjSecId section_id; /* section argument of obj_reloc(): presumably the section holding the fixup site */
    323   u32 offset;          /* offset argument of obj_reloc(): presumably the site's offset within that section */
    324   u16 kind;            /* presumably a RelocKind narrowed to 16 bits */
    325   u8 has_explicit_addend;
    326   u8 pair; /* paired/following relocation, format-specific */
    327   /* Tombstone set by obj_sweep_dead when the reloc points at a removed
    328    * section or symbol. NOTE(review): no slack exists before `sym` (kind +
    329    * the two u8s fill 4 bytes), so this likely grows the struct — confirm. */
    330   u8 removed; /* assuming 1/2/4/8-byte u8/u16/u32/i64 with natural alignment: sizeof goes 24 -> 32 */
    331   ObjSymId sym; /* target symbol; obj_reloc_ex marks it referenced */
    332   i64 addend;
    333 } Reloc;
    334 
    335 typedef struct ObjSym { /* one symbol of an ObjBuilder; read via obj_symbol_get() or the symiter */
    336   Sym name;    /* changed post-finalize by obj_symbol_rename */
    337   u16 bind;    /* presumably SymBind; changed post-finalize by obj_symbol_set_bind */
    338   u16 kind;    /* presumably SymKind */
    339   u8 vis;      /* presumably SymVis; changed post-finalize by obj_symbol_set_vis */
    340   u8 ext_kind; /* presumably ObjExtKind (note: narrower than the u16 Section.ext_kind) */
    341   u16 flags;   /* format-specific pass-through bits (e.g. Mach-O n_desc); see obj_symbol_set_flags */
    342   ObjSecId section_id; /* OBJ_SEC_NONE if undef */
    343   u64 value;           /* offset within section, or absolute */
    344   u64 size;
    345   u64 common_align; /* nonzero for SK_COMMON */
    346   /* Lifecycle gate for spurious-UNDEF pruning at .o emit time.
    347    *
    348    * The C frontend mints an ObjSym for every `extern` declaration it
    349    * parses (so a header like <stdio.h> creates 50+ ObjSyms in one TU).
    350    * Most of those are never the target of any relocation. `referenced`
    351    * tracks that distinction: obj_reloc_ex sets it on the target, and
    352    * the file emitters (elf_emit / macho_emit) drop entries that are
    353    * still SK_UNDEF + (SB_GLOBAL|SB_WEAK) + !referenced from the output
    354    * symbol table.
    355    *
    356    * Definitions never need the gate — kind != SK_UNDEF for those, so
    357    * the filter never considers them. Readers (elf_read, macho_read)
    358    * mark every read-in symbol referenced=1 so a roundtrip preserves
    359    * UNDEFs that came from another tool's output. */
    360   u8 referenced; /* set by obj_sym_mark_referenced / obj_sym_set_referenced */
    361   /* Tombstone for strip/objcopy. Set by obj_symbol_remove or cascaded
    362    * by obj_sweep_dead when this symbol is defined in a removed section.
    363    * The UNDEF-prune predicate (was: !referenced && SK_UNDEF && global/weak)
    364    * is also folded into the sweep, so emit-time symbol loops only need to
    365    * check `removed`. */
    366   u8 removed;
    367 } ObjSym;
    368 
    369 typedef struct ObjGroup { /* a section group (COMDAT and friends); read via obj_group_get() or the groupiter */
    370   Sym name;
    371   ObjSymId signature; /* signature symbol; if it is removed, obj_sweep_dead removes the group */
    372   ObjSecId* sections; /* member section ids (nsections entries); filtered in place by obj_sweep_dead */
    373   u32 nsections;      /* length of `sections`; shrinks as the sweep drops removed members */
    374   u32 flags;          /* `flags` argument of obj_group(); meaning not visible in this header */
    375   /* Tombstone — set by obj_group_remove, or cascaded by obj_sweep_dead
    376    * when every member section has been removed (or the signature symbol
    377    * has been removed). */
    378   u8 removed;
    379 } ObjGroup;
    380 
    381 typedef struct ObjAtom { /* fields mirror the obj_atom_define() arguments; read via obj_atom_get() */
    382   ObjSecId section_id; /* section argument of obj_atom_define() */
    383   u32 offset;          /* presumably the atom's start offset within section_id (cf. obj_atom_find) */
    384   u32 size;            /* presumably the atom's length in bytes — TODO confirm unit */
    385   ObjSymId signature;  /* signature symbol passed to obj_atom_define() (cf. obj_atom_find_symbol) */
    386   u32 flags;           /* presumably ObjAtomFlag bits */
    387   u8 removed;          /* NOTE(review): tombstone by analogy with Section/ObjSym; its setter is not visible here */
    388 } ObjAtom;
    389 
    390 /* The single concrete in-memory object representation.
    391  * Written by MCEmitter/CGTarget (during compile) or by an .o reader (during
    392  * link). Read by file emitters, the linker (file and JIT), and objdump.
    393  *
    394  * Invariant: post-finalize state is identical in shape to what an .o reader
    395  * would produce from a written-out object — so consumers don't care which
    396  * path produced it.
    397  *
    398  * Lifecycle gates:
    399  *   1. MCEmitter/CGTarget (or a .o reader) issues writes.
    400  *   2. cgtarget_finalize must be called before any debug_emit or read access on
    401  *      the builder. At -O2 it flushes lowered code into sections.
    402  *   3. debug_emit (if -g) writes .debug_* sections.
    403  *   4. obj_finalize closes the builder: computes flat section offsets, applies
    404  *      pending fixups within sections, and freezes the read-side view.
    405  *      No further writes are permitted afterward.
    406  *   5. File emitters and the linker consume via the read API.
    407  *
    408  * The handle type itself is the public KitObjBuilder, aliased to ObjBuilder
    409  * inside libkit (see src/core/core.h). */
    410 
    411 ObjBuilder* obj_new(Compiler*);
    412 void obj_free(ObjBuilder*);
    413 
    414 /* The owning Compiler; needed by consumers (e.g. kit_disasm_iter_new)
    415  * that take a bare ObjBuilder and still must pool_str() symbol names
    416  * against the right pool. */
    417 Compiler* obj_compiler(const ObjBuilder*);
    418 
    419 /* ---- write side (MCEmitter/CGTarget and .o readers) ---- */
    420 ObjSecId obj_section(ObjBuilder*, Sym name, SecKind, u16 flags, u32 align);
    421 ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags,
    422                         u32 align, u32 entsize, u32 link, u32 info);
    423 void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags);
    424 void obj_section_set_entsize(ObjBuilder*, ObjSecId, u32 entsize);
    425 void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align);
    426 void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId);
    427 void obj_section_set_link_info(ObjBuilder*, ObjSecId, ObjSecId link, u32 info);
    428 void obj_section_set_addr(ObjBuilder*, ObjSecId, u64 addr);
    429 /* Set format-specific raw sh_type/sh_flags overrides (see Section.ext_type
    430  * comment).  Zero ext_type means "no override". */
    431 void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind, u32 ext_type,
    432                          u32 ext_flags);
    433 void obj_write(ObjBuilder*, ObjSecId section_id, const void* data, size_t n);
    434 u8* obj_reserve(ObjBuilder*, ObjSecId section_id, size_t n);
    435 void obj_reserve_bss(ObjBuilder*, ObjSecId section_id, u32 size, u32 align);
    436 /* Pad `section_id` to `align`, returning the resulting offset.  For
    437  * PROGBITS sections this writes zero bytes; for NOBITS it bumps
    438  * bss_size.  Callers that share a section across multiple symbols use
    439  * this to ensure each placement starts at the symbol's required
    440  * alignment, since dedup of obj_section means a placement isn't
    441  * automatically aligned just because the section's own align is set. */
    442 u32 obj_align_to(ObjBuilder*, ObjSecId section_id, u32 align);
    443 u32 obj_pos(ObjBuilder*, ObjSecId section_id);
    444 void obj_patch(ObjBuilder*, ObjSecId section_id, u32 ofs, const void* data,
    445                size_t n);
    446 
    447 ObjSymId obj_symbol(ObjBuilder*, Sym name, SymBind, SymKind,
    448                     ObjSecId section_id, u64 value, u64 size);
    449 ObjSymId obj_symbol_ex(ObjBuilder*, Sym name, SymBind, SymVis, SymKind,
    450                        ObjSecId section_id, u64 value, u64 size,
    451                        u64 common_align);
    452 /* Allocate a stable symbol id for data that may be discarded before emission.
    453  * The returned symbol is tombstoned and not entered in the name index; callers
    454  * must publish it with obj_symbol_define_live if the data is actually emitted.
    455  */
    456 ObjSymId obj_symbol_defer(ObjBuilder*, Sym name, SymBind, SymVis, SymKind,
    457                           u64 size);
    458 ObjSymId obj_symbol_find(ObjBuilder*, Sym name);
    459 /* obj_symbol_ex creates a symbol; obj_symbol_define fills in the
    460  * (section_id, value, size) fields of an already-created symbol. The pair
    461  * supports forward references: an undefined ObjSymId is created when first
    462  * needed for a relocation, and defined later when its definition is emitted. */
    463 void obj_symbol_define(ObjBuilder*, ObjSymId, ObjSecId section_id, u64 value,
    464                        u64 size);
    465 void obj_symbol_define_live(ObjBuilder*, ObjSymId, ObjSecId section_id,
    466                             u64 value, u64 size);
    467 
    468 void obj_reloc(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind,
    469                ObjSymId sym, i64 addend);
    470 void obj_reloc_ex(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind,
    471                   ObjSymId sym, i64 addend, int explicit_addend, int pair);
    472 
    473 /* Force ObjSym::referenced = 1 on the given symbol. obj_reloc_ex calls this
    474  * automatically; the readers (elf_read / macho_read) call it on every
    475  * ingested symbol so a roundtrip preserves UNDEFs that another tool
    476  * emitted into the input. */
    477 void obj_sym_mark_referenced(ObjBuilder*, ObjSymId);
    478 void obj_sym_set_referenced(ObjBuilder*, ObjSymId, int referenced);
    479 
    480 ObjAtomId obj_atom_define(ObjBuilder*, ObjSecId section_id, u32 offset,
    481                           u32 size, ObjSymId signature, u32 flags);
    482 
    483 ObjGroupId obj_group(ObjBuilder*, Sym name, ObjSymId signature, u32 flags);
    484 void obj_group_add_section(ObjBuilder*, ObjGroupId group_id,
    485                            ObjSecId section_id);
    486 
    487 void obj_finalize(ObjBuilder*);
    488 
    489 /* ---- post-finalize mutators (strip / objcopy support) ----
    490  *
    491  * Mutators flip per-entry fields and / or `removed` tombstones. Cascading
    492  * cleanup (drop relocs against removed sections, etc.) is deferred to
    493  * obj_sweep_dead, which the emitters call automatically. Mutators are
    494  * cheap individual field writes; they do not re-index or compact storage,
    495  * so ObjSecId / ObjSymId / ObjGroupId remain stable.
    496  *
    497  * No-ops when given OBJ_SEC_NONE / OBJ_SYM_NONE / OBJ_GROUP_NONE, and
    498  * silently ignore ids that are out of range or already removed (the
    499  * driver tools call these in bulk and benefit from idempotency). */
    500 void obj_section_remove(ObjBuilder*, ObjSecId);
    501 void obj_symbol_remove(ObjBuilder*, ObjSymId);
    502 void obj_group_remove(ObjBuilder*, ObjGroupId);
    503 void obj_section_rename(ObjBuilder*, ObjSecId, Sym new_name);
    504 void obj_symbol_rename(ObjBuilder*, ObjSymId, Sym new_name);
    505 void obj_symbol_set_bind(ObjBuilder*, ObjSymId, SymBind);
    506 void obj_symbol_set_vis(ObjBuilder*, ObjSymId, SymVis);
    507 /* Replace `section_id`'s contents wholesale with `n` bytes from `data`.
    508  * Resets bss_size (so a former NOBITS section gains real bytes) and
    509  * preserves the section's other attributes (name, kind, flags, align).
    510  * Existing relocations against the section are kept — caller is
    511  * responsible for issuing obj_symbol_remove on any defined symbols whose
    512  * (value, size) no longer fits, etc. */
    513 void obj_section_replace_bytes(ObjBuilder*, ObjSecId, const u8* data, size_t n);
    514 
    515 /* Tombstone-driven consistency sweep. Called by each file-format emitter
    516  * at the top of emit; consumers that walk a builder by raw section/symbol/
    517  * reloc/group ID after sweep must respect the `removed` bit on each entry.
    518  *
    519  * Does the following passes:
    520  *   1. Cascade: any symbol defined in a removed section becomes removed.
    521  *   2. UNDEF prune: any non-referenced SK_UNDEF global/weak becomes removed
    522  *      (folds the historical "spurious extern from a header" filter).
    523  *   3. Reloc cleanup: any reloc whose containing section, defining section,
    524  *      or target symbol is removed becomes removed.
    525  *   4. Group compaction: each group's section list is filtered in place to
    526  *      drop removed members; a group whose list empties out (or whose
    527  *      signature symbol has been removed) is itself marked removed.
    528  *   5. Section link cleanup: Section.link cleared if it points at a
    529  *      removed section.
    530  *
    531  * Idempotent — safe to call multiple times. On a never-mutated builder
    532  * only pass 2 has any effect. */
    533 void obj_sweep_dead(ObjBuilder*);
    534 
    535 /* Format-specific ELF e_flags (per-arch ABI bits, e.g. EF_RISCV_RVC |
    536  * EF_RISCV_FLOAT_ABI_DOUBLE on RV64). Set by read_elf during input
    537  * parsing; consumed by emit_elf for round-trip. The setter records
    538  * a presence bit so emit_elf can distinguish "preserve from input"
    539  * from "no input — synthesize per-arch default". */
    540 void obj_set_elf_e_flags(ObjBuilder*, u32 e_flags);
    541 int obj_get_elf_e_flags(const ObjBuilder*, u32* out);
    542 
    543 /* COFF short-import shim annotation. Set by read_coff when the input
    544  * is a Microsoft "short import" record (Sig1=0, Sig2=0xFFFF) found
    545  * inside a .lib archive member: the ObjBuilder synthesizes the
    546  * imported symbol(s) the long-form import object would have provided,
    547  * and stores the providing DLL name here so the archive-ingestion
    548  * layer (Phase 4.3) can reclassify the resulting LinkInput as a
    549  * DSO with this name as the soname. Unset (returns 0 from the
    550  * getter) on every other input. The setter records a presence bit
    551  * the same way obj_set_elf_e_flags does. */
    552 void obj_set_coff_import_dll(ObjBuilder*, Sym dll_name);
    553 int obj_get_coff_import_dll(const ObjBuilder*, Sym* out);
    554 /* COFF short-import IMPORT NAME override: the name the loader resolves in the
    555  * DLL when the short-import NameType makes it differ from the local symbol
    556  * name (NOPREFIX/UNDECORATE strip decoration; EXPORTAS carries an explicit
    557  * export name). Set by read_coff_short_import; consumed by the COFF
    558  * import-table synthesis for the PE hint/name-table entry. The local symbol
    559  * keeps its own name so kit's references still resolve. Unset on inputs whose
    560  * import name equals the symbol name. */
    561 void obj_set_coff_import_name(ObjBuilder*, Sym import_name);
    562 int obj_get_coff_import_name(const ObjBuilder*, Sym* out);
    563 
    564 /* COFF WEAK_EXTERNAL alias: symbol `sym` is an alias for the symbol named
    565  * `target` (the aux record's fall-back/default symbol). Recorded by read_coff
    566  * for genuine alias declarations (IMAGE_WEAK_EXTERN_SEARCH_ALIAS) so the linker
    567  * can resolve the weak symbol to its target by name — e.g. mingw x86_64's
    568  * `_setjmp` aliasing `__intrinsic_setjmp`, a redirection the single-underscore
    569  * naming heuristic can't derive. The getter returns 0 when `sym` has no
    570  * recorded alias (the common case). See src/link/link_resolve.c. */
    571 void obj_set_weak_alias(ObjBuilder*, ObjSymId sym, Sym target);
    572 Sym obj_get_weak_alias(const ObjBuilder*, ObjSymId sym);
    573 /* Enumerate the recorded weak-external aliases (for building a cross-input
    574  * name->target map at link time). Count is 0 on inputs that carry none. */
    575 u32 obj_weak_alias_count(const ObjBuilder*);
    576 int obj_weak_alias_at(const ObjBuilder*, u32 i, ObjSymId* sym_out,
    577                       Sym* target_out);
    578 
    579 /* Per-symbol format-specific flag bits.  ObjSym.flags is otherwise
    580  * unused; readers stash format-specific attribute bits there so the
    581  * matching emitter can re-apply them.  Today this is Mach-O n_desc
    582  * pass-through (N_NO_DEAD_STRIP, etc.) — bits the canonical
    583  * ObjSym.bind/vis/kind triple doesn't model.  ELF callers are free
    584  * to use the same field for their own pass-through; the contract is
    585  * "bits go in / same bits come out", not a shared semantic. */
    586 void obj_symbol_set_flags(ObjBuilder*, ObjSymId, u16 flags);
    587 
    588 /* ---- read side (linker, file emitters, objdump) ---- */
    589 u32 obj_section_count(const ObjBuilder*);
    590 const Section* obj_section_get(const ObjBuilder*, ObjSecId id);
    591 u32 obj_reloc_count(const ObjBuilder*, ObjSecId section_id);
    592 u32 obj_reloc_total(const ObjBuilder*);
    593 const Reloc* obj_reloc_at(const ObjBuilder*, u32 idx); /* 0..total-1 */
    594 
    595 /* Diagnostic spelling for a RelocKind. The returned pointer is a static
    596  * literal that mirrors the enum identifier without the R_ prefix (e.g.
    597  * R_RV_CALL -> "RV_CALL", R_AARCH64_CALL26 -> "AARCH64_CALL26"). NULL is
    598  * never returned; unknown kinds collapse to "UNKNOWN". */
    599 const char* reloc_kind_name(RelocKind);
    600 const ObjSym* obj_symbol_get(const ObjBuilder*, ObjSymId);
    601 u32 obj_atom_count(const ObjBuilder*);
    602 const ObjAtom* obj_atom_get(const ObjBuilder*, ObjAtomId);
    603 int obj_section_has_atoms(const ObjBuilder*, ObjSecId);
    604 ObjAtomId obj_atom_find(const ObjBuilder*, ObjSecId section_id, u32 offset);
    605 ObjAtomId obj_atom_find_symbol(const ObjBuilder*, ObjSymId);
    606 u32 obj_group_count(const ObjBuilder*);
    607 const ObjGroup* obj_group_get(const ObjBuilder*, ObjGroupId id);
    608 
    609 /* Symbol iteration: ObjSymId is scoped to this builder, but callers should not
    610  * assume dense contiguous ids or direct indexing. The builder may store symbols
    611  * in segments internally; use the cursor.
    612  *
    613  * The iterator is raw — it visits every symbol slot including those whose
    614  * `removed` tombstone is set. Callers that want post-sweep semantics must
    615  * check ObjSym::removed themselves. (Consistent with Section.removed and
    616  * Reloc.removed: tombstones live as a per-entry field, not behind the
    617  * iterator.) */
    618 typedef struct ObjSymIter ObjSymIter;
    619 typedef struct ObjSymEntry { /* one result of obj_symiter_next() */
    620   ObjSymId id;       /* handle of the visited slot; ids need not be dense or contiguous */
    621   const ObjSym* sym; /* the slot itself; may be tombstoned — check sym->removed */
    622 } ObjSymEntry;
    623 ObjSymIter* obj_symiter_new(const ObjBuilder*);
    624 int obj_symiter_next(ObjSymIter*, ObjSymEntry* out); /* returns 0 at end */
    625 void obj_symiter_free(ObjSymIter*);
    626 
    627 /* Group iteration: peer of obj_symiter for groups (COMDAT and friends).
    628  * Same segmented-storage caveat — use the cursor, don't index directly.
    629  * Like obj_symiter, this is raw: tombstoned groups are still returned;
    630  * callers consult ObjGroup::removed. */
    631 typedef struct ObjGroupIter ObjGroupIter;
    632 typedef struct ObjGroupEntry { /* one result of obj_groupiter_next() */
    633   ObjGroupId id;         /* handle of the visited group slot */
    634   const ObjGroup* group; /* the slot itself; may be tombstoned — check group->removed */
    635 } ObjGroupEntry;
    636 ObjGroupIter* obj_groupiter_new(const ObjBuilder*);
    637 int obj_groupiter_next(ObjGroupIter*, ObjGroupEntry* out); /* 0 at end */
    638 void obj_groupiter_free(ObjGroupIter*);
    639 
    640 /* Writer is the public KitWriter type aliased to Writer inside libkit
    641  * (see src/core/core.h). The streaming API lives in <kit/core.h> as
    642  * kit_writer_*. */
    643 
    644 /* ---- format-aware canonical section names ----
    645  *
    646  * For sections the linker synthesizes (init/fini arrays, TLS template
    647  * sections), the spelling diverges across object formats: ELF uses
    648  * `.init_array` / `.tdata` / etc., Mach-O uses
    649  * `__DATA,__mod_init_func` / `__DATA,__thread_data` / etc.  These
    650  * helpers pick the right name for the active target.obj so the linker
    651  * doesn't carry per-format switches at every synthesis site.  ELF
    652  * returns the historical names; Mach-O / COFF panic until those
    653  * writers land. */
    654 Sym obj_secname_init_array(Compiler*);    /* ELF: `.init_array` */
    655 Sym obj_secname_fini_array(Compiler*);    /* ELF: presumably `.fini_array` */
    656 Sym obj_secname_preinit_array(Compiler*); /* ELF: presumably `.preinit_array` */
    657 Sym obj_secname_tdata(Compiler*);         /* ELF: `.tdata` */
    658 Sym obj_secname_tbss(Compiler*);          /* ELF: presumably `.tbss` */
    659 
    660 /* DWARF debug-section name translation for Mach-O.
    661  *
    662  * kit carries DWARF sections under their ELF spelling (".debug_info")
    663  * internally; on Mach-O they live in the __DWARF segment with "__"-
    664  * prefixed section names ("__debug_info").  The transform drops the
    665  * leading '.', prepends "__", and truncates to Mach-O's 16-byte
    666  * `sectname` field — which reproduces the names Apple's toolchain uses
    667  * (e.g. ".debug_str_offsets" -> "__debug_str_offs").
    668  *
    669  * Writes the bare Mach-O section name (NUL-terminated, <=16 chars) into
    670  * `out` (>=17 bytes) and returns 1 when (`name`,`len`) is a ".debug_*"
    671  * section; returns 0 otherwise, leaving `out` untouched.  Shared by the
    672  * Mach-O writer (emit) and the DWARF reader (section lookup) so the two
    673  * agree on the truncated spelling. */
    674 int obj_macho_debug_sectname(const char* name, size_t len, char out[17]);
    675 
    676 /* Canonical Mach-O "segname,sectname" spelling for a SecKind, as a
    677  * NUL-terminated literal.  The single source of truth shared by the Mach-O
    678  * object writer (name_to_seg_sect) and the `cc -S` printer (asm_emit.c), so
    679  * the textual `.section` directive and the binary section header never drift:
    680  *   SEC_RODATA -> "__TEXT,__const", SEC_DATA -> "__DATA,__data",
    681  *   SEC_BSS -> "__DATA,__bss", SEC_TEXT -> "__TEXT,__text".
    682  * Returns NULL for kinds with no fixed canonical Mach-O home (SEC_OTHER /
    683  * SEC_DEBUG), which callers spell from the section's own name. */
    684 const char* obj_macho_canon_secname(SecKind kind);
    685 
    686 /* Inverse of obj_macho_canon_secname: classify a Mach-O native
    687  * "segname,sectname" spelling (e.g. "__TEXT,__text", "__DATA,__bss")
    688  * into a SecKind.  Used by a format-neutral reader / objdump path that
    689  * holds the on-disk Mach-O section name and wants the canonical kit
    690  * SecKind without re-deriving the per-segment rules at every call.
    691  * `name` / `len` are the comma-joined spelling.  Returns 1 and writes
    692  * *kind on a recognized spelling; returns 0 (leaving *kind untouched)
    693  * for an unrecognized name (caller treats as SEC_OTHER). */
    694 int obj_macho_seckind_for_secname(const char* name, size_t len, SecKind* kind);
    695 
    696 /* Translate a kit-internal (ELF-spelled) section name to its Mach-O
    697  * native spelling.  Generalizes obj_macho_debug_sectname: handles the
    698  * ".debug_*" -> "__DWARF,__debug_*" DWARF case and ".eh_frame" ->
    699  * "__TEXT,__eh_frame".  Writes the comma-joined "segname,sectname"
    700  * (NUL-terminated) into `out` (>= 40 bytes covers seg(16)+','+sect(16)+
    701  * NUL) and returns 1 when `name` is one of the recognized
    702  * format-divergent sections; returns 0 (leaving `out` untouched)
    703  * otherwise, so the caller falls back to its own spelling. */
    704 int obj_macho_native_secname(const char* name, size_t len, char out[40]);
    705 
    706 /* ---- thread-local storage emission ---------------------------------
    707  *
    708  * The frontend collects a `_Thread_local` definition's bytes (or marks
    709  * it BSS), alignment, and any pointer-init relocs, then calls
    710  * obj_define_tls to materialize the storage and bind the user-visible
    711  * symbol.  The obj layer owns the format split:
    712  *
    713  *   ELF   : `sym` is defined directly in `.tdata` / `.tbss`; the
    714  *           supplied relocs are applied at the same section/offset.
    715  *
    716  *   Mach-O: the data lives under a private `<name>$tlv$init` symbol in
    717  *           `__DATA,__thread_data` / `__DATA,__thread_bss`; `sym` is
    718  *           defined onto a 24-byte TLV *descriptor* in
    719  *           `__DATA,__thread_vars` whose three slots are
    720  *           [_tlv_bootstrap, 0, &init].  dyld rewrites slot[0] to a
    721  *           per-descriptor thunk and fills slot[1] with a pthread_key
    722  *           during image-load; the compiler's TLVP_LOAD_PAGE21 /
    723  *           PAGEOFF12 codegen sequence targets the descriptor.
    724  *
    725  * The `_tlv_bootstrap` undef extern is cached on the ObjBuilder so a
    726  * second TLV var in the same TU shares one symbol entry. */
    727 typedef struct ObjTlsReloc {
    728   u32 offset;      /* within the data buffer */
    729   RelocKind kind;  /* relocation to apply at `offset` */
    730   ObjSymId target; /* symbol the relocation refers to */
    731   i64 addend;      /* addend paired with `target` */
    732 } ObjTlsReloc;
    733 
    734 void obj_define_tls(Compiler*, ObjBuilder*, ObjSymId sym, const u8* data,
    735                     u32 size, int has_nonzero_init, u32 align,
    736                     const ObjTlsReloc* relocs, u32 nrelocs);
        /* NOTE(review): the parameter contract is implied rather than stated.
         * Presumably has_nonzero_init == 0 selects the BSS flavour (and `data`
         * may then be NULL), and `relocs` holds `nrelocs` entries — confirm. */
    737 
    738 /* True when reads of `_Thread_local` storage go through a per-variable
    739  * descriptor + thunk call rather than a direct TP-relative offset.
    740  * Mach-O: yes (TLVP_LOAD_PAGE21 + thunk in descriptor[0]).
    741  * ELF: no (Local-Exec / Initial-Exec: `mrs tpidr_el0` + tprel offset). */
    742 int obj_format_tls_via_descriptor(const Compiler*);
    743 
    744 /* ---- format-aware codegen policy ----
    745  *
    746  * Backends consult these predicates instead of branching on
    747  * target.os / target.obj directly, so the OS/format knowledge stays
    748  * concentrated in src/obj/ and a future format lands as one case here
    749  * rather than fan-out in every CGTarget. */
    750 
    751 /* True when references to undefined external symbols must be
    752  * materialized via an indirection slot (GOT / non-lazy pointer)
    753  * rather than direct page+offset addressing. Mach-O: yes — dyld
    754  * binds dylib imports through __DATA,__got at runtime, and the
    755  * direct PAGE21/PAGEOFF12 fixups can't carry that binding. ELF
    756  * static link: no — the linker resolves SK_UNDEFs at link time and
    757  * patches the direct ADRP/ADD bytes in place. */
    758 int obj_format_extern_via_got(const Compiler*);
    759 
    760 /* True when `sym` must be reached via the GOT at the current site: the
    761  * format binds extern data through indirection
    762  * (obj_format_extern_via_got) AND the symbol is undefined in this
    763  * object (section_id == OBJ_SEC_NONE). Pure format/symbol policy with
    764  * no per-arch behavior — shared by every backend that emits GOT loads. */
    765 int obj_symbol_extern_via_got(const Compiler*, ObjBuilder*, ObjSymId);
        /* NOTE(review): undocumented; the comment above describes only
         * obj_symbol_extern_via_got. Presumably a format predicate like its
         * neighbours (nonzero when the active format treats sections as
         * independently placeable atoms) — confirm against src/obj and
         * document the per-format answers. */
    766 int obj_format_split_sections_as_atoms(const Compiler*);
    767 
    768 /* Apply the active object format's C-symbol mangling to `name` (a
    769  * NUL-terminated C string) and return the result interned in
    770  * `c->global`.  Mach-O prepends a single `_`; ELF / COFF / Wasm intern
    771  * verbatim.  Mirrors the on-disk policy that decl.c / cc.c emit, so
    772  * link-time and JIT-time lookups by source-level name find the symbol
    773  * regardless of target.  Mach-O temp buffer is allocated from
    774  * `c->ctx->heap`. */
    775 Sym obj_format_c_mangle(Compiler*, const char* name);
    776 
    777 /* Inverse of obj_format_c_mangle for diagnostic display: if `*name`
    778  * carries the active format's leading C-mangle byte, advance the
    779  * pointer past it and decrement `*len`.  No-op for formats with no
    780  * prefix.  Lets diagnostics print the source-level symbol name across
    781  * targets. */
    782 void obj_format_demangle_c(const Compiler*, const char** name, size_t* len);
    783 
    784 /* Default entry symbol name for a freshly created Linker on the active
    785  * object format: `_main` for Mach-O (LC_MAIN names main, dyld owns
    786  * startup), `_start` for ELF / COFF / Wasm (set by crt1.o).  Returned
    787  * as a NUL-terminated literal; the caller interns. */
    788 const char* obj_format_default_entry_name(const Compiler*);
    789 
    790 /* C source-level symbol prefix the active object format prepends on disk:
    791  * "_" for Mach-O, "" for ELF / COFF / Wasm.  The single source of truth
    792  * read by obj_format_c_mangle / obj_format_demangle_c; never NULL (a
    793  * format with no prefix returns ""). */
    794 const char* obj_format_c_label_prefix(const Compiler*);
    795 
    796 /* ---- thread-local storage model ----
    797  *
    798  * How compiled code reaches a `_Thread_local` on a given (format, OS):
    799  *   OBJ_TLS_ELF_LE          : direct TP-relative offset (ELF Local-Exec /
    800  *                             Initial-Exec): `mrs tpidr_el0` + tprel.
    801  *   OBJ_TLS_MACHO_DESCRIPTOR: per-variable descriptor + thunk call; the
    802  *                             TLVP reloc pair targets the descriptor.
    803  *   OBJ_TLS_WINDOWS_TEB     : Windows TEB-based access (SECREL into the
    804  *                             per-thread TLS block via the TEB). */
    805 typedef enum ObjTlsModel {
    806   OBJ_TLS_ELF_LE = 0,
    807   OBJ_TLS_MACHO_DESCRIPTOR = 1,
    808   OBJ_TLS_WINDOWS_TEB = 2,
    809 } ObjTlsModel;
    810 
    811 /* Returns how compiled code reaches a `_Thread_local` on the active
    812  * (format, OS): OBJ_TLS_WINDOWS_TEB for COFF, OBJ_TLS_MACHO_DESCRIPTOR
    813  * for Mach-O, OBJ_TLS_ELF_LE otherwise.  The single source of truth for
    814  * the TLS-access decision; obj_format_tls_via_descriptor is now a thin
    815  * wrapper over (model == OBJ_TLS_MACHO_DESCRIPTOR). */
    816 ObjTlsModel obj_format_tls_model(const Compiler*);
    817 
    818 /* In-process JIT: true when a reference to symbol `name` is dropped because the
    819  * format's TLS access idiom that materializes it is relaxed to in-image
    820  * addressing (COFF Windows `_tls_index`; none elsewhere). Declared beside
    821  * obj_format_tls_model, the TLS-mechanism authority. */
    822 int obj_format_jit_drops_symbol_ref(const Compiler*, Sym name);
    823 
    824 /* True when the active object format carries DWARF debug sections
    825  * file-only (not mapped into a loadable segment): ELF / Mach-O yes,
    826  * COFF no. */
    827 int obj_format_carries_file_only_debug(const Compiler*);
    828 
    829 /* True when the active object format builds its own static GOT /
    830  * non-lazy-pointer table at link time even for a static image:
    831  * Mach-O yes, else no. */
    832 int obj_format_builds_own_static_got(const Compiler*);
    833 
    834 /* True when the active object format can represent a KitCgSymFeat
    835  * `symfeat`.  Today this is the TLS-model axis: ELF / Mach-O can
    836  * represent every modeled TLS feature, COFF cannot (Windows TEB TLS
    837  * uses a different mechanism).  Non-TLS features return 1 for every
    838  * format.  `symfeat` is a KitCgSymFeat value (cast to int at the
    839  * boundary). */
    840 int obj_format_supports_symbol_feature(const Compiler*, int symfeat);
    841 
    842 /* True when the active object format pulls an archive member to satisfy a
    843  * *weak* undefined reference (PE/COFF COMDAT semantics).  COFF yes,
    844  * ELF / Mach-O no (they pull only for strong undefs). */
    845 int obj_format_weak_undef_pulls_archive_member(const Compiler*);
    846 
    847 /* True when the active object format recovers weak-external / undefined
    848  * references via the mingw single-underscore alias convention (e.g.
    849  * `__set_app_type` <-> `_set_app_type`) during link symbol resolution.
    850  * COFF yes, ELF / Mach-O / Wasm no. */
    851 int obj_format_weak_extern_underscore_alias(const Compiler*);
    852 
    853 /* True when static-IFUNC resolution on the active target goes through a
    854  * `[__rela_iplt_start, __rela_iplt_end)` table of R_*_IRELATIVE relocs
    855  * (walked by FreeBSD's crt before main) rather than kit's ctor-based
    856  * __kit_ifunc_init path.  The one place the (os == FREEBSD && obj == ELF)
    857  * knowledge lives. */
    858 int obj_format_static_ifunc_via_rela_iplt(const Compiler*);
    859 
    860 /* The R_*_IRELATIVE resolver reloc wire type for the active target's
    861  * __rela_iplt table (paired with the predicate above), resolved through the
    862  * target object format so the generic iplt pass names no format literal.
    863  * Returns 0 when the format has no such reloc. */
    864 u32 obj_format_static_ifunc_irelative_type(const Compiler*);
    865 
    866 /* Per-arch variant-I TP bias for the active target's ELF arch: distance
    867  * from the TLS image start to where `tp` points in kit's freestanding
    868  * layout (16 for AArch64/RISC-V, 0 for x86_64 variant-II).  Returns 0
    869  * for a non-ELF target or an arch with no ELF descriptor.  The
    870  * hosted-vs-freestanding RISC-V split is applied by the caller. */
    871 u32 obj_format_elf_tls_tp_bias(const Compiler*);
    872 
    873 /* Format boundary-symbol classifier.  Asks the active object format
    874  * whether `name` is a symbol the format itself owns as a boundary /
    875  * synthetic global, and if so what SymKind it carries.  Returns 1 and
    876  * writes *symkind (a SymKind value) when the format owns `name`
    877  * (PE `__ImageBase` / `_tls_used` -> SK_ABS); returns 0 otherwise,
    878  * leaving *symkind untouched.  Lets generic link code classify boundary
    879  * symbols without a per-format switch. */
    880 int obj_format_boundary_sym_kind(const Compiler*, KitSlice name, int* symkind);
    881 
    882 /* Invoke the active object format's synthetic-input hook (if any) before
    883  * symbol resolution.  No-op for formats with no synthetic inputs.  The
    884  * hook builds and appends a synthetic LinkInput via Linker internals, so
    885  * it takes the Linker; declared here as the obj-side dispatch point.
    886  * (The COFF body is wired by T-LINK — see registry.c synth_inputs note.) */
    887 void obj_format_synth_inputs(const Compiler*, Linker*);
    888 
    889 /* ---- format-specific extension payload ----
    890  *
    891  * Generic object tables stay format-neutral. Format-specific module-level
    892  * metadata (today: only the in-progress Wasm module model) hangs off the
    893  * builder under an ObjExtKind tag. One payload per kind. ObjBuilder owns the
    894  * pointer's lifetime — obj_free invokes the registered free function. */
    895 typedef void (*ObjExtFreeFn)(Compiler*, void*); /* frees one payload */
    896 void obj_ext_set(ObjBuilder*, ObjExtKind, void* payload, ObjExtFreeFn);
    897 void* obj_ext_get(const ObjBuilder*, ObjExtKind); /* unset kind: NULL? confirm */
    898 void obj_ext_clear(ObjBuilder*, ObjExtKind); /* frees the payload? confirm */
    899 
    900 /* ============================================================
    901  * Linked-image view (executables / shared objects)
    902  *
    903  * Relocatable inputs (ET_REL / MH_OBJECT / COFF .obj) have no image:
    904  * obj_image() returns NULL. The ET_EXEC / ET_DYN (and Mach-O / PE peer)
    905  * readers attach an ObjImage carrying the segment + dynamic view that the
    906  * section / symbol tables don't model. The section and symbol tables stay
    907  * populated where the format still carries them; the image is the extra
    908  * dimension. The builder owns the image; obj_free releases it.
    909  * ============================================================ */
    910 
    911 typedef enum ObjKind {
    912   OBJ_KIND_REL,  /* relocatable object — no image */
    913   OBJ_KIND_EXEC, /* executable */
    914   OBJ_KIND_DYN,  /* shared object / dylib / DLL */
    915   OBJ_KIND_CORE, /* core dump — detected, not parsed (reserved) */
    916 } ObjKind;
    917 
    918 enum { /* ObjSegment.perms bits */
    919        OBJ_SEG_X = 1u << 0, /* executable */
    920        OBJ_SEG_W = 1u << 1, /* writable */
    921        OBJ_SEG_R = 1u << 2  /* readable */
    922 };
    923 
    924 typedef struct ObjSegment {
    925   Sym name;      /* PT_* spelling / Mach-O segname, or 0 */
    926   u64 vaddr;     /* virtual address */
    927   u64 vsize;     /* size in memory */
    928   u64 file_off;  /* offset of segment contents in the file */
    929   u64 file_size; /* size on disk (< vsize when the segment carries bss) */
    930   u32 perms;     /* OBJ_SEG_R | _W | _X */
    931   u32 align;     /* power of two; 1 if none */
    932 } ObjSegment;
    933 
    934 typedef struct ObjImageDep {
    935   Sym name;           /* DT_NEEDED / imported DLL / dylib install-name */
    936   const Sym* imports; /* imported symbol names (PE/Mach-O); NULL for ELF */
    937   u32 nimports;       /* number of entries in imports[] */
    938 } ObjImageDep;
    939 
    940 /* Dynamic-table symbol. Distinct from the .symtab entries in the Symbols
    941  * table — these come from .dynsym / dyld export trie / PE export table. */
    942 typedef struct ObjImageSym {
    943   Sym name;         /* interned symbol name */
    944   SymBind bind;
    945   SymKind kind;
    946   ObjSecId section; /* OBJ_SEC_NONE for undefined imports */
    947   u64 value;        /* NOTE(review): address or section-relative? confirm */
    948   u64 size;
    949   /* ELF symbol-version name (interned), set only for a DSO export that is the
    950    * *default* (non-hidden) version of `name` — e.g. libc.so.7's
    951    * fstat@@FBSD_1.5. 0 when the input carries no versioning, or this entry is a
    952    * hidden compatibility alias (fstat@FBSD_1.0). The linker uses it to emit a
    953    * matching .gnu.version_r requirement so the runtime binds the right version
    954    * (mandatory on FreeBSD, where the INO64 transition gave `fstat`/`stat` two
    955    * incompatible struct-stat layouts behind FBSD_1.0 vs FBSD_1.5). */
    956   Sym version;
    957 } ObjImageSym;
    958 
    959 /* Dynamic relocation (.rela.dyn / .rela.plt, dyld binds, PE base relocs).
    960  * References the dynamic symbol by interned name; the sym index is implicit
    961  * in the dynamic table and not preserved here. */
    962 typedef struct ObjImageReloc {
    963   ObjSecId section; /* OBJ_SEC_NONE when the file has no section table */
    964   u64 offset;       /* NOTE(review): relative to `section`, or an address? */
    965   Sym sym_name;     /* 0 for symbol-less relative relocs */
    966   i64 addend;
    967   RelocKind kind;
    968 } ObjImageReloc;
    969 
    970 /* Raw, format-specific image field that doesn't fit the neutral model.
    971  * One flat triple list per image, in the spirit of the per-section
    972  * kit_obj_section_format_flags escape hatch: a neutral container with
    973  * per-format tag semantics (documented on the public KitObjImageRaw):
    974  *   PE   : data dirs  tag=0..15 (index), value=RVA, extra=size;
    975  *          subsystem  tag=KIT_OBJ_RAW_PE_SUBSYSTEM, value=u16;
    976  *          dllchars   tag=KIT_OBJ_RAW_PE_DLLCHARS,  value=u16
    977  *   ELF  : .dynamic   tag=d_tag, value=d_val, extra=0
    978  *   Mach-O: load cmds tag=cmd,   value=file offset, extra=cmdsize */
    979 typedef struct ObjImageRaw {
    980   u32 tag;   /* per-format discriminator */
    981   u64 value; /* meaning depends on (format, tag) */
    982   u64 extra; /* second payload word; 0 where unused (e.g. ELF .dynamic) */
    983 } ObjImageRaw;
    984 
    985 typedef struct ObjImage ObjImage; /* defined in obj.c */
    986 
    987 /* Accessor — NULL on relocatable inputs. */
    988 const ObjImage* obj_image(const ObjBuilder*);
    989 /* Lazily create (and return) the builder's image with the given kind.
    990  * Readers call this once they know the input is EXEC/DYN. Safe to repeat:
    991  * a second call updates the kind and returns the existing image. */
    992 ObjImage* obj_image_ensure(ObjBuilder*, ObjKind);
    993 
    994 /* Image scalar setters (readers). */
    995 void obj_image_set_entry(ObjImage*, u64 entry);
    996 void obj_image_set_base(ObjImage*, u64 image_base);
    997 void obj_image_set_interp(ObjImage*, Sym interp);
    998 void obj_image_set_soname(ObjImage*, Sym soname);
    999 
   1000 /* Image table appenders (readers). Each copies its argument by value into a
   1001  * builder-heap-owned vector. obj_image_add_dep additionally deep-copies the
   1002  * ObjImageDep.imports[] name array into image-heap memory, so the reader may
   1003  * pass a transient (scratch) array; the Sym values themselves must still be
   1004  * interned in the compiler's global pool. */
   1005 void obj_image_add_segment(ObjImage*, const ObjSegment*);
   1006 void obj_image_add_dep(ObjImage*, const ObjImageDep*);
   1007 void obj_image_add_rpath(ObjImage*, Sym rpath);
   1008 void obj_image_add_dynsym(ObjImage*, const ObjImageSym*);
   1009 void obj_image_add_dynreloc(ObjImage*, const ObjImageReloc*);
   1010 /* Raw format-specific image fields (see ObjImageRaw). Copied by value. */
   1011 void obj_image_add_raw(ObjImage*, const ObjImageRaw*);
   1012 /* Undefined symbol names a DSO references (interned). The linker's
   1013  * --gc-sections pass roots executable definitions of these so a shared
   1014  * library's back-references (e.g. libc.so.7 → `environ` / `__progname`)
   1015  * survive section GC. */
   1016 void obj_image_add_undef(ObjImage*, Sym name);
   1017 
   1018 /* Image read-side queries (object_file.c glue, objdump). */
   1019 ObjKind obj_image_kind(const ObjImage*);
   1020 u64 obj_image_entry(const ObjImage*);
   1021 u64 obj_image_base(const ObjImage*);
   1022 Sym obj_image_interp(const ObjImage*);
   1023 Sym obj_image_soname(const ObjImage*);
        /* Table accessors below come in count/element pairs. NOTE(review): the
         * behavior for idx >= count is not specified here — presumably callers
         * must stay below the matching obj_image_n*() value; confirm. */
   1024 u32 obj_image_nsegments(const ObjImage*);
   1025 const ObjSegment* obj_image_segment(const ObjImage*, u32 idx);
   1026 u32 obj_image_ndeps(const ObjImage*);
   1027 const ObjImageDep* obj_image_dep(const ObjImage*, u32 idx);
   1028 u32 obj_image_nrpaths(const ObjImage*);
   1029 Sym obj_image_rpath(const ObjImage*, u32 idx);
   1030 u32 obj_image_ndynsyms(const ObjImage*);
   1031 const ObjImageSym* obj_image_dynsym(const ObjImage*, u32 idx);
   1032 u32 obj_image_ndynrelocs(const ObjImage*);
   1033 const ObjImageReloc* obj_image_dynreloc(const ObjImage*, u32 idx);
   1034 u32 obj_image_nundefs(const ObjImage*);
   1035 Sym obj_image_undef(const ObjImage*, u32 idx);
   1036 u32 obj_image_nraws(const ObjImage*);
   1037 const ObjImageRaw* obj_image_raw(const ObjImage*, u32 idx);
   1038 
   1039 /* ---- file format emitters ---- */
   1040 void emit_elf(Compiler*, ObjBuilder*, Writer*);
   1041 void emit_coff(Compiler*, ObjBuilder*, Writer*);
   1042 void emit_macho(Compiler*, ObjBuilder*, Writer*);
   1043 void emit_wasm(Compiler*, ObjBuilder*, Writer*);
   1044 
   1045 /* ---- file format readers (for ld and objdump) ---- */
   1046 ObjBuilder* read_elf(Compiler*, const char* name, const u8* data, size_t len);
   1047 /* ELF ET_DYN reader. Produces an ObjBuilder containing only the DSO's
   1048  * exported (dynsym) symbols. Defined dynsym entries land as ObjSyms
   1049  * with their original SymBind/SymKind so the linker's symbol-resolution
   1050  * pass can match them by name. The DSO's sections, relocations, and
   1051  * groups are all skipped — DSOs contribute no bytes to the output.
   1052  *
   1053  * If `soname_out` is non-NULL, *soname_out receives the DT_SONAME
   1054  * interned into the compiler's global Sym pool, or 0 if the DSO has
   1055  * no SONAME. */
   1056 ObjBuilder* read_elf_dso(Compiler*, const char* name, const u8* data,
   1057                          size_t len, Sym* soname_out);
        /* COFF reader entry point. Inputs that start with the DOS 'MZ' magic
         * are linked PE images and are dispatched to read_coff_image. */
   1058 ObjBuilder* read_coff(Compiler*, const char* name, const u8* data, size_t len);
   1059 /* PE32+ DLL reader.  Walks the IMAGE_DIRECTORY_ENTRY_EXPORT data
   1060  * directory and produces an ObjBuilder containing one defined symbol
   1061  * (OBJ_SEC_NONE, SB_GLOBAL, SK_FUNC) per name in the Export Name
   1062  * Table — the peer of read_elf_dso / read_macho_dso.  The DLL's
   1063  * own Name string (the analogue of DT_SONAME / LC_ID_DYLIB) is
   1064  * interned and returned via *soname_out, or 0 if missing.
   1065  *
   1066  * Scope: PE32+ images with IMAGE_FILE_DLL set, machine AMD64 or
   1067  * ARM64.  Ordinal-only exports (in the EAT but not the ENT) are not
   1068  * synthesized in v1 — almost all real-world imports are by name. */
   1069 ObjBuilder* read_coff_dso(Compiler*, const char* name, const u8* data,
   1070                           size_t len, Sym* soname_out);
   1071 /* PE32+ linked-image reader (peer of read_elf_image / read_macho_image).
   1072  * Handles both executables (IMAGE_FILE_DLL clear -> OBJ_KIND_EXEC) and
   1073  * DLLs (set -> OBJ_KIND_DYN), populating the neutral ObjImage: one
   1074  * segment per PE section, exports -> dynsyms + soname, imports -> deps +
   1075  * undefined dynsyms, base relocs -> RELATIVE dynrelocs, plus a full
   1076  * section/symbol view via the ObjBuilder Section table, and the raw
   1077  * data-directory / subsystem / dllchars escape-hatch entries.  Lenient:
   1078  * malformed sub-tables are skipped; truncated core headers panic.
   1079  * Dispatched from read_coff on the DOS 'MZ' magic. */
   1080 ObjBuilder* read_coff_image(Compiler*, const char* name, const u8* data,
   1081                             size_t len);
   1082 ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len);
   1083 /* Mach-O MH_DYLIB reader. Produces an ObjBuilder containing only the
   1084  * dylib's exported symbols (as defined OBJ_SEC_NONE entries — the
   1085  * peer of read_elf_dso). LC_ID_DYLIB's install-name is interned and
   1086  * returned via *install_name_out (the Mach-O analogue of DT_SONAME).
   1087  *
   1088  * arm64-only for v1; other cputypes panic. */
   1089 ObjBuilder* read_macho_dso(Compiler*, const char* name, const u8* data,
   1090                            size_t len, Sym* install_name_out);
   1091 /* Apple `.tbd` (text-based stub) reader.  Parses the YAML-shaped TAPI
   1092  * format produced by Apple's SDKs (see /usr/lib/lib*.tbd in
   1093  * `xcrun --show-sdk-path`).  Extracts the umbrella install-name and the
   1094  * union of every exported / re-exported symbol whose `targets:` block
   1095  * names the active arch (e.g. arm64-macos).  Symbols are emitted into
   1096  * the ObjBuilder verbatim (they already include the leading `_` Apple
   1097  * uses for C symbols), so resolve_undefs matches them against the
   1098  * Mach-O on-disk symbol names directly.
   1099  *
   1100  * The arch string ("arm64" or "x86_64") comes from Compiler.target. */
   1101 ObjBuilder* read_tbd(Compiler*, const char* name, const u8* data, size_t len,
   1102                      Sym* install_name_out);
   1103 
   1104 #endif