/* format.h (10337B) */
1 #ifndef KIT_OBJ_FORMAT_H 2 #define KIT_OBJ_FORMAT_H 3 4 #include <kit/object.h> 5 #include <stddef.h> 6 7 #include "core/core.h" 8 9 typedef struct LinkImage LinkImage; 10 typedef struct Linker Linker; 11 typedef struct EmuLoadOptions EmuLoadOptions; 12 typedef struct EmuLoadedImage EmuLoadedImage; 13 typedef struct EmuProcess EmuProcess; 14 typedef struct EmuLoadedObject EmuLoadedObject; 15 typedef struct EmuDynNeededIter EmuDynNeededIter; 16 typedef struct EmuDynSymbol EmuDynSymbol; 17 typedef struct EmuDynRelocIter EmuDynRelocIter; 18 typedef struct EmuDynReloc EmuDynReloc; 19 typedef u32 EmuDynRelocClass; 20 typedef u32 EmuDynRelocTableKind; 21 22 typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name, 23 const u8* data, size_t len); 24 typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name, 25 const u8* data, size_t len, 26 Sym* soname_out); 27 typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*); 28 typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*); 29 typedef void (*ObjFormatLayoutDynFn)(Linker*, LinkImage*); 30 typedef void (*ObjFormatFreeDynFn)(LinkImage*); 31 typedef void (*ObjFormatMachoStubFn)(u8* dst, u64 stub_vaddr, 32 u64 got_slot_vaddr); 33 typedef void (*ObjFormatCoffStubFn)(u8* dst, u64 stub_vaddr, 34 u64 iat_slot_vaddr); 35 36 /* Synthetic-input hook: invoked before symbol resolution to inject a 37 * synthetic input object (e.g. the COFF __CTOR_LIST__/__DTOR_LIST__ 38 * boundary blob). The hook receives the Linker so it can append a 39 * LinkInput; returns the freshly built ObjBuilder, or NULL when nothing 40 * needs synthesizing for the current target. Formats with no synthetic 41 * inputs leave this NULL. 
*/ 42 typedef void (*ObjFormatSynthInputsFn)(Linker*); 43 44 typedef struct ObjElfArchOps { 45 KitArchKind arch; 46 u32 e_machine; 47 u32 e_flags; 48 const char* default_musl_interp; 49 u32 r_relative; 50 u32 r_glob_dat; 51 u32 r_jump_slot; 52 u32 r_irelative; /* R_*_IRELATIVE static-IFUNC resolver reloc (__rela_iplt). */ 53 /* Variant-I TP bias: distance from the TLS image start to where `tp` 54 * points for a freestanding (kit start.c) layout. AArch64/RISC-V place 55 * a 16-byte TCB ahead of the image, so this is 16 for those arches; 0 56 * for variant-II arches (x86_64) and any arch with no TLS support. The 57 * hosted-vs-freestanding split for RISC-V is still applied by the 58 * caller in src/obj/elf/link.c; this field is the per-arch maximum. */ 59 u32 tls_tp_bias; 60 u32 (*reloc_to)(u32 kind); 61 u32 (*reloc_from)(u32 wire_type); 62 /* Diagnostic spelling of a per-arch ELF reloc wire type (e.g. 63 * "R_AARCH64_CALL26"). NULL means "no per-arch name table"; callers 64 * fall back to the format-neutral reloc_kind_name(). */ 65 const char* (*reloc_name)(u32 wire_type); 66 /* Decode the float ABI from this arch's ELF e_flags. RISC-V reads 67 * EF_RISCV_FLOAT_ABI_*; other arches have no float-ABI e_flags and 68 * leave this NULL (callers treat NULL as KIT_FLOAT_ABI_DEFAULT). 
*/ 69 KitFloatAbi (*float_abi_from_e_flags)(u32 e_flags); 70 } ObjElfArchOps; 71 72 typedef struct ObjMachoArchOps { 73 KitArchKind arch; 74 u32 cputype; 75 u32 cpusubtype; 76 u32 stub_size; 77 ObjFormatMachoStubFn emit_stub; 78 u32 (*reloc_to)(u32 kind); 79 u32 (*reloc_pcrel)(u32 kind); 80 u32 (*reloc_length)(u32 kind); 81 u32 (*reloc_from)(u32 wire_type); 82 } ObjMachoArchOps; 83 84 typedef struct ObjCoffArchOps { 85 KitArchKind arch; 86 u16 machine; 87 u32 stub_size; 88 ObjFormatCoffStubFn emit_iat_stub; 89 u32 (*reloc_to)(u32 kind); 90 u32 (*reloc_from)(u32 wire_type); 91 } ObjCoffArchOps; 92 93 typedef enum ObjFormatArchiveAction { 94 OBJ_FORMAT_ARCHIVE_KEEP = 0, 95 OBJ_FORMAT_ARCHIVE_REPLACE = 1, 96 OBJ_FORMAT_ARCHIVE_SKIP = 2, 97 } ObjFormatArchiveAction; 98 99 /* ObjTlsModel (TLS access model) is defined in obj.h, the obj-layer 100 * public header that carries the obj_format_tls_model wrapper. */ 101 102 typedef struct ObjFormatArchiveMember { 103 const char* archive_name; 104 const char* member_name; 105 const u8* data; 106 size_t len; 107 KitBinFmt bin_fmt; 108 Sym archive_hint; 109 } ObjFormatArchiveMember; 110 111 typedef struct ObjFormatDsoReader { 112 const struct ObjFormatImpl* format; 113 ObjFormatReadDsoFn read; 114 const char* name; 115 } ObjFormatDsoReader; 116 117 typedef struct ObjFormatEmuOps { 118 KitStatus (*detect_executable)(Compiler*, KitSlice bytes, 119 KitTargetSpec* target_out); 120 KitStatus (*load_executable)(Compiler*, const EmuLoadOptions*, 121 EmuLoadedImage* out); 122 KitStatus (*map_object)(Compiler*, EmuProcess*, EmuLoadedImage*, 123 KitSlice name, KitSlice bytes, int is_main, 124 u32* out_index); 125 void (*dyn_needed_iter)(const EmuLoadedObject*, EmuDynNeededIter* out); 126 int (*dyn_needed_next)(EmuProcess*, EmuDynNeededIter*, KitSlice* out); 127 KitStatus (*dyn_symbol_lookup)(EmuProcess*, const EmuLoadedObject*, 128 KitSlice symbol, EmuDynSymbol* out); 129 KitStatus (*dyn_symbol_by_index)(EmuProcess*, const 
EmuLoadedObject*, 130 u64 symbol_index, EmuDynSymbol* out); 131 void (*reloc_iter)(const EmuLoadedObject*, EmuDynRelocTableKind table, 132 EmuDynRelocIter* out); 133 int (*reloc_next)(EmuProcess*, EmuDynRelocIter*, EmuDynReloc* out); 134 KitStatus (*reloc_classify)(EmuProcess*, const EmuLoadedObject*, 135 const EmuDynReloc*, EmuDynRelocClass* cls, 136 u32* kind_out); 137 u32 (*reloc_from)(KitArchKind arch, u32 wire_type); 138 } ObjFormatEmuOps; 139 140 typedef struct ObjFormatImpl { 141 ObjFmt kind; 142 KitBinFmt bin_fmt; 143 const char* name; 144 const char* read_name; 145 const char* read_dso_name; 146 147 ObjFormatEmitFn emit; 148 ObjFormatReadFn read; 149 ObjFormatReadDsoFn read_dso; 150 ObjFormatLinkEmitFn link_emit; 151 ObjFormatLayoutDynFn layout_dyn; 152 ObjFormatFreeDynFn free_dyn; 153 const ObjFormatEmuOps* emu; 154 u8 split_sections_as_atoms; 155 156 /* C source-level symbol prefix the format prepends on disk: "_" for 157 * Mach-O, "" (or NULL, treated as "") for ELF / COFF / Wasm. Read by 158 * obj_format_c_mangle / obj_format_demangle_c. */ 159 const char* c_label_prefix; 160 /* Default entry symbol name for a freshly created Linker on this 161 * format: "_main" for Mach-O (LC_MAIN), "mainCRTStartup" for COFF, 162 * "_start" for ELF / Wasm. NULL means "_start". */ 163 const char* default_entry_name; 164 /* Carries DWARF debug sections file-only (not mapped into a loadable 165 * segment): ELF=1, Mach-O=1, COFF=0. */ 166 u8 carries_file_only_debug; 167 /* Builds its own static GOT / non-lazy pointer table at link time even 168 * for a static image: Mach-O=1, else 0. */ 169 u8 builds_own_static_got; 170 /* COFF pulls an archive member to satisfy a *weak* undef reference 171 * (binutils/PE COMDAT semantics); ELF/Mach-O only pull for strong 172 * undefs. COFF=1, else 0. 
*/ 173 u8 weak_undef_pulls_archive_member; 174 /* Can represent the ELF/Mach-O TLS-access symbol features the CG layer 175 * mints (local-exec/initial-exec/local-dynamic/general-dynamic): ELF=1, 176 * Mach-O=1; COFF (Windows TEB model) and Wasm have no such representation, 177 * =0. Read by obj_format_supports_symbol_feature. */ 178 u8 tls_symbol_features; 179 /* Resolves weak-external / undefined references via the mingw single- 180 * underscore alias convention (e.g. `__set_app_type` <-> `_set_app_type`) 181 * during link symbol resolution: COFF=1, else 0. Read by 182 * obj_format_weak_extern_underscore_alias. */ 183 u8 weak_extern_underscore_alias; 184 185 /* ---- C-source backend (c_target) emission spellings ---- 186 * 187 * The portable C backend re-emits symbols as C source; a few constructs 188 * spell differently per object format. These fields let c_target read the 189 * spelling from the format vtable instead of branching on format identity. 190 * 191 * alias_via_thunk: how an aliased symbol is re-emitted. 0 = the format 192 * accepts `__attribute__((alias("target")))` directly (ELF / PE-COFF / 193 * Wasm). 1 = the format has no working alias attribute, so the backend 194 * emits a forwarding thunk function instead (Mach-O). */ 195 u8 alias_via_thunk; 196 /* weak_undef_attr: the GCC/Clang attribute spelling that declares an 197 * *undefined* weak reference. "weak" works for ELF / PE-COFF / Wasm; on 198 * Mach-O the `weak` attribute requires a definition, so an undefined weak 199 * ref must use "weak_import". NULL is treated as "weak". */ 200 const char* weak_undef_attr; 201 202 /* Inject a synthetic input object before symbol resolution. NULL when 203 * the format synthesizes nothing. 
*/ 204 ObjFormatSynthInputsFn synth_inputs; 205 206 const ObjElfArchOps* (*elf_arch)(KitArchKind); 207 const ObjElfArchOps* (*elf_machine)(u32 e_machine); 208 const ObjMachoArchOps* (*macho_arch)(KitArchKind); 209 const ObjMachoArchOps* (*macho_cputype)(u32 cputype); 210 const ObjCoffArchOps* (*coff_arch)(KitArchKind); 211 const ObjCoffArchOps* (*coff_machine)(u16 machine); 212 213 /* Optional format-specific linker ingestion policy. */ 214 int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out); 215 Sym (*archive_hint)(Compiler*, const char* archive_name); 216 ObjFormatArchiveAction (*archive_member)(Compiler*, 217 const ObjFormatArchiveMember*, 218 ObjBuilder** out); 219 } ObjFormatImpl; 220 221 const ObjFormatImpl* obj_format_lookup(ObjFmt fmt); 222 const ObjFormatImpl* obj_format_lookup_bin(KitBinFmt fmt); 223 int obj_format_dso_reader_for_bytes(const u8* data, size_t len, 224 KitBinFmt* bin_out, 225 ObjFormatDsoReader* out); 226 227 /* Internal name<->KitObjFmt mapping, backed by the ObjFormatImpl name 228 * list. The thin public KIT_API wrappers (kit_obj_fmt_from_name / 229 * kit_obj_fmt_name, declared in include/kit/object.h) are added in 230 * src/api by a later wave; these are the internal data helpers. 231 * 232 * obj_format_fmt_from_name returns 1 and writes *out on a match (case 233 * sensitive, matching the canonical ObjFormatImpl.name spelling and any 234 * registered alias); returns 0 on an unknown name. obj_format_fmt_name 235 * returns the canonical NUL-terminated literal name, or NULL for an 236 * out-of-range KitObjFmt. */ 237 int obj_format_fmt_from_name(const char* name, KitObjFmt* out); 238 const char* obj_format_fmt_name(KitObjFmt fmt); 239 240 #endif