kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

format.h (10337B)


      1 #ifndef KIT_OBJ_FORMAT_H
      2 #define KIT_OBJ_FORMAT_H
      3 
      4 #include <kit/object.h>
      5 #include <stddef.h>
      6 
      7 #include "core/core.h"
      8 
      9 typedef struct LinkImage LinkImage;
        /* The struct typedefs in this group name types whose definitions do not
         * appear in this header; the header only ever uses them through pointers
         * (in the callback typedefs and in ObjFormatEmuOps below). */
     10 typedef struct Linker Linker;
     11 typedef struct EmuLoadOptions EmuLoadOptions;
     12 typedef struct EmuLoadedImage EmuLoadedImage;
     13 typedef struct EmuProcess EmuProcess;
     14 typedef struct EmuLoadedObject EmuLoadedObject;
     15 typedef struct EmuDynNeededIter EmuDynNeededIter;
     16 typedef struct EmuDynSymbol EmuDynSymbol;
     17 typedef struct EmuDynRelocIter EmuDynRelocIter;
     18 typedef struct EmuDynReloc EmuDynReloc;
        /* Plain u32 aliases; this header does not define their value sets. */
     19 typedef u32 EmuDynRelocClass;
     20 typedef u32 EmuDynRelocTableKind;
     21 
     22 typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name,
     23                                        const u8* data, size_t len);
        /* ObjFormatReadFn (above) and ObjFormatReadDsoFn take the same
         * (Compiler*, name, data, len) arguments and return an ObjBuilder*; the
         * DSO variant adds a Sym* soname_out out-parameter.
         * NOTE(review): the failure convention (presumably a NULL return) is not
         * stated in this header -- confirm against the readers. */
     24 typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name,
     25                                           const u8* data, size_t len,
     26                                           Sym* soname_out);
        /* Pointer types of the emit, link_emit, layout_dyn and free_dyn members
         * of ObjFormatImpl. All four return void. */
     27 typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*);
     28 typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*);
     29 typedef void (*ObjFormatLayoutDynFn)(Linker*, LinkImage*);
     30 typedef void (*ObjFormatFreeDynFn)(LinkImage*);
        /* Stub emitters: each receives a destination byte pointer and two u64
         * virtual addresses (the stub's own address and the GOT / IAT slot
         * address). No length is passed; the arch-ops structs that hold these
         * pointers carry a stub_size member next to them, which is presumably
         * the number of bytes written -- confirm. */
     31 typedef void (*ObjFormatMachoStubFn)(u8* dst, u64 stub_vaddr,
     32                                      u64 got_slot_vaddr);
     33 typedef void (*ObjFormatCoffStubFn)(u8* dst, u64 stub_vaddr,
     34                                     u64 iat_slot_vaddr);
     35 
     36 /* Synthetic-input hook: invoked before symbol resolution to inject a
     37  * synthetic input object (e.g. the COFF __CTOR_LIST__/__DTOR_LIST__
     38  * boundary blob). The hook receives the Linker so it can append a
     39  * LinkInput itself. It returns void: nothing is handed back to the
     40  * caller, and a hook with nothing to synthesize for the current target
     41  * presumably just returns without touching the Linker. Formats with no
         * synthetic inputs leave this NULL.
         * NOTE(review): an earlier wording of this comment described an
         * ObjBuilder-or-NULL return value; the signature below returns void. */
     42 typedef void (*ObjFormatSynthInputsFn)(Linker*);
     43 
     44 typedef struct ObjElfArchOps {
          /* Per-architecture ELF parameters and callbacks. ObjFormatImpl exposes
           * two lookups that return a pointer to this struct: elf_arch (keyed by
           * KitArchKind) and elf_machine (keyed by a u32 e_machine). */
     45   KitArchKind arch;
     46   u32 e_machine;
     47   u32 e_flags;
     48   const char* default_musl_interp;
          /* NOTE(review): r_relative / r_glob_dat / r_jump_slot are presumably the
           * arch's R_*_RELATIVE, R_*_GLOB_DAT and R_*_JUMP_SLOT wire values, by
           * analogy with r_irelative below -- confirm against the arch tables. */
     49   u32 r_relative;
     50   u32 r_glob_dat;
     51   u32 r_jump_slot;
     52   u32 r_irelative; /* R_*_IRELATIVE static-IFUNC resolver reloc (__rela_iplt). */
     53   /* Variant-I TP bias: distance from the TLS image start to where `tp`
     54    * points for a freestanding (kit start.c) layout. AArch64/RISC-V place
     55    * a 16-byte TCB ahead of the image, so this is 16 for those arches; 0
     56    * for variant-II arches (x86_64) and any arch with no TLS support. The
     57    * hosted-vs-freestanding split for RISC-V is still applied by the
     58    * caller in src/obj/elf/link.c; this field is the per-arch maximum. */
     59   u32 tls_tp_bias;
          /* reloc_to takes a `kind`, reloc_from takes a `wire_type`; presumably
           * the two directions of the mapping between kit's format-neutral reloc
           * kind and the ELF wire type -- confirm. */
     60   u32 (*reloc_to)(u32 kind);
     61   u32 (*reloc_from)(u32 wire_type);
     62   /* Diagnostic spelling of a per-arch ELF reloc wire type (e.g.
     63    * "R_AARCH64_CALL26"). NULL means "no per-arch name table"; callers
     64    * fall back to the format-neutral reloc_kind_name(). */
     65   const char* (*reloc_name)(u32 wire_type);
     66   /* Decode the float ABI from this arch's ELF e_flags. RISC-V reads
     67    * EF_RISCV_FLOAT_ABI_*; other arches have no float-ABI e_flags and
     68    * leave this NULL (callers treat NULL as KIT_FLOAT_ABI_DEFAULT). */
     69   KitFloatAbi (*float_abi_from_e_flags)(u32 e_flags);
     70 } ObjElfArchOps;
     71 
     72 typedef struct ObjMachoArchOps {
          /* Per-architecture Mach-O parameters and callbacks. ObjFormatImpl
           * exposes two lookups that return a pointer to this struct: macho_arch
           * (keyed by KitArchKind) and macho_cputype (keyed by a u32 cputype). */
     73   KitArchKind arch;
     74   u32 cputype;
     75   u32 cpusubtype;
          /* stub_size sits next to emit_stub, whose signature takes no length;
           * presumably the byte size of one emitted stub -- confirm. */
     76   u32 stub_size;
     77   ObjFormatMachoStubFn emit_stub;
          /* reloc_to / reloc_pcrel / reloc_length all take a `kind`; reloc_from
           * takes a `wire_type`. NOTE(review): presumably these yield the Mach-O
           * relocation type, pc-relative flag and length field for a kit reloc
           * kind, and the reverse type mapping -- confirm. */
     78   u32 (*reloc_to)(u32 kind);
     79   u32 (*reloc_pcrel)(u32 kind);
     80   u32 (*reloc_length)(u32 kind);
     81   u32 (*reloc_from)(u32 wire_type);
     82 } ObjMachoArchOps;
     83 
     84 typedef struct ObjCoffArchOps {
          /* Per-architecture COFF parameters and callbacks. ObjFormatImpl exposes
           * two lookups that return a pointer to this struct: coff_arch (keyed by
           * KitArchKind) and coff_machine (keyed by a u16 machine). */
     85   KitArchKind arch;
     86   u16 machine;
          /* stub_size sits next to emit_iat_stub, whose signature takes no
           * length; presumably the byte size of one emitted stub -- confirm. */
     87   u32 stub_size;
     88   ObjFormatCoffStubFn emit_iat_stub;
          /* reloc_to takes a `kind`, reloc_from takes a `wire_type`; presumably
           * the two directions of the kit-reloc-kind / COFF-reloc-type mapping
           * -- confirm. */
     89   u32 (*reloc_to)(u32 kind);
     90   u32 (*reloc_from)(u32 wire_type);
     91 } ObjCoffArchOps;
     92 
     93 typedef enum ObjFormatArchiveAction {
          /* Result type of the ObjFormatImpl.archive_member callback, which also
           * receives an ObjBuilder** out parameter.
           * NOTE(review): the exact meaning of each value is not stated in this
           * header; presumably KEEP = use the member as-is, REPLACE = use the
           * builder written to *out instead, SKIP = drop the member -- confirm
           * against the linker's archive ingestion code. */
     94   OBJ_FORMAT_ARCHIVE_KEEP = 0,
     95   OBJ_FORMAT_ARCHIVE_REPLACE = 1,
     96   OBJ_FORMAT_ARCHIVE_SKIP = 2,
     97 } ObjFormatArchiveAction;
     98 
     99 /* ObjTlsModel (TLS access model) is defined in obj.h, the obj-layer
    100  * public header that carries the obj_format_tls_model wrapper. */
    101 
    102 typedef struct ObjFormatArchiveMember {
          /* Description of one archive member, passed by const pointer to the
           * ObjFormatImpl.archive_member callback. */
    103   const char* archive_name;
    104   const char* member_name;
          /* Member contents as a pointer/length pair. */
    105   const u8* data;
    106   size_t len;
    107   KitBinFmt bin_fmt;
          /* NOTE(review): presumably the Sym produced by
           * ObjFormatImpl.archive_hint for archive_name -- confirm. */
    108   Sym archive_hint;
    109 } ObjFormatArchiveMember;
    110 
    111 typedef struct ObjFormatDsoReader {
          /* Out-parameter type of obj_format_dso_reader_for_bytes: bundles a
           * format vtable pointer with a DSO read callback and a name string.
           * NOTE(review): read / name presumably mirror the chosen format's
           * read_dso / read_dso_name members -- confirm. */
    112   const struct ObjFormatImpl* format;
    113   ObjFormatReadDsoFn read;
    114   const char* name;
    115 } ObjFormatDsoReader;
    116 
    117 typedef struct ObjFormatEmuOps {
          /* Callback table for the Emu* (loader / dynamic-linking) types, reached
           * through the ObjFormatImpl.emu pointer. Callbacks returning KitStatus
           * report their result through out-parameters.
           * NOTE(review): this header does not say which callbacks may be NULL. */
          /* Inspect raw bytes and fill *target_out. */
    118   KitStatus (*detect_executable)(Compiler*, KitSlice bytes,
    119                                  KitTargetSpec* target_out);
    120   KitStatus (*load_executable)(Compiler*, const EmuLoadOptions*,
    121                                EmuLoadedImage* out);
          /* Takes a named byte slice plus an is_main flag and writes an index to
           * *out_index. */
    122   KitStatus (*map_object)(Compiler*, EmuProcess*, EmuLoadedImage*,
    123                           KitSlice name, KitSlice bytes, int is_main,
    124                           u32* out_index);
          /* Iterator pair: dyn_needed_iter fills an iterator from a loaded
           * object; dyn_needed_next advances it and writes a KitSlice.
           * NOTE(review): the int return presumably signals "item produced" vs
           * "exhausted" -- confirm. */
    125   void (*dyn_needed_iter)(const EmuLoadedObject*, EmuDynNeededIter* out);
    126   int (*dyn_needed_next)(EmuProcess*, EmuDynNeededIter*, KitSlice* out);
          /* Symbol lookup by name (KitSlice) or by u64 index; both fill an
           * EmuDynSymbol. */
    127   KitStatus (*dyn_symbol_lookup)(EmuProcess*, const EmuLoadedObject*,
    128                                  KitSlice symbol, EmuDynSymbol* out);
    129   KitStatus (*dyn_symbol_by_index)(EmuProcess*, const EmuLoadedObject*,
    130                                    u64 symbol_index, EmuDynSymbol* out);
          /* Relocation iterator pair, same shape as the dyn_needed pair but
           * selected by an EmuDynRelocTableKind and yielding EmuDynReloc. */
    131   void (*reloc_iter)(const EmuLoadedObject*, EmuDynRelocTableKind table,
    132                      EmuDynRelocIter* out);
    133   int (*reloc_next)(EmuProcess*, EmuDynRelocIter*, EmuDynReloc* out);
          /* Writes an EmuDynRelocClass to *cls and a u32 to *kind_out for one
           * relocation. */
    134   KitStatus (*reloc_classify)(EmuProcess*, const EmuLoadedObject*,
    135                               const EmuDynReloc*, EmuDynRelocClass* cls,
    136                               u32* kind_out);
          /* Unlike the per-arch ops structs' reloc_from(wire_type), this one also
           * takes the KitArchKind. */
    137   u32 (*reloc_from)(KitArchKind arch, u32 wire_type);
    138 } ObjFormatEmuOps;
    139 
    140 typedef struct ObjFormatImpl {
          /* Per-object-format vtable: identity, read/emit/link callbacks, policy
           * flags and per-arch lookups. Instances are obtained through
           * obj_format_lookup (by ObjFmt) and obj_format_lookup_bin (by
           * KitBinFmt), both of which return a const pointer to this struct. */
    141   ObjFmt kind;
    142   KitBinFmt bin_fmt;
          /* Canonical format name; obj_format_fmt_from_name matches against this
           * spelling. NOTE(review): read_name / read_dso_name are presumably
           * labels for the read / read_dso callbacks -- confirm. */
    143   const char* name;
    144   const char* read_name;
    145   const char* read_dso_name;
    146 
    147   ObjFormatEmitFn emit;
    148   ObjFormatReadFn read;
    149   ObjFormatReadDsoFn read_dso;
    150   ObjFormatLinkEmitFn link_emit;
    151   ObjFormatLayoutDynFn layout_dyn;
    152   ObjFormatFreeDynFn free_dyn;
    153   const ObjFormatEmuOps* emu;
          /* NOTE(review): flag is undocumented here; its semantics are not
           * visible in this header. */
    154   u8 split_sections_as_atoms;
    155 
    156   /* C source-level symbol prefix the format prepends on disk: "_" for
    157    * Mach-O, "" (or NULL, treated as "") for ELF / COFF / Wasm. Read by
    158    * obj_format_c_mangle / obj_format_demangle_c. */
    159   const char* c_label_prefix;
    160   /* Default entry symbol name for a freshly created Linker on this
    161    * format: "_main" for Mach-O (LC_MAIN), "mainCRTStartup" for COFF,
    162    * "_start" for ELF / Wasm. NULL means "_start". */
    163   const char* default_entry_name;
    164   /* Carries DWARF debug sections file-only (not mapped into a loadable
    165    * segment): ELF=1, Mach-O=1, COFF=0. */
    166   u8 carries_file_only_debug;
    167   /* Builds its own static GOT / non-lazy pointer table at link time even
    168    * for a static image: Mach-O=1, else 0. */
    169   u8 builds_own_static_got;
    170   /* COFF pulls an archive member to satisfy a *weak* undef reference
    171    * (binutils/PE COMDAT semantics); ELF/Mach-O only pull for strong
    172    * undefs. COFF=1, else 0. */
    173   u8 weak_undef_pulls_archive_member;
    174   /* Can represent the ELF/Mach-O TLS-access symbol features the CG layer
    175    * mints (local-exec/initial-exec/local-dynamic/general-dynamic): ELF=1,
    176    * Mach-O=1; COFF (Windows TEB model) and Wasm have no such representation,
    177    * =0. Read by obj_format_supports_symbol_feature. */
    178   u8 tls_symbol_features;
    179   /* Resolves weak-external / undefined references via the mingw single-
    180    * underscore alias convention (e.g. `__set_app_type` <-> `_set_app_type`)
    181    * during link symbol resolution: COFF=1, else 0. Read by
    182    * obj_format_weak_extern_underscore_alias. */
    183   u8 weak_extern_underscore_alias;
    184 
    185   /* ---- C-source backend (c_target) emission spellings ----
    186    *
    187    * The portable C backend re-emits symbols as C source; a few constructs
    188    * spell differently per object format. These fields let c_target read the
    189    * spelling from the format vtable instead of branching on format identity.
    190    *
    191    * alias_via_thunk: how an aliased symbol is re-emitted. 0 = the format
    192    * accepts `__attribute__((alias("target")))` directly (ELF / PE-COFF /
    193    * Wasm). 1 = the format has no working alias attribute, so the backend
    194    * emits a forwarding thunk function instead (Mach-O). */
    195   u8 alias_via_thunk;
    196   /* weak_undef_attr: the GCC/Clang attribute spelling that declares an
    197    * *undefined* weak reference. "weak" works for ELF / PE-COFF / Wasm; on
    198    * Mach-O the `weak` attribute requires a definition, so an undefined weak
    199    * ref must use "weak_import". NULL is treated as "weak". */
    200   const char* weak_undef_attr;
    201 
    202   /* Inject a synthetic input object before symbol resolution. NULL when
    203    * the format synthesizes nothing. */
    204   ObjFormatSynthInputsFn synth_inputs;
    205 
          /* Per-arch ops lookups, one pair per container family: by KitArchKind
           * and by the family's on-disk machine identifier (e_machine / cputype /
           * machine). NOTE(review): presumably a format only sets the pair for
           * its own family and leaves the others NULL, and a lookup returns NULL
           * for an unsupported arch -- confirm. */
    206   const ObjElfArchOps* (*elf_arch)(KitArchKind);
    207   const ObjElfArchOps* (*elf_machine)(u32 e_machine);
    208   const ObjMachoArchOps* (*macho_arch)(KitArchKind);
    209   const ObjMachoArchOps* (*macho_cputype)(u32 cputype);
    210   const ObjCoffArchOps* (*coff_arch)(KitArchKind);
    211   const ObjCoffArchOps* (*coff_machine)(u16 machine);
    212 
    213   /* Optional format-specific linker ingestion policy. */
    214   int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out);
    215   Sym (*archive_hint)(Compiler*, const char* archive_name);
    216   ObjFormatArchiveAction (*archive_member)(Compiler*,
    217                                            const ObjFormatArchiveMember*,
    218                                            ObjBuilder** out);
    219 } ObjFormatImpl;
    220 
    221 const ObjFormatImpl* obj_format_lookup(ObjFmt fmt);
        /* obj_format_lookup (above) and obj_format_lookup_bin return the format
         * vtable for an ObjFmt or a KitBinFmt respectively.
         * NOTE(review): the result for an unknown value (presumably NULL) is not
         * documented here -- confirm before dereferencing unchecked. */
    222 const ObjFormatImpl* obj_format_lookup_bin(KitBinFmt fmt);
        /* Takes raw bytes and two out-parameters: a KitBinFmt (bin_out) and an
         * ObjFormatDsoReader (out).
         * NOTE(review): the meaning of the int return and whether the
         * out-parameters may be NULL are not stated in this header -- confirm. */
    223 int obj_format_dso_reader_for_bytes(const u8* data, size_t len,
    224                                     KitBinFmt* bin_out,
    225                                     ObjFormatDsoReader* out);
    226 
    227 /* Internal name<->KitObjFmt mapping, backed by the ObjFormatImpl name
    228  * list. The thin public KIT_API wrappers (kit_obj_fmt_from_name /
    229  * kit_obj_fmt_name, declared in include/kit/object.h) are added in
    230  * src/api by a later wave; these are the internal data helpers.
    231  *
    232  * obj_format_fmt_from_name returns 1 and writes *out on a match
    233  * (case-sensitive, matching the canonical ObjFormatImpl.name spelling and
    234  * any registered alias); returns 0 on an unknown name. obj_format_fmt_name
    235  * returns the canonical NUL-terminated literal name, or NULL for an
    236  * out-of-range KitObjFmt.
         *
         * NOTE(review): these two helpers are typed on KitObjFmt, while
         * obj_format_lookup and ObjFormatImpl.kind use ObjFmt; presumably the
         * two names denote the same type -- confirm. */
    237 int obj_format_fmt_from_name(const char* name, KitObjFmt* out);
    238 const char* obj_format_fmt_name(KitObjFmt fmt);
    239 
    240 #endif