kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

macho.h (8337B)


      1 /* Mach-O wire-format constants, structs, and per-arch reloc translators
      2  * shared between obj/macho_emit.c, obj/macho_read.c, and link/link_macho.c
      3  * (none of which exist yet).
      4  *
      5  * Private to src/. The public ObjBuilder/Linker surface is format-neutral
      6  * (obj/obj.h, link/link.h); the Mach-O spelling of those abstractions only
      7  * exists inside libkit.
      8  *
      9  * Scope: 64-bit little-endian only (MH_MAGIC_64). The per-arch reloc
     10  * mapping is split across macho_reloc_<arch>.c, mirroring the ELF
     11  * arrangement; emit_macho and the linker dispatch to the right
     12  * translator by Compiler.target.arch. */
     13 
     14 #ifndef KIT_OBJ_MACHO_H
     15 #define KIT_OBJ_MACHO_H
     16 
     17 #include "core/core.h"
     18 #include "obj/obj.h"
     19 
     20 /* ---- magic ---- */
     21 #define MH_MAGIC_64 0xfeedfacfu
     22 #define MH_CIGAM_64                                        \
     23   0xcffaedfeu /* byte-swapped (big-endian host reading LE) \
     24                */
     25 
     26 /* ---- cputype / cpusubtype (subset kit cares about) ---- */
     27 #define CPU_TYPE_X86 0x00000007
     28 #define CPU_TYPE_X86_64 0x01000007 /* CPU_TYPE_X86 | 0x01000000 (64-bit ABI bit) */
     29 #define CPU_TYPE_ARM 0x0000000C
     30 #define CPU_TYPE_ARM64 0x0100000C /* CPU_TYPE_ARM | 0x01000000 (64-bit ABI bit) */
     31 
     32 #define CPU_SUBTYPE_X86_64_ALL 3 /* generic subtype for CPU_TYPE_X86_64 */
     33 #define CPU_SUBTYPE_ARM64_ALL 0 /* generic subtype for CPU_TYPE_ARM64 */
     34 
     35 /* ---- filetype (MachHeader64.filetype) ---- */
     36 #define MH_OBJECT 0x1  /* relocatable .o (no segments split) */
     37 #define MH_EXECUTE 0x2 /* main executable */
     38 #define MH_DYLIB 0x6   /* dynamically bound shared library */
     39 #define MH_DYLINKER 0x7 /* dynamic link editor (the program named by LC_LOAD_DYLINKER) */
     40 #define MH_BUNDLE 0x8 /* dynamically bound bundle (plug-in) file */
     41 
     42 /* ---- mach_header flags (subset) ---- */
     43 #define MH_NOUNDEFS 0x00000001u
     44 #define MH_DYLDLINK 0x00000004u
     45 #define MH_TWOLEVEL 0x00000080u
     46 #define MH_SUBSECTIONS_VIA_SYMBOLS 0x00002000u
     47 #define MH_PIE 0x00200000u
     48 #define MH_HAS_TLV_DESCRIPTORS 0x00800000u
     49 
     50 /* ---- load command IDs (subset kit will emit / consume); listed as in
     51 #define LC_REQ_DYLD 0x80000000u
     70 
     71 /* ---- VM protection bits (segment maxprot / initprot) ---- */
     72 #define VM_PROT_READ 0x1u
     73 #define VM_PROT_WRITE 0x2u
     74 #define VM_PROT_EXECUTE 0x4u
     75 
     76 /* ---- LC_DYLD_CHAINED_FIXUPS encodings (subset kit emits/reads) ---- */
     77 #define DYLD_CHAINED_PTR_64 2u /* pointer_format */
     78 #define DYLD_CHAINED_IMPORT 1u /* imports_format */
     79 
     80 /* ---- header sizes: on-disk byte sizes of the fixed-layout records ---- */
     81 #define MACHO_HDR64_SIZE 32u /* MachHeader64: 8 x u32 */
     82 #define MACHO_SEGCMD64_SIZE 72u /* MachSegmentCmd64, excluding the section records that follow it */
     83 #define MACHO_SECT64_SIZE 80u /* MachSection64 */
     84 #define MACHO_SYMTAB_CMD_SIZE 24u /* MachSymtabCmd: 6 x u32 */
     85 #define MACHO_DYSYMTAB_CMD_SIZE 80u /* MachDysymtabCmd: 20 x u32 */
     86 #define MACHO_NLIST64_SIZE 16u /* MachNlist64 */
     87 #define MACHO_RELOC_SIZE 8u /* MachRelocInfo: 2 x u32 */
     88 
     89 /* ---- on-disk structures (LE) ---- */
     90 
     91 typedef struct MachHeader64 { /* 64-bit Mach-O file header; the load commands follow it */
     92   u32 magic;      /* MH_MAGIC_64 */
     93   u32 cputype;    /* CPU_TYPE_* */
     94   u32 cpusubtype; /* CPU_SUBTYPE_* (low 24 bits) | feature flags */
     95   u32 filetype;   /* MH_OBJECT / MH_EXECUTE / ... */
     96   u32 ncmds;      /* number of load commands */
     97   u32 sizeofcmds; /* total bytes of load commands */
     98   u32 flags;      /* MH_* header flags (MH_NOUNDEFS, MH_PIE, ...) */
     99   u32 reserved;   /* NOTE(review): presumably always written as 0 -- confirm in the emitter */
    100 } MachHeader64; /* 8 x u32; MACHO_HDR64_SIZE bytes on the wire */
    101 
    102 typedef struct MachLoadCmd { /* common two-word prefix shared by the LC_* command structs below */
    103   u32 cmd;     /* LC_* */
    104   u32 cmdsize; /* total size of this command in bytes, including these two words */
    105 } MachLoadCmd;
    106 
    107 /* LC_SEGMENT_64 load command: one per Mach-O segment.  The fixed part
    108  * below is followed inline by `nsects` MachSection64 records. */
    109 typedef struct MachSegmentCmd64 {
    110   u32 cmd;     /* LC_SEGMENT_64 */
    111   u32 cmdsize; /* sizeof(this) + nsects * sizeof(MachSection64) */
    112   char segname[16]; /* fixed 16-byte name field; not NUL-terminated when all 16 bytes are used */
    113   u64 vmaddr; /* memory address the segment is mapped at */
    114   u64 vmsize; /* size of the segment in memory, in bytes */
    115   u64 fileoff; /* file offset of the segment's contents */
    116   u64 filesize; /* number of bytes of the file that back the segment */
    117   u32 maxprot; /* maximum protection, VM_PROT_* mask */
    118   u32 initprot; /* initial protection, VM_PROT_* mask */
    119   u32 nsects; /* number of MachSection64 records following this struct */
    120   u32 flags; /* segment flags (no SG_* constants are defined in this header) */
    121 } MachSegmentCmd64;
    122 
    123 /* Mach-O section descriptor (MACHO_SECT64_SIZE bytes), embedded inside an LC_SEGMENT_64. */
    124 typedef struct MachSection64 {
    125   char sectname[16]; /* fixed 16-byte section name; not NUL-terminated when full */
    126   char segname[16]; /* name of the segment this section belongs to */
    127   u64 addr; /* memory address of the section */
    128   u64 size; /* size of the section in bytes */
    129   u32 offset; /* file offset of the section's contents */
    130   u32 align; /* power of 2 (so 3 means 8-byte align) */
    131   u32 reloff; /* file offset of this section's relocation entries (MachRelocInfo) */
    132   u32 nreloc; /* number of relocation entries at reloff */
    133   u32 flags; /* S_* type in the SECTION_TYPE bits | S_ATTR_* in the SECTION_ATTRIBUTES bits */
    134   u32 reserved1; /* meaning depends on section type (e.g. indirect-symbol-table index for symbol pointer / stub sections) */
    135   u32 reserved2; /* meaning depends on section type (e.g. stub size for S_SYMBOL_STUBS) */
    136   u32 reserved3; /* reserved */
    137 } MachSection64;
    138 
    139 typedef struct MachSymtabCmd { /* locates the symbol table and its string table in the file */
    140   u32 cmd; /* LC_SYMTAB */
    141   u32 cmdsize; /* size of this command; the struct is 6 x u32 (MACHO_SYMTAB_CMD_SIZE) */
    142   u32 symoff; /* file offset of the MachNlist64 array */
    143   u32 nsyms; /* number of MachNlist64 entries */
    144   u32 stroff; /* file offset of the string table */
    145   u32 strsize; /* size of the string table in bytes */
    146 } MachSymtabCmd;
    147 
    148 typedef struct MachDysymtabCmd { /* partitions the LC_SYMTAB symbols into local / defined-external / undefined runs */
    149   u32 cmd; /* LC_DYSYMTAB */
    150   u32 cmdsize; /* size of this command; the struct is 20 x u32 (MACHO_DYSYMTAB_CMD_SIZE) */
    151   u32 ilocalsym; /* symbol-table index of the first local symbol */
    152   u32 nlocalsym; /* number of local symbols */
    153   u32 iextdefsym; /* symbol-table index of the first externally defined symbol */
    154   u32 nextdefsym; /* number of externally defined symbols */
    155   u32 iundefsym; /* symbol-table index of the first undefined symbol */
    156   u32 nundefsym; /* number of undefined symbols */
    157   u32 tocoff; /* file offset of the table of contents */
    158   u32 ntoc; /* number of table-of-contents entries */
    159   u32 modtaboff; /* file offset of the module table */
    160   u32 nmodtab; /* number of module table entries */
    161   u32 extrefsymoff; /* file offset of the referenced-symbol table */
    162   u32 nextrefsyms; /* number of referenced-symbol table entries */
    163   u32 indirectsymoff; /* file offset of the indirect symbol table */
    164   u32 nindirectsyms; /* number of indirect symbol table entries */
    165   u32 extreloff; /* file offset of external relocation entries */
    166   u32 nextrel; /* number of external relocation entries */
    167   u32 locreloff; /* file offset of local relocation entries */
    168   u32 nlocrel; /* number of local relocation entries */
    169 } MachDysymtabCmd;
    170 
    171 /* nlist_64 symbol-table entry (MACHO_NLIST64_SIZE bytes). n_type packs N_STAB | N_PEXT | N_TYPE | N_EXT. */
    172 typedef struct MachNlist64 {
    173   u32 n_strx; /* offset of the symbol's name within the string table */
    174   u8 n_type; /* N_STAB / N_PEXT / N_TYPE / N_EXT bit fields */
    175   u8 n_sect; /* 1-based section index, 0 = NO_SECT */
    176   u16 n_desc; /* N_NO_DEAD_STRIP / N_WEAK_* bits and REFERENCE_FLAG_* value */
    177   u64 n_value; /* symbol value; interpretation depends on the N_TYPE field */
    178 } MachNlist64;
    179 
    180 /* ---- nlist n_type bits (masks over MachNlist64.n_type) ---- */
    181 #define N_STAB 0xe0u /* if any of these bits is set, the entry is a symbolic-debugging (stab) entry */
    182 #define N_PEXT 0x10u /* private external symbol bit */
    183 #define N_TYPE 0x0eu /* mask for the type bits; values listed below */
    184 #define N_EXT 0x01u /* external symbol bit */
    185 
    186 /* N_TYPE values (compare against n_type & N_TYPE) */
    187 #define N_UNDF 0x0u /* undefined; n_sect == NO_SECT */
    188 #define N_ABS 0x2u /* absolute; n_sect == NO_SECT */
    189 #define N_SECT 0xeu /* defined in section number n_sect */
    190 #define N_PBUD 0xcu /* prebound undefined (defined in a dylib) */
    191 #define N_INDR 0xau /* indirect: n_value is the string-table offset of the aliased symbol's name */
    192 
    193 #define NO_SECT 0u /* n_sect value meaning "symbol is not in any section" */
    194 
    195 /* n_desc bits (subset) */
    196 #define N_NO_DEAD_STRIP 0x0020u /* symbol must not be dead-stripped */
    197 #define N_WEAK_REF 0x0040u /* undefined symbol is a weak reference (may be missing at runtime) */
    198 #define N_WEAK_DEF 0x0080u /* defined symbol is a weak definition */
    199 #define REFERENCE_FLAG_UNDEFINED_NON_LAZY 0x0u /* reference-type value in the low bits of n_desc */
    200 #define REFERENCE_FLAG_UNDEFINED_LAZY 0x1u /* reference-type value in the low bits of n_desc */
    201 
    202 /* ---- section type / attributes (subset of section.flags) ---- */
    203 #define SECTION_TYPE 0x000000ffu
    204 #define SECTION_ATTRIBUTES 0xffffff00u
    205 
    206 #define S_REGULAR 0x0u
    207 #define S_ZEROFILL 0x1u
    208 #define S_CSTRING_LITERALS 0x2u
    209 #define S_NON_LAZY_SYMBOL_POINTERS 0x6u
    210 #define S_LAZY_SYMBOL_POINTERS 0x7u
    211 #define S_SYMBOL_STUBS 0x8u
    212 #define S_MOD_INIT_FUNC_POINTERS 0x9u
    213 #define S_MOD_TERM_FUNC_POINTERS 0xau
    214 #define S_COALESCED 0xbu
    215 #define S_INTERPOSING 0xdu
    216 #define S_THREAD_LOCAL_REGULAR 0x11u
    217 #define S_THREAD_LOCAL_ZEROFILL 0x12u
    218 #define S_THREAD_LOCAL_VARIABLES 0x13u
    219 #define S_THREAD_LOCAL_VARIABLE_POINTERS 0x14u
    220 #define S_THREAD_LOCAL_INIT_FUNCTION_POINTERS 0x15u
    221 
    222 #define S_ATTR_PURE_INSTRUCTIONS 0x80000000u
    223 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400u
    224 #define S_ATTR_DEBUG 0x02000000u
    225 #define S_ATTR_NO_DEAD_STRIP 0x10000000u
    226 
    227 /* ---- relocation_info, non-scattered form (the only form kit emits
    228  *      for arm64 / x86_64; the scattered variant is not modelled) ----
    229  *
    230  * Wire layout (little-endian):
    231  *   u32 r_address;       offset within the section the reloc patches
    232  *   u32 packed;          from bit 0 upward: r_symbolnum:24, r_pcrel:1,
    233  *                        r_length:2, r_extern:1, r_type:4 (bits 28-31)
    234  *
    235  * r_length is log2 of the patched width: 0=byte, 1=word (2 bytes), 2=long (4), 3=quad (8). */
    236 typedef struct MachRelocInfo {
    237   u32 r_address; /* offset within the section of the bytes to patch */
    238   u32 r_packed; /* the packed bitfield word laid out as described above */
    239 } MachRelocInfo;
    240 
    241 /* ---- arm64 reloc types (r_type field) ---- */
    242 #define ARM64_RELOC_UNSIGNED 0u
    243 #define ARM64_RELOC_SUBTRACTOR 1u
    244 #define ARM64_RELOC_BRANCH26 2u
    245 #define ARM64_RELOC_PAGE21 3u
    246 #define ARM64_RELOC_PAGEOFF12 4u
    247 #define ARM64_RELOC_GOT_LOAD_PAGE21 5u
    248 #define ARM64_RELOC_GOT_LOAD_PAGEOFF12 6u
    249 #define ARM64_RELOC_POINTER_TO_GOT 7u
    250 #define ARM64_RELOC_TLVP_LOAD_PAGE21 8u
    251 #define ARM64_RELOC_TLVP_LOAD_PAGEOFF12 9u
    252 #define ARM64_RELOC_ADDEND 10u
    253 
    254 /* ---- x86_64 reloc types (for the translator that lands when x64
    255  *      codegen does) ---- */
    256 #define X86_64_RELOC_UNSIGNED 0u
    257 #define X86_64_RELOC_SIGNED 1u
    258 #define X86_64_RELOC_BRANCH 2u
    259 #define X86_64_RELOC_GOT_LOAD 3u
    260 #define X86_64_RELOC_GOT 4u
    261 #define X86_64_RELOC_SUBTRACTOR 5u
    262 #define X86_64_RELOC_SIGNED_1 6u
    263 #define X86_64_RELOC_SIGNED_2 7u
    264 #define X86_64_RELOC_SIGNED_4 8u
    265 #define X86_64_RELOC_TLV 9u
    266 
    267 /* Per-arch translators between the kit-canonical RelocKind and the Mach-O
    268  * reloc type; the arm64 and x86_64 sets are declared with identical
    269  * signatures.  Returns (u32)-1 on unsupported kinds; the caller (emit_macho /
    270  * read_macho) panics with a diagnostic.  Stubs in macho_reloc_aarch64.c until the Phase 2 writer lands. */
    271 u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */); /* RelocKind -> ARM64_RELOC_* */
    272 u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */); /* NOTE(review): presumably the r_pcrel bit for this kind -- confirm */
    273 u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */); /* NOTE(review): presumably the r_length code for this kind -- confirm */
    274 u32 macho_aarch64_reloc_from(u32 macho_type); /* ARM64_RELOC_* -> RelocKind */
    275 u32 macho_x86_64_reloc_to(u32 kind /* RelocKind */); /* RelocKind -> X86_64_RELOC_* */
    276 u32 macho_x86_64_reloc_pcrel(u32 kind /* RelocKind */); /* NOTE(review): presumably the r_pcrel bit for this kind -- confirm */
    277 u32 macho_x86_64_reloc_length(u32 kind /* RelocKind */); /* NOTE(review): presumably the r_length code for this kind -- confirm */
    278 u32 macho_x86_64_reloc_from(u32 macho_type); /* X86_64_RELOC_* -> RelocKind */
    279 
    280 #endif