object_detect.c (12236B)
1 /* Binary format and target detection from object header bytes. */ 2 3 #include <kit/cg.h> 4 #include <kit/config.h> 5 #include <kit/object.h> 6 7 #include "core/core.h" 8 #include "obj/elf/elf.h" 9 #include "obj/format.h" 10 11 /* COFF Machine numbers recognized as the COFF *binary* format by 12 * kit_detect_fmt. This is format membership only — the arch mapping (in 13 * detect_coff) routes through the registry's coff_machine reverse map. 14 * 15 * The set here is intentionally broader than the registry's coff_machine 16 * ops (which models only the link/codegen-supported AMD64 / ARM64): a COFF 17 * header for any of these machines should still be classified as COFF so 18 * the reader/target paths report the precise reason it cannot be consumed. 19 * ARM64EC (0xA641) is plain AArch64 (same encoding, differing ABI); the 20 * registry aliases it to ARM64 before lookup (coff.h). */ 21 static const u16 COFF_MACHINES[] = { 22 0x8664, /* AMD64 */ 23 0x014C, /* I386 */ 24 0xAA64, /* ARM64 */ 25 0xA641, /* ARM64EC */ 26 0x01C4, /* ARMNT */ 27 0x5032, /* RISCV32 */ 28 0x5064, /* RISCV64 */ 29 }; 30 31 /* Is `machine` a COFF Machine number we classify as the COFF format? */ 32 static int coff_machine_known(u16 machine) { 33 size_t i; 34 for (i = 0; i < sizeof COFF_MACHINES / sizeof COFF_MACHINES[0]; i++) { 35 if (COFF_MACHINES[i] == machine) return 1; 36 } 37 return 0; 38 } 39 40 KitBinFmt kit_detect_fmt(const uint8_t* data, size_t len) { 41 u32 m; 42 u16 coff_machine; 43 44 if (!data) return KIT_BIN_UNKNOWN; 45 if (len >= 8 && data[0] == '!' && data[1] == '<' && data[2] == 'a' && 46 data[3] == 'r' && data[4] == 'c' && data[5] == 'h' && data[6] == '>' && 47 data[7] == '\n') { 48 return KIT_BIN_AR; 49 } 50 if (len >= 4 && data[0] == 0x7f && data[1] == 'E' && data[2] == 'L' && 51 data[3] == 'F') { 52 return KIT_BIN_ELF; 53 } 54 if (len >= 4 && data[0] == 0x00 && data[1] == 'a' && data[2] == 's' && 55 data[3] == 'm') { 56 return KIT_BIN_WASM; 57 } 58 if (len >= 4) { 59 m = (u32)data[0] | ((u32)data[1] << 8) | ((u32)data[2] << 16) | 60 ((u32)data[3] << 24); 61 if (m == 0xFEEDFACEu || m == 0xFEEDFACFu || m == 0xCEFAEDFEu || 62 m == 0xCFFAEDFEu || m == 0xCAFEBABEu) { 63 return KIT_BIN_MACHO; 64 } 65 } 66 if (len >= 2 && data[0] == 'M' && data[1] == 'Z') { 67 return KIT_BIN_PE; 68 } 69 if (len >= 2) { 70 coff_machine = (u16)data[0] | ((u16)data[1] << 8); 71 if (coff_machine_known(coff_machine)) return KIT_BIN_COFF; 72 } 73 /* Microsoft "short import" record: Sig1=0, Sig2=0xFFFF. Routed 74 * through read_coff (which dispatches to read_coff_short_import). 75 * The header continues with a Machine word, which we also sanity- 76 * check so a stray 00 00 FF FF prefix on some other format does 77 * not mis-route. */ 78 if (len >= 8 && data[0] == 0x00 && data[1] == 0x00 && data[2] == 0xFF && 79 data[3] == 0xFF) { 80 u16 mach = (u16)data[6] | ((u16)data[7] << 8); 81 if (coff_machine_known(mach)) return KIT_BIN_COFF; 82 } 83 return KIT_BIN_UNKNOWN; 84 } 85 86 static void detect_target_defaults(KitTargetSpec* t) { 87 t->big_endian = 0; 88 t->pic = KIT_PIC_NONE; 89 t->code_model = KIT_CM_DEFAULT; 90 t->float_abi = KIT_FLOAT_ABI_DEFAULT; 91 } 92 93 static void detect_set_ptr(KitTargetSpec* t, KitArchKind arch) { 94 /* kit_arch_ptr_size is the single source of truth for an arch's byte 95 * pointer width (cg.h); ptr_align tracks ptr_size for every arch this 96 * detector resolves. wasm32 reports 4 here; the wasm path in 97 * kit_detect_target sets its own spec and does not call detect_set_ptr, 98 * so wasm64's 8-byte width is unaffected. */ 99 uint8_t w = kit_arch_ptr_size(arch); 100 t->arch = arch; 101 t->ptr_size = w; 102 t->ptr_align = w; 103 } 104 105 static KitStatus detect_elf(const u8* d, size_t len, KitTargetSpec* out) { 106 u8 ei_class, ei_data, ei_osabi; 107 u16 e_machine; 108 if (len < 20) return KIT_MALFORMED; 109 ei_class = d[4]; 110 ei_data = d[5]; 111 ei_osabi = d[7]; 112 if (ei_data == 1) { 113 e_machine = (u16)d[18] | ((u16)d[19] << 8); 114 } else if (ei_data == 2) { 115 e_machine = (u16)d[19] | ((u16)d[18] << 8); 116 } else { 117 return KIT_MALFORMED; 118 } 119 120 detect_target_defaults(out); 121 out->big_endian = (ei_data == 2); 122 out->obj = KIT_OBJ_ELF; 123 124 /* Resolve the arch through the ELF format's machine reverse map 125 * (obj_elf_machine_class also splits EM_RISCV into RV32/RV64 by 126 * EI_CLASS). The registry models only the link/codegen arches, so the 127 * legacy 32-bit ABI-classifiable machines it does not carry (EM_386, 128 * EM_ARM) are mapped here explicitly to preserve detection. */ 129 { 130 const ObjElfArchOps* ops = obj_elf_machine_class(e_machine, ei_class); 131 if (ops) { 132 detect_set_ptr(out, ops->arch); 133 } else if (e_machine == 0x03) { /* EM_386 */ 134 detect_set_ptr(out, KIT_ARCH_X86_32); 135 } else if (e_machine == 0x28) { /* EM_ARM */ 136 detect_set_ptr(out, KIT_ARCH_ARM_32); 137 } else { 138 return KIT_UNSUPPORTED; 139 } 140 } 141 /* EI_CLASS must agree with the arch's pointer width: 32-bit arches are 142 * ELFCLASS32, 64-bit arches ELFCLASS64. EM_RISCV is already disambiguated 143 * by class above; this also rejects a class/machine mismatch such as a 144 * 64-bit arch object whose EI_CLASS byte claims ELFCLASS32. */ 145 if (ei_class != ((out->ptr_size == 4) ? 1u : 2u)) return KIT_MALFORMED; 146 if (ei_osabi == 0 || ei_osabi == 3) 147 out->os = KIT_OS_LINUX; 148 else if (ei_osabi == 9) 149 out->os = KIT_OS_FREEBSD; 150 else 151 out->os = KIT_OS_FREESTANDING; 152 153 /* Recover the float ABI from e_flags via the per-arch decoder so a detected 154 * target selects the matching runtime variant (rv32 ilp32 soft vs ilp32f 155 * single share an arch + pointer width and differ only here). e_flags is a 156 * 4-byte LE field after the three native-width addr fields: offset 36 on 157 * ELFCLASS32, 48 on ELFCLASS64. Only RISC-V supplies a decoder 158 * (float_abi_from_e_flags non-NULL); other arches leave the default. The 159 * decoder is only consulted when the header carries the full e_flags word, 160 * leaving DEFAULT (the wildcard) when it is truncated. */ 161 { 162 const ObjElfArchOps* ops = obj_elf_machine_class(e_machine, ei_class); 163 if (ops && ops->float_abi_from_e_flags) { 164 size_t flags_off = (ei_class == 1) ? 36u : 48u; 165 if (len >= flags_off + 4u) { 166 u32 e_flags; 167 if (ei_data == 1) 168 e_flags = (u32)d[flags_off] | ((u32)d[flags_off + 1] << 8) | 169 ((u32)d[flags_off + 2] << 16) | 170 ((u32)d[flags_off + 3] << 24); 171 else 172 e_flags = (u32)d[flags_off + 3] | ((u32)d[flags_off + 2] << 8) | 173 ((u32)d[flags_off + 1] << 16) | ((u32)d[flags_off] << 24); 174 out->float_abi = (u8)ops->float_abi_from_e_flags(e_flags); 175 } 176 } 177 } 178 return KIT_OK; 179 } 180 181 /* Resolve a COFF Machine number to a KitArchKind through the registry's 182 * coff_machine reverse map (which aliases ARM64EC -> ARM64). The registry 183 * models only the link/codegen arches (AMD64 / ARM64); the legacy 184 * ABI-classifiable machines it does not carry are mapped explicitly to 185 * preserve detection. Returns 1 and writes *out on success; 0 if the 186 * machine is unsupported. Shared by detect_coff (.obj) and detect_pe 187 * (linked image). */ 188 static int coff_machine_to_arch(u16 machine, KitArchKind* out) { 189 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 190 const ObjCoffArchOps* ops = 191 (fmt && fmt->coff_machine) ? fmt->coff_machine(machine) : NULL; 192 if (ops) { 193 *out = ops->arch; 194 } else if (machine == 0x014Cu) { /* IMAGE_FILE_MACHINE_I386 */ 195 *out = KIT_ARCH_X86_32; 196 } else if (machine == 0x01C4u) { /* IMAGE_FILE_MACHINE_ARMNT */ 197 *out = KIT_ARCH_ARM_32; 198 } else if (machine == 0x5032u) { /* IMAGE_FILE_MACHINE_RISCV32 */ 199 *out = KIT_ARCH_RV32; 200 } else if (machine == 0x5064u) { /* IMAGE_FILE_MACHINE_RISCV64 */ 201 *out = KIT_ARCH_RV64; 202 } else { 203 return 0; 204 } 205 return 1; 206 } 207 208 static KitStatus detect_coff(const u8* d, size_t len, KitTargetSpec* out) { 209 u16 machine; 210 KitArchKind arch; 211 if (len < 2) return KIT_MALFORMED; 212 machine = (u16)d[0] | ((u16)d[1] << 8); 213 if (!coff_machine_to_arch(machine, &arch)) return KIT_UNSUPPORTED; 214 detect_target_defaults(out); 215 out->obj = KIT_OBJ_COFF; 216 out->os = KIT_OS_WINDOWS; 217 detect_set_ptr(out, arch); 218 return KIT_OK; 219 } 220 221 /* PE image (DOS 'MZ' stub + "PE\0\0" signature). Unlike a bare .obj, the 222 * COFF Machine word lives in the file header at e_lfanew+4, not at offset 0 223 * (the DOS stub), so this can't reuse detect_coff's offset-0 read. Routes a 224 * well-formed image to KIT_OBJ_COFF / KIT_OS_WINDOWS; read_coff then 225 * dispatches the 'MZ' magic to read_coff_image. A 'MZ' prefix with no valid 226 * PE signature (a DOS-only stub) is rejected as malformed. */ 227 static KitStatus detect_pe(const u8* d, size_t len, KitTargetSpec* out) { 228 u32 e_lfanew, pe_sig; 229 u16 machine; 230 KitArchKind arch; 231 if (len < 64) return KIT_MALFORMED; /* DOS header */ 232 if (!(d[0] == 'M' && d[1] == 'Z')) return KIT_MALFORMED; 233 e_lfanew = (u32)d[60] | ((u32)d[61] << 8) | ((u32)d[62] << 16) | 234 ((u32)d[63] << 24); 235 /* Need the 4-byte PE signature + the 20-byte IMAGE_FILE_HEADER. */ 236 if ((u64)e_lfanew + 4u + 20u > (u64)len) return KIT_MALFORMED; 237 pe_sig = (u32)d[e_lfanew] | ((u32)d[e_lfanew + 1] << 8) | 238 ((u32)d[e_lfanew + 2] << 16) | ((u32)d[e_lfanew + 3] << 24); 239 if (pe_sig != 0x00004550u) return KIT_MALFORMED; /* "PE\0\0" */ 240 machine = (u16)d[e_lfanew + 4] | ((u16)d[e_lfanew + 5] << 8); 241 if (!coff_machine_to_arch(machine, &arch)) return KIT_UNSUPPORTED; 242 detect_target_defaults(out); 243 out->obj = KIT_OBJ_COFF; 244 out->os = KIT_OS_WINDOWS; 245 detect_set_ptr(out, arch); 246 return KIT_OK; 247 } 248 249 static KitStatus detect_macho(const u8* d, size_t len, KitTargetSpec* out) { 250 u32 magic, cputype; 251 int swap, is64; 252 if (len < 8) return KIT_MALFORMED; 253 magic = (u32)d[0] | ((u32)d[1] << 8) | ((u32)d[2] << 16) | ((u32)d[3] << 24); 254 switch (magic) { 255 case 0xFEEDFACEu: 256 swap = 0; 257 is64 = 0; 258 break; 259 case 0xFEEDFACFu: 260 swap = 0; 261 is64 = 1; 262 break; 263 case 0xCEFAEDFEu: 264 swap = 1; 265 is64 = 0; 266 break; 267 case 0xCFFAEDFEu: 268 swap = 1; 269 is64 = 1; 270 break; 271 default: 272 return KIT_MALFORMED; 273 } 274 if (!swap) { 275 cputype = 276 (u32)d[4] | ((u32)d[5] << 8) | ((u32)d[6] << 16) | ((u32)d[7] << 24); 277 } else { 278 cputype = 279 (u32)d[7] | ((u32)d[6] << 8) | ((u32)d[5] << 16) | ((u32)d[4] << 24); 280 } 281 detect_target_defaults(out); 282 out->obj = KIT_OBJ_MACHO; 283 out->os = KIT_OS_MACOS; 284 285 /* Resolve the arch through the Mach-O format's cputype reverse map. The 286 * registry models only the link/codegen arches (ARM64 / X86_64); the 287 * legacy 32-bit ABI-classifiable cputypes it does not carry (CPU_TYPE_X86, 288 * CPU_TYPE_ARM) are mapped explicitly to preserve detection. */ 289 { 290 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO); 291 const ObjMachoArchOps* ops = 292 (fmt && fmt->macho_cputype) ? fmt->macho_cputype(cputype) : NULL; 293 if (ops) { 294 detect_set_ptr(out, ops->arch); 295 } else if (cputype == 0x00000007u) { /* CPU_TYPE_X86 */ 296 detect_set_ptr(out, KIT_ARCH_X86_32); 297 } else if (cputype == 0x0000000Cu) { /* CPU_TYPE_ARM */ 298 detect_set_ptr(out, KIT_ARCH_ARM_32); 299 } else { 300 return KIT_UNSUPPORTED; 301 } 302 } 303 (void)is64; 304 return KIT_OK; 305 } 306 307 KitStatus kit_detect_target(const uint8_t* data, size_t len, 308 KitTargetSpec* out) { 309 KitBinFmt bin; 310 if (!data || !out) return KIT_INVALID; 311 bin = kit_detect_fmt(data, len); 312 switch (bin) { 313 #if KIT_OBJ_ELF_ENABLED 314 case KIT_BIN_ELF: 315 return detect_elf(data, len, out); 316 #endif 317 #if KIT_OBJ_COFF_ENABLED 318 case KIT_BIN_PE: 319 return detect_pe(data, len, out); 320 case KIT_BIN_COFF: 321 return detect_coff(data, len, out); 322 #endif 323 #if KIT_OBJ_MACHO_ENABLED 324 case KIT_BIN_MACHO: 325 return detect_macho(data, len, out); 326 #endif 327 #if KIT_OBJ_WASM_ENABLED 328 case KIT_BIN_WASM: { 329 KitTargetSpec t; 330 t.big_endian = 0; 331 t.pic = KIT_PIC_NONE; 332 t.code_model = KIT_CM_DEFAULT; 333 t.arch = KIT_ARCH_WASM; 334 t.ptr_size = 4; 335 t.ptr_align = 4; 336 t.obj = KIT_OBJ_WASM; 337 t.os = KIT_OS_WASI; 338 *out = t; 339 return KIT_OK; 340 } 341 #endif 342 default: 343 return KIT_UNSUPPORTED; 344 } 345 }