read_image.c (15857B)
1 /* PE32+ linked-image reader. Peer of read_elf_image / read_macho_image: 2 * parses a linked Windows executable (.exe) or DLL (.dll) into the neutral 3 * ObjImage view that kit_obj_open / objdump consume — segments, entry point, 4 * image base, dependencies + imports, dynamic symbols (exports + imports), 5 * and dynamic relocations (base relocs). A full section/symbol view is 6 * populated through the ObjBuilder Section table as well, so -h / -s / -d 7 * work the same way they do for ELF / Mach-O images. 8 * 9 * Dispatched from read_coff on the DOS 'MZ' magic (read.c). Handles both 10 * subkinds: IMAGE_FILE_DLL clear -> OBJ_KIND_EXEC, set -> OBJ_KIND_DYN. 11 * 12 * Leniency: truncated *core* headers (DOS / PE sig / file / optional / 13 * section table) panic -> the kit_obj_open setjmp turns that into 14 * KIT_MALFORMED. Malformed *sub-tables* (export / import / base-reloc 15 * directories) are skipped, yielding a partial-but-useful inspection view, 16 * matching read_elf_image / read_macho_image. */ 17 18 #include <string.h> 19 20 #include <kit/cg.h> 21 #include <kit/object.h> /* KIT_OBJ_RAW_PE_* reserved tags */ 22 23 #include "core/arena.h" 24 #include "core/heap.h" 25 #include "core/pool.h" 26 #include "core/slice.h" 27 #include "obj/coff/coff.h" 28 #include "obj/coff/read_util.h" 29 #include "obj/format.h" 30 31 static Sym intern(Compiler* c, const char* s, u32 n) { 32 return n ? pool_intern_slice(c->global, (Slice){.s = s, .len = n}) : 0; 33 } 34 35 /* ---- exports -> dynsyms + soname ---- 36 * Mirrors read_coff_dso's export-directory walk, but emits ObjImageSym 37 * entries (defined, value = ImageBase + func RVA) and sets the DLL's own 38 * Name as the image soname. Lenient: any out-of-range sub-table aborts the 39 * export view rather than panicking. */ 40 static void read_pe_exports(Compiler* c, ObjImage* im, const u8* data, 41 size_t len, const u8* shdrs, u16 nsec, 42 const u8* data_dir, u32 num_dirs, u64 image_base) { 43 if ((u32)IMAGE_DIRECTORY_ENTRY_EXPORT >= num_dirs) return; 44 const u8* dd = 45 data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT * COFF_DATA_DIRECTORY_SIZE; 46 u32 export_rva = coff_rd_u32(dd); 47 u32 export_size = coff_rd_u32(dd + 4); 48 if (!export_rva || !export_size) return; 49 50 u64 exp_off; 51 if (!coff_rva_to_offset(shdrs, nsec, export_rva, len, &exp_off)) return; 52 if (exp_off + COFF_EXPORT_DIR_SIZE > len) return; 53 const u8* ed = data + exp_off; 54 u32 name_rva = coff_rd_u32(ed + 12); 55 u32 num_funcs = coff_rd_u32(ed + 20); 56 u32 num_names = coff_rd_u32(ed + 24); 57 u32 eat_rva = coff_rd_u32(ed + 28); 58 u32 ent_rva = coff_rd_u32(ed + 32); 59 u32 ord_rva = coff_rd_u32(ed + 36); 60 61 /* soname = the DLL's own Name (DT_SONAME / LC_ID_DYLIB analogue). */ 62 if (name_rva) { 63 u64 noff; 64 if (coff_rva_to_offset(shdrs, nsec, name_rva, len, &noff)) { 65 const char* dn; 66 u32 dl = coff_read_cstr(data, len, noff, &dn); 67 if (dl) obj_image_set_soname(im, intern(c, dn, dl)); 68 } 69 } 70 71 if (!num_names) return; 72 u64 eat_off, ent_off, ord_off; 73 if (!coff_rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off)) return; 74 if (!coff_rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off)) return; 75 if (!coff_rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off)) return; 76 if (ent_off + (u64)num_names * 4u > len || 77 ord_off + (u64)num_names * 2u > len) 78 return; 79 if (eat_off + (u64)num_funcs * 4u > len) return; 80 81 for (u32 i = 0; i < num_names; ++i) { 82 u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u); 83 u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u); 84 if (ord >= num_funcs) continue; /* malformed; skip */ 85 u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u); 86 u64 noff; 87 if (!coff_rva_to_offset(shdrs, nsec, nrva, len, &noff)) continue; 88 const char* en; 89 u32 el = coff_read_cstr(data, len, noff, &en); 90 if (!el) continue; 91 92 ObjImageSym ds; 93 memset(&ds, 0, sizeof ds); 94 ds.name = intern(c, en, el); 95 ds.bind = SB_GLOBAL; 96 ds.kind = SK_FUNC; /* forwarders point at the export-dir string; still SK_FUNC */ 97 ds.section = OBJ_SEC_NONE; 98 ds.value = image_base + func_rva; 99 obj_image_add_dynsym(im, &ds); 100 } 101 } 102 103 /* ---- imports -> deps + undefined dynsyms ---- 104 * Walks the import directory descriptors. Each provider DLL becomes one 105 * ObjImageDep carrying its imported-name list; every by-name import also 106 * lands as an undefined ObjImageSym so -T lists imports like ELF .dynsym. 107 * By-ordinal imports are not named in v1 and are skipped. */ 108 static void read_pe_imports(Compiler* c, ObjImage* im, const u8* data, 109 size_t len, const u8* shdrs, u16 nsec, 110 const u8* data_dir, u32 num_dirs) { 111 if ((u32)IMAGE_DIRECTORY_ENTRY_IMPORT >= num_dirs) return; 112 const u8* dd = 113 data_dir + IMAGE_DIRECTORY_ENTRY_IMPORT * COFF_DATA_DIRECTORY_SIZE; 114 u32 imp_rva = coff_rd_u32(dd); 115 if (!imp_rva) return; 116 u64 imp_off; 117 if (!coff_rva_to_offset(shdrs, nsec, imp_rva, len, &imp_off)) return; 118 119 for (u32 d = 0;; ++d) { 120 u64 desc = imp_off + (u64)d * COFF_IMPORT_DESCRIPTOR_SIZE; 121 if (desc + COFF_IMPORT_DESCRIPTOR_SIZE > len) break; 122 u32 oft = coff_rd_u32(data + desc + 0); /* OriginalFirstThunk (ILT) */ 123 u32 dll_name_rva = coff_rd_u32(data + desc + 12); 124 u32 ft = coff_rd_u32(data + desc + 16); /* FirstThunk (IAT) */ 125 if (oft == 0 && dll_name_rva == 0 && ft == 0) break; /* null terminator */ 126 if (dll_name_rva == 0) continue; 127 128 u64 noff; 129 if (!coff_rva_to_offset(shdrs, nsec, dll_name_rva, len, &noff)) continue; 130 const char* dll; 131 u32 dll_len = coff_read_cstr(data, len, noff, &dll); 132 if (!dll_len) continue; 133 Sym dep_name = intern(c, dll, dll_len); 134 135 /* Prefer the ILT (OriginalFirstThunk); fall back to the IAT when the 136 * image was bound and the ILT is absent. */ 137 u32 thunk_rva = oft ? oft : ft; 138 Sym* imports = NULL; 139 u32 nimports = 0, cap = 0; 140 u64 toff; 141 if (thunk_rva && coff_rva_to_offset(shdrs, nsec, thunk_rva, len, &toff)) { 142 for (u32 t = 0;; ++t) { 143 u64 te = toff + (u64)t * COFF_THUNK_DATA64_SIZE; 144 if (te + COFF_THUNK_DATA64_SIZE > len) break; 145 u64 thunk = coff_rd_u64(data + te); 146 if (thunk == 0) break; /* null-terminated table */ 147 if (thunk & IMAGE_ORDINAL_FLAG64) continue; /* by-ordinal: skip (v1) */ 148 u32 ibn_rva = (u32)(thunk & 0x7fffffffu); 149 u64 hoff; 150 if (!coff_rva_to_offset(shdrs, nsec, ibn_rva, len, &hoff)) continue; 151 /* IMAGE_IMPORT_BY_NAME: u16 Hint, then NUL-terminated name. */ 152 const char* inm; 153 u32 il = coff_read_cstr(data, len, hoff + 2u, &inm); 154 if (!il) continue; 155 Sym isym = intern(c, inm, il); 156 157 if (nimports == cap) { 158 u32 ncap = cap ? cap * 2u : 8u; 159 Sym* grown = arena_array(c->scratch, Sym, ncap); 160 if (nimports) memcpy(grown, imports, sizeof(Sym) * nimports); 161 imports = grown; 162 cap = ncap; 163 } 164 imports[nimports++] = isym; 165 166 ObjImageSym us; 167 memset(&us, 0, sizeof us); 168 us.name = isym; 169 us.bind = SB_GLOBAL; 170 us.kind = SK_NOTYPE; /* PE import descriptors don't distinguish func/data */ 171 us.section = OBJ_SEC_NONE; 172 obj_image_add_dynsym(im, &us); 173 } 174 } 175 176 ObjImageDep dep; 177 dep.name = dep_name; 178 dep.imports = imports; /* transient scratch; add_dep deep-copies */ 179 dep.nimports = nimports; 180 obj_image_add_dep(im, &dep); 181 } 182 } 183 184 /* ---- base relocations -> dynrelocs ---- 185 * Walks the .reloc base-relocation blocks. Each DIR64/HIGHLOW fixup is a 186 * symbol-less load-bias adjustment, mapped to the arch's RELATIVE kind. 187 * ABSOLUTE entries are block padding and skipped. */ 188 static void read_pe_basereloc(ObjImage* im, const u8* data, size_t len, 189 const u8* shdrs, u16 nsec, const u8* data_dir, 190 u32 num_dirs, u64 image_base, 191 RelocKind relative_kind) { 192 if ((u32)IMAGE_DIRECTORY_ENTRY_BASERELOC >= num_dirs) return; 193 const u8* dd = 194 data_dir + IMAGE_DIRECTORY_ENTRY_BASERELOC * COFF_DATA_DIRECTORY_SIZE; 195 u32 rel_rva = coff_rd_u32(dd); 196 u32 rel_size = coff_rd_u32(dd + 4); 197 if (!rel_rva || !rel_size) return; 198 u64 rel_off; 199 if (!coff_rva_to_offset(shdrs, nsec, rel_rva, len, &rel_off)) return; 200 u64 end = rel_off + rel_size; 201 if (end > len) end = len; 202 203 u64 pos = rel_off; 204 while (pos + COFF_BASE_RELOCATION_SIZE <= end) { 205 u32 page_rva = coff_rd_u32(data + pos + 0); 206 u32 block_size = coff_rd_u32(data + pos + 4); 207 if (block_size < COFF_BASE_RELOCATION_SIZE) break; /* malformed */ 208 if (pos + block_size > end) block_size = (u32)(end - pos); 209 u32 nent = (block_size - COFF_BASE_RELOCATION_SIZE) / 2u; 210 for (u32 e = 0; e < nent; ++e) { 211 u16 ent = coff_rd_u16(data + pos + COFF_BASE_RELOCATION_SIZE + (u64)e * 2u); 212 u32 type = (u32)ent >> 12; 213 u32 off = (u32)ent & 0x0fffu; 214 if (type == IMAGE_REL_BASED_ABSOLUTE) continue; /* padding */ 215 if (type != IMAGE_REL_BASED_DIR64 && type != IMAGE_REL_BASED_HIGHLOW) 216 continue; 217 ObjImageReloc dr; 218 memset(&dr, 0, sizeof dr); 219 dr.section = OBJ_SEC_NONE; /* offset is a vaddr */ 220 dr.offset = image_base + page_rva + off; 221 dr.kind = relative_kind; 222 obj_image_add_dynreloc(im, &dr); 223 } 224 pos += block_size; 225 } 226 } 227 228 ObjBuilder* read_coff_image(Compiler* c, const char* name, const u8* data, 229 size_t len) { 230 (void)name; 231 232 /* ---- DOS header + PE signature (truncation panics) ---- */ 233 if (len < COFF_DOS_HEADER_SIZE) 234 compiler_panic(c, SRCLOC_NONE, 235 "read_coff_image: input shorter than DOS header"); 236 if (coff_rd_u16(data + 0) != IMAGE_DOS_SIGNATURE) 237 compiler_panic(c, SRCLOC_NONE, "read_coff_image: bad DOS magic"); 238 u32 e_lfanew = coff_rd_u32(data + 60); 239 u64 nt_end = 240 (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE; 241 if (nt_end > len) 242 compiler_panic(c, SRCLOC_NONE, 243 "read_coff_image: PE headers extend past end of file"); 244 if (coff_rd_u32(data + e_lfanew) != IMAGE_NT_SIGNATURE) 245 compiler_panic(c, SRCLOC_NONE, "read_coff_image: bad PE signature"); 246 247 /* ---- IMAGE_FILE_HEADER ---- */ 248 const u8* fh = data + e_lfanew + 4u; 249 u16 machine = coff_rd_u16(fh + 0); 250 u16 nsec = coff_rd_u16(fh + 2); 251 u16 size_of_opt = coff_rd_u16(fh + 16); 252 u16 chars = coff_rd_u16(fh + 18); 253 if (machine != IMAGE_FILE_MACHINE_AMD64 && 254 machine != IMAGE_FILE_MACHINE_ARM64 && 255 machine != IMAGE_FILE_MACHINE_ARM64EC) 256 compiler_panic(c, SRCLOC_NONE, "read_coff_image: unsupported machine %#x", 257 (u32)machine); 258 if (size_of_opt < COFF_OPT_HDR64_SIZE) 259 compiler_panic(c, SRCLOC_NONE, 260 "read_coff_image: optional header %u too small for PE32+", 261 (u32)size_of_opt); 262 263 /* ---- IMAGE_OPTIONAL_HEADER64 ---- */ 264 const u8* oh = fh + COFF_FILE_HEADER_SIZE; 265 if (coff_rd_u16(oh + 0) != IMAGE_NT_OPTIONAL_HDR64_MAGIC) 266 compiler_panic(c, SRCLOC_NONE, "read_coff_image: not PE32+"); 267 u32 entry_rva = coff_rd_u32(oh + 16); 268 u64 image_base = coff_rd_u64(oh + 24); 269 u32 sect_align = coff_rd_u32(oh + 32); 270 u16 subsystem = coff_rd_u16(oh + 68); 271 u16 dllchars = coff_rd_u16(oh + 70); 272 u32 num_dirs = coff_rd_u32(oh + 108); 273 if (num_dirs > COFF_NUM_DATA_DIRECTORIES) num_dirs = COFF_NUM_DATA_DIRECTORIES; 274 const u8* data_dir = oh + COFF_OPT_HDR64_SIZE - 275 COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; 276 277 /* ---- section table ---- */ 278 u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt; 279 if (shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE > len) 280 compiler_panic(c, SRCLOC_NONE, 281 "read_coff_image: section table extends past end of file"); 282 const u8* shdrs = data + shdrs_off; 283 284 /* Arch ops resolve the RELATIVE base-reloc kind (machine validated above). */ 285 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 286 const ObjCoffArchOps* aops = 287 (fmt && fmt->coff_machine) ? fmt->coff_machine(machine) : NULL; 288 if (!aops) 289 compiler_panic(c, SRCLOC_NONE, 290 "read_coff_image: no arch impl for machine %#x", 291 (u32)machine); 292 RelocKind relative_kind = (aops->arch == KIT_ARCH_X86_64) ? R_X64_RELATIVE 293 : (aops->arch == KIT_ARCH_ARM_64) ? R_AARCH64_RELATIVE 294 : R_X64_RELATIVE; 295 296 ObjBuilder* ob = obj_new(c); 297 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff_image: obj_new failed"); 298 ObjImage* im = obj_image_ensure( 299 ob, (chars & IMAGE_FILE_DLL) ? OBJ_KIND_DYN : OBJ_KIND_EXEC); 300 if (!im) 301 compiler_panic(c, SRCLOC_NONE, "read_coff_image: obj_image_ensure failed"); 302 obj_image_set_base(im, image_base); 303 obj_image_set_entry(im, entry_rva ? image_base + entry_rva : 0); 304 305 /* ---- sections + segments (dual-emit) ---- */ 306 for (u16 i = 0; i < nsec; ++i) { 307 const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE; 308 const char* raw = (const char*)sh; /* Name[8], NUL-padded (no long form) */ 309 u32 nlen = 0; 310 while (nlen < 8 && raw[nlen] != '\0') ++nlen; 311 u32 vsize = coff_rd_u32(sh + 8); 312 u32 vaddr = coff_rd_u32(sh + 12); 313 u32 rawsize = coff_rd_u32(sh + 16); 314 u32 rawptr = coff_rd_u32(sh + 20); 315 u32 ch = coff_rd_u32(sh + 36); 316 317 Sym sn = intern(c, raw, nlen); 318 u16 kind = coff_sec_kind(raw, nlen, ch); 319 u16 flags = coff_sec_flags(raw, nlen, ch); 320 u32 align = coff_sec_align(ch); 321 int is_bss = (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0; 322 u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS; 323 324 ObjSecId id = 325 obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, align, 0u, 0u, 0u); 326 if (id != OBJ_SEC_NONE) { 327 obj_section_set_ext(ob, id, OBJ_EXT_COFF, ch, 0); 328 obj_section_set_addr(ob, id, image_base + vaddr); 329 if (is_bss) { 330 obj_reserve_bss(ob, id, vsize ? vsize : rawsize, align); 331 } else if (rawsize) { 332 /* Images FileAlignment-pad raw data; copy at most VirtualSize, and 333 * clamp leniently to the file length (vs the strict .obj path). */ 334 u32 copy = rawsize; 335 if (vsize && vsize < copy) copy = vsize; 336 if ((u64)rawptr + copy > len) 337 copy = (rawptr < len) ? (u32)(len - rawptr) : 0; 338 if (copy) { 339 u8* dst = obj_reserve(ob, id, copy); 340 if (dst) memcpy(dst, data + rawptr, copy); 341 } 342 } 343 } 344 345 ObjSegment seg; 346 memset(&seg, 0, sizeof seg); 347 seg.name = sn; 348 seg.vaddr = image_base + vaddr; 349 seg.vsize = vsize; 350 seg.file_off = rawptr; 351 seg.file_size = rawsize; 352 seg.perms = ((ch & IMAGE_SCN_MEM_READ) ? OBJ_SEG_R : 0u) | 353 ((ch & IMAGE_SCN_MEM_WRITE) ? OBJ_SEG_W : 0u) | 354 ((ch & IMAGE_SCN_MEM_EXECUTE) ? OBJ_SEG_X : 0u); 355 seg.align = sect_align ? sect_align : 1u; 356 obj_image_add_segment(im, &seg); 357 } 358 359 /* ---- raw escape-hatch entries: 16 data dirs + subsystem + dllchars ---- */ 360 for (u32 i = 0; i < COFF_NUM_DATA_DIRECTORIES; ++i) { 361 const u8* e = data_dir + (u64)i * COFF_DATA_DIRECTORY_SIZE; 362 ObjImageRaw r; 363 r.tag = i; 364 r.value = (i < num_dirs) ? coff_rd_u32(e) : 0; 365 r.extra = (i < num_dirs) ? coff_rd_u32(e + 4) : 0; 366 obj_image_add_raw(im, &r); 367 } 368 { 369 ObjImageRaw r = {KIT_OBJ_RAW_PE_SUBSYSTEM, subsystem, 0}; 370 obj_image_add_raw(im, &r); 371 } 372 { 373 ObjImageRaw r = {KIT_OBJ_RAW_PE_DLLCHARS, dllchars, 0}; 374 obj_image_add_raw(im, &r); 375 } 376 377 read_pe_exports(c, im, data, len, shdrs, nsec, data_dir, num_dirs, image_base); 378 read_pe_imports(c, im, data, len, shdrs, nsec, data_dir, num_dirs); 379 read_pe_basereloc(im, data, len, shdrs, nsec, data_dir, num_dirs, image_base, 380 relative_kind); 381 382 obj_finalize(ob); 383 return ob; 384 }