kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

read_image.c (15857B)


      1 /* PE32+ linked-image reader.  Peer of read_elf_image / read_macho_image:
      2  * parses a linked Windows executable (.exe) or DLL (.dll) into the neutral
      3  * ObjImage view that kit_obj_open / objdump consume — segments, entry point,
      4  * image base, dependencies + imports, dynamic symbols (exports + imports),
      5  * and dynamic relocations (base relocs).  A full section/symbol view is
      6  * populated through the ObjBuilder Section table as well, so -h / -s / -d
      7  * work the same way they do for ELF / Mach-O images.
      8  *
      9  * Dispatched from read_coff on the DOS 'MZ' magic (read.c).  Handles both
     10  * subkinds: IMAGE_FILE_DLL clear -> OBJ_KIND_EXEC, set -> OBJ_KIND_DYN.
     11  *
     12  * Leniency: truncated *core* headers (DOS / PE sig / file / optional /
     13  * section table) panic -> the kit_obj_open setjmp turns that into
     14  * KIT_MALFORMED.  Malformed *sub-tables* (export / import / base-reloc
     15  * directories) are skipped, yielding a partial-but-useful inspection view,
     16  * matching read_elf_image / read_macho_image. */
     17 
     18 #include <string.h>
     19 
     20 #include <kit/cg.h>
     21 #include <kit/object.h> /* KIT_OBJ_RAW_PE_* reserved tags */
     22 
     23 #include "core/arena.h"
     24 #include "core/heap.h"
     25 #include "core/pool.h"
     26 #include "core/slice.h"
     27 #include "obj/coff/coff.h"
     28 #include "obj/coff/read_util.h"
     29 #include "obj/format.h"
     30 
     31 static Sym intern(Compiler* c, const char* s, u32 n) {
     32   return n ? pool_intern_slice(c->global, (Slice){.s = s, .len = n}) : 0;
     33 }
     34 
     35 /* ---- exports -> dynsyms + soname ----
     36  * Mirrors read_coff_dso's export-directory walk, but emits ObjImageSym
     37  * entries (defined, value = ImageBase + func RVA) and sets the DLL's own
     38  * Name as the image soname.  Lenient: any out-of-range sub-table aborts the
     39  * export view rather than panicking. */
     40 static void read_pe_exports(Compiler* c, ObjImage* im, const u8* data,
     41                             size_t len, const u8* shdrs, u16 nsec,
     42                             const u8* data_dir, u32 num_dirs, u64 image_base) {
     43   if ((u32)IMAGE_DIRECTORY_ENTRY_EXPORT >= num_dirs) return;
     44   const u8* dd =
     45       data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT * COFF_DATA_DIRECTORY_SIZE;
     46   u32 export_rva = coff_rd_u32(dd);
     47   u32 export_size = coff_rd_u32(dd + 4);
     48   if (!export_rva || !export_size) return;
     49 
     50   u64 exp_off;
     51   if (!coff_rva_to_offset(shdrs, nsec, export_rva, len, &exp_off)) return;
     52   if (exp_off + COFF_EXPORT_DIR_SIZE > len) return;
     53   const u8* ed = data + exp_off;
     54   u32 name_rva = coff_rd_u32(ed + 12);
     55   u32 num_funcs = coff_rd_u32(ed + 20);
     56   u32 num_names = coff_rd_u32(ed + 24);
     57   u32 eat_rva = coff_rd_u32(ed + 28);
     58   u32 ent_rva = coff_rd_u32(ed + 32);
     59   u32 ord_rva = coff_rd_u32(ed + 36);
     60 
     61   /* soname = the DLL's own Name (DT_SONAME / LC_ID_DYLIB analogue). */
     62   if (name_rva) {
     63     u64 noff;
     64     if (coff_rva_to_offset(shdrs, nsec, name_rva, len, &noff)) {
     65       const char* dn;
     66       u32 dl = coff_read_cstr(data, len, noff, &dn);
     67       if (dl) obj_image_set_soname(im, intern(c, dn, dl));
     68     }
     69   }
     70 
     71   if (!num_names) return;
     72   u64 eat_off, ent_off, ord_off;
     73   if (!coff_rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off)) return;
     74   if (!coff_rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off)) return;
     75   if (!coff_rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off)) return;
     76   if (ent_off + (u64)num_names * 4u > len ||
     77       ord_off + (u64)num_names * 2u > len)
     78     return;
     79   if (eat_off + (u64)num_funcs * 4u > len) return;
     80 
     81   for (u32 i = 0; i < num_names; ++i) {
     82     u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u);
     83     u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u);
     84     if (ord >= num_funcs) continue; /* malformed; skip */
     85     u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u);
     86     u64 noff;
     87     if (!coff_rva_to_offset(shdrs, nsec, nrva, len, &noff)) continue;
     88     const char* en;
     89     u32 el = coff_read_cstr(data, len, noff, &en);
     90     if (!el) continue;
     91 
     92     ObjImageSym ds;
     93     memset(&ds, 0, sizeof ds);
     94     ds.name = intern(c, en, el);
     95     ds.bind = SB_GLOBAL;
     96     ds.kind = SK_FUNC; /* forwarders point at the export-dir string; still SK_FUNC */
     97     ds.section = OBJ_SEC_NONE;
     98     ds.value = image_base + func_rva;
     99     obj_image_add_dynsym(im, &ds);
    100   }
    101 }
    102 
    103 /* ---- imports -> deps + undefined dynsyms ----
    104  * Walks the import directory descriptors.  Each provider DLL becomes one
    105  * ObjImageDep carrying its imported-name list; every by-name import also
    106  * lands as an undefined ObjImageSym so -T lists imports like ELF .dynsym.
    107  * By-ordinal imports are not named in v1 and are skipped. */
    108 static void read_pe_imports(Compiler* c, ObjImage* im, const u8* data,
    109                             size_t len, const u8* shdrs, u16 nsec,
    110                             const u8* data_dir, u32 num_dirs) {
    111   if ((u32)IMAGE_DIRECTORY_ENTRY_IMPORT >= num_dirs) return;
    112   const u8* dd =
    113       data_dir + IMAGE_DIRECTORY_ENTRY_IMPORT * COFF_DATA_DIRECTORY_SIZE;
    114   u32 imp_rva = coff_rd_u32(dd);
    115   if (!imp_rva) return;
    116   u64 imp_off;
    117   if (!coff_rva_to_offset(shdrs, nsec, imp_rva, len, &imp_off)) return;
    118 
    119   for (u32 d = 0;; ++d) {
    120     u64 desc = imp_off + (u64)d * COFF_IMPORT_DESCRIPTOR_SIZE;
    121     if (desc + COFF_IMPORT_DESCRIPTOR_SIZE > len) break;
    122     u32 oft = coff_rd_u32(data + desc + 0);       /* OriginalFirstThunk (ILT) */
    123     u32 dll_name_rva = coff_rd_u32(data + desc + 12);
    124     u32 ft = coff_rd_u32(data + desc + 16);       /* FirstThunk (IAT) */
    125     if (oft == 0 && dll_name_rva == 0 && ft == 0) break; /* null terminator */
    126     if (dll_name_rva == 0) continue;
    127 
    128     u64 noff;
    129     if (!coff_rva_to_offset(shdrs, nsec, dll_name_rva, len, &noff)) continue;
    130     const char* dll;
    131     u32 dll_len = coff_read_cstr(data, len, noff, &dll);
    132     if (!dll_len) continue;
    133     Sym dep_name = intern(c, dll, dll_len);
    134 
    135     /* Prefer the ILT (OriginalFirstThunk); fall back to the IAT when the
    136      * image was bound and the ILT is absent. */
    137     u32 thunk_rva = oft ? oft : ft;
    138     Sym* imports = NULL;
    139     u32 nimports = 0, cap = 0;
    140     u64 toff;
    141     if (thunk_rva && coff_rva_to_offset(shdrs, nsec, thunk_rva, len, &toff)) {
    142       for (u32 t = 0;; ++t) {
    143         u64 te = toff + (u64)t * COFF_THUNK_DATA64_SIZE;
    144         if (te + COFF_THUNK_DATA64_SIZE > len) break;
    145         u64 thunk = coff_rd_u64(data + te);
    146         if (thunk == 0) break;                    /* null-terminated table */
    147         if (thunk & IMAGE_ORDINAL_FLAG64) continue; /* by-ordinal: skip (v1) */
    148         u32 ibn_rva = (u32)(thunk & 0x7fffffffu);
    149         u64 hoff;
    150         if (!coff_rva_to_offset(shdrs, nsec, ibn_rva, len, &hoff)) continue;
    151         /* IMAGE_IMPORT_BY_NAME: u16 Hint, then NUL-terminated name. */
    152         const char* inm;
    153         u32 il = coff_read_cstr(data, len, hoff + 2u, &inm);
    154         if (!il) continue;
    155         Sym isym = intern(c, inm, il);
    156 
    157         if (nimports == cap) {
    158           u32 ncap = cap ? cap * 2u : 8u;
    159           Sym* grown = arena_array(c->scratch, Sym, ncap);
    160           if (nimports) memcpy(grown, imports, sizeof(Sym) * nimports);
    161           imports = grown;
    162           cap = ncap;
    163         }
    164         imports[nimports++] = isym;
    165 
    166         ObjImageSym us;
    167         memset(&us, 0, sizeof us);
    168         us.name = isym;
    169         us.bind = SB_GLOBAL;
    170         us.kind = SK_NOTYPE; /* PE import descriptors don't distinguish func/data */
    171         us.section = OBJ_SEC_NONE;
    172         obj_image_add_dynsym(im, &us);
    173       }
    174     }
    175 
    176     ObjImageDep dep;
    177     dep.name = dep_name;
    178     dep.imports = imports;  /* transient scratch; add_dep deep-copies */
    179     dep.nimports = nimports;
    180     obj_image_add_dep(im, &dep);
    181   }
    182 }
    183 
    184 /* ---- base relocations -> dynrelocs ----
    185  * Walks the .reloc base-relocation blocks.  Each DIR64/HIGHLOW fixup is a
    186  * symbol-less load-bias adjustment, mapped to the arch's RELATIVE kind.
    187  * ABSOLUTE entries are block padding and skipped. */
    188 static void read_pe_basereloc(ObjImage* im, const u8* data, size_t len,
    189                               const u8* shdrs, u16 nsec, const u8* data_dir,
    190                               u32 num_dirs, u64 image_base,
    191                               RelocKind relative_kind) {
    192   if ((u32)IMAGE_DIRECTORY_ENTRY_BASERELOC >= num_dirs) return;
    193   const u8* dd =
    194       data_dir + IMAGE_DIRECTORY_ENTRY_BASERELOC * COFF_DATA_DIRECTORY_SIZE;
    195   u32 rel_rva = coff_rd_u32(dd);
    196   u32 rel_size = coff_rd_u32(dd + 4);
    197   if (!rel_rva || !rel_size) return;
    198   u64 rel_off;
    199   if (!coff_rva_to_offset(shdrs, nsec, rel_rva, len, &rel_off)) return;
    200   u64 end = rel_off + rel_size;
    201   if (end > len) end = len;
    202 
    203   u64 pos = rel_off;
    204   while (pos + COFF_BASE_RELOCATION_SIZE <= end) {
    205     u32 page_rva = coff_rd_u32(data + pos + 0);
    206     u32 block_size = coff_rd_u32(data + pos + 4);
    207     if (block_size < COFF_BASE_RELOCATION_SIZE) break; /* malformed */
    208     if (pos + block_size > end) block_size = (u32)(end - pos);
    209     u32 nent = (block_size - COFF_BASE_RELOCATION_SIZE) / 2u;
    210     for (u32 e = 0; e < nent; ++e) {
    211       u16 ent = coff_rd_u16(data + pos + COFF_BASE_RELOCATION_SIZE + (u64)e * 2u);
    212       u32 type = (u32)ent >> 12;
    213       u32 off = (u32)ent & 0x0fffu;
    214       if (type == IMAGE_REL_BASED_ABSOLUTE) continue; /* padding */
    215       if (type != IMAGE_REL_BASED_DIR64 && type != IMAGE_REL_BASED_HIGHLOW)
    216         continue;
    217       ObjImageReloc dr;
    218       memset(&dr, 0, sizeof dr);
    219       dr.section = OBJ_SEC_NONE; /* offset is a vaddr */
    220       dr.offset = image_base + page_rva + off;
    221       dr.kind = relative_kind;
    222       obj_image_add_dynreloc(im, &dr);
    223     }
    224     pos += block_size;
    225   }
    226 }
    227 
    228 ObjBuilder* read_coff_image(Compiler* c, const char* name, const u8* data,
    229                             size_t len) {
    230   (void)name;
    231 
    232   /* ---- DOS header + PE signature (truncation panics) ---- */
    233   if (len < COFF_DOS_HEADER_SIZE)
    234     compiler_panic(c, SRCLOC_NONE,
    235                    "read_coff_image: input shorter than DOS header");
    236   if (coff_rd_u16(data + 0) != IMAGE_DOS_SIGNATURE)
    237     compiler_panic(c, SRCLOC_NONE, "read_coff_image: bad DOS magic");
    238   u32 e_lfanew = coff_rd_u32(data + 60);
    239   u64 nt_end =
    240       (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE;
    241   if (nt_end > len)
    242     compiler_panic(c, SRCLOC_NONE,
    243                    "read_coff_image: PE headers extend past end of file");
    244   if (coff_rd_u32(data + e_lfanew) != IMAGE_NT_SIGNATURE)
    245     compiler_panic(c, SRCLOC_NONE, "read_coff_image: bad PE signature");
    246 
    247   /* ---- IMAGE_FILE_HEADER ---- */
    248   const u8* fh = data + e_lfanew + 4u;
    249   u16 machine = coff_rd_u16(fh + 0);
    250   u16 nsec = coff_rd_u16(fh + 2);
    251   u16 size_of_opt = coff_rd_u16(fh + 16);
    252   u16 chars = coff_rd_u16(fh + 18);
    253   if (machine != IMAGE_FILE_MACHINE_AMD64 &&
    254       machine != IMAGE_FILE_MACHINE_ARM64 &&
    255       machine != IMAGE_FILE_MACHINE_ARM64EC)
    256     compiler_panic(c, SRCLOC_NONE, "read_coff_image: unsupported machine %#x",
    257                    (u32)machine);
    258   if (size_of_opt < COFF_OPT_HDR64_SIZE)
    259     compiler_panic(c, SRCLOC_NONE,
    260                    "read_coff_image: optional header %u too small for PE32+",
    261                    (u32)size_of_opt);
    262 
    263   /* ---- IMAGE_OPTIONAL_HEADER64 ---- */
    264   const u8* oh = fh + COFF_FILE_HEADER_SIZE;
    265   if (coff_rd_u16(oh + 0) != IMAGE_NT_OPTIONAL_HDR64_MAGIC)
    266     compiler_panic(c, SRCLOC_NONE, "read_coff_image: not PE32+");
    267   u32 entry_rva = coff_rd_u32(oh + 16);
    268   u64 image_base = coff_rd_u64(oh + 24);
    269   u32 sect_align = coff_rd_u32(oh + 32);
    270   u16 subsystem = coff_rd_u16(oh + 68);
    271   u16 dllchars = coff_rd_u16(oh + 70);
    272   u32 num_dirs = coff_rd_u32(oh + 108);
    273   if (num_dirs > COFF_NUM_DATA_DIRECTORIES) num_dirs = COFF_NUM_DATA_DIRECTORIES;
    274   const u8* data_dir = oh + COFF_OPT_HDR64_SIZE -
    275                        COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
    276 
    277   /* ---- section table ---- */
    278   u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt;
    279   if (shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE > len)
    280     compiler_panic(c, SRCLOC_NONE,
    281                    "read_coff_image: section table extends past end of file");
    282   const u8* shdrs = data + shdrs_off;
    283 
    284   /* Arch ops resolve the RELATIVE base-reloc kind (machine validated above). */
    285   const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF);
    286   const ObjCoffArchOps* aops =
    287       (fmt && fmt->coff_machine) ? fmt->coff_machine(machine) : NULL;
    288   if (!aops)
    289     compiler_panic(c, SRCLOC_NONE,
    290                    "read_coff_image: no arch impl for machine %#x",
    291                    (u32)machine);
    292   RelocKind relative_kind = (aops->arch == KIT_ARCH_X86_64)  ? R_X64_RELATIVE
    293                             : (aops->arch == KIT_ARCH_ARM_64) ? R_AARCH64_RELATIVE
    294                                                               : R_X64_RELATIVE;
    295 
    296   ObjBuilder* ob = obj_new(c);
    297   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff_image: obj_new failed");
    298   ObjImage* im = obj_image_ensure(
    299       ob, (chars & IMAGE_FILE_DLL) ? OBJ_KIND_DYN : OBJ_KIND_EXEC);
    300   if (!im)
    301     compiler_panic(c, SRCLOC_NONE, "read_coff_image: obj_image_ensure failed");
    302   obj_image_set_base(im, image_base);
    303   obj_image_set_entry(im, entry_rva ? image_base + entry_rva : 0);
    304 
    305   /* ---- sections + segments (dual-emit) ---- */
    306   for (u16 i = 0; i < nsec; ++i) {
    307     const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE;
    308     const char* raw = (const char*)sh; /* Name[8], NUL-padded (no long form) */
    309     u32 nlen = 0;
    310     while (nlen < 8 && raw[nlen] != '\0') ++nlen;
    311     u32 vsize = coff_rd_u32(sh + 8);
    312     u32 vaddr = coff_rd_u32(sh + 12);
    313     u32 rawsize = coff_rd_u32(sh + 16);
    314     u32 rawptr = coff_rd_u32(sh + 20);
    315     u32 ch = coff_rd_u32(sh + 36);
    316 
    317     Sym sn = intern(c, raw, nlen);
    318     u16 kind = coff_sec_kind(raw, nlen, ch);
    319     u16 flags = coff_sec_flags(raw, nlen, ch);
    320     u32 align = coff_sec_align(ch);
    321     int is_bss = (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0;
    322     u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS;
    323 
    324     ObjSecId id =
    325         obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, align, 0u, 0u, 0u);
    326     if (id != OBJ_SEC_NONE) {
    327       obj_section_set_ext(ob, id, OBJ_EXT_COFF, ch, 0);
    328       obj_section_set_addr(ob, id, image_base + vaddr);
    329       if (is_bss) {
    330         obj_reserve_bss(ob, id, vsize ? vsize : rawsize, align);
    331       } else if (rawsize) {
    332         /* Images FileAlignment-pad raw data; copy at most VirtualSize, and
    333          * clamp leniently to the file length (vs the strict .obj path). */
    334         u32 copy = rawsize;
    335         if (vsize && vsize < copy) copy = vsize;
    336         if ((u64)rawptr + copy > len)
    337           copy = (rawptr < len) ? (u32)(len - rawptr) : 0;
    338         if (copy) {
    339           u8* dst = obj_reserve(ob, id, copy);
    340           if (dst) memcpy(dst, data + rawptr, copy);
    341         }
    342       }
    343     }
    344 
    345     ObjSegment seg;
    346     memset(&seg, 0, sizeof seg);
    347     seg.name = sn;
    348     seg.vaddr = image_base + vaddr;
    349     seg.vsize = vsize;
    350     seg.file_off = rawptr;
    351     seg.file_size = rawsize;
    352     seg.perms = ((ch & IMAGE_SCN_MEM_READ) ? OBJ_SEG_R : 0u) |
    353                 ((ch & IMAGE_SCN_MEM_WRITE) ? OBJ_SEG_W : 0u) |
    354                 ((ch & IMAGE_SCN_MEM_EXECUTE) ? OBJ_SEG_X : 0u);
    355     seg.align = sect_align ? sect_align : 1u;
    356     obj_image_add_segment(im, &seg);
    357   }
    358 
    359   /* ---- raw escape-hatch entries: 16 data dirs + subsystem + dllchars ---- */
    360   for (u32 i = 0; i < COFF_NUM_DATA_DIRECTORIES; ++i) {
    361     const u8* e = data_dir + (u64)i * COFF_DATA_DIRECTORY_SIZE;
    362     ObjImageRaw r;
    363     r.tag = i;
    364     r.value = (i < num_dirs) ? coff_rd_u32(e) : 0;
    365     r.extra = (i < num_dirs) ? coff_rd_u32(e + 4) : 0;
    366     obj_image_add_raw(im, &r);
    367   }
    368   {
    369     ObjImageRaw r = {KIT_OBJ_RAW_PE_SUBSYSTEM, subsystem, 0};
    370     obj_image_add_raw(im, &r);
    371   }
    372   {
    373     ObjImageRaw r = {KIT_OBJ_RAW_PE_DLLCHARS, dllchars, 0};
    374     obj_image_add_raw(im, &r);
    375   }
    376 
    377   read_pe_exports(c, im, data, len, shdrs, nsec, data_dir, num_dirs, image_base);
    378   read_pe_imports(c, im, data, len, shdrs, nsec, data_dir, num_dirs);
    379   read_pe_basereloc(im, data, len, shdrs, nsec, data_dir, num_dirs, image_base,
    380                     relative_kind);
    381 
    382   obj_finalize(ob);
    383   return ob;
    384 }