kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

pe-image-read.c (13980B)


      1 /* PE32+ linked-image reader round-trip (read_coff_image, no external
      2  * toolchain).
      3  *
      4  * Links a tiny PIE executable in memory with kit's own COFF linker — a
      5  * .text entry plus a .data slot that takes an absolute (R_ABS64) reference
      6  * to an imported ExitProcess from KERNEL32.dll (via a short-import shim) —
      7  * then re-opens the emitted bytes through the public kit_obj_open and
      8  * asserts the neutral image view the reader populates:
      9  *   - kind EXEC, nonzero entry / image base
     10  *   - segments + sections (one per PE section, .text executable)
     11  *   - dependency KERNEL32.dll carrying the ExitProcess import
     12  *   - dynamic symbol ExitProcess (undefined import)
     13  *   - base relocation(s) for the absolute .data pointer (PIE)
     14  *   - raw escape hatch: 16 data directories + subsystem + dllchars,
     15  *     IMPORT directory populated
     16  *
     17  * Runs on every host (the reader is ours); covers both x86_64 and aarch64
     18  * Windows targets. */
     19 
     20 #include <kit/core.h>
     21 #include <kit/link.h>
     22 #include <kit/object.h>
     23 #include <setjmp.h>
     24 #include <stdarg.h>
     25 #include <stdio.h>
     26 #include <stdlib.h>
     27 #include <string.h>
     28 
     29 #include "core/core.h"
     30 #include "core/pool.h"
     31 #include "link/link.h"
     32 #include "obj/obj.h"
     33 
     34 /* ---- short-import wire constants (mirror pe-import-smoke.c). ---- */
     35 #define SHIM_HEADER_SIZE 20u
     36 #define SHIM_SYM_CSTR "ExitProcess"
     37 #define SHIM_DLL_CSTR "KERNEL32.dll"
     38 #define SHIM_SYM_NUL_LEN 12u /* "ExitProcess\0" */
     39 #define SHIM_DLL_NUL_LEN 13u /* "KERNEL32.dll\0" */
     40 #define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN)
     41 #define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN)
     42 #define COFF_SHIMP_SIG2 0xFFFFu
     43 /* TypeFlags = Type=CODE(0) | (NameType=NAME(1) << 2) = 0x0004. */
     44 #define COFF_SHIMP_TYPEFLAGS 0x0004u
     45 
     46 /* ---- env vtables --------------------------------------------------- */
     47 
     48 static void* heap_alloc(KitHeap* h, size_t n, size_t a) {
     49   (void)h;
     50   (void)a;
     51   return n ? malloc(n) : NULL;
     52 }
     53 static void* heap_realloc(KitHeap* h, void* p, size_t o, size_t n, size_t a) {
     54   (void)h;
     55   (void)o;
     56   (void)a;
     57   return realloc(p, n);
     58 }
     59 static void heap_free(KitHeap* h, void* p, size_t n) {
     60   (void)h;
     61   (void)n;
     62   free(p);
     63 }
/* Heap vtable used by this test: installed as g_ctx.heap in main() and
 * passed to kit_writer_mem() in link_pe(). Holds the three malloc-backed
 * hooks; the trailing NULL is presumably a user-data slot (confirm against
 * the KitHeap declaration). */
static KitHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
     65 
     66 static void diag_emit(KitDiagSink* s, KitDiagKind k, KitSrcLoc loc,
     67                       const char* fmt, va_list ap) {
     68   static const char* names[] = {"note", "warning", "error", "fatal"};
     69   (void)s;
     70   (void)loc;
     71   fprintf(stderr, "%s: ", names[k]);
     72   vfprintf(stderr, fmt, ap);
     73   fputc('\n', stderr);
     74 }
/* Diagnostic sink routed through diag_emit; installed as g_ctx.diag in
 * main(). The meaning of the remaining three initializers (NULL, 0, 0) is
 * not visible in this file — see the KitDiagSink declaration. */
static KitDiagSink g_diag = {diag_emit, NULL, 0, 0};
/* Shared kit context for the whole test; zeroed and then populated (heap,
 * diag, now) at the top of main(). */
static KitContext g_ctx;
     77 
     78 static int g_failures;
     79 static const char* g_case = "?";
     80 #define EXPECT(cond, ...)                                          \
     81   do {                                                             \
     82     if (!(cond)) {                                                 \
     83       fprintf(stderr, "FAIL [%s] %s:%d: ", g_case, __FILE__, __LINE__); \
     84       fprintf(stderr, __VA_ARGS__);                                \
     85       fputc('\n', stderr);                                         \
     86       g_failures++;                                                \
     87     }                                                              \
     88   } while (0)
     89 
     90 /* ---- target / compiler ------------------------------------------- */
     91 
     92 static void target_windows(KitTargetSpec* t, KitArchKind arch) {
     93   memset(t, 0, sizeof *t);
     94   t->arch = arch;
     95   t->os = KIT_OS_WINDOWS;
     96   t->obj = KIT_OBJ_COFF;
     97   t->ptr_size = 8;
     98   t->ptr_align = 8;
     99   t->big_endian = false;
    100   t->pic = KIT_PIC_PIE;
    101   t->code_model = KIT_CM_SMALL;
    102 }
    103 
    104 static Compiler* make_compiler(const KitTargetSpec* t) {
    105   KitTargetOptions opts;
    106   KitTarget* target = NULL;
    107   KitCompiler* cc = NULL;
    108   memset(&opts, 0, sizeof opts);
    109   opts.spec = *t;
    110   if (kit_target_new(&g_ctx, &opts, &target) != KIT_OK || !target) return NULL;
    111   if (kit_compiler_new(target, &g_ctx, &cc) != KIT_OK || !cc) {
    112     kit_target_free(target);
    113     return NULL;
    114   }
    115   return (Compiler*)cc;
    116 }
    117 
    118 static void free_compiler(Compiler* c) {
    119   const KitTarget* target;
    120   if (!c) return;
    121   target = kit_compiler_target((KitCompiler*)c);
    122   kit_compiler_free((KitCompiler*)c);
    123   kit_target_free((KitTarget*)target);
    124 }
    125 
    126 /* ---- short-import shim builder ------------------------------------ */
    127 
    128 static void build_short_import(uint8_t buf[SHIM_TOTAL_LEN], uint16_t machine) {
    129   memset(buf, 0, SHIM_TOTAL_LEN);
    130   buf[2] = (uint8_t)(COFF_SHIMP_SIG2 & 0xFF); /* Sig2 = 0xFFFF */
    131   buf[3] = (uint8_t)((COFF_SHIMP_SIG2 >> 8) & 0xFF);
    132   buf[6] = (uint8_t)(machine & 0xFF);
    133   buf[7] = (uint8_t)((machine >> 8) & 0xFF);
    134   buf[12] = (uint8_t)(SHIM_DATA_LEN & 0xFFu); /* SizeOfData */
    135   buf[13] = (uint8_t)((SHIM_DATA_LEN >> 8) & 0xFFu);
    136   buf[18] = (uint8_t)(COFF_SHIMP_TYPEFLAGS & 0xFF);
    137   buf[19] = (uint8_t)((COFF_SHIMP_TYPEFLAGS >> 8) & 0xFF);
    138   memcpy(buf + SHIM_HEADER_SIZE, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN);
    139   memcpy(buf + SHIM_HEADER_SIZE + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR,
    140          SHIM_DLL_NUL_LEN);
    141 }
    142 
    143 /* ---- program ObjBuilder ------------------------------------------- */
    144 
/* mainCRTStartup body: a single return. The exact encoding is irrelevant
 * to the reader; it differs per arch only so that the linker sees plausible
 * code. build_program() selects one of the two by architecture. */
/* x86_64: one-byte `ret`. */
static const uint8_t TEXT_X64[1] = {0xc3};                   /* ret */
/* aarch64: four bytes that build_program() writes to .text verbatim. */
static const uint8_t TEXT_AA64[4] = {0xc0, 0x03, 0x5f, 0xd6}; /* ret */
    149 
    150 static ObjBuilder* build_program(Compiler* c, KitArchKind arch) {
    151   ObjBuilder* ob = obj_new(c);
    152   Pool* p = c->global;
    153   Sym text_name = pool_intern_slice(p, SLICE_LIT(".text"));
    154   Sym data_name = pool_intern_slice(p, SLICE_LIT(".data"));
    155   Sym main_name = pool_intern_slice(p, SLICE_LIT("mainCRTStartup"));
    156   Sym exit_name = pool_intern_slice(p, SLICE_LIT(SHIM_SYM_CSTR));
    157   const uint8_t* text = arch == KIT_ARCH_X86_64 ? TEXT_X64 : TEXT_AA64;
    158   u32 text_len = arch == KIT_ARCH_X86_64 ? (u32)sizeof TEXT_X64 : (u32)sizeof TEXT_AA64;
    159   ObjSecId tsec = obj_section(ob, text_name, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
    160   ObjSecId dsec = obj_section(ob, data_name, SEC_DATA, SF_ALLOC | SF_WRITE, 8);
    161   ObjSymId exit_sym;
    162   uint8_t zeros[8] = {0};
    163 
    164   obj_write(ob, tsec, text, text_len);
    165   obj_symbol(ob, main_name, SB_GLOBAL, SK_FUNC, tsec, 0, text_len);
    166 
    167   /* .data: an 8-byte absolute pointer to the imported ExitProcess. The
    168    * R_ABS64 both forces ExitProcess to be imported and (in a PIE) yields a
    169    * base relocation, so the reader's import + base-reloc paths both run. */
    170   exit_sym = obj_symbol(ob, exit_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
    171   obj_write(ob, dsec, zeros, sizeof zeros);
    172   obj_reloc(ob, dsec, 0, R_ABS64, exit_sym, 0);
    173 
    174   obj_finalize(ob);
    175   return ob;
    176 }
    177 
    178 /* Link a PE image and copy the emitted bytes into a fresh malloc buffer
    179  * (so the reader runs fully independent of the producing compiler).
    180  * Returns NULL on failure. */
    181 static uint8_t* link_pe(Compiler* c, KitArchKind arch, uint16_t machine,
    182                         size_t* out_len) {
    183   ObjBuilder* prog = build_program(c, arch);
    184   uint8_t shim[SHIM_TOTAL_LEN];
    185   Linker* l;
    186   LinkImage* img;
    187   KitWriter* w = NULL;
    188   const uint8_t* bytes;
    189   size_t n = 0;
    190   uint8_t* copy = NULL;
    191 
    192   build_short_import(shim, machine);
    193 
    194   l = link_new(c);
    195   if (!l) return NULL;
    196   link_add_obj(l, prog);
    197   (void)link_add_obj_bytes(l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN);
    198   link_set_entry(l, KIT_SLICE_LIT("mainCRTStartup"));
    199   link_set_pie(l, 1);
    200   link_set_emit_static_exe(l, 1);
    201 
    202   img = link_resolve(l);
    203   if (!img) {
    204     link_free(l);
    205     return NULL;
    206   }
    207   if (kit_writer_mem(&g_heap, &w) != KIT_OK || !w) {
    208     link_image_free(img);
    209     link_free(l);
    210     return NULL;
    211   }
    212   link_emit_image_writer(img, w);
    213   bytes = kit_writer_mem_bytes(w, &n);
    214   if (bytes && n) {
    215     copy = (uint8_t*)malloc(n);
    216     if (copy) memcpy(copy, bytes, n);
    217   }
    218   *out_len = n;
    219   kit_writer_close(w);
    220   link_image_free(img);
    221   link_free(l);
    222   return copy;
    223 }
    224 
    225 /* ---- the round-trip assertions ------------------------------------ */
    226 
    227 static void run_case(const char* name, KitArchKind arch, uint16_t machine) {
    228   Compiler* c;
    229   uint8_t* pe;
    230   size_t pe_len = 0;
    231   KitTargetSpec t;
    232   KitObjFile* f = NULL;
    233   KitSlice input;
    234   KitObjImageInfo info;
    235   KitStatus st;
    236 
    237   g_case = name;
    238   target_windows(&t, arch);
    239   c = make_compiler(&t);
    240   if (!c) {
    241     EXPECT(0, "make_compiler failed");
    242     return;
    243   }
    244   if (setjmp(c->panic)) {
    245     EXPECT(0, "panic while linking PE");
    246     compiler_run_cleanups(c);
    247     free_compiler(c);
    248     return;
    249   }
    250   pe = link_pe(c, arch, machine, &pe_len);
    251   free_compiler(c);
    252   if (!pe || !pe_len) {
    253     EXPECT(0, "link_pe produced no bytes");
    254     free(pe);
    255     return;
    256   }
    257 
    258   /* Detection should route the image to COFF/Windows. */
    259   EXPECT(kit_detect_fmt(pe, pe_len) == KIT_BIN_PE, "detect_fmt != KIT_BIN_PE");
    260 
    261   input.data = pe;
    262   input.len = pe_len;
    263   st = kit_obj_open(&g_ctx, KIT_SLICE_LIT("image.exe"), &input, &f);
    264   EXPECT(st == KIT_OK && f, "kit_obj_open failed (st=%d)", (int)st);
    265   if (!f) {
    266     free(pe);
    267     return;
    268   }
    269 
    270   EXPECT(kit_obj_kind(f) == KIT_OBJ_KIND_EXEC, "kind != EXEC (%d)",
    271          (int)kit_obj_kind(f));
    272 
    273   st = kit_obj_image_info(f, &info);
    274   EXPECT(st == KIT_OK, "image_info failed");
    275   EXPECT(info.image_base != 0, "image_base == 0");
    276   EXPECT(info.entry > info.image_base, "entry (%llu) not above base (%llu)",
    277          (unsigned long long)info.entry, (unsigned long long)info.image_base);
    278 
    279   /* Sections + a .text section. */
    280   {
    281     KitObjSection idx;
    282     EXPECT(kit_obj_nsections(f) > 0, "no sections");
    283     EXPECT(kit_obj_section_by_name(f, KIT_SLICE_LIT(".text"), &idx) == KIT_OK,
    284            ".text section not found");
    285   }
    286 
    287   /* Segments: at least one, with an executable one present. */
    288   {
    289     KitObjSegIter* it = NULL;
    290     KitObjSegInfo seg;
    291     int nseg = 0, nexec = 0;
    292     EXPECT(kit_obj_segiter_new(f, &it) == KIT_OK, "segiter_new failed");
    293     while (it && kit_obj_segiter_next(it, &seg) == KIT_ITER_ITEM) {
    294       ++nseg;
    295       if (seg.perms & KIT_SEG_X) ++nexec;
    296       EXPECT(seg.vaddr >= info.image_base, "segment vaddr below image base");
    297     }
    298     kit_obj_segiter_free(it);
    299     EXPECT(nseg > 0, "no segments");
    300     EXPECT(nexec > 0, "no executable segment");
    301   }
    302 
    303   /* Dependency KERNEL32.dll carrying the ExitProcess import. */
    304   {
    305     KitObjDepIter* it = NULL;
    306     KitObjDepInfo dep;
    307     int found_dll = 0, found_imp = 0;
    308     EXPECT(kit_obj_depiter_new(f, &it) == KIT_OK, "depiter_new failed");
    309     while (it && kit_obj_depiter_next(it, &dep) == KIT_ITER_ITEM) {
    310       if (kit_slice_eq_cstr(dep.name, SHIM_DLL_CSTR)) {
    311         found_dll = 1;
    312         for (uint32_t i = 0; i < dep.nimports; ++i)
    313           if (kit_slice_eq_cstr(dep.imports[i], SHIM_SYM_CSTR)) found_imp = 1;
    314       }
    315     }
    316     kit_obj_depiter_free(it);
    317     EXPECT(found_dll, "KERNEL32.dll dependency not found");
    318     EXPECT(found_imp, "ExitProcess import not listed under KERNEL32.dll");
    319   }
    320 
    321   /* Dynamic symbol ExitProcess (undefined import). */
    322   {
    323     KitObjSymIter* it = NULL;
    324     KitObjSymInfo sym;
    325     int found = 0;
    326     EXPECT(kit_obj_dynsymiter_new(f, &it) == KIT_OK, "dynsymiter_new failed");
    327     while (it && kit_obj_symiter_next(it, &sym) == KIT_ITER_ITEM)
    328       if (kit_slice_eq_cstr(sym.name, SHIM_SYM_CSTR)) found = 1;
    329     kit_obj_symiter_free(it);
    330     EXPECT(found, "ExitProcess not in dynamic symbols");
    331   }
    332 
    333   /* Raw escape hatch: 16 data dirs + subsystem + dllchars; IMPORT set. */
    334   {
    335     KitObjImageRawIter* it = NULL;
    336     KitObjImageRaw r;
    337     int ndatadir = 0, have_subsys = 0, have_dllchars = 0;
    338     uint64_t import_rva = 0;
    339     EXPECT(kit_obj_image_rawiter_new(f, &it) == KIT_OK, "rawiter_new failed");
    340     while (it && kit_obj_image_rawiter_next(it, &r) == KIT_ITER_ITEM) {
    341       if (r.tag < 16) {
    342         ++ndatadir;
    343         if (r.tag == 1) import_rva = r.value; /* IMAGE_DIRECTORY_ENTRY_IMPORT */
    344       } else if (r.tag == KIT_OBJ_RAW_PE_SUBSYSTEM) {
    345         have_subsys = 1;
    346         EXPECT(r.value == 3, "subsystem != WINDOWS_CUI (%llu)",
    347                (unsigned long long)r.value);
    348       } else if (r.tag == KIT_OBJ_RAW_PE_DLLCHARS) {
    349         have_dllchars = 1;
    350       }
    351     }
    352     kit_obj_image_rawiter_free(it);
    353     EXPECT(ndatadir == 16, "expected 16 data directories, saw %d", ndatadir);
    354     EXPECT(have_subsys, "subsystem raw entry missing");
    355     EXPECT(have_dllchars, "dllcharacteristics raw entry missing");
    356     EXPECT(import_rva != 0, "IMPORT data directory RVA is zero");
    357   }
    358 
    359   /* Base relocations: the PIE .data absolute pointer needs at least one. */
    360   {
    361     KitObjRelocIter* it = NULL;
    362     KitObjReloc rel;
    363     int n = 0;
    364     EXPECT(kit_obj_dynreliter_new(f, &it) == KIT_OK, "dynreliter_new failed");
    365     while (it && kit_obj_reliter_next(it, &rel) == KIT_ITER_ITEM) ++n;
    366     kit_obj_reliter_free(it);
    367     EXPECT(n > 0, "no base relocations for PIE image");
    368   }
    369 
    370   kit_obj_free(f);
    371   free(pe);
    372 }
    373 
    374 int main(int argc, char** argv) {
    375   memset(&g_ctx, 0, sizeof g_ctx);
    376   g_ctx.heap = &g_heap;
    377   g_ctx.diag = &g_diag;
    378   g_ctx.now = -1;
    379 
    380   /* Optional: regenerate the committed x86_64 PE objdump fixture (no
    381    * asserts). Used to produce test/objdump/x86_64-windows/cases/pe-image.exe
    382    * from this same in-memory link, so the non-gated objdump golden is
    383    * reproducible. */
    384   if (argc > 1) {
    385     KitTargetSpec t;
    386     Compiler* c;
    387     target_windows(&t, KIT_ARCH_X86_64);
    388     c = make_compiler(&t);
    389     if (c && setjmp(c->panic) == 0) {
    390       size_t n = 0;
    391       uint8_t* pe = link_pe(c, KIT_ARCH_X86_64, 0x8664u, &n);
    392       if (pe && n) {
    393         FILE* fp = fopen(argv[1], "wb");
    394         if (fp) {
    395           fwrite(pe, 1, n, fp);
    396           fclose(fp);
    397         }
    398         fprintf(stderr, "wrote %zu bytes to %s\n", n, argv[1]);
    399       }
    400       free(pe);
    401     }
    402     free_compiler(c);
    403     return 0;
    404   }
    405 
    406   run_case("x86_64-windows", KIT_ARCH_X86_64, 0x8664u);
    407   run_case("aarch64-windows", KIT_ARCH_ARM_64, 0xAA64u);
    408 
    409   if (g_failures) {
    410     fprintf(stderr, "FAILED %d assertion(s)\n", g_failures);
    411     return 1;
    412   }
    413   fprintf(stderr, "OK pe-image-read\n");
    414   return 0;
    415 }