pe-image-read.c (13980B)
1 /* PE32+ linked-image reader round-trip (read_coff_image, no external 2 * toolchain). 3 * 4 * Links a tiny PIE executable in memory with kit's own COFF linker — a 5 * .text entry plus a .data slot that takes an absolute (R_ABS64) reference 6 * to an imported ExitProcess from KERNEL32.dll (via a short-import shim) — 7 * then re-opens the emitted bytes through the public kit_obj_open and 8 * asserts the neutral image view the reader populates: 9 * - kind EXEC, nonzero entry / image base 10 * - segments + sections (one per PE section, .text executable) 11 * - dependency KERNEL32.dll carrying the ExitProcess import 12 * - dynamic symbol ExitProcess (undefined import) 13 * - base relocation(s) for the absolute .data pointer (PIE) 14 * - raw escape hatch: 16 data directories + subsystem + dllchars, 15 * IMPORT directory populated 16 * 17 * Runs on every host (the reader is ours); covers both x86_64 and aarch64 18 * Windows targets. */ 19 20 #include <kit/core.h> 21 #include <kit/link.h> 22 #include <kit/object.h> 23 #include <setjmp.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "core/core.h" 30 #include "core/pool.h" 31 #include "link/link.h" 32 #include "obj/obj.h" 33 34 /* ---- short-import wire constants (mirror pe-import-smoke.c). ---- */ 35 #define SHIM_HEADER_SIZE 20u 36 #define SHIM_SYM_CSTR "ExitProcess" 37 #define SHIM_DLL_CSTR "KERNEL32.dll" 38 #define SHIM_SYM_NUL_LEN 12u /* "ExitProcess\0" */ 39 #define SHIM_DLL_NUL_LEN 13u /* "KERNEL32.dll\0" */ 40 #define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN) 41 #define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN) 42 #define COFF_SHIMP_SIG2 0xFFFFu 43 /* TypeFlags = Type=CODE(0) | (NameType=NAME(1) << 2) = 0x0004. */ 44 #define COFF_SHIMP_TYPEFLAGS 0x0004u 45 46 /* ---- env vtables --------------------------------------------------- */ 47 48 static void* heap_alloc(KitHeap* h, size_t n, size_t a) { 49 (void)h; 50 (void)a; 51 return n ? malloc(n) : NULL; 52 } 53 static void* heap_realloc(KitHeap* h, void* p, size_t o, size_t n, size_t a) { 54 (void)h; 55 (void)o; 56 (void)a; 57 return realloc(p, n); 58 } 59 static void heap_free(KitHeap* h, void* p, size_t n) { 60 (void)h; 61 (void)n; 62 free(p); 63 } 64 static KitHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; 65 66 static void diag_emit(KitDiagSink* s, KitDiagKind k, KitSrcLoc loc, 67 const char* fmt, va_list ap) { 68 static const char* names[] = {"note", "warning", "error", "fatal"}; 69 (void)s; 70 (void)loc; 71 fprintf(stderr, "%s: ", names[k]); 72 vfprintf(stderr, fmt, ap); 73 fputc('\n', stderr); 74 } 75 static KitDiagSink g_diag = {diag_emit, NULL, 0, 0}; 76 static KitContext g_ctx; 77 78 static int g_failures; 79 static const char* g_case = "?"; 80 #define EXPECT(cond, ...) \ 81 do { \ 82 if (!(cond)) { \ 83 fprintf(stderr, "FAIL [%s] %s:%d: ", g_case, __FILE__, __LINE__); \ 84 fprintf(stderr, __VA_ARGS__); \ 85 fputc('\n', stderr); \ 86 g_failures++; \ 87 } \ 88 } while (0) 89 90 /* ---- target / compiler ------------------------------------------- */ 91 92 static void target_windows(KitTargetSpec* t, KitArchKind arch) { 93 memset(t, 0, sizeof *t); 94 t->arch = arch; 95 t->os = KIT_OS_WINDOWS; 96 t->obj = KIT_OBJ_COFF; 97 t->ptr_size = 8; 98 t->ptr_align = 8; 99 t->big_endian = false; 100 t->pic = KIT_PIC_PIE; 101 t->code_model = KIT_CM_SMALL; 102 } 103 104 static Compiler* make_compiler(const KitTargetSpec* t) { 105 KitTargetOptions opts; 106 KitTarget* target = NULL; 107 KitCompiler* cc = NULL; 108 memset(&opts, 0, sizeof opts); 109 opts.spec = *t; 110 if (kit_target_new(&g_ctx, &opts, &target) != KIT_OK || !target) return NULL; 111 if (kit_compiler_new(target, &g_ctx, &cc) != KIT_OK || !cc) { 112 kit_target_free(target); 113 return NULL; 114 } 115 return (Compiler*)cc; 116 } 117 118 static void free_compiler(Compiler* c) { 119 const KitTarget* target; 120 if (!c) return; 121 target = kit_compiler_target((KitCompiler*)c); 122 kit_compiler_free((KitCompiler*)c); 123 kit_target_free((KitTarget*)target); 124 } 125 126 /* ---- short-import shim builder ------------------------------------ */ 127 128 static void build_short_import(uint8_t buf[SHIM_TOTAL_LEN], uint16_t machine) { 129 memset(buf, 0, SHIM_TOTAL_LEN); 130 buf[2] = (uint8_t)(COFF_SHIMP_SIG2 & 0xFF); /* Sig2 = 0xFFFF */ 131 buf[3] = (uint8_t)((COFF_SHIMP_SIG2 >> 8) & 0xFF); 132 buf[6] = (uint8_t)(machine & 0xFF); 133 buf[7] = (uint8_t)((machine >> 8) & 0xFF); 134 buf[12] = (uint8_t)(SHIM_DATA_LEN & 0xFFu); /* SizeOfData */ 135 buf[13] = (uint8_t)((SHIM_DATA_LEN >> 8) & 0xFFu); 136 buf[18] = (uint8_t)(COFF_SHIMP_TYPEFLAGS & 0xFF); 137 buf[19] = (uint8_t)((COFF_SHIMP_TYPEFLAGS >> 8) & 0xFF); 138 memcpy(buf + SHIM_HEADER_SIZE, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN); 139 memcpy(buf + SHIM_HEADER_SIZE + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR, 140 SHIM_DLL_NUL_LEN); 141 } 142 143 /* ---- program ObjBuilder ------------------------------------------- */ 144 145 /* mainCRTStartup body: a single return. The exact encoding is irrelevant 146 * to the reader; differ per arch only so the linker sees plausible code. */ 147 static const uint8_t TEXT_X64[1] = {0xc3}; /* ret */ 148 static const uint8_t TEXT_AA64[4] = {0xc0, 0x03, 0x5f, 0xd6}; /* ret */ 149 150 static ObjBuilder* build_program(Compiler* c, KitArchKind arch) { 151 ObjBuilder* ob = obj_new(c); 152 Pool* p = c->global; 153 Sym text_name = pool_intern_slice(p, SLICE_LIT(".text")); 154 Sym data_name = pool_intern_slice(p, SLICE_LIT(".data")); 155 Sym main_name = pool_intern_slice(p, SLICE_LIT("mainCRTStartup")); 156 Sym exit_name = pool_intern_slice(p, SLICE_LIT(SHIM_SYM_CSTR)); 157 const uint8_t* text = arch == KIT_ARCH_X86_64 ? TEXT_X64 : TEXT_AA64; 158 u32 text_len = arch == KIT_ARCH_X86_64 ? (u32)sizeof TEXT_X64 : (u32)sizeof TEXT_AA64; 159 ObjSecId tsec = obj_section(ob, text_name, SEC_TEXT, SF_ALLOC | SF_EXEC, 16); 160 ObjSecId dsec = obj_section(ob, data_name, SEC_DATA, SF_ALLOC | SF_WRITE, 8); 161 ObjSymId exit_sym; 162 uint8_t zeros[8] = {0}; 163 164 obj_write(ob, tsec, text, text_len); 165 obj_symbol(ob, main_name, SB_GLOBAL, SK_FUNC, tsec, 0, text_len); 166 167 /* .data: an 8-byte absolute pointer to the imported ExitProcess. The 168 * R_ABS64 both forces ExitProcess to be imported and (in a PIE) yields a 169 * base relocation, so the reader's import + base-reloc paths both run. */ 170 exit_sym = obj_symbol(ob, exit_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); 171 obj_write(ob, dsec, zeros, sizeof zeros); 172 obj_reloc(ob, dsec, 0, R_ABS64, exit_sym, 0); 173 174 obj_finalize(ob); 175 return ob; 176 } 177 178 /* Link a PE image and copy the emitted bytes into a fresh malloc buffer 179 * (so the reader runs fully independent of the producing compiler). 180 * Returns NULL on failure. */ 181 static uint8_t* link_pe(Compiler* c, KitArchKind arch, uint16_t machine, 182 size_t* out_len) { 183 ObjBuilder* prog = build_program(c, arch); 184 uint8_t shim[SHIM_TOTAL_LEN]; 185 Linker* l; 186 LinkImage* img; 187 KitWriter* w = NULL; 188 const uint8_t* bytes; 189 size_t n = 0; 190 uint8_t* copy = NULL; 191 192 build_short_import(shim, machine); 193 194 l = link_new(c); 195 if (!l) return NULL; 196 link_add_obj(l, prog); 197 (void)link_add_obj_bytes(l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN); 198 link_set_entry(l, KIT_SLICE_LIT("mainCRTStartup")); 199 link_set_pie(l, 1); 200 link_set_emit_static_exe(l, 1); 201 202 img = link_resolve(l); 203 if (!img) { 204 link_free(l); 205 return NULL; 206 } 207 if (kit_writer_mem(&g_heap, &w) != KIT_OK || !w) { 208 link_image_free(img); 209 link_free(l); 210 return NULL; 211 } 212 link_emit_image_writer(img, w); 213 bytes = kit_writer_mem_bytes(w, &n); 214 if (bytes && n) { 215 copy = (uint8_t*)malloc(n); 216 if (copy) memcpy(copy, bytes, n); 217 } 218 *out_len = n; 219 kit_writer_close(w); 220 link_image_free(img); 221 link_free(l); 222 return copy; 223 } 224 225 /* ---- the round-trip assertions ------------------------------------ */ 226 227 static void run_case(const char* name, KitArchKind arch, uint16_t machine) { 228 Compiler* c; 229 uint8_t* pe; 230 size_t pe_len = 0; 231 KitTargetSpec t; 232 KitObjFile* f = NULL; 233 KitSlice input; 234 KitObjImageInfo info; 235 KitStatus st; 236 237 g_case = name; 238 target_windows(&t, arch); 239 c = make_compiler(&t); 240 if (!c) { 241 EXPECT(0, "make_compiler failed"); 242 return; 243 } 244 if (setjmp(c->panic)) { 245 EXPECT(0, "panic while linking PE"); 246 compiler_run_cleanups(c); 247 free_compiler(c); 248 return; 249 } 250 pe = link_pe(c, arch, machine, &pe_len); 251 free_compiler(c); 252 if (!pe || !pe_len) { 253 EXPECT(0, "link_pe produced no bytes"); 254 free(pe); 255 return; 256 } 257 258 /* Detection should route the image to COFF/Windows. */ 259 EXPECT(kit_detect_fmt(pe, pe_len) == KIT_BIN_PE, "detect_fmt != KIT_BIN_PE"); 260 261 input.data = pe; 262 input.len = pe_len; 263 st = kit_obj_open(&g_ctx, KIT_SLICE_LIT("image.exe"), &input, &f); 264 EXPECT(st == KIT_OK && f, "kit_obj_open failed (st=%d)", (int)st); 265 if (!f) { 266 free(pe); 267 return; 268 } 269 270 EXPECT(kit_obj_kind(f) == KIT_OBJ_KIND_EXEC, "kind != EXEC (%d)", 271 (int)kit_obj_kind(f)); 272 273 st = kit_obj_image_info(f, &info); 274 EXPECT(st == KIT_OK, "image_info failed"); 275 EXPECT(info.image_base != 0, "image_base == 0"); 276 EXPECT(info.entry > info.image_base, "entry (%llu) not above base (%llu)", 277 (unsigned long long)info.entry, (unsigned long long)info.image_base); 278 279 /* Sections + a .text section. */ 280 { 281 KitObjSection idx; 282 EXPECT(kit_obj_nsections(f) > 0, "no sections"); 283 EXPECT(kit_obj_section_by_name(f, KIT_SLICE_LIT(".text"), &idx) == KIT_OK, 284 ".text section not found"); 285 } 286 287 /* Segments: at least one, with an executable one present. */ 288 { 289 KitObjSegIter* it = NULL; 290 KitObjSegInfo seg; 291 int nseg = 0, nexec = 0; 292 EXPECT(kit_obj_segiter_new(f, &it) == KIT_OK, "segiter_new failed"); 293 while (it && kit_obj_segiter_next(it, &seg) == KIT_ITER_ITEM) { 294 ++nseg; 295 if (seg.perms & KIT_SEG_X) ++nexec; 296 EXPECT(seg.vaddr >= info.image_base, "segment vaddr below image base"); 297 } 298 kit_obj_segiter_free(it); 299 EXPECT(nseg > 0, "no segments"); 300 EXPECT(nexec > 0, "no executable segment"); 301 } 302 303 /* Dependency KERNEL32.dll carrying the ExitProcess import. */ 304 { 305 KitObjDepIter* it = NULL; 306 KitObjDepInfo dep; 307 int found_dll = 0, found_imp = 0; 308 EXPECT(kit_obj_depiter_new(f, &it) == KIT_OK, "depiter_new failed"); 309 while (it && kit_obj_depiter_next(it, &dep) == KIT_ITER_ITEM) { 310 if (kit_slice_eq_cstr(dep.name, SHIM_DLL_CSTR)) { 311 found_dll = 1; 312 for (uint32_t i = 0; i < dep.nimports; ++i) 313 if (kit_slice_eq_cstr(dep.imports[i], SHIM_SYM_CSTR)) found_imp = 1; 314 } 315 } 316 kit_obj_depiter_free(it); 317 EXPECT(found_dll, "KERNEL32.dll dependency not found"); 318 EXPECT(found_imp, "ExitProcess import not listed under KERNEL32.dll"); 319 } 320 321 /* Dynamic symbol ExitProcess (undefined import). */ 322 { 323 KitObjSymIter* it = NULL; 324 KitObjSymInfo sym; 325 int found = 0; 326 EXPECT(kit_obj_dynsymiter_new(f, &it) == KIT_OK, "dynsymiter_new failed"); 327 while (it && kit_obj_symiter_next(it, &sym) == KIT_ITER_ITEM) 328 if (kit_slice_eq_cstr(sym.name, SHIM_SYM_CSTR)) found = 1; 329 kit_obj_symiter_free(it); 330 EXPECT(found, "ExitProcess not in dynamic symbols"); 331 } 332 333 /* Raw escape hatch: 16 data dirs + subsystem + dllchars; IMPORT set. */ 334 { 335 KitObjImageRawIter* it = NULL; 336 KitObjImageRaw r; 337 int ndatadir = 0, have_subsys = 0, have_dllchars = 0; 338 uint64_t import_rva = 0; 339 EXPECT(kit_obj_image_rawiter_new(f, &it) == KIT_OK, "rawiter_new failed"); 340 while (it && kit_obj_image_rawiter_next(it, &r) == KIT_ITER_ITEM) { 341 if (r.tag < 16) { 342 ++ndatadir; 343 if (r.tag == 1) import_rva = r.value; /* IMAGE_DIRECTORY_ENTRY_IMPORT */ 344 } else if (r.tag == KIT_OBJ_RAW_PE_SUBSYSTEM) { 345 have_subsys = 1; 346 EXPECT(r.value == 3, "subsystem != WINDOWS_CUI (%llu)", 347 (unsigned long long)r.value); 348 } else if (r.tag == KIT_OBJ_RAW_PE_DLLCHARS) { 349 have_dllchars = 1; 350 } 351 } 352 kit_obj_image_rawiter_free(it); 353 EXPECT(ndatadir == 16, "expected 16 data directories, saw %d", ndatadir); 354 EXPECT(have_subsys, "subsystem raw entry missing"); 355 EXPECT(have_dllchars, "dllcharacteristics raw entry missing"); 356 EXPECT(import_rva != 0, "IMPORT data directory RVA is zero"); 357 } 358 359 /* Base relocations: the PIE .data absolute pointer needs at least one. */ 360 { 361 KitObjRelocIter* it = NULL; 362 KitObjReloc rel; 363 int n = 0; 364 EXPECT(kit_obj_dynreliter_new(f, &it) == KIT_OK, "dynreliter_new failed"); 365 while (it && kit_obj_reliter_next(it, &rel) == KIT_ITER_ITEM) ++n; 366 kit_obj_reliter_free(it); 367 EXPECT(n > 0, "no base relocations for PIE image"); 368 } 369 370 kit_obj_free(f); 371 free(pe); 372 } 373 374 int main(int argc, char** argv) { 375 memset(&g_ctx, 0, sizeof g_ctx); 376 g_ctx.heap = &g_heap; 377 g_ctx.diag = &g_diag; 378 g_ctx.now = -1; 379 380 /* Optional: regenerate the committed x86_64 PE objdump fixture (no 381 * asserts). Used to produce test/objdump/x86_64-windows/cases/pe-image.exe 382 * from this same in-memory link, so the non-gated objdump golden is 383 * reproducible. */ 384 if (argc > 1) { 385 KitTargetSpec t; 386 Compiler* c; 387 target_windows(&t, KIT_ARCH_X86_64); 388 c = make_compiler(&t); 389 if (c && setjmp(c->panic) == 0) { 390 size_t n = 0; 391 uint8_t* pe = link_pe(c, KIT_ARCH_X86_64, 0x8664u, &n); 392 if (pe && n) { 393 FILE* fp = fopen(argv[1], "wb"); 394 if (fp) { 395 fwrite(pe, 1, n, fp); 396 fclose(fp); 397 } 398 fprintf(stderr, "wrote %zu bytes to %s\n", n, argv[1]); 399 } 400 free(pe); 401 } 402 free_compiler(c); 403 return 0; 404 } 405 406 run_case("x86_64-windows", KIT_ARCH_X86_64, 0x8664u); 407 run_case("aarch64-windows", KIT_ARCH_ARM_64, 0xAA64u); 408 409 if (g_failures) { 410 fprintf(stderr, "FAILED %d assertion(s)\n", g_failures); 411 return 1; 412 } 413 fprintf(stderr, "OK pe-image-read\n"); 414 return 0; 415 }