kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

rv64_interp_smoke_test.c (19305B)


      1 /* Differential smoke test for the emu's INTERP execution mode
      2  * (doc/INTERPRETER.md Phase 4). Builds a tiny static rv64 ELF whose _start
      3  * computes a data-page address with auipc, stores a value there (SD), loads it
      4  * back (LD), and exits via ecall with the loaded value. The SD/LD exercise the
      5  * guest-memory helper path (__emu_*_checked) — the part a wrong "guest-VA
      6  * frame" model would corrupt — which the addi/ecall-only smoke fixture cannot
      7  * reach.
      8  *
 * The guest is run twice through the emu API (kit_emu_new followed by bounded
 * kit_emu_step calls, rather than the unbounded kit_emu_run), once in JIT mode
 * and once in INTERP mode, BOTH at -O1, and the two exit codes are asserted
 * equal (and equal to the expected memory-derived value). This isolates
 * interpreter-vs-JIT semantics rather than O0-vs-O1.
 *
 * Mostly self-contained: it brings its own ELF builder and rv64 encoders. The
 * one internal dependency is "emu/emu.h" plus emu_internal_cpu(), which the
 * stepping loop uses to read the trap reason, exit code and pc; the test
 * therefore needs those symbols to be reachable from the linked library
 * objects. */
     17 
     18 #include <kit/compile.h>
     19 #include <kit/core.h>
     20 #include <kit/emu.h>
     21 #include <kit/jit.h>
     22 #include <stdint.h>
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include <sys/mman.h>
     27 #include <unistd.h>
     28 
     29 #include "emu/emu.h"
     30 #include "lib/kit_unit.h"
     31 
/* Platform probes for the exec-memory allocator below. XM_DUAL_APPLE selects
 * the mach_vm_remap branch of xm_reserve(), XM_DUAL_LINUX the memfd branch;
 * when both are 0, xm_reserve() falls back to a single read/write mapping. */
#if defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_vm.h>
#define XM_DUAL_APPLE 1
#else
#define XM_DUAL_APPLE 0
#endif
#if defined(__linux__)
#include <sys/syscall.h>
#define XM_DUAL_LINUX 1
#else
#define XM_DUAL_LINUX 0
#endif
     45 
/* Not in the public header; provided by the linked library objects.
 * run_guest() calls it after each step to obtain the EmuCPUState from which
 * it reads the trap reason, exit code and pc. */
EmuCPUState* emu_internal_cpu(KitEmu*);
     48 
/* One shared test context replaces the per-file heap/diag/counter globals;
 * EXPECT aliases CU_EXPECT so the call sites below are unchanged.
 * main() initialises g_u with kit_unit_init() and derives the process exit
 * status from g_u.fails. */
static KitUnit g_u;
#define EXPECT(cond, ...) CU_EXPECT(&g_u, cond, __VA_ARGS__)
     53 
     54 /* ---- Dual-mapped (W^X) exec memory for the per-block JIT image (still built
     55  * in INTERP mode to resolve helper externs). Same shape as rv64_smoke_test.
     56  * ---- */
     57 static int xm_to_posix(int p) {
     58   int q = 0;
     59   if (p & KIT_PROT_READ) q |= PROT_READ;
     60   if (p & KIT_PROT_WRITE) q |= PROT_WRITE;
     61   if (p & KIT_PROT_EXEC) q |= PROT_EXEC;
     62   return q;
     63 }
     64 
/* Bookkeeping for one dual-mapped region. xm_reserve() heap-allocates it and
 * stores it in KitExecMemRegion.token; xm_release() uses it to unmap both
 * views and then frees it. */
typedef struct XmTok {
  void* w;  /* writable view (becomes KitExecMemRegion.write) */
  void* r;  /* second view of the same pages (becomes .runtime) */
  size_t n; /* length in bytes of each view */
} XmTok;
     70 
     71 static KitStatus xm_reserve_single(size_t n, KitExecMemRegion* out) {
     72   void* m =
     73       mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
     74   if (m == MAP_FAILED) return KIT_NOMEM;
     75   out->write = m;
     76   out->runtime = m;
     77   out->size = n;
     78   out->token = NULL;
     79   return KIT_OK;
     80 }
     81 
/* KitExecMem reserve callback: obtain `n` bytes of memory for the JIT image.
 *
 *   user  ignored.
 *   n     requested size in bytes; 0 is rejected.
 *   prot  KIT_PROT_* mask; only the EXEC bit is inspected here.
 *   out   receives the region description; NULL is rejected.
 *
 * Non-executable requests get a single RW mapping (xm_reserve_single). For
 * executable requests the same pages are mapped twice where the platform
 * branch supports it: a read/write view (out->write) and a separate view
 * (out->runtime) that starts out read-only. Both addresses are recorded in a
 * heap-allocated XmTok stored in out->token so xm_release() can undo them.
 *
 * Returns KIT_INVALID for bad arguments, KIT_NOMEM for any mapping or
 * allocation failure (everything acquired so far is released first), and
 * KIT_OK on success. */
static KitStatus xm_reserve(void* user, size_t n, int prot,
                            KitExecMemRegion* out) {
  (void)user;
  if (!out || !n) return KIT_INVALID;
  if (!(prot & KIT_PROT_EXEC)) return xm_reserve_single(n, out);
#if XM_DUAL_APPLE
  {
    /* Writable view: plain anonymous memory. */
    void* w =
        mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    mach_vm_address_t r = 0;
    vm_prot_t cur = 0, max = 0;
    XmTok* tok;
    if (w == MAP_FAILED) return KIT_NOMEM;
    /* Second view of the same pages at a kernel-chosen address. The FALSE
     * argument is presumably mach_vm_remap's `copy` flag (share, don't copy)
     * — confirm against the Mach headers. */
    if (mach_vm_remap(mach_task_self(), &r, (mach_vm_size_t)n, 0,
                      VM_FLAGS_ANYWHERE, mach_task_self(),
                      (mach_vm_address_t)(uintptr_t)w, FALSE, &cur, &max,
                      VM_INHERIT_NONE) != KERN_SUCCESS) {
      munmap(w, n);
      return KIT_NOMEM;
    }
    /* The runtime view starts read-only; later permission changes go through
     * xm_protect(). */
    if (mprotect((void*)(uintptr_t)r, n, PROT_READ) != 0) {
      munmap((void*)(uintptr_t)r, n);
      munmap(w, n);
      return KIT_NOMEM;
    }
    tok = (XmTok*)malloc(sizeof(*tok));
    if (!tok) {
      munmap((void*)(uintptr_t)r, n);
      munmap(w, n);
      return KIT_NOMEM;
    }
    tok->w = w;
    tok->r = (void*)(uintptr_t)r;
    tok->n = n;
    out->write = w;
    out->runtime = (void*)(uintptr_t)r;
    out->size = n;
    out->token = tok;
    return KIT_OK;
  }
#elif XM_DUAL_LINUX
  {
    /* Anonymous in-memory file that backs both views. */
    int fd = (int)syscall(SYS_memfd_create, "kit-emu-interp-test", 0u);
    void *w, *r;
    XmTok* tok;
    if (fd < 0) return KIT_NOMEM;
    if (ftruncate(fd, (off_t)n) != 0) {
      close(fd);
      return KIT_NOMEM;
    }
    /* Writable shared view. */
    w = mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (w == MAP_FAILED) {
      close(fd);
      return KIT_NOMEM;
    }
    /* Read-only shared view of the same file. */
    r = mmap(NULL, n, PROT_READ, MAP_SHARED, fd, 0);
    /* The descriptor is not needed once both mmap calls have been made. */
    close(fd);
    if (r == MAP_FAILED) {
      munmap(w, n);
      return KIT_NOMEM;
    }
    tok = (XmTok*)malloc(sizeof(*tok));
    if (!tok) {
      munmap(r, n);
      munmap(w, n);
      return KIT_NOMEM;
    }
    tok->w = w;
    tok->r = r;
    tok->n = n;
    out->write = w;
    out->runtime = r;
    out->size = n;
    out->token = tok;
    return KIT_OK;
  }
#else
  /* No dual-mapping support on this platform: single RW mapping. */
  return xm_reserve_single(n, out);
#endif
}
    162 
    163 static KitStatus xm_protect(void* user, void* addr, size_t n, int prot) {
    164   (void)user;
    165   return mprotect(addr, n, xm_to_posix(prot)) == 0 ? KIT_OK : KIT_IO;
    166 }
    167 
    168 static void xm_release(void* user, KitExecMemRegion* r) {
    169   (void)user;
    170   if (!r || !r->size) return;
    171   if (r->token) {
    172     XmTok* tok = (XmTok*)r->token;
    173     if (tok->r && tok->r != tok->w) munmap(tok->r, tok->n);
    174     if (tok->w) munmap(tok->w, tok->n);
    175     free(tok);
    176   } else if (r->write) {
    177     munmap(r->write, r->size);
    178   }
    179   memset(r, 0, sizeof(*r));
    180 }
    181 
    182 static void xm_flush(void* user, void* addr, size_t n) {
    183   (void)user;
    184 #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
    185   __builtin___clear_cache((char*)addr, (char*)addr + n);
    186 #else
    187   (void)addr;
    188   (void)n;
    189 #endif
    190 }
    191 
/* Exec-memory provider handed to the JIT host: run_guest() stores its address
 * in KitJitHost.execmem. Positional initializer — the leading 16 * 1024 is
 * presumably the page_size field that run_guest() overwrites with
 * sysconf(_SC_PAGESIZE), and the trailing NULL presumably the `user` cookie
 * the xm_* callbacks ignore (TODO confirm against the KitExecMem layout). */
static KitExecMem g_execmem = {
    16 * 1024, xm_reserve, xm_protect, xm_release, xm_flush, NULL,
};
    195 
    196 static KitCompiler* new_host_compiler(void) {
    197   KitArchKind arch;
    198   KitOSKind os;
    199   KitObjFmt obj;
    200   KitTargetSpec t;
    201   KitCompiler* c = NULL;
    202 #if defined(__x86_64__) || defined(_M_X64)
    203   arch = KIT_ARCH_X86_64;
    204 #elif defined(__aarch64__) || defined(_M_ARM64)
    205   arch = KIT_ARCH_ARM_64;
    206 #elif defined(__riscv) && __riscv_xlen == 64
    207   arch = KIT_ARCH_RV64;
    208 #else
    209   return NULL;
    210 #endif
    211 #if defined(__APPLE__)
    212   os = KIT_OS_MACOS;
    213   obj = KIT_OBJ_MACHO;
    214 #elif defined(__linux__)
    215   os = KIT_OS_LINUX;
    216   obj = KIT_OBJ_ELF;
    217 #else
    218   return NULL;
    219 #endif
    220   t = kit_unit_target(arch, os, obj);
    221   if (kit_unit_compiler_new(&g_u, t, &c) != KIT_OK || !c) {
    222     fprintf(stderr, "host compiler_new failed\n");
    223     exit(2);
    224   }
    225   return c;
    226 }
    227 
    228 /* ---- Minimal rv64 ELF + instruction encoders (self-contained) ---- */
    229 static void put16(unsigned char* b, size_t off, unsigned v) {
    230   b[off + 0] = (unsigned char)v;
    231   b[off + 1] = (unsigned char)(v >> 8);
    232 }
    233 static void put32(unsigned char* b, size_t off, uint32_t v) {
    234   b[off + 0] = (unsigned char)v;
    235   b[off + 1] = (unsigned char)(v >> 8);
    236   b[off + 2] = (unsigned char)(v >> 16);
    237   b[off + 3] = (unsigned char)(v >> 24);
    238 }
    239 static void put64(unsigned char* b, size_t off, uint64_t v) {
    240   size_t i;
    241   for (i = 0; i < 8; ++i) b[off + i] = (unsigned char)(v >> (8 * i));
    242 }
    243 
    244 /* The rv64 emu lifter implements a minimal op set (src/arch/rv64/emu.c):
    245  * ADDI, ADD, AUIPC, LD, SD, JALR, ECALL. The guests below use only those —
    246  * calls go through JALR, and constants come from LD'd data, not LUI. */
/* Integer register numbers (the x-register index that goes into the rd/rs1/
 * rs2 fields of the encoders below), named by their RISC-V ABI mnemonics.
 * Only the registers the test guests actually use are listed. */
enum {
  RV_ZERO = 0, /* x0: hard-wired zero */
  RV_RA = 1,   /* x1: return address (link register for the JALR calls) */
  RV_T0 = 5,   /* x5: temporary */
  RV_T1 = 6,   /* x6: temporary */
  RV_T2 = 7,   /* x7: temporary */
  RV_A0 = 10,  /* x10: first argument; carries the exit code at ecall */
  RV_A7 = 17   /* x17: syscall number at ecall (94 in the guests) */
};
    256 
    257 static uint32_t enc_addi(uint32_t rd, uint32_t rs1, int32_t imm) {
    258   return (((uint32_t)imm & 0xfffu) << 20) | (rs1 << 15) | (0x0u << 12) |
    259          (rd << 7) | 0x13u;
    260 }
    261 static uint32_t enc_auipc(uint32_t rd, uint32_t imm20) {
    262   return ((imm20 & 0xfffffu) << 12) | (rd << 7) | 0x17u;
    263 }
    264 static uint32_t enc_ld(uint32_t rd, uint32_t rs1, int32_t imm) {
    265   return (((uint32_t)imm & 0xfffu) << 20) | (rs1 << 15) | (0x3u << 12) |
    266          (rd << 7) | 0x03u;
    267 }
    268 static uint32_t enc_sd(uint32_t rs2, uint32_t rs1, int32_t imm) {
    269   uint32_t u = (uint32_t)imm;
    270   return (((u >> 5) & 0x7fu) << 25) | (rs2 << 20) | (rs1 << 15) | (0x3u << 12) |
    271          ((u & 0x1fu) << 7) | 0x23u;
    272 }
    273 static uint32_t enc_jalr(uint32_t rd, uint32_t rs1, int32_t imm) {
    274   return (((uint32_t)imm & 0xfffu) << 20) | (rs1 << 15) | (0x0u << 12) |
    275          (rd << 7) | 0x67u;
    276 }
    277 static uint32_t enc_ecall(void) { return 0x00000073u; }
    278 
    279 /* Build a 2-segment rv64 ELF (text R+X at 0x10000, data R+W at 0x20000) whose
    280  * _start does: SD value -> data; LD data -> a0; exit(a0). Returns a heap buffer
    281  * (free with free()); *out_len gets the size. */
    282 static unsigned char* build_sd_ld_elf(size_t* out_len, unsigned value) {
    283   enum {
    284     PAGE = 0x1000u,
    285     TEXT_VA = 0x10000ull,
    286     TEXT_OFF = 0x1000u,
    287     DATA_VA = 0x20000ull,
    288     DATA_OFF = 0x2000u,
    289     DATA_LEN = 0x1000u,
    290   };
    291   uint64_t entry = TEXT_VA + TEXT_OFF;
    292   int64_t delta = (int64_t)DATA_VA - (int64_t)entry; /* auipc is at entry */
    293   uint32_t hi20 = (uint32_t)(((uint64_t)(delta + 0x800)) >> 12) & 0xfffffu;
    294   int32_t lo12 = (int32_t)(delta - ((int64_t)hi20 << 12));
    295   size_t text_len;
    296   size_t total = DATA_OFF + DATA_LEN;
    297   unsigned char* b = (unsigned char*)calloc(1, total);
    298   uint32_t code[8];
    299   size_t n = 0;
    300   if (!b) return NULL;
    301 
    302   code[n++] = enc_auipc(RV_T0, hi20);               /* t0 = hi(data) */
    303   code[n++] = enc_addi(RV_T0, RV_T0, lo12);         /* t0 = &data */
    304   code[n++] = enc_addi(RV_T1, RV_ZERO, (int)value); /* t1 = value */
    305   code[n++] = enc_sd(RV_T1, RV_T0, 0);              /* [data] = t1 */
    306   code[n++] = enc_ld(RV_A0, RV_T0, 0);              /* a0 = [data] */
    307   code[n++] = enc_addi(RV_A7, RV_ZERO, 94);         /* a7 = exit_group */
    308   code[n++] = enc_ecall();                          /* exit(a0) */
    309   text_len = n * 4u;
    310 
    311   /* ELF64 header. */
    312   b[0] = 0x7f;
    313   b[1] = 'E';
    314   b[2] = 'L';
    315   b[3] = 'F';
    316   b[4] = 2;            /* ELFCLASS64 */
    317   b[5] = 1;            /* ELFDATA2LSB */
    318   b[6] = 1;            /* EV_CURRENT */
    319   b[7] = 0;            /* ELFOSABI_NONE */
    320   put16(b, 16, 2);     /* e_type = ET_EXEC */
    321   put16(b, 18, 243);   /* e_machine = EM_RISCV */
    322   put32(b, 20, 1);     /* e_version */
    323   put64(b, 24, entry); /* e_entry */
    324   put64(b, 32, 64);    /* e_phoff */
    325   put64(b, 40, 0);     /* e_shoff */
    326   put32(b, 48, 0);     /* e_flags */
    327   put16(b, 52, 64);    /* e_ehsize */
    328   put16(b, 54, 56);    /* e_phentsize */
    329   put16(b, 56, 2);     /* e_phnum */
    330   put16(b, 58, 0);     /* e_shentsize */
    331   put16(b, 60, 0);     /* e_shnum */
    332   put16(b, 62, 0);     /* e_shstrndx */
    333 
    334   /* PT_LOAD #0: text, R+X, [0, TEXT_OFF+text_len) at VA TEXT_VA. */
    335   put32(b, 64 + 0, 1);                    /* p_type = PT_LOAD */
    336   put32(b, 64 + 4, 0x4u | 0x1u);          /* p_flags = PF_R|PF_X */
    337   put64(b, 64 + 8, 0);                    /* p_offset */
    338   put64(b, 64 + 16, TEXT_VA);             /* p_vaddr */
    339   put64(b, 64 + 24, TEXT_VA);             /* p_paddr */
    340   put64(b, 64 + 32, TEXT_OFF + text_len); /* p_filesz */
    341   put64(b, 64 + 40, TEXT_OFF + text_len); /* p_memsz */
    342   put64(b, 64 + 48, PAGE);                /* p_align */
    343 
    344   /* PT_LOAD #1: data, R+W, [DATA_OFF, DATA_OFF+DATA_LEN) at VA DATA_VA. */
    345   put32(b, 120 + 0, 1);           /* p_type = PT_LOAD */
    346   put32(b, 120 + 4, 0x4u | 0x2u); /* p_flags = PF_R|PF_W */
    347   put64(b, 120 + 8, DATA_OFF);    /* p_offset */
    348   put64(b, 120 + 16, DATA_VA);    /* p_vaddr */
    349   put64(b, 120 + 24, DATA_VA);    /* p_paddr */
    350   put64(b, 120 + 32, DATA_LEN);   /* p_filesz */
    351   put64(b, 120 + 40, DATA_LEN);   /* p_memsz */
    352   put64(b, 120 + 48, PAGE);       /* p_align */
    353 
    354   /* .text. */
    355   {
    356     size_t i;
    357     for (i = 0; i < n; ++i) put32(b, TEXT_OFF + i * 4u, code[i]);
    358   }
    359 
    360   *out_len = total;
    361   return b;
    362 }
    363 
    364 /* Build a self-modifying rv64 ELF (single R+W+X segment at 0x10000, entry
    365  * 0x11000) to exercise the code-cache + interp-capture invalidation path. Uses
    366  * only lifter-supported ops (calls via JALR; the replacement instruction word
    367  * is LD'd from a data slot rather than built with LUI):
    368  *
 *   entry  0x11000: auipc t0,0; addi t0,t0,0x100    # t0 = &target (0x11100)
 *                   jalr  ra, t0, 0                 # 1st call -> translates target, a0=1
 *          0x1100c: auipc t1,0; addi t1,t1,0x1f4    # t1 = &patchword (0x11200)
 *                   ld    t2, 0(t1)                 # t2 = [addi a0,2 | jalr ra]
 *                   sd    t2, 0(t0)                 # overwrite target IN A TRANSLATED PAGE
 *          0x1101c: jalr  ra, t0, 0                 # 2nd call -> must re-decode, a0=2
 *          0x11020: addi  a7,zero,94; ecall         # exit(a0)
 *   target 0x11100: addi  a0,zero,1; jalr zero,ra,0 # patched in place to addi a0,zero,2
 *   patch  0x11200: u64 = (jalr<<32)|addi-a0-2      # 8 bytes covering both target words
    378  *
    379  * The store to the already-translated code page bumps the addr-space generation
    380  * and flushes the code cache, so the 2nd call must run the FRESH block. Both
    381  * JIT and INTERP must exit 2; a stale interp-capture lookup would re-run the
    382  * old block and exit 1 — the divergence this guards. */
    383 static unsigned char* build_smc_elf(size_t* out_len) {
    384   enum {
    385     PAGE = 0x1000u,
    386     TEXT_VA = 0x10000ull,
    387     TEXT_OFF = 0x1000u,
    388     TARGET_OFF = 0x1100u, /* VA 0x11100 */
    389     PATCH_OFF = 0x1200u,  /* VA 0x11200 */
    390     TEXT_END = 0x1208u,   /* end of the 8-byte patch word */
    391   };
    392   uint64_t entry = TEXT_VA + TEXT_OFF;       /* 0x11000 */
    393   uint64_t target_va = TEXT_VA + TARGET_OFF; /* 0x11100 */
    394   uint64_t patch_va = TEXT_VA + PATCH_OFF;   /* 0x11200 */
    395   /* 8 bytes = [addi a0,zero,2 ; jalr zero,ra,0], so the SD rewrites target's
    396    * first word (1 -> 2) and preserves its ret. */
    397   uint64_t patch_word = ((uint64_t)enc_jalr(RV_ZERO, RV_RA, 0) << 32) |
    398                         (uint64_t)enc_addi(RV_A0, RV_ZERO, 2);
    399   unsigned char* b = (unsigned char*)calloc(1, TEXT_END);
    400   if (!b) return NULL;
    401 
    402   /* ELF64 header. */
    403   b[0] = 0x7f;
    404   b[1] = 'E';
    405   b[2] = 'L';
    406   b[3] = 'F';
    407   b[4] = 2;
    408   b[5] = 1;
    409   b[6] = 1;
    410   b[7] = 0;
    411   put16(b, 16, 2);   /* ET_EXEC */
    412   put16(b, 18, 243); /* EM_RISCV */
    413   put32(b, 20, 1);
    414   put64(b, 24, entry);
    415   put64(b, 32, 64); /* e_phoff */
    416   put64(b, 40, 0);
    417   put32(b, 48, 0);
    418   put16(b, 52, 64);
    419   put16(b, 54, 56);
    420   put16(b, 56, 1); /* e_phnum = 1 */
    421   put16(b, 58, 0);
    422   put16(b, 60, 0);
    423   put16(b, 62, 0);
    424 
    425   /* One PT_LOAD: R+W+X, [0, TEXT_END) at VA TEXT_VA (writable so the guest can
    426    * patch its own code; executable so it runs). */
    427   put32(b, 64 + 0, 1);                  /* PT_LOAD */
    428   put32(b, 64 + 4, 0x4u | 0x2u | 0x1u); /* PF_R|PF_W|PF_X */
    429   put64(b, 64 + 8, 0);                  /* p_offset */
    430   put64(b, 64 + 16, TEXT_VA);
    431   put64(b, 64 + 24, TEXT_VA);
    432   put64(b, 64 + 32, TEXT_END); /* p_filesz */
    433   put64(b, 64 + 40, TEXT_END); /* p_memsz */
    434   put64(b, 64 + 48, PAGE);
    435 
    436   /* entry: compute &target, call it, load patch word, patch, call again, exit.
    437    */
    438   put32(b, TEXT_OFF + 0x00, enc_auipc(RV_T0, 0)); /* t0 = 0x11000 */
    439   put32(
    440       b, TEXT_OFF + 0x04,
    441       enc_addi(RV_T0, RV_T0, (int32_t)(target_va - entry))); /* t0 = &target */
    442   put32(b, TEXT_OFF + 0x08, enc_jalr(RV_RA, RV_T0, 0));      /* call target */
    443   put32(b, TEXT_OFF + 0x0c, enc_auipc(RV_T1, 0));            /* t1 = 0x1100c */
    444   put32(b, TEXT_OFF + 0x10,
    445         enc_addi(RV_T1, RV_T1,
    446                  (int32_t)(patch_va - (entry + 0x0c)))); /* &patch */
    447   put32(b, TEXT_OFF + 0x14, enc_ld(RV_T2, RV_T1, 0));    /* t2 = patch word */
    448   put32(b, TEXT_OFF + 0x18, enc_sd(RV_T2, RV_T0, 0));    /* patch target page */
    449   put32(b, TEXT_OFF + 0x1c, enc_jalr(RV_RA, RV_T0, 0));  /* call target again */
    450   put32(b, TEXT_OFF + 0x20, enc_addi(RV_A7, RV_ZERO, 94));
    451   put32(b, TEXT_OFF + 0x24, enc_ecall());
    452 
    453   /* target. */
    454   put32(b, TARGET_OFF + 0x00, enc_addi(RV_A0, RV_ZERO, 1));
    455   put32(b, TARGET_OFF + 0x04, enc_jalr(RV_ZERO, RV_RA, 0));
    456 
    457   /* patch word data. */
    458   put64(b, PATCH_OFF, patch_word);
    459 
    460   *out_len = TEXT_END;
    461   return b;
    462 }
    463 
    464 /* Run the guest to completion in `mode` at -O1; returns the exit code, or -1 on
    465  * a non-OK run (and sets *ok = 0). */
    466 static int run_guest(const unsigned char* elf, size_t elf_len, KitEmuMode mode,
    467                      int* ok) {
    468   KitCompiler* c = new_host_compiler();
    469   KitJitHost host;
    470   KitEmuOptions opts;
    471   KitTargetSpec gt;
    472   int exit_code = -1;
    473   KitStatus st;
    474   long ps;
    475 
    476   *ok = 0;
    477   if (!c) return -1;
    478   ps = sysconf(_SC_PAGESIZE);
    479   if (ps > 0) g_execmem.page_size = (size_t)ps;
    480 
    481   memset(&host, 0, sizeof(host));
    482   host.execmem = &g_execmem;
    483   memset(&gt, 0, sizeof(gt));
    484   gt.arch = KIT_ARCH_RV64;
    485   gt.os = KIT_OS_LINUX;
    486   gt.obj = KIT_OBJ_ELF;
    487   gt.ptr_size = 8u;
    488   gt.ptr_align = 8u;
    489   memset(&opts, 0, sizeof(opts));
    490   opts.guest_bytes.data = elf;
    491   opts.guest_bytes.len = elf_len;
    492   opts.guest_target = gt;
    493   opts.has_guest_target = true;
    494   opts.jit_host = &host;
    495   opts.optimize = 1; /* both arms at -O1: isolate interp-vs-jit, not O0-vs-O1 */
    496   opts.mode = mode;
    497 
    498   /* Bounded stepping (instead of the unbounded kit_emu_run) so a guest that
    499    * fails to terminate surfaces as a finite failure, not a hang. */
    500   {
    501     KitEmu* emu = NULL;
    502     EmuCPUState* cpu;
    503     EmuTrapReason trap = EMU_TRAP_NONE;
    504     uint32_t i;
    505     enum { MAX_BLOCKS = 256u };
    506     st = kit_emu_new(c, &opts, &emu);
    507     if (st != KIT_OK || !emu) {
    508       kit_compiler_free(c);
    509       return -1;
    510     }
    511     for (i = 0; i < MAX_BLOCKS; ++i) {
    512       st = kit_emu_step(emu, 1);
    513       if (st != KIT_OK) break;
    514       cpu = emu_internal_cpu(emu);
    515       trap = emu_cpu_trap_reason(cpu);
    516       if (trap != EMU_TRAP_NONE) break;
    517     }
    518     cpu = emu_internal_cpu(emu);
    519     trap = emu_cpu_trap_reason(cpu);
    520     if (st == KIT_OK && trap == EMU_TRAP_EXIT) {
    521       exit_code = emu_cpu_exit_code(cpu);
    522       *ok = 1;
    523     } else {
    524       fprintf(stderr, "  run: st=%d trap=%d after %u blocks, pc=0x%llx\n",
    525               (int)st, (int)trap, i, (unsigned long long)emu_cpu_pc(cpu));
    526     }
    527     kit_emu_free(emu);
    528   }
    529   kit_compiler_free(c);
    530   return exit_code;
    531 }
    532 
    533 /* Run `elf` under both modes (both -O1), asserting both complete, both equal
    534  * `expect`, and the two agree. Takes ownership of `elf` (frees it). */
    535 static void check_differential(const char* name, unsigned char* elf,
    536                                size_t elf_len, int expect) {
    537   int jit_ok = 0, interp_ok = 0;
    538   int jit_exit, interp_exit;
    539 
    540   if (!elf) {
    541     EXPECT(0, "%s: ELF buffer allocation failed", name);
    542     return;
    543   }
    544   jit_exit = run_guest(elf, elf_len, KIT_EMU_MODE_JIT, &jit_ok);
    545   interp_exit = run_guest(elf, elf_len, KIT_EMU_MODE_INTERP, &interp_ok);
    546   free(elf);
    547 
    548   EXPECT(jit_ok, "%s: JIT run did not complete cleanly", name);
    549   EXPECT(interp_ok, "%s: INTERP run did not complete cleanly", name);
    550   EXPECT(jit_exit == expect, "%s: JIT exit should be %d, got %d", name, expect,
    551          jit_exit);
    552   EXPECT(interp_exit == expect, "%s: INTERP exit should be %d, got %d", name,
    553          expect, interp_exit);
    554   EXPECT(jit_exit == interp_exit,
    555          "%s: JIT vs INTERP exit codes differ: jit=%d interp=%d", name,
    556          jit_exit, interp_exit);
    557   if (jit_ok && interp_ok && jit_exit == expect && interp_exit == expect)
    558     fprintf(stderr, "PASS %s (jit=%d interp=%d)\n", name, jit_exit,
    559             interp_exit);
    560 }
    561 
    562 int main(void) {
    563   size_t len = 0;
    564 
    565   kit_unit_init(&g_u);
    566 
    567   /* (1) SD/LD/ecall: exercises the guest-memory helper path. */
    568   check_differential("sd-ld-ecall", build_sd_ld_elf(&len, 99u), len, 99);
    569 
    570   /* (2) Self-modifying code: exercises code-cache + interp-capture
    571    * invalidation (a stale interp lookup would make INTERP diverge from JIT). */
    572   check_differential("self-modifying-code", build_smc_elf(&len), len, 2);
    573 
    574   fprintf(stderr, "interp-emu-smoke: %d failure%s\n", g_u.fails,
    575           g_u.fails == 1 ? "" : "s");
    576   return g_u.fails ? 1 : 0;
    577 }