kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

runtime.c (15719B)


      1 /* Emulator runtime: code cache, reserved JIT VA region, runtime
      2  * helper trampolines, and the extern resolver that wires lifted
      3  * blocks to host helper addresses. The runtime is in-process — no
      4  * separate runtime object — so the JIT linker just hands back the
      5  * helper addresses through emu_runtime_extern_resolver.
      6  *
      7  * Block chaining lives here too (a runtime mprotect-and-patch pass
      8  * outside the linker) but lands with the per-ISA lifter; see
      9  * doc/EMU.md §6 for why it sits outside link/. */
     10 
     11 #include <string.h>
     12 
     13 #include "core/util.h"
     14 #include "emu/emu.h"
     15 
     16 /* ============================================================
     17  * Reserved code region
     18  * ============================================================
     19  * One up-front PROT_NONE reservation through the JitHost-supplied
     20  * execmem. The base address is fed to link_resolve_at as the image's
     21  * runtime VA; per-block link_resolve_extend bump-allocates within.
     22  * Pages are committed (protect to RX) lazily as blocks land — the
     23  * runtime flips them after the linker writes the section bytes and
     24  * applies relocations.
     25  */
     26 
     27 static u64 page_size_bytes(const KitExecMem* m) {
     28   return m->page_size ? (u64)m->page_size : 0x4000u;
     29 }
     30 
/* Bookkeeping for the single reserved JIT code region. Allocated by
 * emu_code_region_new and destroyed by emu_code_region_free. */
struct EmuCodeRegion {
  Compiler* c;             /* supplies c->ctx->heap, which allocates and
                              frees this struct */
  const KitExecMem* mem;   /* borrowed; outlives the region */
  KitExecMemRegion region; /* dual-aliased on hosts that support it */
  uintptr_t rx_end;        /* high-water of runtime-alias pages
                              currently flipped to RX */
};
     38 
     39 EmuCodeRegion* emu_code_region_new(Compiler* c, const KitExecMem* mem,
     40                                    size_t reserve_size) {
     41   Heap* h;
     42   EmuCodeRegion* r;
     43   size_t aligned;
     44   KitExecMemRegion region;
     45 
     46   if (!c || !mem || !mem->reserve || !mem->protect || !mem->release) {
     47     return NULL;
     48   }
     49   h = c->ctx->heap;
     50   aligned = (size_t)ALIGN_UP((u64)reserve_size, page_size_bytes(mem));
     51 
     52   /* Reserve as a code region. The host returns dual-mapped memory
     53    * (writable alias / runtime alias) so the linker can write through
     54    * the write alias while the runtime alias starts read-only and is
     55    * flipped to RX page-by-page as cold blocks are committed. */
     56   if (mem->reserve(mem->user, aligned, KIT_PROT_READ | KIT_PROT_EXEC,
     57                    &region) != KIT_OK) {
     58     return NULL;
     59   }
     60 
     61   r = (EmuCodeRegion*)h->alloc(h, sizeof(*r), _Alignof(EmuCodeRegion));
     62   if (!r) {
     63     mem->release(mem->user, &region);
     64     return NULL;
     65   }
     66   r->c = c;
     67   r->mem = mem;
     68   r->region = region;
     69   r->rx_end = (uintptr_t)region.runtime;
     70   return r;
     71 }
     72 
     73 void emu_code_region_free(EmuCodeRegion* r) {
     74   Heap* h;
     75   if (!r) return;
     76   h = r->c->ctx->heap;
     77   if (r->region.size && r->mem && r->mem->release) {
     78     r->mem->release(r->mem->user, &r->region);
     79   }
     80   h->free(h, r, sizeof(*r));
     81 }
     82 
     83 uintptr_t emu_code_region_base(const EmuCodeRegion* r) {
     84   /* Runtime alias — what JIT'd block addresses must use. */
     85   return r ? (uintptr_t)r->region.runtime : 0;
     86 }
     87 
     88 size_t emu_code_region_size(const EmuCodeRegion* r) {
     89   return r ? r->region.size : 0;
     90 }
     91 
     92 void emu_code_region_commit_rx_to(EmuCodeRegion* r, uintptr_t end) {
     93   uintptr_t base, page_end;
     94   size_t len;
     95   if (!r || !r->mem) return;
     96   base = (uintptr_t)r->region.runtime;
     97   page_end = (uintptr_t)ALIGN_UP((u64)end, page_size_bytes(r->mem));
     98   /* Monotonic: never lower the high-water; chaining patches
     99    * already-committed code and depends on it staying RX. */
    100   if (page_end <= r->rx_end) return;
    101   if (page_end > base + r->region.size) page_end = base + r->region.size;
    102   if (page_end <= r->rx_end) return;
    103 
    104   len = (size_t)(page_end - r->rx_end);
    105   /* Bytes are written through the WRITE alias by link_resolve_extend
    106    * (a stub today). The runtime alias starts at PROT_READ and we flip
    107    * it to PROT_READ|PROT_EXEC here; W^X is preserved because the two
    108    * aliases are distinct VAs and neither holds W and X at the same
    109    * time. */
    110   if (r->mem->protect(r->mem->user, (void*)r->rx_end, len,
    111                       KIT_PROT_READ | KIT_PROT_EXEC) == KIT_OK) {
    112     if (r->mem->flush_icache) {
    113       r->mem->flush_icache(r->mem->user, (void*)r->rx_end, len);
    114     }
    115     r->rx_end = page_end;
    116   }
    117 }
    118 
    119 /* ============================================================
    120  * Code cache (guest_pc -> host entry)
    121  * ============================================================
    122  * Open-addressed linear-probe hash on the guest PC. Capacity grows
    123  * by doubling; v1 never evicts. */
    124 
    125 #include "core/hashmap.h"
    126 
/* Instantiate the PcMap hash-map type used below through PcMap_init_cap,
 * PcMap_set, PcMap_get and PcMap_fini. The macro arguments are presumably
 * (map name, key type, value type, hash function) — confirm against
 * core/hashmap.h. Keys are guest PCs, values are host entry pointers. */
HASHMAP_DEFINE(PcMap, u64, void*, hash_u64);

/* Code cache: maps a guest PC to the host entry point of its block. */
struct EmuCodeCache {
  Compiler* c; /* supplies c->ctx->heap for the cache and its map */
  PcMap map;   /* guest_pc -> host entry */
};
    133 
    134 EmuCodeCache* emu_cache_new(Compiler* c) {
    135   Heap* h;
    136   EmuCodeCache* k;
    137   if (!c) return NULL;
    138   h = c->ctx->heap;
    139   k = (EmuCodeCache*)h->alloc(h, sizeof(*k), _Alignof(EmuCodeCache));
    140   if (!k) return NULL;
    141   memset(k, 0, sizeof(*k));
    142   k->c = c;
    143   PcMap_init_cap(&k->map, h, 64u);
    144   return k;
    145 }
    146 
    147 void emu_cache_free(EmuCodeCache* c) {
    148   Heap* h;
    149   if (!c) return;
    150   h = c->c->ctx->heap;
    151   PcMap_fini(&c->map);
    152   h->free(h, c, sizeof(*c));
    153 }
    154 
    155 void emu_cache_insert(EmuCodeCache* c, u64 guest_pc, void* host_entry) {
    156   if (!c || guest_pc == 0) return;
    157   PcMap_set(&c->map, guest_pc, host_entry);
    158 }
    159 
    160 void* emu_cache_lookup(const EmuCodeCache* c, u64 guest_pc) {
    161   void** slot;
    162   if (!c) return NULL;
    163   slot = PcMap_get(&c->map, guest_pc);
    164   return slot ? *slot : NULL;
    165 }
    166 
    167 /* ============================================================
    168  * Runtime helper trampolines
    169  * ============================================================
    170  * Lifted blocks call into these through extern symbols whose names
    171  * are EMU_SYM_*. The resolver below maps each name to the address
    172  * of the matching function (or, for EMU_SYM_CPU_STATE, the address
    173  * of the running emu's CPUState). */
    174 
/* Forward-declare the accessors for the host-private KitEmu so the
 * resolver can pull the CPUState and process pointers without dragging
 * emu.c's struct definition into this TU's contract. Both functions are
 * defined outside this file. */
EmuCPUState* emu_internal_cpu(KitEmu*);
EmuProcess* emu_internal_process(KitEmu*);
    180 
    181 /* Memory helpers. Bounds-checked through the CPUState's guest-AS window
    182  * (cpu.c). Checked helpers let an OS convert faults to its own delivery
    183  * mechanism and return a non-zero resume PC to the lifted block. */
    184 
    185 static u64 emu_deliver_memory_fault(EmuThread* thread, u64 addr, u8 access,
    186                                     u64 fault_pc, u64 next_pc) {
    187   EmuProcess* process = thread ? thread->process : NULL;
    188   EmuCPUState* cpu = emu_thread_cpu(thread);
    189   const EmuMemFault* fault;
    190   EmuFaultEvent ev;
    191   u64 delivered_pc = next_pc;
    192   if (!process || !cpu) return next_pc;
    193   fault = emu_addr_space_last_fault(&process->image.addr_space);
    194   memset(&ev, 0, sizeof(ev));
    195   ev.kind = fault ? fault->kind : EMU_FAULT_NONE;
    196   ev.addr = addr;
    197   ev.pc = fault_pc;
    198   ev.next_pc = next_pc;
    199   ev.access = access;
    200   if (emu_fault_deliver(process, thread, &ev, &delivered_pc) != KIT_OK) {
    201     emu_cpu_trap_fault(cpu);
    202     return fault_pc ? fault_pc : next_pc;
    203   }
    204   return delivered_pc;
    205 }
    206 
    207 static u64 emu_mem_load_checked(EmuThread* t, u64 addr, u64 nbytes, u8 access,
    208                                 u64 fault_pc, u64 next_pc, u64* value_out) {
    209   EmuCPUState* s = emu_thread_cpu(t);
    210   u8* p = emu_cpu_va_to_host_perm(s, addr, nbytes, access);
    211   u64 v = 0;
    212   u64 i;
    213   if (!value_out) {
    214     emu_cpu_trap_fault(s);
    215     return fault_pc ? fault_pc : next_pc;
    216   }
    217   if (!p) return emu_deliver_memory_fault(t, addr, access, fault_pc, next_pc);
    218   for (i = 0; i < nbytes; ++i) v |= ((u64)p[i]) << (8u * (u32)i);
    219   *value_out = v;
    220   return 0;
    221 }
    222 
/* Bounds-checked fast (unchecked-resume) load/store of `nbytes`
 * little-endian bytes through the CPUState guest-AS window. The shims
 * below pass 1, 2 or 4 for loads (the 64-bit load composes two 32-bit
 * halves) and 1, 2, 4 or 8 for stores. A load miss trap-faults the CPU and
 * yields 0; a store miss routes through the OS fault delivery and returns
 * its resume PC. These back the fixed-width shims. */
    227 static u64 emu_mem_load_raw(EmuThread* t, u64 addr, u32 nbytes) {
    228   EmuCPUState* s = emu_thread_cpu(t);
    229   u8* p = emu_cpu_va_to_host_perm(s, addr, nbytes, EMU_MEM_READ);
    230   u64 v = 0;
    231   u32 i;
    232   if (!p) {
    233     emu_cpu_trap_fault(s);
    234     return 0;
    235   }
    236   for (i = 0; i < nbytes; ++i) v |= ((u64)p[i]) << (8u * i);
    237   return v;
    238 }
    239 
    240 static u64 emu_mem_store_raw(EmuThread* t, u64 addr, u64 v, u32 nbytes,
    241                              u64 fault_pc, u64 next_pc) {
    242   EmuCPUState* s = emu_thread_cpu(t);
    243   u8* p = emu_cpu_va_to_host_perm(s, addr, nbytes, EMU_MEM_WRITE);
    244   u32 i;
    245   if (!p)
    246     return emu_deliver_memory_fault(t, addr, EMU_MEM_WRITE, fault_pc, next_pc);
    247   for (i = 0; i < nbytes; ++i) p[i] = (u8)(v >> (8u * i));
    248   return next_pc;
    249 }
    250 
    251 u8 emu_mem_load8(EmuThread* t, u64 addr) {
    252   return (u8)emu_mem_load_raw(t, addr, 1u);
    253 }
    254 u16 emu_mem_load16(EmuThread* t, u64 addr) {
    255   return (u16)emu_mem_load_raw(t, addr, 2u);
    256 }
    257 u32 emu_mem_load32(EmuThread* t, u64 addr) {
    258   return (u32)emu_mem_load_raw(t, addr, 4u);
    259 }
    260 u64 emu_mem_load64(EmuThread* t, u64 addr) {
    261   /* Two-half compose preserves the legacy per-map-boundary translation: each
    262    * half is bounds-checked through its own va_to_host_perm window. */
    263   u32 lo = emu_mem_load32(t, addr);
    264   u32 hi = emu_mem_load32(t, addr + 4u);
    265   return (u64)lo | ((u64)hi << 32);
    266 }
    267 
    268 u64 emu_mem_load8_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc,
    269                           u64* value_out) {
    270   return emu_mem_load_checked(t, addr, 1u, EMU_MEM_READ, fault_pc, next_pc,
    271                               value_out);
    272 }
    273 
    274 u64 emu_mem_load16_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc,
    275                            u64* value_out) {
    276   return emu_mem_load_checked(t, addr, 2u, EMU_MEM_READ, fault_pc, next_pc,
    277                               value_out);
    278 }
    279 
    280 u64 emu_mem_load32_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc,
    281                            u64* value_out) {
    282   return emu_mem_load_checked(t, addr, 4u, EMU_MEM_READ, fault_pc, next_pc,
    283                               value_out);
    284 }
    285 
    286 u64 emu_mem_load64_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc,
    287                            u64* value_out) {
    288   return emu_mem_load_checked(t, addr, 8u, EMU_MEM_READ, fault_pc, next_pc,
    289                               value_out);
    290 }
    291 
    292 u64 emu_mem_store8(EmuThread* t, u64 addr, u8 v, u64 fault_pc, u64 next_pc) {
    293   return emu_mem_store_raw(t, addr, v, 1u, fault_pc, next_pc);
    294 }
    295 u64 emu_mem_store16(EmuThread* t, u64 addr, u16 v, u64 fault_pc, u64 next_pc) {
    296   return emu_mem_store_raw(t, addr, v, 2u, fault_pc, next_pc);
    297 }
    298 u64 emu_mem_store32(EmuThread* t, u64 addr, u32 v, u64 fault_pc, u64 next_pc) {
    299   return emu_mem_store_raw(t, addr, v, 4u, fault_pc, next_pc);
    300 }
    301 u64 emu_mem_store64(EmuThread* t, u64 addr, u64 v, u64 fault_pc, u64 next_pc) {
    302   return emu_mem_store_raw(t, addr, v, 8u, fault_pc, next_pc);
    303 }
    304 
    305 static void emu_syscall_decoded(EmuThread* thread,
    306                                 const EmuSyscallRequest* req) {
    307   EmuCPUState* s;
    308   EmuProcess* process;
    309   EmuSyscallResult result;
    310   KitStatus st;
    311 
    312   s = emu_thread_cpu(thread);
    313   process = thread ? thread->process : NULL;
    314   if (!s || !thread || !process || !req || !process->os ||
    315       !process->os->emu_encode_syscall_result || !process->bindings.syscall) {
    316     emu_cpu_trap_fault(s);
    317     return;
    318   }
    319   st = process->bindings.syscall(process->bindings.user, process, thread, req,
    320                                  &result);
    321   if (st != KIT_OK) {
    322     emu_cpu_trap_fault(s);
    323     return;
    324   }
    325 
    326   if (emu_cpu_trap_reason(s) == EMU_TRAP_EXIT) return;
    327   if (!(result.flags & EMU_SYSCALL_RESULT_SKIP_ENCODE)) {
    328     st = process->os->emu_encode_syscall_result(process, thread, &result);
    329     if (st != KIT_OK) emu_cpu_trap_fault(s);
    330   }
    331 }
    332 
    333 void emu_syscall(EmuThread* thread) {
    334   EmuCPUState* s = emu_thread_cpu(thread);
    335   EmuProcess* process = thread ? thread->process : NULL;
    336   EmuSyscallRequest req;
    337   if (!s || !process || !process->os || !process->os->emu_decode_syscall) {
    338     emu_cpu_trap_fault(s);
    339     return;
    340   }
    341   if (process->os->emu_decode_syscall(process, thread, &req) != KIT_OK) {
    342     emu_cpu_trap_fault(s);
    343     return;
    344   }
    345   emu_syscall_decoded(thread, &req);
    346 }
    347 
    348 u64 emu_syscall_next(EmuThread* thread, u64 next_pc) {
    349   EmuCPUState* s = emu_thread_cpu(thread);
    350   EmuProcess* process = thread ? thread->process : NULL;
    351   EmuSyscallRequest req;
    352   if (!s || !process || !process->os || !process->os->emu_decode_syscall) {
    353     emu_cpu_trap_fault(s);
    354     return next_pc;
    355   }
    356   if (process->os->emu_decode_syscall(process, thread, &req) != KIT_OK) {
    357     emu_cpu_trap_fault(s);
    358     return next_pc;
    359   }
    360   emu_syscall_decoded(thread, &req);
    361   if (process->os->emu_syscall_next_pc &&
    362       emu_cpu_trap_reason(s) == EMU_TRAP_NONE)
    363     return process->os->emu_syscall_next_pc(process, thread, &req, next_pc);
    364   return next_pc;
    365 }
    366 
    367 /* ============================================================
    368  * Extern resolver
    369  * ============================================================
    370  * Called by the linker for any undefined symbol the per-block ObjBuilder
    371  * references. Returns the host VA of the named helper or NULL for the
    372  * linker's ordinary undefined-symbol diagnostic. */
    373 
    374 void* emu_runtime_extern_resolver(void* user, KitSlice name) {
    375   KitSlice demangled;
    376   if (!name.s) return NULL;
    377   demangled = name;
    378   if (demangled.len > 2u && demangled.s[0] == '_' && demangled.s[1] == '_' &&
    379       demangled.s[2] == '_') {
    380     demangled.s++;
    381     demangled.len--;
    382   }
    383 
    384   if (kit_slice_eq_cstr(demangled, EMU_SYM_CPU_STATE)) {
    385     KitEmu* e = (KitEmu*)user;
    386     return (void*)emu_internal_cpu(e);
    387   }
    388 
    389   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD8)) return (void*)emu_mem_load8;
    390   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD16))
    391     return (void*)emu_mem_load16;
    392   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD32))
    393     return (void*)emu_mem_load32;
    394   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD64))
    395     return (void*)emu_mem_load64;
    396   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD8_CHECKED))
    397     return (void*)emu_mem_load8_checked;
    398   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD16_CHECKED))
    399     return (void*)emu_mem_load16_checked;
    400   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD32_CHECKED))
    401     return (void*)emu_mem_load32_checked;
    402   if (kit_slice_eq_cstr(demangled, EMU_SYM_LOAD64_CHECKED))
    403     return (void*)emu_mem_load64_checked;
    404   if (kit_slice_eq_cstr(demangled, EMU_SYM_STORE8))
    405     return (void*)emu_mem_store8;
    406   if (kit_slice_eq_cstr(demangled, EMU_SYM_STORE16))
    407     return (void*)emu_mem_store16;
    408   if (kit_slice_eq_cstr(demangled, EMU_SYM_STORE32))
    409     return (void*)emu_mem_store32;
    410   if (kit_slice_eq_cstr(demangled, EMU_SYM_STORE64))
    411     return (void*)emu_mem_store64;
    412   if (kit_slice_eq_cstr(demangled, EMU_SYM_SYSCALL))
    413     return (void*)emu_syscall_next;
    414   {
    415     KitEmu* e = (KitEmu*)user;
    416     EmuProcess* process = emu_internal_process(e);
    417     if (process && process->arch && process->arch->emu &&
    418         process->arch->emu->resolve_runtime_helper) {
    419       void* p = process->arch->emu->resolve_runtime_helper(user, demangled);
    420       if (p) return p;
    421     }
    422   }
    423 
    424   /* EMU_SYM_DISPATCH is the cross-block tail-call helper; it shares
    425    * the host address of the dispatcher entry. The dispatcher loop
    426    * lives inside kit_emu_step, so the lifter can also synthesize
    427    * a return-of-next_pc instead of a real call here. v1 returns
    428    * NULL — lifters that don't yet emit DISPATCH calls are fine. */
    429 
    430   return NULL;
    431 }
    432 
/* Tracing. The hooks below are currently no-op stubs that ignore their
 * arguments. The intended v1 behavior is to emit to the ctx's diag sink
 * at KIT_DIAG_NOTE; the full implementation lands with the lifter so it
 * can format guest PCs and decoded instruction text consistently. */
    436 
    437 void emu_trace_pc(Compiler* c, u64 pc) {
    438   (void)c;
    439   (void)pc;
    440 }
    441 void emu_trace_block(Compiler* c, u64 pc) {
    442   (void)c;
    443   (void)pc;
    444 }
    445 void emu_trace_insn(Compiler* c, u64 guest_pc, const KitDecodedInsn* insn) {
    446   (void)c;
    447   (void)guest_pc;
    448   (void)insn;
    449 }