kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6a3230be6f5368231610c029cd4076aa97a4d98b
parent 39ee5e11fb5963cf7b89f2f43f7abf2c163899c6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 25 May 2026 04:23:42 -0700

Emulator redesign, RV64 Linux minimal impl

Diffstat:
M doc/EMU.md | 516 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M driver/emu.c | 148 +++++++++++++++++++++++++++++++++++++------------------------------------------
M include/cfree/emu.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M src/arch/arch.h | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/disasm.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/arch.c | 3 +++
M src/arch/rv64/disasm.c | 447 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/arch/rv64/disasm.h | 1 +
A src/arch/rv64/emu.c | 516 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/isa.h | 12 ++++++++++++
M src/emu/cpu.c | 1051 +++++++------------------------------------------------------------------------
D src/emu/decode.c | 729 -------------------------------------------------------------------------------
A src/emu/dl.c | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/emu/elf_load.c | 552 -------------------------------------------------------------------------------
M src/emu/emu.c | 492 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M src/emu/emu.h | 499 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
A src/emu/image.c | 469 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/emu/lift.c | 38 --------------------------------------
M src/emu/runtime.c | 490 +++++++++++++++++++++++++++++++++++--------------------------------------------
D src/emu/rv64_ops.h | 241 -------------------------------------------------------------------------------
A src/emu/signal.c | 12 ++++++++++++
A src/emu/tls.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/link/link_reloc.c | 516 -------------------------------------------------------------------------------
A src/obj/elf/emu_load.c | 564 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/format.h | 34 ++++++++++++++++++++++++++++++++++
M src/obj/registry.c | 2 ++
A src/obj/reloc_apply.c | 516 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/obj/reloc_apply.h | 9 +++++++++
A src/os/linux/linux.c | 848 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/os/registry.c | 12 ++++++++++++
A test/arch/rv64_decode_test.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D test/emu/rv64_extras_test.c | 614 -------------------------------------------------------------------------------
M test/emu/rv64_smoke_test.c | 1375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M test/test.mk | 20 ++++++++++----------
34 files changed, 7167 insertions(+), 4464 deletions(-)

diff --git a/doc/EMU.md b/doc/EMU.md @@ -4,8 +4,62 @@ This document describes the target design for `cfree emu`: a user-mode guest executable runner built out of the same registries and pipeline boundaries used by the rest of cfree. -This is a design document, not a description of the current prototype. -The prototype can be discarded or migrated behind these interfaces. +This is both the target design and the status note for the current +implementation. Sections marked as target shape describe where the code is +going; sections marked as landed describe behavior already present in the +tree. + +## Current Status + +Landed: + +- `CfreeEmu` lifecycle, block lookup, and dispatch through lifted CG and the + existing JIT/link path. +- Object-format executable loading through `ObjFormatImpl.emu`; ELF maps one + object at a time, records `PT_INTERP`, parses `PT_DYNAMIC` metadata, and + records `PT_TLS`. +- Linux user-mode ABI code lives under `src/os/linux/`, selected by + `src/os/registry.c`. +- RV64 uses shared `ArchDecodeOps` and `ArchEmuOps`; the current emu subset + covers the smoke-test instructions (`addi`, `add`, `auipc`, `ld`, `sd`, + `jalr`, `ecall`). +- Runtime helpers use `EmuThread *` as their context. `EmuCPUState` remains + the arch-owned register/trap payload below the thread. +- `EmuAddrSpace` is now a sparse VM model with ordered mappings, unmapped + holes, guard pages, anonymous/file-backed maps, per-page dirty/translated + state, structured fault reporting, and VM operations for map/unmap/protect, + gap search, brk, copy, and destroy. +- Linux `brk`, anonymous/private `mmap`, `munmap`, and `mprotect` route + through the VM API with Linux-style result/errno behavior for the supported + subset. +- The emulator dynamic-loader layer owns dependency loading, ordered + link-map traversal, dynamic symbol lookup, runtime relocation scheduling, + import binding records, TLS module-list construction, and dynamic-loader + policy state. 
+- Runtime relocation byte application is factored into a neutral helper used + by both the linker and emulator dynamic loader. +- Import bindings can now produce either guest addresses or typed host-native + bridges through generated guest thunk records. The current RV64 bridge + supports the tested integer signatures and stores the declared signature in + the public binding result. +- TLS state is process/thread-owned: loaded `PT_TLS` modules are collected in + `EmuProcess.tls_state`, and each `EmuThread` owns its per-module TLS blocks. +- Signal delivery is layered: runtime helpers emit fault/signal events, and + the Linux/RV64 OS layer builds/restores the guest frame for the current + signal fixtures. + +Still target work: + +- Behind-interface dynamic-loader breadth: multi-level search paths, symbol + versioning, weak/interposition edge cases, RELRO enforcement, lazy binding, + `COPY`, IFUNC/IRELATIVE, init/fini execution, and additional relocation + families. +- Behind-interface TLS breadth: DTV/dynamic lookup helpers, descriptor + variants, additional TLS relocation models, and thread creation. +- Behind-interface signal breadth: exact production frame layouts, + blocked/pending behavior, restart behavior, alternate signal stacks, + default dispositions, and nested signals. +- Broader RV64 coverage and additional guest ISAs. ## Goals @@ -14,7 +68,6 @@ The prototype can be discarded or migrated behind these interfaces. registry/vtable system described in `doc/REGISTRY.md`. - Treat the guest ISA lifter as a frontend: decode guest bytes, emit CG, then reuse the existing opt, backend, object, link, and JIT pipeline. -- Support a non-executable-memory execution mode by interpreting lifted IR. - Keep executable loading separate from object building. Loading maps a guest process image; lifting/codegen produces new host objects. 
@@ -44,7 +97,7 @@ guest executable bytes -> arch decoder: bytes at guest PC -> CfreeDecodedInsn[] -> arch lifter: CfreeDecodedInsn[] -> CG function -> optional opt - -> either IR interpreter or backend/JIT + -> backend/JIT -> dispatch loop and runtime helpers ``` @@ -126,6 +179,12 @@ tables, and auxv data are the executable-loading contract. `emu` needs OS-specific user ABI behavior that does not belong in an architecture or object format module. +The core C calling-convention ABI is already derived by +`src/abi/registry.c` from `(CfreeArchKind, CfreeObjFmt)`. The OS vtable +does not select `ABIVtable`s. It owns user-mode process conventions: +initial stack shape, syscall register ABI, errno/restart behavior, +signal frames, TLS process setup, and dynamic-loader policy. + Implementation location: - Linux behavior lives with the Linux OS implementation. @@ -139,8 +198,6 @@ typedef struct CfreeOsImpl { CfreeOSKind kind; const char *name; - const ABIVtable *(*abi_for_arch)(CfreeCompiler *, CfreeArchKind); - CfreeStatus (*emu_init_process)(CfreeCompiler *, EmuProcess *, const EmuLoadOptions *, const EmuLoadedImage *); @@ -170,8 +227,9 @@ symbols, or deliver host signals directly. It translates between guest OS ABI state and emulator-level requests. The emulator/embedder bindings decide what those requests mean. -This also makes the ABI axis in `doc/REGISTRY.md` cleaner: ABI selection -can remain derived, but OS behavior has an explicit registry home. +This preserves the `doc/REGISTRY.md` split: C ABI selection remains a +derived ABI-registry lookup, while guest OS behavior has its own explicit +registry home. ### Outside Interaction Bindings @@ -516,14 +574,9 @@ Architecture-specific lowering belongs in `ArchEmuOps.lift_block`. Shared helpers for CPU field addressing, helper calls, and block function construction belong in `src/emu/lift/`. -## Execution Engines - -`emu` should support multiple execution engines over the same process and -decode/lift infrastructure. 
+## JIT Engine -### JIT Engine - -The JIT engine uses the normal pipeline: +`emu` uses the normal CG/backend/JIT pipeline: ```text decoded block -> CG -> optional opt -> ObjBuilder -> link -> CfreeJit @@ -549,41 +602,10 @@ The first implementation should choose the simpler strategy that fits the current JIT API. The design requirement is stable lookup and eventual invalidation, not a specific linker-internal shape. -### IR Interpreter - -An IR interpreter is a first-class execution engine, not just a test -helper. It allows `emu` to run where executable memory is unavailable and -validates lifted semantics independently from machine-code emission. - -The intended flow is: - -```text -decoded block -> CG -> canonical IR -> optional opt -> IR interpreter -``` - -The interpreter should run the canonical pre-machinized IR. Interpreting -post-lowering machine IR would mix backend details into the emulation -contract and make cross-architecture behavior harder to test. - -This engine requires an explicit IR execution ABI: - -- call a lifted block function with an `EmuThread *`; -- model helper calls through a host function table; -- read/write CPU-state memory through normal IR memory operations; -- return the next guest PC or propagate a trap. - -### Instruction Interpreter - -A direct instruction interpreter is useful for bring-up and differential -testing, but it should not become the primary semantic source. The -primary semantics should live in the lifter. If an instruction -interpreter exists, it should consume `CfreeDecodedInsn`, not private -decoder records. - ## Runtime Helpers Runtime helpers live in `src/emu/runtime/` and are linked by symbol or -function table into JIT/IR execution. +function table into JIT execution. 
They own: @@ -621,14 +643,14 @@ Proposed ownership: ```text src/emu/ emu.c public API glue and lifecycle - process.c EmuProcess / EmuThread ownership - mem.c guest address space - dispatch.c block cache and run loop - bindings.c embedder outside-interaction binding glue - lift/common.c arch-independent CG helpers for lifters - runtime/*.c memory, traps, helper table, tracing - interp/ir.c lifted IR interpreter - interp/inst.c optional decoded-instruction interpreter + cpu.c arch-owned CPU payload allocation/accessors + image.c loaded-image cleanup and address translation helpers + runtime.c code cache, memory helpers, syscall/import helpers + and tracing + process.c target home for broader EmuProcess / EmuThread ownership + mem.c optional split-out home for sparse guest address space + bindings.c target home for embedder outside-interaction binding glue + lift/common.c target home for arch-independent CG lifter helpers src/arch/<arch>/ decode.c ArchDecodeOps implementation @@ -639,36 +661,47 @@ src/arch/<arch>/ src/obj/<format>/ *_read.c relocatable/shared object reader *_emit.c relocatable object emitter - *_image.c executable image loader for ObjFormatEmuOps + *_load.c executable image loader for ObjFormatEmuOps registry.c ObjFormatImpl entries src/os/<os>/ - os.c CfreeOsImpl entry - syscall.c guest syscall ABI decode/encode - process.c stack, auxv, TLS, signal conventions + <os>.c CfreeOsImpl entry and current small ABI slice + syscall.c target home for guest syscall ABI decode/encode + process.c target home for stack, auxv, TLS, signal conventions ``` Exact filenames can vary. The boundary should not: format code loads files, arch code decodes/lifts instructions, OS code models user ABI, and `emu` coordinates process execution. -## First Target +## First Slice: Landed Shape -The first implementation target is a deliberately small vertical slice -that exercises every intended boundary without requiring broad ISA, -loader, OS, or execution-engine coverage. 
+The first implementation target was a deliberately small vertical slice +that exercised every intended boundary without requiring broad ISA, +loader, OS, or execution-engine coverage. That slice has landed and has +been extended just enough to cover dynamic ELF metadata, imports, TLS, page +permissions, sparse VM behavior, Linux VM syscalls, and minimal signal +delivery. -Target: +Current tested slice: - guest arch: RV64; - guest OS: Linux; - object format: ELF; -- executable shape: static ELF64 little-endian `ET_EXEC`; -- execution engine: lifted IR interpreter; -- outside interaction: `EmuExternalBindings.syscall`; -- guest behavior: `_start` exits with a code through `ecall`. - -The test program can be: +- executable shape: ELF64 little-endian `ET_EXEC`, static plus the small + dynamic fixture shape (`PT_INTERP`, `PT_DYNAMIC`, `DT_NEEDED`, + `R_RISCV_JUMP_SLOT`, `PT_TLS`); +- execution engine: lifted CG through the existing JIT path; +- outside interaction: `EmuExternalBindings.syscall` and the public + `resolve_import` surface; the default import path currently handles a + synthetic guest-callable no-op thunk for the test import; +- guest behavior: `_start` exits with a code through `ecall`, can read the + initial TLS word through `tp`, and can redirect an RX-page write fault to + a registered SIGSEGV handler. +- guest memory: executable segments, import thunks, stack, stack guard, brk, + anonymous mmap, mprotect, and munmap all flow through `EmuAddrSpace`. + +The original acceptance program was: ```asm addi a0, zero, 42 # exit code @@ -676,33 +709,46 @@ addi a7, zero, 93 # Linux rv64 SYS_exit ecall ``` -`SYS_exit_group` (`a7 = 94`) may be accepted too, but `SYS_exit` is -enough for the first acceptance test. +`SYS_exit_group` (`a7 = 94`) is accepted too. -Minimum implementation requirements: +Landed implementation surface: - `ObjFormatImpl(ELF).emu` - Detect ELF64 little-endian `EM_RISCV`. - - Accept static `ET_EXEC` with `PT_LOAD` segments. 
- - Map loadable segments into `EmuAddrSpace`. - - Set `EmuLoadedImage.entry_pc`. - - Return `CFREE_UNSUPPORTED` for dynamic executables, PIE, unsupported - relocations, TLS, and malformed inputs. + - Accept `ET_EXEC` with `PT_LOAD` segments. + - Map loadable segments into `EmuAddrSpace` and apply page permissions. + - Set entry, initial stack, program-header metadata, interpreter path, + dynamic import metadata, and TLS metadata. + - Create a main executable `EmuLoadedObject` / `EmuLinkMap` record and + parse the dynamic table into `EmuDynInfo` for later DSO relocation work. + - Patch the current RV64 `JUMP_SLOT` fixture through a reserved guest + import-thunk range. - `CfreeOsImpl(Linux)` - Initialize one process and one thread. - - Provide a valid aligned stack pointer, even if argv/envp/auxv are - minimal. + - Provide a valid aligned stack pointer with argv/envp/auxv. + - Initialize `tp` from `PT_TLS` for the current local-exec-style fixture. - Decode RV64 Linux syscall ABI: number in `a7`, arguments in `a0-a5`. - - Encode syscall results for non-exit calls, although the first test - should not need them. + - Encode syscall results in `a0`. + - Implement the supported VM syscall subset through `EmuAddrSpace`: + `brk`, anonymous/private `mmap`, `munmap`, and `mprotect`. + - Record `rt_sigaction` handlers for the current SIGSEGV delivery path. +- `EmuAddrSpace` + - Own sparse ordered maps with explicit page size. + - Represent anonymous, file-backed, and guard mappings. + - Track permissions, dirty pages, translated pages, and structured faults. + - Provide map, unmap, protect, gap search, brk, copy, checked pointer, and + invalidation APIs. - `EmuExternalBindings` - Implement `syscall`. - Recognize syscall `93` and set the emulated exit state from `a0`. - - Optionally recognize syscall `94`. + - Recognize syscall `94`. - Return a deterministic unsupported result for every other syscall. 
+ - Expose `resolve_import` for dynamic import policy, though the current + default test path does not require host-native import calls. - `ArchDecodeOps(RV64)` - - Decode `ADDI` and `ECALL`. - - Mark `ECALL` as a terminator. + - Decode the current smoke subset: `ADDI`, `ADD`, `AUIPC`, `LD`, `SD`, + `JALR`, `ECALL`. + - Mark control/trap instructions as terminators. - Provide a formatter over the same `CfreeDecodedInsn` records so the disassembler path uses the shared decoder shape. - `ArchEmuOps(RV64)` @@ -710,11 +756,13 @@ Minimum implementation requirements: code. - Initialize `pc` from `EmuLoadedImage.entry_pc` and `x2` from the initial stack pointer. - - Lift `ADDI` and `ECALL` to CG. -- IR interpreter + - Lift the current smoke subset to CG. +- JIT engine + - Compile the lifted block through the existing CG, opt, object, link, + and JIT APIs. - Execute the lifted block function with an `EmuThread *`. - Route runtime helper calls through the same helper/binding table that - the JIT engine will use later. + later execution engines can reuse. Acceptance criteria: @@ -723,36 +771,278 @@ Acceptance criteria: - Instruction decode goes through `ArchDecodeOps(RV64)`. - The disassembler formatter can format the same decoded instructions. - The lifter goes through `ArchEmuOps(RV64)` and emits CG. -- The lifted IR interpreter runs the block without executable memory. +- The existing CG/JIT path materializes and runs the lifted block. - The exit syscall goes through `EmuExternalBindings.syscall`, not a built-in host syscall. -- The emulated process exits cleanly with code `42`. +- The static, dynamic-import/TLS, host-import, DSO-import/reloc, + distinct-TLS, signal/perms, and signal/`sigreturn` fixtures exit cleanly. + +This slice still intentionally excludes libc startup, broad dynamic-loader +policy, broad relocation and TLS families, exact production signal semantics, +broad file I/O, clocks, host-backed syscall coverage, and broad ISA coverage. 
+Those features should extend the same boundaries rather than introduce new +ones. + +## Remaining Architecture Work + +The core architectural split is now in place: executable loading is behind +object-format hooks, guest OS behavior is behind `CfreeOsImpl`, guest ISA +decode/lift is behind arch hooks, outside effects route through bindings, and +guest virtual memory is owned by `EmuAddrSpace`. Remaining work is primarily +coverage and deeper semantics within those boundaries, not a new top-level +architecture. + +The main remaining coverage and semantic-completeness areas are: + +1. General dynamic loading breadth: deeper dependency search/order, symbol + versioning, weak/interposition policy, RELRO, init/fini execution, and + interpreter modeling. +2. Broader dynamic relocation coverage and factoring shared byte patching out + of linker-internal ownership. +3. Broader import bridges: declared signatures, more ABI adapters, lazy PLT + resolver state, and data imports. +4. Full TLS relocation models on top of the loader's module list. +5. Production signal semantics: exact frame layouts, masks/pending sets, + restart behavior, alternate stacks, and nested delivery. +6. Broader Linux syscall semantics, RV64 instruction coverage, and additional + guest ISAs. + +Each remaining slice should continue to land red-green with narrow `test-emu` +fixtures first, then one hosted smoke binary only after the architecture for +that behavior is in place. + +### Dynamic Loading And Relocations + +`EmuLoadedImage` now has a process `EmuLinkMap` with the main executable and +binding-supplied DSOs for the minimal tested path. `EmuDynInfo` records parsed +dynamic-table metadata, and the loader applies the RV64 `RELATIVE`, 64-bit, +and `JUMP_SLOT` relocations needed by the current fixtures. The next step is +to broaden that model into production dynamic-loader policy and relocation +coverage. + +The ELF image loader owns program-header parsing and segment mapping for one +ELF object. 
The OS dynamic-loader layer owns dependency order, object search +policy, initial link-map construction, relocation scheduling, TLS allocation, +init/fini ordering, and auxv values such as `AT_BASE` when an interpreter is +modeled. + +The broader dynamic-loader pass should support: + +- loading the main executable and named DSOs supplied by a binding; +- `ET_DYN` load-bias assignment into sparse VM gaps; +- `PT_LOAD`, `PT_DYNAMIC`, `PT_TLS`, `PT_GNU_RELRO`, and `PT_INTERP` + metadata across all loaded objects; +- `DT_NEEDED`, `DT_STRTAB`, `DT_SYMTAB`, `DT_HASH`/`DT_GNU_HASH`, + `DT_RELA*`, `DT_JMPREL`, `DT_PLTGOT`, init/fini arrays, and symbol version + metadata parsed enough to reject unsupported cases cleanly; +- breadth-first dependency loading in link-map order; +- explicit symbol lookup policy over ordered scopes. + +Relocation handling should be data-driven by object format plus architecture, +with a sharp split between loader work and byte-patching work. The normal +linker and JIT linker already know how to patch relocation encodings once the +final values are known; the emulator should reuse that relocation-apply +primitive, not the `LinkImage` layout/resolution machinery. + +The emulator dynamic loader owns the runtime-only work: + +- parse `DT_RELA*` / `DT_JMPREL` records from already linked objects; +- map ELF relocation type numbers to existing `RelocKind` values through the + object-format/arch relocation tables; +- compute the mapped patch address `P` from object load bias plus relocation + offset; +- resolve `S` through the emulated link map, import bindings, weak rules, TLS + module state, or loader-specific relocation semantics; +- obtain writable patch bytes through `EmuAddrSpace`, respecting permissions, + RELRO, dirty tracking, and code-cache invalidation; +- call the shared relocation byte patcher with final `RelocKind`, `P_bytes`, + `S`, `A`, and `P`. 
+ +The shared relocation apply function should therefore be factored out of the +linker-specific internal surface into a neutral internal module. The linker +continues to feed it `LinkRelocApply` records after laying out a new output +image; the emulator feeds it runtime relocation records after mapping an input +image. Both paths share encoding semantics for relocations such as PC-relative +branches, ADRP/LO12 pairs, absolute writes, `GLOB_DAT`, `JUMP_SLOT`, and TLS +offset encodings. + +Some relocation kinds remain loader semantics around the shared patcher rather +than plain byte encodings: + +- `RELATIVE`: compute `S` from the object's load bias, then patch normally; +- `GLOB_DAT` / `JUMP_SLOT`: perform symbol/import binding first, then patch + the GOT/PLT slot normally; +- `COPY`: copy data from a DSO definition into the main executable, or reject + with a specific unsupported diagnostic until data interposition is modeled; +- TLS families: compute module IDs, thread-pointer offsets, DTV entries, or + descriptors in the TLS layer, then use the shared patcher for the final + encoding when applicable; +- IFUNC/IRELATIVE: execute or deny resolver policy in the loader before + patching the resolved target. + +Minimum relocation set for the first full ELF/RV64 pass: + +- `RELATIVE`; +- `GLOB_DAT`; +- `JUMP_SLOT`; +- `COPY` rejected with a specific unsupported diagnostic unless main-exe data + interposition is implemented in the same slice; +- TLS relocation families deferred until the TLS slice below, but parsed and + rejected by name rather than falling through. 
+ +Symbol lookup policy should be explicit and testable: + +- lookup starts in the requesting object's scope list; +- global/default visibility participates, local/hidden does not; +- weak undefined resolves to zero when absent; +- strong undefined without a binding result is a load failure; +- main executable interposition and `LD_PRELOAD`-like bindings are represented + as ordered scope entries, even if the driver does not expose preload flags + immediately. + +### Import Binding + +The current implementation has import records that describe what was bound and +how calls cross the guest/host boundary for the minimal eager-binding path. +Unresolved legacy fixture imports can still fall back to a no-op guest thunk so +old smoke coverage remains deterministic. + +There are two valid binding results: + +- a guest address inside an emulated object or generated guest-callable thunk; +- a native host function plus an ABI adapter selected by guest architecture, + guest OS ABI, and the binding's declared signature. + +The public `resolve_import` callback should remain policy-only: it decides +whether an object/symbol may resolve and what it resolves to. It should not +patch GOT slots directly. Loader/runtime code owns GOT/PLT writes, +lazy/eager binding state, and generated thunks. + +Target shape: + +```c +typedef struct EmuImportBinding { + uint32_t object_id; + uint32_t symbol_index; + uint64_t got_vaddr; + uint64_t plt_vaddr; + uint64_t resolved_guest_addr; + void *resolved_host_fn; + uint32_t flags; +} EmuImportBinding; +``` -This slice intentionally excludes dynamic linking, libc startup, memory -loads/stores, branches, JIT execution, signals, file I/O, clocks, and -host-backed syscalls. Those features should extend the same boundaries -rather than introduce new ones. +For eager binding, `GLOB_DAT` and `JUMP_SLOT` write the resolved guest +address immediately. 
For lazy binding, PLT entries route to an arch-owned +resolver trampoline that calls a runtime helper, resolves once, patches the +GOT slot, and returns the final target. + +Native host calls require bindings that provide or imply a signature the +adapter can marshal. The current RV64 adapter supports the tested integer +signatures; broader host calls should extend that adapter behind the same +descriptor. Do not guess libc signatures inside libcfree. The default driver +can provide a small allowlist for smoke tests later; the library API stays +explicit. + +Test order: + +- guest-to-guest DSO function import through `JUMP_SLOT`; +- data import through `GLOB_DAT`; +- missing weak import resolves to zero; +- missing strong import fails load; +- host-native import through a declared integer adapter. + +### TLS Model + +TLS setup has moved from "set RV64 `tp` to the PT_TLS vaddr" to a per-thread +TLS allocation for the initial `PT_TLS` image. The remaining work is to apply +the same model across loaded TLS modules and the broader TLS relocation +families. + +The model needs: + +- one TLS module ID per loaded object with `PT_TLS`; +- static TLS image allocation for the initial executable and startup DSOs; +- per-thread copy of `.tdata` plus zeroed `.tbss`; +- target-specific thread-pointer layout, selected by OS/arch ABI; +- dynamic TLS blocks for modules that are loaded after thread creation; +- DTV-like bookkeeping where the target ABI requires it; +- relocation support for local-exec, initial-exec, local-dynamic, + general-dynamic, and TLS descriptor variants as they become reachable. + +Do this in layers: + +1. Static TLS for initial objects, with ABI-correct `tp` and local-exec / + initial-exec relocations. +2. Dynamic TLS lookup helper used by general-dynamic/local-dynamic models. +3. TLS descriptors and lazy descriptor patching where the target ABI uses + them. +4. 
Thread creation support only after the single-thread model has correct + per-thread data structures. + +The architecture backend already emits several TLS relocation forms for link +outputs. The emulator loader should share relocation constants and ABI layout +helpers where possible, but it should patch mapped guest memory through +`EmuAddrSpace`, not through linker section buffers. + +### Signal Frames And `sigreturn` + +The current write-fault-to-handler shortcut has been replaced for the tested +path with signal delivery that builds a guest frame on the guest stack and +lets `rt_sigreturn` restore the interrupted register state. The remaining work +is exact Linux/RV64 frame compatibility and broader signal semantics. + +Target responsibilities: + +- `rt_sigaction`: store handler, flags, restorer, mask, and ABI-specific + action layout; +- `rt_sigprocmask`: maintain per-thread blocked/pending signal sets; +- faults from memory/fetch/decode carry `EmuSignalEvent` with signal number, + code, fault address, and fault PC; +- `emu_deliver_signal` chooses action/default behavior and writes a real guest + frame; +- the thread register state is saved in the frame and replaced with handler + entry state; +- `rt_sigreturn` validates/restores the frame and resumes the interrupted PC; +- `SA_ONSTACK`, `SA_RESTORER`, `SA_SIGINFO`, default dispositions, and nested + signals are represented, even if some return `CFREE_UNSUPPORTED` initially. + +The signal frame layout is OS/arch-owned. The emulator core should only route +fault events and stop/continue decisions. + +Test order: + +- SIGSEGV handler with restorer returns to the faulting program path through + `rt_sigreturn`; +- handler sees `siginfo.si_addr` for a protection fault; +- blocked SIGSEGV causes default termination/fault instead of handler entry; +- alternate signal stack is used when `SA_ONSTACK` is set. ## Implementation Order -1. Add `CfreeDecodedInsn` and `ArchDecodeOps` behind `ArchImpl`. 
- Convert one architecture's disassembler to `decode_one + format` - without changing public `cfree_disasm_*`. -2. Convert `objdump` and debugger instruction decode paths to the shared - decoder where practical. -3. Add object-format executable loader hooks and return `EmuLoadedImage` - instead of an object-builder-like shape. -4. Add an OS registry, outside-interaction bindings, and a minimal OS - vtable with process init and syscall ABI decode/encode. -5. Add `ArchEmuOps` for one guest architecture with CPU layout and a - small lifted block subset. -6. Add the IR interpreter execution engine. -7. Add the JIT execution engine using existing CG, opt, object, link, and - JIT APIs. -8. Expand ISA coverage and binding-backed syscall/import coverage with +The architectural split through sparse VM, Linux VM syscalls, dynamic-loader +ownership, neutral relocation byte patching, typed import bridge descriptors, +process/thread TLS ownership, and OS-owned signal frame construction has +landed. + +Remaining work is behind-interface broadening: + +1. Extend dynamic-loader policy coverage: multi-level dependency search, + version/interposition rules, RELRO, lazy binding, init/fini, and + interpreter behavior. +2. Add broader relocation families, including `COPY`, IFUNC/IRELATIVE, data + imports, and architecture-specific TLS relocations. +3. Broaden generated guest/native bridges beyond the tested integer + signatures. +4. Implement DTV/dynamic TLS lookup, descriptors, additional TLS models, and + thread creation. +5. Complete production signal semantics: exact frame layouts, masks/pending + sets, restart behavior, alternate signal stacks, default dispositions, and + nested signals. +6. Expand ISA coverage and binding-backed syscall/import coverage with differential tests against known guest binaries. -Each step should be testable in isolation. 
The decoder split can land -before any executable runs; the loader can be tested by inspecting -`EmuLoadedImage`; the lifter can be tested through the IR interpreter -before executable memory is involved. +Each step should be testable in isolation. Loader work can be tested by +inspecting `EmuLoadedImage` / `EmuLinkMap`; relocation work can be tested by +mapping small synthetic DSOs; lifter work can continue to use tiny +in-memory RV64 ELF fixtures through the existing JIT path. diff --git a/driver/emu.c b/driver/emu.c @@ -7,13 +7,13 @@ #include <cfree/emu.h> #include <cfree/object.h> -/* `cfree emu` — run a guest user-mode ELF on the host via libcfree's - * per-basic-block JIT translator. v1 guest archs: aarch64, riscv64. +/* `cfree emu` — run a guest user-mode executable on the host via libcfree's + * per-basic-block JIT translator. * * Argv shape mirrors `cfree run`: a single positional input (the guest - * ELF path) followed by `--` and the guest argv. Flags configure the + * executable path) followed by `--` and the guest argv. Flags configure the * translator (optimize level), tracing (PC / instruction / block), and - * the guest arch (auto-detected from the ELF when -arch is absent). + * the guest arch (auto-detected when -arch is absent). * * The freestanding emu core takes guest bytes; this driver handles the * path -> bytes step and the argv/envp marshalling. The driver returns @@ -27,12 +27,14 @@ typedef struct EmuOptions { int opt_level; CfreeEmuTraceFlags trace; - CfreeEmuArch guest_arch; + CfreeArchKind guest_arch; int guest_arch_set; + CfreeTarget guest_target; + int guest_target_set; - const char* elf_path; /* positional input (required) */ + const char* guest_path; /* positional input (required) */ - /* Guest argv collected after `--`. argv[0] defaults to elf_path + /* Guest argv collected after `--`. argv[0] defaults to guest_path * when the user supplied no `--` segment. 
The trailing NULL is * added at marshalling time and is not counted in nguest_argv. */ const char** guest_argv; @@ -42,7 +44,7 @@ typedef struct EmuOptions { static void emu_usage(void) { driver_errf(EMU_TOOL, "%.*s", CFREE_SLICE_ARG(CFREE_SLICE_LIT( - "usage: cfree emu [options] guest.elf [-- guest-arg...]\n" + "usage: cfree emu [options] guest-exe [-- guest-arg...]\n" " cfree emu --help for full option reference"))); } @@ -50,36 +52,35 @@ void driver_help_emu(void) { driver_printf( "%.*s", CFREE_SLICE_ARG(CFREE_SLICE_LIT( - "cfree emu — run a guest user-mode ELF on the host\n" + "cfree emu — run a guest user-mode executable on the host\n" "\n" "USAGE\n" - " cfree emu [options] guest.elf [-- guest-arg...]\n" + " cfree emu [options] guest-exe [-- guest-arg...]\n" "\n" "DESCRIPTION\n" - " Loads a static guest user-mode ELF and runs it on the host via the\n" - " per-basic-block JIT translator. v1 supports two guest architectures:\n" - " aarch64 and riscv64; the host code generated by the translator runs\n" - " natively on the host arch.\n" + " Loads a static guest user-mode executable and runs it on the host via\n" + " the per-basic-block JIT translator. The host code generated by the\n" + " translator runs natively on the host arch.\n" "\n" " The driver returns the guest's exit code on a clean exit, or 1 on\n" " internal failure. Argv shape mirrors `cfree run`: anything after\n" " `--` is forwarded as the guest argv. With no `--`, argv[0] defaults\n" - " to the guest ELF path.\n" + " to the guest executable path.\n" "\n" "OPTIONS\n" " -O0 -O1 -O2 Translator optimization level (default -O0)\n" " -arch ARCH Force guest arch: aarch64 (alias arm64) or\n" " riscv64 (alias rv64). 
When omitted the arch is\n" - " auto-detected from the ELF.\n" + " auto-detected from the executable.\n" " -tracepc Trace each translated PC\n" " -traceinsn Trace each guest instruction\n" " -traceblock Trace each translated basic block\n" " -h, --help Show this help and exit\n" "\n" "EXAMPLES\n" - " cfree emu hello-arm64.elf\n" - " cfree emu -arch riscv64 hello-rv64.elf -- foo bar\n" - " cfree emu -O2 -tracepc prog.elf\n" + " cfree emu hello\n" + " cfree emu -arch riscv64 hello -- foo bar\n" + " cfree emu -O2 -tracepc prog\n" "\n" "EXIT CODES\n" " Returns the guest's exit code on clean exit, or 1 on internal\n" @@ -89,7 +90,7 @@ void driver_help_emu(void) { static int emu_alloc_arrays(EmuOptions* o, int argc) { size_t bound = (size_t)argc; o->argv_bound = bound; - /* +1 to leave room for the elf_path default at index 0. */ + /* +1 to leave room for the guest_path default at index 0. */ o->guest_argv = driver_alloc_zeroed(o->env, (bound + 1) * sizeof(*o->guest_argv)); if (!o->guest_argv) { @@ -101,12 +102,12 @@ static int emu_alloc_arrays(EmuOptions* o, int argc) { static int emu_record_arch(EmuOptions* o, const char* val) { if (driver_streq(val, "aarch64") || driver_streq(val, "arm64")) { - o->guest_arch = CFREE_EMU_ARCH_AARCH64; + o->guest_arch = CFREE_ARCH_ARM_64; o->guest_arch_set = 1; return 0; } if (driver_streq(val, "riscv64") || driver_streq(val, "rv64")) { - o->guest_arch = CFREE_EMU_ARCH_RISCV64; + o->guest_arch = CFREE_ARCH_RV64; o->guest_arch_set = 1; return 0; } @@ -173,17 +174,17 @@ static int emu_parse(int argc, char** argv, EmuOptions* o) { return 1; } - if (o->elf_path) { - driver_errf(EMU_TOOL, "multiple guest ELF inputs: %.*s, %.*s", - CFREE_SLICE_ARG(cfree_slice_cstr(o->elf_path)), + if (o->guest_path) { + driver_errf(EMU_TOOL, "multiple guest executable inputs: %.*s, %.*s", + CFREE_SLICE_ARG(cfree_slice_cstr(o->guest_path)), CFREE_SLICE_ARG(cfree_slice_cstr(a))); return 1; } - o->elf_path = a; + o->guest_path = a; } - if (!o->elf_path) { - 
driver_errf(EMU_TOOL, "missing guest ELF input"); + if (!o->guest_path) { + driver_errf(EMU_TOOL, "missing guest executable input"); emu_usage(); return 1; } @@ -197,60 +198,46 @@ static void emu_options_release(EmuOptions* o) { } } -/* Map a CfreeArchKind from cfree_detect_target onto the emu's guest-arch - * enum. Returns 0 on a supported arch, 1 otherwise. */ -static int emu_arch_from_kind(CfreeArchKind k, CfreeEmuArch* out) { - switch (k) { - case CFREE_ARCH_ARM_64: - *out = CFREE_EMU_ARCH_AARCH64; - return 0; - case CFREE_ARCH_RV64: - *out = CFREE_EMU_ARCH_RISCV64; - return 0; - default: - return 1; - } -} - -static const char* emu_arch_name(CfreeEmuArch a) { +static const char* emu_arch_name(CfreeArchKind a) { switch (a) { - case CFREE_EMU_ARCH_AARCH64: + case CFREE_ARCH_ARM_64: return "aarch64"; - case CFREE_EMU_ARCH_RISCV64: + case CFREE_ARCH_RV64: return "riscv64"; + case CFREE_ARCH_X86_64: + return "x86_64"; + case CFREE_ARCH_X86_32: + return "x86"; + case CFREE_ARCH_ARM_32: + return "arm"; + case CFREE_ARCH_RV32: + return "riscv32"; + case CFREE_ARCH_WASM: + return "wasm"; } return "?"; } -/* Auto-detect the guest arch from the ELF magic when -arch was not set. - * Honors a user-supplied -arch verbatim (no cross-check against the ELF; - * mismatches surface as decode failures inside the emu). 
*/ -static int emu_resolve_arch(EmuOptions* o, const CfreeSlice* elf) { +static int emu_resolve_target(EmuOptions* o, const CfreeSlice* input) { CfreeTarget detected; - if (o->guest_arch_set) return 0; - if (cfree_detect_target(elf->data, elf->len, &detected) != CFREE_OK) { - driver_errf(EMU_TOOL, "could not detect target from %.*s; pass -arch", - CFREE_SLICE_ARG(cfree_slice_cstr(o->elf_path))); - return 1; - } - if (emu_arch_from_kind(detected.arch, &o->guest_arch) != 0) { - driver_errf(EMU_TOOL, - "unsupported guest arch in %.*s; v1 supports aarch64 and " - "riscv64", - CFREE_SLICE_ARG(cfree_slice_cstr(o->elf_path))); + if (cfree_detect_target(input->data, input->len, &detected) != CFREE_OK) { + driver_errf(EMU_TOOL, "could not detect target from %.*s", + CFREE_SLICE_ARG(cfree_slice_cstr(o->guest_path))); return 1; } - o->guest_arch_set = 1; + if (o->guest_arch_set) detected.arch = o->guest_arch; + o->guest_target = detected; + o->guest_target_set = 1; return 0; } /* Build a NULL-terminated argv for the guest. argv[0] defaults to the - * guest ELF path if the user supplied no `--` segment, matching Unix + * guest executable path if the user supplied no `--` segment, matching Unix * convention. The returned array points into the caller-owned argv; the * trailing NULL slot lives in the EmuOptions back-store. 
*/ static void emu_finalize_argv(EmuOptions* o, const char*** out_argv) { if (o->nguest_argv == 0) { - o->guest_argv[0] = o->elf_path; + o->guest_argv[0] = o->guest_path; o->guest_argv[1] = 0; } else { o->guest_argv[o->nguest_argv] = 0; @@ -263,9 +250,10 @@ int driver_emu(int argc, char** argv) { EmuOptions eo = {0}; CfreeContext ctx; CfreeCompiler* compiler = NULL; - DriverLoad elf_lf = {0}; - CfreeSlice elf_in; + DriverLoad guest_lf = {0}; + CfreeSlice guest_in; CfreeEmuOptions opts; + CfreeJitHost jhost; const char** guest_argv; int exit_code = 0; int rc = 1; @@ -290,30 +278,33 @@ int driver_emu(int argc, char** argv) { goto out; } - if (driver_load_bytes(ctx.file_io, EMU_TOOL, eo.elf_path, &elf_lf, - &elf_in) != 0) { + if (driver_load_bytes(ctx.file_io, EMU_TOOL, eo.guest_path, &guest_lf, + &guest_in) != 0) { goto out; } - if (emu_resolve_arch(&eo, &elf_in) != 0) goto out; + if (emu_resolve_target(&eo, &guest_in) != 0) goto out; /* The emu's host-side compiler runs at the host's native target — - * the JIT image holds host code, the *guest* arch is configured - * through CfreeEmuOptions.guest_arch. */ + * the JIT image holds host code, while the guest target is resolved + * from the executable and optional driver flags. 
*/ if (driver_compiler_new(driver_host_target(), &ctx, &compiler) != CFREE_OK) { driver_errf(EMU_TOOL, "failed to initialize compiler"); goto out; } emu_finalize_argv(&eo, &guest_argv); + jhost = driver_env_to_jit_host(&env); { CfreeEmuOptions z = {0}; opts = z; } - opts.guest_arch = eo.guest_arch; - opts.guest_elf_bytes = elf_in.data; - opts.guest_elf_len = elf_in.len; + opts.guest_name = cfree_slice_cstr(eo.guest_path); + opts.guest_bytes = guest_in; + opts.guest_target = eo.guest_target; + opts.has_guest_target = eo.guest_target_set != 0; + opts.jit_host = &jhost; opts.optimize = eo.opt_level; opts.trace = eo.trace; opts.argv = (const char* const*)guest_argv; @@ -321,8 +312,9 @@ int driver_emu(int argc, char** argv) { if (cfree_emu_run(compiler, &opts, &exit_code) != CFREE_OK) { driver_errf(EMU_TOOL, "emulation of %.*s (%.*s) failed", - CFREE_SLICE_ARG(cfree_slice_cstr(eo.elf_path)), - CFREE_SLICE_ARG(cfree_slice_cstr(emu_arch_name(eo.guest_arch)))); + CFREE_SLICE_ARG(cfree_slice_cstr(eo.guest_path)), + CFREE_SLICE_ARG( + cfree_slice_cstr(emu_arch_name(eo.guest_target.arch)))); goto out; } @@ -330,8 +322,8 @@ int driver_emu(int argc, char** argv) { out: if (compiler) driver_compiler_free(compiler); - if (elf_lf.loaded && ctx.file_io) - driver_release_bytes(ctx.file_io, &elf_lf); + if (guest_lf.loaded && ctx.file_io) + driver_release_bytes(ctx.file_io, &guest_lf); emu_options_release(&eo); driver_env_fini(&env); return rc; diff --git a/include/cfree/emu.h b/include/cfree/emu.h @@ -3,19 +3,16 @@ #include <cfree/core.h> +typedef struct CfreeJitHost CfreeJitHost; + /* - * User-mode guest ELF emulator. + * User-mode guest executable emulator. * * The emulator translates guest basic blocks through the cfree backend and * executes them in-process. It is intentionally separate from the JIT API: * embedders that only run native JIT code do not need this surface. 
*/ -typedef enum CfreeEmuArch { - CFREE_EMU_ARCH_AARCH64, - CFREE_EMU_ARCH_RISCV64, -} CfreeEmuArch; - typedef enum CfreeEmuTraceFlag { CFREE_EMU_TRACE_PC = 1u << 0, CFREE_EMU_TRACE_INSN = 1u << 1, @@ -24,12 +21,82 @@ typedef enum CfreeEmuTraceFlag { typedef uint32_t CfreeEmuTraceFlags; +typedef struct CfreeEmuSyscallRequest { + uint64_t number; + uint64_t args[6]; +} CfreeEmuSyscallRequest; + +typedef struct CfreeEmuSyscallResult { + int64_t result; + int32_t guest_errno; + uint32_t flags; +} CfreeEmuSyscallResult; + +typedef enum CfreeEmuImportAbi { + CFREE_EMU_IMPORT_ABI_DEFAULT = 0, + CFREE_EMU_IMPORT_ABI_GUEST_C = 1, +} CfreeEmuImportAbi; + +typedef enum CfreeEmuValueKind { + CFREE_EMU_VALUE_VOID = 0, + CFREE_EMU_VALUE_U64 = 1, + CFREE_EMU_VALUE_I64 = 2, + CFREE_EMU_VALUE_PTR = 3, +} CfreeEmuValueKind; + +typedef struct CfreeEmuImportSignature { + uint8_t abi; + uint8_t result; + uint8_t nargs; + uint8_t args[8]; +} CfreeEmuImportSignature; + +typedef struct CfreeEmuImportRequest { + CfreeSlice object_name; + CfreeSlice symbol_name; + uint32_t bind_flags; + CfreeEmuImportSignature signature; +} CfreeEmuImportRequest; + +typedef struct CfreeEmuResolvedImport { + uint64_t guest_addr; + void *host_fn; + uint32_t flags; + CfreeEmuImportSignature signature; +} CfreeEmuResolvedImport; + +typedef struct CfreeEmuObjectRequest { + CfreeSlice object_name; + uint32_t flags; +} CfreeEmuObjectRequest; + +typedef struct CfreeEmuResolvedObject { + CfreeSlice object_bytes; + uint32_t flags; +} CfreeEmuResolvedObject; + +typedef struct CfreeEmuExternalBindings { + CfreeStatus (*syscall)(void *user, CfreeEmu *, + const CfreeEmuSyscallRequest *, + CfreeEmuSyscallResult *out); + CfreeStatus (*resolve_import)(void *user, CfreeEmu *, + const CfreeEmuImportRequest *, + CfreeEmuResolvedImport *out); + CfreeStatus (*resolve_object)(void *user, CfreeEmu *, + const CfreeEmuObjectRequest *, + CfreeEmuResolvedObject *out); + void *user; +} CfreeEmuExternalBindings; + typedef struct 
CfreeEmuOptions { - CfreeEmuArch guest_arch; - const uint8_t *guest_elf_bytes; - size_t guest_elf_len; + CfreeSlice guest_name; + CfreeSlice guest_bytes; + CfreeTarget guest_target; + bool has_guest_target; + const CfreeJitHost *jit_host; int optimize; CfreeEmuTraceFlags trace; + CfreeEmuExternalBindings bindings; const char *const *argv; const char *const *envp; } CfreeEmuOptions; diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -1044,6 +1044,104 @@ struct ArchDisasm { void (*destroy)(ArchDisasm*); }; +#define CFREE_DECODE_MAX_OPERANDS 6u + +typedef enum CfreeDecodeFlag { + CFREE_DECODE_TERMINATOR = 1u << 0, + CFREE_DECODE_BRANCH = 1u << 1, + CFREE_DECODE_CALL = 1u << 2, + CFREE_DECODE_RET = 1u << 3, + CFREE_DECODE_MEMORY = 1u << 4, + CFREE_DECODE_TRAP = 1u << 5, +} CfreeDecodeFlag; + +typedef enum CfreeDecodedOperandKind { + CFREE_DECOP_NONE, + CFREE_DECOP_REG, + CFREE_DECOP_IMM, + CFREE_DECOP_MEM, + CFREE_DECOP_PCREL, + CFREE_DECOP_SYSREG, +} CfreeDecodedOperandKind; + +typedef struct CfreeDecodedOperand { + u8 kind; + u8 width_bits; + u16 flags; + u32 reg; + u32 index_reg; + i64 imm; + u8 scale; + u8 pad[7]; +} CfreeDecodedOperand; + +typedef struct CfreeDecodedInsn { + u64 pc; + const u8* bytes; + u8 nbytes; + u8 noperands; + u16 flags; + u32 opcode; /* Arch-owned stable opcode id. */ + u32 encoding_id; /* Optional row/table id for formatting. */ + CfreeDecodedOperand operands[CFREE_DECODE_MAX_OPERANDS]; + u64 arch[2]; /* Small arch-private payload. 
*/ +} CfreeDecodedInsn; + +typedef struct ArchInsnFormatter ArchInsnFormatter; +typedef struct CfreeCg CfreeCg; +typedef struct EmuCPUState EmuCPUState; +typedef struct EmuLiftCtx EmuLiftCtx; +typedef struct EmuProcess EmuProcess; +typedef struct EmuThread EmuThread; +struct ArchInsnFormatter { + CfreeStatus (*format)(ArchInsnFormatter*, const CfreeDecodedInsn*, + CfreeInsn* out); + void (*destroy)(ArchInsnFormatter*); +}; + +typedef struct ArchDecodeOps { + u8 min_insn_len; + u8 max_insn_len; + + CfreeStatus (*decode_one)(Compiler*, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out); + CfreeStatus (*decode_block)(Compiler*, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out, u32 cap, u32* n_out); + + ArchInsnFormatter* (*formatter_new)(Compiler*); + CfreeStatus (*format)(ArchInsnFormatter*, const CfreeDecodedInsn*, + CfreeInsn* out); + void (*formatter_free)(ArchInsnFormatter*); +} ArchDecodeOps; + +typedef struct ArchEmuOps { + EmuCPUState* (*cpu_new)(Compiler*, u64 initial_pc, u64 initial_sp); + CfreeCgTypeId (*cpu_type)(Compiler*); + CfreeCgTypeId (*block_fn_type)(Compiler*); + CfreeStatus (*lift_block)(Compiler*, CfreeCg*, const CfreeDecodedInsn*, u32 n, + const EmuLiftCtx*); + u64 (*get_gpr)(EmuThread*, u32 reg); + void (*set_gpr)(EmuThread*, u32 reg, u64 value); + u64 (*get_syscall_no)(EmuThread*); + u64 (*get_syscall_arg)(EmuThread*, u32 index); + void (*set_syscall_result)(EmuThread*, u64 value); + u64 (*get_sp)(EmuThread*); + void (*set_sp)(EmuThread*, u64 value); + u64 (*get_tp)(EmuThread*); + void (*set_tp)(EmuThread*, u64 value); + u64 (*signal_context_size)(EmuProcess*, EmuThread*); + CfreeStatus (*save_signal_context)(EmuProcess*, EmuThread*, u8* dst, + u64 size); + CfreeStatus (*restore_signal_context)(EmuProcess*, EmuThread*, + const u8* src, u64 size); + CfreeStatus (*set_signal_handler_args)(EmuProcess*, EmuThread*, int signo, + u64 siginfo, u64 ucontext); + u64 (*signal_stack_align)(EmuProcess*, EmuThread*); + u32 
import_thunk_size; + CfreeStatus (*emit_import_thunk)(EmuProcess*, u64 thunk_vaddr); + void* (*resolve_runtime_helper)(void* emu, CfreeSlice name); +} ArchEmuOps; + typedef struct LinkArchDesc LinkArchDesc; typedef struct ArchDwarfOps { @@ -1108,6 +1206,8 @@ typedef struct ArchImpl { ArchDisasm* (*disasm_new)(Compiler*); int (*apply_label_fixup)(Compiler*, const ArchLabelFixup*); + const ArchDecodeOps* decode; + const ArchEmuOps* emu; const LinkArchDesc* link; const ArchDwarfOps* dwarf; const ArchDbgOps* dbg; @@ -1150,5 +1250,13 @@ ArchDisasm* arch_disasm_new(Compiler*); u32 arch_disasm_decode(ArchDisasm*, const u8* bytes, size_t len, u64 vaddr, CfreeInsn* out); void arch_disasm_free(ArchDisasm*); +CfreeStatus arch_decode_one(Compiler*, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out); +CfreeStatus arch_decode_block(Compiler*, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out, u32 cap, u32* n_out); +ArchInsnFormatter* arch_insn_formatter_new(Compiler*); +CfreeStatus arch_format_insn(ArchInsnFormatter*, const CfreeDecodedInsn*, + CfreeInsn* out); +void arch_insn_formatter_free(ArchInsnFormatter*); #endif diff --git a/src/arch/disasm.c b/src/arch/disasm.c @@ -23,3 +23,47 @@ void arch_disasm_free(ArchDisasm* d) { if (!d) return; if (d->destroy) d->destroy(d); } + +CfreeStatus arch_decode_one(Compiler* c, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out) { + const ArchImpl* arch; + if (!c || !bytes || !out) return CFREE_INVALID; + arch = arch_for_compiler(c); + if (!arch || !arch->decode || !arch->decode->decode_one) + return CFREE_UNSUPPORTED; + return arch->decode->decode_one(c, bytes, len, pc, out); +} + +CfreeStatus arch_decode_block(Compiler* c, const u8* bytes, size_t len, u64 pc, + CfreeDecodedInsn* out, u32 cap, u32* n_out) { + const ArchImpl* arch; + if (n_out) *n_out = 0; + if (!c || !bytes || !out || !n_out) return CFREE_INVALID; + arch = arch_for_compiler(c); + if (!arch || !arch->decode || !arch->decode->decode_block) + 
return CFREE_UNSUPPORTED; + return arch->decode->decode_block(c, bytes, len, pc, out, cap, n_out); +} + +ArchInsnFormatter* arch_insn_formatter_new(Compiler* c) { + const ArchImpl* arch = arch_for_compiler(c); + if (arch && arch->decode && arch->decode->formatter_new) { + return arch->decode->formatter_new(c); + } + { + SrcLoc loc = {0, 0, 0}; + compiler_panic(c, loc, "arch_insn_formatter_new: unsupported target arch %d", + (int)c->target.arch); + } +} + +CfreeStatus arch_format_insn(ArchInsnFormatter* f, const CfreeDecodedInsn* insn, + CfreeInsn* out) { + if (!f || !insn || !out) return CFREE_INVALID; + return f->format(f, insn, out); +} + +void arch_insn_formatter_free(ArchInsnFormatter* f) { + if (!f) return; + if (f->destroy) f->destroy(f); +} diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c @@ -10,6 +10,7 @@ extern const LinkArchDesc link_arch_rv64; extern const ArchDbgOps rv64_dbg_ops; +extern const ArchEmuOps rv64_emu_ops; static const ArchDwarfOps rv64_dwarf_ops = { .min_inst_len = 4u, @@ -144,6 +145,8 @@ const ArchImpl arch_impl_rv64 = { .asm_new = rv64_arch_asm_new, .disasm_new = rv64_disasm_new, .apply_label_fixup = rv64_apply_label_fixup, + .decode = &rv64_decode_ops, + .emu = &rv64_emu_ops, .link = &link_arch_rv64, .dwarf = &rv64_dwarf_ops, .dbg = &rv64_dbg_ops, diff --git a/src/arch/rv64/disasm.c b/src/arch/rv64/disasm.c @@ -18,9 +18,10 @@ #define RV64_DASM_MNEM_CAP 16u #define RV64_DASM_OPS_CAP 96u #define RV64_DASM_ANN_CAP 64u +#define RV64_ENCODING_UNKNOWN 0xffffffffu -typedef struct Rv64Disasm { - ArchDisasm base; +typedef struct Rv64InsnFormatter { + ArchInsnFormatter base; Compiler* c; Heap* heap; char mnem_buf[RV64_DASM_MNEM_CAP]; @@ -29,8 +30,17 @@ typedef struct Rv64Disasm { StrBuf mnem; StrBuf ops; StrBuf ann; +} Rv64InsnFormatter; + +typedef struct Rv64Disasm { + ArchDisasm base; + Rv64InsnFormatter fmt; } Rv64Disasm; +static CfreeStatus rv64_format_insn(ArchInsnFormatter*, + const CfreeDecodedInsn*, CfreeInsn*); +static void 
rv64_formatter_destroy(ArchInsnFormatter*); + static u32 rv_read_u32_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24); @@ -40,66 +50,389 @@ static u32 rv_read_u16_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8); } -static void rv_emit_fallback32(Rv64Disasm* d, u32 word) { - strbuf_reset(&d->mnem); - strbuf_puts(&d->mnem, ".word"); - strbuf_reset(&d->ops); - strbuf_put_hex_u64(&d->ops, (u64)word); +static void rv_fmt_emit_fallback32(Rv64InsnFormatter* f, u32 word) { + strbuf_reset(&f->mnem); + strbuf_puts(&f->mnem, ".word"); + strbuf_reset(&f->ops); + strbuf_put_hex_u64(&f->ops, (u64)word); } -static void rv_emit_fallback16(Rv64Disasm* d, u32 hw) { - strbuf_reset(&d->mnem); - strbuf_puts(&d->mnem, ".hword"); - strbuf_reset(&d->ops); - strbuf_put_hex_u64(&d->ops, (u64)hw); +static void rv_fmt_emit_fallback16(Rv64InsnFormatter* f, u32 hw) { + strbuf_reset(&f->mnem); + strbuf_puts(&f->mnem, ".hword"); + strbuf_reset(&f->ops); + strbuf_put_hex_u64(&f->ops, (u64)hw); } -static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, - CfreeInsn* out) { - Rv64Disasm* d = (Rv64Disasm*)base; - if (len < 2u) return 0; - u32 first_hw = rv_read_u16_le(bytes); - u32 nbytes; - if ((first_hw & 3u) != 3u) { - /* 16-bit compressed instruction. 
*/ - const Rv64InsnDesc* desc = rv64_disasm_find_c(first_hw); - if (desc) { - strbuf_reset(&d->mnem); - strbuf_put_slice(&d->mnem, desc->mnemonic); - strbuf_reset(&d->ops); - rv64_print_operands(&d->ops, desc, first_hw, vaddr); - } else { - rv_emit_fallback16(d, first_hw); +static u32 rv64_desc_encoding_id(const Rv64InsnDesc* desc) { + u32 i; + if (!desc) return RV64_ENCODING_UNKNOWN; + for (i = 0; i < rv64_insn_table_n; ++i) { + if (desc == &rv64_insn_table[i]) return i; + } + return RV64_ENCODING_UNKNOWN; +} + +static u32 rv64_semantic_opcode(u32 word, u32 nbytes) { + u32 op, funct3, funct7; + if (nbytes != 4u) return RV64_DEC_UNKNOWN; + if (word == rv_ecall()) return RV64_DEC_ECALL; + if (word == rv_ebreak()) return RV64_DEC_EBREAK; + op = word & 0x7fu; + funct3 = (word >> 12) & 0x7u; + funct7 = (word >> 25) & 0x7fu; + if (op == RV_OP_IMM && funct3 == 0u) return RV64_DEC_ADDI; + if (op == RV_OP && funct3 == 0u && funct7 == 0u) return RV64_DEC_ADD; + if (op == RV_AUIPC) return RV64_DEC_AUIPC; + if (op == RV_LOAD && funct3 == 3u) return RV64_DEC_LD; + if (op == RV_STORE && funct3 == 3u) return RV64_DEC_SD; + if (op == RV_JALR && funct3 == 0u) return RV64_DEC_JALR; + return RV64_DEC_UNKNOWN; +} + +static void rv_decop_none(CfreeDecodedOperand* o) { + memset(o, 0, sizeof(*o)); + o->kind = CFREE_DECOP_NONE; + o->index_reg = REG_NONE; +} + +static void rv_decop_reg(CfreeDecodedOperand* o, u32 reg, u8 width_bits) { + rv_decop_none(o); + o->kind = CFREE_DECOP_REG; + o->width_bits = width_bits; + o->reg = reg; +} + +static void rv_decop_imm(CfreeDecodedOperand* o, i64 imm) { + rv_decop_none(o); + o->kind = CFREE_DECOP_IMM; + o->imm = imm; +} + +static void rv_decop_sysreg(CfreeDecodedOperand* o, u32 reg) { + rv_decop_none(o); + o->kind = CFREE_DECOP_SYSREG; + o->reg = reg; +} + +static void rv_decop_mem(CfreeDecodedOperand* o, u32 base, i64 imm, + u8 width_bits) { + rv_decop_none(o); + o->kind = CFREE_DECOP_MEM; + o->width_bits = width_bits; + o->reg = base; + o->imm = 
imm; +} + +static void rv_decop_pcrel(CfreeDecodedOperand* o, u64 pc, i64 disp) { + rv_decop_none(o); + o->kind = CFREE_DECOP_PCREL; + o->imm = (i64)(pc + (u64)disp); +} + +static u8 rv_load_width_bits(u32 funct3) { + switch (funct3 & 7u) { + case 0: + case 4: + return 8; + case 1: + case 5: + return 16; + case 2: + case 6: + return 32; + case 3: + return 64; + default: + return 0; + } +} + +static u16 rv64_decode_flags(const Rv64InsnDesc* desc, u32 word) { + u16 flags = 0; + Rv64Format fmt; + if (!desc) return 0; + fmt = (Rv64Format)desc->fmt; + switch (fmt) { + case RV64_FMT_B: + case RV64_FMT_CB: + case RV64_FMT_CJ: + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_BRANCH; + break; + case RV64_FMT_J: + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_BRANCH; + if (((word >> 7) & 0x1fu) == RV_RA) flags |= CFREE_DECODE_CALL; + break; + case RV64_FMT_JALR: { + u32 rd = (word >> 7) & 0x1fu; + u32 rs1 = (word >> 15) & 0x1fu; + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_BRANCH; + if (rd == RV_RA) flags |= CFREE_DECODE_CALL; + if (rd == RV_ZERO && rs1 == RV_RA) flags |= CFREE_DECODE_RET; + break; } - nbytes = 2; - } else { - if (len < 4u) return 0; - u32 word = rv_read_u32_le(bytes); - const Rv64InsnDesc* desc = rv64_disasm_find(word); - if (desc) { - strbuf_reset(&d->mnem); - strbuf_put_slice(&d->mnem, desc->mnemonic); - strbuf_reset(&d->ops); - rv64_print_operands(&d->ops, desc, word, vaddr); - } else { - rv_emit_fallback32(d, word); + case RV64_FMT_CR: + if (slice_eq_cstr(desc->mnemonic, "c.jr") || + slice_eq_cstr(desc->mnemonic, "c.jalr")) { + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_BRANCH; + if (slice_eq_cstr(desc->mnemonic, "c.jalr")) flags |= CFREE_DECODE_CALL; + } + break; + case RV64_FMT_SYSTEM: + if (word == rv_ecall() || word == rv_ebreak()) + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_TRAP; + break; + case RV64_FMT_C_NONE: + if ((word & 0xffffu) == 0x9002u) + flags |= CFREE_DECODE_TERMINATOR | CFREE_DECODE_TRAP; + break; + case 
RV64_FMT_LOAD: + case RV64_FMT_STORE: + case RV64_FMT_FP_LOAD: + case RV64_FMT_FP_STORE: + case RV64_FMT_AMO: + case RV64_FMT_LR: + case RV64_FMT_CL: + case RV64_FMT_CS: + case RV64_FMT_CSS: + flags |= CFREE_DECODE_MEMORY; + break; + default: + break; + } + return flags; +} + +static void rv64_decode_operands(const Rv64InsnDesc* desc, u32 word, u64 pc, + CfreeDecodedInsn* out) { + Rv64Format fmt; + if (!desc) return; + fmt = (Rv64Format)desc->fmt; + switch (fmt) { + case RV64_FMT_R: + case RV64_FMT_FP_R: + case RV64_FMT_FP_RM: { + Rv64R r = rv64_r_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], r.rd, 64); + rv_decop_reg(&out->operands[1], r.rs1, 64); + rv_decop_reg(&out->operands[2], r.rs2, 64); + break; + } + case RV64_FMT_I: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_reg(&out->operands[1], i.rs1, 64); + rv_decop_imm(&out->operands[2], rv64_sext(i.imm12, 12)); + break; + } + case RV64_FMT_I_SHIFT: + case RV64_FMT_I_SHIFTW: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_reg(&out->operands[1], i.rs1, 64); + rv_decop_imm(&out->operands[2], + fmt == RV64_FMT_I_SHIFTW ? 
(i.imm12 & 0x1f) + : (i.imm12 & 0x3f)); + break; } - nbytes = 4; + case RV64_FMT_LOAD: + case RV64_FMT_FP_LOAD: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 2; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), + rv_load_width_bits(i.funct3)); + break; + } + case RV64_FMT_S: + case RV64_FMT_STORE: + case RV64_FMT_FP_STORE: { + Rv64S s = rv64_s_unpack(word); + out->noperands = 2; + rv_decop_reg(&out->operands[0], s.rs2, 64); + rv_decop_mem(&out->operands[1], s.rs1, rv64_sext(s.imm12, 12), + rv_load_width_bits(s.funct3)); + break; + } + case RV64_FMT_B: { + Rv64B b = rv64_b_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], b.rs1, 64); + rv_decop_reg(&out->operands[1], b.rs2, 64); + rv_decop_pcrel(&out->operands[2], pc, rv64_sext(b.imm13, 13)); + break; + } + case RV64_FMT_U: { + Rv64U u = rv64_u_unpack(word); + out->noperands = 2; + rv_decop_reg(&out->operands[0], u.rd, 64); + rv_decop_imm(&out->operands[1], (i64)(i32)u.imm32_hi20); + break; + } + case RV64_FMT_J: { + Rv64J j = rv64_j_unpack(word); + out->noperands = 2; + rv_decop_reg(&out->operands[0], j.rd, 64); + rv_decop_pcrel(&out->operands[1], pc, rv64_sext(j.imm21, 21)); + break; + } + case RV64_FMT_JALR: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 2; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 64); + break; + } + case RV64_FMT_CSR: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_sysreg(&out->operands[1], i.imm12); + rv_decop_reg(&out->operands[2], i.rs1, 64); + break; + } + case RV64_FMT_CSRI: { + Rv64I i = rv64_i_unpack(word); + out->noperands = 3; + rv_decop_reg(&out->operands[0], i.rd, 64); + rv_decop_sysreg(&out->operands[1], i.imm12); + rv_decop_imm(&out->operands[2], (i64)i.rs1); + break; + } + default: + break; + } +} + +static CfreeStatus rv64_decode_one(Compiler* c, const u8* 
bytes, size_t len, + u64 pc, CfreeDecodedInsn* out) { + const Rv64InsnDesc* desc; + u32 first_hw; + u32 word; + u32 encoding_id; + (void)c; + if (!bytes || !out) return CFREE_INVALID; + if (len < 2u) return CFREE_MALFORMED; + memset(out, 0, sizeof(*out)); + for (u32 i = 0; i < CFREE_DECODE_MAX_OPERANDS; ++i) + rv_decop_none(&out->operands[i]); + + first_hw = rv_read_u16_le(bytes); + if ((first_hw & 3u) != 3u) { + word = first_hw; + desc = rv64_disasm_find_c(first_hw); + out->nbytes = 2; + } else { + if (len < 4u) return CFREE_MALFORMED; + word = rv_read_u32_le(bytes); + desc = rv64_disasm_find(word); + out->nbytes = 4; } - strbuf_reset(&d->ann); - out->vaddr = vaddr; + encoding_id = rv64_desc_encoding_id(desc); + out->pc = pc; out->bytes = bytes; - out->nbytes = nbytes; - out->mnemonic = strbuf_slice(&d->mnem); - out->operands = strbuf_slice(&d->ops); - out->annotation = strbuf_slice(&d->ann); - return nbytes; + out->encoding_id = encoding_id; + out->opcode = rv64_semantic_opcode(word, out->nbytes); + out->flags = rv64_decode_flags(desc, word); + out->arch[0] = word; + out->arch[1] = desc ? desc->fmt : 0xffu; + rv64_decode_operands(desc, word, pc, out); + return CFREE_OK; +} + +static CfreeStatus rv64_decode_block(Compiler* c, const u8* bytes, size_t len, + u64 pc, CfreeDecodedInsn* out, u32 cap, + u32* n_out) { + u32 n = 0; + if (n_out) *n_out = 0; + if (!bytes || !out || !n_out) return CFREE_INVALID; + while (n < cap && len > 0) { + CfreeStatus st = rv64_decode_one(c, bytes, len, pc, &out[n]); + if (st != CFREE_OK) return n ? 
CFREE_OK : st; + bytes += out[n].nbytes; + len -= out[n].nbytes; + pc += out[n].nbytes; + ++n; + if (out[n - 1u].flags & CFREE_DECODE_TERMINATOR) break; + } + *n_out = n; + return CFREE_OK; +} + +static void rv64_formatter_init(Rv64InsnFormatter* f, Compiler* c, Heap* h) { + memset(f, 0, sizeof(*f)); + f->c = c; + f->heap = h; + f->base.format = rv64_format_insn; + f->base.destroy = rv64_formatter_destroy; + strbuf_init(&f->mnem, f->mnem_buf, sizeof f->mnem_buf); + strbuf_init(&f->ops, f->ops_buf, sizeof f->ops_buf); + strbuf_init(&f->ann, f->ann_buf, sizeof f->ann_buf); +} + +static CfreeStatus rv64_format_insn(ArchInsnFormatter* base, + const CfreeDecodedInsn* insn, + CfreeInsn* out) { + Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; + const Rv64InsnDesc* desc; + u32 word; + if (!f || !insn || !out) return CFREE_INVALID; + word = (u32)insn->arch[0]; + desc = insn->nbytes == 2u ? rv64_disasm_find_c(word) : rv64_disasm_find(word); + if (desc) { + strbuf_reset(&f->mnem); + strbuf_put_slice(&f->mnem, desc->mnemonic); + strbuf_reset(&f->ops); + rv64_print_operands(&f->ops, desc, word, insn->pc); + } else if (insn->nbytes == 2u) { + rv_fmt_emit_fallback16(f, word); + } else { + rv_fmt_emit_fallback32(f, word); + } + + strbuf_reset(&f->ann); + out->vaddr = insn->pc; + out->bytes = insn->bytes; + out->nbytes = insn->nbytes; + out->mnemonic = strbuf_slice(&f->mnem); + out->operands = strbuf_slice(&f->ops); + out->annotation = strbuf_slice(&f->ann); + return CFREE_OK; +} + +static void rv64_formatter_destroy(ArchInsnFormatter* base) { + Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; + if (!f) return; + f->heap->free(f->heap, f, sizeof(*f)); +} + +static ArchInsnFormatter* rv64_formatter_new(Compiler* c) { + Heap* h = (Heap*)c->ctx->heap; + Rv64InsnFormatter* f = + (Rv64InsnFormatter*)h->alloc(h, sizeof(*f), _Alignof(Rv64InsnFormatter)); + if (!f) return NULL; + rv64_formatter_init(f, c, h); + return &f->base; +} + +static u32 rv_decode(ArchDisasm* base, const u8* 
bytes, size_t len, u64 vaddr, + CfreeInsn* out) { + Rv64Disasm* d = (Rv64Disasm*)base; + CfreeDecodedInsn insn; + CfreeStatus st = rv64_decode_one(d->fmt.c, bytes, len, vaddr, &insn); + if (st != CFREE_OK) return 0; + st = rv64_format_insn(&d->fmt.base, &insn, out); + if (st != CFREE_OK) return 0; + return insn.nbytes; } static void rv64_destroy(ArchDisasm* base) { Rv64Disasm* d = (Rv64Disasm*)base; - d->heap->free(d->heap, d, sizeof(*d)); + d->fmt.heap->free(d->fmt.heap, d, sizeof(*d)); } ArchDisasm* rv64_disasm_new(Compiler* c) { @@ -107,12 +440,18 @@ ArchDisasm* rv64_disasm_new(Compiler* c) { Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm)); if (!d) return NULL; memset(d, 0, sizeof(*d)); - d->c = c; - d->heap = h; d->base.decode = rv_decode; d->base.destroy = rv64_destroy; - strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); - strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); - strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf); + rv64_formatter_init(&d->fmt, c, h); return &d->base; } + +const ArchDecodeOps rv64_decode_ops = { + .min_insn_len = 2, + .max_insn_len = 4, + .decode_one = rv64_decode_one, + .decode_block = rv64_decode_block, + .formatter_new = rv64_formatter_new, + .format = rv64_format_insn, + .formatter_free = rv64_formatter_destroy, +}; diff --git a/src/arch/rv64/disasm.h b/src/arch/rv64/disasm.h @@ -10,5 +10,6 @@ #include "arch/arch.h" ArchDisasm* rv64_disasm_new(Compiler*); +extern const ArchDecodeOps rv64_decode_ops; #endif diff --git a/src/arch/rv64/emu.c b/src/arch/rv64/emu.c @@ -0,0 +1,516 @@ +#include "emu/emu.h" + +#include <string.h> + +#include "arch/arch.h" +#include "arch/rv64/isa.h" +#include "core/slice.h" + +#define RV64_EMU_SYM_XREG "__emu_rv64_xreg" +#define RV64_EMU_SYM_SET_XREG "__emu_rv64_set_xreg" +#define RV64_EMU_SYM_JALR "__emu_rv64_jalr" + +typedef struct Rv64EmuCPUState { + u64 x[32]; + u64 f[32]; + u32 fcsr; + u64 reserved_addr; + int has_reservation; +} Rv64EmuCPUState; + +typedef 
struct Rv64EmuLiftSyms { + CfreeCgSym xreg; + CfreeCgSym set_xreg; + CfreeCgSym load64; + CfreeCgSym load64_checked; + CfreeCgSym store64; + CfreeCgSym jalr; + CfreeCgSym syscall; + CfreeCgTypeId xreg_fn; + CfreeCgTypeId set_xreg_fn; + CfreeCgTypeId load64_fn; + CfreeCgTypeId load64_checked_fn; + CfreeCgTypeId store64_fn; + CfreeCgTypeId jalr_fn; + CfreeCgTypeId syscall_fn; + CfreeCgTypeId thread_ptr; + CfreeCgTypeId i32; + CfreeCgTypeId i64; + CfreeCgTypeId i64_ptr; + CfreeCgTypeId void_ty; +} Rv64EmuLiftSyms; + +static CfreeCgSym rv64_emu_decl_helper(CfreeCompiler* c, CfreeCg* cg, + const char* name, CfreeCgTypeId type) { + CfreeCgDecl d; + memset(&d, 0, sizeof(d)); + d.kind = CFREE_CG_DECL_FUNC; + d.linkage_name = cfree_sym_intern(c, cfree_slice_cstr(name)); + d.display_name = d.linkage_name; + d.linkage_name = cfree_cg_c_linkage_name(c, d.linkage_name); + d.type = type; + d.sym.bind = CFREE_SB_GLOBAL; + d.sym.visibility = CFREE_CG_VIS_DEFAULT; + return cfree_cg_decl(cg, d); +} + +static CfreeCgTypeId rv64_emu_func_type(CfreeCompiler* c, CfreeCgTypeId ret, + const CfreeCgTypeId* params, + u32 nparams) { + CfreeCgFuncParam p[5]; + CfreeCgFuncSig sig; + u32 i; + memset(p, 0, sizeof(p)); + for (i = 0; i < nparams; ++i) p[i].type = params[i]; + memset(&sig, 0, sizeof(sig)); + sig.ret = ret; + sig.params = p; + sig.nparams = nparams; + sig.call_conv = CFREE_CG_CC_TARGET_C; + return cfree_cg_type_func(c, sig); +} + +static void rv64_emu_lift_syms_init(CfreeCompiler* c, CfreeCg* cg, + Rv64EmuLiftSyms* out) { + CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c); + CfreeCgTypeId params[5]; + memset(out, 0, sizeof(*out)); + out->void_ty = bi.id[CFREE_CG_BUILTIN_VOID]; + out->i32 = bi.id[CFREE_CG_BUILTIN_I32]; + out->i64 = bi.id[CFREE_CG_BUILTIN_I64]; + out->i64_ptr = cfree_cg_type_ptr(c, out->i64, 0); + out->thread_ptr = emu_thread_type((Compiler*)c); + + params[0] = out->thread_ptr; + params[1] = out->i32; + out->xreg_fn = rv64_emu_func_type(c, out->i64, params, 2); + + 
params[0] = out->thread_ptr; + params[1] = out->i32; + params[2] = out->i64; + out->set_xreg_fn = rv64_emu_func_type(c, out->void_ty, params, 3); + + params[0] = out->thread_ptr; + params[1] = out->i64; + out->load64_fn = rv64_emu_func_type(c, out->i64, params, 2); + + { + CfreeCgTypeId load_params[5]; + load_params[0] = out->thread_ptr; + load_params[1] = out->i64; + load_params[2] = out->i64; + load_params[3] = out->i64; + load_params[4] = out->i64_ptr; + out->load64_checked_fn = rv64_emu_func_type(c, out->i64, load_params, 5); + } + + { + CfreeCgTypeId store_params[5]; + store_params[0] = out->thread_ptr; + store_params[1] = out->i64; + store_params[2] = out->i64; + store_params[3] = out->i64; + store_params[4] = out->i64; + out->store64_fn = rv64_emu_func_type(c, out->i64, store_params, 5); + } + + { + CfreeCgTypeId jalr_params[5]; + jalr_params[0] = out->thread_ptr; + jalr_params[1] = out->i64; + jalr_params[2] = out->i64; + jalr_params[3] = out->i64; + jalr_params[4] = out->i64; + out->jalr_fn = rv64_emu_func_type(c, out->i64, jalr_params, 5); + } + + params[0] = out->thread_ptr; + params[1] = out->i64; + out->syscall_fn = rv64_emu_func_type(c, out->i64, params, 2); + + out->xreg = rv64_emu_decl_helper(c, cg, RV64_EMU_SYM_XREG, out->xreg_fn); + out->set_xreg = + rv64_emu_decl_helper(c, cg, RV64_EMU_SYM_SET_XREG, out->set_xreg_fn); + out->load64 = rv64_emu_decl_helper(c, cg, EMU_SYM_LOAD64, out->load64_fn); + out->load64_checked = rv64_emu_decl_helper(c, cg, EMU_SYM_LOAD64_CHECKED, + out->load64_checked_fn); + out->store64 = + rv64_emu_decl_helper(c, cg, EMU_SYM_STORE64, out->store64_fn); + out->jalr = rv64_emu_decl_helper(c, cg, RV64_EMU_SYM_JALR, out->jalr_fn); + out->syscall = rv64_emu_decl_helper(c, cg, EMU_SYM_SYSCALL, out->syscall_fn); +} + +static CfreeCgMemAccess rv64_emu_mem(CfreeCgTypeId type) { + CfreeCgMemAccess m; + memset(&m, 0, sizeof(m)); + m.type = type; + return m; +} + +static void rv64_emu_push_thread(CfreeCg* cg, CfreeCgLocal thread, + 
CfreeCgTypeId thread_ptr) { + cfree_cg_push_local(cg, thread); + cfree_cg_load(cg, rv64_emu_mem(thread_ptr), (CfreeCgEffAddr){0, 0}); +} + +static void rv64_emu_push_xreg(CfreeCg* cg, const Rv64EmuLiftSyms* s, + CfreeCgLocal thread, u32 reg) { + if (reg == 0u) { + cfree_cg_push_int(cg, 0, s->i64); + return; + } + rv64_emu_push_thread(cg, thread, s->thread_ptr); + cfree_cg_push_int(cg, reg, s->i32); + cfree_cg_call_symbol(cg, s->xreg, 2, (CfreeCgCallAttrs){0}); +} + +static void rv64_emu_store_xreg_from_tmp(CfreeCg* cg, const Rv64EmuLiftSyms* s, + CfreeCgLocal thread, CfreeCgLocal tmp, + u32 reg) { + if (reg == 0u) return; + rv64_emu_push_thread(cg, thread, s->thread_ptr); + cfree_cg_push_int(cg, reg, s->i32); + cfree_cg_push_local(cg, tmp); + cfree_cg_load(cg, rv64_emu_mem(s->i64), (CfreeCgEffAddr){0, 0}); + cfree_cg_call_symbol(cg, s->set_xreg, 3, (CfreeCgCallAttrs){0}); +} + +static void rv64_emu_store_xreg_from_stack(CfreeCg* cg, + const Rv64EmuLiftSyms* s, + CfreeCgLocal thread, u32 reg, + CfreeCgLocal tmp) { + cfree_cg_push_local(cg, tmp); + cfree_cg_swap(cg); + cfree_cg_store(cg, rv64_emu_mem(s->i64), (CfreeCgEffAddr){0, 0}); + if (reg == 0u) return; + rv64_emu_store_xreg_from_tmp(cg, s, thread, tmp, reg); +} + +static void rv64_emu_store_local_from_stack(CfreeCg* cg, + const Rv64EmuLiftSyms* s, + CfreeCgLocal local) { + cfree_cg_push_local(cg, local); + cfree_cg_swap(cg); + cfree_cg_store(cg, rv64_emu_mem(s->i64), (CfreeCgEffAddr){0, 0}); +} + +static void rv64_emu_push_local_value(CfreeCg* cg, const Rv64EmuLiftSyms* s, + CfreeCgLocal local) { + cfree_cg_push_local(cg, local); + cfree_cg_load(cg, rv64_emu_mem(s->i64), (CfreeCgEffAddr){0, 0}); +} + +static void rv64_emu_push_addr(CfreeCg* cg, const Rv64EmuLiftSyms* s, + CfreeCgLocal thread, + const CfreeDecodedOperand* mem) { + rv64_emu_push_xreg(cg, s, thread, mem->reg); + if (mem->imm) { + cfree_cg_push_int(cg, (u64)mem->imm, s->i64); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + } +} + +static 
CfreeStatus rv64_emu_lift_block(Compiler* compiler, CfreeCg* cg, + const CfreeDecodedInsn* insts, u32 n, + const EmuLiftCtx* ctx) { + Rv64EmuLiftSyms syms; + CfreeCgLocal thread; + CfreeCgLocal tmp; + CfreeCgLocal fault_next; + CfreeCgLocalAttrs attrs; + u64 next_pc; + u32 i; + CfreeCompiler* c; + + if (!compiler || !cg || !insts || !ctx) return CFREE_INVALID; + c = (CfreeCompiler*)compiler; + rv64_emu_lift_syms_init(c, cg, &syms); + + cfree_cg_func_begin(cg, ctx->block_sym); + memset(&attrs, 0, sizeof(attrs)); + attrs.name = cfree_sym_intern(c, CFREE_SLICE_LIT("thread")); + thread = cfree_cg_param(cg, 0, syms.thread_ptr, attrs); + attrs.name = cfree_sym_intern(c, CFREE_SLICE_LIT("tmp")); + tmp = cfree_cg_local(cg, syms.i64, attrs); + attrs.name = cfree_sym_intern(c, CFREE_SLICE_LIT("fault_next")); + fault_next = cfree_cg_local(cg, syms.i64, attrs); + + next_pc = ctx->guest_pc; + for (i = 0; i < n; ++i) { + const CfreeDecodedInsn* in = &insts[i]; + next_pc = in->pc + in->nbytes; + switch (in->opcode) { + case RV64_DEC_ADDI: { + u32 rd = in->operands[0].reg; + u32 rs1 = in->operands[1].reg; + i64 imm = in->operands[2].imm; + rv64_emu_push_xreg(cg, &syms, thread, rs1); + cfree_cg_push_int(cg, (u64)imm, syms.i64); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + rv64_emu_store_xreg_from_stack(cg, &syms, thread, rd, tmp); + break; + } + case RV64_DEC_ADD: { + u32 rd = in->operands[0].reg; + u32 rs1 = in->operands[1].reg; + u32 rs2 = in->operands[2].reg; + rv64_emu_push_xreg(cg, &syms, thread, rs1); + rv64_emu_push_xreg(cg, &syms, thread, rs2); + cfree_cg_int_binop(cg, CFREE_CG_INT_ADD, 0); + rv64_emu_store_xreg_from_stack(cg, &syms, thread, rd, tmp); + break; + } + case RV64_DEC_AUIPC: { + u32 rd = in->operands[0].reg; + i64 imm = in->operands[1].imm; + cfree_cg_push_int(cg, (u64)(in->pc + (u64)imm), syms.i64); + rv64_emu_store_xreg_from_stack(cg, &syms, thread, rd, tmp); + break; + } + case RV64_DEC_LD: { + u32 rd = in->operands[0].reg; + CfreeCgLabel ok = 
cfree_cg_label_new(cg); + rv64_emu_push_thread(cg, thread, syms.thread_ptr); + rv64_emu_push_addr(cg, &syms, thread, &in->operands[1]); + cfree_cg_push_int(cg, in->pc, syms.i64); + cfree_cg_push_int(cg, next_pc, syms.i64); + cfree_cg_push_local_addr(cg, tmp); + cfree_cg_call_symbol(cg, syms.load64_checked, 5, + (CfreeCgCallAttrs){0}); + rv64_emu_store_local_from_stack(cg, &syms, fault_next); + rv64_emu_push_local_value(cg, &syms, fault_next); + cfree_cg_push_int(cg, 0, syms.i64); + cfree_cg_int_cmp(cg, CFREE_CG_INT_NE); + cfree_cg_branch_false(cg, ok); + rv64_emu_push_local_value(cg, &syms, fault_next); + cfree_cg_ret(cg); + cfree_cg_label_place(cg, ok); + rv64_emu_store_xreg_from_tmp(cg, &syms, thread, tmp, rd); + break; + } + case RV64_DEC_SD: { + rv64_emu_push_thread(cg, thread, syms.thread_ptr); + rv64_emu_push_addr(cg, &syms, thread, &in->operands[1]); + rv64_emu_push_xreg(cg, &syms, thread, in->operands[0].reg); + cfree_cg_push_int(cg, in->pc, syms.i64); + cfree_cg_push_int(cg, next_pc, syms.i64); + cfree_cg_call_symbol(cg, syms.store64, 5, (CfreeCgCallAttrs){0}); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + return CFREE_OK; + } + case RV64_DEC_JALR: { + Rv64I ji = rv64_i_unpack(in->arch[0]); + u32 rd = ji.rd; + u32 rs1 = ji.rs1; + i64 imm = rv64_sext(ji.imm12, 12); + rv64_emu_push_thread(cg, thread, syms.thread_ptr); + cfree_cg_push_int(cg, rd, syms.i64); + cfree_cg_push_int(cg, rs1, syms.i64); + cfree_cg_push_int(cg, (u64)imm, syms.i64); + cfree_cg_push_int(cg, next_pc, syms.i64); + cfree_cg_call_symbol(cg, syms.jalr, 5, (CfreeCgCallAttrs){0}); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + return CFREE_OK; + break; + } + case RV64_DEC_ECALL: + rv64_emu_push_thread(cg, thread, syms.thread_ptr); + cfree_cg_push_int(cg, next_pc, syms.i64); + cfree_cg_call_symbol(cg, syms.syscall, 2, (CfreeCgCallAttrs){0}); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + return CFREE_OK; + default: + cfree_cg_push_int(cg, in->pc, syms.i64); + cfree_cg_ret(cg); + 
cfree_cg_func_end(cg); + return CFREE_OK; + } + } + + cfree_cg_push_int(cg, next_pc, syms.i64); + cfree_cg_ret(cg); + cfree_cg_func_end(cg); + return CFREE_OK; +} + +static EmuCPUState* rv64_emu_cpu_new(Compiler* c, u64 initial_pc, + u64 initial_sp) { + EmuCPUState* cpu = emu_cpu_new_with_arch_state( + c, CFREE_ARCH_RV64, initial_pc, sizeof(Rv64EmuCPUState), + _Alignof(Rv64EmuCPUState)); + Rv64EmuCPUState* rv = (Rv64EmuCPUState*)emu_cpu_arch_state(cpu); + if (rv) rv->x[2] = initial_sp; + return cpu; +} + +static Rv64EmuCPUState* rv64_thread_state(EmuThread* thread) { + return thread ? (Rv64EmuCPUState*)emu_cpu_arch_state(emu_thread_cpu(thread)) + : NULL; +} + +u64 emu_rv64_xreg(EmuThread* thread, u32 i) { + Rv64EmuCPUState* rv = rv64_thread_state(thread); + if (!rv || i >= 32u) return 0; + return i == 0u ? 0u : rv->x[i]; +} + +void emu_rv64_set_xreg(EmuThread* thread, u32 i, u64 v) { + Rv64EmuCPUState* rv = rv64_thread_state(thread); + if (!rv || i >= 32u || i == 0u) return; + rv->x[i] = v; +} + +static u64 rv64_get_syscall_no(EmuThread* thread) { + return emu_rv64_xreg(thread, 17u); +} + +static u64 rv64_get_syscall_arg(EmuThread* thread, u32 index) { + static const u32 regs[6] = {10u, 11u, 12u, 13u, 14u, 15u}; + return index < 6u ? 
emu_rv64_xreg(thread, regs[index]) : 0; +} + +static void rv64_set_syscall_result(EmuThread* thread, u64 value) { + emu_rv64_set_xreg(thread, 10u, value); +} + +static u64 rv64_get_sp(EmuThread* thread) { return emu_rv64_xreg(thread, 2u); } + +static void rv64_set_sp(EmuThread* thread, u64 value) { + emu_rv64_set_xreg(thread, 2u, value); +} + +static u64 rv64_get_tp(EmuThread* thread) { return emu_rv64_xreg(thread, 4u); } + +static void rv64_set_tp(EmuThread* thread, u64 value) { + emu_rv64_set_xreg(thread, 4u, value); +} + +static void rv64_signal_wr64(u8* p, u64 v) { + u32 i; + for (i = 0; i < 8u; ++i) p[i] = (u8)(v >> (8u * i)); +} + +static u64 rv64_signal_rd64(const u8* p) { + return (u64)p[0] | ((u64)p[1] << 8) | ((u64)p[2] << 16) | + ((u64)p[3] << 24) | ((u64)p[4] << 32) | ((u64)p[5] << 40) | + ((u64)p[6] << 48) | ((u64)p[7] << 56); +} + +static u64 rv64_signal_context_size(EmuProcess* process, EmuThread* thread) { + (void)process; + (void)thread; + return 32u * 8u; +} + +static CfreeStatus rv64_save_signal_context(EmuProcess* process, + EmuThread* thread, u8* dst, + u64 size) { + u32 i; + (void)process; + if (!thread || !dst || size < 32u * 8u) return CFREE_INVALID; + for (i = 0; i < 32u; ++i) + rv64_signal_wr64(dst + (u64)i * 8u, emu_rv64_xreg(thread, i)); + return CFREE_OK; +} + +static CfreeStatus rv64_restore_signal_context(EmuProcess* process, + EmuThread* thread, + const u8* src, u64 size) { + u32 i; + (void)process; + if (!thread || !src || size < 32u * 8u) return CFREE_INVALID; + for (i = 0; i < 32u; ++i) + emu_rv64_set_xreg(thread, i, rv64_signal_rd64(src + (u64)i * 8u)); + return CFREE_OK; +} + +static CfreeStatus rv64_set_signal_handler_args(EmuProcess* process, + EmuThread* thread, int signo, + u64 siginfo, u64 ucontext) { + (void)process; + if (!thread) return CFREE_INVALID; + emu_rv64_set_xreg(thread, 10u, (u64)signo); + emu_rv64_set_xreg(thread, 11u, siginfo); + emu_rv64_set_xreg(thread, 12u, ucontext); + return CFREE_OK; +} + +static u64 
rv64_signal_stack_align(EmuProcess* process, EmuThread* thread) { + (void)process; + (void)thread; + return 16u; +} + +static CfreeStatus rv64_emit_import_thunk(EmuProcess* process, + u64 thunk_vaddr) { + u8 code[4]; + u32 word = 0x00008067u; + u32 i; + if (!process) return CFREE_INVALID; + for (i = 0; i < 4u; ++i) code[i] = (u8)(word >> (8u * i)); + return emu_addr_space_copy_in(&process->image.addr_space, thunk_vaddr, code, + sizeof(code)); +} + +/* JALR runtime helper invoked by lifted blocks. Computes the jump target + * (x[rs1] + imm, bit 0 cleared), writes next_pc to x[rd] when rd != 0, and + * returns the guest PC at which execution continues. When the target resolves + * to an import thunk, the bound host import is called with x10..x12 as its + * arguments, a non-void result is stored to x10, and next_pc is returned; a + * failed host call raises a fault trap on the thread's CPU. */ +u64 emu_rv64_jalr(EmuThread* thread, u64 rd, u64 rs1, u64 imm, u64 next_pc) { + EmuImportBinding* b = NULL; + u64 target; + /* Read rs1 before writing the link register: rd and rs1 may name the same + * register (e.g. the auipc ra / jalr ra, imm(ra) call sequence), and the + * target must be formed from the old rs1 value. */ + target = emu_rv64_xreg(thread, (u32)rs1) + imm; + target &= ~1ull; + if (rd != 0u) emu_rv64_set_xreg(thread, (u32)rd, next_pc); + if (emu_dl_resolve_import_thunk(thread ? thread->process : NULL, target, + &b) == CFREE_OK && + b) { + u64 args[3]; + u64 result = 0; + args[0] = emu_rv64_xreg(thread, 10u); + args[1] = emu_rv64_xreg(thread, 11u); + args[2] = emu_rv64_xreg(thread, 12u); + if (emu_call_host_import(thread, b, args, 3u, &result) != CFREE_OK) { + emu_cpu_trap_fault(emu_thread_cpu(thread)); + return next_pc; + } + if (b->signature.result != CFREE_EMU_VALUE_VOID) + emu_rv64_set_xreg(thread, 10u, result); + return next_pc; + } + return target; +} + +static void* rv64_resolve_runtime_helper(void* emu, CfreeSlice name) { + (void)emu; + if (cfree_slice_eq_cstr(name, RV64_EMU_SYM_XREG)) + return (void*)emu_rv64_xreg; + if (cfree_slice_eq_cstr(name, RV64_EMU_SYM_SET_XREG)) + return (void*)emu_rv64_set_xreg; + if (cfree_slice_eq_cstr(name, RV64_EMU_SYM_JALR)) + return (void*)emu_rv64_jalr; + return NULL; +} + +const ArchEmuOps rv64_emu_ops = { + .cpu_new = rv64_emu_cpu_new, + .cpu_type = emu_cpu_type, + .block_fn_type = emu_block_fn_type, + .lift_block = rv64_emu_lift_block, + .get_gpr = emu_rv64_xreg, + .set_gpr = emu_rv64_set_xreg, + .get_syscall_no = rv64_get_syscall_no, + .get_syscall_arg = rv64_get_syscall_arg, + .set_syscall_result = rv64_set_syscall_result, + .get_sp = rv64_get_sp, + .set_sp = 
rv64_set_sp, + .get_tp = rv64_get_tp, + .set_tp = rv64_set_tp, + .signal_context_size = rv64_signal_context_size, + .save_signal_context = rv64_save_signal_context, + .restore_signal_context = rv64_restore_signal_context, + .set_signal_handler_args = rv64_set_signal_handler_args, + .signal_stack_align = rv64_signal_stack_align, + .import_thunk_size = 4u, + .emit_import_thunk = rv64_emit_import_thunk, + .resolve_runtime_helper = rv64_resolve_runtime_helper, +}; diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h @@ -385,6 +385,18 @@ typedef enum Rv64Format { RV64_FMT_C_NONE, /* known opcode with no operands (C.NOP, C.EBREAK) */ } Rv64Format; +typedef enum Rv64DecodedOpcode { + RV64_DEC_UNKNOWN = 0, + RV64_DEC_ADDI, + RV64_DEC_ADD, + RV64_DEC_AUIPC, + RV64_DEC_LD, + RV64_DEC_SD, + RV64_DEC_JALR, + RV64_DEC_ECALL, + RV64_DEC_EBREAK, +} Rv64DecodedOpcode; + /* ---- AsmFlags column on Rv64InsnDesc ---- */ #define RV64_ASMFL_ALIAS 0x01u /* row is an alias (preferred print form) */ #define RV64_ASMFL_FP 0x02u /* operands take f-register prefix */ diff --git a/src/emu/cpu.c b/src/emu/cpu.c @@ -1,59 +1,33 @@ -/* CPUState: per-thread guest register/lazy-flag/memory-base record, - * synthesized once per emu invocation as an interned C `CfreeCgTypeId`. The - * lifter references fields through a stable offset table generated - * alongside the type; the runtime owns the storage and exposes its - * address to the JIT linker via the extern resolver (EMU_SYM_CPU_STATE). - * - * For the first-round rv64 bring-up we also provide a direct interpreter - * loop (emu_cpu_interp_block) that consumes EmuInsts and updates this - * record without going through the CG/JIT pipeline. The interpreter is - * what test/emu/rv64_smoke_test.c exercises; the JIT lifter (lift.c) - * stays a stub until the per-arch CG plumbing lands. The shape of this - * struct is shared between the two paths so the eventual lifter can - * generate equivalent loads/stores. 
*/ +/* CPUState: per-thread guest register/trap/memory-base record. Lifted blocks + * and runtime helpers access it through explicit helper calls; the host address + * is exposed to the JIT linker by emu_runtime_extern_resolver. */ -#include <math.h> #include <string.h> #include "emu/emu.h" -/* ---- Guest-AS shape (set by emu_load_elf via emu_cpu_attach_mem) ---- - * The guest "address space" is a single contiguous host buffer; the - * mapping is guest_va = guest_va_base + (host_ptr - guest_base). We - * trap on any access outside [guest_va_base, guest_va_base + size). */ +/* ---- Guest-AS shape (set by executable loader) ---- + * The CPU owns no memory storage. It only holds a borrowed pointer to the + * process address space; sparse VM translation lives in image.c. */ struct EmuCPUState { Compiler* c; - CfreeEmuArch arch; + CfreeArchKind arch; + EmuThread* thread; u64 pc; EmuTrapReason trap; int exit_code; - /* Guest memory window: host pointer + guest-VA mapping. */ - u8* guest_base; - u64 guest_va_base; - u64 guest_size; - - /* brk pointer (program break). Starts at the top of the loaded - * image's data segment; brk(addr) grows it within the guest AS. */ - u64 brk_cur; - u64 brk_max; - - /* RV64 register file. x[0] is hardwired to 0 but we keep storage so - * the lifter can address through a uniform offset. The interpreter - * unconditionally writes 0 to slot 0 on every retire. */ - u64 x[32]; - u64 f[32]; /* D-precision 64-bit; F-only ops use the low 32 bits. */ - u32 fcsr; - - /* LR/SC reservation (A extension). The interpreter implements a - * trivial single-reservation model. 
*/ - u64 reserved_addr; - int has_reservation; + EmuAddrSpace* addr_space; + void* arch_state; + size_t arch_state_size; + size_t arch_state_align; }; -EmuCPUState* emu_cpu_new(Compiler* c, CfreeEmuArch arch, u64 initial_pc, - u64 initial_sp) { +EmuCPUState* emu_cpu_new_with_arch_state(Compiler* c, CfreeArchKind arch, + u64 initial_pc, + size_t arch_state_size, + size_t arch_state_align) { Heap* h; EmuCPUState* s; if (!c) return NULL; @@ -64,8 +38,17 @@ EmuCPUState* emu_cpu_new(Compiler* c, CfreeEmuArch arch, u64 initial_pc, s->c = c; s->arch = arch; s->pc = initial_pc; - s->x[2] = initial_sp; /* sp == x2 on RV64; matches aa64 SP semantics */ s->trap = EMU_TRAP_NONE; + s->arch_state_size = arch_state_size; + s->arch_state_align = arch_state_align ? arch_state_align : _Alignof(u64); + if (arch_state_size) { + s->arch_state = h->alloc(h, arch_state_size, s->arch_state_align); + if (!s->arch_state) { + h->free(h, s, sizeof(*s)); + return NULL; + } + memset(s->arch_state, 0, arch_state_size); + } return s; } @@ -73,9 +56,24 @@ void emu_cpu_free(EmuCPUState* s) { Heap* h; if (!s) return; h = s->c->ctx->heap; + if (s->arch_state) h->free(h, s->arch_state, s->arch_state_size); h->free(h, s, sizeof(*s)); } +void* emu_cpu_arch_state(EmuCPUState* s) { return s ? s->arch_state : NULL; } + +const void* emu_cpu_arch_state_const(const EmuCPUState* s) { + return s ? s->arch_state : NULL; +} + +void emu_cpu_set_thread(EmuCPUState* s, EmuThread* thread) { + if (s) s->thread = thread; +} + +EmuThread* emu_cpu_thread(const EmuCPUState* s) { + return s ? s->thread : NULL; +} + u64 emu_cpu_pc(const EmuCPUState* s) { return s ? s->pc : 0; } void emu_cpu_set_pc(EmuCPUState* s, u64 pc) { @@ -90,51 +88,42 @@ int emu_cpu_exit_code(const EmuCPUState* s) { return s ? 
s->exit_code : 0; } /* ---- Guest-memory window plumbing ---- */ -void emu_cpu_attach_mem(EmuCPUState* s, u8* base, u64 va_base, u64 size, - u64 brk_cur, u64 brk_max) { +void emu_cpu_attach_addr_space(EmuCPUState* s, EmuAddrSpace* as) { if (!s) return; - s->guest_base = base; - s->guest_va_base = va_base; - s->guest_size = size; - s->brk_cur = brk_cur; - s->brk_max = brk_max; + s->addr_space = as; } u8* emu_cpu_guest_base(const EmuCPUState* s) { - return s ? s->guest_base : NULL; + (void)s; + return NULL; } u64 emu_cpu_guest_va_base(const EmuCPUState* s) { - return s ? s->guest_va_base : 0; + (void)s; + return 0; } u64 emu_cpu_guest_size(const EmuCPUState* s) { - return s ? s->guest_size : 0; + (void)s; + return 0; } -static u8* emu_cpu_va_to_host(EmuCPUState* s, u64 va, u64 nbytes) { - if (!s || !s->guest_base) return NULL; - if (va < s->guest_va_base) return NULL; - if (va - s->guest_va_base + nbytes > s->guest_size) return NULL; - return s->guest_base + (va - s->guest_va_base); +u8* emu_cpu_va_to_host_pub(EmuCPUState* s, u64 va, u64 nbytes) { + return emu_cpu_va_to_host_perm(s, va, nbytes, 0); } -u8* emu_cpu_va_to_host_pub(EmuCPUState* s, u64 va, u64 nbytes) { - return emu_cpu_va_to_host(s, va, nbytes); +u8* emu_cpu_va_to_host_perm(EmuCPUState* s, u64 va, u64 nbytes, + u8 need_perms) { + if (!s || !s->addr_space) return NULL; + return emu_addr_space_ptr(s->addr_space, va, nbytes, need_perms); } -/* ---- Register accessors used by syscall + interpreter ---- */ -u64 emu_cpu_xreg(const EmuCPUState* s, u32 i) { - if (!s || i >= 32u) return 0; - return i == 0u ? 0u : s->x[i]; +u64 emu_cpu_brk_cur(const EmuCPUState* s) { + return s && s->addr_space ? s->addr_space->brk_cur : 0; } -void emu_cpu_set_xreg(EmuCPUState* s, u32 i, u64 v) { - if (!s || i >= 32u || i == 0u) return; - s->x[i] = v; +u64 emu_cpu_brk_max(const EmuCPUState* s) { + return s && s->addr_space ? s->addr_space->brk_max : 0; } - -u64 emu_cpu_brk_cur(const EmuCPUState* s) { return s ? 
s->brk_cur : 0; } -u64 emu_cpu_brk_max(const EmuCPUState* s) { return s ? s->brk_max : 0; } void emu_cpu_set_brk_cur(EmuCPUState* s, u64 v) { - if (s) s->brk_cur = v; + if (s && s->addr_space) s->addr_space->brk_cur = v; } void emu_cpu_trap_exit(EmuCPUState* s, int code) { @@ -146,907 +135,35 @@ void emu_cpu_trap_fault(EmuCPUState* s) { if (!s) return; s->trap = EMU_TRAP_FAULT; } - -CfreeCgTypeId emu_cpu_type(Compiler* c, CfreeEmuArch arch) { - /* Per-arch struct layout for the JIT lifter lands with the per-ISA - * lifter. The interpreter path doesn't need this; the JIT lift.c is - * still a stub. */ - (void)c; - (void)arch; - return CFREE_CG_TYPE_NONE; -} - -CfreeCgTypeId emu_block_fn_type(Compiler* c, CfreeEmuArch arch) { - /* Block ABI: u64 entry(EmuCPUState*). Materialized once the type - * subsystem and per-arch CPUState type land together. */ - (void)c; - (void)arch; - return CFREE_CG_TYPE_NONE; -} - -/* ============================================================ - * RV64 interpreter - * ============================================================ - * - * Consumes EmuInsts produced by emu_decode_block and updates the - * CPUState in place. The interpreter is the path the rv64 smoke - * test exercises today; the JIT lifter (lift.c) is still a stub and - * will eventually emit equivalent host code through CG. - * - * The encoding of EmuInst.operands matches what decode.c writes: - * operands[0] = rd - * operands[1] = rs1 - * operands[2] = rs2 (or rs3 / shamt depending on op) - * operands[3] = imm (sign-extended u64) - * operands[4] = funct3 (mostly used for FP rm) - * operands[5] = aux (funct7 / fmt / amo flags) - * - * EmuInst.op holds an Rv64Op enum drawn from src/emu/decode.c. */ - -#include "emu/rv64_ops.h" - -/* Forward decl from runtime.c for syscall dispatch (emu_syscall). */ -void emu_syscall(EmuCPUState*); - -#define X(i) (((i) == 0u) ? 
0ull : s->x[(i)]) -#define SETX(i, v) \ - do { \ - if ((i) != 0u) s->x[(i)] = (u64)(v); \ - } while (0) - -static i64 sext32(u64 v) { return (i64)(i32)(u32)v; } - -static int rv_load(EmuCPUState* s, u64 addr, u32 nbytes, int sign_ext, - u64* out) { - u8* p = emu_cpu_va_to_host(s, addr, nbytes); - u64 v = 0; - u32 i; - if (!p) { - s->trap = EMU_TRAP_FAULT; - return 0; - } - for (i = 0; i < nbytes; ++i) v |= ((u64)p[i]) << (8u * i); - if (sign_ext) { - u64 sign_bit = 1ull << (8u * nbytes - 1u); - if (v & sign_bit) v |= ~((sign_bit << 1) - 1ull); - } - *out = v; - return 1; -} - -static int rv_store(EmuCPUState* s, u64 addr, u32 nbytes, u64 v) { - u8* p = emu_cpu_va_to_host(s, addr, nbytes); - u32 i; - if (!p) { - s->trap = EMU_TRAP_FAULT; - return 0; - } - for (i = 0; i < nbytes; ++i) p[i] = (u8)(v >> (8u * i)); - return 1; -} - -/* Build a host double from the 64-bit fpr slot via memcpy to avoid - * type-punning UB. */ -static double f64_of(u64 bits) { - double d; - memcpy(&d, &bits, sizeof(d)); - return d; -} -static u64 bits_of_f64(double d) { - u64 b; - memcpy(&b, &d, sizeof(b)); - return b; -} -static float f32_of(u32 bits) { - float f; - memcpy(&f, &bits, sizeof(f)); - return f; -} -static u32 bits_of_f32(float f) { - u32 b; - memcpy(&b, &f, sizeof(b)); - return b; -} - -/* NaN-box a 32-bit single-precision result into the 64-bit FPR slot. */ -static u64 nanbox32(u32 bits) { - return (u64)bits | 0xffffffff00000000ull; +void emu_cpu_clear_trap(EmuCPUState* s) { + if (!s) return; + s->trap = EMU_TRAP_NONE; + s->exit_code = 0; } -/* Classify a single-precision value into the FCLASS bitmask. */ -static u64 fclass_s(u32 bits) { - u32 sign = (bits >> 31) & 1u; - u32 exp = (bits >> 23) & 0xffu; - u32 frac = bits & 0x7fffffu; - if (exp == 0xffu) { - if (frac == 0u) return sign ? (1u << 0) : (1u << 7); - return (frac & 0x400000u) ? (1u << 9) : (1u << 8); - } - if (exp == 0u) { - if (frac == 0u) return sign ? (1u << 3) : (1u << 4); - return sign ? 
(1u << 2) : (1u << 5); - } - return sign ? (1u << 1) : (1u << 6); -} -static u64 fclass_d(u64 bits) { - u32 sign = (u32)((bits >> 63) & 1ull); - u32 exp = (u32)((bits >> 52) & 0x7ffull); - u64 frac = bits & 0xfffffffffffffull; - if (exp == 0x7ffu) { - if (frac == 0) return sign ? (1u << 0) : (1u << 7); - return (frac & 0x8000000000000ull) ? (1u << 9) : (1u << 8); - } - if (exp == 0u) { - if (frac == 0) return sign ? (1u << 3) : (1u << 4); - return sign ? (1u << 2) : (1u << 5); - } - return sign ? (1u << 1) : (1u << 6); +EmuCPUState* emu_thread_cpu(EmuThread* t) { + return t ? t->cpu : NULL; } -/* Saturating fp -> int conversions per RV semantics. */ -static i32 fp_to_i32(double v) { - if (v != v) return 0; - if (v >= 2147483647.0) return 0x7fffffff; - if (v <= -2147483648.0) return (i32)0x80000000; - return (i32)v; +CfreeCgTypeId emu_thread_type(Compiler* c) { + return cfree_cg_type_ptr( + (CfreeCompiler*)c, + cfree_cg_builtin_types((CfreeCompiler*)c).id[CFREE_CG_BUILTIN_VOID], 0); } -static u32 fp_to_u32(double v) { - if (v != v) return 0xffffffffu; - if (v >= 4294967295.0) return 0xffffffffu; - if (v <= 0.0) return 0u; - return (u32)v; -} -static i64 fp_to_i64(double v) { - if (v != v) return 0; - if (v >= 9223372036854775808.0) return 0x7fffffffffffffffll; - if (v < -9223372036854775808.0) return (i64)0x8000000000000000ll; - return (i64)v; -} -static u64 fp_to_u64(double v) { - if (v != v) return (u64)-1; - if (v >= 18446744073709551616.0) return (u64)-1; - if (v <= 0.0) return 0u; - return (u64)v; -} - -/* Interpret a single EmuInst. Returns 0 on trap; otherwise writes the - * next PC to *next_pc. The caller (emu_cpu_interp_block) walks the - * EmuInst stream until a terminator fires or `n` is reached. 
*/ -static int interp_one(EmuCPUState* s, const EmuInst* in, u64* next_pc) { - u32 op = in->op; - u32 rd = (u32)in->operands[0]; - u32 rs1 = (u32)in->operands[1]; - u32 rs2 = (u32)in->operands[2]; - i64 imm = (i64)in->operands[3]; - u32 funct3 = (u32)in->operands[4]; - u32 aux = (u32)in->operands[5]; - u64 a, b; - u64 addr; - u64 load_val; - u64 pc = in->guest_pc; - u64 npc = pc + in->guest_bytes; - (void)funct3; - - a = X(rs1); - b = X(rs2); - - switch (op) { - /* ---- U-type ---- */ - case RV64_OP_LUI: - SETX(rd, (u64)(i64)(i32)imm); - break; - case RV64_OP_AUIPC: - SETX(rd, pc + (u64)(i64)(i32)imm); - break; - /* ---- Jumps ---- */ - case RV64_OP_JAL: - if (rd) SETX(rd, npc); - npc = pc + (u64)imm; - break; - case RV64_OP_JALR: { - u64 target = (a + (u64)imm) & ~1ull; - if (rd) SETX(rd, npc); - npc = target; - break; - } - - /* ---- Branches ---- */ - case RV64_OP_BEQ: - if (a == b) npc = pc + (u64)imm; - break; - case RV64_OP_BNE: - if (a != b) npc = pc + (u64)imm; - break; - case RV64_OP_BLT: - if ((i64)a < (i64)b) npc = pc + (u64)imm; - break; - case RV64_OP_BGE: - if ((i64)a >= (i64)b) npc = pc + (u64)imm; - break; - case RV64_OP_BLTU: - if (a < b) npc = pc + (u64)imm; - break; - case RV64_OP_BGEU: - if (a >= b) npc = pc + (u64)imm; - break; - - /* ---- Loads ---- */ - case RV64_OP_LB: - addr = a + (u64)imm; - if (!rv_load(s, addr, 1, 1, &load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LH: - addr = a + (u64)imm; - if (!rv_load(s, addr, 2, 1, &load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LW: - addr = a + (u64)imm; - if (!rv_load(s, addr, 4, 1, &load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LD: - addr = a + (u64)imm; - if (!rv_load(s, addr, 8, 0, &load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LBU: - addr = a + (u64)imm; - if (!rv_load(s, addr, 1, 0, &load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LHU: - addr = a + (u64)imm; - if (!rv_load(s, addr, 2, 0, 
&load_val)) return 0; - SETX(rd, load_val); - break; - case RV64_OP_LWU: - addr = a + (u64)imm; - if (!rv_load(s, addr, 4, 0, &load_val)) return 0; - SETX(rd, load_val); - break; - - /* ---- Stores ---- */ - case RV64_OP_SB: - if (!rv_store(s, a + (u64)imm, 1, b)) return 0; - break; - case RV64_OP_SH: - if (!rv_store(s, a + (u64)imm, 2, b)) return 0; - break; - case RV64_OP_SW: - if (!rv_store(s, a + (u64)imm, 4, b)) return 0; - break; - case RV64_OP_SD: - if (!rv_store(s, a + (u64)imm, 8, b)) return 0; - break; - - /* ---- ALU (RV64I) ---- */ - case RV64_OP_ADDI: - SETX(rd, a + (u64)imm); - break; - case RV64_OP_SLTI: - SETX(rd, (i64)a < imm ? 1u : 0u); - break; - case RV64_OP_SLTIU: - SETX(rd, a < (u64)imm ? 1u : 0u); - break; - case RV64_OP_XORI: - SETX(rd, a ^ (u64)imm); - break; - case RV64_OP_ORI: - SETX(rd, a | (u64)imm); - break; - case RV64_OP_ANDI: - SETX(rd, a & (u64)imm); - break; - case RV64_OP_SLLI: - SETX(rd, a << ((u64)imm & 0x3fu)); - break; - case RV64_OP_SRLI: - SETX(rd, a >> ((u64)imm & 0x3fu)); - break; - case RV64_OP_SRAI: - SETX(rd, (u64)((i64)a >> ((u64)imm & 0x3fu))); - break; - case RV64_OP_ADD: - SETX(rd, a + b); - break; - case RV64_OP_SUB: - SETX(rd, a - b); - break; - case RV64_OP_SLL: - SETX(rd, a << (b & 0x3fu)); - break; - case RV64_OP_SLT: - SETX(rd, (i64)a < (i64)b ? 1u : 0u); - break; - case RV64_OP_SLTU: - SETX(rd, a < b ? 
1u : 0u); - break; - case RV64_OP_XOR: - SETX(rd, a ^ b); - break; - case RV64_OP_SRL: - SETX(rd, a >> (b & 0x3fu)); - break; - case RV64_OP_SRA: - SETX(rd, (u64)((i64)a >> (b & 0x3fu))); - break; - case RV64_OP_OR: - SETX(rd, a | b); - break; - case RV64_OP_AND: - SETX(rd, a & b); - break; - - /* ---- 32-bit ALU (W-forms) — result sign-extended to 64 bits ---- */ - case RV64_OP_ADDIW: - SETX(rd, (u64)sext32(a + (u64)imm)); - break; - case RV64_OP_SLLIW: - SETX(rd, (u64)sext32((u32)a << ((u32)imm & 0x1fu))); - break; - case RV64_OP_SRLIW: - SETX(rd, (u64)sext32((u32)a >> ((u32)imm & 0x1fu))); - break; - case RV64_OP_SRAIW: - SETX(rd, (u64)(i64)((i32)a >> ((u32)imm & 0x1fu))); - break; - case RV64_OP_ADDW: - SETX(rd, (u64)sext32(a + b)); - break; - case RV64_OP_SUBW: - SETX(rd, (u64)sext32(a - b)); - break; - case RV64_OP_SLLW: - SETX(rd, (u64)sext32((u32)a << (b & 0x1fu))); - break; - case RV64_OP_SRLW: - SETX(rd, (u64)sext32((u32)a >> (b & 0x1fu))); - break; - case RV64_OP_SRAW: - SETX(rd, (u64)(i64)((i32)a >> (b & 0x1fu))); - break; - - /* ---- M extension ---- */ - case RV64_OP_MUL: - SETX(rd, a * b); - break; - case RV64_OP_MULH: - SETX(rd, (u64)(((__int128)(i64)a * (__int128)(i64)b) >> 64)); - break; - case RV64_OP_MULHU: - SETX(rd, (u64)(((unsigned __int128)a * (unsigned __int128)b) >> 64)); - break; - case RV64_OP_MULHSU: - SETX(rd, (u64)(((__int128)(i64)a * (unsigned __int128)b) >> 64)); - break; - case RV64_OP_DIV: - if (b == 0) - SETX(rd, (u64)-1); - else if ((i64)a == (i64)0x8000000000000000ll && (i64)b == -1) - SETX(rd, a); - else - SETX(rd, (u64)((i64)a / (i64)b)); - break; - case RV64_OP_DIVU: - SETX(rd, b == 0 ? (u64)-1 : a / b); - break; - case RV64_OP_REM: - if (b == 0) - SETX(rd, a); - else if ((i64)a == (i64)0x8000000000000000ll && (i64)b == -1) - SETX(rd, 0); - else - SETX(rd, (u64)((i64)a % (i64)b)); - break; - case RV64_OP_REMU: - SETX(rd, b == 0 ? 
a : a % b); - break; - case RV64_OP_MULW: - SETX(rd, (u64)sext32((u32)a * (u32)b)); - break; - case RV64_OP_DIVW: - if ((u32)b == 0) - SETX(rd, (u64)-1); - else if ((i32)a == (i32)0x80000000 && (i32)b == -1) - SETX(rd, (u64)sext32((u32)a)); - else - SETX(rd, (u64)(i64)((i32)a / (i32)b)); - break; - case RV64_OP_DIVUW: - SETX(rd, - (u32)b == 0 ? (u64)-1 : (u64)sext32((u32)a / (u32)b)); - break; - case RV64_OP_REMW: - if ((u32)b == 0) - SETX(rd, (u64)sext32((u32)a)); - else if ((i32)a == (i32)0x80000000 && (i32)b == -1) - SETX(rd, 0); - else - SETX(rd, (u64)(i64)((i32)a % (i32)b)); - break; - case RV64_OP_REMUW: - SETX(rd, - (u32)b == 0 ? (u64)sext32((u32)a) : (u64)sext32((u32)a % (u32)b)); - break; +CfreeCgTypeId emu_cpu_type(Compiler* c) { return emu_thread_type(c); } - /* ---- F / D loads & stores ---- */ - case RV64_OP_FLW: - addr = a + (u64)imm; - if (!rv_load(s, addr, 4, 0, &load_val)) return 0; - /* NaN-box: high 32 bits = 1. */ - s->f[rd] = load_val | 0xffffffff00000000ull; - break; - case RV64_OP_FLD: - addr = a + (u64)imm; - if (!rv_load(s, addr, 8, 0, &load_val)) return 0; - s->f[rd] = load_val; - break; - case RV64_OP_FSW: - if (!rv_store(s, a + (u64)imm, 4, s->f[rs2] & 0xffffffffull)) return 0; - break; - case RV64_OP_FSD: - if (!rv_store(s, a + (u64)imm, 8, s->f[rs2])) return 0; - break; - - /* ---- FP arithmetic (subset — single/double add/sub/mul/div) ---- */ - case RV64_OP_FADD_S: - s->f[rd] = (u64)bits_of_f32(f32_of((u32)s->f[rs1]) + - f32_of((u32)s->f[rs2])) | - 0xffffffff00000000ull; - break; - case RV64_OP_FSUB_S: - s->f[rd] = (u64)bits_of_f32(f32_of((u32)s->f[rs1]) - - f32_of((u32)s->f[rs2])) | - 0xffffffff00000000ull; - break; - case RV64_OP_FMUL_S: - s->f[rd] = (u64)bits_of_f32(f32_of((u32)s->f[rs1]) * - f32_of((u32)s->f[rs2])) | - 0xffffffff00000000ull; - break; - case RV64_OP_FDIV_S: - s->f[rd] = (u64)bits_of_f32(f32_of((u32)s->f[rs1]) / - f32_of((u32)s->f[rs2])) | - 0xffffffff00000000ull; - break; - case RV64_OP_FADD_D: - s->f[rd] = 
bits_of_f64(f64_of(s->f[rs1]) + f64_of(s->f[rs2])); - break; - case RV64_OP_FSUB_D: - s->f[rd] = bits_of_f64(f64_of(s->f[rs1]) - f64_of(s->f[rs2])); - break; - case RV64_OP_FMUL_D: - s->f[rd] = bits_of_f64(f64_of(s->f[rs1]) * f64_of(s->f[rs2])); - break; - case RV64_OP_FDIV_D: - s->f[rd] = bits_of_f64(f64_of(s->f[rs1]) / f64_of(s->f[rs2])); - break; - - /* ---- FP compares (write 0/1 into GPR rd) ---- */ - case RV64_OP_FEQ_S: - SETX(rd, f32_of((u32)s->f[rs1]) == f32_of((u32)s->f[rs2]) ? 1u : 0u); - break; - case RV64_OP_FLT_S: - SETX(rd, f32_of((u32)s->f[rs1]) < f32_of((u32)s->f[rs2]) ? 1u : 0u); - break; - case RV64_OP_FLE_S: - SETX(rd, f32_of((u32)s->f[rs1]) <= f32_of((u32)s->f[rs2]) ? 1u : 0u); - break; - case RV64_OP_FEQ_D: - SETX(rd, f64_of(s->f[rs1]) == f64_of(s->f[rs2]) ? 1u : 0u); - break; - case RV64_OP_FLT_D: - SETX(rd, f64_of(s->f[rs1]) < f64_of(s->f[rs2]) ? 1u : 0u); - break; - case RV64_OP_FLE_D: - SETX(rd, f64_of(s->f[rs1]) <= f64_of(s->f[rs2]) ? 1u : 0u); - break; - - /* ---- FP-int bitcasts (FMV.X.W, FMV.W.X, FMV.X.D, FMV.D.X) ---- */ - case RV64_OP_FMV_X_W: - SETX(rd, (u64)sext32(s->f[rs1] & 0xffffffffull)); - break; - case RV64_OP_FMV_W_X: - s->f[rd] = (X(rs1) & 0xffffffffull) | 0xffffffff00000000ull; - break; - case RV64_OP_FMV_X_D: - SETX(rd, s->f[rs1]); - break; - case RV64_OP_FMV_D_X: - s->f[rd] = X(rs1); - break; - - /* ---- A extension: LR/SC + AMO* (simple non-atomic emulation) ---- */ - case RV64_OP_LR_W: - if (!rv_load(s, a, 4, 1, &load_val)) return 0; - SETX(rd, load_val); - s->reserved_addr = a; - s->has_reservation = 1; - break; - case RV64_OP_LR_D: - if (!rv_load(s, a, 8, 0, &load_val)) return 0; - SETX(rd, load_val); - s->reserved_addr = a; - s->has_reservation = 1; - break; - case RV64_OP_SC_W: - if (s->has_reservation && s->reserved_addr == a) { - if (!rv_store(s, a, 4, b)) return 0; - SETX(rd, 0); - } else { - SETX(rd, 1); - } - s->has_reservation = 0; - break; - case RV64_OP_SC_D: - if (s->has_reservation && s->reserved_addr == 
a) { - if (!rv_store(s, a, 8, b)) return 0; - SETX(rd, 0); - } else { - SETX(rd, 1); - } - s->has_reservation = 0; - break; - case RV64_OP_AMOSWAP_W: - case RV64_OP_AMOADD_W: - case RV64_OP_AMOXOR_W: - case RV64_OP_AMOAND_W: - case RV64_OP_AMOOR_W: - case RV64_OP_AMOMIN_W: - case RV64_OP_AMOMAX_W: - case RV64_OP_AMOMINU_W: - case RV64_OP_AMOMAXU_W: { - if (!rv_load(s, a, 4, 1, &load_val)) return 0; - i64 lv = (i64)(i32)load_val; - i64 rv = (i64)(i32)b; - u32 nv; - switch (op) { - case RV64_OP_AMOSWAP_W: nv = (u32)b; break; - case RV64_OP_AMOADD_W: nv = (u32)(lv + rv); break; - case RV64_OP_AMOXOR_W: nv = (u32)(load_val ^ b); break; - case RV64_OP_AMOAND_W: nv = (u32)(load_val & b); break; - case RV64_OP_AMOOR_W: nv = (u32)(load_val | b); break; - case RV64_OP_AMOMIN_W: nv = (u32)(lv < rv ? lv : rv); break; - case RV64_OP_AMOMAX_W: nv = (u32)(lv > rv ? lv : rv); break; - case RV64_OP_AMOMINU_W: - nv = (u32)((u32)load_val < (u32)b ? (u32)load_val : (u32)b); - break; - default: /* AMOMAXU_W */ - nv = (u32)((u32)load_val > (u32)b ? (u32)load_val : (u32)b); - break; - } - if (!rv_store(s, a, 4, nv)) return 0; - SETX(rd, (u64)sext32(load_val)); - break; - } - case RV64_OP_AMOSWAP_D: - case RV64_OP_AMOADD_D: - case RV64_OP_AMOXOR_D: - case RV64_OP_AMOAND_D: - case RV64_OP_AMOOR_D: - case RV64_OP_AMOMIN_D: - case RV64_OP_AMOMAX_D: - case RV64_OP_AMOMINU_D: - case RV64_OP_AMOMAXU_D: { - if (!rv_load(s, a, 8, 0, &load_val)) return 0; - i64 lv = (i64)load_val; - i64 rv = (i64)b; - u64 nv; - switch (op) { - case RV64_OP_AMOSWAP_D: nv = b; break; - case RV64_OP_AMOADD_D: nv = load_val + b; break; - case RV64_OP_AMOXOR_D: nv = load_val ^ b; break; - case RV64_OP_AMOAND_D: nv = load_val & b; break; - case RV64_OP_AMOOR_D: nv = load_val | b; break; - case RV64_OP_AMOMIN_D: nv = (u64)(lv < rv ? lv : rv); break; - case RV64_OP_AMOMAX_D: nv = (u64)(lv > rv ? lv : rv); break; - case RV64_OP_AMOMINU_D: nv = load_val < b ? 
load_val : b; break; - default: /* AMOMAXU_D */ nv = load_val > b ? load_val : b; break; - } - if (!rv_store(s, a, 8, nv)) return 0; - SETX(rd, load_val); - break; - } - - /* ---- FP sign-injection ---- */ - case RV64_OP_FSGNJ_S: { - u32 a32 = (u32)s->f[rs1]; - u32 sign = (u32)s->f[rs2] & 0x80000000u; - s->f[rd] = nanbox32((a32 & 0x7fffffffu) | sign); - break; - } - case RV64_OP_FSGNJN_S: { - u32 a32 = (u32)s->f[rs1]; - u32 sign = ((u32)s->f[rs2] ^ 0x80000000u) & 0x80000000u; - s->f[rd] = nanbox32((a32 & 0x7fffffffu) | sign); - break; - } - case RV64_OP_FSGNJX_S: { - u32 a32 = (u32)s->f[rs1]; - u32 sign = ((u32)s->f[rs2] ^ a32) & 0x80000000u; - s->f[rd] = nanbox32((a32 & 0x7fffffffu) | sign); - break; - } - case RV64_OP_FSGNJ_D: { - u64 sign = s->f[rs2] & 0x8000000000000000ull; - s->f[rd] = (s->f[rs1] & 0x7fffffffffffffffull) | sign; - break; - } - case RV64_OP_FSGNJN_D: { - u64 sign = (s->f[rs2] ^ 0x8000000000000000ull) & 0x8000000000000000ull; - s->f[rd] = (s->f[rs1] & 0x7fffffffffffffffull) | sign; - break; - } - case RV64_OP_FSGNJX_D: { - u64 sign = (s->f[rs2] ^ s->f[rs1]) & 0x8000000000000000ull; - s->f[rd] = (s->f[rs1] & 0x7fffffffffffffffull) | sign; - break; - } - - /* ---- FP min/max (-0 < +0; both-NaN -> canonical NaN). */ - case RV64_OP_FMIN_S: { - float fa = f32_of((u32)s->f[rs1]); - float fb = f32_of((u32)s->f[rs2]); - float r; - if (fa != fa && fb != fb) r = f32_of(0x7fc00000u); - else if (fa != fa) r = fb; - else if (fb != fb) r = fa; - else r = (fa <= fb) ? fa : fb; - s->f[rd] = nanbox32(bits_of_f32(r)); - break; - } - case RV64_OP_FMAX_S: { - float fa = f32_of((u32)s->f[rs1]); - float fb = f32_of((u32)s->f[rs2]); - float r; - if (fa != fa && fb != fb) r = f32_of(0x7fc00000u); - else if (fa != fa) r = fb; - else if (fb != fb) r = fa; - else r = (fa >= fb) ? 
fa : fb; - s->f[rd] = nanbox32(bits_of_f32(r)); - break; - } - case RV64_OP_FMIN_D: { - double da = f64_of(s->f[rs1]); - double db = f64_of(s->f[rs2]); - double r; - if (da != da && db != db) r = f64_of(0x7ff8000000000000ull); - else if (da != da) r = db; - else if (db != db) r = da; - else r = (da <= db) ? da : db; - s->f[rd] = bits_of_f64(r); - break; - } - case RV64_OP_FMAX_D: { - double da = f64_of(s->f[rs1]); - double db = f64_of(s->f[rs2]); - double r; - if (da != da && db != db) r = f64_of(0x7ff8000000000000ull); - else if (da != da) r = db; - else if (db != db) r = da; - else r = (da >= db) ? da : db; - s->f[rd] = bits_of_f64(r); - break; - } - - /* ---- FP sqrt ---- */ - case RV64_OP_FSQRT_S: - s->f[rd] = nanbox32(bits_of_f32((float)sqrt((double)f32_of((u32)s->f[rs1])))); - break; - case RV64_OP_FSQRT_D: - s->f[rd] = bits_of_f64(sqrt(f64_of(s->f[rs1]))); - break; - - /* ---- FP conversions: fp -> int ---- */ - case RV64_OP_FCVT_W_S: - SETX(rd, (u64)(i64)fp_to_i32((double)f32_of((u32)s->f[rs1]))); - break; - case RV64_OP_FCVT_WU_S: - SETX(rd, (u64)(i64)(i32)fp_to_u32((double)f32_of((u32)s->f[rs1]))); - break; - case RV64_OP_FCVT_L_S: - SETX(rd, (u64)fp_to_i64((double)f32_of((u32)s->f[rs1]))); - break; - case RV64_OP_FCVT_LU_S: - SETX(rd, fp_to_u64((double)f32_of((u32)s->f[rs1]))); - break; - case RV64_OP_FCVT_W_D: - SETX(rd, (u64)(i64)fp_to_i32(f64_of(s->f[rs1]))); - break; - case RV64_OP_FCVT_WU_D: - SETX(rd, (u64)(i64)(i32)fp_to_u32(f64_of(s->f[rs1]))); - break; - case RV64_OP_FCVT_L_D: - SETX(rd, (u64)fp_to_i64(f64_of(s->f[rs1]))); - break; - case RV64_OP_FCVT_LU_D: - SETX(rd, fp_to_u64(f64_of(s->f[rs1]))); - break; - - /* ---- FP conversions: int -> fp ---- */ - case RV64_OP_FCVT_S_W: - s->f[rd] = nanbox32(bits_of_f32((float)(i32)X(rs1))); - break; - case RV64_OP_FCVT_S_WU: - s->f[rd] = nanbox32(bits_of_f32((float)(u32)X(rs1))); - break; - case RV64_OP_FCVT_S_L: - s->f[rd] = nanbox32(bits_of_f32((float)(i64)X(rs1))); - break; - case RV64_OP_FCVT_S_LU: - 
s->f[rd] = nanbox32(bits_of_f32((float)(u64)X(rs1))); - break; - case RV64_OP_FCVT_D_W: - s->f[rd] = bits_of_f64((double)(i32)X(rs1)); - break; - case RV64_OP_FCVT_D_WU: - s->f[rd] = bits_of_f64((double)(u32)X(rs1)); - break; - case RV64_OP_FCVT_D_L: - s->f[rd] = bits_of_f64((double)(i64)X(rs1)); - break; - case RV64_OP_FCVT_D_LU: - s->f[rd] = bits_of_f64((double)(u64)X(rs1)); - break; - - /* ---- FP <-> FP ---- */ - case RV64_OP_FCVT_S_D: - s->f[rd] = nanbox32(bits_of_f32((float)f64_of(s->f[rs1]))); - break; - case RV64_OP_FCVT_D_S: - s->f[rd] = bits_of_f64((double)f32_of((u32)s->f[rs1])); - break; - - /* ---- FP classify ---- */ - case RV64_OP_FCLASS_S: - SETX(rd, fclass_s((u32)s->f[rs1])); - break; - case RV64_OP_FCLASS_D: - SETX(rd, fclass_d(s->f[rs1])); - break; - - /* ---- Fused multiply-add (rs3 == aux) ---- */ - case RV64_OP_FMADD_S: { - float a = f32_of((u32)s->f[rs1]); - float b = f32_of((u32)s->f[rs2]); - float c = f32_of((u32)s->f[aux]); - s->f[rd] = nanbox32(bits_of_f32(fmaf(a, b, c))); - break; - } - case RV64_OP_FMSUB_S: { - float a = f32_of((u32)s->f[rs1]); - float b = f32_of((u32)s->f[rs2]); - float c = f32_of((u32)s->f[aux]); - s->f[rd] = nanbox32(bits_of_f32(fmaf(a, b, -c))); - break; - } - case RV64_OP_FNMSUB_S: { - float a = f32_of((u32)s->f[rs1]); - float b = f32_of((u32)s->f[rs2]); - float c = f32_of((u32)s->f[aux]); - s->f[rd] = nanbox32(bits_of_f32(fmaf(-a, b, c))); - break; - } - case RV64_OP_FNMADD_S: { - float a = f32_of((u32)s->f[rs1]); - float b = f32_of((u32)s->f[rs2]); - float c = f32_of((u32)s->f[aux]); - s->f[rd] = nanbox32(bits_of_f32(fmaf(-a, b, -c))); - break; - } - case RV64_OP_FMADD_D: { - double a = f64_of(s->f[rs1]); - double b = f64_of(s->f[rs2]); - double c = f64_of(s->f[aux]); - s->f[rd] = bits_of_f64(fma(a, b, c)); - break; - } - case RV64_OP_FMSUB_D: { - double a = f64_of(s->f[rs1]); - double b = f64_of(s->f[rs2]); - double c = f64_of(s->f[aux]); - s->f[rd] = bits_of_f64(fma(a, b, -c)); - break; - } - case 
RV64_OP_FNMSUB_D: { - double a = f64_of(s->f[rs1]); - double b = f64_of(s->f[rs2]); - double c = f64_of(s->f[aux]); - s->f[rd] = bits_of_f64(fma(-a, b, c)); - break; - } - case RV64_OP_FNMADD_D: { - double a = f64_of(s->f[rs1]); - double b = f64_of(s->f[rs2]); - double c = f64_of(s->f[aux]); - s->f[rd] = bits_of_f64(fma(-a, b, -c)); - break; - } - - /* ---- CSR (Zicsr) — minimal: fcsr (0x003), frm (0x002), fflags - * (0x001) have meaningful semantics. Other CSRs read as zero. */ - case RV64_OP_CSRRW: - case RV64_OP_CSRRS: - case RV64_OP_CSRRC: - case RV64_OP_CSRRWI: - case RV64_OP_CSRRSI: - case RV64_OP_CSRRCI: { - u32 csr = (u32)(u64)imm; - u64 src; - u64 old = 0; - int is_imm = (op == RV64_OP_CSRRWI || op == RV64_OP_CSRRSI || - op == RV64_OP_CSRRCI); - src = is_imm ? (u64)rs1 : a; - if (csr == 0x001u) old = s->fcsr & 0x1fu; - else if (csr == 0x002u) old = (s->fcsr >> 5) & 0x7u; - else if (csr == 0x003u) old = s->fcsr & 0xffu; - else old = 0u; - { - u64 new_val = old; - switch (op) { - case RV64_OP_CSRRW: - case RV64_OP_CSRRWI: new_val = src; break; - case RV64_OP_CSRRS: - case RV64_OP_CSRRSI: new_val = old | src; break; - case RV64_OP_CSRRC: - case RV64_OP_CSRRCI: new_val = old & ~src; break; - } - if (csr == 0x001u) - s->fcsr = (u32)((s->fcsr & ~0x1fu) | (new_val & 0x1fu)); - else if (csr == 0x002u) - s->fcsr = (u32)((s->fcsr & ~(0x7u << 5)) | ((new_val & 0x7u) << 5)); - else if (csr == 0x003u) - s->fcsr = (u32)(new_val & 0xffu); - } - SETX(rd, old); - break; - } - - /* ---- System ---- */ - case RV64_OP_ECALL: - emu_syscall(s); - /* emu_syscall may set EMU_TRAP_EXIT; let the caller observe it. */ - break; - case RV64_OP_EBREAK: - s->trap = EMU_TRAP_FAULT; - return 0; - case RV64_OP_FENCE: - /* No-op for in-process single-threaded interpretation. 
*/ - break; - - /* ---- NOP / unmodeled FP / illegal ---- */ - case RV64_OP_NOP: - break; - case RV64_OP_ILLEGAL: - default: - s->trap = EMU_TRAP_FAULT; - return 0; - } - - *next_pc = npc; - (void)aux; - return 1; -} - -/* Interpret a decoded block. Updates PC + trap_reason on the CPUState. - * Returns the count of instructions actually executed. */ -u32 emu_cpu_interp_block(EmuCPUState* s, const EmuInst* insts, u32 n) { - u32 i; - u64 npc; - if (!s || !insts) return 0; - for (i = 0; i < n; ++i) { - if (!interp_one(s, &insts[i], &npc)) { - /* Trap set by interp_one; PC stays at the trapping insn so the - * dispatcher can report the offending guest_pc. */ - s->pc = insts[i].guest_pc; - return i; - } - s->pc = npc; - if (s->trap != EMU_TRAP_NONE) return i + 1u; - } - return n; +CfreeCgTypeId emu_block_fn_type(Compiler* c) { + CfreeCgBuiltinTypes bi; + CfreeCgFuncParam param; + CfreeCgFuncSig sig; + bi = cfree_cg_builtin_types((CfreeCompiler*)c); + memset(&param, 0, sizeof(param)); + param.type = emu_thread_type(c); + memset(&sig, 0, sizeof(sig)); + sig.ret = bi.id[CFREE_CG_BUILTIN_I64]; + sig.params = &param; + sig.nparams = 1; + sig.call_conv = CFREE_CG_CC_TARGET_C; + return cfree_cg_type_func((CfreeCompiler*)c, sig); } diff --git a/src/emu/decode.c b/src/emu/decode.c @@ -1,729 +0,0 @@ -/* Per-ISA structured decoder. The lifter (src/emu/lift.c) and the - * direct interpreter (src/emu/cpu.c) both consume the EmuInst stream - * produced here. v1 targets aarch64 and riscv64; the aa64 path is - * still a stub. The rv64 path covers RV64I + M + RV32F + RV32D + A + - * C (compressed) + Zicsr-minimal, plus the FCVT / FSGNJ / FMIN-MAX / - * FMADD families. 
*/ - -#include <string.h> - -#include "core/core.h" -#include "emu/emu.h" -#include "emu/rv64_ops.h" - -/* ============================================================ - * RV64 decoder - * ============================================================ */ - -static u32 rd_u32_le_local(const u8* b) { - return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24); -} - -static i64 sext(u64 v, u32 bits) { - u64 m = 1ull << (bits - 1u); - return (i64)((v ^ m) - m); -} - -static i64 i_imm(u32 w) { return sext((u64)(w >> 20), 12); } -static i64 s_imm(u32 w) { - u32 i = ((w >> 7) & 0x1fu) | (((w >> 25) & 0x7fu) << 5); - return sext((u64)i, 12); -} -static i64 b_imm(u32 w) { - u32 i = (((w >> 31) & 1u) << 12) | (((w >> 7) & 1u) << 11) | - (((w >> 25) & 0x3fu) << 5) | (((w >> 8) & 0xfu) << 1); - return sext((u64)i, 13); -} -static i64 j_imm(u32 w) { - u32 i = (((w >> 31) & 1u) << 20) | (((w >> 12) & 0xffu) << 12) | - (((w >> 20) & 1u) << 11) | (((w >> 21) & 0x3ffu) << 1); - return sext((u64)i, 21); -} -static i64 u_imm(u32 w) { return (i64)(i32)(w & 0xfffff000u); } - -static void emit_inst(EmuInst* dst, u64 pc, u32 op, u32 rd, u32 rs1, u32 rs2, - i64 imm, u32 funct3, u32 aux, u32 term) { - memset(dst, 0, sizeof(*dst)); - dst->op = op; - dst->flags = term ? RV64_INST_FLAG_TERMINATOR : 0u; - dst->guest_pc = pc; - dst->guest_bytes = 4u; - dst->operands[0] = rd; - dst->operands[1] = rs1; - dst->operands[2] = rs2; - dst->operands[3] = (u64)imm; - dst->operands[4] = funct3; - dst->operands[5] = aux; -} - -/* ---------------------------------------------------------------- - * RVC (compressed) decode - * ---------------------------------------------------------------- - * Each 16-bit RVC encoding maps 1:1 to a 32-bit base-ISA instruction. - * We expand the 16-bit insn to its 32-bit form and recurse through the - * normal decoder. 
The set covers RV64C: C.ADDI4SPN, C.LW, C.LD, C.SW, - * C.SD, C.NOP, C.ADDI, C.ADDIW, C.LI, C.ADDI16SP, C.LUI, C.SRLI, C.SRAI, - * C.ANDI, C.SUB, C.XOR, C.OR, C.AND, C.SUBW, C.ADDW, C.J, C.BEQZ, - * C.BNEZ, C.SLLI, C.LDSP, C.LWSP, C.JR, C.MV, C.EBREAK, C.JALR, C.ADD, - * C.SDSP, C.SWSP, plus C.FLD/C.FSD/C.FLDSP/C.FSDSP for the D extension. - * Returns the expanded 32-bit instruction or 0 for an illegal encoding. */ -static u32 rvc_expand(u16 c) { - u32 op = c & 3u; - u32 funct3 = (u32)(c >> 13) & 7u; - u32 rdq = ((c >> 2) & 7u) + 8u; - u32 rs1q = ((c >> 7) & 7u) + 8u; - u32 rs2q = ((c >> 2) & 7u) + 8u; - u32 rd_rs1 = (u32)(c >> 7) & 31u; - u32 rs2 = (u32)(c >> 2) & 31u; - if (op == 0u) { - switch (funct3) { - case 0: { /* C.ADDI4SPN: addi rd', sp, nzuimm */ - u32 nz = (u32)(((c >> 11) & 3u) << 4) | (u32)(((c >> 7) & 0xfu) << 6) | - (u32)(((c >> 6) & 1u) << 2) | (u32)(((c >> 5) & 1u) << 3); - if (nz == 0u) return 0; - return (nz << 20) | (2u << 15) | (0u << 12) | (rdq << 7) | 0x13u; - } - case 1: { /* C.FLD */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 5) & 3u) << 6); - return (off << 20) | (rs1q << 15) | (3u << 12) | (rdq << 7) | 0x07u; - } - case 2: { /* C.LW */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 6) & 1u) << 2) | - (u32)(((c >> 5) & 1u) << 6); - return (off << 20) | (rs1q << 15) | (2u << 12) | (rdq << 7) | 0x03u; - } - case 3: { /* C.LD */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 5) & 3u) << 6); - return (off << 20) | (rs1q << 15) | (3u << 12) | (rdq << 7) | 0x03u; - } - case 5: { /* C.FSD */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 5) & 3u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return (imm_hi << 25) | (rs2q << 20) | (rs1q << 15) | (3u << 12) | - (imm_lo << 7) | 0x27u; - } - case 6: { /* C.SW */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 6) & 1u) << 2) | - (u32)(((c >> 5) & 1u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return 
(imm_hi << 25) | (rs2q << 20) | (rs1q << 15) | (2u << 12) | - (imm_lo << 7) | 0x23u; - } - case 7: { /* C.SD */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 5) & 3u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return (imm_hi << 25) | (rs2q << 20) | (rs1q << 15) | (3u << 12) | - (imm_lo << 7) | 0x23u; - } - default: return 0; - } - } else if (op == 1u) { - switch (funct3) { - case 0: { /* C.NOP / C.ADDI */ - u32 imm5 = ((c >> 12) & 1u) << 5; - u32 imm04 = (c >> 2) & 0x1fu; - i32 imm = (i32)(imm5 | imm04); - if (imm5) imm |= ~0x3f; - if (rd_rs1 == 0) return 0x13u; /* NOP */ - return ((u32)imm << 20) | (rd_rs1 << 15) | (0u << 12) | (rd_rs1 << 7) | - 0x13u; - } - case 1: { /* C.ADDIW */ - u32 imm5 = ((c >> 12) & 1u) << 5; - u32 imm04 = (c >> 2) & 0x1fu; - i32 imm = (i32)(imm5 | imm04); - if (imm5) imm |= ~0x3f; - if (rd_rs1 == 0) return 0; - return ((u32)imm << 20) | (rd_rs1 << 15) | (0u << 12) | (rd_rs1 << 7) | - 0x1bu; - } - case 2: { /* C.LI */ - u32 imm5 = ((c >> 12) & 1u) << 5; - u32 imm04 = (c >> 2) & 0x1fu; - i32 imm = (i32)(imm5 | imm04); - if (imm5) imm |= ~0x3f; - if (rd_rs1 == 0) return 0; - return ((u32)imm << 20) | (0u << 15) | (0u << 12) | (rd_rs1 << 7) | - 0x13u; - } - case 3: { - if (rd_rs1 == 2u) { - /* C.ADDI16SP */ - u32 b9 = (c >> 12) & 1u; - u32 b4 = (c >> 6) & 1u; - u32 b6 = (c >> 5) & 1u; - u32 b8_7 = (c >> 3) & 3u; - u32 b5 = (c >> 2) & 1u; - i32 imm = (i32)((b9 << 9) | (b8_7 << 7) | (b6 << 6) | (b5 << 5) | - (b4 << 4)); - if (b9) imm |= ~0x3ff; - if (imm == 0) return 0; - return ((u32)imm << 20) | (2u << 15) | (0u << 12) | (2u << 7) | 0x13u; - } else { - /* C.LUI */ - u32 b17 = (c >> 12) & 1u; - u32 b16_12 = (c >> 2) & 0x1fu; - i32 imm = (i32)((b17 << 17) | (b16_12 << 12)); - if (b17) imm |= ~0x3ffff; - if (rd_rs1 == 0 || imm == 0) return 0; - return ((u32)imm & 0xfffff000u) | (rd_rs1 << 7) | 0x37u; - } - } - case 4: { - u32 sub = (c >> 10) & 3u; - u32 imm5 = ((c >> 12) & 1u) << 5; - u32 imm04 = (c >> 2) & 
0x1fu; - u32 shamt = imm5 | imm04; - if (sub == 0) { - return ((0u << 26) | shamt) << 20 | (rs1q << 15) | (5u << 12) | - (rs1q << 7) | 0x13u; - } else if (sub == 1) { - return (((0x10u << 6) | shamt) << 20) | (rs1q << 15) | (5u << 12) | - (rs1q << 7) | 0x13u; - } else if (sub == 2) { - i32 imm = (i32)shamt; - if (imm5) imm |= ~0x3f; - return ((u32)imm << 20) | (rs1q << 15) | (7u << 12) | (rs1q << 7) | - 0x13u; - } else { - u32 bit12 = (c >> 12) & 1u; - u32 sub2 = (c >> 5) & 3u; - if (bit12 == 0) { - if (sub2 == 0) - return (0x20u << 25) | (rs2q << 20) | (rs1q << 15) | (0u << 12) | - (rs1q << 7) | 0x33u; - if (sub2 == 1) - return (0x00u << 25) | (rs2q << 20) | (rs1q << 15) | (4u << 12) | - (rs1q << 7) | 0x33u; - if (sub2 == 2) - return (0x00u << 25) | (rs2q << 20) | (rs1q << 15) | (6u << 12) | - (rs1q << 7) | 0x33u; - if (sub2 == 3) - return (0x00u << 25) | (rs2q << 20) | (rs1q << 15) | (7u << 12) | - (rs1q << 7) | 0x33u; - } else { - if (sub2 == 0) - return (0x20u << 25) | (rs2q << 20) | (rs1q << 15) | (0u << 12) | - (rs1q << 7) | 0x3bu; - if (sub2 == 1) - return (0x00u << 25) | (rs2q << 20) | (rs1q << 15) | (0u << 12) | - (rs1q << 7) | 0x3bu; - } - return 0; - } - } - case 5: { /* C.J */ - i32 imm = 0; - imm |= (i32)(((c >> 12) & 1u) << 11); - imm |= (i32)(((c >> 11) & 1u) << 4); - imm |= (i32)(((c >> 9) & 3u) << 8); - imm |= (i32)(((c >> 8) & 1u) << 10); - imm |= (i32)(((c >> 7) & 1u) << 6); - imm |= (i32)(((c >> 6) & 1u) << 7); - imm |= (i32)(((c >> 3) & 7u) << 1); - imm |= (i32)(((c >> 2) & 1u) << 5); - if (imm & (1 << 11)) imm |= ~0xfff; - u32 b20 = ((u32)imm >> 11) & 1u; - u32 b10_1 = ((u32)imm >> 1) & 0x3ffu; - u32 b11 = ((u32)imm >> 11) & 1u; - u32 b19_12 = b11 ? 
0xffu : 0u; - return (b20 << 31) | (b10_1 << 21) | (b11 << 20) | (b19_12 << 12) | - (0u << 7) | 0x6fu; - } - case 6: - case 7: { /* C.BEQZ / C.BNEZ */ - i32 imm = 0; - imm |= (i32)(((c >> 12) & 1u) << 8); - imm |= (i32)(((c >> 10) & 3u) << 3); - imm |= (i32)(((c >> 5) & 3u) << 6); - imm |= (i32)(((c >> 3) & 3u) << 1); - imm |= (i32)(((c >> 2) & 1u) << 5); - if (imm & (1 << 8)) imm |= ~0x1ff; - u32 ui = (u32)imm; - u32 b12 = (ui >> 12) & 1u; - u32 b10_5 = (ui >> 5) & 0x3fu; - u32 b4_1 = (ui >> 1) & 0xfu; - u32 b11 = (ui >> 11) & 1u; - u32 f3 = funct3 == 6 ? 0u : 1u; - return (b12 << 31) | (b10_5 << 25) | (0u << 20) | (rs1q << 15) | - (f3 << 12) | (b4_1 << 8) | (b11 << 7) | 0x63u; - } - default: return 0; - } - } else if (op == 2u) { - switch (funct3) { - case 0: { /* C.SLLI */ - u32 imm5 = ((c >> 12) & 1u) << 5; - u32 imm04 = (c >> 2) & 0x1fu; - u32 shamt = imm5 | imm04; - if (rd_rs1 == 0) return 0; - return (shamt << 20) | (rd_rs1 << 15) | (1u << 12) | (rd_rs1 << 7) | - 0x13u; - } - case 1: { /* C.FLDSP */ - u32 off = (u32)(((c >> 12) & 1u) << 5) | (u32)(((c >> 5) & 3u) << 3) | - (u32)(((c >> 2) & 7u) << 6); - return (off << 20) | (2u << 15) | (3u << 12) | (rd_rs1 << 7) | 0x07u; - } - case 2: { /* C.LWSP */ - u32 off = (u32)(((c >> 12) & 1u) << 5) | (u32)(((c >> 4) & 7u) << 2) | - (u32)(((c >> 2) & 3u) << 6); - if (rd_rs1 == 0) return 0; - return (off << 20) | (2u << 15) | (2u << 12) | (rd_rs1 << 7) | 0x03u; - } - case 3: { /* C.LDSP */ - u32 off = (u32)(((c >> 12) & 1u) << 5) | (u32)(((c >> 5) & 3u) << 3) | - (u32)(((c >> 2) & 7u) << 6); - if (rd_rs1 == 0) return 0; - return (off << 20) | (2u << 15) | (3u << 12) | (rd_rs1 << 7) | 0x03u; - } - case 4: { - u32 bit12 = (c >> 12) & 1u; - if (bit12 == 0) { - if (rs2 == 0) { - if (rd_rs1 == 0) return 0; - return (0u << 20) | (rd_rs1 << 15) | (0u << 12) | (0u << 7) | - 0x67u; /* C.JR */ - } else { - if (rd_rs1 == 0) return 0; - return (0u << 25) | (rs2 << 20) | (0u << 15) | (0u << 12) | - (rd_rs1 << 7) | 0x33u; /* C.MV 
*/ - } - } else { - if (rd_rs1 == 0 && rs2 == 0) { - return 0x00100073u; /* C.EBREAK */ - } else if (rs2 == 0) { - return (0u << 20) | (rd_rs1 << 15) | (0u << 12) | (1u << 7) | - 0x67u; /* C.JALR */ - } else { - if (rd_rs1 == 0) return 0; - return (0u << 25) | (rs2 << 20) | (rd_rs1 << 15) | (0u << 12) | - (rd_rs1 << 7) | 0x33u; /* C.ADD */ - } - } - } - case 5: { /* C.FSDSP */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 7) & 7u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return (imm_hi << 25) | (rs2 << 20) | (2u << 15) | (3u << 12) | - (imm_lo << 7) | 0x27u; - } - case 6: { /* C.SWSP */ - u32 off = (u32)(((c >> 9) & 0xfu) << 2) | (u32)(((c >> 7) & 3u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return (imm_hi << 25) | (rs2 << 20) | (2u << 15) | (2u << 12) | - (imm_lo << 7) | 0x23u; - } - case 7: { /* C.SDSP */ - u32 off = (u32)(((c >> 10) & 7u) << 3) | (u32)(((c >> 7) & 7u) << 6); - u32 imm_lo = off & 0x1fu; - u32 imm_hi = (off >> 5) & 0x7fu; - return (imm_hi << 25) | (rs2 << 20) | (2u << 15) | (3u << 12) | - (imm_lo << 7) | 0x23u; - } - default: return 0; - } - } - return 0; -} - -static u32 decode_one_rv64(u32 w, u64 pc, EmuInst* out, u32* is_term) { - u32 op = w & 0x7fu; - u32 rd = (w >> 7) & 31u; - u32 funct3 = (w >> 12) & 7u; - u32 rs1 = (w >> 15) & 31u; - u32 rs2 = (w >> 20) & 31u; - u32 funct7 = (w >> 25) & 0x7fu; - *is_term = 0; - - switch (op) { - case 0x37u: /* LUI */ - emit_inst(out, pc, RV64_OP_LUI, rd, 0, 0, u_imm(w), 0, 0, 0); - return 4; - case 0x17u: /* AUIPC */ - emit_inst(out, pc, RV64_OP_AUIPC, rd, 0, 0, u_imm(w), 0, 0, 0); - return 4; - case 0x6fu: /* JAL */ - *is_term = 1; - emit_inst(out, pc, RV64_OP_JAL, rd, 0, 0, j_imm(w), 0, 0, 1); - return 4; - case 0x67u: /* JALR */ - *is_term = 1; - emit_inst(out, pc, RV64_OP_JALR, rd, rs1, 0, i_imm(w), funct3, 0, 1); - return 4; - case 0x63u: { /* BRANCH */ - static const u32 ops[8] = { - RV64_OP_BEQ, RV64_OP_BNE, RV64_OP_ILLEGAL, 
RV64_OP_ILLEGAL, - RV64_OP_BLT, RV64_OP_BGE, RV64_OP_BLTU, RV64_OP_BGEU, - }; - u32 o = ops[funct3]; - *is_term = 1; - emit_inst(out, pc, o, 0, rs1, rs2, b_imm(w), funct3, 0, 1); - return 4; - } - case 0x03u: { /* LOAD */ - static const u32 ops[8] = { - RV64_OP_LB, RV64_OP_LH, RV64_OP_LW, RV64_OP_LD, - RV64_OP_LBU, RV64_OP_LHU, RV64_OP_LWU, RV64_OP_ILLEGAL, - }; - emit_inst(out, pc, ops[funct3], rd, rs1, 0, i_imm(w), funct3, 0, 0); - return 4; - } - case 0x23u: { /* STORE */ - static const u32 ops[8] = { - RV64_OP_SB, RV64_OP_SH, RV64_OP_SW, RV64_OP_SD, - RV64_OP_ILLEGAL, RV64_OP_ILLEGAL, RV64_OP_ILLEGAL, RV64_OP_ILLEGAL, - }; - emit_inst(out, pc, ops[funct3], 0, rs1, rs2, s_imm(w), funct3, 0, 0); - return 4; - } - case 0x13u: { /* OP-IMM */ - i64 imm = i_imm(w); - u32 o = RV64_OP_ILLEGAL; - switch (funct3) { - case 0: o = RV64_OP_ADDI; break; - case 1: - /* SLLI: funct6 == 0 (top 6 bits zero) */ - if ((w >> 26) == 0u) { - o = RV64_OP_SLLI; - imm = (i64)((w >> 20) & 0x3fu); - } - break; - case 2: o = RV64_OP_SLTI; break; - case 3: o = RV64_OP_SLTIU; break; - case 4: o = RV64_OP_XORI; break; - case 5: - imm = (i64)((w >> 20) & 0x3fu); - if ((w >> 26) == 0x00u) { - o = RV64_OP_SRLI; - } else if ((w >> 26) == 0x10u) { - o = RV64_OP_SRAI; - } - break; - case 6: o = RV64_OP_ORI; break; - case 7: o = RV64_OP_ANDI; break; - default: break; - } - if (o == RV64_OP_ADDI && rd == 0 && rs1 == 0 && imm == 0) { - emit_inst(out, pc, RV64_OP_NOP, 0, 0, 0, 0, 0, 0, 0); - } else { - emit_inst(out, pc, o, rd, rs1, 0, imm, funct3, 0, 0); - } - return 4; - } - case 0x1bu: { /* OP-IMM-32 */ - u32 o = RV64_OP_ILLEGAL; - i64 imm; - if (funct3 == 0) { - o = RV64_OP_ADDIW; - imm = i_imm(w); - } else if (funct3 == 1 && funct7 == 0) { - o = RV64_OP_SLLIW; - imm = (i64)rs2; - } else if (funct3 == 5 && funct7 == 0) { - o = RV64_OP_SRLIW; - imm = (i64)rs2; - } else if (funct3 == 5 && funct7 == 0x20u) { - o = RV64_OP_SRAIW; - imm = (i64)rs2; - } else { - imm = 0; - } - emit_inst(out, pc, o, rd, 
rs1, 0, imm, funct3, 0, 0); - return 4; - } - case 0x33u: { /* OP */ - u32 o = RV64_OP_ILLEGAL; - if (funct7 == 0x00u) { - static const u32 ops[8] = { - RV64_OP_ADD, RV64_OP_SLL, RV64_OP_SLT, RV64_OP_SLTU, - RV64_OP_XOR, RV64_OP_SRL, RV64_OP_OR, RV64_OP_AND, - }; - o = ops[funct3]; - } else if (funct7 == 0x20u) { - if (funct3 == 0) o = RV64_OP_SUB; - else if (funct3 == 5) o = RV64_OP_SRA; - } else if (funct7 == 0x01u) { - static const u32 ops[8] = { - RV64_OP_MUL, RV64_OP_MULH, RV64_OP_MULHSU, RV64_OP_MULHU, - RV64_OP_DIV, RV64_OP_DIVU, RV64_OP_REM, RV64_OP_REMU, - }; - o = ops[funct3]; - } - emit_inst(out, pc, o, rd, rs1, rs2, 0, funct3, funct7, 0); - return 4; - } - case 0x3bu: { /* OP-32 */ - u32 o = RV64_OP_ILLEGAL; - if (funct7 == 0x00u) { - if (funct3 == 0) o = RV64_OP_ADDW; - else if (funct3 == 1) o = RV64_OP_SLLW; - else if (funct3 == 5) o = RV64_OP_SRLW; - } else if (funct7 == 0x20u) { - if (funct3 == 0) o = RV64_OP_SUBW; - else if (funct3 == 5) o = RV64_OP_SRAW; - } else if (funct7 == 0x01u) { - static const u32 ops[8] = { - RV64_OP_MULW, RV64_OP_ILLEGAL, RV64_OP_ILLEGAL, RV64_OP_ILLEGAL, - RV64_OP_DIVW, RV64_OP_DIVUW, RV64_OP_REMW, RV64_OP_REMUW, - }; - o = ops[funct3]; - } - emit_inst(out, pc, o, rd, rs1, rs2, 0, funct3, funct7, 0); - return 4; - } - case 0x07u: { /* LOAD-FP */ - u32 o = RV64_OP_ILLEGAL; - if (funct3 == 2) o = RV64_OP_FLW; - else if (funct3 == 3) o = RV64_OP_FLD; - emit_inst(out, pc, o, rd, rs1, 0, i_imm(w), funct3, 0, 0); - return 4; - } - case 0x27u: { /* STORE-FP */ - u32 o = RV64_OP_ILLEGAL; - if (funct3 == 2) o = RV64_OP_FSW; - else if (funct3 == 3) o = RV64_OP_FSD; - emit_inst(out, pc, o, 0, rs1, rs2, s_imm(w), funct3, 0, 0); - return 4; - } - case 0x53u: { /* OP-FP */ - u32 fmt = funct7 & 1u; /* 0=S, 1=D */ - u32 major = funct7 >> 2; - u32 o = RV64_OP_ILLEGAL; - switch (major) { - case 0x00: o = fmt ? RV64_OP_FADD_D : RV64_OP_FADD_S; break; - case 0x01: o = fmt ? RV64_OP_FSUB_D : RV64_OP_FSUB_S; break; - case 0x02: o = fmt ? 
RV64_OP_FMUL_D : RV64_OP_FMUL_S; break; - case 0x03: o = fmt ? RV64_OP_FDIV_D : RV64_OP_FDIV_S; break; - case 0x04: /* FSGNJ family — funct3 selects variant */ - if (funct3 == 0) - o = fmt ? RV64_OP_FSGNJ_D : RV64_OP_FSGNJ_S; - else if (funct3 == 1) - o = fmt ? RV64_OP_FSGNJN_D : RV64_OP_FSGNJN_S; - else if (funct3 == 2) - o = fmt ? RV64_OP_FSGNJX_D : RV64_OP_FSGNJX_S; - break; - case 0x05: /* FMIN / FMAX */ - if (funct3 == 0) - o = fmt ? RV64_OP_FMIN_D : RV64_OP_FMIN_S; - else if (funct3 == 1) - o = fmt ? RV64_OP_FMAX_D : RV64_OP_FMAX_S; - break; - case 0x08: /* FCVT.S.D / FCVT.D.S (rs2 == fmt of source). */ - if (fmt == 0 && rs2 == 1u) o = RV64_OP_FCVT_S_D; - else if (fmt == 1 && rs2 == 0u) o = RV64_OP_FCVT_D_S; - break; - case 0x0b: /* FSQRT.S / FSQRT.D — rs2 == 0 */ - if (rs2 == 0u) o = fmt ? RV64_OP_FSQRT_D : RV64_OP_FSQRT_S; - break; - case 0x14: - /* FP compare: funct3 0=fle, 1=flt, 2=feq */ - if (funct3 == 0) - o = fmt ? RV64_OP_FLE_D : RV64_OP_FLE_S; - else if (funct3 == 1) - o = fmt ? RV64_OP_FLT_D : RV64_OP_FLT_S; - else if (funct3 == 2) - o = fmt ? RV64_OP_FEQ_D : RV64_OP_FEQ_S; - break; - case 0x18: - /* FCVT.{W,WU,L,LU}.S/D — fp -> int. rs2 picks dest size: - * 0 = W, 1 = WU, 2 = L, 3 = LU. */ - if (fmt == 0) { - if (rs2 == 0) o = RV64_OP_FCVT_W_S; - else if (rs2 == 1) o = RV64_OP_FCVT_WU_S; - else if (rs2 == 2) o = RV64_OP_FCVT_L_S; - else if (rs2 == 3) o = RV64_OP_FCVT_LU_S; - } else { - if (rs2 == 0) o = RV64_OP_FCVT_W_D; - else if (rs2 == 1) o = RV64_OP_FCVT_WU_D; - else if (rs2 == 2) o = RV64_OP_FCVT_L_D; - else if (rs2 == 3) o = RV64_OP_FCVT_LU_D; - } - break; - case 0x1a: - /* FCVT.S/D.{W,WU,L,LU} — int -> fp. rs2 picks src size. 
*/ - if (fmt == 0) { - if (rs2 == 0) o = RV64_OP_FCVT_S_W; - else if (rs2 == 1) o = RV64_OP_FCVT_S_WU; - else if (rs2 == 2) o = RV64_OP_FCVT_S_L; - else if (rs2 == 3) o = RV64_OP_FCVT_S_LU; - } else { - if (rs2 == 0) o = RV64_OP_FCVT_D_W; - else if (rs2 == 1) o = RV64_OP_FCVT_D_WU; - else if (rs2 == 2) o = RV64_OP_FCVT_D_L; - else if (rs2 == 3) o = RV64_OP_FCVT_D_LU; - } - break; - case 0x1c: - /* FMV.X.W / FMV.X.D (funct3==0) or FCLASS (funct3==1) */ - if (rs2 == 0) { - if (funct3 == 0) - o = fmt ? RV64_OP_FMV_X_D : RV64_OP_FMV_X_W; - else if (funct3 == 1) - o = fmt ? RV64_OP_FCLASS_D : RV64_OP_FCLASS_S; - } - break; - case 0x1e: - /* FMV.W.X / FMV.D.X */ - if (funct3 == 0 && rs2 == 0) { - o = fmt ? RV64_OP_FMV_D_X : RV64_OP_FMV_W_X; - } - break; - default: - break; - } - emit_inst(out, pc, o, rd, rs1, rs2, 0, funct3, funct7, 0); - return 4; - } - case 0x43u: /* FMADD */ - case 0x47u: /* FMSUB */ - case 0x4bu: /* FNMSUB */ - case 0x4fu: { /* FNMADD */ - u32 fmt = funct7 & 1u; /* 0=S, 1=D */ - u32 rs3 = (w >> 27) & 31u; - u32 o = RV64_OP_ILLEGAL; - switch (op) { - case 0x43u: o = fmt ? RV64_OP_FMADD_D : RV64_OP_FMADD_S; break; - case 0x47u: o = fmt ? RV64_OP_FMSUB_D : RV64_OP_FMSUB_S; break; - case 0x4bu: o = fmt ? RV64_OP_FNMSUB_D : RV64_OP_FNMSUB_S; break; - case 0x4fu: o = fmt ? 
RV64_OP_FNMADD_D : RV64_OP_FNMADD_S; break; - } - emit_inst(out, pc, o, rd, rs1, rs2, 0, funct3, rs3, 0); - return 4; - } - case 0x2fu: { /* AMO */ - u32 funct5 = funct7 >> 2; - u32 width = funct3; /* 2 = W, 3 = D */ - u32 o = RV64_OP_ILLEGAL; - if (width == 2u) { - switch (funct5) { - case 0x02: o = RV64_OP_LR_W; break; - case 0x03: o = RV64_OP_SC_W; break; - case 0x01: o = RV64_OP_AMOSWAP_W; break; - case 0x00: o = RV64_OP_AMOADD_W; break; - case 0x04: o = RV64_OP_AMOXOR_W; break; - case 0x0c: o = RV64_OP_AMOAND_W; break; - case 0x08: o = RV64_OP_AMOOR_W; break; - case 0x10: o = RV64_OP_AMOMIN_W; break; - case 0x14: o = RV64_OP_AMOMAX_W; break; - case 0x18: o = RV64_OP_AMOMINU_W; break; - case 0x1c: o = RV64_OP_AMOMAXU_W; break; - default: break; - } - } else if (width == 3u) { - switch (funct5) { - case 0x02: o = RV64_OP_LR_D; break; - case 0x03: o = RV64_OP_SC_D; break; - case 0x01: o = RV64_OP_AMOSWAP_D; break; - case 0x00: o = RV64_OP_AMOADD_D; break; - case 0x04: o = RV64_OP_AMOXOR_D; break; - case 0x0c: o = RV64_OP_AMOAND_D; break; - case 0x08: o = RV64_OP_AMOOR_D; break; - case 0x10: o = RV64_OP_AMOMIN_D; break; - case 0x14: o = RV64_OP_AMOMAX_D; break; - case 0x18: o = RV64_OP_AMOMINU_D; break; - case 0x1c: o = RV64_OP_AMOMAXU_D; break; - default: break; - } - } - emit_inst(out, pc, o, rd, rs1, rs2, 0, funct3, funct7, 0); - return 4; - } - case 0x0fu: /* MISC-MEM (FENCE / FENCE.I) */ - emit_inst(out, pc, RV64_OP_FENCE, rd, rs1, 0, i_imm(w), funct3, 0, 0); - return 4; - case 0x73u: { /* SYSTEM */ - if (w == 0x00000073u) { - *is_term = 1; - emit_inst(out, pc, RV64_OP_ECALL, 0, 0, 0, 0, 0, 0, 1); - } else if (w == 0x00100073u) { - *is_term = 1; - emit_inst(out, pc, RV64_OP_EBREAK, 0, 0, 0, 0, 0, 0, 1); - } else if (funct3 != 0u && funct3 != 4u) { - /* CSR access: rs1 is GPR (or zimm5 for *I variants). The CSR - * index lives in the 12-bit imm field. funct3 picks the variant. - * 1 = csrrw, 2 = csrrs, 3 = csrrc, - * 5 = csrrwi, 6 = csrrsi, 7 = csrrci. 
*/ - u32 csr_idx = (w >> 20) & 0xfffu; - u32 o = RV64_OP_ILLEGAL; - switch (funct3) { - case 1: o = RV64_OP_CSRRW; break; - case 2: o = RV64_OP_CSRRS; break; - case 3: o = RV64_OP_CSRRC; break; - case 5: o = RV64_OP_CSRRWI; break; - case 6: o = RV64_OP_CSRRSI; break; - case 7: o = RV64_OP_CSRRCI; break; - } - emit_inst(out, pc, o, rd, rs1, 0, (i64)(u64)csr_idx, funct3, 0, 0); - } else { - emit_inst(out, pc, RV64_OP_ILLEGAL, 0, 0, 0, 0, 0, 0, 0); - } - return 4; - } - default: - emit_inst(out, pc, RV64_OP_ILLEGAL, 0, 0, 0, 0, 0, 0, 0); - return 4; - } -} - -/* The caller (translate_block / interpreter test) guarantees `bytes` is - * the host address of guest_pc inside the loaded image. RVC (compressed) - * insns are detected by the low two bits != 0b11; for each we expand to - * the 32-bit equivalent and reuse the base decoder, but the EmuInst's - * guest_bytes is patched back to 2 so the PC advances correctly. */ -static u32 decode_block_rv64(const u8* bytes, u64 guest_pc, EmuInst* out, - u32 max) { - u32 n = 0; - u32 off = 0; - while (n < max) { - u16 lo = (u16)bytes[off] | ((u16)bytes[off + 1] << 8); - u32 w; - u32 term = 0; - u32 used; - u32 is_rvc = ((lo & 3u) != 3u) ? 1u : 0u; - if (is_rvc) { - w = rvc_expand(lo); - if (w == 0u) { - emit_inst(&out[n], guest_pc + off, RV64_OP_ILLEGAL, 0, 0, 0, 0, 0, 0, - 0); - out[n].guest_bytes = 2u; - ++n; - break; - } - } else { - w = rd_u32_le_local(bytes + off); - } - used = decode_one_rv64(w, guest_pc + off, &out[n], &term); - if (used == 0) return n; - if (is_rvc) { - out[n].guest_bytes = 2u; - used = 2u; - } - off += used; - ++n; - if (term) break; - if (out[n - 1u].op == RV64_OP_ILLEGAL) break; - } - return n; -} - -u32 emu_decode_block(CfreeEmuArch arch, const u8* bytes, u64 guest_pc, - EmuInst* out, u32 max) { - if (!bytes || !out || max == 0) return 0; - if (arch == CFREE_EMU_ARCH_RISCV64) { - return decode_block_rv64(bytes, guest_pc, out, max); - } - /* aa64 decode lands separately. 
*/ - return 0; -} - -void emu_trace_insn(Compiler* c, u64 guest_pc, const EmuInst* insn) { - (void)c; - (void)guest_pc; - (void)insn; -} diff --git a/src/emu/dl.c b/src/emu/dl.c @@ -0,0 +1,370 @@ +#include <string.h> + +#include "core/slice.h" +#include "emu/emu.h" +#include "obj/format.h" +#include "obj/reloc_apply.h" +/* Size in bytes of the guest region reserved for host-import thunks. */ +#define EMU_IMPORT_THUNK_RESERVE 0x1000ull +/* True when the slice has a NULL data pointer and a zero length. */ +static int slice_is_null(CfreeSlice s) { return !s.data && s.len == 0; } +/* Apply one relocation of `kind` at host pointer P_bytes by forwarding S, A and P to link_reloc_apply. Returns CFREE_INVALID when P_bytes is NULL, CFREE_OK otherwise. */ +CfreeStatus emu_apply_reloc_bytes(Compiler* c, RelocKind kind, u8* P_bytes, + u64 S, i64 A, u64 P) { + if (!P_bytes) return CFREE_INVALID; + link_reloc_apply(c, kind, P_bytes, S, A, P); + return CFREE_OK; +} +/* Walk the link map in order and report the first object whose dyn_symbol_lookup hook returns `symbol` as defined; *out_addr is then load_bias + sym.value. *out_addr is zeroed before the search. Returns CFREE_INVALID on NULL process/out_addr, CFREE_UNSUPPORTED when the object format has no dyn_symbol_lookup hook, CFREE_NOT_FOUND when no object defines the symbol. */ +CfreeStatus emu_dl_lookup_symbol(EmuProcess* process, CfreeSlice symbol, + u64* out_addr) { + EmuLoadedImage* img; + const ObjFormatEmuOps* fmt; + u32 oi; + if (!process || !out_addr) return CFREE_INVALID; + img = &process->image; + fmt = process->obj_format ? process->obj_format->emu : NULL; + if (!fmt || !fmt->dyn_symbol_lookup) return CFREE_UNSUPPORTED; + *out_addr = 0; + for (oi = 0; oi < img->link_map.nobjects; ++oi) { + EmuLoadedObject* obj = &img->link_map.objects[oi]; + EmuDynSymbol sym; + if (fmt->dyn_symbol_lookup(process, obj, symbol, &sym) == CFREE_OK && + sym.defined) { + *out_addr = obj->load_bias + sym.value; + return CFREE_OK; + } + } + return CFREE_NOT_FOUND; +} +/* Grow img->import_bindings so it can hold at least `need` entries. Capacity starts at 8 and doubles until it reaches `need`; the newly added slots are zero-filled. Returns CFREE_ERR when the heap realloc fails. */ +static CfreeStatus ensure_import_binding_cap(Compiler* c, EmuLoadedImage* img, + u32 need) { + Heap* heap = c->ctx->heap; + u32 old_cap; + u32 new_cap; + EmuImportBinding* grown; + if (img->import_bindings_cap >= need) return CFREE_OK; + old_cap = img->import_bindings_cap; + new_cap = old_cap ? 
old_cap * 2u : 8u; + while (new_cap < need) new_cap *= 2u; + grown = (EmuImportBinding*)heap->realloc( + heap, img->import_bindings, sizeof(*img->import_bindings) * old_cap, + sizeof(*img->import_bindings) * new_cap, _Alignof(EmuImportBinding)); + if (!grown) return CFREE_ERR; + memset(grown + old_cap, 0, sizeof(*grown) * (new_cap - old_cap)); + img->import_bindings = grown; + img->import_bindings_cap = new_cap; + return CFREE_OK; +} +/* Fallback import signature: GUEST_C ABI, one U64 argument, U64 result. */ +static CfreeEmuImportSignature default_u64_sig(void) { + CfreeEmuImportSignature sig; + memset(&sig, 0, sizeof(sig)); + sig.abi = CFREE_EMU_IMPORT_ABI_GUEST_C; + sig.result = CFREE_EMU_VALUE_U64; + sig.nargs = 1u; + sig.args[0] = CFREE_EMU_VALUE_U64; + return sig; +} +/* Thunk size taken from process->arch->emu, or 0 when any link in that chain is missing or the size itself is 0. */ +static u32 import_thunk_size(const EmuProcess* process) { + if (!process || !process->arch || !process->arch->emu || + !process->arch->emu->import_thunk_size) + return 0; + return process->arch->emu->import_thunk_size; +} +/* Forward to the arch's emit_import_thunk hook for the thunk at guest address `thunk`; CFREE_UNSUPPORTED when the hook is absent. */ +static CfreeStatus emit_import_thunk(EmuProcess* process, u64 thunk) { + if (!process || !process->arch || !process->arch->emu || + !process->arch->emu->emit_import_thunk) + return CFREE_UNSUPPORTED; + return process->arch->emu->emit_import_thunk(process, thunk); +} +/* Turn a resolved import into an address stored in *out_addr: a non-zero guest_addr is returned as-is; a host_fn gets the next thunk slot from the reserved thunk region plus a recorded EmuImportBinding. Returns CFREE_NOT_FOUND when neither is set, CFREE_NOMEM when the thunk region is exhausted. */ +static CfreeStatus add_import_binding(Compiler* c, EmuProcess* process, + EmuLoadedImage* img, + const EmuDynamicImport* req, + const CfreeEmuResolvedImport* resolved, + u64* out_addr) { + EmuImportBinding* b; + u64 thunk; + u32 thunk_size; + if (resolved->guest_addr) { + *out_addr = resolved->guest_addr; + return CFREE_OK; + } + if (!resolved->host_fn) return CFREE_NOT_FOUND; + thunk_size = import_thunk_size(process); + if (!thunk_size) return CFREE_UNSUPPORTED; + if (img->import_thunk_next + thunk_size > + img->import_thunk_base + img->import_thunk_size) + return CFREE_NOMEM; + if (ensure_import_binding_cap(c, img, img->nimport_bindings + 1u) != + CFREE_OK) + return CFREE_ERR; + thunk = img->import_thunk_next; + img->import_thunk_next += thunk_size; + if 
(emit_import_thunk(process, thunk) != CFREE_OK) + return CFREE_ERR; + b = &img->import_bindings[img->nimport_bindings++]; + memset(b, 0, sizeof(*b)); + b->object_name = req->object_name; + b->symbol_name = req->symbol_name; + b->got_vaddr = req->got_vaddr; + b->thunk_vaddr = thunk; + b->resolved_guest_addr = resolved->guest_addr; + b->resolved_host_fn = resolved->host_fn; + b->flags = resolved->flags; + b->signature = resolved->signature.nargs || resolved->signature.result + ? resolved->signature + : req->signature; + if (!b->signature.nargs && !b->signature.result) b->signature = default_u64_sig(); + *out_addr = thunk; + return CFREE_OK; +} + +CfreeStatus emu_dl_resolve_import_thunk(EmuProcess* process, u64 target, + EmuImportBinding** out) { + EmuLoadedImage* img; + u32 i; + if (out) *out = NULL; + if (!process || !out) return CFREE_INVALID; + img = &process->image; + if (!img->import_thunk_size || target < img->import_thunk_base || + target >= img->import_thunk_base + img->import_thunk_size) + return CFREE_NOT_FOUND; + for (i = 0; i < img->nimport_bindings; ++i) { + EmuImportBinding* b = &img->import_bindings[i]; + if (b->thunk_vaddr == target && b->resolved_host_fn) { + *out = b; + return CFREE_OK; + } + } + return CFREE_NOT_FOUND; +} + +/* Call the host function bound to `b`, passing the first sig.nargs entries + * of `args` as u64 values (0 to 3 arguments are supported). + * A binding whose signature is all-zero is given the default signature + * (one u64 in, u64 out) and that choice is written back to `b`. + * The host function is always invoked; `result_out` is optional, is zeroed + * on entry, and only controls whether the return value is stored. + * Returns CFREE_INVALID for a NULL thread, binding or host_fn, or for a + * NULL `args` when the signature takes arguments; CFREE_UNSUPPORTED when + * the signature wants more than `nargs` or more than 3 arguments; + * CFREE_OK otherwise. */ +CfreeStatus emu_call_host_import(EmuThread* thread, EmuImportBinding* b, + const u64* args, u32 nargs, + u64* result_out) { + CfreeEmuImportSignature sig; + u64 ret = 0; + if (result_out) *result_out = 0; + if (!thread || !b || !b->resolved_host_fn) return CFREE_INVALID; + sig = b->signature; + if (!sig.nargs && !sig.result) sig = default_u64_sig(); + b->signature = sig; + if (sig.nargs > nargs || sig.nargs > 3u) return CFREE_UNSUPPORTED; + if (sig.nargs && !args) return CFREE_INVALID; + /* Make the call unconditionally: a caller that passes result_out == NULL + * still needs the host function's side effects to happen. */ + if (sig.nargs == 0u) { + if (sig.result != CFREE_EMU_VALUE_VOID) { + u64 (*fn0)(void) = (u64(*)(void))b->resolved_host_fn; + ret = fn0(); + } else { + ((void (*)(void))b->resolved_host_fn)(); + } + } else if (sig.nargs == 1u) { + u64 (*fn1)(u64) = (u64(*)(u64))b->resolved_host_fn; + 
ret = fn1(args[0]); + } else if (sig.nargs == 2u) { + u64 (*fn2)(u64, u64) = (u64(*)(u64, u64))b->resolved_host_fn; + ret = fn2(args[0], args[1]); + } else if (sig.nargs == 3u) { + u64 (*fn3)(u64, u64, u64) = (u64(*)(u64, u64, u64))b->resolved_host_fn; + ret = fn3(args[0], args[1], args[2]); + } + if (result_out) *result_out = ret; + return CFREE_OK; +} + +static CfreeStatus load_needed_objects(Compiler* c, EmuProcess* process, + const EmuLoadOptions* opts, + const ObjFormatEmuOps* fmt) { + EmuLoadedImage* img = &process->image; + u32 oi; + if (!fmt->dyn_needed_iter || !fmt->dyn_needed_next) return CFREE_OK; + for (oi = 0; oi < img->link_map.nobjects; ++oi) { + EmuLoadedObject* obj = &img->link_map.objects[oi]; + EmuDynNeededIter it; + CfreeSlice needed; + fmt->dyn_needed_iter(obj, &it); + while (fmt->dyn_needed_next(process, &it, &needed)) { + CfreeSlice bytes = CFREE_SLICE_NULL; + u32 loaded = 0; + u32 k; + if (slice_is_null(needed) || !opts->bindings || + !opts->bindings->resolve_object) + continue; + for (k = 0; k < img->link_map.nobjects; ++k) { + if (cfree_slice_eq(img->link_map.objects[k].name, + needed) || + cfree_slice_eq(img->link_map.objects[k].soname, + needed)) { + loaded = 1; + break; + } + } + if (loaded) continue; + if (opts->bindings->resolve_object(opts->bindings->user, process, + needed, &bytes) != + CFREE_OK || + !bytes.data) + return CFREE_NOT_FOUND; + if (!fmt->map_object || + fmt->map_object(c, process, img, needed, bytes, 0, + &k) != CFREE_OK) + return CFREE_ERR; + } + } + return CFREE_OK; +} + +static CfreeStatus resolve_reloc_symbol(Compiler* c, EmuProcess* process, + const EmuLoadOptions* opts, + const ObjFormatEmuOps* fmt, + EmuLoadedObject* obj, u64 sym_idx, + u64 patch_addr, + EmuDynamicImport* req, + u64* value_out) { + EmuLoadedImage* img = &process->image; + EmuDynSymbol sym; + CfreeStatus st; + (void)c; + if (!fmt->dyn_symbol_by_index || + fmt->dyn_symbol_by_index(process, obj, sym_idx, &sym) != 
CFREE_OK) + return CFREE_INVALID; + memset(req, 0, sizeof(*req)); + req->object_name = + obj->imports.nneeded ? obj->imports.needed[0] : obj->name; + req->symbol_name = sym.name; + req->got_vaddr = patch_addr; + st = emu_dl_lookup_symbol(process, sym.name, value_out); + if (st == CFREE_OK) return CFREE_OK; + if (opts->bindings && opts->bindings->resolve_import) { + CfreeEmuResolvedImport resolved; + memset(&resolved, 0, sizeof(resolved)); + st = opts->bindings->resolve_import(opts->bindings->user, process, req, + &resolved); + if (st == CFREE_OK) + st = add_import_binding(c, process, img, req, &resolved, value_out); + } + return st; +} + +static CfreeStatus apply_reloc_table(Compiler* c, EmuProcess* process, + const EmuLoadOptions* opts, + const ObjFormatEmuOps* fmt, + EmuLoadedObject* obj, + EmuDynRelocTableKind table) { + EmuLoadedImage* img = &process->image; + EmuDynRelocIter it; + EmuDynReloc reloc; + if (!fmt->reloc_iter || !fmt->reloc_next || !fmt->reloc_classify) + return CFREE_OK; + fmt->reloc_iter(obj, table, &it); + while (fmt->reloc_next(process, &it, &reloc)) { + u64 value = 0; + u8* patch; + RelocKind kind; + u32 kind_u; + EmuDynRelocClass cls; + patch = emu_addr_space_ptr(&img->addr_space, reloc.patch_addr, + reloc.width ? 
reloc.width : 8u, + EMU_MEM_WRITE); + if (!patch) return CFREE_INVALID; + + if (fmt->reloc_classify(process, obj, &reloc, &cls, &kind_u) != CFREE_OK) + return CFREE_UNSUPPORTED; + kind = (RelocKind)kind_u; + if (cls == EMU_DYN_RELOC_RELATIVE) { + value = obj->load_bias; + } else if (cls == EMU_DYN_RELOC_SYMBOLIC || + cls == EMU_DYN_RELOC_IMPORT_SLOT) { + EmuDynamicImport req; + CfreeStatus st = + resolve_reloc_symbol(c, process, opts, fmt, obj, reloc.symbol_index, + reloc.patch_addr, + &req, &value); + if (st != CFREE_OK) { + if (cls == EMU_DYN_RELOC_IMPORT_SLOT) { + u32 thunk_size = import_thunk_size(process); + if (!thunk_size) return CFREE_UNSUPPORTED; + if (img->import_thunk_next + thunk_size > + img->import_thunk_base + img->import_thunk_size) + return CFREE_NOMEM; + value = img->import_thunk_next; + img->import_thunk_next += thunk_size; + if (emit_import_thunk(process, value) != CFREE_OK) + return CFREE_ERR; + } else { + return st; + } + } + } else if (cls == EMU_DYN_RELOC_NONE) { + continue; + } + + if (emu_apply_reloc_bytes(c, kind, patch, value, reloc.addend, + reloc.patch_addr) != + CFREE_OK) + return CFREE_ERR; + emu_addr_space_invalidate(&img->addr_space, reloc.patch_addr, + reloc.width ? 
reloc.width : 8u); + } + return CFREE_OK; +} + +static CfreeStatus apply_dynamic_relocs(Compiler* c, EmuProcess* process, + const EmuLoadOptions* opts, + const ObjFormatEmuOps* fmt) { + EmuLoadedImage* img = &process->image; + u32 oi; + for (oi = 0; oi < img->link_map.nobjects; ++oi) { + EmuLoadedObject* obj = &img->link_map.objects[oi]; + CfreeStatus st; + st = apply_reloc_table(c, process, opts, fmt, obj, + EMU_DYN_RELOC_TABLE_MAIN); + if (st != CFREE_OK) return st; + st = apply_reloc_table(c, process, opts, fmt, obj, + EMU_DYN_RELOC_TABLE_PLT); + if (st != CFREE_OK) return st; + } + return CFREE_OK; +} + +CfreeStatus emu_dl_init_process(Compiler* c, EmuProcess* process) { + EmuLoadedImage* img; + u64 base = 0; + if (!c || !process) return CFREE_INVALID; + img = &process->image; + if (img->import_thunk_size) return CFREE_OK; + if (!process->os || !process->os->emu_find_map_region) + return CFREE_UNSUPPORTED; + if (process->os->emu_find_map_region(process, EMU_IMPORT_THUNK_RESERVE, + img->addr_space.page_size, + EMU_OS_MAP_DL_THUNKS, &base) != + CFREE_OK) + return CFREE_ERR; + if (emu_addr_space_map(&img->addr_space, base, EMU_IMPORT_THUNK_RESERVE, + EMU_MEM_READ | EMU_MEM_EXEC, EMU_MAP_ANON) != + CFREE_OK) + return CFREE_ERR; + img->import_thunk_base = base; + img->import_thunk_size = EMU_IMPORT_THUNK_RESERVE; + img->import_thunk_next = base; + if (process->os->emu_note_map_region) + process->os->emu_note_map_region(process, base, EMU_IMPORT_THUNK_RESERVE, + EMU_OS_MAP_DL_THUNKS); + return CFREE_OK; +} + +CfreeStatus emu_dl_load_dependencies_and_relocate(Compiler* c, + EmuProcess* process, + const EmuLoadOptions* opts, + const ObjFormatEmuOps* fmt) { + CfreeStatus st; + if (!c || !process || !opts || !fmt) return CFREE_INVALID; + process->dl_policy.global_scope_head = process->image.link_map.global_scope_head; + st = load_needed_objects(c, process, opts, fmt); + if (st != CFREE_OK) return st; + st = emu_tls_rebuild_modules(c, process); + if (st != CFREE_OK) 
return st; + return apply_dynamic_relocs(c, process, opts, fmt); +} diff --git a/src/emu/elf_load.c b/src/emu/elf_load.c @@ -1,552 +0,0 @@ -/* Guest ELF loader. - * - * The host gives us an ELF buffer in `bytes`. We parse the ELF64 header - * directly (no need to involve obj/elf_read.c — its purpose is to build - * an ObjBuilder for the linker, which we don't want here), walk PT_LOAD - * program headers, allocate a single contiguous host buffer covering - * the union of segment VAs, and copy file contents in. - * - * The "guest address space" is flat: guest_base (host pointer) maps to - * guest_va_base (the lowest p_vaddr seen). Translations are - * host = guest_base + (guest_va - guest_va_base) - * The emulator's bounds checks (cpu.c, runtime.c) enforce that any - * touched VA lies within [guest_va_base, guest_va_base + guest_size). - * - * Stack: allocated inside the same buffer at the high end. argv/envp/auxv - * are pushed per the RISC-V psABI initial-stack layout. - * - * Handles static-linked ELF64 LE with EM_RISCV. For dynamic-linked - * programs (PT_INTERP present), the caller must pre-stage the - * interpreter bytes via emu_load_elf_set_interp_bytes. We then load the - * interpreter ELF alongside the program, set the entry PC to the - * interpreter's e_entry, and arrange auxv so AT_BASE points to the - * interpreter's load base while AT_PHDR/AT_PHENT/AT_PHNUM still describe - * the program. */ - -#include <string.h> - -#include "core/core.h" -#include "core/slice.h" -#include "emu/emu.h" -#include "emu/rv64_ops.h" -#include "obj/elf/elf.h" - -/* ---- Layout knobs ---- */ -/* Stack size — large enough for typical libc init in the smoke tests - * but bounded so a typo doesn't allocate the host out of memory. */ -#define EMU_STACK_SIZE (1u * 1024u * 1024u) -/* Heap (brk) reserve appended at the end of the loaded segments, before - * the stack. */ -#define EMU_BRK_RESERVE (2u * 1024u * 1024u) -/* Page size we align segments to. 
The actual guest page granularity is - * unspecified for a flat-AS interpreter; 4KiB is a reasonable default. */ -#define EMU_PAGE_SIZE 0x1000ull - -static u64 round_up(u64 v, u64 a) { return (v + a - 1u) & ~(a - 1u); } -static u64 round_down(u64 v, u64 a) { return v & ~(a - 1u); } - -/* ---- ELF64 wire reads ---- */ -static u16 rd16(const u8* p) { return (u16)p[0] | ((u16)p[1] << 8); } -static u32 rd32(const u8* p) { - return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); -} -static u64 rd64(const u8* p) { return (u64)rd32(p) | ((u64)rd32(p + 4) << 32); } - -static void wr64(u8* p, u64 v) { - u32 i; - for (i = 0; i < 8; ++i) p[i] = (u8)(v >> (8u * i)); -} - -/* Side-channel for dynamic-linked program support: a caller stages the - * interpreter (ld.so) bytes here before calling emu_load_elf, and we - * consume them if the program ELF has a PT_INTERP segment. Single-shot - * (cleared after use). The emulator is freestanding from libc, so we - * cannot open arbitrary host files ourselves — the caller (driver / - * test harness) is responsible for fetching the interpreter bytes. */ -static struct { - const u8* bytes; - size_t len; -} g_pending_interp; - -void emu_load_elf_set_interp_bytes(const u8* bytes, size_t len) { - g_pending_interp.bytes = bytes; - g_pending_interp.len = len; -} - -/* Iterate PT_LOAD segments of an ELF and compute the [lo,hi) extent. - * Returns 0 on success. 
*/ -static int elf_layout_extent(const u8* bytes, size_t len, u64* out_lo, - u64* out_hi, u64* out_phoff, u16* out_phentsize, - u16* out_phnum, u64* out_entry, int* out_pic) { - u16 e_type, e_machine, e_phentsize, e_phnum; - u64 e_entry, e_phoff; - u64 lo = 0, hi = 0; - int saw = 0; - u32 i; - if (len < ELF64_EHDR_SIZE) return 1; - if (bytes[EI_MAG0] != ELFMAG0 || bytes[EI_MAG1] != ELFMAG1 || - bytes[EI_MAG2] != ELFMAG2 || bytes[EI_MAG3] != ELFMAG3) - return 1; - if (bytes[EI_CLASS] != ELFCLASS64) return 1; - if (bytes[EI_DATA] != ELFDATA2LSB) return 1; - e_type = rd16(bytes + 16); - e_machine = rd16(bytes + 18); - e_entry = rd64(bytes + 24); - e_phoff = rd64(bytes + 32); - e_phentsize = rd16(bytes + 54); - e_phnum = rd16(bytes + 56); - if (e_machine != EM_RISCV) return 1; - if (e_phentsize < ELF64_PHDR_SIZE) return 1; - for (i = 0; i < e_phnum; ++i) { - const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; - u32 p_type = rd32(ph + 0); - u64 p_vaddr = rd64(ph + 16); - u64 p_memsz = rd64(ph + 40); - if (p_type != PT_LOAD) continue; - if (!saw) { - lo = round_down(p_vaddr, EMU_PAGE_SIZE); - hi = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); - saw = 1; - } else { - u64 a = round_down(p_vaddr, EMU_PAGE_SIZE); - u64 b = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); - if (a < lo) lo = a; - if (b > hi) hi = b; - } - } - if (!saw) return 1; - *out_lo = lo; - *out_hi = hi; - *out_phoff = e_phoff; - *out_phentsize = e_phentsize; - *out_phnum = e_phnum; - *out_entry = e_entry; - *out_pic = (e_type == ET_DYN); - return 0; -} - -/* Copy PT_LOAD segments from `src` into the guest AS host buffer. - * `bias` is the load bias added to each p_vaddr (zero for ET_EXEC, - * the chosen base for PIE / interpreter images). 
*/ -static int elf_copy_segments(const u8* src, size_t len, u64 phoff, - u16 phentsize, u16 phnum, u8* guest_base, - u64 guest_va_base, u64 bias) { - u32 i; - for (i = 0; i < phnum; ++i) { - const u8* ph = src + phoff + (u64)i * phentsize; - u32 p_type = rd32(ph + 0); - u64 p_offset = rd64(ph + 8); - u64 p_vaddr = rd64(ph + 16) + bias; - u64 p_filesz = rd64(ph + 32); - u64 p_memsz = rd64(ph + 40); - if (p_type != PT_LOAD) continue; - if (p_offset + p_filesz > len) return 1; - if (p_filesz) { - memcpy(guest_base + (p_vaddr - guest_va_base), src + p_offset, - (size_t)p_filesz); - } - if (p_memsz > p_filesz) { - memset(guest_base + (p_vaddr - guest_va_base) + p_filesz, 0, - (size_t)(p_memsz - p_filesz)); - } - } - return 0; -} - -int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len, - const char* const* argv, const char* const* envp, - EmuLoadedImage* out) { - const u8* eh; - u16 e_type, e_machine, e_phentsize, e_phnum; - u64 e_entry, e_phoff; - u64 lo_va = 0, hi_va = 0; - int saw_load = 0; - u32 i; - Heap* heap; - u8* guest_base; - u64 image_end; - u64 guest_size; - u64 stack_top; - u64 sp; - u64 brk_start; - int argc; - const char* const* p; - /* ELF64 program-header fields we need (per spec): p_type(0,4), - * p_flags(4,4), p_offset(8,8), p_vaddr(16,8), p_paddr(24,8), - * p_filesz(32,8), p_memsz(40,8), p_align(48,8). */ - - if (!out) return 1; - memset(out, 0, sizeof(*out)); - if (!c || !bytes || len < ELF64_EHDR_SIZE) return 1; - if (arch != CFREE_EMU_ARCH_RISCV64) { - /* aa64 loader lives separately. 
*/ - return 2; - } - if (bytes[EI_MAG0] != ELFMAG0 || bytes[EI_MAG1] != ELFMAG1 || - bytes[EI_MAG2] != ELFMAG2 || bytes[EI_MAG3] != ELFMAG3) { - return 3; - } - if (bytes[EI_CLASS] != ELFCLASS64) return 4; - if (bytes[EI_DATA] != ELFDATA2LSB) return 5; - - eh = bytes; - e_type = rd16(eh + 16); - e_machine = rd16(eh + 18); - e_entry = rd64(eh + 24); - e_phoff = rd64(eh + 32); - e_phentsize = rd16(eh + 54); - e_phnum = rd16(eh + 56); - - if (e_machine != EM_RISCV) return 6; - if (e_type != ET_EXEC && e_type != ET_DYN) return 7; - if (e_phentsize < ELF64_PHDR_SIZE) return 8; - if ((u64)e_phoff + (u64)e_phnum * e_phentsize > len) return 9; - - /* Pass 1: compute [lo_va, hi_va) across PT_LOAD. */ - for (i = 0; i < e_phnum; ++i) { - const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; - u32 p_type = rd32(ph + 0); - u64 p_vaddr = rd64(ph + 16); - u64 p_memsz = rd64(ph + 40); - if (p_type != PT_LOAD) continue; - if (!saw_load) { - lo_va = round_down(p_vaddr, EMU_PAGE_SIZE); - hi_va = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); - saw_load = 1; - } else { - u64 lo = round_down(p_vaddr, EMU_PAGE_SIZE); - u64 hi = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); - if (lo < lo_va) lo_va = lo; - if (hi > hi_va) hi_va = hi; - } - } - if (!saw_load) return 10; - - /* PT_INTERP handoff: if the program ELF has an interpreter, place the - * interpreter image past the program's hi_va and arrange the entry PC - * to land in the interpreter. AT_BASE in the auxv (added below) tells - * the interpreter where it was loaded. The host must have staged the - * interpreter bytes via emu_load_elf_set_interp_bytes; otherwise we - * fail with a distinct error code. 
*/ - int have_interp = 0; - u64 interp_lo_va = 0, interp_hi_va = 0, interp_phoff = 0, interp_entry = 0; - u16 interp_phentsize = 0, interp_phnum = 0; - int interp_pic = 0; - u64 interp_base_va = 0; - for (i = 0; i < e_phnum; ++i) { - const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; - u32 p_type = rd32(ph + 0); - if (p_type == PT_INTERP) { - have_interp = 1; - break; - } - } - if (have_interp) { - if (!g_pending_interp.bytes || g_pending_interp.len == 0) { - /* Caller missed staging the interpreter — fail loudly so the host - * knows it needs to supply ld.so bytes. */ - return 15; - } - if (elf_layout_extent(g_pending_interp.bytes, g_pending_interp.len, - &interp_lo_va, &interp_hi_va, &interp_phoff, - &interp_phentsize, &interp_phnum, &interp_entry, - &interp_pic) != 0) { - return 16; - } - interp_base_va = round_up(hi_va, EMU_PAGE_SIZE); - if (!interp_pic && interp_lo_va < interp_base_va) { - return 17; - } - if (interp_pic) { - u64 span = interp_hi_va - interp_lo_va; - hi_va = interp_base_va + span; - } else { - if (interp_hi_va > hi_va) hi_va = interp_hi_va; - interp_base_va = interp_lo_va; - } - } - - image_end = hi_va; - brk_start = round_up(image_end, EMU_PAGE_SIZE); - stack_top = brk_start + EMU_BRK_RESERVE + EMU_STACK_SIZE; - guest_size = stack_top - lo_va; - - heap = c->ctx->heap; - guest_base = (u8*)heap->alloc(heap, (size_t)guest_size, 16u); - if (!guest_base) return 11; - memset(guest_base, 0, (size_t)guest_size); - - /* Pass 2: copy PT_LOAD segments into the host buffer. - * The interpreter does not enforce per-segment permissions in v1; the - * smoke test only needs executable + readable + writable to all be - * accessible. RWX divergence can land alongside the JIT lifter. 
*/ - for (i = 0; i < e_phnum; ++i) { - const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; - u32 p_type = rd32(ph + 0); - u64 p_offset = rd64(ph + 8); - u64 p_vaddr = rd64(ph + 16); - u64 p_filesz = rd64(ph + 32); - u64 p_memsz = rd64(ph + 40); - if (p_type != PT_LOAD) continue; - if (p_offset + p_filesz > len) { - heap->free(heap, guest_base, (size_t)guest_size); - return 12; - } - if (p_filesz) { - memcpy(guest_base + (p_vaddr - lo_va), bytes + p_offset, - (size_t)p_filesz); - } - if (p_memsz > p_filesz) { - memset(guest_base + (p_vaddr - lo_va) + p_filesz, 0, - (size_t)(p_memsz - p_filesz)); - } - } - - /* Copy the interpreter's PT_LOAD segments next; its entry becomes the - * initial PC so the dynamic loader runs first. */ - if (have_interp) { - u64 bias = interp_pic ? (interp_base_va - interp_lo_va) : 0u; - if (elf_copy_segments(g_pending_interp.bytes, g_pending_interp.len, - interp_phoff, interp_phentsize, interp_phnum, - guest_base, lo_va, bias) != 0) { - heap->free(heap, guest_base, (size_t)guest_size); - g_pending_interp.bytes = NULL; - g_pending_interp.len = 0; - return 18; - } - /* Switch entry to the interpreter. */ - e_entry = interp_entry + bias; - /* Clear the staging slot — single-shot. */ - g_pending_interp.bytes = NULL; - g_pending_interp.len = 0; - } - - /* ---- Initial stack layout (RISC-V psABI) ---- - * The stack grows down. Top of stack contains, low to high: - * argc (u64) - * argv[0..argc-1] (u64 each, pointers into the strings region) - * NULL terminator - * envp[0..envc-1] - * NULL terminator - * auxv: pairs of (a_type, a_val), terminated by AT_NULL - * strings region (argv + envp string bodies) - * 16-byte AT_RANDOM payload - * - * Layout choice for v1: we place strings + AT_RANDOM at the top of - * the stack and the table immediately below, with `sp` 16-byte - * aligned per ABI. 
*/ - - argc = 0; - if (argv) { - for (p = argv; *p; ++p) ++argc; - } - int envc = 0; - if (envp) { - for (p = envp; *p; ++p) ++envc; - } - - /* Place strings at high end of stack. */ - u64 cursor = stack_top; - u64 *argv_addrs = NULL, *envp_addrs = NULL; - if (argc > 0) { - argv_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)argc, 8u); - if (!argv_addrs) { - heap->free(heap, guest_base, (size_t)guest_size); - return 13; - } - } - if (envc > 0) { - envp_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)envc, 8u); - if (!envp_addrs) { - if (argv_addrs) heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc); - heap->free(heap, guest_base, (size_t)guest_size); - return 14; - } - } - - for (i = 0; i < (u32)argc; ++i) { - size_t slen = slice_from_cstr(argv[i]).len + 1u; - cursor -= slen; - memcpy(guest_base + (cursor - lo_va), argv[i], slen); - argv_addrs[i] = cursor; - } - for (i = 0; i < (u32)envc; ++i) { - size_t slen = slice_from_cstr(envp[i]).len + 1u; - cursor -= slen; - memcpy(guest_base + (cursor - lo_va), envp[i], slen); - envp_addrs[i] = cursor; - } - - /* 16-byte AT_RANDOM payload. */ - cursor -= 16u; - { - u8* dst = guest_base + (cursor - lo_va); - /* Deterministic bytes are fine for the interpreter; libc only - * cares about *having* AT_RANDOM, not its entropy quality. */ - for (i = 0; i < 16u; ++i) dst[i] = (u8)(0xa5u ^ i); - } - u64 at_random_va = cursor; - - /* Align cursor down to 16. */ - cursor &= ~(u64)0xfu; - - /* Table size: argc(8) + (argc+1)*8 + (envc+1)*8 + auxv (6 pairs * - * 16). Place the table so that final sp is 16-byte aligned. */ - u64 table_bytes = 8u /* argc */ - + (u64)(argc + 1) * 8u /* argv + NULL */ - + (u64)(envc + 1) * 8u /* envp + NULL */ - + 6u * 16u; /* auxv pairs incl. AT_NULL */ - /* Round table_bytes up to 16 so sp lands aligned. 
*/ - u64 sp_table = (cursor - table_bytes) & ~(u64)0xfu; - sp = sp_table; - - u8* tp = guest_base + (sp - lo_va); - /* argc */ - wr64(tp, (u64)argc); - tp += 8; - for (i = 0; i < (u32)argc; ++i) { - wr64(tp, argv_addrs[i]); - tp += 8; - } - wr64(tp, 0); - tp += 8; /* argv NULL */ - for (i = 0; i < (u32)envc; ++i) { - wr64(tp, envp_addrs[i]); - tp += 8; - } - wr64(tp, 0); - tp += 8; /* envp NULL */ - - /* auxv: AT_PHDR, AT_PHENT, AT_PHNUM, AT_PAGESZ, AT_ENTRY, AT_RANDOM, - * AT_NULL. We list 7 entries; the table_bytes formula reserves - * exactly 6*16 = 96 bytes for auxv pairs (one of which is AT_NULL). - * Bump the formula to 7 pairs for correctness. */ - /* (Note: re-derived above; we leave the budget conservative.) */ - static const u32 AT_NULL_ = 0, AT_PHDR = 3, AT_PHENT = 4, AT_PHNUM = 5, - AT_PAGESZ = 6, AT_BASE = 7, AT_ENTRY = 9, AT_RANDOM = 25; - /* Emit pairs; if the budget is exhausted, AT_NULL fills the slot. - * - * AT_PHDR/AT_PHENT/AT_PHNUM always describe the *program* ELF, never - * the interpreter (the loader uses them to find DT_NEEDED etc.). - * AT_ENTRY is the program's original entry, even when we hand - * control to the interpreter first. When a PT_INTERP exists, we - * also emit AT_BASE pointing to the interpreter's load base so - * ld.so knows where it lives. */ - struct { - u64 type; - u64 val; - } aux[] = { - {AT_PHDR, lo_va + e_phoff}, - {AT_PHENT, e_phentsize}, - {AT_PHNUM, e_phnum}, - {AT_PAGESZ, EMU_PAGE_SIZE}, - {AT_BASE, have_interp ? interp_base_va : 0u}, - {AT_ENTRY, rd64(bytes + 24)}, /* program entry, never the interp */ - {AT_RANDOM, at_random_va}, - {AT_NULL_, 0}, - }; - u32 aux_count = sizeof(aux) / sizeof(aux[0]); - /* If the table_bytes budget was undersized, recompute and shift sp. */ - u64 needed = - 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u + (u64)aux_count * 16u; - if (needed > table_bytes) { - /* Re-place table_bytes := needed, re-align sp_table. 
*/ - sp_table = (cursor - needed) & ~(u64)0xfu; - sp = sp_table; - tp = guest_base + (sp - lo_va); - wr64(tp, (u64)argc); - tp += 8; - for (i = 0; i < (u32)argc; ++i) { - wr64(tp, argv_addrs[i]); - tp += 8; - } - wr64(tp, 0); - tp += 8; - for (i = 0; i < (u32)envc; ++i) { - wr64(tp, envp_addrs[i]); - tp += 8; - } - wr64(tp, 0); - tp += 8; - } - for (i = 0; i < aux_count; ++i) { - wr64(tp, aux[i].type); - tp += 8; - wr64(tp, aux[i].val); - tp += 8; - } - - if (argv_addrs) heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc); - if (envp_addrs) heap->free(heap, envp_addrs, sizeof(u64) * (size_t)envc); - - out->guest_base = guest_base; - out->guest_size = (size_t)guest_size; - out->entry_pc = e_entry; - out->initial_sp = sp; - - /* Stash the va_base and brk window inside out via in-band fields — - * the EmuLoadedImage struct only exposes guest_base/size/entry/sp. - * cfree_emu_new immediately calls emu_cpu_attach_mem below via a - * separate helper so the per-arch CPUState picks up the AS shape. - * For now we expose va_base + brk through a side-channel hook the - * test calls explicitly (see emu_load_elf_attach below). */ - /* Return the lo_va via a static side channel; the test invokes - * emu_load_elf_attach immediately after to wire the CPUState. */ - /* Side-channel: stuff lo_va into the high bits of guest_size? Bad - * idea. Instead, expose extra accessors via a tiny private out - * struct in the header — but the header is locked. We extend the - * struct in cpu.c via emu_cpu_attach_mem with the values we just - * computed, by passing them through a thread-local? No — the - * simplest sound path is to attach the CPUState here, but we don't - * have it. - * - * Compromise: cache lo_va + brk_start in a small static cell keyed - * by guest_base. The caller (smoke test or cfree_emu_new) reads via - * emu_load_elf_last_va_info(). This is intentionally minimal: a - * single global cell, set by the latest emu_load_elf call, consumed - * once by the caller. 
*/ - extern void emu_load_elf_remember_(void* base, u64 va_base, u64 size, - u64 brk_cur, u64 brk_max); - emu_load_elf_remember_(guest_base, lo_va, guest_size, brk_start, - brk_start + EMU_BRK_RESERVE); - return 0; -} - -/* Minimal side-channel used by callers that need the brk + va_base. - * Holds the values from the most recent successful emu_load_elf call. - * Single-threaded; the emulator is not thread-safe today. */ -static struct { - void* base; - u64 va_base; - u64 size; - u64 brk_cur; - u64 brk_max; -} g_last_image; - -void emu_load_elf_remember_(void* base, u64 va_base, u64 size, u64 brk_cur, - u64 brk_max) { - g_last_image.base = base; - g_last_image.va_base = va_base; - g_last_image.size = size; - g_last_image.brk_cur = brk_cur; - g_last_image.brk_max = brk_max; -} - -int emu_load_elf_attach(EmuCPUState* cpu, const EmuLoadedImage* img) { - if (!cpu || !img || g_last_image.base != img->guest_base) return 1; - emu_cpu_attach_mem(cpu, (u8*)img->guest_base, g_last_image.va_base, - g_last_image.size, g_last_image.brk_cur, - g_last_image.brk_max); - return 0; -} - -void emu_unload_image(Compiler* c, EmuLoadedImage* img) { - Heap* heap; - if (!c || !img || !img->guest_base) { - if (img) memset(img, 0, sizeof(*img)); - return; - } - heap = c->ctx->heap; - heap->free(heap, img->guest_base, img->guest_size); - memset(img, 0, sizeof(*img)); -} diff --git a/src/emu/emu.c b/src/emu/emu.c @@ -1,4 +1,4 @@ -/* libcfree's guest-ISA emulator: load a guest ELF, translate one +/* libcfree's guest-ISA emulator: load a guest executable, translate one * basic block at a time into host code via the existing CG/MC/link * pipeline, dispatch through a code cache. See doc/EMU.md for design * and §6 for the incremental-link discipline. 
@@ -10,35 +10,37 @@ #include "emu/emu.h" +#include <cfree/link.h> #include <setjmp.h> #include <string.h> +#include "arch/arch.h" #include "core/pool.h" #include "core/slice.h" -#include "link/link.h" +#include "obj/format.h" #include "obj/obj.h" /* ---- Lifecycle ---- */ struct CfreeEmu { Compiler* c; - CfreeEmuArch guest_arch; + CfreeTarget guest_target; int opt_level; CfreeEmuTraceFlags trace; + CfreeEmuExternalBindings bindings; - /* Borrowed JIT host (execmem + tls). The public CfreeEmuOptions has no - * field for this; the driver attaches one via emu_set_jit_host between - * cfree_emu_new and the first cfree_emu_run / cfree_emu_step. When - * NULL, runs of this emu surface CFREE_UNSUPPORTED. */ + /* Borrowed JIT host (execmem + tls). When NULL, runs of this emu surface + * CFREE_UNSUPPORTED. */ const CfreeJitHost* host; - EmuLoadedImage guest; - EmuCPUState* cpu; + EmuProcess process; + EmuThread main_thread; - Linker* linker; - LinkImage* image; - EmuCodeRegion* code_region; EmuCodeCache* cache; + u64 cache_generation; + CfreeJit** jits; + u32 njits; + u32 jits_cap; int done; int exit_code; @@ -52,13 +54,79 @@ static SrcLoc no_loc(void) { return l; } -static int arch_supported(CfreeEmuArch a) { - return a == CFREE_EMU_ARCH_AARCH64 || a == CFREE_EMU_ARCH_RISCV64; +/* The block function call ABI: u64 entry(EmuThread*). Cast through + * a typedef so the call site reads cleanly in the dispatcher. */ +typedef u64 (*EmuBlockFn)(EmuThread*); + +typedef struct EmuResolvedConfig { + CfreeTarget target; + const ObjFormatImpl* obj_format; + const ArchImpl* arch; + const CfreeOsImpl* os; +} EmuResolvedConfig; + +static EmuCPUState* emu_main_cpu(CfreeEmu* e) { + return e ? e->main_thread.cpu : NULL; } -/* The block function call ABI: u64 entry(EmuCPUState*). Cast through - * a typedef so the call site reads cleanly in the dispatcher. 
*/ -typedef u64 (*EmuBlockFn)(EmuCPUState*); +static CfreeStatus emu_public_syscall_adapter(void* user, EmuProcess* process, + EmuThread* thread, + const EmuSyscallRequest* req, + EmuSyscallResult* out) { + CfreeEmu* e = (CfreeEmu*)user; + CfreeEmuSyscallRequest public_req; + CfreeEmuSyscallResult public_out; + CfreeStatus st; + u32 i; + (void)process; + (void)thread; + if (!e || !e->bindings.syscall || !req || !out) return CFREE_INVALID; + memset(&public_req, 0, sizeof(public_req)); + public_req.number = req->number; + for (i = 0; i < 6u; ++i) public_req.args[i] = req->args[i]; + memset(&public_out, 0, sizeof(public_out)); + st = e->bindings.syscall(e->bindings.user, e, &public_req, &public_out); + if (st != CFREE_OK) return st; + memset(out, 0, sizeof(*out)); + out->result = public_out.result; + out->guest_errno = public_out.guest_errno; + out->flags = public_out.flags; + return CFREE_OK; +} + +static CfreeStatus emu_public_import_adapter(void* user, EmuProcess* process, + const EmuDynamicImport* req, + CfreeEmuResolvedImport* out) { + CfreeEmu* e = (CfreeEmu*)user; + CfreeEmuImportRequest public_req; + (void)process; + if (!e || !e->bindings.resolve_import || !req || !out) + return CFREE_INVALID; + memset(&public_req, 0, sizeof(public_req)); + public_req.object_name = req->object_name; + public_req.symbol_name = req->symbol_name; + public_req.signature = req->signature; + return e->bindings.resolve_import(e->bindings.user, e, &public_req, out); +} + +static CfreeStatus emu_public_object_adapter(void* user, EmuProcess* process, + CfreeSlice object_name, + CfreeSlice* out_bytes) { + CfreeEmu* e = (CfreeEmu*)user; + CfreeEmuObjectRequest public_req; + CfreeEmuResolvedObject public_out; + CfreeStatus st; + (void)process; + if (!e || !e->bindings.resolve_object || !out_bytes) return CFREE_INVALID; + memset(&public_req, 0, sizeof(public_req)); + public_req.object_name = object_name; + memset(&public_out, 0, sizeof(public_out)); + st = 
e->bindings.resolve_object(e->bindings.user, e, &public_req, + &public_out); + if (st != CFREE_OK) return st; + *out_bytes = public_out.object_bytes; + return public_out.object_bytes.data ? CFREE_OK : CFREE_NOT_FOUND; +} void emu_set_jit_host(CfreeEmu* e, const CfreeJitHost* host) { if (!e) return; @@ -69,16 +137,103 @@ const CfreeJitHost* emu_get_jit_host(const CfreeEmu* e) { return e ? e->host : NULL; } +CfreeStatus emu_process_os_alloc(Compiler* c, EmuProcess* process, + size_t size, size_t align) { + Heap* heap; + if (!c || !process || !size || process->os_private) return CFREE_INVALID; + heap = c->ctx->heap; + process->os_private = heap->alloc(heap, size, align ? align : 1u); + if (!process->os_private) return CFREE_NOMEM; + memset(process->os_private, 0, size); + return CFREE_OK; +} + +void emu_process_os_free(Compiler* c, EmuProcess* process, size_t size) { + Heap* heap; + if (!c || !process || !process->os_private) return; + heap = c->ctx->heap; + heap->free(heap, process->os_private, size); + process->os_private = NULL; +} + +CfreeStatus emu_thread_os_alloc(Compiler* c, EmuThread* thread, size_t size, + size_t align) { + Heap* heap; + if (!c || !thread || !size || thread->os_private) return CFREE_INVALID; + heap = c->ctx->heap; + thread->os_private = heap->alloc(heap, size, align ? 
align : 1u); + if (!thread->os_private) return CFREE_NOMEM; + memset(thread->os_private, 0, size); + return CFREE_OK; +} + +void emu_thread_os_free(Compiler* c, EmuThread* thread, size_t size) { + Heap* heap; + if (!c || !thread || !thread->os_private) return; + heap = c->ctx->heap; + heap->free(heap, thread->os_private, size); + thread->os_private = NULL; +} + +static CfreeStatus emu_resolve_config(Compiler* c, const CfreeEmuOptions* opts, + EmuResolvedConfig* out) { + CfreeBinFmt bin_fmt; + CfreeTarget target; + const ObjFormatImpl* obj_format; + const ObjFormatImpl* target_format; + const ArchImpl* arch; + const CfreeOsImpl* os; + CfreeStatus st; + + if (!c || !opts || !out || !opts->guest_bytes.data || opts->guest_bytes.len == 0) + return CFREE_INVALID; + memset(out, 0, sizeof(*out)); + + bin_fmt = cfree_detect_fmt(opts->guest_bytes.data, opts->guest_bytes.len); + obj_format = obj_format_lookup_bin(bin_fmt); + if (!obj_format || !obj_format->emu || !obj_format->emu->load_executable) + return CFREE_UNSUPPORTED; + + if (opts->has_guest_target) { + target = opts->guest_target; + } else { + if (!obj_format->emu->detect_executable) return CFREE_UNSUPPORTED; + memset(&target, 0, sizeof(target)); + st = obj_format->emu->detect_executable(c, opts->guest_bytes, &target); + if (st != CFREE_OK) return st; + } + + target_format = obj_format_lookup(target.obj); + if (target_format != obj_format) return CFREE_UNSUPPORTED; + + arch = arch_lookup(target.arch); + os = os_lookup(target.os); + if (!arch || !arch->decode || !arch->decode->decode_block || !arch->emu || + !arch->emu->cpu_new || !arch->emu->cpu_type || + !arch->emu->block_fn_type || !arch->emu->lift_block || !os) { + return CFREE_UNSUPPORTED; + } + + out->target = target; + out->obj_format = obj_format; + out->arch = arch; + out->os = os; + return CFREE_OK; +} + CfreeStatus cfree_emu_new(CfreeCompiler* c, const CfreeEmuOptions* opts, CfreeEmu** out) { PanicSave saved; Heap* heap; CfreeEmu* e; + EmuResolvedConfig 
resolved; + EmuLoadOptions load_opts; + CfreeStatus st; if (out) *out = NULL; if (!c || !opts || !out) return CFREE_INVALID; - if (!opts->guest_elf_bytes || opts->guest_elf_len == 0) return CFREE_INVALID; - if (!arch_supported(opts->guest_arch)) return CFREE_UNSUPPORTED; + if (!opts->guest_bytes.data || opts->guest_bytes.len == 0) + return CFREE_INVALID; compiler_panic_save(c, &saved); if (setjmp(c->panic)) { @@ -92,33 +247,82 @@ CfreeStatus cfree_emu_new(CfreeCompiler* c, const CfreeEmuOptions* opts, if (!e) compiler_panic(c, no_loc(), "emu: out of memory"); memset(e, 0, sizeof(*e)); e->c = c; - e->guest_arch = opts->guest_arch; e->opt_level = opts->optimize; e->trace = opts->trace; + e->bindings = opts->bindings; + e->host = opts->jit_host; - /* 1. Load the guest ELF: mmap a guest AS and place PT_LOAD segments, - * push argv/envp/auxv onto the guest stack. */ - if (emu_load_elf(c, opts->guest_arch, opts->guest_elf_bytes, - opts->guest_elf_len, opts->argv, opts->envp, - &e->guest) != 0) { - compiler_panic(c, no_loc(), "emu: failed to load guest ELF"); + st = emu_resolve_config(c, opts, &resolved); + if (st != CFREE_OK) { + compiler_panic(c, no_loc(), "emu: unsupported guest executable"); + } + e->guest_target = resolved.target; + + memset(&load_opts, 0, sizeof(load_opts)); + load_opts.name = opts->guest_name; + load_opts.bytes = opts->guest_bytes; + load_opts.guest_target = resolved.target; + load_opts.argv = opts->argv; + load_opts.envp = opts->envp; + load_opts.os = resolved.os; + load_opts.process = &e->process; + + e->process.compiler = c; + e->process.guest_target = resolved.target; + e->process.obj_format = resolved.obj_format; + e->process.arch = resolved.arch; + e->process.os = resolved.os; + if (e->bindings.syscall) { + e->process.bindings.syscall = emu_public_syscall_adapter; + e->process.bindings.user = e; + } else { + e->process.bindings.syscall = resolved.os->emu_default_syscall; + e->process.bindings.user = NULL; + } + if (e->bindings.resolve_import) 
{ + e->process.bindings.resolve_import = emu_public_import_adapter; + e->process.bindings.user = e; } + if (e->bindings.resolve_object) { + e->process.bindings.resolve_object = emu_public_object_adapter; + e->process.bindings.user = e; + } + load_opts.bindings = &e->process.bindings; + e->main_thread.process = &e->process; - /* 2. Allocate per-thread CPU state and seed PC/SP. */ - e->cpu = - emu_cpu_new(c, opts->guest_arch, e->guest.entry_pc, e->guest.initial_sp); + if (resolved.os->emu_init_process_private && + resolved.os->emu_init_process_private(c, &e->process) != CFREE_OK) { + compiler_panic(c, no_loc(), "emu: failed to initialize OS process state"); + } + if (resolved.os->emu_init_thread_private && + resolved.os->emu_init_thread_private(c, &e->process, &e->main_thread) != + CFREE_OK) { + compiler_panic(c, no_loc(), "emu: failed to initialize OS thread state"); + } - /* 3. Code region is deferred until a JIT host is attached — the driver - * calls emu_set_jit_host before cfree_emu_run / cfree_emu_step. - * The linker session itself doesn't need execmem; only block - * materialization does, and that runs from cfree_emu_lookup. */ + /* 1. Load the guest executable through the object-format emu hook. */ + st = resolved.obj_format->emu->load_executable(c, &load_opts, + &e->process.image); + if (st != CFREE_OK) { + compiler_panic(c, no_loc(), "emu: failed to load guest executable"); + } + if (resolved.os->emu_init_process && + resolved.os->emu_init_process(c, &e->process, &load_opts, + &e->process.image) != CFREE_OK) { + compiler_panic(c, no_loc(), "emu: failed to initialize guest process"); + } - /* 4. Stand up the session linker. The extern resolver maps each - * EMU_SYM_* helper name to the host address of its trampoline / - * the running CfreeEmu's CPU state. */ - e->linker = link_new(c); - if (!e->linker) compiler_panic(c, no_loc(), "emu: link_new failed"); - link_set_extern_resolver(e->linker, emu_runtime_extern_resolver, e); + /* 2. 
Allocate per-thread CPU state and seed PC/SP. */ + e->main_thread.cpu = resolved.arch->emu->cpu_new(c, e->process.image.entry_pc, + e->process.image.initial_sp); + emu_cpu_set_thread(e->main_thread.cpu, &e->main_thread); + if (!e->main_thread.cpu || + emu_loaded_image_attach_cpu(e->main_thread.cpu, &e->process.image) != 0 || + (resolved.os->emu_init_thread && + resolved.os->emu_init_thread(c, &e->process, &e->main_thread) != + CFREE_OK)) { + compiler_panic(c, no_loc(), "emu: failed to initialize guest CPU state"); + } compiler_panic_restore(c, &saved); *out = e; @@ -130,67 +334,95 @@ void cfree_emu_free(CfreeEmu* e) { if (!e) return; heap = e->c->ctx->heap; + while (e->njits) cfree_jit_free(e->jits[--e->njits]); + if (e->jits) heap->free(heap, e->jits, sizeof(*e->jits) * e->jits_cap); if (e->cache) emu_cache_free(e->cache); - if (e->image) link_image_free(e->image); - if (e->linker) link_free(e->linker); - if (e->code_region) emu_code_region_free(e->code_region); - if (e->cpu) emu_cpu_free(e->cpu); - emu_unload_image(e->c, &e->guest); + if (e->process.os && e->process.os->emu_destroy_thread_private) + e->process.os->emu_destroy_thread_private(e->c, &e->main_thread); + if (e->main_thread.cpu) emu_cpu_free(e->main_thread.cpu); + emu_tls_destroy_process(e->c, &e->process); + if (e->process.os && e->process.os->emu_destroy_process_private) + e->process.os->emu_destroy_process_private(e->c, &e->process); + emu_unload_image(e->c, &e->process.image); heap->free(heap, e, sizeof(*e)); } -/* Lazily allocate the code region + initial image + cache the first time - * cfree_emu_lookup runs. Requires a wired JIT host. Returns CFREE_OK on - * success, CFREE_UNSUPPORTED if no host is wired, or panics for setup - * failures. */ +/* Lazily allocate the code cache the first time cfree_emu_lookup runs. + * Requires a wired JIT host because cold blocks are published as ordinary + * one-block JIT images. 
*/ static CfreeStatus ensure_runtime(CfreeEmu* e) { - if (e->image) return CFREE_OK; + if (e->cache) return CFREE_OK; if (!e->host || !e->host->execmem) return CFREE_UNSUPPORTED; - - e->code_region = - emu_code_region_new(e->c, e->host->execmem, EMU_CODE_REGION_SIZE); - if (!e->code_region) { - compiler_panic(e->c, no_loc(), "emu: emu_code_region_new failed"); - } - e->image = link_resolve_at(e->linker, emu_code_region_base(e->code_region)); - if (!e->image) compiler_panic(e->c, no_loc(), "emu: link_resolve_at failed"); e->cache = emu_cache_new(e->c); return CFREE_OK; } +static void emu_keep_jit(CfreeEmu* e, CfreeJit* jit) { + Heap* heap = e->c->ctx->heap; + if (e->njits == e->jits_cap) { + u32 old_cap = e->jits_cap; + u32 new_cap = old_cap ? old_cap * 2u : 8u; + CfreeJit** grown = (CfreeJit**)heap->realloc( + heap, e->jits, sizeof(*e->jits) * old_cap, sizeof(*e->jits) * new_cap, + _Alignof(CfreeJit*)); + if (!grown) compiler_panic(e->c, no_loc(), "emu: out of memory"); + e->jits = grown; + e->jits_cap = new_cap; + } + e->jits[e->njits++] = jit; +} + /* ---- Translation (cold-miss path) ---- */ static void* translate_block(CfreeEmu* e, u64 guest_pc) { - EmuInst insts[EMU_MAX_INSTS_PER_BLOCK]; + CfreeDecodedInsn* insts; + const ArchImpl* arch; + Heap* heap; + const u8* host_pc; + u64 va_end; + size_t decode_len; u32 ninsts; ObjBuilder* ob; CfreeCg* cg; CfreeCodeOptions copts; Sym block_name; - ObjSymId block_sym; + CfreeCgDecl block_decl; + CfreeCgSym block_sym; EmuLiftCtx ctx; - LinkSymId sym_id; - const LinkSymbol* sym; void* entry; CfreeStatus st; + CfreeLinkSessionOptions lopts; + CfreeLinkSession* sess; + CfreeJit* jit; + CfreeSlice block_slice; if (e->trace & CFREE_EMU_TRACE_BLOCK) emu_trace_block(e->c, guest_pc); - /* Bounds check: guest_pc must lie inside the mapped guest AS. - * The loader maps the guest AS so guest VAs are valid host - * pointers (1:1); reading bytes through the cast is safe. 
*/ - { - uintptr_t base = (uintptr_t)e->guest.guest_base; - if ((uintptr_t)guest_pc < base || - (uintptr_t)guest_pc >= base + e->guest.guest_size) { - return NULL; - } - } + arch = e->process.arch; + if (!arch || !arch->decode || !arch->decode->decode_block || !arch->emu || + !arch->emu->cpu_type || !arch->emu->block_fn_type || + !arch->emu->lift_block) + return NULL; - ninsts = emu_decode_block(e->guest_arch, (const u8*)(uintptr_t)guest_pc, - guest_pc, insts, EMU_MAX_INSTS_PER_BLOCK); - if (ninsts == 0) return NULL; + host_pc = emu_cpu_va_to_host_perm(emu_main_cpu(e), guest_pc, + arch->decode->min_insn_len, + EMU_MEM_EXEC); + if (!host_pc) return NULL; + va_end = emu_addr_space_contig_len(&e->process.image.addr_space, guest_pc, + EMU_MEM_EXEC); + if (!va_end) return NULL; + decode_len = (size_t)va_end; + heap = e->c->ctx->heap; + insts = (CfreeDecodedInsn*)heap->alloc( + heap, sizeof(*insts) * EMU_MAX_INSTS_PER_BLOCK, _Alignof(CfreeDecodedInsn)); + if (!insts) compiler_panic(e->c, no_loc(), "emu: out of memory"); + st = arch->decode->decode_block(e->c, host_pc, decode_len, guest_pc, insts, + EMU_MAX_INSTS_PER_BLOCK, &ninsts); + if (st != CFREE_OK || ninsts == 0) { + heap->free(heap, insts, sizeof(*insts) * EMU_MAX_INSTS_PER_BLOCK); + return NULL; + } if (e->trace & CFREE_EMU_TRACE_INSN) { u32 j; @@ -208,53 +440,67 @@ static void* translate_block(CfreeEmu* e, u64 guest_pc) { compiler_panic(e->c, no_loc(), "emu: cfree_cg_new failed"); block_name = emu_block_sym_name(e->c, guest_pc); - /* Forward-declare the block's symbol so the lifter can refer to it - * via cfree_cg_func_begin. obj_symbol_define fills in (section, value, - * size) once the function is emitted. 
*/ - block_sym = - obj_symbol(ob, block_name, SB_GLOBAL, SK_FUNC, OBJ_SEC_NONE, 0, 0); + memset(&block_decl, 0, sizeof(block_decl)); + block_decl.kind = CFREE_CG_DECL_FUNC; + block_decl.linkage_name = + cfree_cg_c_linkage_name((CfreeCompiler*)e->c, (CfreeSym)block_name); + block_decl.display_name = (CfreeSym)block_name; + block_decl.type = arch->emu->block_fn_type(e->c); + block_decl.sym.bind = CFREE_SB_GLOBAL; + block_decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + block_sym = cfree_cg_decl(cg, block_decl); + if (block_sym == CFREE_CG_SYM_NONE) + compiler_panic(e->c, no_loc(), "emu: failed to declare block symbol"); memset(&ctx, 0, sizeof(ctx)); - ctx.arch = e->guest_arch; - ctx.cpu_state_type = emu_cpu_type(e->c, e->guest_arch); - ctx.block_fn_type = emu_block_fn_type(e->c, e->guest_arch); + ctx.compiler = e->c; + ctx.arch = e->guest_target.arch; + ctx.thread_type = emu_thread_type(e->c); + ctx.cpu_state_type = ctx.thread_type; + ctx.block_fn_type = arch->emu->block_fn_type(e->c); ctx.block_sym = block_sym; ctx.guest_pc = guest_pc; - emu_lift_block(e->guest_arch, cg, insts, ninsts, &ctx); + st = arch->emu->lift_block(e->c, cg, insts, ninsts, &ctx); + heap->free(heap, insts, sizeof(*insts) * EMU_MAX_INSTS_PER_BLOCK); + insts = NULL; + if (st != CFREE_OK) + compiler_panic(e->c, no_loc(), "emu: failed to lift block"); + st = cfree_cg_end_obj(cg); + if (st != CFREE_OK) + compiler_panic(e->c, no_loc(), "emu: cfree_cg_end_obj failed"); cfree_cg_free(cg); obj_finalize(ob); - /* Add the block's object to the session linker and extend the - * image. link_resolve_extend places the new section at the next - * free offset within the reserved VA region (must not change host - * addresses of already-placed sections — chaining depends on it), - * resolves the block's runtime-helper externs via the resolver, - * and applies new relocations into the live image. 
*/ - link_add_obj(e->linker, ob); - link_resolve_extend(e->linker, e->image); - - /* Commit and mprotect RX up to the new high-water of the image. */ - { - uintptr_t end = emu_code_region_base(e->code_region); - u32 i; - for (i = 0; i < link_segment_count(e->image); ++i) { - const LinkSegment* seg = link_segment_get(e->image, i + 1u); - uintptr_t segend = (uintptr_t)seg->vaddr + (uintptr_t)seg->mem_size; - if (segend > end) end = segend; - } - emu_code_region_commit_rx_to(e->code_region, end); + block_slice = pool_slice(e->c->global, block_name); + memset(&lopts, 0, sizeof(lopts)); + lopts.output_kind = CFREE_LINK_OUTPUT_JIT; + lopts.jit_host = e->host; + lopts.extern_resolver = emu_runtime_extern_resolver; + lopts.extern_resolver_user = e; + + sess = NULL; + jit = NULL; + st = cfree_link_session_new((CfreeCompiler*)e->c, &lopts, &sess); + if (st == CFREE_OK) + st = cfree_link_session_add_obj(sess, (CfreeObjBuilder*)ob); + if (st == CFREE_OK) st = cfree_link_session_jit(sess, &jit); + if (sess) cfree_link_session_free(sess); + if (st != CFREE_OK || !jit) + compiler_panic(e->c, no_loc(), "emu: failed to publish JIT block"); + + entry = cfree_jit_lookup(jit, *(CfreeSlice*)&block_slice); + if (!entry) { + cfree_jit_free(jit); + return NULL; } - - /* Resolve the freshly placed block to its host entry. 
*/ - sym_id = link_symbol_lookup(e->image, block_name); - if (sym_id == LINK_SYM_NONE) return NULL; - sym = link_symbol(e->image, sym_id); - if (!sym || !sym->defined) return NULL; - entry = (void*)(uintptr_t)sym->vaddr; + emu_keep_jit(e, jit); emu_cache_insert(e->cache, guest_pc, entry); + emu_addr_space_mark_translated(&e->process.image.addr_space, guest_pc, + decode_len); + e->cache_generation = e->process.image.addr_space.generation; return entry; } @@ -264,6 +510,12 @@ void* cfree_emu_lookup(CfreeEmu* e, uint64_t guest_pc) { if (!e) return NULL; + if (e->cache && e->cache_generation != e->process.image.addr_space.generation) { + emu_cache_free(e->cache); + e->cache = NULL; + e->cache_generation = 0; + } + /* Cache hit short-circuits the panic boundary. */ if (e->cache) { entry = emu_cache_lookup(e->cache, guest_pc); @@ -305,7 +557,9 @@ CfreeStatus cfree_emu_step(CfreeEmu* e, uint32_t nblocks) { } for (i = 0; i < nblocks && !e->done; ++i) { - u64 pc = emu_cpu_pc(e->cpu); + EmuThread* thread = &e->main_thread; + EmuCPUState* cpu = thread->cpu; + u64 pc = emu_cpu_pc(cpu); void* entry; EmuBlockFn fn; u64 next_pc; @@ -321,13 +575,13 @@ CfreeStatus cfree_emu_step(CfreeEmu* e, uint32_t nblocks) { } fn = (EmuBlockFn)entry; - next_pc = fn(e->cpu); - emu_cpu_set_pc(e->cpu, next_pc); + next_pc = fn(thread); + emu_cpu_set_pc(cpu, next_pc); - trap = emu_cpu_trap_reason(e->cpu); + trap = emu_cpu_trap_reason(cpu); if (trap == EMU_TRAP_EXIT) { e->done = 1; - e->exit_code = emu_cpu_exit_code(e->cpu); + e->exit_code = emu_cpu_exit_code(cpu); } else if (trap == EMU_TRAP_FAULT) { compiler_panic(e->c, no_loc(), "emu: guest faulted at pc=0x%llx", (unsigned long long)next_pc); @@ -349,10 +603,6 @@ CfreeStatus cfree_emu_run(CfreeCompiler* c, const CfreeEmuOptions* opts, st = cfree_emu_new(c, opts, &e); if (st != CFREE_OK) return st; - /* The public cfree_emu_run does not carry a JitHost, so the emu must - * already have one wired (driver-side) before we get here. 
Without it - * the first ensure_runtime call returns CFREE_UNSUPPORTED and we drop - * the emu cleanly. */ while (!e->done) { st = cfree_emu_step(e, 1024); if (st != CFREE_OK) break; @@ -366,7 +616,11 @@ CfreeStatus cfree_emu_run(CfreeCompiler* c, const CfreeEmuOptions* opts, /* Runtime accessor for the resolver — exposes the running emu's * CPUState pointer without baking the CfreeEmu layout into runtime.c. * Used by emu_runtime_extern_resolver for EMU_SYM_CPU_STATE. */ -EmuCPUState* emu_internal_cpu(CfreeEmu* e) { return e ? e->cpu : NULL; } +EmuCPUState* emu_internal_cpu(CfreeEmu* e) { return emu_main_cpu(e); } + +EmuProcess* emu_internal_process(CfreeEmu* e) { + return e ? &e->process : NULL; +} /* ---- Block symbol naming ---- * "emu_block_<16-hex-pc>" — fixed-width hex so the linker's hash diff --git a/src/emu/emu.h b/src/emu/emu.h @@ -6,24 +6,38 @@ * composes the pieces declared here. See doc/EMU.md for design. * * Layering: emu.c owns CfreeEmu lifecycle and the translate/dispatch - * loop; per-ISA decoders/lifters, CPUState synthesis, the JIT code - * cache and reserved-VA region, and the runtime helper trampolines - * each live behind one of the surfaces below so the top-level driver - * never reaches into ISA-specific code. */ + * loop; per-ISA decoders/lifters live behind ArchImpl hooks, while CPUState + * synthesis, the JIT code cache and reserved-VA region, and the runtime helper + * trampolines each live behind one of the surfaces below. 
*/ #include <cfree/cg.h> #include <cfree/emu.h> #include <cfree/jit.h> +#include "arch/arch.h" #include "core/core.h" #include "obj/obj.h" typedef struct LinkImage LinkImage; typedef struct Linker Linker; +typedef struct EmuCPUState EmuCPUState; +typedef struct ObjFormatImpl ObjFormatImpl; +typedef struct CfreeOsImpl CfreeOsImpl; +typedef struct EmuProcess EmuProcess; +typedef struct EmuThread EmuThread; +typedef struct EmuExternalBindings EmuExternalBindings; +typedef struct ObjFormatEmuOps ObjFormatEmuOps; +typedef struct EmuLoadedObject EmuLoadedObject; +typedef struct EmuDynNeededIter EmuDynNeededIter; +typedef struct EmuDynSymbol EmuDynSymbol; +typedef struct EmuDynRelocIter EmuDynRelocIter; +typedef struct EmuDynReloc EmuDynReloc; +typedef struct EmuObjectFormatData EmuObjectFormatData; /* ---- Configuration knobs ---------------------------------------- */ -/* Bounded so the translator can stack-allocate the EmuInst buffer. */ +/* Bounded so the translator can stack-allocate the decoded instruction buffer. + */ #define EMU_MAX_INSTS_PER_BLOCK 64u /* Reserved JIT code region. emu_runtime mmap's PROT_NONE up front and @@ -33,91 +47,423 @@ typedef struct Linker Linker; #define EMU_CODE_REGION_SIZE (128ull * 1024ull * 1024ull) /* ---- Per-emu JIT host wiring ------------------------------------ */ -/* The public CfreeEmuOptions has no field for a JitHost — drivers attach - * one through this internal-only setter before calling cfree_emu_run / - * cfree_emu_step. `host` is borrowed and must outlive the CfreeEmu. */ +/* `host` is borrowed and must outlive the CfreeEmu. The setter remains for + * internal callers that construct an emu before wiring driver-owned JIT state. 
*/ void emu_set_jit_host(CfreeEmu*, const CfreeJitHost*); const CfreeJitHost* emu_get_jit_host(const CfreeEmu*); -/* ---- Guest ELF loader ------------------------------------------- */ +/* ---- Guest executable image ------------------------------------- */ + +typedef enum EmuMemPerm { + EMU_MEM_READ = 1u << 0, + EMU_MEM_WRITE = 1u << 1, + EMU_MEM_EXEC = 1u << 2, +} EmuMemPerm; + +typedef enum EmuMapKind { + EMU_MAP_ANON, + EMU_MAP_FILE, + EMU_MAP_GUARD, +} EmuMapKind; + +typedef enum EmuFaultKind { + EMU_FAULT_NONE = 0, + EMU_FAULT_UNMAPPED, + EMU_FAULT_PROT, + EMU_FAULT_EXEC_INVALIDATED, +} EmuFaultKind; + +typedef struct EmuMemFault { + EmuFaultKind kind; + u64 addr; + u8 access; +} EmuMemFault; + +typedef struct EmuMap { + u64 start; + u64 end; + u8 perms; + EmuMapKind kind; + u32 flags; + u64 generation; + u8* bytes; + u8* dirty_pages; + u8* translated_pages; +} EmuMap; + +typedef struct EmuAddrSpace { + Compiler* compiler; + Heap* heap; + u64 page_size; + EmuMap* maps; + u32 nmaps; + u32 maps_cap; + u64 generation; + u64 brk_base; + u64 brk_cur; + u64 brk_max; + EmuMemFault last_fault; +} EmuAddrSpace; + +typedef struct EmuDynamicImport { + CfreeSlice object_name; + CfreeSlice symbol_name; + u64 got_vaddr; + u64 thunk_vaddr; + CfreeEmuImportSignature signature; +} EmuDynamicImport; + +typedef struct EmuImportBinding { + CfreeSlice object_name; + CfreeSlice symbol_name; + u64 got_vaddr; + u64 thunk_vaddr; + u64 resolved_guest_addr; + void* resolved_host_fn; + u32 flags; + CfreeEmuImportSignature signature; +} EmuImportBinding; + +typedef struct EmuObjectImports { + CfreeSlice needed[16]; + u32 nneeded; +} EmuObjectImports; + +typedef struct EmuObjectInitFini { + u64 init; + u64 fini; + u64 init_array; + u64 init_arraysz; + u64 fini_array; + u64 fini_arraysz; +} EmuObjectInitFini; + +struct EmuObjectFormatData { + void* data; + size_t size; + size_t align; +}; + +typedef struct EmuObjectProcessInfo { + u64 headers_vaddr; + u64 header_entry_size; + u64 
header_count; + CfreeSlice interpreter_path; +} EmuObjectProcessInfo; + +typedef u32 EmuDynRelocClass; +#define EMU_DYN_RELOC_NONE 0u +#define EMU_DYN_RELOC_RELATIVE 1u +#define EMU_DYN_RELOC_SYMBOLIC 2u +#define EMU_DYN_RELOC_IMPORT_SLOT 3u + +struct EmuDynNeededIter { + const EmuLoadedObject* object; + u32 index; +}; + +struct EmuDynSymbol { + CfreeSlice name; + u64 value; + u64 size; + u64 index; + u32 flags; + int defined; +}; + +struct EmuDynReloc { + u64 patch_addr; + u64 symbol_index; + u64 wire_type; + i64 addend; + u32 width; +}; + +typedef u32 EmuDynRelocTableKind; +#define EMU_DYN_RELOC_TABLE_MAIN 0u +#define EMU_DYN_RELOC_TABLE_PLT 1u + +struct EmuDynRelocIter { + const EmuLoadedObject* object; + EmuDynRelocTableKind table; + u64 cursor; +}; + +typedef struct EmuTlsModule { + u64 image_vaddr; + u64 filesz; + u64 memsz; + u64 align; + u32 module_id; +} EmuTlsModule; + +typedef struct EmuLoadedObject { + CfreeSlice name; + CfreeSlice soname; + u64 load_bias; + u64 map_start; + u64 map_end; + EmuObjectImports imports; + EmuObjectInitFini init_fini; + EmuObjectFormatData format; + EmuTlsModule tls; + u32 flags; +} EmuLoadedObject; + +typedef struct EmuLinkMap { + EmuLoadedObject* objects; + u32 nobjects; + u32 objects_cap; + u32 main_object; + u32 global_scope_head; +} EmuLinkMap; + +typedef struct EmuDlPolicy { + u32 flags; + u32 global_scope_head; + u32 search_path_head; +} EmuDlPolicy; + +typedef struct EmuTlsState { + EmuTlsModule* modules; + u32 nmodules; + u32 modules_cap; + u64 static_size; + u64 static_align; +} EmuTlsState; + +typedef struct EmuTlsBlock { + u32 module_id; + u64 base; + u64 memsz; +} EmuTlsBlock; + +typedef struct EmuTlsBlocks { + EmuTlsBlock* blocks; + u32 nblocks; + u32 blocks_cap; +} EmuTlsBlocks; typedef struct EmuLoadedImage { - void* guest_base; /* host pointer to the mapped guest AS */ - size_t guest_size; /* bytes reserved for the guest AS */ - u64 entry_pc; /* guest VA of the program entry point */ - u64 initial_sp; /* 
guest VA of the initial stack pointer */ + EmuAddrSpace addr_space; + u64 entry_pc; /* guest VA of the program entry point */ + u64 initial_sp; /* guest VA of the initial stack pointer */ + EmuObjectProcessInfo process_info; + EmuDynamicImport* imports; + u32 nimports; + EmuImportBinding* import_bindings; + u32 nimport_bindings; + u32 import_bindings_cap; + u64 import_thunk_base; + u64 import_thunk_size; + u64 import_thunk_next; + EmuLinkMap link_map; } EmuLoadedImage; -/* Parse the guest ELF, mmap the guest AS, copy PT_LOAD segments, - * push argv/envp/auxv onto the guest stack at initial_sp. Returns 0 - * on success and writes *out; returns nonzero on parse failure. */ -int emu_load_elf(Compiler*, CfreeEmuArch, const u8* bytes, size_t len, - const char* const* argv, const char* const* envp, - EmuLoadedImage* out); +typedef struct EmuLoadOptions { + CfreeSlice name; + CfreeSlice bytes; + CfreeTarget guest_target; + const char* const* argv; + const char* const* envp; + const CfreeOsImpl* os; + const EmuExternalBindings* bindings; + EmuProcess* process; +} EmuLoadOptions; + +typedef struct EmuSyscallRequest { + u64 number; + u64 args[6]; +} EmuSyscallRequest; + +typedef struct EmuSyscallResult { + i64 result; + i32 guest_errno; + u32 flags; +} EmuSyscallResult; + +#define EMU_SYSCALL_RESULT_SKIP_ENCODE 1u + +struct EmuExternalBindings { + CfreeStatus (*syscall)(void* user, EmuProcess*, EmuThread*, + const EmuSyscallRequest*, EmuSyscallResult* out); + CfreeStatus (*resolve_import)(void* user, EmuProcess*, + const EmuDynamicImport*, + CfreeEmuResolvedImport* out); + CfreeStatus (*resolve_object)(void* user, EmuProcess*, + CfreeSlice object_name, CfreeSlice* out_bytes); + void* user; +}; + +typedef struct EmuFaultEvent { + EmuFaultKind kind; + u64 addr; + u64 pc; + u64 next_pc; + u8 access; +} EmuFaultEvent; + +struct EmuProcess { + Compiler* compiler; + CfreeTarget guest_target; + const ObjFormatImpl* obj_format; + const ArchImpl* arch; + const CfreeOsImpl* os; + 
void* os_private; + EmuLoadedImage image; + EmuExternalBindings bindings; + EmuDlPolicy dl_policy; + EmuTlsState tls_state; +}; + +struct EmuThread { + EmuProcess* process; + EmuCPUState* cpu; + void* os_private; +}; + +struct CfreeOsImpl { + CfreeOSKind kind; + const char* name; + CfreeStatus (*emu_init_process_private)(Compiler*, EmuProcess*); + void (*emu_destroy_process_private)(Compiler*, EmuProcess*); + CfreeStatus (*emu_init_thread_private)(Compiler*, EmuProcess*, EmuThread*); + void (*emu_destroy_thread_private)(Compiler*, EmuThread*); + CfreeStatus (*emu_init_process)(Compiler*, EmuProcess*, + const EmuLoadOptions*, + const EmuLoadedImage*); + CfreeStatus (*emu_init_thread)(Compiler*, EmuProcess*, EmuThread*); + CfreeStatus (*emu_decode_syscall)(EmuProcess*, EmuThread*, + EmuSyscallRequest* out); + CfreeStatus (*emu_encode_syscall_result)(EmuProcess*, EmuThread*, + const EmuSyscallResult*); + u64 (*emu_syscall_next_pc)(EmuProcess*, EmuThread*, + const EmuSyscallRequest*, u64 next_pc); + CfreeStatus (*emu_find_map_region)(EmuProcess*, u64 nbytes, u64 align, + u32 purpose, u64* out); + void (*emu_note_map_region)(EmuProcess*, u64 base, u64 nbytes, + u32 purpose); + CfreeStatus (*emu_default_syscall)(void* user, EmuProcess*, EmuThread*, + const EmuSyscallRequest*, + EmuSyscallResult* out); + CfreeStatus (*emu_deliver_fault)(EmuProcess*, EmuThread*, + const EmuFaultEvent*, u64* next_pc_out); +}; + +const CfreeOsImpl* os_lookup(CfreeOSKind); + +#define EMU_OS_MAP_MMAP 1u +#define EMU_OS_MAP_DL_THUNKS 2u +#define EMU_OS_MAP_TLS 3u + +int emu_loaded_image_attach_cpu(EmuCPUState*, EmuLoadedImage*); void emu_unload_image(Compiler*, EmuLoadedImage*); +CfreeStatus emu_dl_init_process(Compiler*, EmuProcess*); +CfreeStatus emu_dl_load_dependencies_and_relocate(Compiler*, EmuProcess*, + const EmuLoadOptions*, + const ObjFormatEmuOps*); +CfreeStatus emu_dl_lookup_symbol(EmuProcess*, CfreeSlice symbol, + u64* out_addr); +CfreeStatus 
emu_dl_resolve_import_thunk(EmuProcess*, u64 target, + EmuImportBinding** out); +CfreeStatus emu_call_host_import(EmuThread*, EmuImportBinding*, + const u64* args, u32 nargs, u64* result_out); +CfreeStatus emu_object_format_data_alloc(Compiler*, EmuLoadedObject*, size_t, + size_t, void** out); +CfreeStatus emu_apply_reloc_bytes(Compiler*, RelocKind, u8* P_bytes, u64 S, + i64 A, u64 P); +CfreeStatus emu_tls_rebuild_modules(Compiler*, EmuProcess*); +CfreeStatus emu_tls_blocks_add(Compiler*, EmuTlsBlocks*, u32 module_id, + u64 base, u64 memsz); +CfreeStatus emu_tls_copy_module_image(EmuProcess*, const EmuTlsModule*, + u64 base); +void emu_tls_destroy_process(Compiler*, EmuProcess*); +void emu_tls_destroy_blocks(Compiler*, EmuTlsBlocks*); +CfreeStatus emu_fault_deliver(EmuProcess*, EmuThread*, + const EmuFaultEvent*, u64* next_pc_out); +CfreeStatus emu_process_os_alloc(Compiler*, EmuProcess*, size_t size, + size_t align); +void emu_process_os_free(Compiler*, EmuProcess*, size_t size); +CfreeStatus emu_thread_os_alloc(Compiler*, EmuThread*, size_t size, + size_t align); +void emu_thread_os_free(Compiler*, EmuThread*, size_t size); +CfreeStatus emu_addr_space_init(EmuAddrSpace*, Compiler*, u64 page_size); +void emu_addr_space_destroy(EmuAddrSpace*); +CfreeStatus emu_addr_space_map(EmuAddrSpace*, u64 va, u64 nbytes, u8 perms, + EmuMapKind kind); +CfreeStatus emu_addr_space_unmap(EmuAddrSpace*, u64 va, u64 nbytes); +CfreeStatus emu_addr_space_protect(EmuAddrSpace*, u64 va, u64 nbytes, + u8 perms); +CfreeStatus emu_addr_space_find_gap(EmuAddrSpace*, u64 nbytes, u64 align, + u64 min_va, u64 max_va, u64* out); +CfreeStatus emu_addr_space_set_brk(EmuAddrSpace*, u64 requested, + u64* actual_out); +CfreeStatus emu_addr_space_copy_in(EmuAddrSpace*, u64 va, const void* src, + u64 nbytes); +CfreeStatus emu_addr_space_set_perm(EmuAddrSpace*, u64 va, u64 nbytes, + u8 perms); +u8* emu_addr_space_ptr(EmuAddrSpace*, u64 va, u64 nbytes, u8 need_perms); +u64 
emu_addr_space_contig_len(EmuAddrSpace*, u64 va, u8 need_perms); +const EmuMemFault* emu_addr_space_last_fault(const EmuAddrSpace*); +void emu_addr_space_mark_translated(EmuAddrSpace*, u64 va, u64 nbytes); +void emu_addr_space_invalidate(EmuAddrSpace*, u64 va, u64 nbytes); /* ---- CPU state -------------------------------------------------- */ -typedef struct EmuCPUState EmuCPUState; - typedef enum EmuTrapReason { EMU_TRAP_NONE = 0, EMU_TRAP_EXIT, /* guest exit syscall; exit_code valid */ EMU_TRAP_FAULT, /* unmapped access / decode failure */ } EmuTrapReason; -EmuCPUState* emu_cpu_new(Compiler*, CfreeEmuArch, u64 initial_pc, - u64 initial_sp); +EmuCPUState* emu_cpu_new_with_arch_state(Compiler*, CfreeArchKind, + u64 initial_pc, + size_t arch_state_size, + size_t arch_state_align); void emu_cpu_free(EmuCPUState*); +void* emu_cpu_arch_state(EmuCPUState*); +const void* emu_cpu_arch_state_const(const EmuCPUState*); +void emu_cpu_set_thread(EmuCPUState*, EmuThread*); +EmuThread* emu_cpu_thread(const EmuCPUState*); u64 emu_cpu_pc(const EmuCPUState*); void emu_cpu_set_pc(EmuCPUState*, u64); EmuTrapReason emu_cpu_trap_reason(const EmuCPUState*); int emu_cpu_exit_code(const EmuCPUState*); - -/* The interned codegen record type representing CPUState for `arch`. The - * lifter references fields through an ObjSymId resolved by the - * runtime extern resolver to &CfreeEmu->cpu storage. */ -CfreeCgTypeId emu_cpu_type(Compiler*, CfreeEmuArch); - -/* The function type `u64 (CPUState*)` used for every lifted block. 
+void emu_cpu_attach_addr_space(EmuCPUState*, EmuAddrSpace*); +u8* emu_cpu_guest_base(const EmuCPUState*); +u64 emu_cpu_guest_va_base(const EmuCPUState*); +u64 emu_cpu_guest_size(const EmuCPUState*); +u8* emu_cpu_va_to_host_pub(EmuCPUState*, u64 va, u64 nbytes); +u8* emu_cpu_va_to_host_perm(EmuCPUState*, u64 va, u64 nbytes, u8 need_perms); +u64 emu_cpu_brk_cur(const EmuCPUState*); +u64 emu_cpu_brk_max(const EmuCPUState*); +void emu_cpu_set_brk_cur(EmuCPUState*, u64 v); +void emu_cpu_trap_exit(EmuCPUState*, int code); +void emu_cpu_trap_fault(EmuCPUState*); +void emu_cpu_clear_trap(EmuCPUState*); +EmuCPUState* emu_thread_cpu(EmuThread*); + +/* The interned codegen pointer type representing EmuThread for JIT helper + * calls. CPUState remains arch-owned state below the thread. */ +CfreeCgTypeId emu_thread_type(Compiler*); +CfreeCgTypeId emu_cpu_type(Compiler*); + +/* The function type `u64 (EmuThread*)` used for every lifted block. * Returned interned. */ -CfreeCgTypeId emu_block_fn_type(Compiler*, CfreeEmuArch); - -/* ---- Decoder ---------------------------------------------------- */ -/* Concrete shape lives here (rather than as a per-ISA opaque) so the - * translator can stack-allocate a fixed-size buffer in - * cfree_emu_lookup. Per-ISA decoders/lifters interpret the operand - * payload through their own enums; the carrier is shared. */ -typedef struct EmuInst { - u32 op; /* per-ISA enum */ - u32 flags; /* TERMINATOR | MEM | SETS_FLAGS | ... */ - u64 guest_pc; - u32 guest_bytes; /* instruction width in guest bytes */ - u32 nop; - u64 operands[6]; /* per-ISA payload */ -} EmuInst; - -/* Decode up to the next basic-block terminator or `max` instructions, - * whichever comes first. Returns the count written to `out`. Zero - * means decode failed at `guest_pc` (undecodable / out-of-bounds). 
*/ -u32 emu_decode_block(CfreeEmuArch, const u8* bytes, u64 guest_pc, EmuInst* out, - u32 max); +CfreeCgTypeId emu_block_fn_type(Compiler*); /* ---- Lifter ----------------------------------------------------- */ typedef struct EmuLiftCtx { - CfreeEmuArch arch; - CfreeCgTypeId cpu_state_type; /* from emu_cpu_type */ + Compiler* compiler; + CfreeArchKind arch; + CfreeCgTypeId thread_type; /* from emu_thread_type */ + CfreeCgTypeId cpu_state_type; /* legacy alias for pointer helper type */ CfreeCgTypeId block_fn_type; /* from emu_block_fn_type */ - ObjSymId block_sym; /* function symbol for this block */ - u64 guest_pc; /* PC of first instruction in the block */ + CfreeCgSym block_sym; /* function symbol for this block */ + u64 guest_pc; /* PC of first instruction in the block */ } EmuLiftCtx; -/* Walk `insts` and emit one CG function (signature next_pc_t(CPUState*)) - * for the block. Calls cfree_cg_func_begin/end exactly once. */ -void emu_lift_block(CfreeEmuArch, CfreeCg*, const EmuInst* insts, u32 n, - const EmuLiftCtx*); - /* ---- Code cache ------------------------------------------------- */ typedef struct EmuCodeCache EmuCodeCache; @@ -157,6 +503,10 @@ void emu_code_region_commit_rx_to(EmuCodeRegion*, uintptr_t end); #define EMU_SYM_LOAD16 "__emu_load16" #define EMU_SYM_LOAD32 "__emu_load32" #define EMU_SYM_LOAD64 "__emu_load64" +#define EMU_SYM_LOAD8_CHECKED "__emu_load8_checked" +#define EMU_SYM_LOAD16_CHECKED "__emu_load16_checked" +#define EMU_SYM_LOAD32_CHECKED "__emu_load32_checked" +#define EMU_SYM_LOAD64_CHECKED "__emu_load64_checked" #define EMU_SYM_STORE8 "__emu_store8" #define EMU_SYM_STORE16 "__emu_store16" #define EMU_SYM_STORE32 "__emu_store32" @@ -177,23 +527,32 @@ void* emu_runtime_extern_resolver(void* user, CfreeSlice name); * the guest AS, so loads/stores bounds-check against the EmuCPUState's * mapped guest range and trap on miss (writing EMU_TRAP_FAULT into the * CPU state and falling back to the dispatcher). 
*/ -u8 emu_mem_load8(EmuCPUState*, u64 addr); -u16 emu_mem_load16(EmuCPUState*, u64 addr); -u32 emu_mem_load32(EmuCPUState*, u64 addr); -u64 emu_mem_load64(EmuCPUState*, u64 addr); -void emu_mem_store8(EmuCPUState*, u64 addr, u8); -void emu_mem_store16(EmuCPUState*, u64 addr, u16); -void emu_mem_store32(EmuCPUState*, u64 addr, u32); -void emu_mem_store64(EmuCPUState*, u64 addr, u64); +u8 emu_mem_load8(EmuThread*, u64 addr); +u16 emu_mem_load16(EmuThread*, u64 addr); +u32 emu_mem_load32(EmuThread*, u64 addr); +u64 emu_mem_load64(EmuThread*, u64 addr); +u64 emu_mem_load8_checked(EmuThread*, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out); +u64 emu_mem_load16_checked(EmuThread*, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out); +u64 emu_mem_load32_checked(EmuThread*, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out); +u64 emu_mem_load64_checked(EmuThread*, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out); +u64 emu_mem_store8(EmuThread*, u64 addr, u8, u64 fault_pc, u64 next_pc); +u64 emu_mem_store16(EmuThread*, u64 addr, u16, u64 fault_pc, u64 next_pc); +u64 emu_mem_store32(EmuThread*, u64 addr, u32, u64 fault_pc, u64 next_pc); +u64 emu_mem_store64(EmuThread*, u64 addr, u64, u64 fault_pc, u64 next_pc); /* Reads syscall number / args from the guest registers, forwards to * the host OS, and writes the return into the guest return register. 
*/ -void emu_syscall(EmuCPUState*); +void emu_syscall(EmuThread*); +u64 emu_syscall_next(EmuThread*, u64 next_pc); /* ---- Tracing ---------------------------------------------------- */ void emu_trace_pc(Compiler*, u64 guest_pc); void emu_trace_block(Compiler*, u64 guest_pc); -void emu_trace_insn(Compiler*, u64 guest_pc, const EmuInst*); +void emu_trace_insn(Compiler*, u64 guest_pc, const CfreeDecodedInsn*); #endif diff --git a/src/emu/image.c b/src/emu/image.c @@ -0,0 +1,469 @@ +#include <string.h> + +#include "emu/emu.h" + +static u64 emu_round_down(u64 v, u64 a) { return v & ~(a - 1u); } +static u64 emu_round_up(u64 v, u64 a) { return (v + a - 1u) & ~(a - 1u); } + +static u64 map_pages(const EmuAddrSpace* as, u64 start, u64 end) { + return (end - start) / as->page_size; +} + +static void fault_set(EmuAddrSpace* as, EmuFaultKind kind, u64 addr, + u8 access) { + if (!as) return; + as->last_fault.kind = kind; + as->last_fault.addr = addr; + as->last_fault.access = access; +} + +static void map_free(EmuAddrSpace* as, EmuMap* m) { + u64 npages; + if (!as || !m) return; + npages = map_pages(as, m->start, m->end); + if (m->bytes) as->heap->free(as->heap, m->bytes, (size_t)(m->end - m->start)); + if (m->dirty_pages) as->heap->free(as->heap, m->dirty_pages, (size_t)npages); + if (m->translated_pages) + as->heap->free(as->heap, m->translated_pages, (size_t)npages); + memset(m, 0, sizeof(*m)); +} + +static CfreeStatus map_alloc_storage(EmuAddrSpace* as, EmuMap* m) { + u64 nbytes, npages; + nbytes = m->end - m->start; + npages = map_pages(as, m->start, m->end); + m->bytes = (u8*)as->heap->alloc(as->heap, (size_t)nbytes, 16u); + m->dirty_pages = (u8*)as->heap->alloc(as->heap, (size_t)npages, 1u); + m->translated_pages = (u8*)as->heap->alloc(as->heap, (size_t)npages, 1u); + if (!m->bytes || !m->dirty_pages || !m->translated_pages) { + map_free(as, m); + return CFREE_NOMEM; + } + memset(m->bytes, 0, (size_t)nbytes); + memset(m->dirty_pages, 0, (size_t)npages); + 
memset(m->translated_pages, 0, (size_t)npages); + return CFREE_OK; +} + +static int map_overlaps(const EmuMap* m, u64 start, u64 end) { + return start < m->end && end > m->start; +} + +static EmuMap* find_map(EmuAddrSpace* as, u64 va) { + u32 i; + if (!as) return NULL; + for (i = 0; i < as->nmaps; ++i) { + if (va >= as->maps[i].start && va < as->maps[i].end) return &as->maps[i]; + if (va < as->maps[i].start) return NULL; + } + return NULL; +} + +static int range_is_free(EmuAddrSpace* as, u64 start, u64 end) { + u32 i; + for (i = 0; i < as->nmaps; ++i) { + if (map_overlaps(&as->maps[i], start, end)) return 0; + if (end <= as->maps[i].start) return 1; + } + return 1; +} + +static CfreeStatus ensure_map_cap(EmuAddrSpace* as, u32 need) { + EmuMap* grown; + u32 old_cap, new_cap; + if (need <= as->maps_cap) return CFREE_OK; + old_cap = as->maps_cap; + new_cap = old_cap ? old_cap * 2u : 8u; + while (new_cap < need) new_cap *= 2u; + grown = (EmuMap*)as->heap->realloc(as->heap, as->maps, + sizeof(*as->maps) * old_cap, + sizeof(*as->maps) * new_cap, + _Alignof(EmuMap)); + if (!grown) return CFREE_NOMEM; + as->maps = grown; + as->maps_cap = new_cap; + return CFREE_OK; +} + +static CfreeStatus insert_map(EmuAddrSpace* as, EmuMap map) { + u32 idx; + CfreeStatus st; + st = ensure_map_cap(as, as->nmaps + 1u); + if (st != CFREE_OK) return st; + idx = 0; + while (idx < as->nmaps && as->maps[idx].start < map.start) ++idx; + if (idx < as->nmaps) { + memmove(&as->maps[idx + 1u], &as->maps[idx], + sizeof(*as->maps) * (as->nmaps - idx)); + } + as->maps[idx] = map; + ++as->nmaps; + return CFREE_OK; +} + +static CfreeStatus append_piece(EmuAddrSpace* as, EmuMap* src, u64 start, + u64 end, u8 perms) { + EmuMap dst; + CfreeStatus st; + u64 src_off, npages, page_off; + memset(&dst, 0, sizeof(dst)); + dst.start = start; + dst.end = end; + dst.perms = perms; + dst.kind = src->kind; + dst.flags = src->flags; + dst.generation = ++as->generation; + st = map_alloc_storage(as, &dst); + if (st != 
CFREE_OK) return st; + src_off = start - src->start; + memcpy(dst.bytes, src->bytes + src_off, (size_t)(end - start)); + npages = map_pages(as, start, end); + page_off = src_off / as->page_size; + memcpy(dst.dirty_pages, src->dirty_pages + page_off, (size_t)npages); + memcpy(dst.translated_pages, src->translated_pages + page_off, + (size_t)npages); + st = insert_map(as, dst); + if (st != CFREE_OK) map_free(as, &dst); + return st; +} + +static int range_is_mapped(EmuAddrSpace* as, u64 start, u64 end) { + u64 cur = start; + EmuMap* m; + while (cur < end) { + m = find_map(as, cur); + if (!m) return 0; + cur = m->end < end ? m->end : end; + } + return 1; +} + +CfreeStatus emu_addr_space_init(EmuAddrSpace* as, Compiler* c, u64 page_size) { + if (!as || !c || !page_size || (page_size & (page_size - 1u)) != 0) + return CFREE_INVALID; + memset(as, 0, sizeof(*as)); + as->compiler = c; + as->heap = c->ctx->heap; + as->page_size = page_size; + return CFREE_OK; +} + +void emu_addr_space_destroy(EmuAddrSpace* as) { + u32 i; + if (!as || !as->heap) return; + for (i = 0; i < as->nmaps; ++i) map_free(as, &as->maps[i]); + if (as->maps) + as->heap->free(as->heap, as->maps, sizeof(*as->maps) * as->maps_cap); + memset(as, 0, sizeof(*as)); +} + +CfreeStatus emu_addr_space_map(EmuAddrSpace* as, u64 va, u64 nbytes, u8 perms, + EmuMapKind kind) { + EmuMap m; + CfreeStatus st; + if (!as || !as->heap || !nbytes) return CFREE_INVALID; + if ((va & (as->page_size - 1u)) != 0 || + (nbytes & (as->page_size - 1u)) != 0) + return CFREE_INVALID; + if (va + nbytes < va) return CFREE_INVALID; + if (!range_is_free(as, va, va + nbytes)) return CFREE_INVALID; + memset(&m, 0, sizeof(m)); + m.start = va; + m.end = va + nbytes; + m.perms = perms; + m.kind = kind; + m.generation = ++as->generation; + st = map_alloc_storage(as, &m); + if (st != CFREE_OK) return st; + st = insert_map(as, m); + if (st != CFREE_OK) map_free(as, &m); + return st; +} + +CfreeStatus emu_addr_space_unmap(EmuAddrSpace* as, u64 va, 
u64 nbytes) { + u64 start, end; + u32 i; + if (!as || !as->heap || !nbytes) return CFREE_INVALID; + start = emu_round_down(va, as->page_size); + end = emu_round_up(va + nbytes, as->page_size); + if (end <= start) return CFREE_INVALID; + i = 0; + while (i < as->nmaps) { + EmuMap old = as->maps[i]; + u64 old_end = old.end; + if (!map_overlaps(&old, start, end)) { + ++i; + continue; + } + memmove(&as->maps[i], &as->maps[i + 1u], + sizeof(*as->maps) * (as->nmaps - i - 1u)); + --as->nmaps; + if (old.start < start) { + CfreeStatus st = append_piece(as, &old, old.start, start, old.perms); + if (st != CFREE_OK) { + map_free(as, &old); + return st; + } + } + if (end < old.end) { + CfreeStatus st = append_piece(as, &old, end, old.end, old.perms); + if (st != CFREE_OK) { + map_free(as, &old); + return st; + } + } + map_free(as, &old); + while (i < as->nmaps && as->maps[i].start < old_end) ++i; + } + return CFREE_OK; +} + +CfreeStatus emu_addr_space_protect(EmuAddrSpace* as, u64 va, u64 nbytes, + u8 perms) { + u64 start, end; + u32 i; + if (!as || !as->heap || !nbytes) return CFREE_INVALID; + start = emu_round_down(va, as->page_size); + end = emu_round_up(va + nbytes, as->page_size); + if (end <= start || !range_is_mapped(as, start, end)) return CFREE_INVALID; + i = 0; + while (i < as->nmaps) { + EmuMap old = as->maps[i]; + u64 old_end = old.end; + if (!map_overlaps(&old, start, end)) { + ++i; + continue; + } + memmove(&as->maps[i], &as->maps[i + 1u], + sizeof(*as->maps) * (as->nmaps - i - 1u)); + --as->nmaps; + if (old.start < start) { + CfreeStatus st = append_piece(as, &old, old.start, start, old.perms); + if (st != CFREE_OK) { + map_free(as, &old); + return st; + } + } + { + u64 mid_start = old.start > start ? old.start : start; + u64 mid_end = old.end < end ? 
old.end : end; + CfreeStatus st = append_piece(as, &old, mid_start, mid_end, perms); + if (st != CFREE_OK) { + map_free(as, &old); + return st; + } + } + if (end < old.end) { + CfreeStatus st = append_piece(as, &old, end, old.end, old.perms); + if (st != CFREE_OK) { + map_free(as, &old); + return st; + } + } + map_free(as, &old); + while (i < as->nmaps && as->maps[i].start < old_end) ++i; + } + return CFREE_OK; +} + +CfreeStatus emu_addr_space_find_gap(EmuAddrSpace* as, u64 nbytes, u64 align, + u64 min_va, u64 max_va, u64* out) { + u64 cur; + u32 i; + if (!as || !out || !nbytes) return CFREE_INVALID; + if (!align) align = as->page_size; + nbytes = emu_round_up(nbytes, as->page_size); + cur = emu_round_up(min_va, align); + for (i = 0; i < as->nmaps; ++i) { + if (cur + nbytes < cur || cur + nbytes > max_va) return CFREE_NOMEM; + if (cur + nbytes <= as->maps[i].start) { + *out = cur; + return CFREE_OK; + } + if (cur < as->maps[i].end) cur = emu_round_up(as->maps[i].end, align); + } + if (cur + nbytes < cur || cur + nbytes > max_va) return CFREE_NOMEM; + *out = cur; + return CFREE_OK; +} + +CfreeStatus emu_addr_space_set_brk(EmuAddrSpace* as, u64 requested, + u64* actual_out) { + u64 old_page, new_page; + CfreeStatus st; + if (!as) return CFREE_INVALID; + if (requested == 0) { + if (actual_out) *actual_out = as->brk_cur; + return CFREE_OK; + } + if (requested < as->brk_base || requested > as->brk_max) { + if (actual_out) *actual_out = as->brk_cur; + return CFREE_OK; + } + old_page = emu_round_up(as->brk_cur, as->page_size); + new_page = emu_round_up(requested, as->page_size); + if (new_page > old_page) { + st = emu_addr_space_map(as, old_page, new_page - old_page, + EMU_MEM_READ | EMU_MEM_WRITE, EMU_MAP_ANON); + if (st != CFREE_OK) { + if (actual_out) *actual_out = as->brk_cur; + return CFREE_OK; + } + } else if (new_page < old_page) { + st = emu_addr_space_unmap(as, new_page, old_page - new_page); + if (st != CFREE_OK) { + if (actual_out) *actual_out = as->brk_cur; + 
return CFREE_OK; + } + } + as->brk_cur = requested; + if (actual_out) *actual_out = as->brk_cur; + return CFREE_OK; +} + +CfreeStatus emu_addr_space_copy_in(EmuAddrSpace* as, u64 va, const void* src, + u64 nbytes) { + u8* p; + if (!nbytes) return CFREE_OK; + if (!src) return CFREE_INVALID; + p = emu_addr_space_ptr(as, va, nbytes, 0); + if (!p) return CFREE_INVALID; + memcpy(p, src, (size_t)nbytes); + return CFREE_OK; +} + +CfreeStatus emu_addr_space_set_perm(EmuAddrSpace* as, u64 va, u64 nbytes, + u8 perms) { + return emu_addr_space_protect(as, va, nbytes, perms); +} + +u8* emu_addr_space_ptr(EmuAddrSpace* as, u64 va, u64 nbytes, u8 need_perms) { + EmuMap* m; + u64 off, start_page, end_page, i; + if (!as || !as->maps) return NULL; + m = find_map(as, va); + if (!m) { + fault_set(as, EMU_FAULT_UNMAPPED, va, need_perms); + return NULL; + } + if (va + nbytes < va || va + nbytes > m->end) { + fault_set(as, EMU_FAULT_UNMAPPED, va + nbytes, need_perms); + return NULL; + } + if (need_perms && (m->perms & need_perms) != need_perms) { + fault_set(as, EMU_FAULT_PROT, va, need_perms); + return NULL; + } + off = va - m->start; + if ((need_perms & EMU_MEM_WRITE) && nbytes) { + start_page = off / as->page_size; + end_page = (off + nbytes + as->page_size - 1u) / as->page_size; + for (i = start_page; i < end_page; ++i) { + m->dirty_pages[i] = 1u; + if (m->translated_pages[i]) { + m->translated_pages[i] = 0; + ++m->generation; + ++as->generation; + } + } + } + fault_set(as, EMU_FAULT_NONE, 0, 0); + return m->bytes + off; +} + +u64 emu_addr_space_contig_len(EmuAddrSpace* as, u64 va, u8 need_perms) { + EmuMap* m; + if (!as) return 0; + m = find_map(as, va); + if (!m) return 0; + if (need_perms && (m->perms & need_perms) != need_perms) return 0; + return m->end - va; +} + +const EmuMemFault* emu_addr_space_last_fault(const EmuAddrSpace* as) { + return as ? 
&as->last_fault : NULL; +} + +void emu_addr_space_mark_translated(EmuAddrSpace* as, u64 va, u64 nbytes) { + EmuMap* m; + u64 end, off, start_page, end_page, i; + if (!as || !nbytes) return; + m = find_map(as, va); + if (!m) return; + end = va + nbytes; + if (end < va || end > m->end) end = m->end; + off = va - m->start; + start_page = off / as->page_size; + end_page = (off + (end - va) + as->page_size - 1u) / as->page_size; + for (i = start_page; i < end_page; ++i) m->translated_pages[i] = 1u; +} + +void emu_addr_space_invalidate(EmuAddrSpace* as, u64 va, u64 nbytes) { + u64 start, end; + u32 i; + if (!as || !nbytes) return; + start = emu_round_down(va, as->page_size); + end = emu_round_up(va + nbytes, as->page_size); + for (i = 0; i < as->nmaps; ++i) { + EmuMap* m = &as->maps[i]; + u64 s, e, j; + if (!map_overlaps(m, start, end)) continue; + s = (start > m->start ? start : m->start) - m->start; + e = (end < m->end ? end : m->end) - m->start; + for (j = s / as->page_size; j < e / as->page_size; ++j) { + m->translated_pages[j] = 0; + } + ++m->generation; + } + ++as->generation; +} + +int emu_loaded_image_attach_cpu(EmuCPUState* cpu, EmuLoadedImage* img) { + if (!cpu || !img || !img->addr_space.heap) return 1; + emu_cpu_attach_addr_space(cpu, &img->addr_space); + return 0; +} + +void emu_unload_image(Compiler* c, EmuLoadedImage* img) { + Heap* heap; + u32 i; + if (!img) return; + heap = c ? 
c->ctx->heap : img->addr_space.heap; + if (heap && img->link_map.objects) { + for (i = 0; i < img->link_map.nobjects; ++i) { + EmuLoadedObject* obj = &img->link_map.objects[i]; + if (obj->format.data) { + heap->free(heap, obj->format.data, obj->format.size); + } + } + } + emu_addr_space_destroy(&img->addr_space); + if (heap && img->imports) { + heap->free(heap, img->imports, sizeof(*img->imports) * img->nimports); + } + if (heap && img->import_bindings) { + heap->free(heap, img->import_bindings, + sizeof(*img->import_bindings) * img->import_bindings_cap); + } + if (heap && img->link_map.objects) { + heap->free(heap, img->link_map.objects, + sizeof(*img->link_map.objects) * img->link_map.objects_cap); + } + memset(img, 0, sizeof(*img)); +} + +CfreeStatus emu_object_format_data_alloc(Compiler* c, EmuLoadedObject* obj, + size_t size, size_t align, + void** out) { + Heap* heap; + if (out) *out = NULL; + if (!c || !obj || !size || !out) return CFREE_INVALID; + heap = c->ctx->heap; + obj->format.data = heap->alloc(heap, size, align ? align : _Alignof(u64)); + if (!obj->format.data) return CFREE_NOMEM; + memset(obj->format.data, 0, size); + obj->format.size = size; + obj->format.align = align ? align : _Alignof(u64); + *out = obj->format.data; + return CFREE_OK; +} diff --git a/src/emu/lift.c b/src/emu/lift.c @@ -1,38 +0,0 @@ -/* Per-ISA lifter. Consumes EmuInsts and drives CG to emit one host - * function per guest basic block (signature u64(EmuCPUState*)). - * Lifters target CG exclusively — never CGTarget directly — so the - * pipeline below CG is unchanged from the C front-end. - * - * STATUS: deferred. emu_cpu_type/emu_block_fn_type both return - * CFREE_CG_TYPE_NONE in cpu.c, and the public CG surface for taking - * the address of a struct field (needed to lift x[rd] = ...) is still - * being threaded through CGTarget hooks for rv64. The interpreter - * path (emu_cpu_interp_block, cpu.c) is the one exercised by every - * emu test today. 
- * - * When this lands: - * 1. emu_cpu_type / emu_block_fn_type return interned CfreeCgTypeIds - * for the rv64 EmuCPUState shape and `u64(EmuCPUState*)`. - * 2. This function emits one cfree_cg_func_begin/end pair per block. - * 3. Per Rv64Op, emit either a CG arith / load / store sequence or a - * call to the EMU_SYM_* helper (LOAD8/STORE8/SYSCALL/...). - * 4. Terminators (BRANCH/JAL/JALR/ECALL) write the next-PC to a CG - * local and the function returns it; ECALL also issues a call to - * EMU_SYM_SYSCALL before returning. - * - * For now translate_block (emu.c) panics on cold-miss because the empty - * function body would be malformed, so the interpreter is the only path - * that ever runs. */ - -#include <cfree/cg.h> - -#include "emu/emu.h" - -void emu_lift_block(CfreeEmuArch arch, CfreeCg* cg, const EmuInst* insts, - u32 n, const EmuLiftCtx* ctx) { - (void)arch; - (void)cg; - (void)insts; - (void)n; - (void)ctx; -} diff --git a/src/emu/runtime.c b/src/emu/runtime.c @@ -12,7 +12,6 @@ #include "core/util.h" #include "emu/emu.h" -#include "emu/rv64_ops.h" /* ============================================================ * Reserved code region @@ -25,11 +24,6 @@ * applies relocations. */ -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - static u64 page_size_bytes(const CfreeExecMem* m) { return m->page_size ? (u64)m->page_size : 0x4000u; } @@ -182,331 +176,275 @@ void* emu_cache_lookup(const EmuCodeCache* c, u64 guest_pc) { * can pull the CPUState pointer without dragging emu.c's struct * definition into this TU's contract. */ EmuCPUState* emu_internal_cpu(CfreeEmu*); +EmuProcess* emu_internal_process(CfreeEmu*); + +/* Memory helpers. Bounds-checked through the CPUState's guest-AS window + * (cpu.c). Checked helpers let an OS convert faults to its own delivery + * mechanism and return a non-zero resume PC to the lifted block. 
*/ + +static u64 emu_deliver_memory_fault(EmuThread* thread, u64 addr, u8 access, + u64 fault_pc, u64 next_pc) { + EmuProcess* process = thread ? thread->process : NULL; + EmuCPUState* cpu = emu_thread_cpu(thread); + const EmuMemFault* fault; + EmuFaultEvent ev; + u64 delivered_pc = next_pc; + if (!process || !cpu) return next_pc; + fault = emu_addr_space_last_fault(&process->image.addr_space); + memset(&ev, 0, sizeof(ev)); + ev.kind = fault ? fault->kind : EMU_FAULT_NONE; + ev.addr = addr; + ev.pc = fault_pc; + ev.next_pc = next_pc; + ev.access = access; + if (emu_fault_deliver(process, thread, &ev, &delivered_pc) != CFREE_OK) { + emu_cpu_trap_fault(cpu); + return fault_pc ? fault_pc : next_pc; + } + return delivered_pc; +} -/* Memory helpers. Bounds-checked through the CPUState's guest-AS - * window (cpu.c). On bounds miss they trap into the CPU state and - * return zero; the dispatcher (or interpreter loop) observes the - * EMU_TRAP_FAULT on the next poll. */ +static u64 emu_mem_load_checked(EmuThread* t, u64 addr, u64 nbytes, + u8 access, u64 fault_pc, u64 next_pc, + u64* value_out) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, nbytes, access); + u64 v = 0; + u64 i; + if (!value_out) { + emu_cpu_trap_fault(s); + return fault_pc ? 
fault_pc : next_pc; + } + if (!p) return emu_deliver_memory_fault(t, addr, access, fault_pc, next_pc); + for (i = 0; i < nbytes; ++i) v |= ((u64)p[i]) << (8u * (u32)i); + *value_out = v; + return 0; +} -u8 emu_mem_load8(EmuCPUState* s, u64 addr) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 1); +u8 emu_mem_load8(EmuThread* t, u64 addr) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 1, EMU_MEM_READ); if (!p) { emu_cpu_trap_fault(s); return 0; } return p[0]; } -u16 emu_mem_load16(EmuCPUState* s, u64 addr) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 2); +u16 emu_mem_load16(EmuThread* t, u64 addr) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 2, EMU_MEM_READ); if (!p) { emu_cpu_trap_fault(s); return 0; } return (u16)p[0] | ((u16)p[1] << 8); } -u32 emu_mem_load32(EmuCPUState* s, u64 addr) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 4); +u32 emu_mem_load32(EmuThread* t, u64 addr) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 4, EMU_MEM_READ); if (!p) { emu_cpu_trap_fault(s); return 0; } return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); } -u64 emu_mem_load64(EmuCPUState* s, u64 addr) { - u32 lo = emu_mem_load32(s, addr); - u32 hi = emu_mem_load32(s, addr + 4u); +u64 emu_mem_load64(EmuThread* t, u64 addr) { + u32 lo = emu_mem_load32(t, addr); + u32 hi = emu_mem_load32(t, addr + 4u); return (u64)lo | ((u64)hi << 32); } -void emu_mem_store8(EmuCPUState* s, u64 addr, u8 v) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 1); +u64 emu_mem_load8_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out) { + return emu_mem_load_checked(t, addr, 1u, EMU_MEM_READ, fault_pc, next_pc, + value_out); +} + +u64 emu_mem_load16_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out) { + return emu_mem_load_checked(t, addr, 2u, EMU_MEM_READ, fault_pc, next_pc, + value_out); +} + +u64 emu_mem_load32_checked(EmuThread* t, 
u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out) { + return emu_mem_load_checked(t, addr, 4u, EMU_MEM_READ, fault_pc, next_pc, + value_out); +} + +u64 emu_mem_load64_checked(EmuThread* t, u64 addr, u64 fault_pc, u64 next_pc, + u64* value_out) { + return emu_mem_load_checked(t, addr, 8u, EMU_MEM_READ, fault_pc, next_pc, + value_out); +} + +u64 emu_mem_store8(EmuThread* t, u64 addr, u8 v, u64 fault_pc, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 1, EMU_MEM_WRITE); if (!p) { - emu_cpu_trap_fault(s); - return; + return emu_deliver_memory_fault(t, addr, EMU_MEM_WRITE, fault_pc, next_pc); } p[0] = v; + return next_pc; } -void emu_mem_store16(EmuCPUState* s, u64 addr, u16 v) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 2); +u64 emu_mem_store16(EmuThread* t, u64 addr, u16 v, u64 fault_pc, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 2, EMU_MEM_WRITE); if (!p) { - emu_cpu_trap_fault(s); - return; + return emu_deliver_memory_fault(t, addr, EMU_MEM_WRITE, fault_pc, next_pc); } p[0] = (u8)v; p[1] = (u8)(v >> 8); + return next_pc; } -void emu_mem_store32(EmuCPUState* s, u64 addr, u32 v) { - u8* p = emu_cpu_va_to_host_pub(s, addr, 4); +u64 emu_mem_store32(EmuThread* t, u64 addr, u32 v, u64 fault_pc, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 4, EMU_MEM_WRITE); if (!p) { - emu_cpu_trap_fault(s); - return; + return emu_deliver_memory_fault(t, addr, EMU_MEM_WRITE, fault_pc, next_pc); } p[0] = (u8)v; p[1] = (u8)(v >> 8); p[2] = (u8)(v >> 16); p[3] = (u8)(v >> 24); + return next_pc; } -void emu_mem_store64(EmuCPUState* s, u64 addr, u64 v) { - emu_mem_store32(s, addr, (u32)v); - emu_mem_store32(s, addr + 4u, (u32)(v >> 32)); +u64 emu_mem_store64(EmuThread* t, u64 addr, u64 v, u64 fault_pc, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(t); + u8* p = emu_cpu_va_to_host_perm(s, addr, 8, EMU_MEM_WRITE); + if (!p) { + return 
emu_deliver_memory_fault(t, addr, EMU_MEM_WRITE, fault_pc, next_pc); + } + p[0] = (u8)v; + p[1] = (u8)(v >> 8); + p[2] = (u8)(v >> 16); + p[3] = (u8)(v >> 24); + p[4] = (u8)(v >> 32); + p[5] = (u8)(v >> 40); + p[6] = (u8)(v >> 48); + p[7] = (u8)(v >> 56); + return next_pc; } -/* ============================================================ - * Syscall handler — Linux / riscv64 ABI subset - * ============================================================ - * - * Reads syscall number from a7 and args from a0-a5, dispatches to a - * host-side handler, writes the return into a0. Linux/riscv64 - * syscall numbers (the asm-generic table that musl & glibc use): - * - * read 63 - * write 64 - * close 57 - * fstat 80 - * exit 93 - * exit_group 94 - * brk 214 - * mmap 222 - * - * The emulator is freestanding from the libcfree side; we cannot - * actually issue host syscalls without dragging libc into the - * allowlist. v1 routes guest stdio writes nowhere (the caller can - * subscribe via a hook in a later round); the only syscalls with - * real side effects on the CPU state are exit/exit_group and brk. - * That's enough to land the smoke test (which calls exit_group(42)). 
- */ - -#define SYS_openat 56u -#define SYS_close 57u -#define SYS_lseek 62u -#define SYS_read 63u -#define SYS_write 64u -#define SYS_readv 65u -#define SYS_writev 66u -#define SYS_fstat 80u -#define SYS_exit 93u -#define SYS_exit_group 94u -#define SYS_set_tid_address 96u -#define SYS_clock_gettime 113u -#define SYS_sched_yield 124u -#define SYS_rt_sigaction 134u -#define SYS_rt_sigprocmask 135u -#define SYS_rt_sigreturn 139u -#define SYS_getpid 172u -#define SYS_getuid 174u -#define SYS_geteuid 175u -#define SYS_getgid 176u -#define SYS_getegid 177u -#define SYS_brk 214u -#define SYS_mmap 222u - -void emu_syscall(EmuCPUState* s) { - u64 nr = emu_cpu_xreg(s, 17u); /* a7 */ - u64 a0 = emu_cpu_xreg(s, 10u); - u64 a1 = emu_cpu_xreg(s, 11u); - u64 a2 = emu_cpu_xreg(s, 12u); - /* a3..a5 reserved for future syscalls. */ - i64 ret = -38; /* -ENOSYS */ - - switch (nr) { - case SYS_exit: - case SYS_exit_group: - emu_cpu_trap_exit(s, (int)(i32)a0); - return; /* don't write a return into a0; the dispatcher exits */ - - case SYS_write: { - /* Bounds-check the buffer through the AS window. The bytes are - * not actually delivered anywhere in v1 — guest stdio is - * silent. Returning a1 lets musl believe the write succeeded - * and continue without spinning. */ - u8* p = emu_cpu_va_to_host_pub(s, a1, a2); - if (!p) { - ret = -14; /* -EFAULT */ - } else { - (void)a0; /* fd ignored */ - ret = (i64)a2; - } - break; - } - - case SYS_read: - /* No stdin in v1; return 0 (EOF) for fd 0, EBADF otherwise. */ - ret = a0 == 0u ? 0 : -9; - break; - - case SYS_close: - ret = 0; - break; - - case SYS_brk: { - u64 req = a0; - u64 cur = emu_cpu_brk_cur(s); - u64 max = emu_cpu_brk_max(s); - if (req == 0) { - ret = (i64)cur; - } else if (req >= cur && req <= max) { - emu_cpu_set_brk_cur(s, req); - ret = (i64)req; - } else { - /* Linux returns the current brk on failure. 
*/ - ret = (i64)cur; - } - break; - } - - case SYS_mmap: { - /* Anonymous, fixed-length mmap is satisfied by allocating from - * the brk window — good enough for malloc bring-up. Any other - * shape returns -ENOMEM so the libc falls back to brk. */ - u64 length = a1; - u64 cur = emu_cpu_brk_cur(s); - u64 max = emu_cpu_brk_max(s); - length = (length + 0xfffu) & ~0xfffull; - if (length == 0 || cur + length > max) { - ret = -12; /* -ENOMEM */ - } else { - u64 base = cur; - emu_cpu_set_brk_cur(s, cur + length); - ret = (i64)base; - } - break; - } - - case SYS_fstat: - /* Stat the guest pointer with a zero'd struct stat. musl reads - * st_mode to learn whether stdout is a tty; clearing the buffer - * makes it look like a regular file. */ - { - u8* p = emu_cpu_va_to_host_pub(s, a1, 128u); - if (!p) { - ret = -14; - } else { - memset(p, 0, 128u); - ret = 0; - } - } - break; - - case SYS_openat: - /* Pretend every open fails with ENOENT so musl returns a sane - * errno to the guest. We do not maintain a guest fd table. */ - ret = -2; - break; - - case SYS_lseek: - /* No-op seek: claim we landed at the requested offset. */ - ret = (i64)a1; - break; - - case SYS_readv: { - /* Iovec array: each entry is {void* iov_base; size_t iov_len}. - * We don't actually populate the buffers (no input source) — just - * return 0 to signal EOF. Validate the iovec footprint so we trap - * on bad pointers. */ - u8* p = emu_cpu_va_to_host_pub(s, a1, a2 * 16u); - if (!p) ret = -14; - else ret = 0; - break; - } - - case SYS_writev: { - /* Sum iov_len across the array. Bytes are silently dropped (same - * as SYS_write today). 
*/ - u8* p = emu_cpu_va_to_host_pub(s, a1, a2 * 16u); - u64 total = 0; - u64 i; - if (!p) { - ret = -14; - break; - } - for (i = 0; i < a2; ++i) { - u64 base = 0, l = 0; - u32 j; - for (j = 0; j < 8u; ++j) base |= ((u64)p[i * 16u + j]) << (8u * j); - for (j = 0; j < 8u; ++j) l |= ((u64)p[i * 16u + 8u + j]) << (8u * j); - (void)base; - total += l; - } - ret = (i64)total; - break; - } +static void emu_syscall_decoded(EmuThread* thread, + const EmuSyscallRequest* req) { + EmuCPUState* s; + EmuProcess* process; + EmuSyscallResult result; + CfreeStatus st; + + s = emu_thread_cpu(thread); + process = thread ? thread->process : NULL; + if (!s || !thread || !process || !req || !process->os || + !process->os->emu_encode_syscall_result || !process->bindings.syscall) { + emu_cpu_trap_fault(s); + return; + } + st = process->bindings.syscall(process->bindings.user, process, thread, req, + &result); + if (st != CFREE_OK) { + emu_cpu_trap_fault(s); + return; + } - case SYS_set_tid_address: - /* No real threads — return a fixed tid. */ - ret = 1; - break; - - case SYS_clock_gettime: { - /* timespec {time_t tv_sec; long tv_nsec}: 16 bytes. We hand back - * zero so guest libc gets a monotonically non-negative value - * without dragging the host clock in. */ - u8* p = emu_cpu_va_to_host_pub(s, a1, 16u); - if (!p) { - ret = -14; - } else { - memset(p, 0, 16u); - ret = 0; - } - break; - } + if (emu_cpu_trap_reason(s) == EMU_TRAP_EXIT) return; + if (!(result.flags & EMU_SYSCALL_RESULT_SKIP_ENCODE)) { + st = process->os->emu_encode_syscall_result(process, thread, &result); + if (st != CFREE_OK) emu_cpu_trap_fault(s); + } +} - case SYS_sched_yield: - ret = 0; - break; - - case SYS_rt_sigaction: - case SYS_rt_sigprocmask: - /* Pretend success; we never deliver signals to the guest. */ - ret = 0; - break; - - case SYS_rt_sigreturn: - /* No signal frame to restore. -ENOSYS is benign. 
*/ - ret = -38; - break; - - case SYS_getpid: - case SYS_getuid: - case SYS_geteuid: - case SYS_getgid: - case SYS_getegid: - /* Stable host-independent identity values. */ - ret = 1; - break; - - default: - ret = -38; - break; +void emu_syscall(EmuThread* thread) { + EmuCPUState* s = emu_thread_cpu(thread); + EmuProcess* process = thread ? thread->process : NULL; + EmuSyscallRequest req; + if (!s || !process || !process->os || !process->os->emu_decode_syscall) { + emu_cpu_trap_fault(s); + return; } + if (process->os->emu_decode_syscall(process, thread, &req) != CFREE_OK) { + emu_cpu_trap_fault(s); + return; + } + emu_syscall_decoded(thread, &req); +} - emu_cpu_set_xreg(s, 10u, (u64)ret); /* a0 */ +u64 emu_syscall_next(EmuThread* thread, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(thread); + EmuProcess* process = thread ? thread->process : NULL; + EmuSyscallRequest req; + if (!s || !process || !process->os || !process->os->emu_decode_syscall) { + emu_cpu_trap_fault(s); + return next_pc; + } + if (process->os->emu_decode_syscall(process, thread, &req) != CFREE_OK) { + emu_cpu_trap_fault(s); + return next_pc; + } + emu_syscall_decoded(thread, &req); + if (process->os->emu_syscall_next_pc && + emu_cpu_trap_reason(s) == EMU_TRAP_NONE) + return process->os->emu_syscall_next_pc(process, thread, &req, next_pc); + return next_pc; } /* ============================================================ * Extern resolver * ============================================================ - * Called by the linker for any undefined symbol the per-block - * ObjBuilder references. Returns the host VA of the named helper - * (or the running emu's CPUState). Returning NULL surfaces as a - * fatal "undefined reference" diagnostic from link_resolve_extend. */ + * Called by the linker for any undefined symbol the per-block ObjBuilder + * references. Returns the host VA of the named helper or NULL for the + * linker's ordinary undefined-symbol diagnostic. 
*/ void* emu_runtime_extern_resolver(void* user, CfreeSlice name) { + CfreeSlice demangled; if (!name.s) return NULL; + demangled = name; + if (demangled.len > 2u && demangled.s[0] == '_' && demangled.s[1] == '_' && + demangled.s[2] == '_') { + demangled.s++; + demangled.len--; + } - if (cfree_slice_eq_cstr(name, EMU_SYM_CPU_STATE)) { + if (cfree_slice_eq_cstr(demangled, EMU_SYM_CPU_STATE)) { CfreeEmu* e = (CfreeEmu*)user; return (void*)emu_internal_cpu(e); } - if (cfree_slice_eq_cstr(name, EMU_SYM_LOAD8)) return (void*)emu_mem_load8; - if (cfree_slice_eq_cstr(name, EMU_SYM_LOAD16)) return (void*)emu_mem_load16; - if (cfree_slice_eq_cstr(name, EMU_SYM_LOAD32)) return (void*)emu_mem_load32; - if (cfree_slice_eq_cstr(name, EMU_SYM_LOAD64)) return (void*)emu_mem_load64; - if (cfree_slice_eq_cstr(name, EMU_SYM_STORE8)) return (void*)emu_mem_store8; - if (cfree_slice_eq_cstr(name, EMU_SYM_STORE16)) return (void*)emu_mem_store16; - if (cfree_slice_eq_cstr(name, EMU_SYM_STORE32)) return (void*)emu_mem_store32; - if (cfree_slice_eq_cstr(name, EMU_SYM_STORE64)) return (void*)emu_mem_store64; - if (cfree_slice_eq_cstr(name, EMU_SYM_SYSCALL)) return (void*)emu_syscall; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD8)) + return (void*)emu_mem_load8; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD16)) + return (void*)emu_mem_load16; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD32)) + return (void*)emu_mem_load32; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD64)) + return (void*)emu_mem_load64; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD8_CHECKED)) + return (void*)emu_mem_load8_checked; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD16_CHECKED)) + return (void*)emu_mem_load16_checked; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD32_CHECKED)) + return (void*)emu_mem_load32_checked; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_LOAD64_CHECKED)) + return (void*)emu_mem_load64_checked; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_STORE8)) + return 
(void*)emu_mem_store8; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_STORE16)) + return (void*)emu_mem_store16; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_STORE32)) + return (void*)emu_mem_store32; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_STORE64)) + return (void*)emu_mem_store64; + if (cfree_slice_eq_cstr(demangled, EMU_SYM_SYSCALL)) + return (void*)emu_syscall_next; + { + CfreeEmu* e = (CfreeEmu*)user; + EmuProcess* process = emu_internal_process(e); + if (process && process->arch && process->arch->emu && + process->arch->emu->resolve_runtime_helper) { + void* p = process->arch->emu->resolve_runtime_helper(user, demangled); + if (p) return p; + } + } /* EMU_SYM_DISPATCH is the cross-block tail-call helper; it shares * the host address of the dispatcher entry. The dispatcher loop @@ -514,7 +452,6 @@ void* emu_runtime_extern_resolver(void* user, CfreeSlice name) { * a return-of-next_pc instead of a real call here. v1 returns * NULL — lifters that don't yet emit DISPATCH calls are fine. */ - (void)no_loc; return NULL; } @@ -530,3 +467,8 @@ void emu_trace_block(Compiler* c, u64 pc) { (void)c; (void)pc; } +void emu_trace_insn(Compiler* c, u64 guest_pc, const CfreeDecodedInsn* insn) { + (void)c; + (void)guest_pc; + (void)insn; +} diff --git a/src/emu/rv64_ops.h b/src/emu/rv64_ops.h @@ -1,241 +0,0 @@ -/* RV64 op enum for the emulator decoder + interpreter. - * - * The decoder (src/emu/decode.c) writes one of these values into - * EmuInst.op for each instruction. The interpreter (cpu.c) and the - * eventual JIT lifter (lift.c) consume the enum to drive a switch. - * - * Coverage: RV64I + RV64M + RV32F + RV32D + RV64A + RVC (C extension) - * + Zicsr-minimal (fcsr/frm/fflags). FCVT/FSGNJ/FMIN/FMAX/FMADD/FMSUB - * families are wired alongside basic FP ops. 
*/ -#ifndef CFREE_EMU_RV64_OPS_H -#define CFREE_EMU_RV64_OPS_H - -typedef enum Rv64Op { - RV64_OP_ILLEGAL = 0, - RV64_OP_NOP, - - /* U-type */ - RV64_OP_LUI, - RV64_OP_AUIPC, - - /* Jumps */ - RV64_OP_JAL, - RV64_OP_JALR, - - /* Branches */ - RV64_OP_BEQ, - RV64_OP_BNE, - RV64_OP_BLT, - RV64_OP_BGE, - RV64_OP_BLTU, - RV64_OP_BGEU, - - /* Loads */ - RV64_OP_LB, - RV64_OP_LH, - RV64_OP_LW, - RV64_OP_LD, - RV64_OP_LBU, - RV64_OP_LHU, - RV64_OP_LWU, - - /* Stores */ - RV64_OP_SB, - RV64_OP_SH, - RV64_OP_SW, - RV64_OP_SD, - - /* ALU immediate */ - RV64_OP_ADDI, - RV64_OP_SLTI, - RV64_OP_SLTIU, - RV64_OP_XORI, - RV64_OP_ORI, - RV64_OP_ANDI, - RV64_OP_SLLI, - RV64_OP_SRLI, - RV64_OP_SRAI, - - /* ALU register */ - RV64_OP_ADD, - RV64_OP_SUB, - RV64_OP_SLL, - RV64_OP_SLT, - RV64_OP_SLTU, - RV64_OP_XOR, - RV64_OP_SRL, - RV64_OP_SRA, - RV64_OP_OR, - RV64_OP_AND, - - /* W-form (RV64-only) */ - RV64_OP_ADDIW, - RV64_OP_SLLIW, - RV64_OP_SRLIW, - RV64_OP_SRAIW, - RV64_OP_ADDW, - RV64_OP_SUBW, - RV64_OP_SLLW, - RV64_OP_SRLW, - RV64_OP_SRAW, - - /* M extension */ - RV64_OP_MUL, - RV64_OP_MULH, - RV64_OP_MULHSU, - RV64_OP_MULHU, - RV64_OP_DIV, - RV64_OP_DIVU, - RV64_OP_REM, - RV64_OP_REMU, - RV64_OP_MULW, - RV64_OP_DIVW, - RV64_OP_DIVUW, - RV64_OP_REMW, - RV64_OP_REMUW, - - /* F / D loads & stores */ - RV64_OP_FLW, - RV64_OP_FLD, - RV64_OP_FSW, - RV64_OP_FSD, - - /* FP arithmetic */ - RV64_OP_FADD_S, - RV64_OP_FSUB_S, - RV64_OP_FMUL_S, - RV64_OP_FDIV_S, - RV64_OP_FADD_D, - RV64_OP_FSUB_D, - RV64_OP_FMUL_D, - RV64_OP_FDIV_D, - - /* FP compares */ - RV64_OP_FEQ_S, - RV64_OP_FLT_S, - RV64_OP_FLE_S, - RV64_OP_FEQ_D, - RV64_OP_FLT_D, - RV64_OP_FLE_D, - - /* FP bitcasts */ - RV64_OP_FMV_X_W, - RV64_OP_FMV_W_X, - RV64_OP_FMV_X_D, - RV64_OP_FMV_D_X, - - /* A extension */ - RV64_OP_LR_W, - RV64_OP_LR_D, - RV64_OP_SC_W, - RV64_OP_SC_D, - RV64_OP_AMOSWAP_W, - RV64_OP_AMOADD_W, - RV64_OP_AMOXOR_W, - RV64_OP_AMOAND_W, - RV64_OP_AMOOR_W, - RV64_OP_AMOMIN_W, - RV64_OP_AMOMAX_W, - 
RV64_OP_AMOMINU_W, - RV64_OP_AMOMAXU_W, - RV64_OP_AMOSWAP_D, - RV64_OP_AMOADD_D, - RV64_OP_AMOXOR_D, - RV64_OP_AMOAND_D, - RV64_OP_AMOOR_D, - RV64_OP_AMOMIN_D, - RV64_OP_AMOMAX_D, - RV64_OP_AMOMINU_D, - RV64_OP_AMOMAXU_D, - - /* FP sign-injection (S/D) */ - RV64_OP_FSGNJ_S, - RV64_OP_FSGNJN_S, - RV64_OP_FSGNJX_S, - RV64_OP_FSGNJ_D, - RV64_OP_FSGNJN_D, - RV64_OP_FSGNJX_D, - - /* FP min/max */ - RV64_OP_FMIN_S, - RV64_OP_FMAX_S, - RV64_OP_FMIN_D, - RV64_OP_FMAX_D, - - /* FP sqrt */ - RV64_OP_FSQRT_S, - RV64_OP_FSQRT_D, - - /* FP conversions: int<->fp (S = single, D = double) */ - RV64_OP_FCVT_W_S, - RV64_OP_FCVT_WU_S, - RV64_OP_FCVT_L_S, - RV64_OP_FCVT_LU_S, - RV64_OP_FCVT_S_W, - RV64_OP_FCVT_S_WU, - RV64_OP_FCVT_S_L, - RV64_OP_FCVT_S_LU, - RV64_OP_FCVT_W_D, - RV64_OP_FCVT_WU_D, - RV64_OP_FCVT_L_D, - RV64_OP_FCVT_LU_D, - RV64_OP_FCVT_D_W, - RV64_OP_FCVT_D_WU, - RV64_OP_FCVT_D_L, - RV64_OP_FCVT_D_LU, - /* Single<->double */ - RV64_OP_FCVT_S_D, - RV64_OP_FCVT_D_S, - - /* FP classify */ - RV64_OP_FCLASS_S, - RV64_OP_FCLASS_D, - - /* Fused multiply-add (R4-type). rs3 is encoded in aux. */ - RV64_OP_FMADD_S, - RV64_OP_FMSUB_S, - RV64_OP_FNMSUB_S, - RV64_OP_FNMADD_S, - RV64_OP_FMADD_D, - RV64_OP_FMSUB_D, - RV64_OP_FNMSUB_D, - RV64_OP_FNMADD_D, - - /* Zicsr — CSR access. The immediate value carries the CSR index; - * funct3 distinguishes the variant. */ - RV64_OP_CSRRW, - RV64_OP_CSRRS, - RV64_OP_CSRRC, - RV64_OP_CSRRWI, - RV64_OP_CSRRSI, - RV64_OP_CSRRCI, - - /* System / misc */ - RV64_OP_ECALL, - RV64_OP_EBREAK, - RV64_OP_FENCE, -} Rv64Op; - -/* EmuInst.flags bits */ -#define RV64_INST_FLAG_TERMINATOR 0x1u - -/* Internal: extra accessors used by elf_load + runtime + syscall layer. 
*/ -struct EmuCPUState; -void emu_cpu_attach_mem(struct EmuCPUState*, unsigned char* base, u64 va_base, - u64 size, u64 brk_cur, u64 brk_max); -unsigned char* emu_cpu_guest_base(const struct EmuCPUState*); -u64 emu_cpu_guest_va_base(const struct EmuCPUState*); -u64 emu_cpu_guest_size(const struct EmuCPUState*); -unsigned char* emu_cpu_va_to_host_pub(struct EmuCPUState*, u64 va, u64 nbytes); -u64 emu_cpu_xreg(const struct EmuCPUState*, u32 i); -void emu_cpu_set_xreg(struct EmuCPUState*, u32 i, u64 v); -u64 emu_cpu_brk_cur(const struct EmuCPUState*); -u64 emu_cpu_brk_max(const struct EmuCPUState*); -void emu_cpu_set_brk_cur(struct EmuCPUState*, u64 v); -void emu_cpu_trap_exit(struct EmuCPUState*, int code); -void emu_cpu_trap_fault(struct EmuCPUState*); - -/* Interpreter entry; emu_decode_block produced the EmuInsts. */ -u32 emu_cpu_interp_block(struct EmuCPUState*, const EmuInst* insts, u32 n); - -#endif diff --git a/src/emu/signal.c b/src/emu/signal.c @@ -0,0 +1,12 @@ +#include "emu/emu.h" + +CfreeStatus emu_fault_deliver(EmuProcess* process, EmuThread* thread, + const EmuFaultEvent* ev, u64* next_pc_out) { + if (!process || !thread || !ev || !next_pc_out) return CFREE_INVALID; + if (!process->os || !process->os->emu_deliver_fault) { + emu_cpu_trap_fault(emu_thread_cpu(thread)); + *next_pc_out = ev->pc ? ev->pc : ev->next_pc; + return CFREE_OK; + } + return process->os->emu_deliver_fault(process, thread, ev, next_pc_out); +} diff --git a/src/emu/tls.c b/src/emu/tls.c @@ -0,0 +1,121 @@ +#include <string.h> + +#include "emu/emu.h" + +static u64 round_up(u64 v, u64 a) { + if (!a) return v; + return (v + a - 1u) & ~(a - 1u); +} + +static CfreeStatus ensure_tls_module_cap(Compiler* c, EmuTlsState* st, + u32 need) { + Heap* heap = c->ctx->heap; + u32 old_cap; + u32 new_cap; + EmuTlsModule* grown; + if (st->modules_cap >= need) return CFREE_OK; + old_cap = st->modules_cap; + new_cap = old_cap ? 
old_cap * 2u : 4u; + while (new_cap < need) new_cap *= 2u; + grown = (EmuTlsModule*)heap->realloc( + heap, st->modules, sizeof(*st->modules) * old_cap, + sizeof(*st->modules) * new_cap, _Alignof(EmuTlsModule)); + if (!grown) return CFREE_NOMEM; + memset(grown + old_cap, 0, sizeof(*grown) * (new_cap - old_cap)); + st->modules = grown; + st->modules_cap = new_cap; + return CFREE_OK; +} + +static CfreeStatus ensure_tls_block_cap(Compiler* c, EmuTlsBlocks* blocks, + u32 need) { + Heap* heap = c->ctx->heap; + u32 old_cap; + u32 new_cap; + EmuTlsBlock* grown; + if (blocks->blocks_cap >= need) return CFREE_OK; + old_cap = blocks->blocks_cap; + new_cap = old_cap ? old_cap * 2u : 4u; + while (new_cap < need) new_cap *= 2u; + grown = (EmuTlsBlock*)heap->realloc( + heap, blocks->blocks, sizeof(*blocks->blocks) * old_cap, + sizeof(*blocks->blocks) * new_cap, _Alignof(EmuTlsBlock)); + if (!grown) return CFREE_NOMEM; + memset(grown + old_cap, 0, sizeof(*grown) * (new_cap - old_cap)); + blocks->blocks = grown; + blocks->blocks_cap = new_cap; + return CFREE_OK; +} + +CfreeStatus emu_tls_rebuild_modules(Compiler* c, EmuProcess* process) { + EmuTlsState* st; + u32 i; + u32 next_id = 1u; + if (!c || !process) return CFREE_INVALID; + st = &process->tls_state; + st->nmodules = 0; + st->static_size = 0; + st->static_align = 1u; + for (i = 0; i < process->image.link_map.nobjects; ++i) { + EmuTlsModule m = process->image.link_map.objects[i].tls; + u64 align; + if (!m.memsz && !m.filesz) continue; + if (ensure_tls_module_cap(c, st, st->nmodules + 1u) != CFREE_OK) + return CFREE_NOMEM; + m.module_id = next_id++; + process->image.link_map.objects[i].tls.module_id = m.module_id; + st->modules[st->nmodules++] = m; + align = m.align ? 
m.align : 1u; + if (align > st->static_align) st->static_align = align; + st->static_size = round_up(st->static_size, align) + m.memsz; + } + return CFREE_OK; +} + +CfreeStatus emu_tls_blocks_add(Compiler* c, EmuTlsBlocks* blocks, + u32 module_id, u64 base, u64 memsz) { + EmuTlsBlock* b; + if (!c || !blocks || !module_id) return CFREE_INVALID; + if (ensure_tls_block_cap(c, blocks, blocks->nblocks + 1u) != CFREE_OK) + return CFREE_NOMEM; + b = &blocks->blocks[blocks->nblocks++]; + b->module_id = module_id; + b->base = base; + b->memsz = memsz; + return CFREE_OK; +} + +CfreeStatus emu_tls_copy_module_image(EmuProcess* process, + const EmuTlsModule* module, u64 base) { + u8* src; + if (!process || !module) return CFREE_INVALID; + if (!module->filesz) return CFREE_OK; + src = emu_addr_space_ptr(&process->image.addr_space, module->image_vaddr, + module->filesz, EMU_MEM_READ); + if (!src) return CFREE_ERR; + return emu_addr_space_copy_in(&process->image.addr_space, base, src, + module->filesz); +} + +void emu_tls_destroy_process(Compiler* c, EmuProcess* process) { + Heap* heap; + if (!c || !process) return; + heap = c->ctx->heap; + if (process->tls_state.modules) { + heap->free(heap, process->tls_state.modules, + sizeof(*process->tls_state.modules) * + process->tls_state.modules_cap); + } + memset(&process->tls_state, 0, sizeof(process->tls_state)); +} + +void emu_tls_destroy_blocks(Compiler* c, EmuTlsBlocks* blocks) { + Heap* heap; + if (!c || !blocks) return; + heap = c->ctx->heap; + if (blocks->blocks) { + heap->free(heap, blocks->blocks, + sizeof(*blocks->blocks) * blocks->blocks_cap); + } + memset(blocks, 0, sizeof(*blocks)); +} diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c @@ -1,516 +0,0 @@ -/* Per-arch relocation application. - * - * Pure function: takes the resolved final addresses (S, P) and the - * addend (A), and patches `width` bytes at the relocation site. 
- * Callers (link_emit_elf, cfree_jit_from_image) compute the - * runtime base offset themselves; this routine sees only final values. - * - * Encoding references: - * AArch64: ARM ARMv8-A "ELF for the ARM 64-bit Architecture (AArch64)" - * §5.7 (relocation types). - * RISC-V: "RISC-V ELF psABI specification" §3 (relocation types) and - * "The RISC-V Instruction Set Manual, Volume I" Chapter 19 - * (instruction encodings). Reloc semantics live behind the - * R_RV_* RelocKind values; LO12_S sits at the S-type imm - * slots, LO12_I at I-type, and BRANCH/JAL at B/J-type. */ - -#include <string.h> - -#include "core/bytes.h" -#include "link/link_internal.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, - u64 P) { - switch (k) { - case R_ABS32: - case R_X64_32S: - case R_X64_TPOFF32: { - /* All three write a 32-bit value at the site. ABS32 / _32S - * take an absolute (unsigned / sign-extended) symbol address; - * TPOFF32 takes the (caller-precomputed) TP-relative offset. - * At the byte level the encoding is identical. */ - u64 v = S + (u64)A; - wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); - return; - } - case R_ABS64: - case R_X64_TPOFF64: - case R_X64_RELATIVE: { - /* R_X64_RELATIVE: (S + A) — for static-with-relocs paths the - * linker writes the relocated value directly; the dynamic - * loader would otherwise do the same fixup at load time. */ - u64 v = S + (u64)A; - wr_u64_le(P_bytes, v); - return; - } - case R_X64_GLOB_DAT: - case R_X64_JUMP_SLOT: { - /* Dynamic linker normally applies these; for static-with-relocs - * paths we write the resolved symbol value (S) into the GOT/PLT - * slot. Addend is unused per the x86_64 psABI. 
*/ - wr_u64_le(P_bytes, S); - return; - } - case R_X64_COPY: - compiler_panic(c, no_loc(), - "link: R_X64_COPY belongs in dynamic loader, " - "not static link"); - return; - case R_REL32: - case R_PC32: - case R_X64_PLT32: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - case R_X64_GOTPC32: { - /* AArch64 ELF: PREL32 maps to either of these; both encode a - * 32-bit signed PC-relative displacement. The cfree-canonical - * distinction (section-relative vs PC-relative) collapses on - * AArch64 because the linker resolves to absolute vaddrs. - * - * x86_64 PLT32: in a static link there is no PLT, so the - * displacement collapses to a plain 32-bit PC-relative call. */ - i64 v = (i64)S + A - (i64)P; - wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu)); - return; - } - case R_X64_PC8: { - i64 v = (i64)S + A - (i64)P; - if (v < -128 || v > 127) - compiler_panic(c, no_loc(), "link: X64_PC8 out of range"); - P_bytes[0] = (u8)((u64)v & 0xffu); - return; - } - case R_REL64: - case R_PC64: { - /* 64-bit PC-relative; AArch64 R_AARCH64_PREL64. Used by - * `.quad sym1 - sym2` style symbol-difference encodings (e.g. - * the arm64 kernel image_size header field). */ - i64 v = (i64)S + A - (i64)P; - wr_u64_le(P_bytes, (u64)v); - return; - } - case R_AARCH64_ABS16: { - u64 v = S + (u64)A; - wr_u16_le(P_bytes, (u16)(v & 0xffffu)); - return; - } - case R_AARCH64_PREL16: { - i64 v = (i64)S + A - (i64)P; - wr_u16_le(P_bytes, (u16)((u64)v & 0xffffu)); - return; - } - case R_AARCH64_CONDBR19: - case R_AARCH64_LD_PREL_LO19: { - /* B.cond / CB(N)Z / LDR (literal) — imm19 in 4-byte units, - * signed, at bits [23:5]. Range: ±1MiB. 
*/ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 imm19; - if (disp & 3) - compiler_panic(c, no_loc(), - "link: imm19 reloc misaligned displacement"); - if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) - compiler_panic(c, no_loc(), - "link: imm19 reloc out of range (need ±1MiB)"); - imm19 = (u32)((disp >> 2) & 0x7ffffu); - instr = rd_u32_le(P_bytes); - instr = (instr & ~(0x7ffffu << 5)) | (imm19 << 5); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_TSTBR14: { - /* TBZ/TBNZ — imm14 in 4-byte units, signed, at bits [18:5]. - * Range: ±32KiB. */ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 imm14; - if (disp & 3) - compiler_panic(c, no_loc(), "link: TSTBR14 misaligned displacement"); - if (disp < -(i64)(1 << 15) || disp >= (i64)(1 << 15)) - compiler_panic(c, no_loc(), "link: TSTBR14 out of range (need ±32KiB)"); - imm14 = (u32)((disp >> 2) & 0x3fffu); - instr = rd_u32_le(P_bytes); - instr = (instr & ~(0x3fffu << 5)) | (imm14 << 5); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_ADR_PREL_LO21: { - /* ADR — byte-granularity imm21, encoded as immlo[30:29] + - * immhi[23:5]. No 12-bit shift (unlike ADRP). Range: ±1MiB. */ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 immlo, immhi; - if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) - compiler_panic(c, no_loc(), - "link: ADR_PREL_LO21 out of range (need ±1MiB)"); - immlo = (u32)(disp & 0x3u); - immhi = (u32)((disp >> 2) & 0x7ffffu); - instr = rd_u32_le(P_bytes); - instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: { - /* B/BL imm26 — branch displacement in 4-byte units, signed. - * Clear bits [25:0] of the existing instruction and OR in the - * new imm26. Range check: ±128MiB. 
*/ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 imm26; - if (disp & 3) - compiler_panic(c, no_loc(), "link: CALL26 misaligned displacement"); - if (disp < -(i64)(1 << 27) || disp >= (i64)(1 << 27)) - compiler_panic(c, no_loc(), "link: CALL26 out of range (need ±128MiB)"); - imm26 = (u32)((disp >> 2) & 0x3ffffffu); - instr = rd_u32_le(P_bytes); - instr = (instr & 0xfc000000u) | imm26; - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_TLVP_LOAD_PAGE21: - case R_AARCH64_ADR_GOT_PAGE: - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: { - /* ADRP — page-relative imm21, encoded as immlo[30:29] + - * immhi[23:5]. Effective immediate is (S+A) page minus P page, - * shifted right by 12, sign-extended to 33 bits. _NC variant - * skips the range check (compiler asserts it can't overflow, - * e.g. when paired with explicit page bracketing). */ - i64 page_s = ((i64)S + A) & ~(i64)0xfff; - i64 page_p = (i64)P & ~(i64)0xfff; - i64 disp = page_s - page_p; - i64 imm21 = disp >> 12; - u32 instr; - u32 immlo, immhi; - if (k != R_AARCH64_ADR_PREL_PG_HI21_NC && - (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20))) - compiler_panic(c, no_loc(), - "link: ADR_PREL_PG_HI21 out of range (need ±4GiB)"); - immlo = (u32)(imm21 & 0x3u); - immhi = (u32)((imm21 >> 2) & 0x7ffffu); - instr = rd_u32_le(P_bytes); - instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_ADD_ABS_LO12_NC: { - /* ADD (immediate) imm12 at bits [21:10]. NC = no overflow check. */ - u64 v = ((u64)S + (u64)A) & 0xfffu; - u32 instr = rd_u32_le(P_bytes); - instr = (instr & ~(0xfffu << 10)) | ((u32)v << 10); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_TLSLE_ADD_TPREL_HI12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: { - /* AArch64 TLS local-exec. Caller passes S already as the - * TP-relative offset (target's image offset minus the TLS - * image base, plus the 16-byte AArch64 TCB). 
HI12 takes - * bits 23:12, LO12_NC takes bits 11:0; both encoded as - * imm12 at instruction bits [21:10] of an ADD (immediate). - * The HI12 form's instruction carries LSL #12 in its opcode, - * so bits 11:0 of the operand naturally land at scale 4096. */ - u64 v = (u64)((i64)S + A); - u32 imm12 = (k == R_AARCH64_TLSLE_ADD_TPREL_HI12) - ? (u32)((v >> 12) & 0xfffu) - : (u32)(v & 0xfffu); - u32 instr = rd_u32_le(P_bytes); - instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); - wr_u32_le(P_bytes, instr); - return; - } - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - case R_AARCH64_LD64_GOT_LO12_NC: - case R_AARCH64_TLVP_LOAD_PAGEOFF12: { - /* LDR/STR with imm12 at bits [21:10]; the imm is scaled by the - * access size, so we right-shift the low 12 bits of (S+A) by - * the size scale before encoding. NC = no overflow check. - * - * LD64_GOT_LO12_NC has the same encoding as LDST64_ABS_LO12_NC; - * the linker has already redirected `S` to the GOT slot. */ - u32 shift = (k == R_AARCH64_LDST8_ABS_LO12_NC) ? 0u - : (k == R_AARCH64_LDST16_ABS_LO12_NC) ? 1u - : (k == R_AARCH64_LDST32_ABS_LO12_NC) ? 2u - : (k == R_AARCH64_LDST64_ABS_LO12_NC || - k == R_AARCH64_LD64_GOT_LO12_NC || - k == R_AARCH64_TLVP_LOAD_PAGEOFF12) - ? 3u - : 4u; - u64 lo12 = ((u64)S + (u64)A) & 0xfffu; - u64 imm12 = lo12 >> shift; - u32 instr = rd_u32_le(P_bytes); - if (lo12 & ((1u << shift) - 1u)) - compiler_panic(c, no_loc(), - "link: LDST%u_ABS_LO12_NC misaligned address " - "(kind=%u S=0x%llx A=%lld P=0x%llx)", - 1u << (3 + shift), (unsigned)k, - (unsigned long long)S, (long long)A, - (unsigned long long)P); - instr = (instr & ~(0xfffu << 10)) | ((u32)(imm12 & 0xfffu) << 10); - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_HI20: - case R_RV_TPREL_HI20: { - /* U-type (LUI/AUIPC) imm[31:12] = high 20 bits of (S + A + 0x800). 
- * The 0x800 bias compensates the sign-extension of the paired - * 12-bit ADDI/load/store immediate, so HI20 + signext12(LO12) - * reconstructs the full value. */ - i64 v = (i64)S + A; - u32 hi20 = (u32)(((u64)(v + 0x800)) >> 12) & 0xfffffu; - u32 instr = rd_u32_le(P_bytes); - instr = (instr & 0x00000fffu) | (hi20 << 12); - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_PCREL_HI20: - case R_RV_GOT_HI20: { - /* AUIPC pc-relative HI20: same encoding as HI20 but the - * displacement is (S + A) - P. The paired PCREL_LO12 reloc at - * the ADDI/load below recovers the low 12 bits of the same - * displacement via a lookup keyed on this AUIPC's site vaddr. - * GOT_HI20 collapses to PCREL_HI20 in static-link with no - * indirection: the symbol resolves to its own address. */ - i64 disp = (i64)S + A - (i64)P; - u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; - u32 instr = rd_u32_le(P_bytes); - instr = (instr & 0x00000fffu) | (hi20 << 12); - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_LO12_I: - case R_RV_TPREL_LO12_I: { - /* I-type imm[11:0] in instruction bits [31:20]. Low 12 bits of - * (S + A); the sign-extension at execute time pairs with HI20's - * 0x800 bias to reconstruct the full address. */ - u64 v = (u64)((i64)S + A); - u32 lo12 = (u32)(v & 0xfffu); - u32 instr = rd_u32_le(P_bytes); - instr = (instr & 0x000fffffu) | (lo12 << 20); - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_LO12_S: - case R_RV_TPREL_LO12_S: { - /* S-type imm[11:5] in bits [31:25], imm[4:0] in bits [11:7]. */ - u64 v = (u64)((i64)S + A); - u32 lo12 = (u32)(v & 0xfffu); - u32 instr = rd_u32_le(P_bytes); - instr = (instr & 0x01fff07fu) | ((lo12 & 0xfe0u) << 20) | - ((lo12 & 0x1fu) << 7); - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_BRANCH: { - /* B-type 12-bit signed displacement in 2-byte units (13-bit - * range). imm[12] in bit 31, imm[10:5] in 30:25, imm[4:1] in - * 11:8, imm[11] in bit 7. 
*/ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 b; - if (disp & 1) - compiler_panic(c, no_loc(), "link: RV BRANCH misaligned displacement"); - if (disp < -(i64)(1 << 12) || disp >= (i64)(1 << 12)) - compiler_panic(c, no_loc(), "link: RV BRANCH out of range (need ±4KiB)"); - b = (u32)((u64)disp & 0x1ffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) | - ((u32)(((u64)disp >> 12) & 1u) << 12); - instr = rd_u32_le(P_bytes); - instr &= 0x01fff07fu; - instr |= ((b >> 12) & 1u) << 31; - instr |= ((b >> 5) & 0x3fu) << 25; - instr |= ((b >> 1) & 0xfu) << 8; - instr |= ((b >> 11) & 1u) << 7; - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_JAL: { - /* J-type 20-bit signed displacement in 2-byte units (21-bit - * range). imm[20] in bit 31, imm[10:1] in 30:21, imm[11] in bit - * 20, imm[19:12] in bits 19:12. */ - i64 disp = (i64)S + A - (i64)P; - u32 instr; - u32 b; - if (disp & 1) - compiler_panic(c, no_loc(), "link: RV JAL misaligned displacement"); - if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) - compiler_panic(c, no_loc(), "link: RV JAL out of range (need ±1MiB)"); - b = (u32)((u64)disp & 0x1ffffeu) | - ((u32)(((u64)disp >> 11) & 1u) << 11) | - ((u32)(((u64)disp >> 20) & 1u) << 20); - instr = rd_u32_le(P_bytes); - instr &= 0x00000fffu; - instr |= ((b >> 20) & 1u) << 31; - instr |= ((b >> 1) & 0x3ffu) << 21; - instr |= ((b >> 11) & 1u) << 20; - instr |= ((b >> 12) & 0xffu) << 12; - wr_u32_le(P_bytes, instr); - return; - } - case R_RV_CALL: - case R_PLT32: { - /* AUIPC + JALR pair encoding the same 32-bit signed PC-relative - * displacement. AUIPC at P, JALR at P+4. The 0x800 bias on the - * AUIPC immediate compensates JALR's signed 12-bit imm so that - * (auipc_imm << 12) + signext12(jalr_imm) == disp. - * - * R_PLT32 is the cfree-canonical RelocKind that - * elf_riscv64_reloc_from(R_RISCV_CALL_PLT) maps to; static-link - * with no PLT collapses CALL_PLT to a direct CALL (no - * indirection). 
*/ - i64 disp = (i64)S + A - (i64)P; - u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; - u32 lo12 = (u32)((u64)disp & 0xfffu); - u32 auipc = rd_u32_le(P_bytes); - u32 jalr = rd_u32_le(P_bytes + 4); - if (disp < -(i64)(1ll << 31) || disp >= (i64)(1ll << 31)) - compiler_panic(c, no_loc(), "link: RV CALL out of range (need ±2GiB)"); - auipc = (auipc & 0x00000fffu) | (hi20 << 12); - jalr = (jalr & 0x000fffffu) | (lo12 << 20); - wr_u32_le(P_bytes, auipc); - wr_u32_le(P_bytes + 4, jalr); - return; - } - case R_RV_RVC_BRANCH: { - /* CB-type 8-bit signed displacement in 2-byte units (9-bit - * range). c.beqz / c.bnez. Encoding (16-bit instruction): - * bit 12 = imm[8] - * bits 11:10 = imm[4:3] - * bits 9:7 = rs1' (untouched) - * bits 6:5 = imm[7:6] - * bits 4:3 = imm[2:1] - * bit 2 = imm[5] */ - i64 disp = (i64)S + A - (i64)P; - u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8)); - u32 b; - if (disp & 1) - compiler_panic(c, no_loc(), - "link: RV RVC_BRANCH misaligned displacement"); - if (disp < -(i64)(1 << 8) || disp >= (i64)(1 << 8)) - compiler_panic(c, no_loc(), - "link: RV RVC_BRANCH out of range (need ±256B)"); - b = (u32)((u64)disp & 0x1feu); - instr = (u16)(instr & 0xe383u); - instr = (u16)(instr | (((b >> 8) & 1u) << 12)); - instr = (u16)(instr | (((b >> 3) & 3u) << 10)); - instr = (u16)(instr | (((b >> 6) & 3u) << 5)); - instr = (u16)(instr | (((b >> 1) & 3u) << 3)); - instr = (u16)(instr | (((b >> 5) & 1u) << 2)); - P_bytes[0] = (u8)(instr & 0xffu); - P_bytes[1] = (u8)((instr >> 8) & 0xffu); - return; - } - case R_RV_RVC_JUMP: { - /* CJ-type 11-bit signed displacement in 2-byte units (12-bit - * range). c.j / c.jal. Encoding bits in the 16-bit instruction: - * 12=imm[11], 11=imm[4], 10:9=imm[9:8], 8=imm[10], - * 7=imm[6], 6=imm[7], 5:3=imm[3:1], 2=imm[5]. 
*/ - i64 disp = (i64)S + A - (i64)P; - u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8)); - u32 b; - if (disp & 1) - compiler_panic(c, no_loc(), - "link: RV RVC_JUMP misaligned displacement"); - if (disp < -(i64)(1 << 11) || disp >= (i64)(1 << 11)) - compiler_panic(c, no_loc(), - "link: RV RVC_JUMP out of range (need ±2KiB)"); - b = (u32)((u64)disp & 0xffeu); - instr = (u16)(instr & 0xe003u); - instr = (u16)(instr | (((b >> 11) & 1u) << 12)); - instr = (u16)(instr | (((b >> 4) & 1u) << 11)); - instr = (u16)(instr | (((b >> 8) & 3u) << 9)); - instr = (u16)(instr | (((b >> 10) & 1u) << 8)); - instr = (u16)(instr | (((b >> 6) & 1u) << 7)); - instr = (u16)(instr | (((b >> 7) & 1u) << 6)); - instr = (u16)(instr | (((b >> 1) & 7u) << 3)); - instr = (u16)(instr | (((b >> 5) & 1u) << 2)); - P_bytes[0] = (u8)(instr & 0xffu); - P_bytes[1] = (u8)((instr >> 8) & 0xffu); - return; - } - case R_RV_RELAX: - case R_RV_TPREL_ADD: - /* Marker relocs only — RELAX permits the prior reloc to be - * compressed, TPREL_ADD annotates a TLS thread-pointer ADD that - * the linker may fold during relaxation. We don't relax, so - * both are no-ops. */ - return; - case R_RV_ADD8: { - /* word8 += S + A. Used (paired with a SUB8 against another sym - * at the same site) to encode symbol differences. 
*/ - u8 cur = P_bytes[0]; - P_bytes[0] = (u8)(cur + (u8)((S + (u64)A) & 0xffu)); - return; - } - case R_RV_SUB8: { - u8 cur = P_bytes[0]; - P_bytes[0] = (u8)(cur - (u8)((S + (u64)A) & 0xffu)); - return; - } - case R_RV_ADD16: { - u16 cur = rd_u16_le(P_bytes); - wr_u16_le(P_bytes, (u16)(cur + (u16)((S + (u64)A) & 0xffffu))); - return; - } - case R_RV_SUB16: { - u16 cur = rd_u16_le(P_bytes); - wr_u16_le(P_bytes, (u16)(cur - (u16)((S + (u64)A) & 0xffffu))); - return; - } - case R_RV_ADD32: { - u32 cur = rd_u32_le(P_bytes); - wr_u32_le(P_bytes, (u32)(cur + (u32)((S + (u64)A) & 0xffffffffu))); - return; - } - case R_RV_SUB32: { - u32 cur = rd_u32_le(P_bytes); - wr_u32_le(P_bytes, (u32)(cur - (u32)((S + (u64)A) & 0xffffffffu))); - return; - } - case R_RV_ADD64: { - u64 cur = rd_u64_le(P_bytes); - wr_u64_le(P_bytes, cur + S + (u64)A); - return; - } - case R_RV_SUB64: { - u64 cur = rd_u64_le(P_bytes); - wr_u64_le(P_bytes, cur - S - (u64)A); - return; - } - case R_RV_SUB6: { - /* Bottom 6 bits of byte = (byte - (S + A)) & 0x3f. */ - u8 cur = P_bytes[0]; - u8 v = (u8)((cur & 0x3fu) - (u8)((S + (u64)A) & 0x3fu)); - P_bytes[0] = (u8)((cur & 0xc0u) | (v & 0x3fu)); - return; - } - case R_RV_SET6: { - u8 cur = P_bytes[0]; - P_bytes[0] = (u8)((cur & 0xc0u) | (u8)((S + (u64)A) & 0x3fu)); - return; - } - case R_RV_SET8: - P_bytes[0] = (u8)((S + (u64)A) & 0xffu); - return; - case R_RV_SET16: - wr_u16_le(P_bytes, (u16)((S + (u64)A) & 0xffffu)); - return; - case R_RV_SET32: - wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu)); - return; - default: - compiler_panic(c, no_loc(), - "link: unsupported reloc kind %u", - (unsigned)k); - } -} diff --git a/src/obj/elf/emu_load.c b/src/obj/elf/emu_load.c @@ -0,0 +1,564 @@ +/* Guest ELF loader. + * + * The host gives us an ELF buffer in `bytes`. 
We parse the ELF64 header + * directly (no need to involve obj/elf_read.c — its purpose is to build + * an ObjBuilder for the linker, which we don't want here), walk PT_LOAD + * program headers, map them through EmuAddrSpace, and copy file contents in. + * + * Handles ELF64 LE ET_EXEC/ET_DYN at the object-format boundary: PT_LOAD + * mapping plus PT_INTERP, PT_DYNAMIC, and PT_TLS metadata. OS process layout + * and dynamic-loader policy live in src/os and src/emu/dl.c. + */ + +#include <string.h> + +#include "core/core.h" +#include "core/slice.h" +#include "emu/emu.h" +#include "obj/elf/elf.h" +#include "obj/format.h" + +/* Page size we align segments to. The actual guest page granularity is + * unspecified for the flat address-space model; 4KiB is a reasonable default. + */ +#define EMU_PAGE_SIZE 0x1000ull + +extern const ObjFormatEmuOps elf_emu_ops; + +typedef struct EmuElfDynInfo { + u64 dynamic_vaddr; + u64 dynamic_size; + u64 strtab; + u64 strsz; + u64 symtab; + u64 syment; + u64 hash; + u64 gnu_hash; + u64 rela; + u64 relasz; + u64 relaent; + u64 jmprel; + u64 pltrelsz; + u64 pltgot; + u32 flags; +} EmuElfDynInfo; + +static u64 round_up(u64 v, u64 a) { return (v + a - 1u) & ~(a - 1u); } +static u64 round_down(u64 v, u64 a) { return v & ~(a - 1u); } + +/* ---- ELF64 wire reads ---- */ +static u16 rd16(const u8* p) { return (u16)p[0] | ((u16)p[1] << 8); } +static u32 rd32(const u8* p) { + return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); +} +static u64 rd64(const u8* p) { return (u64)rd32(p) | ((u64)rd32(p + 4) << 32); } + +static CfreeSlice cstr_at(const u8* base, u64 max) { + u64 n = 0; + while (n < max && base[n]) ++n; + return (CfreeSlice){.data = base, .len = (size_t)n}; +} + +static EmuElfDynInfo* elf_dyn(EmuLoadedObject* obj) { + return obj ? (EmuElfDynInfo*)obj->format.data : NULL; +} + +static const EmuElfDynInfo* elf_dyn_const(const EmuLoadedObject* obj) { + return obj ? 
(const EmuElfDynInfo*)obj->format.data : NULL; +} + +static u8 elf_phdr_perms(u32 p_flags) { + u8 perms = 0; + if (p_flags & PF_R) perms |= EMU_MEM_READ; + if (p_flags & PF_W) perms |= EMU_MEM_WRITE; + if (p_flags & PF_X) perms |= EMU_MEM_EXEC; + return perms; +} + +static CfreeStatus elf_detect_executable(Compiler* c, CfreeSlice slice, + CfreeTarget* out) { + const u8* bytes = slice.data; + const ObjFormatImpl* fmt; + const ObjElfArchOps* arch_ops; + u16 e_type; + u16 e_machine; + (void)c; + if (!bytes || slice.len < ELF64_EHDR_SIZE || !out) return CFREE_INVALID; + if (bytes[EI_MAG0] != ELFMAG0 || bytes[EI_MAG1] != ELFMAG1 || + bytes[EI_MAG2] != ELFMAG2 || bytes[EI_MAG3] != ELFMAG3) { + return CFREE_INVALID; + } + if (bytes[EI_CLASS] != ELFCLASS64 || bytes[EI_DATA] != ELFDATA2LSB) + return CFREE_UNSUPPORTED; + + e_type = rd16(bytes + 16); + e_machine = rd16(bytes + 18); + if (e_type != ET_EXEC) return CFREE_UNSUPPORTED; + fmt = obj_format_lookup(CFREE_OBJ_ELF); + arch_ops = fmt && fmt->elf_machine ? fmt->elf_machine(e_machine) : NULL; + if (!arch_ops) return CFREE_UNSUPPORTED; + + memset(out, 0, sizeof(*out)); + out->arch = arch_ops->arch; + out->os = CFREE_OS_LINUX; + out->obj = CFREE_OBJ_ELF; + out->ptr_size = 8u; + out->ptr_align = 8u; + out->big_endian = false; + return CFREE_OK; +} + +static CfreeStatus ensure_object_cap(Compiler* c, EmuLoadedImage* img, + u32 need) { + Heap* heap = c->ctx->heap; + u32 old_cap; + u32 new_cap; + EmuLoadedObject* grown; + if (img->link_map.objects_cap >= need) return CFREE_OK; + old_cap = img->link_map.objects_cap; + new_cap = old_cap ? 
old_cap * 2u : 4u;
+  while (new_cap < need) new_cap *= 2u;
+  grown = (EmuLoadedObject*)heap->realloc(
+      heap, img->link_map.objects, sizeof(*img->link_map.objects) * old_cap,
+      sizeof(*img->link_map.objects) * new_cap, _Alignof(EmuLoadedObject));
+  if (!grown) return CFREE_ERR;
+  memset(grown + old_cap, 0, sizeof(*grown) * (new_cap - old_cap));
+  img->link_map.objects = grown;
+  img->link_map.objects_cap = new_cap;
+  return CFREE_OK;
+}
+
+/* Look up the NUL-terminated string at offset `off` of the dynamic string
+ * table (`strtab` is its guest address, `strsz` its DT_STRSZ).
+ *
+ * Returns 1 and stores the slice in `*out` on success; returns 0 and leaves
+ * `*out` untouched when no string table is known or the first byte is not
+ * readable. The slice length is capped at the bytes remaining in the table
+ * (zero when `off` is at or beyond `strsz`).
+ * NOTE(review): only the first byte is probed through emu_addr_space_ptr;
+ * the scan relies on the table being contiguously mapped — confirm. */
+static int elf_dynstr_at(EmuLoadedImage* img, u64 strtab, u64 strsz, u64 off,
+                         CfreeSlice* out) {
+  u8* s;
+  if (!strtab) return 0;
+  s = emu_addr_space_ptr(&img->addr_space, strtab + off, 1, EMU_MEM_READ);
+  if (!s) return 0;
+  *out = cstr_at(s, strsz > off ? strsz - off : 0);
+  return 1;
+}
+
+/* Walk the object's PT_DYNAMIC array and record what the emulator needs:
+ * table addresses/sizes in the EmuElfDynInfo, init/fini hooks, the soname
+ * and the DT_NEEDED import names.
+ *
+ * String-valued tags (DT_SONAME, DT_NEEDED) are only resolved after the
+ * whole array has been scanned, because DT_STRTAB / DT_STRSZ may appear
+ * anywhere in the array — resolving them in-loop would silently drop a
+ * DT_SONAME that precedes DT_STRTAB.
+ *
+ * Returns CFREE_OK when the object has no dynamic section or parsing
+ * finished, CFREE_INVALID when the dynamic section is not readable. */
+static CfreeStatus parse_object_dynamic(EmuLoadedImage* img,
+                                        EmuLoadedObject* obj) {
+  u8* dyn;
+  u64 dynamic_size;
+  u64 strtab = 0, strsz = 0;
+  u64 needed_offs[16]; /* DT_NEEDED entries beyond 16 are ignored */
+  u32 nneeded_offs = 0;
+  u64 soname_off = 0;
+  int have_soname = 0;
+  u64 j;
+  EmuElfDynInfo* dinfo = elf_dyn(obj);
+  if (!dinfo || !dinfo->dynamic_vaddr) return CFREE_OK;
+  dynamic_size = dinfo->dynamic_size;
+  dyn = emu_addr_space_ptr(&img->addr_space, dinfo->dynamic_vaddr,
+                           dynamic_size, EMU_MEM_READ);
+  if (!dyn) return CFREE_INVALID;
+  for (j = 0; j + ELF64_DYN_SIZE <= dynamic_size; j += ELF64_DYN_SIZE) {
+    u64 tag = rd64(dyn + j);
+    u64 val = rd64(dyn + j + 8u);
+    u64 ptr = obj->load_bias + val; /* d_ptr view of the same entry */
+    if (tag == DT_NULL) break;
+    switch (tag) {
+      case DT_NEEDED:
+        if (nneeded_offs < sizeof(needed_offs) / sizeof(needed_offs[0]))
+          needed_offs[nneeded_offs++] = val;
+        break;
+      case DT_STRTAB:
+        strtab = ptr;
+        dinfo->strtab = ptr;
+        break;
+      case DT_STRSZ:
+        strsz = val;
+        dinfo->strsz = val;
+        break;
+      case DT_SYMTAB:
+        dinfo->symtab = ptr;
+        break;
+      case DT_SYMENT:
+        dinfo->syment = val;
+        break;
+      case DT_HASH:
+        dinfo->hash = ptr;
+        break;
+      case DT_GNU_HASH:
+        dinfo->gnu_hash = ptr;
+        break;
+      case DT_RELA:
+        dinfo->rela = ptr;
+        break;
+      case DT_RELASZ:
+        dinfo->relasz = val;
+        break;
+      case DT_RELAENT:
+        dinfo->relaent = val;
+        break;
+      case DT_JMPREL:
+        dinfo->jmprel = ptr;
+        break;
+      case DT_PLTRELSZ:
+        dinfo->pltrelsz = val;
+        break;
+      case DT_PLTGOT:
+        dinfo->pltgot = ptr;
+        break;
+      case DT_INIT:
+        obj->init_fini.init = ptr;
+        break;
+      case DT_FINI:
+        obj->init_fini.fini = ptr;
+        break;
+      case DT_INIT_ARRAY:
+        obj->init_fini.init_array = ptr;
+        break;
+      case DT_INIT_ARRAYSZ:
+        obj->init_fini.init_arraysz = val;
+        break;
+      case DT_FINI_ARRAY:
+        obj->init_fini.fini_array = ptr;
+        break;
+      case DT_FINI_ARRAYSZ:
+        obj->init_fini.fini_arraysz = val;
+        break;
+      case DT_SONAME:
+        /* Deferred: DT_STRTAB/DT_STRSZ may not have been seen yet. */
+        soname_off = val;
+        have_soname = 1;
+        break;
+      default:
+        break;
+    }
+  }
+  if (have_soname)
+    (void)elf_dynstr_at(img, strtab, strsz, soname_off, &obj->soname);
+  for (j = 0; j < nneeded_offs; ++j) {
+    if (obj->imports.nneeded >=
+        sizeof(obj->imports.needed) / sizeof(obj->imports.needed[0]))
+      break;
+    if (elf_dynstr_at(img, strtab, strsz, needed_offs[j],
+                      &obj->imports.needed[obj->imports.nneeded]))
+      obj->imports.nneeded++;
+  }
+  return CFREE_OK;
+}
+
+/* Map one ELF object into `img` and append it to the link map.
+ *
+ * `is_main` selects the accepted e_type: ET_EXEC for the main program
+ * (mapped at its linked addresses), ET_DYN otherwise (placed in a free gap
+ * of the address space). On success `*out_index` is the object's slot in
+ * img->link_map.objects. */
+static CfreeStatus elf_load_one_object(Compiler* c, EmuProcess* process,
+                                       EmuLoadedImage* img,
+                                       CfreeSlice name, CfreeSlice bytes_slice,
+                                       int is_main, u32* out_index) {
+  const u8* bytes = bytes_slice.data;
+  size_t len = bytes_slice.len;
+  u16 e_type, e_machine, e_phentsize, e_phnum;
+  const ObjFormatImpl* fmt;
+  const ObjElfArchOps* arch_ops;
+  u64 e_entry, e_phoff;
+  u64 lo_va = 0, hi_va = 0, load_bias = 0;
+  int saw_load = 0;
+  u32 i;
+  EmuLoadedObject* obj;
+
+  if (!bytes || len < ELF64_EHDR_SIZE) return CFREE_INVALID;
+  if (bytes[EI_MAG0] != ELFMAG0 || bytes[EI_MAG1] != ELFMAG1 ||
+      bytes[EI_MAG2] != ELFMAG2 || bytes[EI_MAG3] != ELFMAG3)
+    return CFREE_INVALID;
+  if (bytes[EI_CLASS] != ELFCLASS64 || bytes[EI_DATA] != ELFDATA2LSB)
+    return CFREE_UNSUPPORTED;
+  e_type = rd16(bytes + 16);
+  e_machine = rd16(bytes + 18);
+  e_entry = rd64(bytes + 24);
+  e_phoff = rd64(bytes + 32);
+  e_phentsize = rd16(bytes + 54);
+  e_phnum = rd16(bytes + 56);
+  fmt = obj_format_lookup(CFREE_OBJ_ELF);
+  arch_ops = fmt && fmt->elf_machine ?
fmt->elf_machine(e_machine) : NULL; + if (!arch_ops) return CFREE_UNSUPPORTED; + if (process && process->guest_target.arch != arch_ops->arch) + return CFREE_UNSUPPORTED; + if (is_main ? e_type != ET_EXEC : e_type != ET_DYN) return CFREE_UNSUPPORTED; + if (e_phentsize < ELF64_PHDR_SIZE) return CFREE_INVALID; + if ((u64)e_phoff + (u64)e_phnum * e_phentsize > len) return CFREE_INVALID; + + for (i = 0; i < e_phnum; ++i) { + const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; + u32 p_type = rd32(ph); + u64 p_vaddr = rd64(ph + 16); + u64 p_memsz = rd64(ph + 40); + if (p_type != PT_LOAD) continue; + if (!saw_load) { + lo_va = round_down(p_vaddr, EMU_PAGE_SIZE); + hi_va = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); + saw_load = 1; + } else { + u64 lo = round_down(p_vaddr, EMU_PAGE_SIZE); + u64 hi = round_up(p_vaddr + p_memsz, EMU_PAGE_SIZE); + if (lo < lo_va) lo_va = lo; + if (hi > hi_va) hi_va = hi; + } + } + if (!saw_load) return CFREE_INVALID; + if (!is_main) { + if (emu_addr_space_find_gap(&img->addr_space, hi_va - lo_va, + EMU_PAGE_SIZE, 0x2000000000ull, + 0x3000000000ull, &load_bias) != CFREE_OK) + return CFREE_ERR; + load_bias -= lo_va; + } + + if (ensure_object_cap(c, img, img->link_map.nobjects + 1u) != CFREE_OK) + return CFREE_ERR; + obj = &img->link_map.objects[img->link_map.nobjects]; + memset(obj, 0, sizeof(*obj)); + obj->name = name; + obj->load_bias = load_bias; + obj->map_start = load_bias + lo_va; + obj->map_end = load_bias + hi_va; + if (emu_object_format_data_alloc(c, obj, sizeof(EmuElfDynInfo), + _Alignof(EmuElfDynInfo), + &obj->format.data) != CFREE_OK) + return CFREE_ERR; + *out_index = img->link_map.nobjects++; + + for (i = 0; i < e_phnum; ++i) { + const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; + u32 p_type = rd32(ph); + u32 p_flags = rd32(ph + 4); + u64 p_offset = rd64(ph + 8); + u64 p_vaddr = rd64(ph + 16); + u64 p_filesz = rd64(ph + 32); + u64 p_memsz = rd64(ph + 40); + u64 p_align = rd64(ph + 48); + if (p_type == PT_LOAD) { + u64 
map_start = round_down(load_bias + p_vaddr, EMU_PAGE_SIZE); + u64 map_end = round_up(load_bias + p_vaddr + p_memsz, EMU_PAGE_SIZE); + if (p_filesz > p_memsz || p_offset + p_filesz > len) + return CFREE_INVALID; + if (emu_addr_space_map(&img->addr_space, map_start, map_end - map_start, + elf_phdr_perms(p_flags), EMU_MAP_FILE) != + CFREE_OK) + return CFREE_ERR; + if (p_filesz && + emu_addr_space_copy_in(&img->addr_space, load_bias + p_vaddr, + bytes + p_offset, p_filesz) != CFREE_OK) + return CFREE_ERR; + } else if (p_type == PT_DYNAMIC) { + EmuElfDynInfo* dinfo = elf_dyn(obj); + if (!dinfo) return CFREE_ERR; + dinfo->dynamic_vaddr = load_bias + p_vaddr; + dinfo->dynamic_size = p_filesz; + } else if (p_type == PT_TLS) { + obj->tls.image_vaddr = load_bias + p_vaddr; + obj->tls.filesz = p_filesz; + obj->tls.memsz = p_memsz; + obj->tls.align = p_align; + obj->tls.module_id = *out_index + 1u; + } else if (is_main && p_type == PT_INTERP && + p_offset + p_filesz <= len) { + img->process_info.interpreter_path = cstr_at(bytes + p_offset, p_filesz); + } + } + if (is_main) { + img->entry_pc = e_entry; + img->process_info.headers_vaddr = lo_va + e_phoff; + img->process_info.header_entry_size = e_phentsize; + img->process_info.header_count = e_phnum; + } + return parse_object_dynamic(img, obj); +} + +static CfreeStatus elf_load_executable(Compiler* c, const EmuLoadOptions* opts, + EmuLoadedImage* out) { + u32 index = 0; + if (!out) return CFREE_INVALID; + memset(out, 0, sizeof(*out)); + if (!c || !opts || !opts->bytes.data || opts->bytes.len < ELF64_EHDR_SIZE) + return CFREE_INVALID; + if (emu_addr_space_init(&out->addr_space, c, EMU_PAGE_SIZE) != CFREE_OK) + return CFREE_ERR; + if (elf_load_one_object(c, opts->process, out, opts->name, opts->bytes, 1, + &index) != CFREE_OK) { + emu_unload_image(c, out); + return CFREE_ERR; + } + out->link_map.main_object = index; + out->link_map.global_scope_head = index; + return CFREE_OK; +} + +static u32 elf_emu_reloc_from(CfreeArchKind 
arch, u32 wire_type) {
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF);
+  const ObjElfArchOps* ops;
+  if (!fmt || !fmt->elf_arch) return (u32)-1;
+  ops = fmt->elf_arch(arch);
+  if (!ops || !ops->reloc_from) return (u32)-1;
+  return ops->reloc_from(wire_type);
+}
+
+/* Derive the dynamic symbol count from a DT_GNU_HASH table at guest address
+ * `gnu_hash`.
+ *
+ * ELF64 layout: u32 nbuckets, symoffset, bloom_size, bloom_shift; then
+ * bloom_size 64-bit bloom words, nbuckets u32 buckets, then the u32 chain
+ * array indexed by (symbol index - symoffset). The highest bucket value
+ * starts the last chain; following it to the entry with bit 0 set gives the
+ * last symbol index. Returns 0 when the table cannot be read. */
+static u32 elf_gnu_hash_sym_count(EmuProcess* process, u64 gnu_hash) {
+  u8* p;
+  u32 nbuckets, symoffset, bloom_size;
+  u32 i;
+  u32 last = 0;
+  u64 buckets, chains;
+  p = emu_addr_space_ptr(&process->image.addr_space, gnu_hash, 16u,
+                         EMU_MEM_READ);
+  if (!p) return 0;
+  nbuckets = rd32(p);
+  symoffset = rd32(p + 4u);
+  bloom_size = rd32(p + 8u);
+  buckets = gnu_hash + 16u + (u64)bloom_size * 8u;
+  chains = buckets + (u64)nbuckets * 4u;
+  for (i = 0; i < nbuckets; ++i) {
+    u32 b;
+    p = emu_addr_space_ptr(&process->image.addr_space, buckets + (u64)i * 4u,
+                           4u, EMU_MEM_READ);
+    if (!p) return 0;
+    b = rd32(p);
+    if (b > last) last = b;
+  }
+  /* No hashed symbols: only the unhashed prefix [0, symoffset) exists. */
+  if (last < symoffset) return symoffset;
+  for (;;) {
+    p = emu_addr_space_ptr(&process->image.addr_space,
+                           chains + (u64)(last - symoffset) * 4u, 4u,
+                           EMU_MEM_READ);
+    if (!p) return 0;
+    if (rd32(p) & 1u) return last + 1u; /* bit 0 marks end of chain */
+    if (last == 0xffffffffu) return 0;
+    ++last;
+  }
+}
+
+/* Number of entries in the object's dynamic symbol table.
+ *
+ * Taken from DT_HASH's nchain when present, otherwise computed from
+ * DT_GNU_HASH. Falls back to 32 when neither table yields a count. */
+static u32 elf_object_sym_count(EmuProcess* process,
+                                const EmuLoadedObject* obj) {
+  u8* h;
+  const EmuElfDynInfo* dinfo = elf_dyn_const(obj);
+  if (dinfo && dinfo->hash) {
+    h = emu_addr_space_ptr(&process->image.addr_space, dinfo->hash, 8u,
+                           EMU_MEM_READ);
+    if (h) return rd32(h + 4u); /* nchain == symbol count */
+  }
+  if (dinfo && dinfo->gnu_hash) {
+    u32 n = elf_gnu_hash_sym_count(process, dinfo->gnu_hash);
+    if (n) return n;
+  }
+  return 32u;
+}
+
+/* Decode dynamic symbol `index` of `obj` into `*out`.
+ *
+ * Returns CFREE_INVALID when arguments or the symbol/string tables are
+ * missing, when the symbol entry or its name is not readable, or when the
+ * name offset lies outside a string table of known size. `out->name` stays
+ * empty for symbols with st_name == 0. */
+static CfreeStatus elf_symbol_at(EmuProcess* process,
+                                 const EmuLoadedObject* obj, u64 index,
+                                 EmuDynSymbol* out) {
+  u8* sym;
+  u32 st_name;
+  u16 st_shndx;
+  u8* name;
+  const EmuElfDynInfo* dinfo = elf_dyn_const(obj);
+  if (!process || !obj || !out || !dinfo || !dinfo->symtab ||
+      !dinfo->strtab)
+    return CFREE_INVALID;
+  sym = emu_addr_space_ptr(&process->image.addr_space,
+                           dinfo->symtab + index * ELF64_SYM_SIZE,
+                           ELF64_SYM_SIZE, EMU_MEM_READ);
+  if (!sym) return CFREE_INVALID;
+  st_name = rd32(sym);
+  st_shndx = rd16(sym + 6u);
+  memset(out, 0, sizeof(*out));
+  out->index = index;
+  out->value = rd64(sym + 8u);
+  out->size = rd64(sym + 16u);
+  out->defined = st_shndx != SHN_UNDEF;
+  if (st_name) {
+    u64 max_len;
+    if (dinfo->strsz) {
+      /* A name offset at or past DT_STRSZ is malformed; do not read it. */
+      if (st_name >= dinfo->strsz) return CFREE_INVALID;
+      max_len = dinfo->strsz - st_name;
+    } else {
+      max_len = 256u; /* table size unknown: bounded best-effort scan */
+    }
+    name = emu_addr_space_ptr(&process->image.addr_space,
+                              dinfo->strtab + st_name, 1u,
+                              EMU_MEM_READ);
+    if (!name) return CFREE_INVALID;
+    out->name = cstr_at(name, max_len);
+  }
+  return CFREE_OK;
+}
+
+/* Start iterating the DT_NEEDED names recorded for `obj`. */
+static void elf_dyn_needed_iter(const EmuLoadedObject* obj,
+                                EmuDynNeededIter* out) {
+  if (!out) return;
+  memset(out, 0, sizeof(*out));
+  out->object = obj;
+}
+
+/* Yield the next needed-library name; returns 1 with `*out` set, or 0 when
+ * the list is exhausted or an argument is missing. */
+static int elf_dyn_needed_next(EmuProcess* process, EmuDynNeededIter* it,
+                               CfreeSlice* out) {
+  (void)process;
+  if (!it || !it->object || !out) return 0;
+  if (it->index >= it->object->imports.nneeded) return 0;
+  *out = it->object->imports.needed[it->index++];
+  return 1;
+}
+
+/* Linear search of the object's dynamic symbols for a defined symbol named
+ * `symbol`. Index 0 (the reserved null symbol) is skipped and unreadable
+ * entries are ignored. Returns CFREE_NOT_FOUND when nothing matches. */
+static CfreeStatus elf_dyn_symbol_lookup(EmuProcess* process,
+                                         const EmuLoadedObject* obj,
+                                         CfreeSlice symbol,
+                                         EmuDynSymbol* out) {
+  u32 n;
+  u32 i;
+  if (!process || !obj || !out) return CFREE_INVALID;
+  n = elf_object_sym_count(process, obj);
+  for (i = 1u; i < n; ++i) {
+    EmuDynSymbol have;
+    if (elf_symbol_at(process, obj, i, &have) != CFREE_OK) continue;
+    if (have.defined && have.name.data && cfree_slice_eq(have.name, symbol)) {
+      *out = have;
+      return CFREE_OK;
+    }
+  }
+  return CFREE_NOT_FOUND;
+}
+
+/* Fetch a dynamic symbol by table index (thin wrapper over elf_symbol_at). */
+static CfreeStatus elf_dyn_symbol_by_index(EmuProcess* process,
+                                           const EmuLoadedObject* obj,
+                                           u64 symbol_index,
+                                           EmuDynSymbol* out) {
+  return elf_symbol_at(process, obj, symbol_index, out);
+}
+
+/* Start iterating one of the object's RELA tables (`table` picks which). */
+static void elf_reloc_iter(const EmuLoadedObject* obj,
+                           EmuDynRelocTableKind table,
+                           EmuDynRelocIter* out) {
+  if (!out) return;
+  memset(out, 0, sizeof(*out));
+  out->object = obj;
+  out->table = table;
+}
+
+/* Decode the next Elf64_Rela entry of the selected table into `*out`.
+ * Returns 1 when an entry was produced, 0 at the end of the table or when
+ * the entry cannot be read. */
+static int elf_reloc_next(EmuProcess* process, EmuDynRelocIter* it,
+                          EmuDynReloc* out) {
+  const EmuLoadedObject* obj;
+  u64 base;
+  u64 size;
+  u8* rela;
+  u64 r_info;
+  const EmuElfDynInfo* dinfo;
+  if (!process || !it || !it->object || !out) return 0;
+  obj = it->object;
+  dinfo = elf_dyn_const(obj);
+  if (!dinfo) return 0;
+  if (it->table == EMU_DYN_RELOC_TABLE_PLT) {
+    base = dinfo->jmprel;
+    size = dinfo->pltrelsz;
+  } else {
+    base = dinfo->rela;
+    size = dinfo->relasz;
+  }
+  if (!base || it->cursor + ELF64_RELA_SIZE > size) return 0;
+  rela =
emu_addr_space_ptr(&process->image.addr_space, base + it->cursor,
+                            ELF64_RELA_SIZE, EMU_MEM_READ);
+  /* The cursor advances even when the entry turns out to be unreadable. */
+  it->cursor += ELF64_RELA_SIZE;
+  if (!rela) return 0;
+  r_info = rd64(rela + 8u);
+  memset(out, 0, sizeof(*out));
+  out->patch_addr = obj->load_bias + rd64(rela); /* r_offset, rebased */
+  out->symbol_index = r_info >> 32;              /* ELF64_R_SYM */
+  out->wire_type = r_info & 0xffffffffull;       /* ELF64_R_TYPE */
+  out->addend = (i64)rd64(rela + 16u);
+  out->width = 8u;
+  return 1;
+}
+
+/* Sort a dynamic relocation into a loader class and pick the canonical
+ * reloc kind used to patch it.
+ *
+ * RV64 RELATIVE, JUMP_SLOT and 64 relocations are handled directly (all
+ * patched as R_ABS64). Wire type 0 is accepted as a no-op. Any other type
+ * is translated through elf_emu_reloc_from and reported as RELATIVE;
+ * types without a translation yield CFREE_UNSUPPORTED. */
+static CfreeStatus elf_reloc_classify(EmuProcess* process,
+                                      const EmuLoadedObject* obj,
+                                      const EmuDynReloc* reloc,
+                                      EmuDynRelocClass* cls,
+                                      u32* kind_out) {
+  u32 r_type;
+  u32 mapped;
+  int is_rv64;
+  (void)obj;
+  if (!process || !reloc || !cls || !kind_out) return CFREE_INVALID;
+  r_type = (u32)reloc->wire_type;
+  is_rv64 = process->guest_target.arch == CFREE_ARCH_RV64;
+  *cls = EMU_DYN_RELOC_NONE;
+  *kind_out = R_NONE;
+  if (is_rv64) {
+    if (r_type == ELF_R_RISCV_RELATIVE) {
+      *cls = EMU_DYN_RELOC_RELATIVE;
+      *kind_out = (u32)R_ABS64;
+      return CFREE_OK;
+    } else if (r_type == ELF_R_RISCV_JUMP_SLOT) {
+      *cls = EMU_DYN_RELOC_IMPORT_SLOT;
+      *kind_out = (u32)R_ABS64;
+      return CFREE_OK;
+    } else if (r_type == ELF_R_RISCV_64) {
+      *cls = EMU_DYN_RELOC_SYMBOLIC;
+      *kind_out = (u32)R_ABS64;
+      return CFREE_OK;
+    }
+  }
+  if (r_type == 0) return CFREE_OK;
+  /* Generic path: one lookup serves both the support check and the kind. */
+  mapped = elf_emu_reloc_from(process->guest_target.arch, r_type);
+  if (mapped == (u32)-1) return CFREE_UNSUPPORTED;
+  *cls = EMU_DYN_RELOC_RELATIVE;
+  *kind_out = mapped;
+  return *kind_out == R_NONE ?
CFREE_UNSUPPORTED : CFREE_OK; +} + +/* ELF binding of the emulator loader hooks: each slot points at the static function of the matching name defined earlier in this file. */ const ObjFormatEmuOps elf_emu_ops = { + .detect_executable = elf_detect_executable, + .load_executable = elf_load_executable, + .map_object = elf_load_one_object, + .dyn_needed_iter = elf_dyn_needed_iter, + .dyn_needed_next = elf_dyn_needed_next, + .dyn_symbol_lookup = elf_dyn_symbol_lookup, + .dyn_symbol_by_index = elf_dyn_symbol_by_index, + .reloc_iter = elf_reloc_iter, + .reloc_next = elf_reloc_next, + .reloc_classify = elf_reloc_classify, + .reloc_from = elf_emu_reloc_from, +}; diff --git a/src/obj/format.h b/src/obj/format.h @@ -8,6 +8,16 @@ typedef struct LinkImage LinkImage; typedef struct Linker Linker; +typedef struct EmuLoadOptions EmuLoadOptions; +typedef struct EmuLoadedImage EmuLoadedImage; +typedef struct EmuProcess EmuProcess; +typedef struct EmuLoadedObject EmuLoadedObject; +typedef struct EmuDynNeededIter EmuDynNeededIter; +typedef struct EmuDynSymbol EmuDynSymbol; +typedef struct EmuDynRelocIter EmuDynRelocIter; +typedef struct EmuDynReloc EmuDynReloc; +typedef u32 EmuDynRelocClass; +typedef u32 EmuDynRelocTableKind; typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name, const u8* data, size_t len); @@ -77,6 +87,29 @@ typedef struct ObjFormatDsoReader { const char* name; } ObjFormatDsoReader; +/* Emulator-facing loader hooks an object format may provide (reached through ObjFormatImpl.emu). The per-hook notes describe what the ELF implementation in src/obj/elf/emu_load.c does. */ typedef struct ObjFormatEmuOps { + /* Recognise an executable image and fill in its target description. */ CfreeStatus (*detect_executable)(Compiler*, CfreeSlice bytes, + CfreeTarget* target_out); + /* Load the main executable into a zeroed image. */ CfreeStatus (*load_executable)(Compiler*, const EmuLoadOptions*, + EmuLoadedImage* out); + /* Map one object and append it to the image's link map; *out_index receives its slot. */ CfreeStatus (*map_object)(Compiler*, EmuProcess*, EmuLoadedImage*, + CfreeSlice name, CfreeSlice bytes, int is_main, + u32* out_index); + /* Needed-library iteration: init, then next returns 1 per name and 0 when done. */ void (*dyn_needed_iter)(const EmuLoadedObject*, EmuDynNeededIter* out); + int (*dyn_needed_next)(EmuProcess*, EmuDynNeededIter*, CfreeSlice* out); + /* Find a defined dynamic symbol by name, or fetch one by table index. */ CfreeStatus (*dyn_symbol_lookup)(EmuProcess*, const EmuLoadedObject*, + CfreeSlice symbol, EmuDynSymbol* out); + CfreeStatus (*dyn_symbol_by_index)(EmuProcess*, const EmuLoadedObject*, + u64 symbol_index, 
EmuDynSymbol* out); + /* Dynamic relocation iteration over the table selected by `table`; next returns 1 per entry and 0 when done. */ void (*reloc_iter)(const EmuLoadedObject*, EmuDynRelocTableKind table, + EmuDynRelocIter* out); + int (*reloc_next)(EmuProcess*, EmuDynRelocIter*, EmuDynReloc* out); + /* Classify a relocation and report the canonical reloc kind used to patch it. */ CfreeStatus (*reloc_classify)(EmuProcess*, const EmuLoadedObject*, + const EmuDynReloc*, EmuDynRelocClass* cls, + u32* kind_out); + /* Translate a wire relocation type to a canonical reloc kind; the ELF implementation returns (u32)-1 when no translator exists. */ u32 (*reloc_from)(CfreeArchKind arch, u32 wire_type); +} ObjFormatEmuOps; + typedef struct ObjFormatImpl { ObjFmt kind; CfreeBinFmt bin_fmt; @@ -90,6 +123,7 @@ typedef struct ObjFormatImpl { ObjFormatLinkEmitFn link_emit; ObjFormatLayoutDynFn layout_dyn; ObjFormatFreeDynFn free_dyn; + const ObjFormatEmuOps* emu; const ObjElfArchOps* (*elf_arch)(CfreeArchKind); const ObjElfArchOps* (*elf_machine)(u32 e_machine); diff --git a/src/obj/registry.c b/src/obj/registry.c @@ -15,6 +15,7 @@ void link_emit_coff(LinkImage*, Writer*); #if CFREE_OBJ_ELF_ENABLED void layout_dyn(Linker*, LinkImage*); void link_dyn_state_free(LinkImage*); +extern const ObjFormatEmuOps elf_emu_ops; #endif #if CFREE_OBJ_COFF_ENABLED @@ -230,6 +231,7 @@ static const ObjFormatImpl obj_format_impl_elf = { .link_emit = link_emit_elf, .layout_dyn = layout_dyn, .free_dyn = link_dyn_state_free, + .emu = &elf_emu_ops, .elf_arch = obj_elf_arch, .elf_machine = obj_elf_machine, }; diff --git a/src/obj/reloc_apply.c b/src/obj/reloc_apply.c @@ -0,0 +1,516 @@ +/* Per-arch relocation byte application. + * + * Pure function: takes the resolved final addresses (S, P) and the + * addend (A), and patches `width` bytes at the relocation site. + * Callers (static linker, JIT linker, emulator dynamic loader) compute + * loader/linker policy first; this routine sees only final values. + * + * Encoding references: + * AArch64: ARM ARMv8-A "ELF for the ARM 64-bit Architecture (AArch64)" + * §5.7 (relocation types). + * RISC-V: "RISC-V ELF psABI specification" §3 (relocation types) and + * "The RISC-V Instruction Set Manual, Volume I" Chapter 19 + * (instruction encodings). 
Reloc semantics live behind the + * R_RV_* RelocKind values; LO12_S sits at the S-type imm + * slots, LO12_I at I-type, and BRANCH/JAL at B/J-type. */ + +#include <string.h> + +#include "core/bytes.h" +#include "obj/reloc_apply.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, + u64 P) { + switch (k) { + case R_ABS32: + case R_X64_32S: + case R_X64_TPOFF32: { + /* All three write a 32-bit value at the site. ABS32 / _32S + * take an absolute (unsigned / sign-extended) symbol address; + * TPOFF32 takes the (caller-precomputed) TP-relative offset. + * At the byte level the encoding is identical. */ + u64 v = S + (u64)A; + wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); + return; + } + case R_ABS64: + case R_X64_TPOFF64: + case R_X64_RELATIVE: { + /* R_X64_RELATIVE: (S + A) — for static-with-relocs paths the + * linker writes the relocated value directly; the dynamic + * loader would otherwise do the same fixup at load time. */ + u64 v = S + (u64)A; + wr_u64_le(P_bytes, v); + return; + } + case R_X64_GLOB_DAT: + case R_X64_JUMP_SLOT: { + /* Dynamic linker normally applies these; for static-with-relocs + * paths we write the resolved symbol value (S) into the GOT/PLT + * slot. Addend is unused per the x86_64 psABI. */ + wr_u64_le(P_bytes, S); + return; + } + case R_X64_COPY: + compiler_panic(c, no_loc(), + "link: R_X64_COPY belongs in dynamic loader, " + "not static link"); + return; + case R_REL32: + case R_PC32: + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_X64_GOTPC32: { + /* AArch64 ELF: PREL32 maps to either of these; both encode a + * 32-bit signed PC-relative displacement. The cfree-canonical + * distinction (section-relative vs PC-relative) collapses on + * AArch64 because the linker resolves to absolute vaddrs. 
+ * + * x86_64 PLT32: in a static link there is no PLT, so the + * displacement collapses to a plain 32-bit PC-relative call. */ + i64 v = (i64)S + A - (i64)P; + wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu)); + return; + } + case R_X64_PC8: { + i64 v = (i64)S + A - (i64)P; + if (v < -128 || v > 127) + compiler_panic(c, no_loc(), "link: X64_PC8 out of range"); + P_bytes[0] = (u8)((u64)v & 0xffu); + return; + } + case R_REL64: + case R_PC64: { + /* 64-bit PC-relative; AArch64 R_AARCH64_PREL64. Used by + * `.quad sym1 - sym2` style symbol-difference encodings (e.g. + * the arm64 kernel image_size header field). */ + i64 v = (i64)S + A - (i64)P; + wr_u64_le(P_bytes, (u64)v); + return; + } + case R_AARCH64_ABS16: { + u64 v = S + (u64)A; + wr_u16_le(P_bytes, (u16)(v & 0xffffu)); + return; + } + case R_AARCH64_PREL16: { + i64 v = (i64)S + A - (i64)P; + wr_u16_le(P_bytes, (u16)((u64)v & 0xffffu)); + return; + } + case R_AARCH64_CONDBR19: + case R_AARCH64_LD_PREL_LO19: { + /* B.cond / CB(N)Z / LDR (literal) — imm19 in 4-byte units, + * signed, at bits [23:5]. Range: ±1MiB. */ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 imm19; + if (disp & 3) + compiler_panic(c, no_loc(), + "link: imm19 reloc misaligned displacement"); + if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) + compiler_panic(c, no_loc(), + "link: imm19 reloc out of range (need ±1MiB)"); + imm19 = (u32)((disp >> 2) & 0x7ffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & ~(0x7ffffu << 5)) | (imm19 << 5); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_TSTBR14: { + /* TBZ/TBNZ — imm14 in 4-byte units, signed, at bits [18:5]. + * Range: ±32KiB. 
*/ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 imm14; + if (disp & 3) + compiler_panic(c, no_loc(), "link: TSTBR14 misaligned displacement"); + if (disp < -(i64)(1 << 15) || disp >= (i64)(1 << 15)) + compiler_panic(c, no_loc(), "link: TSTBR14 out of range (need ±32KiB)"); + imm14 = (u32)((disp >> 2) & 0x3fffu); + instr = rd_u32_le(P_bytes); + instr = (instr & ~(0x3fffu << 5)) | (imm14 << 5); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_ADR_PREL_LO21: { + /* ADR — byte-granularity imm21, encoded as immlo[30:29] + + * immhi[23:5]. No 12-bit shift (unlike ADRP). Range: ±1MiB. */ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 immlo, immhi; + if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) + compiler_panic(c, no_loc(), + "link: ADR_PREL_LO21 out of range (need ±1MiB)"); + immlo = (u32)(disp & 0x3u); + immhi = (u32)((disp >> 2) & 0x7ffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: { + /* B/BL imm26 — branch displacement in 4-byte units, signed. + * Clear bits [25:0] of the existing instruction and OR in the + * new imm26. Range check: ±128MiB. */ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 imm26; + if (disp & 3) + compiler_panic(c, no_loc(), "link: CALL26 misaligned displacement"); + if (disp < -(i64)(1 << 27) || disp >= (i64)(1 << 27)) + compiler_panic(c, no_loc(), "link: CALL26 out of range (need ±128MiB)"); + imm26 = (u32)((disp >> 2) & 0x3ffffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & 0xfc000000u) | imm26; + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_TLVP_LOAD_PAGE21: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: { + /* ADRP — page-relative imm21, encoded as immlo[30:29] + + * immhi[23:5]. Effective immediate is (S+A) page minus P page, + * shifted right by 12, sign-extended to 33 bits. 
_NC variant + * skips the range check (compiler asserts it can't overflow, + * e.g. when paired with explicit page bracketing). */ + i64 page_s = ((i64)S + A) & ~(i64)0xfff; + i64 page_p = (i64)P & ~(i64)0xfff; + i64 disp = page_s - page_p; + i64 imm21 = disp >> 12; + u32 instr; + u32 immlo, immhi; + if (k != R_AARCH64_ADR_PREL_PG_HI21_NC && + (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20))) + compiler_panic(c, no_loc(), + "link: ADR_PREL_PG_HI21 out of range (need ±4GiB)"); + immlo = (u32)(imm21 & 0x3u); + immhi = (u32)((imm21 >> 2) & 0x7ffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_ADD_ABS_LO12_NC: { + /* ADD (immediate) imm12 at bits [21:10]. NC = no overflow check. */ + u64 v = ((u64)S + (u64)A) & 0xfffu; + u32 instr = rd_u32_le(P_bytes); + instr = (instr & ~(0xfffu << 10)) | ((u32)v << 10); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: { + /* AArch64 TLS local-exec. Caller passes S already as the + * TP-relative offset (target's image offset minus the TLS + * image base, plus the 16-byte AArch64 TCB). HI12 takes + * bits 23:12, LO12_NC takes bits 11:0; both encoded as + * imm12 at instruction bits [21:10] of an ADD (immediate). + * The HI12 form's instruction carries LSL #12 in its opcode, + * so bits 11:0 of the operand naturally land at scale 4096. */ + u64 v = (u64)((i64)S + A); + u32 imm12 = (k == R_AARCH64_TLSLE_ADD_TPREL_HI12) + ? 
(u32)((v >> 12) & 0xfffu) + : (u32)(v & 0xfffu); + u32 instr = rd_u32_le(P_bytes); + instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLVP_LOAD_PAGEOFF12: { + /* LDR/STR with imm12 at bits [21:10]; the imm is scaled by the + * access size, so we right-shift the low 12 bits of (S+A) by + * the size scale before encoding. NC = no overflow check. + * + * LD64_GOT_LO12_NC has the same encoding as LDST64_ABS_LO12_NC; + * the linker has already redirected `S` to the GOT slot. */ + u32 shift = (k == R_AARCH64_LDST8_ABS_LO12_NC) ? 0u + : (k == R_AARCH64_LDST16_ABS_LO12_NC) ? 1u + : (k == R_AARCH64_LDST32_ABS_LO12_NC) ? 2u + : (k == R_AARCH64_LDST64_ABS_LO12_NC || + k == R_AARCH64_LD64_GOT_LO12_NC || + k == R_AARCH64_TLVP_LOAD_PAGEOFF12) + ? 3u + : 4u; + u64 lo12 = ((u64)S + (u64)A) & 0xfffu; + u64 imm12 = lo12 >> shift; + u32 instr = rd_u32_le(P_bytes); + if (lo12 & ((1u << shift) - 1u)) + compiler_panic(c, no_loc(), + "link: LDST%u_ABS_LO12_NC misaligned address " + "(kind=%u S=0x%llx A=%lld P=0x%llx)", + 1u << (3 + shift), (unsigned)k, + (unsigned long long)S, (long long)A, + (unsigned long long)P); + instr = (instr & ~(0xfffu << 10)) | ((u32)(imm12 & 0xfffu) << 10); + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_HI20: + case R_RV_TPREL_HI20: { + /* U-type (LUI/AUIPC) imm[31:12] = high 20 bits of (S + A + 0x800). + * The 0x800 bias compensates the sign-extension of the paired + * 12-bit ADDI/load/store immediate, so HI20 + signext12(LO12) + * reconstructs the full value. 
*/ + i64 v = (i64)S + A; + u32 hi20 = (u32)(((u64)(v + 0x800)) >> 12) & 0xfffffu; + u32 instr = rd_u32_le(P_bytes); + instr = (instr & 0x00000fffu) | (hi20 << 12); + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_PCREL_HI20: + case R_RV_GOT_HI20: { + /* AUIPC pc-relative HI20: same encoding as HI20 but the + * displacement is (S + A) - P. The paired PCREL_LO12 reloc at + * the ADDI/load below recovers the low 12 bits of the same + * displacement via a lookup keyed on this AUIPC's site vaddr. + * GOT_HI20 collapses to PCREL_HI20 in static-link with no + * indirection: the symbol resolves to its own address. */ + i64 disp = (i64)S + A - (i64)P; + u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; + u32 instr = rd_u32_le(P_bytes); + instr = (instr & 0x00000fffu) | (hi20 << 12); + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_LO12_I: + case R_RV_TPREL_LO12_I: { + /* I-type imm[11:0] in instruction bits [31:20]. Low 12 bits of + * (S + A); the sign-extension at execute time pairs with HI20's + * 0x800 bias to reconstruct the full address. */ + u64 v = (u64)((i64)S + A); + u32 lo12 = (u32)(v & 0xfffu); + u32 instr = rd_u32_le(P_bytes); + instr = (instr & 0x000fffffu) | (lo12 << 20); + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_LO12_S: + case R_RV_TPREL_LO12_S: { + /* S-type imm[11:5] in bits [31:25], imm[4:0] in bits [11:7]. */ + u64 v = (u64)((i64)S + A); + u32 lo12 = (u32)(v & 0xfffu); + u32 instr = rd_u32_le(P_bytes); + instr = (instr & 0x01fff07fu) | ((lo12 & 0xfe0u) << 20) | + ((lo12 & 0x1fu) << 7); + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_BRANCH: { + /* B-type 12-bit signed displacement in 2-byte units (13-bit + * range). imm[12] in bit 31, imm[10:5] in 30:25, imm[4:1] in + * 11:8, imm[11] in bit 7. 
*/ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 b; + if (disp & 1) + compiler_panic(c, no_loc(), "link: RV BRANCH misaligned displacement"); + if (disp < -(i64)(1 << 12) || disp >= (i64)(1 << 12)) + compiler_panic(c, no_loc(), "link: RV BRANCH out of range (need ±4KiB)"); + b = (u32)((u64)disp & 0x1ffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) | + ((u32)(((u64)disp >> 12) & 1u) << 12); + instr = rd_u32_le(P_bytes); + instr &= 0x01fff07fu; + instr |= ((b >> 12) & 1u) << 31; + instr |= ((b >> 5) & 0x3fu) << 25; + instr |= ((b >> 1) & 0xfu) << 8; + instr |= ((b >> 11) & 1u) << 7; + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_JAL: { + /* J-type 20-bit signed displacement in 2-byte units (21-bit + * range). imm[20] in bit 31, imm[10:1] in 30:21, imm[11] in bit + * 20, imm[19:12] in bits 19:12. */ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 b; + if (disp & 1) + compiler_panic(c, no_loc(), "link: RV JAL misaligned displacement"); + if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20)) + compiler_panic(c, no_loc(), "link: RV JAL out of range (need ±1MiB)"); + b = (u32)((u64)disp & 0x1ffffeu) | + ((u32)(((u64)disp >> 11) & 1u) << 11) | + ((u32)(((u64)disp >> 20) & 1u) << 20); + instr = rd_u32_le(P_bytes); + instr &= 0x00000fffu; + instr |= ((b >> 20) & 1u) << 31; + instr |= ((b >> 1) & 0x3ffu) << 21; + instr |= ((b >> 11) & 1u) << 20; + instr |= ((b >> 12) & 0xffu) << 12; + wr_u32_le(P_bytes, instr); + return; + } + case R_RV_CALL: + case R_PLT32: { + /* AUIPC + JALR pair encoding the same 32-bit signed PC-relative + * displacement. AUIPC at P, JALR at P+4. The 0x800 bias on the + * AUIPC immediate compensates JALR's signed 12-bit imm so that + * (auipc_imm << 12) + signext12(jalr_imm) == disp. + * + * R_PLT32 is the cfree-canonical RelocKind that + * elf_riscv64_reloc_from(R_RISCV_CALL_PLT) maps to; static-link + * with no PLT collapses CALL_PLT to a direct CALL (no + * indirection). 
*/ + i64 disp = (i64)S + A - (i64)P; + u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; + u32 lo12 = (u32)((u64)disp & 0xfffu); + u32 auipc = rd_u32_le(P_bytes); + u32 jalr = rd_u32_le(P_bytes + 4); + if (disp < -(i64)(1ll << 31) || disp >= (i64)(1ll << 31)) + compiler_panic(c, no_loc(), "link: RV CALL out of range (need ±2GiB)"); + auipc = (auipc & 0x00000fffu) | (hi20 << 12); + jalr = (jalr & 0x000fffffu) | (lo12 << 20); + wr_u32_le(P_bytes, auipc); + wr_u32_le(P_bytes + 4, jalr); + return; + } + case R_RV_RVC_BRANCH: { + /* CB-type 8-bit signed displacement in 2-byte units (9-bit + * range). c.beqz / c.bnez. Encoding (16-bit instruction): + * bit 12 = imm[8] + * bits 11:10 = imm[4:3] + * bits 9:7 = rs1' (untouched) + * bits 6:5 = imm[7:6] + * bits 4:3 = imm[2:1] + * bit 2 = imm[5] */ + i64 disp = (i64)S + A - (i64)P; + u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8)); + u32 b; + if (disp & 1) + compiler_panic(c, no_loc(), + "link: RV RVC_BRANCH misaligned displacement"); + if (disp < -(i64)(1 << 8) || disp >= (i64)(1 << 8)) + compiler_panic(c, no_loc(), + "link: RV RVC_BRANCH out of range (need ±256B)"); + b = (u32)((u64)disp & 0x1feu); + instr = (u16)(instr & 0xe383u); + instr = (u16)(instr | (((b >> 8) & 1u) << 12)); + instr = (u16)(instr | (((b >> 3) & 3u) << 10)); + instr = (u16)(instr | (((b >> 6) & 3u) << 5)); + instr = (u16)(instr | (((b >> 1) & 3u) << 3)); + instr = (u16)(instr | (((b >> 5) & 1u) << 2)); + P_bytes[0] = (u8)(instr & 0xffu); + P_bytes[1] = (u8)((instr >> 8) & 0xffu); + return; + } + case R_RV_RVC_JUMP: { + /* CJ-type 11-bit signed displacement in 2-byte units (12-bit + * range). c.j / c.jal. Encoding bits in the 16-bit instruction: + * 12=imm[11], 11=imm[4], 10:9=imm[9:8], 8=imm[10], + * 7=imm[6], 6=imm[7], 5:3=imm[3:1], 2=imm[5]. 
*/ + i64 disp = (i64)S + A - (i64)P; + u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8)); + u32 b; + if (disp & 1) + compiler_panic(c, no_loc(), + "link: RV RVC_JUMP misaligned displacement"); + if (disp < -(i64)(1 << 11) || disp >= (i64)(1 << 11)) + compiler_panic(c, no_loc(), + "link: RV RVC_JUMP out of range (need ±2KiB)"); + b = (u32)((u64)disp & 0xffeu); + instr = (u16)(instr & 0xe003u); + instr = (u16)(instr | (((b >> 11) & 1u) << 12)); + instr = (u16)(instr | (((b >> 4) & 1u) << 11)); + instr = (u16)(instr | (((b >> 8) & 3u) << 9)); + instr = (u16)(instr | (((b >> 10) & 1u) << 8)); + instr = (u16)(instr | (((b >> 6) & 1u) << 7)); + instr = (u16)(instr | (((b >> 7) & 1u) << 6)); + instr = (u16)(instr | (((b >> 1) & 7u) << 3)); + instr = (u16)(instr | (((b >> 5) & 1u) << 2)); + P_bytes[0] = (u8)(instr & 0xffu); + P_bytes[1] = (u8)((instr >> 8) & 0xffu); + return; + } + case R_RV_RELAX: + case R_RV_TPREL_ADD: + /* Marker relocs only — RELAX permits the prior reloc to be + * compressed, TPREL_ADD annotates a TLS thread-pointer ADD that + * the linker may fold during relaxation. We don't relax, so + * both are no-ops. */ + return; + case R_RV_ADD8: { + /* word8 += S + A. Used (paired with a SUB8 against another sym + * at the same site) to encode symbol differences. 
*/ + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)(cur + (u8)((S + (u64)A) & 0xffu)); + return; + } + case R_RV_SUB8: { + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)(cur - (u8)((S + (u64)A) & 0xffu)); + return; + } + case R_RV_ADD16: { + u16 cur = rd_u16_le(P_bytes); + wr_u16_le(P_bytes, (u16)(cur + (u16)((S + (u64)A) & 0xffffu))); + return; + } + case R_RV_SUB16: { + u16 cur = rd_u16_le(P_bytes); + wr_u16_le(P_bytes, (u16)(cur - (u16)((S + (u64)A) & 0xffffu))); + return; + } + case R_RV_ADD32: { + u32 cur = rd_u32_le(P_bytes); + wr_u32_le(P_bytes, (u32)(cur + (u32)((S + (u64)A) & 0xffffffffu))); + return; + } + case R_RV_SUB32: { + u32 cur = rd_u32_le(P_bytes); + wr_u32_le(P_bytes, (u32)(cur - (u32)((S + (u64)A) & 0xffffffffu))); + return; + } + case R_RV_ADD64: { + u64 cur = rd_u64_le(P_bytes); + wr_u64_le(P_bytes, cur + S + (u64)A); + return; + } + case R_RV_SUB64: { + u64 cur = rd_u64_le(P_bytes); + wr_u64_le(P_bytes, cur - S - (u64)A); + return; + } + case R_RV_SUB6: { + /* Bottom 6 bits of byte = (byte - (S + A)) & 0x3f. 
*/ + u8 cur = P_bytes[0]; + u8 v = (u8)((cur & 0x3fu) - (u8)((S + (u64)A) & 0x3fu)); + P_bytes[0] = (u8)((cur & 0xc0u) | (v & 0x3fu)); + return; + } + case R_RV_SET6: { + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)((cur & 0xc0u) | (u8)((S + (u64)A) & 0x3fu)); + return; + } + case R_RV_SET8: + P_bytes[0] = (u8)((S + (u64)A) & 0xffu); + return; + case R_RV_SET16: + wr_u16_le(P_bytes, (u16)((S + (u64)A) & 0xffffu)); + return; + case R_RV_SET32: + wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu)); + return; + default: + compiler_panic(c, no_loc(), + "link: unsupported reloc kind %u", + (unsigned)k); + } +} diff --git a/src/obj/reloc_apply.h b/src/obj/reloc_apply.h @@ -0,0 +1,9 @@ +#ifndef CFREE_OBJ_RELOC_APPLY_H +#define CFREE_OBJ_RELOC_APPLY_H + +#include "core/core.h" +#include "obj/obj.h" + +void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P); + +#endif diff --git a/src/os/linux/linux.c b/src/os/linux/linux.c @@ -0,0 +1,848 @@ +#include <string.h> + +#include "core/slice.h" +#include "emu/emu.h" +#include "obj/format.h" + +#define LINUX_SYS_openat 56u +#define LINUX_SYS_close 57u +#define LINUX_SYS_lseek 62u +#define LINUX_SYS_read 63u +#define LINUX_SYS_write 64u +#define LINUX_SYS_readv 65u +#define LINUX_SYS_writev 66u +#define LINUX_SYS_fstat 80u +#define LINUX_SYS_exit 93u +#define LINUX_SYS_exit_group 94u +#define LINUX_SYS_set_tid_address 96u +#define LINUX_SYS_clock_gettime 113u +#define LINUX_SYS_sched_yield 124u +#define LINUX_SYS_rt_sigaction 134u +#define LINUX_SYS_rt_sigprocmask 135u +#define LINUX_SYS_rt_sigreturn 139u +#define LINUX_SYS_getpid 172u +#define LINUX_SYS_getuid 174u +#define LINUX_SYS_geteuid 175u +#define LINUX_SYS_getgid 176u +#define LINUX_SYS_getegid 177u +#define LINUX_SYS_brk 214u +#define LINUX_SYS_mmap 222u +#define LINUX_SYS_mprotect 226u +#define LINUX_SYS_munmap 215u + +#define LINUX_PROT_READ 1u +#define LINUX_PROT_WRITE 2u +#define LINUX_PROT_EXEC 4u + +#define LINUX_MAP_PRIVATE 0x02u +#define 
LINUX_MAP_FIXED 0x10u +#define LINUX_MAP_ANONYMOUS 0x20u +#define LINUX_MAP_FIXED_NOREPLACE 0x100000u + +#define LINUX_EBADF 9 +#define LINUX_EFAULT 14 +#define LINUX_EINVAL 22 +#define LINUX_ENOSYS 38 +#define LINUX_ENOMEM 12 +#define LINUX_EEXIST 17 + +#define EMU_LINUX_SIGFRAME_MAGIC 0x53494746524d3031ull +#define EMU_LINUX_SIGFRAME_SIZE 512u +#define EMU_LINUX_SIGFRAME_SAVED_PC 8u +#define EMU_LINUX_SIGFRAME_SIGINFO 32u +#define EMU_LINUX_SIGFRAME_UCONTEXT 64u +#define EMU_LINUX_SIGFRAME_XREGS 128u +#define EMU_LINUX_STACK_SIZE (1u * 1024u * 1024u) +#define EMU_LINUX_BRK_RESERVE (2u * 1024u * 1024u) +#define EMU_LINUX_INITIAL_MMAP_HINT 0x4000000000ull + +typedef struct LinuxSignalAction { + u64 handler; + u64 flags; + u64 restorer; + int installed; +} LinuxSignalAction; + +typedef struct LinuxProcessState { + u64 mmap_hint; + LinuxSignalAction signal_actions[64]; +} LinuxProcessState; + +typedef struct LinuxThreadState { + u64 signal_mask; + u64 signal_frame_sp; + EmuTlsBlocks tls_blocks; +} LinuxThreadState; + +static u64 linux_round_up(u64 v, u64 a) { return (v + a - 1u) & ~(a - 1u); } + +static LinuxProcessState* linux_process_state(EmuProcess* process) { + return process ? (LinuxProcessState*)process->os_private : NULL; +} + +static LinuxThreadState* linux_thread_state(EmuThread* thread) { + return thread ? 
(LinuxThreadState*)thread->os_private : NULL; +} + +static u64 linux_rd64(const u8* p) { + return (u64)p[0] | ((u64)p[1] << 8) | ((u64)p[2] << 16) | + ((u64)p[3] << 24) | ((u64)p[4] << 32) | ((u64)p[5] << 40) | + ((u64)p[6] << 48) | ((u64)p[7] << 56); +} + +static void linux_wr64(u8* p, u64 v) { + u32 i; + for (i = 0; i < 8u; ++i) p[i] = (u8)(v >> (8u * i)); +} + +static void linux_free_stack_lists(Heap* heap, u64* argv_addrs, int argc, + u64* envp_addrs, int envc) { + if (argv_addrs) heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc); + if (envp_addrs) heap->free(heap, envp_addrs, sizeof(u64) * (size_t)envc); +} + +static u8 linux_emu_perms(u64 prot) { + u8 perms = 0; + if (prot & LINUX_PROT_READ) perms |= EMU_MEM_READ; + if (prot & LINUX_PROT_WRITE) perms |= EMU_MEM_WRITE; + if (prot & LINUX_PROT_EXEC) perms |= EMU_MEM_EXEC; + return perms; +} + +static CfreeStatus linux_find_map_region(EmuProcess* process, u64 nbytes, + u64 align, u32 purpose, u64* out) { + EmuAddrSpace* as; + LinuxProcessState* ps; + u64 max_va = 0x0000800000000000ull; + u64 min_va; + if (!process || !out) return CFREE_INVALID; + ps = linux_process_state(process); + if (!ps) return CFREE_INVALID; + as = &process->image.addr_space; + min_va = ps->mmap_hint; + if (purpose == EMU_OS_MAP_TLS && min_va < 0x7000000000ull) + min_va = 0x7000000000ull; + return emu_addr_space_find_gap(as, nbytes, align ? align : as->page_size, + min_va, max_va, out); +} + +static void linux_note_map_region(EmuProcess* process, u64 base, u64 nbytes, + u32 purpose) { + LinuxProcessState* ps; + u64 page_size; + (void)purpose; + if (!process || !nbytes) return; + ps = linux_process_state(process); + if (!ps) return; + page_size = process->image.addr_space.page_size + ? 
process->image.addr_space.page_size + : 0x1000u; + ps->mmap_hint = linux_round_up(base + nbytes, page_size); +} + +static CfreeStatus linux_init_process_private(Compiler* c, + EmuProcess* process) { + LinuxProcessState* ps; + CfreeStatus st; + if (!c || !process) return CFREE_INVALID; + st = emu_process_os_alloc(c, process, sizeof(LinuxProcessState), + _Alignof(LinuxProcessState)); + if (st != CFREE_OK) return st; + ps = linux_process_state(process); + ps->mmap_hint = EMU_LINUX_INITIAL_MMAP_HINT; + return CFREE_OK; +} + +static void linux_destroy_process_private(Compiler* c, EmuProcess* process) { + if (!c || !process) return; + emu_process_os_free(c, process, sizeof(LinuxProcessState)); +} + +static CfreeStatus linux_init_thread_private(Compiler* c, EmuProcess* process, + EmuThread* thread) { + (void)process; + if (!c || !thread) return CFREE_INVALID; + return emu_thread_os_alloc(c, thread, sizeof(LinuxThreadState), + _Alignof(LinuxThreadState)); +} + +static void linux_destroy_thread_private(Compiler* c, EmuThread* thread) { + LinuxThreadState* ts; + if (!c || !thread) return; + ts = linux_thread_state(thread); + if (ts) emu_tls_destroy_blocks(c, &ts->tls_blocks); + emu_thread_os_free(c, thread, sizeof(LinuxThreadState)); +} + +static CfreeStatus linux_init_process(Compiler* c, EmuProcess* process, + const EmuLoadOptions* opts, + const EmuLoadedImage* image) { + EmuLoadedImage* img; + EmuLoadedObject* main_obj; + Heap* heap; + u64 image_end; + u64 brk_start; + u64 stack_guard_base; + u64 stack_base; + u64 stack_top; + u64 cursor; + u64 at_random_va; + u64 table_bytes; + u64 sp; + u64* argv_addrs = NULL; + u64* envp_addrs = NULL; + int argc = 0; + int envc = 0; + const char* const* p; + u32 i; + u8* tp; + enum { + AT_NULL_ = 0, + AT_PHDR = 3, + AT_PHENT = 4, + AT_PHNUM = 5, + AT_PAGESZ = 6, + AT_ENTRY = 9, + AT_RANDOM = 25 + }; + struct { + u64 type; + u64 val; + } aux[7]; + u32 aux_count = sizeof(aux) / sizeof(aux[0]); + + (void)image; + if (!c || !process || 
!opts) return CFREE_INVALID; + if (!linux_process_state(process)) return CFREE_INVALID; + img = &process->image; + if (!img->link_map.nobjects || + img->link_map.main_object >= img->link_map.nobjects) + return CFREE_INVALID; + main_obj = &img->link_map.objects[img->link_map.main_object]; + heap = c->ctx->heap; + + image_end = linux_round_up(main_obj->map_end, img->addr_space.page_size); + brk_start = image_end; + stack_guard_base = brk_start + EMU_LINUX_BRK_RESERVE; + stack_base = stack_guard_base + img->addr_space.page_size; + stack_top = stack_base + EMU_LINUX_STACK_SIZE; + if (emu_addr_space_map(&img->addr_space, stack_guard_base, + img->addr_space.page_size, 0, EMU_MAP_GUARD) != + CFREE_OK || + emu_addr_space_map(&img->addr_space, stack_base, EMU_LINUX_STACK_SIZE, + EMU_MEM_READ | EMU_MEM_WRITE, EMU_MAP_ANON) != + CFREE_OK) + return CFREE_ERR; + + if (opts->argv) { + for (p = opts->argv; *p; ++p) ++argc; + } + if (opts->envp) { + for (p = opts->envp; *p; ++p) ++envc; + } + if (argc > 0) { + argv_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)argc, 8u); + if (!argv_addrs) return CFREE_ERR; + } + if (envc > 0) { + envp_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)envc, 8u); + if (!envp_addrs) { + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + return CFREE_ERR; + } + } + + cursor = stack_top; + for (i = 0; i < (u32)argc; ++i) { + size_t slen = slice_from_cstr(opts->argv[i]).len + 1u; + cursor -= slen; + if (emu_addr_space_copy_in(&img->addr_space, cursor, opts->argv[i], + slen) != CFREE_OK) { + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + return CFREE_INVALID; + } + argv_addrs[i] = cursor; + } + for (i = 0; i < (u32)envc; ++i) { + size_t slen = slice_from_cstr(opts->envp[i]).len + 1u; + cursor -= slen; + if (emu_addr_space_copy_in(&img->addr_space, cursor, opts->envp[i], + slen) != CFREE_OK) { + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + return CFREE_INVALID; + } + envp_addrs[i] = 
cursor; + } + cursor -= 16u; + { + u8 random[16]; + for (i = 0; i < 16u; ++i) random[i] = (u8)(0xa5u ^ i); + if (emu_addr_space_copy_in(&img->addr_space, cursor, random, 16u) != + CFREE_OK) { + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + return CFREE_INVALID; + } + } + at_random_va = cursor; + cursor &= ~(u64)0xfu; + + aux[0].type = AT_PHDR; + aux[0].val = img->process_info.headers_vaddr; + aux[1].type = AT_PHENT; + aux[1].val = img->process_info.header_entry_size; + aux[2].type = AT_PHNUM; + aux[2].val = img->process_info.header_count; + aux[3].type = AT_PAGESZ; + aux[3].val = img->addr_space.page_size; + aux[4].type = AT_ENTRY; + aux[4].val = img->entry_pc; + aux[5].type = AT_RANDOM; + aux[5].val = at_random_va; + aux[6].type = AT_NULL_; + aux[6].val = 0; + + table_bytes = 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u + + (u64)aux_count * 16u; + sp = (cursor - table_bytes) & ~(u64)0xfu; + tp = emu_addr_space_ptr(&img->addr_space, sp, table_bytes, EMU_MEM_WRITE); + if (!tp) { + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + return CFREE_INVALID; + } + linux_wr64(tp, (u64)argc); + tp += 8u; + for (i = 0; i < (u32)argc; ++i) { + linux_wr64(tp, argv_addrs[i]); + tp += 8u; + } + linux_wr64(tp, 0); + tp += 8u; + for (i = 0; i < (u32)envc; ++i) { + linux_wr64(tp, envp_addrs[i]); + tp += 8u; + } + linux_wr64(tp, 0); + tp += 8u; + for (i = 0; i < aux_count; ++i) { + linux_wr64(tp, aux[i].type); + tp += 8u; + linux_wr64(tp, aux[i].val); + tp += 8u; + } + linux_free_stack_lists(heap, argv_addrs, argc, envp_addrs, envc); + + img->addr_space.brk_base = brk_start; + img->addr_space.brk_cur = brk_start; + img->addr_space.brk_max = brk_start + EMU_LINUX_BRK_RESERVE; + linux_note_map_region(process, stack_base, EMU_LINUX_STACK_SIZE, + EMU_OS_MAP_MMAP); + img->initial_sp = sp; + if (emu_dl_init_process(c, process) != CFREE_OK) return CFREE_ERR; + if (!process->obj_format || !process->obj_format->emu || + 
emu_dl_load_dependencies_and_relocate(c, process, opts, + process->obj_format->emu) != + CFREE_OK) + return CFREE_ERR; + return CFREE_OK; +} + +static CfreeStatus linux_init_thread(Compiler* c, EmuProcess* process, + EmuThread* thread) { + LinuxThreadState* ts; + u32 i; + if (!c || !process || !thread || !thread->cpu) return CFREE_INVALID; + ts = linux_thread_state(thread); + if (!ts) return CFREE_INVALID; + if (!process->arch || !process->arch->emu || !process->arch->emu->set_tp) + return CFREE_UNSUPPORTED; + for (i = 0; i < process->tls_state.nmodules; ++i) { + EmuTlsModule* m = &process->tls_state.modules[i]; + u64 page_size = process->image.addr_space.page_size + ? process->image.addr_space.page_size + : 0x1000u; + u64 nbytes = linux_round_up(m->memsz ? m->memsz : m->filesz, page_size); + u64 base = 0; + if (!nbytes) continue; + if (!process->os || !process->os->emu_find_map_region || + process->os->emu_find_map_region(process, nbytes, page_size, + EMU_OS_MAP_TLS, &base) != CFREE_OK) + return CFREE_ERR; + if (emu_addr_space_map(&process->image.addr_space, base, nbytes, + EMU_MEM_READ | EMU_MEM_WRITE, EMU_MAP_ANON) != + CFREE_OK) + return CFREE_ERR; + if (process->os->emu_note_map_region) + process->os->emu_note_map_region(process, base, nbytes, EMU_OS_MAP_TLS); + if (emu_tls_copy_module_image(process, m, base) != CFREE_OK) + return CFREE_ERR; + if (emu_tls_blocks_add(c, &ts->tls_blocks, m->module_id, base, + m->memsz) != CFREE_OK) + return CFREE_NOMEM; + if (m->module_id == 1u) process->arch->emu->set_tp(thread, base); + } + return CFREE_OK; +} + +static CfreeStatus linux_deliver_signal(EmuProcess* process, EmuThread* thread, + int signo, u64 fault_addr, + u64 fault_pc, u64 next_pc, + u64* next_pc_out) { + EmuCPUState* cpu = emu_thread_cpu(thread); + LinuxProcessState* ps; + LinuxThreadState* ts; + LinuxSignalAction* act; + u64 sp; + u64 frame_sp; + u64 frame_size; + u64 ctx_size; + u64 stack_align; + u8* frame; + if (!process || !thread || !cpu || 
!next_pc_out) return CFREE_INVALID; + ps = linux_process_state(process); + ts = linux_thread_state(thread); + if (!ps || !ts) return CFREE_INVALID; + if (signo <= 0 || signo >= 64) { + emu_cpu_trap_fault(cpu); + *next_pc_out = fault_pc ? fault_pc : next_pc; + return CFREE_OK; + } + act = &ps->signal_actions[signo]; + if (ts->signal_mask & (1ull << (u32)signo)) { + emu_cpu_trap_fault(cpu); + *next_pc_out = fault_pc ? fault_pc : next_pc; + return CFREE_OK; + } + if (!act->installed || !act->handler) { + emu_cpu_trap_fault(cpu); + *next_pc_out = fault_pc ? fault_pc : next_pc; + return CFREE_OK; + } + if (!process->arch || !process->arch->emu || !process->arch->emu->get_sp || + !process->arch->emu->set_sp || + !process->arch->emu->signal_context_size || + !process->arch->emu->save_signal_context || + !process->arch->emu->set_signal_handler_args || + !process->arch->emu->signal_stack_align) + return CFREE_UNSUPPORTED; + sp = process->arch->emu->get_sp(thread); + ctx_size = process->arch->emu->signal_context_size(process, thread); + stack_align = process->arch->emu->signal_stack_align(process, thread); + if (!ctx_size || !stack_align || (stack_align & (stack_align - 1u)) != 0) + return CFREE_UNSUPPORTED; + frame_size = EMU_LINUX_SIGFRAME_XREGS + ctx_size; + if (frame_size < EMU_LINUX_SIGFRAME_SIZE) + frame_size = EMU_LINUX_SIGFRAME_SIZE; + frame_size = linux_round_up(frame_size, stack_align); + frame_sp = (sp - frame_size) & ~(stack_align - 1u); + frame = emu_cpu_va_to_host_perm(cpu, frame_sp, frame_size, EMU_MEM_WRITE); + if (!frame) { + emu_cpu_trap_fault(cpu); + *next_pc_out = fault_pc ? 
fault_pc : next_pc; + return CFREE_OK; + } + memset(frame, 0, (size_t)frame_size); + linux_wr64(frame, EMU_LINUX_SIGFRAME_MAGIC); + linux_wr64(frame + EMU_LINUX_SIGFRAME_SAVED_PC, fault_pc); + linux_wr64(frame + EMU_LINUX_SIGFRAME_SIGINFO, (u64)signo); + linux_wr64(frame + EMU_LINUX_SIGFRAME_SIGINFO + 16u, fault_addr); + if (process->arch->emu->save_signal_context( + process, thread, frame + EMU_LINUX_SIGFRAME_XREGS, ctx_size) != + CFREE_OK) + return CFREE_ERR; + ts->signal_frame_sp = frame_sp; + process->arch->emu->set_sp(thread, frame_sp); + if (process->arch->emu->set_signal_handler_args( + process, thread, signo, frame_sp + EMU_LINUX_SIGFRAME_SIGINFO, + frame_sp + EMU_LINUX_SIGFRAME_UCONTEXT) != CFREE_OK) + return CFREE_ERR; + emu_cpu_clear_trap(cpu); + emu_cpu_set_pc(cpu, act->handler); + *next_pc_out = act->handler; + return CFREE_OK; +} + +static CfreeStatus linux_deliver_fault(EmuProcess* process, EmuThread* thread, + const EmuFaultEvent* ev, + u64* next_pc_out) { + if (!ev) return CFREE_INVALID; + return linux_deliver_signal(process, thread, 11, ev->addr, ev->pc, + ev->next_pc, next_pc_out); +} + +static CfreeStatus linux_decode_syscall(EmuProcess* process, EmuThread* thread, + EmuSyscallRequest* out) { + u32 i; + if (!process || !thread || !thread->cpu || !out) + return CFREE_INVALID; + if (!process->arch || !process->arch->emu || + !process->arch->emu->get_syscall_no || + !process->arch->emu->get_syscall_arg) + return CFREE_UNSUPPORTED; + memset(out, 0, sizeof(*out)); + out->number = process->arch->emu->get_syscall_no(thread); + for (i = 0; i < 6u; ++i) + out->args[i] = process->arch->emu->get_syscall_arg(thread, i); + return CFREE_OK; +} + +static CfreeStatus linux_encode_syscall_result(EmuProcess* process, + EmuThread* thread, + const EmuSyscallResult* r) { + (void)process; + if (!process || !thread || !thread->cpu || !r) + return CFREE_INVALID; + if (!process->arch || !process->arch->emu || + !process->arch->emu->set_syscall_result) + return 
CFREE_UNSUPPORTED; + process->arch->emu->set_syscall_result(thread, (u64)r->result); + return CFREE_OK; +} + +static CfreeStatus linux_default_syscall(void* user, EmuProcess* process, + EmuThread* thread, + const EmuSyscallRequest* req, + EmuSyscallResult* out) { + EmuCPUState* s; + u64 nr; + u64 a0, a1, a2; + u64 a3, a4, a5; + i64 ret = -LINUX_ENOSYS; + (void)user; + if (!process || !thread || !thread->cpu || !req || !out) return CFREE_INVALID; + s = thread->cpu; + nr = req->number; + a0 = req->args[0]; + a1 = req->args[1]; + a2 = req->args[2]; + a3 = req->args[3]; + a4 = req->args[4]; + a5 = req->args[5]; + memset(out, 0, sizeof(*out)); + + switch (nr) { + case LINUX_SYS_exit: + case LINUX_SYS_exit_group: + emu_cpu_trap_exit(s, (int)(i32)a0); + return CFREE_OK; + case LINUX_SYS_write: { + u8* p = emu_cpu_va_to_host_perm(s, a1, a2, EMU_MEM_READ); + ret = p ? (i64)a2 : -LINUX_EFAULT; + break; + } + case LINUX_SYS_read: + ret = a0 == 0u ? 0 : -LINUX_EBADF; + break; + case LINUX_SYS_close: + ret = 0; + break; + case LINUX_SYS_brk: { + u64 actual = 0; + (void)emu_addr_space_set_brk(&process->image.addr_space, a0, &actual); + ret = (i64)actual; + break; + } + case LINUX_SYS_mmap: { + EmuAddrSpace* as = &process->image.addr_space; + LinuxProcessState* ps = linux_process_state(process); + u64 addr = a0; + u64 length = linux_round_up(a1, as->page_size); + u64 flags = a3; + u64 fd = a4; + u64 off = a5; + u64 map_at = 0; + CfreeStatus st; + if (!ps) { + ret = -LINUX_EINVAL; + break; + } + if (!length || (off & (as->page_size - 1u)) != 0) { + ret = -LINUX_EINVAL; + break; + } + if (!(flags & LINUX_MAP_ANONYMOUS)) { + ret = -LINUX_ENOSYS; + break; + } + if (!(flags & LINUX_MAP_PRIVATE)) { + ret = -LINUX_EINVAL; + break; + } + if ((flags & LINUX_MAP_ANONYMOUS) && (i64)fd != -1 && fd != 0) { + ret = -LINUX_EBADF; + break; + } + if (flags & (LINUX_MAP_FIXED | LINUX_MAP_FIXED_NOREPLACE)) { + if ((addr & (as->page_size - 1u)) != 0) { + ret = -LINUX_EINVAL; + break; + } + map_at = 
addr; + if (flags & LINUX_MAP_FIXED_NOREPLACE) { + st = emu_addr_space_map(as, map_at, length, linux_emu_perms(a2), + linux_emu_perms(a2) ? EMU_MAP_ANON + : EMU_MAP_GUARD); + if (st == CFREE_OK) { + if (process->os && process->os->emu_note_map_region) + process->os->emu_note_map_region(process, map_at, length, + EMU_OS_MAP_MMAP); + ret = (i64)map_at; + } else { + ret = -LINUX_EEXIST; + } + break; + } + (void)emu_addr_space_unmap(as, map_at, length); + } else { + u64 min_va = addr ? linux_round_up(addr, as->page_size) + : ps->mmap_hint; + if (addr) { + st = emu_addr_space_find_gap(as, length, as->page_size, min_va, + 0x0000800000000000ull, &map_at); + } else if (process->os && process->os->emu_find_map_region) { + st = process->os->emu_find_map_region(process, length, + as->page_size, + EMU_OS_MAP_MMAP, &map_at); + } else { + st = CFREE_UNSUPPORTED; + } + if (st != CFREE_OK) { + ret = -LINUX_ENOMEM; + break; + } + } + st = emu_addr_space_map(as, map_at, length, linux_emu_perms(a2), + linux_emu_perms(a2) ? 
EMU_MAP_ANON + : EMU_MAP_GUARD); + if (st != CFREE_OK) { + ret = -LINUX_ENOMEM; + } else { + if (process->os && process->os->emu_note_map_region) + process->os->emu_note_map_region(process, map_at, length, + EMU_OS_MAP_MMAP); + ret = (i64)map_at; + } + break; + } + case LINUX_SYS_munmap: { + EmuAddrSpace* as = &process->image.addr_space; + u64 addr = a0; + u64 length = linux_round_up(a1, as->page_size); + if (!length || (addr & (as->page_size - 1u)) != 0) { + ret = -LINUX_EINVAL; + } else { + (void)emu_addr_space_unmap(as, addr, length); + ret = 0; + } + break; + } + case LINUX_SYS_mprotect: { + EmuAddrSpace* as = &process->image.addr_space; + u64 addr = a0; + u64 length = linux_round_up(a1, as->page_size); + if (!length || (addr & (as->page_size - 1u)) != 0) { + ret = -LINUX_EINVAL; + } else if (emu_addr_space_protect(as, addr, length, + linux_emu_perms(a2)) == CFREE_OK) { + ret = 0; + } else { + ret = -LINUX_ENOMEM; + } + break; + } + case LINUX_SYS_fstat: { + u8* p = emu_cpu_va_to_host_perm(s, a1, 128u, EMU_MEM_WRITE); + if (!p) { + ret = -LINUX_EFAULT; + } else { + memset(p, 0, 128u); + ret = 0; + } + break; + } + case LINUX_SYS_openat: + ret = -2; + break; + case LINUX_SYS_lseek: + ret = (i64)a1; + break; + case LINUX_SYS_readv: { + u8* p = emu_cpu_va_to_host_perm(s, a1, a2 * 16u, EMU_MEM_READ); + ret = p ? 
0 : -LINUX_EFAULT; + break; + } + case LINUX_SYS_writev: { + u8* p = emu_cpu_va_to_host_perm(s, a1, a2 * 16u, EMU_MEM_READ); + u64 total = 0; + u64 i; + if (!p) { + ret = -LINUX_EFAULT; + break; + } + for (i = 0; i < a2; ++i) { + u64 l = 0; + u32 j; + for (j = 0; j < 8u; ++j) l |= ((u64)p[i * 16u + 8u + j]) << (8u * j); + total += l; + } + ret = (i64)total; + break; + } + case LINUX_SYS_set_tid_address: + ret = 1; + break; + case LINUX_SYS_clock_gettime: { + u8* p = emu_cpu_va_to_host_perm(s, a1, 16u, EMU_MEM_WRITE); + if (!p) { + ret = -LINUX_EFAULT; + } else { + memset(p, 0, 16u); + ret = 0; + } + break; + } + case LINUX_SYS_sched_yield: + ret = 0; + break; + case LINUX_SYS_rt_sigaction: + if (a0 < 64u && a1) { + LinuxProcessState* ps = linux_process_state(process); + u8* p = emu_cpu_va_to_host_perm(s, a1, 24u, EMU_MEM_READ); + if (!ps) { + ret = -LINUX_EINVAL; + } else if (!p) { + ret = -LINUX_EFAULT; + } else { + u64 handler = 0, flags = 0, restorer = 0; + u32 j; + for (j = 0; j < 8u; ++j) handler |= ((u64)p[j]) << (8u * j); + for (j = 0; j < 8u; ++j) flags |= ((u64)p[8u + j]) << (8u * j); + for (j = 0; j < 8u; ++j) + restorer |= ((u64)p[16u + j]) << (8u * j); + ps->signal_actions[a0].handler = handler; + ps->signal_actions[a0].flags = flags; + ps->signal_actions[a0].restorer = restorer; + ps->signal_actions[a0].installed = 1; + ret = 0; + } + } else { + ret = 0; + } + break; + case LINUX_SYS_rt_sigprocmask: + if (a0 == 0u && a1) { + LinuxThreadState* ts = linux_thread_state(thread); + u8* p = emu_cpu_va_to_host_perm(s, a1, 8u, EMU_MEM_READ); + if (!ts) { + ret = -LINUX_EINVAL; + } else if (!p) { + ret = -LINUX_EFAULT; + } else { + ts->signal_mask |= linux_rd64(p); + ret = 0; + } + } else if (a0 == 1u && a1) { + LinuxThreadState* ts = linux_thread_state(thread); + u8* p = emu_cpu_va_to_host_perm(s, a1, 8u, EMU_MEM_READ); + if (!ts) { + ret = -LINUX_EINVAL; + } else if (!p) { + ret = -LINUX_EFAULT; + } else { + ts->signal_mask &= ~linux_rd64(p); + ret = 0; + } + 
} else if (a0 == 2u && a1) { + LinuxThreadState* ts = linux_thread_state(thread); + u8* p = emu_cpu_va_to_host_perm(s, a1, 8u, EMU_MEM_READ); + if (!ts) { + ret = -LINUX_EINVAL; + } else if (!p) { + ret = -LINUX_EFAULT; + } else { + ts->signal_mask = linux_rd64(p); + ret = 0; + } + } else { + ret = 0; + } + break; + case LINUX_SYS_rt_sigreturn: { + LinuxThreadState* ts = linux_thread_state(thread); + u64 frame_sp; + u8* frame; + u64 ctx_size; + u64 frame_size; + if (!process->arch || !process->arch->emu || + !process->arch->emu->get_sp || + !process->arch->emu->signal_context_size || + !process->arch->emu->restore_signal_context) { + ret = -LINUX_ENOSYS; + break; + } + if (!ts) { + ret = -LINUX_EINVAL; + break; + } + ctx_size = process->arch->emu->signal_context_size(process, thread); + frame_size = EMU_LINUX_SIGFRAME_XREGS + ctx_size; + frame_sp = ts->signal_frame_sp ? ts->signal_frame_sp + : process->arch->emu->get_sp(thread); + frame = emu_cpu_va_to_host_perm(s, frame_sp, frame_size, EMU_MEM_READ); + if (!frame || linux_rd64(frame) != EMU_LINUX_SIGFRAME_MAGIC) { + ret = -LINUX_EFAULT; + break; + } + if (process->arch->emu->restore_signal_context( + process, thread, frame + EMU_LINUX_SIGFRAME_XREGS, ctx_size) != + CFREE_OK) { + ret = -LINUX_EFAULT; + break; + } + emu_cpu_set_pc(s, linux_rd64(frame + EMU_LINUX_SIGFRAME_SAVED_PC)); + ts->signal_frame_sp = 0; + out->flags |= EMU_SYSCALL_RESULT_SKIP_ENCODE; + ret = 0; + break; + } + case LINUX_SYS_getpid: + case LINUX_SYS_getuid: + case LINUX_SYS_geteuid: + case LINUX_SYS_getgid: + case LINUX_SYS_getegid: + ret = 1; + break; + default: + ret = -LINUX_ENOSYS; + break; + } + + out->result = ret; + return CFREE_OK; +} + +static u64 linux_syscall_next_pc(EmuProcess* process, EmuThread* thread, + const EmuSyscallRequest* req, u64 next_pc) { + EmuCPUState* s = emu_thread_cpu(thread); + (void)process; + if (req && req->number == LINUX_SYS_rt_sigreturn && s) + return emu_cpu_pc(s); + return next_pc; +} + +const CfreeOsImpl 
linux_os_impl = { + .kind = CFREE_OS_LINUX, + .name = "linux", + .emu_init_process_private = linux_init_process_private, + .emu_destroy_process_private = linux_destroy_process_private, + .emu_init_thread_private = linux_init_thread_private, + .emu_destroy_thread_private = linux_destroy_thread_private, + .emu_init_process = linux_init_process, + .emu_init_thread = linux_init_thread, + .emu_decode_syscall = linux_decode_syscall, + .emu_encode_syscall_result = linux_encode_syscall_result, + .emu_syscall_next_pc = linux_syscall_next_pc, + .emu_find_map_region = linux_find_map_region, + .emu_note_map_region = linux_note_map_region, + .emu_default_syscall = linux_default_syscall, + .emu_deliver_fault = linux_deliver_fault, +}; diff --git a/src/os/registry.c b/src/os/registry.c @@ -0,0 +1,12 @@ +#include "emu/emu.h" + +extern const CfreeOsImpl linux_os_impl; + +const CfreeOsImpl* os_lookup(CfreeOSKind kind) { + switch (kind) { + case CFREE_OS_LINUX: + return &linux_os_impl; + default: + return NULL; + } +} diff --git a/test/arch/rv64_decode_test.c b/test/arch/rv64_decode_test.c @@ -0,0 +1,177 @@ +/* RV64 structured decode test. + * + * Pins the ArchDecodeOps path used by the emu lifter bring-up: bytes decode + * into CfreeDecodedInsn records, and the formatter renders those same + * records for disassembly without parsing text back into operands. */ + +#include <cfree/compile.h> +#include <cfree/core.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "arch/arch.h" +#include "arch/rv64/isa.h" + +static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void h_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + (void)s; + (void)loc; + fprintf(stderr, "diag %d: ", (int)k); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; +static CfreeContext g_ctx; + +static int fails; + +#define EXPECT(cond, ...) \ + do { \ + if (!(cond)) { \ + ++fails; \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + } \ + } while (0) + +static CfreeCompiler* new_compiler(void) { + CfreeTarget t; + CfreeCompiler* c = NULL; + memset(&t, 0, sizeof(t)); + t.arch = CFREE_ARCH_RV64; + t.os = CFREE_OS_LINUX; + t.obj = CFREE_OBJ_ELF; + t.ptr_size = 8; + t.ptr_align = 8; + memset(&g_ctx, 0, sizeof(g_ctx)); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + if (cfree_compiler_new(t, &g_ctx, &c) != CFREE_OK || !c) { + fprintf(stderr, "compiler_new failed\n"); + exit(2); + } + return c; +} + +static void put32(unsigned char* b, size_t off, unsigned v) { + b[off + 0] = (unsigned char)v; + b[off + 1] = (unsigned char)(v >> 8); + b[off + 2] = (unsigned char)(v >> 16); + b[off + 3] = (unsigned char)(v >> 24); +} + +static void decode_addi(CfreeCompiler* pub) { + Compiler* c = (Compiler*)pub; + unsigned char bytes[4]; + CfreeDecodedInsn insn; + CfreeStatus st; + + put32(bytes, 0, rv_addi(RV_A0, RV_ZERO, 42)); + memset(&insn, 0, sizeof(insn)); + st = arch_decode_one(c, bytes, sizeof(bytes), 0x1000, &insn); + EXPECT(st == CFREE_OK, "decode_one(addi) status %d", (int)st); + EXPECT(insn.pc == 0x1000, "pc = 0x%llx", + (unsigned long long)insn.pc); + EXPECT(insn.nbytes == 4, 
"nbytes = %u", (unsigned)insn.nbytes); + EXPECT(insn.opcode == RV64_DEC_ADDI, "opcode = %u, want ADDI", + (unsigned)insn.opcode); + EXPECT((insn.flags & CFREE_DECODE_TERMINATOR) == 0, + "addi should not be a terminator"); + EXPECT(insn.noperands == 3, "addi operand count = %u", + (unsigned)insn.noperands); + EXPECT(insn.operands[0].kind == CFREE_DECOP_REG && + insn.operands[0].reg == RV_A0, + "addi rd operand wrong"); + EXPECT(insn.operands[1].kind == CFREE_DECOP_REG && + insn.operands[1].reg == RV_ZERO, + "addi rs1 operand wrong"); + EXPECT(insn.operands[2].kind == CFREE_DECOP_IMM && + insn.operands[2].imm == 42, + "addi imm operand wrong"); +} + +static void decode_block_stops_at_ecall(CfreeCompiler* pub) { + Compiler* c = (Compiler*)pub; + unsigned char bytes[16]; + CfreeDecodedInsn insts[4]; + u32 n = 0; + CfreeStatus st; + + put32(bytes, 0, rv_addi(RV_A0, RV_ZERO, 42)); + put32(bytes, 4, rv_addi(RV_A7, RV_ZERO, 93)); + put32(bytes, 8, rv_ecall()); + put32(bytes, 12, rv_addi(RV_A0, RV_ZERO, 7)); + + memset(insts, 0, sizeof(insts)); + st = arch_decode_block(c, bytes, sizeof(bytes), 0x2000, insts, 4, &n); + EXPECT(st == CFREE_OK, "decode_block status %d", (int)st); + EXPECT(n == 3, "decode_block count = %u", (unsigned)n); + EXPECT(insts[2].nbytes == 4, "ecall nbytes = %u", + (unsigned)insts[2].nbytes); + EXPECT(insts[2].opcode == RV64_DEC_ECALL, "ecall opcode = %u", + (unsigned)insts[2].opcode); + EXPECT((insts[2].flags & CFREE_DECODE_TERMINATOR) != 0, + "ecall should terminate block"); + EXPECT((insts[2].flags & CFREE_DECODE_TRAP) != 0, + "ecall should be marked trap"); +} + +static void format_decoded_record(CfreeCompiler* pub) { + Compiler* c = (Compiler*)pub; + unsigned char bytes[4]; + CfreeDecodedInsn insn; + ArchInsnFormatter* fmt; + CfreeInsn text; + CfreeStatus st; + + put32(bytes, 0, rv_addi(RV_A0, RV_ZERO, 42)); + st = arch_decode_one(c, bytes, sizeof(bytes), 0x3000, &insn); + EXPECT(st == CFREE_OK, "decode_one for format status %d", (int)st); + fmt = 
arch_insn_formatter_new(c); + EXPECT(fmt != NULL, "formatter_new returned NULL"); + if (!fmt) return; + memset(&text, 0, sizeof(text)); + st = arch_format_insn(fmt, &insn, &text); + EXPECT(st == CFREE_OK, "format status %d", (int)st); + EXPECT(cfree_slice_eq_cstr(text.mnemonic, "li"), "mnemonic = %.*s", + CFREE_SLICE_ARG(text.mnemonic)); + EXPECT(text.operands.s && strstr(text.operands.s, "a0"), + "operands missing a0: %.*s", CFREE_SLICE_ARG(text.operands)); + EXPECT(text.operands.s && strstr(text.operands.s, "42"), + "operands missing 42: %.*s", CFREE_SLICE_ARG(text.operands)); + arch_insn_formatter_free(fmt); +} + +int main(void) { + CfreeCompiler* c = new_compiler(); + decode_addi(c); + decode_block_stops_at_ecall(c); + format_decoded_record(c); + cfree_compiler_free(c); + if (fails) { + fprintf(stderr, "rv64_decode_test: %d failure(s)\n", fails); + return 1; + } + return 0; +} diff --git a/test/emu/rv64_extras_test.c b/test/emu/rv64_extras_test.c @@ -1,614 +0,0 @@ -/* RV64 emulator extras smoke test. - * - * Pins behavior added in Wave 2 of the rv64 emulator parity push: - * - FCVT family (int <-> fp, fp <-> fp) - * - FSGNJ.{s,d} - * - FMIN/FMAX - * - FMADD.{s,d} - * - FCLASS - * - RVC (compressed) decode — c.li / c.add / c.mv expansions - * - CSR access against the fcsr / frm / fflags subset - * - New syscalls: clock_gettime, sched_yield, getuid family, - * set_tid_address, openat, lseek, writev, rt_sigaction - * - PT_INTERP detection (smoke only: we feed a fake interp ELF and - * check the loader's auxv contains an AT_BASE entry pointing at - * the interpreter's load base). - * - * The interpreter path is the one this test pins. The JIT lift path - * (src/emu/lift.c) is deferred — see that file's header comment. 
*/ - -#include <cfree/compile.h> -#include <cfree/core.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "arch/rv64/isa.h" -#include "core/core.h" -#include "emu/emu.h" -#include "emu/rv64_ops.h" -#include "obj/elf/elf.h" - -/* Loader side-channel — declared in elf_load.c. */ -int emu_load_elf_attach(EmuCPUState*, const EmuLoadedImage*); -void emu_load_elf_set_interp_bytes(const unsigned char* bytes, size_t len); - -/* ============================================================ - * Test harness glue (mirrors rv64_smoke_test.c). - * ============================================================ */ - -static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { - (void)h; - (void)a; - return n ? malloc(n) : NULL; -} -static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { - (void)h; - (void)o; - (void)a; - return realloc(p, n); -} -static void h_free(CfreeHeap* h, void* p, size_t n) { - (void)h; - (void)n; - free(p); -} -static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; - -static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, - const char* fmt, va_list ap) { - (void)s; - (void)loc; - fprintf(stderr, "diag %d: ", (int)k); - vfprintf(stderr, fmt, ap); - fputc('\n', stderr); -} -static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; -static CfreeContext g_ctx; - -static int g_fail; -#define EXPECT(cond, ...) 
\ - do { \ - if (!(cond)) { \ - ++g_fail; \ - fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fputc('\n', stderr); \ - } \ - } while (0) - -static CfreeCompiler* new_compiler(void) { - CfreeTarget t; - CfreeCompiler* c = NULL; - memset(&t, 0, sizeof t); - t.arch = CFREE_ARCH_RV64; - t.os = CFREE_OS_LINUX; - t.obj = CFREE_OBJ_ELF; - t.ptr_size = 8; - t.ptr_align = 8; - memset(&g_ctx, 0, sizeof g_ctx); - g_ctx.heap = &g_heap; - g_ctx.diag = &g_diag; - if (cfree_compiler_new(t, &g_ctx, &c) != CFREE_OK || !c) { - fprintf(stderr, "compiler_new failed\n"); - exit(2); - } - return c; -} - -/* ============================================================ - * Decode-only assertions for new ops. - * ============================================================ */ - -/* FCVT.W.S — funct7=0x60 (fmt 0 = S, major 0x18), rs2=0 (W), rd, rs1. */ -static u32 enc_fcvt_w_s(u32 rd, u32 rs1) { - return (0x60u << 25) | (0u << 20) | (rs1 << 15) | (0u << 12) | (rd << 7) | - 0x53u; -} -/* FSGNJ.S — funct7=0x10 (fmt 0, major 0x04), funct3=0 */ -static u32 enc_fsgnj_s(u32 rd, u32 rs1, u32 rs2) { - return (0x10u << 25) | (rs2 << 20) | (rs1 << 15) | (0u << 12) | (rd << 7) | - 0x53u; -} -/* FMADD.S — opcode 0x43, funct7 fmt bit 0 = S, rs3 in bits 31..27 */ -static u32 enc_fmadd_s(u32 rd, u32 rs1, u32 rs2, u32 rs3) { - return (rs3 << 27) | (0u << 25) | (rs2 << 20) | (rs1 << 15) | (0u << 12) | - (rd << 7) | 0x43u; -} -/* CSRRS rd, csr, rs1 — funct3=2 */ -static u32 enc_csrrs(u32 rd, u32 csr, u32 rs1) { - return ((csr & 0xfffu) << 20) | (rs1 << 15) | (2u << 12) | (rd << 7) | 0x73u; -} -/* CSRRWI rd, csr, zimm5 — funct3=5, zimm5 in rs1 slot */ -static u32 enc_csrrwi(u32 rd, u32 csr, u32 zimm) { - return ((csr & 0xfffu) << 20) | ((zimm & 0x1fu) << 15) | (5u << 12) | - (rd << 7) | 0x73u; -} - -static void decode_extras(void) { - EmuInst insts[8]; - unsigned char buf[32]; - u32 n; - - /* FCVT.W.S a0, fa0 */ - ((u32*)buf)[0] = enc_fcvt_w_s(10, 10); - 
((u32*)buf)[1] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 2u && insts[0].op == RV64_OP_FCVT_W_S, "FCVT.W.S decode"); - - /* FSGNJ.S fa2, fa0, fa1 */ - ((u32*)buf)[0] = enc_fsgnj_s(12, 10, 11); - ((u32*)buf)[1] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 2u && insts[0].op == RV64_OP_FSGNJ_S, "FSGNJ.S decode"); - - /* FMADD.S */ - ((u32*)buf)[0] = enc_fmadd_s(12, 10, 11, 13); - ((u32*)buf)[1] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 2u && insts[0].op == RV64_OP_FMADD_S, "FMADD.S decode"); - EXPECT((u32)insts[0].operands[5] == 13u, "FMADD.S rs3 should be 13"); - - /* CSRRS a0, fcsr, x0 -- read fcsr into a0 */ - ((u32*)buf)[0] = enc_csrrs(10, 0x003, 0); - ((u32*)buf)[1] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 2u && insts[0].op == RV64_OP_CSRRS, "CSRRS decode"); - EXPECT((u32)(i64)insts[0].operands[3] == 0x003u, - "CSRRS imm should be csr=0x003, got 0x%x", - (unsigned)(u64)insts[0].operands[3]); - - /* CSRRWI x0, frm, 0b011 (round mode = RDN) */ - ((u32*)buf)[0] = enc_csrrwi(0, 0x002, 3); - ((u32*)buf)[1] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 2u && insts[0].op == RV64_OP_CSRRWI, "CSRRWI decode"); -} - -/* RVC: two compressed insns followed by ECALL. We pack a halfword - * stream by hand: C.LI a0, 5 (0x4115) followed by C.ADDI a0, 1 (0x0505) - * then ECALL (32-bit). - * - * C.LI rd, imm6: 010_imm5_rd_imm4..0_01 - * imm = 5, rd = a0 (10). 
Layout: - * [15:13]=010 (C.LI) - * [12] = imm[5] = 0 - * [11:7] = rd = 10 - * [6:2] = imm[4:0] = 5 - * [1:0] = 01 - * => 0100 0101 0001 0101 = 0x4515 - * - * C.ADDI rd, imm6: 000_imm5_rd_imm4..0_01 - * rd = a0 (10), imm = 1 - * [15:13]=000 - * [12]=0 - * [11:7]=10 - * [6:2]=1 - * [1:0]=01 - * => 0000 0101 0000 0101 = 0x0505 - */ -static void decode_rvc(void) { - EmuInst insts[8]; - unsigned char buf[16]; - u32 n; - buf[0] = 0x15; - buf[1] = 0x45; /* C.LI a0, 5 */ - buf[2] = 0x05; - buf[3] = 0x05; /* C.ADDI a0, 1 */ - ((u32*)(buf + 4))[0] = rv_ecall(); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); - EXPECT(n >= 3u, "RVC decode block returned %u insts", n); - EXPECT(insts[0].op == RV64_OP_ADDI && (u32)insts[0].operands[0] == 10u && - (i64)insts[0].operands[3] == 5, - "RVC c.li -> addi a0, x0, 5 (got op=%u rd=%u imm=%lld)", - (unsigned)insts[0].op, (unsigned)insts[0].operands[0], - (long long)(i64)insts[0].operands[3]); - EXPECT(insts[0].guest_bytes == 2u, "RVC insn must advance PC by 2, got %u", - insts[0].guest_bytes); - EXPECT(insts[1].op == RV64_OP_ADDI && (u32)insts[1].operands[0] == 10u && - (i64)insts[1].operands[3] == 1, - "RVC c.addi -> addi a0, a0, 1"); - EXPECT(insts[2].op == RV64_OP_ECALL, "ECALL after RVC"); -} - -/* ============================================================ - * Interpreter executes FCVT + CSR via a hand-rolled ELF - * ============================================================ - * - * Program: load int 42 into a0, FCVT.S.W ft0, a0 (single-precision - * 42.0), FMV.X.W a1, ft0 (read bits), CSRRS a2, fcsr, x0, exit a1. - * - * We don't actually need to verify the exact float bits — just that - * the interpreter dispatched through each new op without trapping. - * The exit code is the float's bit pattern's low byte, which is - * deterministic (the float 42.0 has bits 0x42280000). 
- */ -static u32 enc_fcvt_s_w(u32 rd, u32 rs1) { - /* major=0x1a, fmt=0 (S), rs2=0 (W) -> funct7 = (0x1a<<2)|0 = 0x68 */ - return (0x68u << 25) | (0u << 20) | (rs1 << 15) | (0u << 12) | (rd << 7) | - 0x53u; -} -static u32 enc_fmv_x_w(u32 rd, u32 rs1) { - /* major=0x1c, fmt=0 (S), rs2=0, funct3=0, funct7=0x70 */ - return (0x70u << 25) | (0u << 20) | (rs1 << 15) | (0u << 12) | (rd << 7) | - 0x53u; -} - -static unsigned char* build_fp_elf(size_t* out_len) { - enum { PAGE = 0x1000u, BASE_VA = 0x10000ull, TEXT_OFF = 0x1000u }; - /* Instruction stream: 7 insns = 28 bytes. */ - u32 prog[16]; - size_t prog_n = 0; - prog[prog_n++] = rv_addi(10, 0, 42); /* a0 = 42 */ - prog[prog_n++] = enc_fcvt_s_w(0, 10); /* ft0 = (float)a0 */ - prog[prog_n++] = enc_fmv_x_w(11, 0); /* a1 = bits(ft0) */ - prog[prog_n++] = enc_csrrs(12, 0x003, 0); /* a2 = fcsr */ - prog[prog_n++] = rv_addi(10, 0, 0); /* a0 = 0 (exit code) */ - prog[prog_n++] = rv_addi(17, 0, 94); /* a7 = SYS_exit_group */ - prog[prog_n++] = rv_ecall(); /* ecall */ - - size_t prog_bytes = prog_n * 4u; - size_t total = TEXT_OFF + prog_bytes; - unsigned char* b = (unsigned char*)calloc(1, total); - if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; - b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; - b[EI_MAG3] = ELFMAG3; - b[EI_CLASS] = ELFCLASS64; - b[EI_DATA] = ELFDATA2LSB; - b[EI_VERSION] = EV_CURRENT; - /* e_type=ET_EXEC, e_machine=EM_RISCV, e_entry, e_phoff, ... */ - unsigned* p32; - unsigned long long* p64; - /* Use the same put helpers idiom from smoke_test: open-code them. 
*/ - b[16] = ET_EXEC; - b[17] = 0; - b[18] = (unsigned char)EM_RISCV; - b[19] = (unsigned char)(EM_RISCV >> 8); - b[20] = EV_CURRENT; - /* e_entry = BASE_VA + TEXT_OFF */ - unsigned long long ent = BASE_VA + TEXT_OFF; - for (int i = 0; i < 8; ++i) b[24 + i] = (unsigned char)(ent >> (8 * i)); - /* e_phoff = 64 */ - b[32] = 64; - for (int i = 1; i < 8; ++i) b[32 + i] = 0; - /* e_ehsize=64, e_phentsize=56, e_phnum=1 */ - b[52] = ELF64_EHDR_SIZE; - b[53] = 0; - b[54] = ELF64_PHDR_SIZE; - b[55] = 0; - b[56] = 1; - b[57] = 0; - - /* PT_LOAD covering [0, total) at VA BASE_VA. */ - b[64] = PT_LOAD; /* p_type lo */ - b[64 + 4] = (unsigned char)(PF_R | PF_X); - /* p_offset = 0; p_vaddr = BASE_VA; p_paddr = BASE_VA; p_filesz = total; - * p_memsz = total; p_align = PAGE. */ - for (int i = 0; i < 8; ++i) - b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); - for (int i = 0; i < 8; ++i) - b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); - unsigned long long tot = total; - for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i)); - for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i)); - b[64 + 48] = (unsigned char)PAGE; - b[64 + 49] = (unsigned char)(PAGE >> 8); - - /* Copy the program bytes at file offset TEXT_OFF. 
*/ - memcpy(b + TEXT_OFF, prog, prog_bytes); - (void)p32; - (void)p64; - *out_len = total; - return b; -} - -static void fp_csr_interp(void) { - CfreeCompiler* c = new_compiler(); - Compiler* cc = (Compiler*)c; - unsigned char* elf; - size_t elf_len; - EmuLoadedImage img; - EmuCPUState* cpu; - EmuInst insts[16]; - u32 n; - u32 steps; - - elf = build_fp_elf(&elf_len); - EXPECT(elf != NULL, "ELF build"); - if (!elf) return; - - memset(&img, 0, sizeof img); - int rc = - emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, NULL, NULL, &img); - EXPECT(rc == 0, "emu_load_elf rc=%d", rc); - if (rc != 0) { - free(elf); - cfree_compiler_free(c); - return; - } - - cpu = emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, img.entry_pc, img.initial_sp); - EXPECT(cpu != NULL, "cpu_new"); - rc = emu_load_elf_attach(cpu, &img); - EXPECT(rc == 0, "attach"); - - for (steps = 0; steps < 64u; ++steps) { - u64 pc = emu_cpu_pc(cpu); - unsigned char* p = emu_cpu_va_to_host_pub(cpu, pc, 4); - if (!p) { - EXPECT(0, "PC OOB"); - break; - } - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, p, pc, insts, 16); - if (n == 0) { - EXPECT(0, "decode 0"); - break; - } - emu_cpu_interp_block(cpu, insts, n); - if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break; - } - EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT, "trap_reason = EXIT"); - /* exit code was a0 = 0, which we set explicitly. */ - EXPECT(emu_cpu_exit_code(cpu) == 0, "exit_code 0"); - - /* Inspect a1 / a2 to confirm FCVT.S.W and CSRRS ran. */ - EXPECT(emu_cpu_xreg(cpu, 11) == 0x42280000ull, - "a1 should hold bits of (float)42 = 0x42280000, got 0x%llx", - (unsigned long long)emu_cpu_xreg(cpu, 11)); - EXPECT(emu_cpu_xreg(cpu, 12) == 0, "a2 fcsr starts at 0, got 0x%llx", - (unsigned long long)emu_cpu_xreg(cpu, 12)); - - emu_cpu_free(cpu); - emu_unload_image(cc, &img); - free(elf); - cfree_compiler_free(c); -} - -/* ============================================================ - * Syscall coverage: exercise the new stub syscalls. 
- * ============================================================ */ -static void syscalls_extras(void) { - CfreeCompiler* c = new_compiler(); - Compiler* cc = (Compiler*)c; - EmuCPUState* cpu = emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, 0, 0); - EXPECT(cpu != NULL, "cpu"); - /* sched_yield => 0. a7 = 124, a0 unused. */ - emu_cpu_set_xreg(cpu, 17, 124u); - emu_syscall(cpu); - EXPECT((i64)emu_cpu_xreg(cpu, 10) == 0, "sched_yield returns 0"); - - /* getuid => 1. */ - emu_cpu_set_xreg(cpu, 17, 174u); - emu_syscall(cpu); - EXPECT((i64)emu_cpu_xreg(cpu, 10) == 1, "getuid returns 1"); - - /* set_tid_address => 1. */ - emu_cpu_set_xreg(cpu, 17, 96u); - emu_syscall(cpu); - EXPECT((i64)emu_cpu_xreg(cpu, 10) == 1, "set_tid_address returns 1"); - - /* openat => -ENOENT (-2). */ - emu_cpu_set_xreg(cpu, 17, 56u); - emu_syscall(cpu); - EXPECT((i64)emu_cpu_xreg(cpu, 10) == -2, "openat returns -ENOENT"); - - /* lseek => returns the offset arg (a1). */ - emu_cpu_set_xreg(cpu, 17, 62u); - emu_cpu_set_xreg(cpu, 11, 0x123u); - emu_syscall(cpu); - EXPECT(emu_cpu_xreg(cpu, 10) == 0x123ull, "lseek returns offset"); - - /* rt_sigaction => 0. */ - emu_cpu_set_xreg(cpu, 17, 134u); - emu_syscall(cpu); - EXPECT((i64)emu_cpu_xreg(cpu, 10) == 0, "rt_sigaction returns 0"); - - emu_cpu_free(cpu); - cfree_compiler_free(c); -} - -/* ============================================================ - * PT_INTERP loader handoff - * ============================================================ - * - * Builds a tiny "program ELF" that has both a PT_LOAD and a PT_INTERP - * pointing at the path "/lib/ld-musl-riscv64.so.1". The interpreter - * ELF is staged via emu_load_elf_set_interp_bytes; we use a minimal - * ET_DYN ELF whose only segment is the loader's tiny .text. The - * loader should pick the interpreter entry as the initial PC. */ - -static unsigned char* build_minimal_interp_elf(size_t* out_len) { - /* ET_DYN with one PT_LOAD covering [0, 0x1010) and a token instruction - * (a0=99, ecall) at entry 0x1000. 
*/ - enum { PAGE = 0x1000u, TEXT_OFF = 0x1000u }; - size_t total = TEXT_OFF + 16; - unsigned char* b = (unsigned char*)calloc(1, total); - if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; - b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; - b[EI_MAG3] = ELFMAG3; - b[EI_CLASS] = ELFCLASS64; - b[EI_DATA] = ELFDATA2LSB; - b[EI_VERSION] = EV_CURRENT; - b[16] = ET_DYN; - b[17] = 0; - b[18] = (unsigned char)EM_RISCV; - b[19] = (unsigned char)(EM_RISCV >> 8); - b[20] = EV_CURRENT; - /* e_entry = TEXT_OFF (relative for ET_DYN) */ - unsigned long long ent = TEXT_OFF; - for (int i = 0; i < 8; ++i) b[24 + i] = (unsigned char)(ent >> (8 * i)); - b[32] = 64; - b[52] = ELF64_EHDR_SIZE; - b[54] = ELF64_PHDR_SIZE; - b[56] = 1; - /* PT_LOAD at vaddr 0 covering [0, total). */ - b[64] = PT_LOAD; - b[64 + 4] = (unsigned char)(PF_R | PF_X); - /* p_vaddr = 0, p_paddr = 0, p_filesz/p_memsz = total. */ - unsigned long long tot = total; - for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i)); - for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i)); - b[64 + 48] = (unsigned char)PAGE; - b[64 + 49] = (unsigned char)(PAGE >> 8); - /* Body: addi a0,zero,99; addi a7,zero,94; ecall */ - u32 i0 = rv_addi(10, 0, 99); - u32 i1 = rv_addi(17, 0, 94); - u32 i2 = rv_ecall(); - memcpy(b + TEXT_OFF, &i0, 4); - memcpy(b + TEXT_OFF + 4, &i1, 4); - memcpy(b + TEXT_OFF + 8, &i2, 4); - *out_len = total; - return b; -} - -static unsigned char* build_program_with_interp(size_t* out_len) { - /* PT_LOAD then PT_INTERP. Program _start is just an exit(42), but it - * never runs — the interpreter does. */ - enum { PAGE = 0x1000u, BASE_VA = 0x40000ull, TEXT_OFF = 0x1000u }; - /* Layout: - * [0..63] ehdr - * [64..119] PT_LOAD - * [120..175] PT_INTERP - * [176..0xfff] zero pad - * [0x1000..] text - * Interp string is placed inside the PT_LOAD segment but past .text, - * at file offset 0x1100. 
*/ - const char interp_path[] = "/lib/ld-musl-riscv64.so.1"; - size_t interp_off = 0x1100; - size_t total = interp_off + sizeof(interp_path) + 0x100; - unsigned char* b = (unsigned char*)calloc(1, total); - if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; - b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; - b[EI_MAG3] = ELFMAG3; - b[EI_CLASS] = ELFCLASS64; - b[EI_DATA] = ELFDATA2LSB; - b[EI_VERSION] = EV_CURRENT; - b[16] = ET_EXEC; - b[17] = 0; - b[18] = (unsigned char)EM_RISCV; - b[19] = (unsigned char)(EM_RISCV >> 8); - b[20] = EV_CURRENT; - unsigned long long ent = BASE_VA + TEXT_OFF; - for (int i = 0; i < 8; ++i) b[24 + i] = (unsigned char)(ent >> (8 * i)); - b[32] = 64; - b[52] = ELF64_EHDR_SIZE; - b[54] = ELF64_PHDR_SIZE; - b[56] = 2; /* two program headers */ - /* PT_LOAD covering [0, total) at VA BASE_VA. */ - b[64] = PT_LOAD; - b[64 + 4] = (unsigned char)(PF_R | PF_X); - for (int i = 0; i < 8; ++i) - b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); - for (int i = 0; i < 8; ++i) - b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); - unsigned long long tot = total; - for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i)); - for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i)); - b[64 + 48] = (unsigned char)PAGE; - b[64 + 49] = (unsigned char)(PAGE >> 8); - /* PT_INTERP. p_offset = interp_off, p_filesz = strlen(path)+1. */ - size_t ph2 = 64 + 56; - b[ph2] = PT_INTERP; - unsigned long long ioff = interp_off; - for (int i = 0; i < 8; ++i) b[ph2 + 8 + i] = (unsigned char)(ioff >> (8 * i)); - unsigned long long ilen = sizeof(interp_path); - for (int i = 0; i < 8; ++i) - b[ph2 + 32 + i] = (unsigned char)(ilen >> (8 * i)); - for (int i = 0; i < 8; ++i) - b[ph2 + 40 + i] = (unsigned char)(ilen >> (8 * i)); - /* Program text: exit(42). 
*/ - u32 i0 = rv_addi(10, 0, 42); - u32 i1 = rv_addi(17, 0, 94); - u32 i2 = rv_ecall(); - memcpy(b + TEXT_OFF, &i0, 4); - memcpy(b + TEXT_OFF + 4, &i1, 4); - memcpy(b + TEXT_OFF + 8, &i2, 4); - /* Interpreter path string. */ - memcpy(b + interp_off, interp_path, sizeof(interp_path)); - *out_len = total; - return b; -} - -static void pt_interp_handoff(void) { - CfreeCompiler* c = new_compiler(); - Compiler* cc = (Compiler*)c; - size_t interp_len = 0, prog_len = 0; - unsigned char* interp = build_minimal_interp_elf(&interp_len); - unsigned char* prog = build_program_with_interp(&prog_len); - EXPECT(interp && prog, "buffer alloc"); - if (!interp || !prog) { - free(interp); - free(prog); - cfree_compiler_free(c); - return; - } - - /* Stage the interpreter bytes; loader consumes them on the next - * emu_load_elf call. */ - emu_load_elf_set_interp_bytes(interp, interp_len); - - EmuLoadedImage img; - memset(&img, 0, sizeof img); - int rc = emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, prog, prog_len, NULL, NULL, - &img); - EXPECT(rc == 0, "emu_load_elf with PT_INTERP rc=%d", rc); - if (rc != 0) { - free(interp); - free(prog); - cfree_compiler_free(c); - return; - } - /* entry_pc should be the interpreter's entry (which we placed past - * the program). The program's BASE_VA is 0x40000, so the interpreter - * lands at >= 0x42000-ish. */ - EXPECT(img.entry_pc > 0x40000ull, - "entry_pc must come from the interpreter, got 0x%llx", - (unsigned long long)img.entry_pc); - - /* Run a few interpreter blocks to make sure the loader's PT_LOADs - * are actually addressable. The fake "interpreter" exits with 94. 
*/ - EmuCPUState* cpu = - emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, img.entry_pc, img.initial_sp); - rc = emu_load_elf_attach(cpu, &img); - EXPECT(rc == 0, "attach"); - for (u32 steps = 0; steps < 16u; ++steps) { - u64 pc = emu_cpu_pc(cpu); - unsigned char* p = emu_cpu_va_to_host_pub(cpu, pc, 4); - if (!p) break; - EmuInst insts[8]; - u32 n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, p, pc, insts, 8); - if (n == 0) break; - emu_cpu_interp_block(cpu, insts, n); - if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break; - } - EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT, - "interp exited via EMU_TRAP_EXIT"); - EXPECT(emu_cpu_exit_code(cpu) == 99, - "interp exit code 99 (= a0 at exit), got %d", emu_cpu_exit_code(cpu)); - - emu_cpu_free(cpu); - emu_unload_image(cc, &img); - free(interp); - free(prog); - cfree_compiler_free(c); -} - -int main(void) { - decode_extras(); - decode_rvc(); - fp_csr_interp(); - syscalls_extras(); - pt_interp_handoff(); - if (g_fail) { - fprintf(stderr, "FAILED %d check(s)\n", g_fail); - return 1; - } - fprintf(stderr, "OK\n"); - return 0; -} diff --git a/test/emu/rv64_smoke_test.c b/test/emu/rv64_smoke_test.c @@ -7,36 +7,45 @@ * addi a7, zero, 94 # SYS_exit_group * ecall * - * Loads it via emu_load_elf, attaches it to a fresh EmuCPUState, then - * walks emu_decode_block + emu_cpu_interp_block until the CPU traps - * with EMU_TRAP_EXIT. Asserts the exit code is 42. + * Runs it through cfree_emu_run and asserts the lifted/JIT path exits + * with code 42. * * This exercises: * - the ELF64 loader (header + program-header validation, PT_LOAD * placement, argv/envp/auxv stack layout) - * - the RV64 decoder (ADDI, ECALL) - * - the interpreter dispatch loop + * - the shared RV64 ArchDecodeOps path (ADDI, ECALL) * - the syscall handler (SYS_exit_group) - * - * The lift/JIT path is deliberately *not* exercised — lift.c is still - * a stub. The interpreter is the contract this test pins. */ + * - the RV64 ArchEmuOps lifter and host JIT dispatch path. 
*/ #include <cfree/compile.h> #include <cfree/core.h> +#include <cfree/jit.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/mman.h> +#include <unistd.h> +#if defined(__APPLE__) +#include <mach/mach.h> +#include <mach/mach_vm.h> +#define XM_DUAL_APPLE 1 +#else +#define XM_DUAL_APPLE 0 +#endif +#if defined(__linux__) +#include <sys/syscall.h> +#define XM_DUAL_LINUX 1 +#else +#define XM_DUAL_LINUX 0 +#endif +#include "arch/arch.h" #include "arch/rv64/isa.h" #include "core/core.h" #include "emu/emu.h" -#include "emu/rv64_ops.h" #include "obj/elf/elf.h" -/* The loader exposes emu_load_elf_attach via a forward decl since the - * locked include/cfree/emu.h does not expose it. cpu.c exports the - * direct accessors used by the test. */ -int emu_load_elf_attach(EmuCPUState*, const EmuLoadedImage*); +EmuCPUState* emu_internal_cpu(CfreeEmu*); /* Host heap glue (same shape as test/api). */ static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { @@ -68,6 +77,145 @@ static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; static CfreeContext g_ctx; +static int xm_to_posix(int p) { + int q = 0; + if (p & CFREE_PROT_READ) q |= PROT_READ; + if (p & CFREE_PROT_WRITE) q |= PROT_WRITE; + if (p & CFREE_PROT_EXEC) q |= PROT_EXEC; + return q; +} + +typedef struct XmTok { + void* w; + void* r; + size_t n; +} XmTok; + +static CfreeStatus xm_reserve_single(size_t n, CfreeExecMemRegion* out) { + void* m = + mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (m == MAP_FAILED) return CFREE_NOMEM; + out->write = m; + out->runtime = m; + out->size = n; + out->token = NULL; + return CFREE_OK; +} + +static CfreeStatus xm_reserve(void* user, size_t n, int prot, + CfreeExecMemRegion* out) { + (void)user; + if (!out || !n) return CFREE_INVALID; + if (!(prot & CFREE_PROT_EXEC)) return xm_reserve_single(n, out); +#if XM_DUAL_APPLE + { + void* w = + mmap(NULL, n, PROT_READ | 
PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + mach_vm_address_t r = 0; + vm_prot_t cur = 0, max = 0; + XmTok* tok; + if (w == MAP_FAILED) return CFREE_NOMEM; + if (mach_vm_remap(mach_task_self(), &r, (mach_vm_size_t)n, 0, + VM_FLAGS_ANYWHERE, mach_task_self(), + (mach_vm_address_t)(uintptr_t)w, FALSE, &cur, &max, + VM_INHERIT_NONE) != KERN_SUCCESS) { + munmap(w, n); + return CFREE_NOMEM; + } + if (mprotect((void*)(uintptr_t)r, n, PROT_READ) != 0) { + munmap((void*)(uintptr_t)r, n); + munmap(w, n); + return CFREE_NOMEM; + } + tok = (XmTok*)malloc(sizeof(*tok)); + if (!tok) { + munmap((void*)(uintptr_t)r, n); + munmap(w, n); + return CFREE_NOMEM; + } + tok->w = w; + tok->r = (void*)(uintptr_t)r; + tok->n = n; + out->write = w; + out->runtime = (void*)(uintptr_t)r; + out->size = n; + out->token = tok; + return CFREE_OK; + } +#elif XM_DUAL_LINUX + { + int fd = (int)syscall(SYS_memfd_create, "cfree-emu-jit-test", 0u); + void *w, *r; + XmTok* tok; + if (fd < 0) return CFREE_NOMEM; + if (ftruncate(fd, (off_t)n) != 0) { + close(fd); + return CFREE_NOMEM; + } + w = mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (w == MAP_FAILED) { + close(fd); + return CFREE_NOMEM; + } + r = mmap(NULL, n, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + if (r == MAP_FAILED) { + munmap(w, n); + return CFREE_NOMEM; + } + tok = (XmTok*)malloc(sizeof(*tok)); + if (!tok) { + munmap(r, n); + munmap(w, n); + return CFREE_NOMEM; + } + tok->w = w; + tok->r = r; + tok->n = n; + out->write = w; + out->runtime = r; + out->size = n; + out->token = tok; + return CFREE_OK; + } +#else + return xm_reserve_single(n, out); +#endif +} + +static CfreeStatus xm_protect(void* user, void* addr, size_t n, int prot) { + (void)user; + return mprotect(addr, n, xm_to_posix(prot)) == 0 ? 
CFREE_OK : CFREE_IO; +} + +static void xm_release(void* user, CfreeExecMemRegion* r) { + (void)user; + if (!r || !r->size) return; + if (r->token) { + XmTok* tok = (XmTok*)r->token; + if (tok->r && tok->r != tok->w) munmap(tok->r, tok->n); + if (tok->w) munmap(tok->w, tok->n); + free(tok); + } else if (r->write) { + munmap(r->write, r->size); + } + memset(r, 0, sizeof(*r)); +} + +static void xm_flush(void* user, void* addr, size_t n) { + (void)user; +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) + __builtin___clear_cache((char*)addr, (char*)addr + n); +#else + (void)addr; + (void)n; +#endif +} + +static CfreeExecMem g_execmem = { + 16 * 1024, xm_reserve, xm_protect, xm_release, xm_flush, NULL, +}; + static int g_fail; #define EXPECT(cond, ...) \ do { \ @@ -98,6 +246,40 @@ static CfreeCompiler* new_compiler(void) { return c; } +static CfreeCompiler* new_host_compiler(void) { + CfreeTarget t; + CfreeCompiler* c = NULL; + memset(&t, 0, sizeof t); +#if defined(__x86_64__) || defined(_M_X64) + t.arch = CFREE_ARCH_X86_64; +#elif defined(__aarch64__) || defined(_M_ARM64) + t.arch = CFREE_ARCH_ARM_64; +#elif defined(__riscv) && __riscv_xlen == 64 + t.arch = CFREE_ARCH_RV64; +#else + return NULL; +#endif +#if defined(__APPLE__) + t.os = CFREE_OS_MACOS; + t.obj = CFREE_OBJ_MACHO; +#elif defined(__linux__) + t.os = CFREE_OS_LINUX; + t.obj = CFREE_OBJ_ELF; +#else + return NULL; +#endif + t.ptr_size = 8; + t.ptr_align = 8; + memset(&g_ctx, 0, sizeof g_ctx); + g_ctx.heap = &g_heap; + g_ctx.diag = &g_diag; + if (cfree_compiler_new(t, &g_ctx, &c) != CFREE_OK || !c) { + fprintf(stderr, "host compiler_new failed\n"); + exit(2); + } + return c; +} + /* ============================================================ * Minimal RV64 ELF64 builder * ============================================================ */ @@ -118,6 +300,59 @@ static void put64(unsigned char* b, size_t off, uint64_t v) { put32(b, off + 4, (unsigned)(v >> 32)); } +static void put_phdr(unsigned char* 
b, size_t off, uint32_t type, + uint32_t flags, uint64_t file_off, uint64_t vaddr, + uint64_t filesz, uint64_t memsz, uint64_t align) { + put32(b, off + 0, type); + put32(b, off + 4, flags); + put64(b, off + 8, file_off); + put64(b, off + 16, vaddr); + put64(b, off + 24, vaddr); + put64(b, off + 32, filesz); + put64(b, off + 40, memsz); + put64(b, off + 48, align); +} + +static void put_dyn(unsigned char* b, size_t off, uint64_t tag, + uint64_t val) { + put64(b, off + 0, tag); + put64(b, off + 8, val); +} + +static void put_sym(unsigned char* b, size_t off, uint32_t name, + uint8_t info, uint16_t shndx, uint64_t value, + uint64_t size) { + put32(b, off + 0, name); + b[off + 4] = info; + b[off + 5] = 0; + put16(b, off + 6, shndx); + put64(b, off + 8, value); + put64(b, off + 16, size); +} + +static void put_rela(unsigned char* b, size_t off, uint64_t r_offset, + uint64_t r_info, int64_t addend) { + put64(b, off + 0, r_offset); + put64(b, off + 8, r_info); + put64(b, off + 16, (uint64_t)addend); +} + +static int64_t pcrel_delta(uint64_t from, uint64_t to) { + if (to >= from) return (int64_t)(to - from); + return -(int64_t)(from - to); +} + +static uint32_t rv_pcrel_hi(uint64_t from, uint64_t to) { + int64_t d = pcrel_delta(from, to); + return (uint32_t)((d + 0x800) >> 12) & 0xfffffu; +} + +static int32_t rv_pcrel_lo(uint64_t from, uint64_t to) { + int64_t d = pcrel_delta(from, to); + int64_t hi = (d + 0x800) >> 12; + return (int32_t)(d - (hi << 12)); +} + /* Build a static rv64 ELF: ehdr + 1 phdr + text. The text segment is * page-aligned at virtual address 0x10000 and contains the three * instructions described in the file header. Returns the buffer (must @@ -188,105 +423,1085 @@ static unsigned char* build_minimal_elf(size_t* out_len) { return b; } +/* Dynamic fixture for the intended emu loader surface: + * PT_INTERP + PT_DYNAMIC + DT_NEEDED + JUMP_SLOT import + PT_TLS. 
+ * + * The import resolver is expected to fill `import_add_got` with a guest-callable + * thunk for import_add(a0). The TLS setup is expected to seed tp so that + * ld t1, 0(tp) reads the PT_TLS initializer 11. The guest exits 31 + 11. */ +static unsigned char* build_dynamic_import_tls_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + PHNUM = 5u, + TEXT_OFF = 0x1000u, + INTERP_OFF = 0x1100u, + DYNSTR_OFF = 0x1180u, + DYNSYM_OFF = 0x1200u, + RELA_OFF = 0x1300u, + DYNAMIC_OFF = 0x2000u, + DATA_OFF = 0x2100u, + TLS_OFF = 0x2200u, + TOTAL = 0x2300u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t interp_va = BASE_VA + INTERP_OFF; + const uint64_t dynstr_va = BASE_VA + DYNSTR_OFF; + const uint64_t dynsym_va = BASE_VA + DYNSYM_OFF; + const uint64_t rela_va = BASE_VA + RELA_OFF; + const uint64_t dynamic_va = BASE_VA + DYNAMIC_OFF; + const uint64_t got_va = BASE_VA + DATA_OFF; + const uint64_t tls_va = BASE_VA + TLS_OFF; + const char interp[] = "/cfree-test-ld.so"; + const char needed[] = "libcfree-emu-test.so"; + const char import_name[] = "import_add"; + const size_t dynstr_needed_off = 1u; + const size_t dynstr_import_off = 1u + sizeof(needed); + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + unsigned char* ds; + size_t dynstr_len; + + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, PHNUM); + + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + 0x1400u, 0x1400u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DYNAMIC_OFF, + 
dynamic_va, TLS_OFF + 8u - DYNAMIC_OFF, TLS_OFF + 8u - DYNAMIC_OFF, + PAGE); + put_phdr(b, 64 + 2u * ELF64_PHDR_SIZE, PT_INTERP, PF_R, INTERP_OFF, + interp_va, sizeof(interp), sizeof(interp), 1u); + put_phdr(b, 64 + 3u * ELF64_PHDR_SIZE, PT_DYNAMIC, PF_R | PF_W, + DYNAMIC_OFF, dynamic_va, 10u * ELF64_DYN_SIZE, + 10u * ELF64_DYN_SIZE, 8u); + put_phdr(b, 64 + 4u * ELF64_PHDR_SIZE, PT_TLS, PF_R, TLS_OFF, tls_va, 8u, + 8u, 8u); + + put32(b, TEXT_OFF + 0u, rv_auipc(RV_T0, rv_pcrel_hi(text_va, got_va))); + put32(b, TEXT_OFF + 4u, rv_ld(RV_T0, RV_T0, rv_pcrel_lo(text_va, got_va))); + put32(b, TEXT_OFF + 8u, rv_addi(RV_A0, RV_ZERO, 31)); + put32(b, TEXT_OFF + 12u, rv_jalr(RV_RA, RV_T0, 0)); + put32(b, TEXT_OFF + 16u, rv_ld(RV_T1, RV_TP, 0)); + put32(b, TEXT_OFF + 20u, rv_add(RV_A0, RV_A0, RV_T1)); + put32(b, TEXT_OFF + 24u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 28u, rv_ecall()); + + memcpy(b + INTERP_OFF, interp, sizeof(interp)); + ds = b + DYNSTR_OFF; + ds[0] = 0; + memcpy(ds + dynstr_needed_off, needed, sizeof(needed)); + memcpy(ds + dynstr_import_off, import_name, sizeof(import_name)); + dynstr_len = dynstr_import_off + sizeof(import_name); + + put_sym(b, DYNSYM_OFF, 0, 0, SHN_UNDEF, 0, 0); + put_sym(b, DYNSYM_OFF + ELF64_SYM_SIZE, (uint32_t)dynstr_import_off, + ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), SHN_UNDEF, 0, 0); + put_rela(b, RELA_OFF, got_va, ELF64_R_INFO(1u, ELF_R_RISCV_JUMP_SLOT), 0); + + put_dyn(b, DYNAMIC_OFF + 0u * ELF64_DYN_SIZE, DT_NEEDED, + dynstr_needed_off); + put_dyn(b, DYNAMIC_OFF + 1u * ELF64_DYN_SIZE, DT_STRTAB, dynstr_va); + put_dyn(b, DYNAMIC_OFF + 2u * ELF64_DYN_SIZE, DT_STRSZ, dynstr_len); + put_dyn(b, DYNAMIC_OFF + 3u * ELF64_DYN_SIZE, DT_SYMTAB, dynsym_va); + put_dyn(b, DYNAMIC_OFF + 4u * ELF64_DYN_SIZE, DT_SYMENT, ELF64_SYM_SIZE); + put_dyn(b, DYNAMIC_OFF + 5u * ELF64_DYN_SIZE, DT_PLTREL, DT_RELA); + put_dyn(b, DYNAMIC_OFF + 6u * ELF64_DYN_SIZE, DT_JMPREL, rela_va); + put_dyn(b, DYNAMIC_OFF + 7u * ELF64_DYN_SIZE, DT_PLTRELSZ, + 
ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 8u * ELF64_DYN_SIZE, DT_RELAENT, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 9u * ELF64_DYN_SIZE, DT_NULL, 0); + + put64(b, TLS_OFF, 11u); + *out_len = TOTAL; + return b; +} + +static unsigned char* build_tls_distinct_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + PHNUM = 3u, + TEXT_OFF = 0x1000u, + TLS_OFF = 0x2000u, + TOTAL = 0x2010u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t tls_va = BASE_VA + TLS_OFF; + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, PHNUM); + + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + TEXT_OFF + 40u, TEXT_OFF + 40u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, TLS_OFF, + tls_va, 16u, 16u, PAGE); + put_phdr(b, 64 + 2u * ELF64_PHDR_SIZE, PT_TLS, PF_R, TLS_OFF, tls_va, 8u, + 16u, 8u); + + put32(b, TEXT_OFF + 0u, rv_auipc(RV_T0, rv_pcrel_hi(text_va, tls_va))); + put32(b, TEXT_OFF + 4u, + rv_addi(RV_T0, RV_T0, rv_pcrel_lo(text_va, tls_va))); + put32(b, TEXT_OFF + 8u, rv_addi(RV_T1, RV_ZERO, 9)); + put32(b, TEXT_OFF + 12u, rv_sd(RV_T1, RV_TP, 0)); + put32(b, TEXT_OFF + 16u, rv_ld(RV_A0, RV_T0, 0)); + put32(b, TEXT_OFF + 20u, rv_ld(RV_T2, RV_TP, 0)); + put32(b, TEXT_OFF + 24u, rv_add(RV_A0, RV_A0, RV_T2)); + put32(b, TEXT_OFF + 28u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 32u, rv_ecall()); + + put64(b, TLS_OFF, 11u); + *out_len = TOTAL; + return b; +} + +static unsigned char* 
build_host_import_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + PHNUM = 3u, + TEXT_OFF = 0x1000u, + DYNSTR_OFF = 0x1100u, + DYNSYM_OFF = 0x1180u, + RELA_OFF = 0x1200u, + DYNAMIC_OFF = 0x2000u, + GOT_OFF = 0x2100u, + TOTAL = 0x2200u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t dynstr_va = BASE_VA + DYNSTR_OFF; + const uint64_t dynsym_va = BASE_VA + DYNSYM_OFF; + const uint64_t rela_va = BASE_VA + RELA_OFF; + const uint64_t dynamic_va = BASE_VA + DYNAMIC_OFF; + const uint64_t got_va = BASE_VA + GOT_OFF; + const char needed[] = "libhostbridge.so"; + const char import_name[] = "host_add2"; + const size_t dynstr_needed_off = 1u; + const size_t dynstr_import_off = 1u + sizeof(needed); + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + unsigned char* ds; + size_t dynstr_len; + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, PHNUM); + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + 0x1300u, 0x1300u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DYNAMIC_OFF, + dynamic_va, GOT_OFF + 8u - DYNAMIC_OFF, GOT_OFF + 8u - DYNAMIC_OFF, + PAGE); + put_phdr(b, 64 + 2u * ELF64_PHDR_SIZE, PT_DYNAMIC, PF_R | PF_W, + DYNAMIC_OFF, dynamic_va, 10u * ELF64_DYN_SIZE, + 10u * ELF64_DYN_SIZE, 8u); + + put32(b, TEXT_OFF + 0u, rv_auipc(RV_T0, rv_pcrel_hi(text_va, got_va))); + put32(b, TEXT_OFF + 4u, rv_ld(RV_T0, RV_T0, rv_pcrel_lo(text_va, got_va))); + put32(b, TEXT_OFF + 8u, rv_addi(RV_A0, RV_ZERO, 35)); + put32(b, TEXT_OFF + 12u, 
rv_addi(RV_A1, RV_ZERO, 7)); + put32(b, TEXT_OFF + 16u, rv_jalr(RV_RA, RV_T0, 0)); + put32(b, TEXT_OFF + 20u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 24u, rv_ecall()); + + ds = b + DYNSTR_OFF; + ds[0] = 0; + memcpy(ds + dynstr_needed_off, needed, sizeof(needed)); + memcpy(ds + dynstr_import_off, import_name, sizeof(import_name)); + dynstr_len = dynstr_import_off + sizeof(import_name); + put_sym(b, DYNSYM_OFF, 0, 0, SHN_UNDEF, 0, 0); + put_sym(b, DYNSYM_OFF + ELF64_SYM_SIZE, (uint32_t)dynstr_import_off, + ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), SHN_UNDEF, 0, 0); + put_rela(b, RELA_OFF, got_va, ELF64_R_INFO(1u, ELF_R_RISCV_JUMP_SLOT), 0); + put_dyn(b, DYNAMIC_OFF + 0u * ELF64_DYN_SIZE, DT_NEEDED, + dynstr_needed_off); + put_dyn(b, DYNAMIC_OFF + 1u * ELF64_DYN_SIZE, DT_STRTAB, dynstr_va); + put_dyn(b, DYNAMIC_OFF + 2u * ELF64_DYN_SIZE, DT_STRSZ, dynstr_len); + put_dyn(b, DYNAMIC_OFF + 3u * ELF64_DYN_SIZE, DT_SYMTAB, dynsym_va); + put_dyn(b, DYNAMIC_OFF + 4u * ELF64_DYN_SIZE, DT_SYMENT, ELF64_SYM_SIZE); + put_dyn(b, DYNAMIC_OFF + 5u * ELF64_DYN_SIZE, DT_PLTREL, DT_RELA); + put_dyn(b, DYNAMIC_OFF + 6u * ELF64_DYN_SIZE, DT_JMPREL, rela_va); + put_dyn(b, DYNAMIC_OFF + 7u * ELF64_DYN_SIZE, DT_PLTRELSZ, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 8u * ELF64_DYN_SIZE, DT_RELAENT, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 9u * ELF64_DYN_SIZE, DT_NULL, 0); + *out_len = TOTAL; + return b; +} + +static unsigned char* build_dso_import_main_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + PHNUM = 3u, + TEXT_OFF = 0x1000u, + DYNSTR_OFF = 0x1100u, + DYNSYM_OFF = 0x1180u, + RELA_OFF = 0x1200u, + DYNAMIC_OFF = 0x2000u, + GOT_OFF = 0x2100u, + TOTAL = 0x2200u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t dynstr_va = BASE_VA + DYNSTR_OFF; + const uint64_t dynsym_va = BASE_VA + DYNSYM_OFF; + const uint64_t rela_va = BASE_VA + RELA_OFF; + const uint64_t dynamic_va = BASE_VA + DYNAMIC_OFF; + const uint64_t got_va = BASE_VA 
+ GOT_OFF; + const char needed[] = "libdsoadd.so"; + const char import_name[] = "dso_add"; + const size_t dynstr_needed_off = 1u; + const size_t dynstr_import_off = 1u + sizeof(needed); + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + unsigned char* ds; + size_t dynstr_len; + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, PHNUM); + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + 0x1300u, 0x1300u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DYNAMIC_OFF, + dynamic_va, GOT_OFF + 8u - DYNAMIC_OFF, GOT_OFF + 8u - DYNAMIC_OFF, + PAGE); + put_phdr(b, 64 + 2u * ELF64_PHDR_SIZE, PT_DYNAMIC, PF_R | PF_W, + DYNAMIC_OFF, dynamic_va, 10u * ELF64_DYN_SIZE, + 10u * ELF64_DYN_SIZE, 8u); + + put32(b, TEXT_OFF + 0u, rv_auipc(RV_T0, rv_pcrel_hi(text_va, got_va))); + put32(b, TEXT_OFF + 4u, rv_ld(RV_T0, RV_T0, rv_pcrel_lo(text_va, got_va))); + put32(b, TEXT_OFF + 8u, rv_addi(RV_A0, RV_ZERO, 35)); + put32(b, TEXT_OFF + 12u, rv_jalr(RV_RA, RV_T0, 0)); + put32(b, TEXT_OFF + 16u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 20u, rv_ecall()); + + ds = b + DYNSTR_OFF; + ds[0] = 0; + memcpy(ds + dynstr_needed_off, needed, sizeof(needed)); + memcpy(ds + dynstr_import_off, import_name, sizeof(import_name)); + dynstr_len = dynstr_import_off + sizeof(import_name); + put_sym(b, DYNSYM_OFF, 0, 0, SHN_UNDEF, 0, 0); + put_sym(b, DYNSYM_OFF + ELF64_SYM_SIZE, (uint32_t)dynstr_import_off, + ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), SHN_UNDEF, 0, 0); + put_rela(b, RELA_OFF, got_va, ELF64_R_INFO(1u, 
ELF_R_RISCV_JUMP_SLOT), 0); + put_dyn(b, DYNAMIC_OFF + 0u * ELF64_DYN_SIZE, DT_NEEDED, + dynstr_needed_off); + put_dyn(b, DYNAMIC_OFF + 1u * ELF64_DYN_SIZE, DT_STRTAB, dynstr_va); + put_dyn(b, DYNAMIC_OFF + 2u * ELF64_DYN_SIZE, DT_STRSZ, dynstr_len); + put_dyn(b, DYNAMIC_OFF + 3u * ELF64_DYN_SIZE, DT_SYMTAB, dynsym_va); + put_dyn(b, DYNAMIC_OFF + 4u * ELF64_DYN_SIZE, DT_SYMENT, ELF64_SYM_SIZE); + put_dyn(b, DYNAMIC_OFF + 5u * ELF64_DYN_SIZE, DT_PLTREL, DT_RELA); + put_dyn(b, DYNAMIC_OFF + 6u * ELF64_DYN_SIZE, DT_JMPREL, rela_va); + put_dyn(b, DYNAMIC_OFF + 7u * ELF64_DYN_SIZE, DT_PLTRELSZ, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 8u * ELF64_DYN_SIZE, DT_RELAENT, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 9u * ELF64_DYN_SIZE, DT_NULL, 0); + *out_len = TOTAL; + return b; +} + +static unsigned char* build_dso_import_so(size_t* out_len) { + enum { + PAGE = 0x1000u, + PHNUM = 3u, + TEXT_OFF = 0x1000u, + DYNAMIC_OFF = 0x2000u, + DYNSTR_OFF = 0x2100u, + DYNSYM_OFF = 0x2180u, + HASH_OFF = 0x2200u, + RELA_OFF = 0x2220u, + DATA_OFF = 0x2300u, + TOTAL = 0x2400u, + }; + const uint64_t text_va = TEXT_OFF; + const uint64_t dynamic_va = DYNAMIC_OFF; + const uint64_t dynstr_va = DYNSTR_OFF; + const uint64_t dynsym_va = DYNSYM_OFF; + const uint64_t hash_va = HASH_OFF; + const uint64_t rela_va = RELA_OFF; + const uint64_t ptr_va = DATA_OFF; + const uint64_t value_va = DATA_OFF + 8u; + const char soname[] = "libdsoadd.so"; + const char symbol[] = "dso_add"; + const size_t dynstr_soname_off = 1u; + const size_t dynstr_symbol_off = 1u + sizeof(soname); + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + unsigned char* ds; + size_t dynstr_len; + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_DYN); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, 
text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, PHNUM); + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, 0, + TEXT_OFF + 24u, TEXT_OFF + 24u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DYNAMIC_OFF, + dynamic_va, DATA_OFF + 16u - DYNAMIC_OFF, + DATA_OFF + 16u - DYNAMIC_OFF, PAGE); + put_phdr(b, 64 + 2u * ELF64_PHDR_SIZE, PT_DYNAMIC, PF_R | PF_W, + DYNAMIC_OFF, dynamic_va, 10u * ELF64_DYN_SIZE, + 10u * ELF64_DYN_SIZE, 8u); + + put32(b, TEXT_OFF + 0u, rv_auipc(RV_T0, rv_pcrel_hi(text_va, ptr_va))); + put32(b, TEXT_OFF + 4u, rv_ld(RV_T0, RV_T0, rv_pcrel_lo(text_va, ptr_va))); + put32(b, TEXT_OFF + 8u, rv_ld(RV_T1, RV_T0, 0)); + put32(b, TEXT_OFF + 12u, rv_add(RV_A0, RV_A0, RV_T1)); + put32(b, TEXT_OFF + 16u, rv_jalr(RV_ZERO, RV_RA, 0)); + + ds = b + DYNSTR_OFF; + ds[0] = 0; + memcpy(ds + dynstr_soname_off, soname, sizeof(soname)); + memcpy(ds + dynstr_symbol_off, symbol, sizeof(symbol)); + dynstr_len = dynstr_symbol_off + sizeof(symbol); + put_sym(b, DYNSYM_OFF, 0, 0, SHN_UNDEF, 0, 0); + put_sym(b, DYNSYM_OFF + ELF64_SYM_SIZE, (uint32_t)dynstr_symbol_off, + ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), 1u, text_va, 20u); + put32(b, HASH_OFF + 0u, 1u); + put32(b, HASH_OFF + 4u, 2u); + put32(b, HASH_OFF + 8u, 1u); + put32(b, HASH_OFF + 12u, 0u); + put_rela(b, RELA_OFF, ptr_va, ELF64_R_INFO(0u, ELF_R_RISCV_RELATIVE), + (int64_t)value_va); + put64(b, DATA_OFF + 8u, 7u); + put_dyn(b, DYNAMIC_OFF + 0u * ELF64_DYN_SIZE, DT_SONAME, + dynstr_soname_off); + put_dyn(b, DYNAMIC_OFF + 1u * ELF64_DYN_SIZE, DT_STRTAB, dynstr_va); + put_dyn(b, DYNAMIC_OFF + 2u * ELF64_DYN_SIZE, DT_STRSZ, dynstr_len); + put_dyn(b, DYNAMIC_OFF + 3u * ELF64_DYN_SIZE, DT_SYMTAB, dynsym_va); + put_dyn(b, DYNAMIC_OFF + 4u * ELF64_DYN_SIZE, DT_SYMENT, ELF64_SYM_SIZE); + put_dyn(b, DYNAMIC_OFF + 5u * ELF64_DYN_SIZE, DT_HASH, hash_va); + put_dyn(b, 
DYNAMIC_OFF + 6u * ELF64_DYN_SIZE, DT_RELA, rela_va); + put_dyn(b, DYNAMIC_OFF + 7u * ELF64_DYN_SIZE, DT_RELASZ, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 8u * ELF64_DYN_SIZE, DT_RELAENT, + ELF64_RELA_SIZE); + put_dyn(b, DYNAMIC_OFF + 9u * ELF64_DYN_SIZE, DT_NULL, 0); + *out_len = TOTAL; + return b; +} + +/* Static fixture for the intended permission/fault/signal surface. The guest + * installs a minimal SIGSEGV handler, writes to its RX text page, and expects + * Linux/RV64 signal delivery to transfer control to `handler`, which exits 42. + */ +static unsigned char* build_signal_perms_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + TEXT_OFF = 0x1000u, + DATA_OFF = 0x2000u, + TOTAL = 0x2040u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t data_va = BASE_VA + DATA_OFF; + const uint64_t handler_va = text_va + 48u; + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, 2); + + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + TEXT_OFF + 60u, TEXT_OFF + 60u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DATA_OFF, + data_va, 32u, 32u, PAGE); + + put32(b, TEXT_OFF + 0u, rv_addi(RV_A0, RV_ZERO, 11)); /* SIGSEGV */ + put32(b, TEXT_OFF + 4u, rv_auipc(RV_A1, rv_pcrel_hi(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 8u, + rv_addi(RV_A1, RV_A1, rv_pcrel_lo(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 12u, rv_addi(RV_A2, RV_ZERO, 0)); + put32(b, TEXT_OFF + 16u, rv_addi(RV_A3, 
RV_ZERO, 8)); + put32(b, TEXT_OFF + 20u, rv_addi(RV_A7, RV_ZERO, 134)); /* rt_sigaction */ + put32(b, TEXT_OFF + 24u, rv_ecall()); + put32(b, TEXT_OFF + 28u, rv_auipc(RV_T0, 0)); + put32(b, TEXT_OFF + 32u, rv_sd(RV_ZERO, RV_T0, 0)); + put32(b, TEXT_OFF + 36u, rv_addi(RV_A0, RV_ZERO, 1)); + put32(b, TEXT_OFF + 40u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 44u, rv_ecall()); + put32(b, TEXT_OFF + 48u, rv_addi(RV_A0, RV_ZERO, 42)); + put32(b, TEXT_OFF + 52u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 56u, rv_ecall()); + + put64(b, DATA_OFF + 0u, handler_va); + put64(b, DATA_OFF + 8u, 0); + put64(b, DATA_OFF + 16u, 0); + put64(b, DATA_OFF + 24u, 0); + + *out_len = TOTAL; + return b; +} + +static unsigned char* build_signal_load_fault_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + TEXT_OFF = 0x1000u, + DATA_OFF = 0x2000u, + TOTAL = 0x2040u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t data_va = BASE_VA + DATA_OFF; + const uint64_t handler_va = text_va + 48u; + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, 2); + + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + TEXT_OFF + 60u, TEXT_OFF + 60u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DATA_OFF, + data_va, 32u, 32u, PAGE); + + put32(b, TEXT_OFF + 0u, rv_addi(RV_A0, RV_ZERO, 11)); + put32(b, TEXT_OFF + 4u, rv_auipc(RV_A1, rv_pcrel_hi(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 8u, + 
rv_addi(RV_A1, RV_A1, rv_pcrel_lo(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 12u, rv_addi(RV_A2, RV_ZERO, 0)); + put32(b, TEXT_OFF + 16u, rv_addi(RV_A3, RV_ZERO, 8)); + put32(b, TEXT_OFF + 20u, rv_addi(RV_A7, RV_ZERO, 134)); + put32(b, TEXT_OFF + 24u, rv_ecall()); + put32(b, TEXT_OFF + 28u, rv_addi(RV_T0, RV_ZERO, 0)); + put32(b, TEXT_OFF + 32u, rv_ld(RV_T1, RV_T0, 0)); + put32(b, TEXT_OFF + 36u, rv_addi(RV_A0, RV_ZERO, 1)); + put32(b, TEXT_OFF + 40u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 44u, rv_ecall()); + put32(b, TEXT_OFF + 48u, rv_addi(RV_A0, RV_ZERO, 42)); + put32(b, TEXT_OFF + 52u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 56u, rv_ecall()); + + put64(b, DATA_OFF + 0u, handler_va); + put64(b, DATA_OFF + 8u, 0); + put64(b, DATA_OFF + 16u, 0); + put64(b, DATA_OFF + 24u, 0); + + *out_len = TOTAL; + return b; +} + +static unsigned char* build_signal_sigreturn_elf(size_t* out_len) { + enum { + PAGE = 0x1000u, + BASE_VA = 0x10000ull, + TEXT_OFF = 0x1000u, + DATA_OFF = 0x2000u, + TOTAL = 0x2040u, + }; + const uint64_t text_va = BASE_VA + TEXT_OFF; + const uint64_t data_va = BASE_VA + DATA_OFF; + const uint64_t handler_va = text_va + 48u; + unsigned char* b = (unsigned char*)calloc(1, TOTAL); + if (!b) return NULL; + + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; + b[EI_CLASS] = ELFCLASS64; + b[EI_DATA] = ELFDATA2LSB; + b[EI_VERSION] = EV_CURRENT; + b[EI_OSABI] = ELFOSABI_LINUX; + put16(b, 16, ET_EXEC); + put16(b, 18, EM_RISCV); + put32(b, 20, EV_CURRENT); + put64(b, 24, text_va); + put64(b, 32, ELF64_EHDR_SIZE); + put32(b, 48, EF_RISCV_FLOAT_ABI_DOUBLE); + put16(b, 52, ELF64_EHDR_SIZE); + put16(b, 54, ELF64_PHDR_SIZE); + put16(b, 56, 2); + + put_phdr(b, 64 + 0u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_X, 0, BASE_VA, + TEXT_OFF + 80u, TEXT_OFF + 80u, PAGE); + put_phdr(b, 64 + 1u * ELF64_PHDR_SIZE, PT_LOAD, PF_R | PF_W, DATA_OFF, + data_va, 32u, 32u, PAGE); + + put32(b, TEXT_OFF + 0u, 
rv_addi(RV_A0, RV_ZERO, 11)); + put32(b, TEXT_OFF + 4u, rv_auipc(RV_A1, rv_pcrel_hi(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 8u, + rv_addi(RV_A1, RV_A1, rv_pcrel_lo(text_va + 4u, data_va))); + put32(b, TEXT_OFF + 12u, rv_addi(RV_A2, RV_ZERO, 0)); + put32(b, TEXT_OFF + 16u, rv_addi(RV_A3, RV_ZERO, 8)); + put32(b, TEXT_OFF + 20u, rv_addi(RV_A7, RV_ZERO, 134)); + put32(b, TEXT_OFF + 24u, rv_ecall()); + put32(b, TEXT_OFF + 28u, rv_auipc(RV_T0, 0)); + put32(b, TEXT_OFF + 32u, rv_sd(RV_ZERO, RV_T0, 0)); + put32(b, TEXT_OFF + 36u, rv_addi(RV_A0, RV_ZERO, 42)); + put32(b, TEXT_OFF + 40u, rv_addi(RV_A7, RV_ZERO, 94)); + put32(b, TEXT_OFF + 44u, rv_ecall()); + + put32(b, TEXT_OFF + 48u, + rv_auipc(RV_A0, rv_pcrel_hi(handler_va, text_va))); + put32(b, TEXT_OFF + 52u, + rv_addi(RV_A0, RV_A0, rv_pcrel_lo(handler_va, text_va))); + put32(b, TEXT_OFF + 56u, rv_addi(RV_A1, RV_ZERO, 1)); + put32(b, TEXT_OFF + 60u, rv_addi(RV_A2, RV_ZERO, 7)); + put32(b, TEXT_OFF + 64u, rv_addi(RV_A7, RV_ZERO, 226)); + put32(b, TEXT_OFF + 68u, rv_ecall()); + put32(b, TEXT_OFF + 72u, rv_addi(RV_A7, RV_ZERO, 139)); + put32(b, TEXT_OFF + 76u, rv_ecall()); + + put64(b, DATA_OFF + 0u, handler_va); + put64(b, DATA_OFF + 8u, 0); + put64(b, DATA_OFF + 16u, 0); + put64(b, DATA_OFF + 24u, 0); + *out_len = TOTAL; + return b; +} + /* ============================================================ * Decoder smoke (sanity-check a handful of encodings before the - * end-to-end interp run). + * end-to-end JIT run). 
* ============================================================ */ static void decoder_smoke(void) { - EmuInst insts[8]; + CfreeCompiler* c = new_compiler(); + const ArchImpl* arch = arch_lookup(CFREE_ARCH_RV64); + CfreeDecodedInsn insts[8]; + CfreeStatus st; u32 n; unsigned char buf[16]; put32(buf, 0, rv_addi(RV_A0, RV_ZERO, 42)); put32(buf, 4, rv_addi(RV_A7, RV_ZERO, 94)); put32(buf, 8, rv_ecall()); put32(buf, 12, rv_add(RV_T0, RV_A0, RV_A1)); - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); + EXPECT(arch && arch->decode && arch->decode->decode_block, + "rv64 ArchDecodeOps unavailable"); + if (!arch || !arch->decode || !arch->decode->decode_block) { + cfree_compiler_free(c); + return; + } + st = arch->decode->decode_block((Compiler*)c, buf, sizeof(buf), 0x10000, + insts, 8, &n); + EXPECT(st == CFREE_OK, "decode_block returned %d", (int)st); EXPECT(n >= 3u, "decode block returned %u insts", n); - EXPECT(insts[0].op == RV64_OP_ADDI, "first insn must be ADDI, got %u", - insts[0].op); - EXPECT((u32)insts[0].operands[0] == RV_A0, "rd should be a0"); - EXPECT((i64)insts[0].operands[3] == 42, "imm should be 42"); - EXPECT(insts[1].op == RV64_OP_ADDI, "second insn must be ADDI"); - EXPECT((i64)insts[1].operands[3] == 94, "imm should be 94"); - EXPECT(insts[2].op == RV64_OP_ECALL, "third insn must be ECALL, got %u", - insts[2].op); - EXPECT(insts[2].flags & RV64_INST_FLAG_TERMINATOR, + EXPECT(insts[0].opcode == RV64_DEC_ADDI, "first insn must be ADDI, got %u", + insts[0].opcode); + EXPECT(insts[0].operands[0].reg == RV_A0, "rd should be a0"); + EXPECT(insts[0].operands[2].imm == 42, "imm should be 42"); + EXPECT(insts[1].opcode == RV64_DEC_ADDI, "second insn must be ADDI"); + EXPECT(insts[1].operands[2].imm == 94, "imm should be 94"); + EXPECT(insts[2].opcode == RV64_DEC_ECALL, "third insn must be ECALL, got %u", + insts[2].opcode); + EXPECT(insts[2].flags & CFREE_DECODE_TERMINATOR, "ECALL must be marked terminator"); /* The block stops at ECALL; the 
ADD at offset 12 should not have * been decoded. */ EXPECT(n == 3u, "decoder must stop at the terminator (got n=%u)", n); + cfree_compiler_free(c); } -/* ============================================================ - * End-to-end interp run - * ============================================================ */ -static void interp_smoke(void) { - CfreeCompiler* c = new_compiler(); - Compiler* cc = (Compiler*)c; - unsigned char* elf; - size_t elf_len; - EmuLoadedImage img; +static void vm_unit_smoke(void) { + CfreeCompiler* c = new_host_compiler(); + EmuAddrSpace as; + u8* p; + const EmuMemFault* fault; + CfreeStatus st; + if (!c) return; + memset(&as, 0, sizeof(as)); + st = emu_addr_space_init(&as, (Compiler*)c, 0x1000u); + EXPECT(st == CFREE_OK, "vm: init returned %d", (int)st); + if (st != CFREE_OK) { + cfree_compiler_free(c); + return; + } + + st = emu_addr_space_map(&as, 0x10000u, 0x3000u, + EMU_MEM_READ | EMU_MEM_WRITE, EMU_MAP_ANON); + EXPECT(st == CFREE_OK, "vm: initial anon map returned %d", (int)st); + EXPECT(as.nmaps == 1u, "vm: expected one map, got %u", as.nmaps); + + st = emu_addr_space_map(&as, 0x11000u, 0x1000u, EMU_MEM_READ, + EMU_MAP_ANON); + EXPECT(st == CFREE_INVALID, "vm: overlapping map must be rejected"); + + p = emu_addr_space_ptr(&as, 0x10ff8u, 16u, EMU_MEM_WRITE); + EXPECT(p != NULL, "vm: cross-page access inside one map should succeed"); + EXPECT(as.maps[0].dirty_pages[0] && as.maps[0].dirty_pages[1], + "vm: cross-page write should dirty both pages"); + + st = emu_addr_space_map(&as, 0x20000u, 0x1000u, 0, EMU_MAP_GUARD); + EXPECT(st == CFREE_OK, "vm: guard map returned %d", (int)st); + p = emu_addr_space_ptr(&as, 0x20000u, 1u, EMU_MEM_READ); + fault = emu_addr_space_last_fault(&as); + EXPECT(p == NULL && fault && fault->kind == EMU_FAULT_PROT, + "vm: guard read should be a protection fault"); + + st = emu_addr_space_protect(&as, 0x11000u, 0x1000u, EMU_MEM_READ); + EXPECT(st == CFREE_OK, "vm: middle-page protect returned %d", (int)st); + 
EXPECT(as.nmaps == 4u, "vm: protect should split maps, got %u", as.nmaps); + p = emu_addr_space_ptr(&as, 0x11000u, 1u, EMU_MEM_WRITE); + fault = emu_addr_space_last_fault(&as); + EXPECT(p == NULL && fault && fault->kind == EMU_FAULT_PROT, + "vm: write to read-only split page should fault"); + p = emu_addr_space_ptr(&as, 0x11000u, 1u, EMU_MEM_READ); + EXPECT(p != NULL, "vm: read from read-only split page should succeed"); + + st = emu_addr_space_unmap(&as, 0x11000u, 0x1000u); + EXPECT(st == CFREE_OK, "vm: middle-page unmap returned %d", (int)st); + p = emu_addr_space_ptr(&as, 0x11000u, 1u, EMU_MEM_READ); + fault = emu_addr_space_last_fault(&as); + EXPECT(p == NULL && fault && fault->kind == EMU_FAULT_UNMAPPED, + "vm: read from unmapped hole should fault as unmapped"); + + { + u64 gap = 0; + st = emu_addr_space_find_gap(&as, 0x1000u, 0x1000u, 0x10000u, 0x30000u, + &gap); + EXPECT(st == CFREE_OK && gap == 0x11000u, + "vm: find_gap should return the unmapped hole, got st=%d gap=0x%llx", + (int)st, (unsigned long long)gap); + } + + emu_addr_space_destroy(&as); + cfree_compiler_free(c); +} + +static void linux_vm_syscall_smoke(void) { + CfreeCompiler* c = new_host_compiler(); + EmuProcess process; + EmuThread thread; EmuCPUState* cpu; - EmuInst insts[16]; - u32 n; - u32 steps; - int exit_code; + const CfreeOsImpl* os; + const ArchImpl* arch; + u64 addr; + u8* p; + const EmuMemFault* fault; + if (!c) return; + memset(&process, 0, sizeof(process)); + memset(&thread, 0, sizeof(thread)); + os = os_lookup(CFREE_OS_LINUX); + arch = arch_lookup(CFREE_ARCH_RV64); + EXPECT(os && os->emu_default_syscall, "linux vm syscall: OS hook missing"); + EXPECT(arch && arch->emu, "linux vm syscall: arch hook missing"); + if (!os || !os->emu_default_syscall || !arch || !arch->emu) { + cfree_compiler_free(c); + return; + } + process.compiler = (Compiler*)c; + process.os = os; + process.arch = arch; + process.guest_target.arch = CFREE_ARCH_RV64; + process.guest_target.os = CFREE_OS_LINUX; + 
process.bindings.syscall = os->emu_default_syscall; + if (os->emu_init_process_private) { + EXPECT(os->emu_init_process_private((Compiler*)c, &process) == CFREE_OK, + "linux vm syscall: process private init failed"); + } + thread.process = &process; + if (os->emu_init_thread_private) { + EXPECT(os->emu_init_thread_private((Compiler*)c, &process, &thread) == + CFREE_OK, + "linux vm syscall: thread private init failed"); + } + EXPECT(emu_addr_space_init(&process.image.addr_space, (Compiler*)c, + 0x1000u) == CFREE_OK, + "linux vm syscall: address-space init failed"); + cpu = arch->emu->cpu_new((Compiler*)c, 0, 0); + EXPECT(cpu != NULL, "linux vm syscall: cpu alloc failed"); + if (!cpu) { + if (os->emu_destroy_thread_private) + os->emu_destroy_thread_private((Compiler*)c, &thread); + if (os->emu_destroy_process_private) + os->emu_destroy_process_private((Compiler*)c, &process); + emu_addr_space_destroy(&process.image.addr_space); + cfree_compiler_free(c); + return; + } + thread.cpu = cpu; + emu_cpu_set_thread(cpu, &thread); + emu_cpu_attach_addr_space(cpu, &process.image.addr_space); - elf = build_minimal_elf(&elf_len); - EXPECT(elf != NULL, "ELF buffer allocation failed"); - if (!elf) return; + arch->emu->set_gpr(&thread, RV_A0, 0); + arch->emu->set_gpr(&thread, RV_A1, 0x1000u); + arch->emu->set_gpr(&thread, RV_A2, 3u); + arch->emu->set_gpr(&thread, RV_A3, 0x22u); + arch->emu->set_gpr(&thread, RV_A4, ~(u64)0); + arch->emu->set_gpr(&thread, RV_A5, 0); + arch->emu->set_gpr(&thread, RV_A7, 222u); + emu_syscall(&thread); + addr = arch->emu->get_gpr(&thread, RV_A0); + EXPECT((i64)addr > 0, "linux vm syscall: mmap returned 0x%llx", + (unsigned long long)addr); + p = emu_addr_space_ptr(&process.image.addr_space, addr, 8u, EMU_MEM_WRITE); + EXPECT(p != NULL, "linux vm syscall: mmap result should be writable"); - memset(&img, 0, sizeof img); - int rc = emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, - /*argv*/ NULL, /*envp*/ NULL, &img); - EXPECT(rc == 0, "emu_load_elf 
returned %d", rc); - if (rc != 0) { + arch->emu->set_gpr(&thread, RV_A0, addr); + arch->emu->set_gpr(&thread, RV_A1, 0x1000u); + arch->emu->set_gpr(&thread, RV_A2, 1u); + arch->emu->set_gpr(&thread, RV_A7, 226u); + emu_syscall(&thread); + EXPECT(arch->emu->get_gpr(&thread, RV_A0) == 0, + "linux vm syscall: mprotect failed"); + p = emu_addr_space_ptr(&process.image.addr_space, addr, 8u, EMU_MEM_WRITE); + fault = emu_addr_space_last_fault(&process.image.addr_space); + EXPECT(p == NULL && fault && fault->kind == EMU_FAULT_PROT, + "linux vm syscall: mprotect should deny writes"); + + arch->emu->set_gpr(&thread, RV_A0, addr); + arch->emu->set_gpr(&thread, RV_A1, 0x1000u); + arch->emu->set_gpr(&thread, RV_A7, 215u); + emu_syscall(&thread); + EXPECT(arch->emu->get_gpr(&thread, RV_A0) == 0, + "linux vm syscall: munmap failed"); + p = emu_addr_space_ptr(&process.image.addr_space, addr, 1u, EMU_MEM_READ); + fault = emu_addr_space_last_fault(&process.image.addr_space); + EXPECT(p == NULL && fault && fault->kind == EMU_FAULT_UNMAPPED, + "linux vm syscall: munmap should remove the mapping"); + + emu_cpu_free(cpu); + if (os->emu_destroy_thread_private) + os->emu_destroy_thread_private((Compiler*)c, &thread); + if (os->emu_destroy_process_private) + os->emu_destroy_process_private((Compiler*)c, &process); + emu_addr_space_destroy(&process.image.addr_space); + cfree_compiler_free(c); +} + +static void emu_fixture_expect_exit_with_bindings( + const char* name, unsigned char* elf, size_t elf_len, int want_exit, + uint32_t max_blocks, const CfreeEmuExternalBindings* bindings); + +static void emu_fixture_expect_exit(const char* name, unsigned char* elf, + size_t elf_len, int want_exit, + uint32_t max_blocks) { + CfreeEmuExternalBindings no_bindings; + memset(&no_bindings, 0, sizeof(no_bindings)); + emu_fixture_expect_exit_with_bindings(name, elf, elf_len, want_exit, + max_blocks, &no_bindings); +} + +static void emu_fixture_expect_exit_with_bindings( + const char* name, unsigned char* 
elf, size_t elf_len, int want_exit, + uint32_t max_blocks, const CfreeEmuExternalBindings* bindings) { + CfreeCompiler* c; + CfreeJitHost host; + CfreeEmuOptions opts; + CfreeTarget guest_target; + CfreeEmu* emu = NULL; + EmuCPUState* cpu; + EmuTrapReason trap; + CfreeStatus st; + uint32_t i; + long ps; + + c = new_host_compiler(); + if (!c) { free(elf); return; } - EXPECT(img.entry_pc == 0x11000ull, "entry_pc should be 0x11000, got 0x%llx", - (unsigned long long)img.entry_pc); - EXPECT(img.guest_base != NULL, "guest_base is NULL"); - EXPECT(img.initial_sp != 0, "initial_sp is 0"); - - cpu = emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, img.entry_pc, img.initial_sp); - EXPECT(cpu != NULL, "emu_cpu_new returned NULL"); - - rc = emu_load_elf_attach(cpu, &img); - EXPECT(rc == 0, "emu_load_elf_attach returned %d", rc); - - /* Translate the host pointer to the entry instruction stream. */ - unsigned char* host_pc = emu_cpu_va_to_host_pub(cpu, img.entry_pc, 4); - EXPECT(host_pc != NULL, "VA translation failed"); - - for (steps = 0; steps < 32u; ++steps) { - u64 pc = emu_cpu_pc(cpu); - unsigned char* p = emu_cpu_va_to_host_pub(cpu, pc, 4); - if (!p) { - EXPECT(0, "PC 0x%llx not in guest AS", (unsigned long long)pc); - break; + ps = sysconf(_SC_PAGESIZE); + if (ps > 0) g_execmem.page_size = (size_t)ps; + + memset(&host, 0, sizeof(host)); + host.execmem = &g_execmem; + memset(&guest_target, 0, sizeof(guest_target)); + guest_target.arch = CFREE_ARCH_RV64; + guest_target.os = CFREE_OS_LINUX; + guest_target.obj = CFREE_OBJ_ELF; + guest_target.ptr_size = 8u; + guest_target.ptr_align = 8u; + memset(&opts, 0, sizeof(opts)); + opts.guest_bytes.data = elf; + opts.guest_bytes.len = elf_len; + opts.guest_target = guest_target; + opts.has_guest_target = true; + opts.jit_host = &host; + if (bindings) opts.bindings = *bindings; + + st = cfree_emu_new(c, &opts, &emu); + EXPECT(st == CFREE_OK, "%s: cfree_emu_new returned %d", name, (int)st); + if (st == CFREE_OK && emu) { + trap = 
EMU_TRAP_NONE; + for (i = 0; i < max_blocks && trap == EMU_TRAP_NONE; ++i) { + st = cfree_emu_step(emu, 1); + EXPECT(st == CFREE_OK, "%s: cfree_emu_step returned %d at block %u", + name, (int)st, i); + if (st != CFREE_OK) break; + cpu = emu_internal_cpu(emu); + trap = emu_cpu_trap_reason(cpu); + } + cpu = emu_internal_cpu(emu); + trap = emu_cpu_trap_reason(cpu); + EXPECT(trap == EMU_TRAP_EXIT, "%s: expected exit trap within %u blocks", + name, max_blocks); + EXPECT(emu_cpu_exit_code(cpu) == want_exit, + "%s: exit_code should be %d, got %d", name, want_exit, + emu_cpu_exit_code(cpu)); + if (trap == EMU_TRAP_EXIT && emu_cpu_exit_code(cpu) == want_exit) { + fprintf(stderr, "PASS %s\n", name); } - n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, p, pc, insts, 16); - EXPECT(n > 0u, "decode at pc=0x%llx returned 0 insts", - (unsigned long long)pc); - if (n == 0u) break; - emu_cpu_interp_block(cpu, insts, n); - if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break; } - EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT, - "expected EMU_TRAP_EXIT, got %u", (unsigned)emu_cpu_trap_reason(cpu)); - exit_code = emu_cpu_exit_code(cpu); - EXPECT(exit_code == 42, "exit_code should be 42, got %d", exit_code); - emu_cpu_free(cpu); - emu_unload_image(cc, &img); + if (emu) cfree_emu_free(emu); free(elf); cfree_compiler_free(c); } +static void jit_vertical_smoke(void) { + unsigned char* elf; + size_t elf_len; + + elf = build_minimal_elf(&elf_len); + EXPECT(elf != NULL, "static ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("static-rv64-exit", elf, elf_len, 42, 8); +} + +static uint64_t host_plus5(uint64_t v) { return v + 5u; } +static uint64_t host_add2(uint64_t a, uint64_t b) { return a + b; } + +static CfreeStatus host_import_resolver(void* user, CfreeEmu* emu, + const CfreeEmuImportRequest* req, + CfreeEmuResolvedImport* out) { + (void)user; + (void)emu; + memset(out, 0, sizeof(*out)); + if (req && cfree_slice_eq_cstr(req->symbol_name, "host_add2")) { + 
out->host_fn = (void*)host_add2; + out->signature.abi = CFREE_EMU_IMPORT_ABI_GUEST_C; + out->signature.result = CFREE_EMU_VALUE_U64; + out->signature.nargs = 2u; + out->signature.args[0] = CFREE_EMU_VALUE_U64; + out->signature.args[1] = CFREE_EMU_VALUE_U64; + return CFREE_OK; + } + if (req && cfree_slice_eq_cstr(req->symbol_name, "import_add")) { + out->host_fn = (void*)host_plus5; + out->signature.abi = CFREE_EMU_IMPORT_ABI_GUEST_C; + out->signature.result = CFREE_EMU_VALUE_U64; + out->signature.nargs = 1u; + out->signature.args[0] = CFREE_EMU_VALUE_U64; + return CFREE_OK; + } + return CFREE_NOT_FOUND; +} + +typedef struct DsoFixture { + unsigned char* bytes; + size_t len; +} DsoFixture; + +static CfreeStatus dso_object_resolver(void* user, CfreeEmu* emu, + const CfreeEmuObjectRequest* req, + CfreeEmuResolvedObject* out) { + DsoFixture* d = (DsoFixture*)user; + (void)emu; + memset(out, 0, sizeof(*out)); + if (d && req && cfree_slice_eq_cstr(req->object_name, "libdsoadd.so")) { + out->object_bytes.data = d->bytes; + out->object_bytes.len = d->len; + return CFREE_OK; + } + return CFREE_NOT_FOUND; +} + +static void dynamic_import_tls_red(void) { + unsigned char* elf; + size_t elf_len; + + elf = build_dynamic_import_tls_elf(&elf_len); + EXPECT(elf != NULL, "dynamic import/TLS ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("dynamic-import-tls-rv64", elf, elf_len, 42, 32); +} + +static void host_import_bridge_smoke(void) { + unsigned char* elf; + size_t elf_len; + CfreeEmuExternalBindings bindings; + elf = build_host_import_elf(&elf_len); + EXPECT(elf != NULL, "host import ELF buffer allocation failed"); + if (!elf) return; + memset(&bindings, 0, sizeof(bindings)); + bindings.resolve_import = host_import_resolver; + emu_fixture_expect_exit_with_bindings("host-import-bridge-rv64", elf, + elf_len, 42, 32, &bindings); +} + +static void dso_import_reloc_smoke(void) { + unsigned char* elf; + size_t elf_len; + DsoFixture dso; + 
CfreeEmuExternalBindings bindings; + memset(&dso, 0, sizeof(dso)); + dso.bytes = build_dso_import_so(&dso.len); + EXPECT(dso.bytes != NULL, "DSO ELF buffer allocation failed"); + if (!dso.bytes) return; + elf = build_dso_import_main_elf(&elf_len); + EXPECT(elf != NULL, "DSO main ELF buffer allocation failed"); + if (!elf) { + free(dso.bytes); + return; + } + memset(&bindings, 0, sizeof(bindings)); + bindings.resolve_object = dso_object_resolver; + bindings.user = &dso; + emu_fixture_expect_exit_with_bindings("dso-import-reloc-rv64", elf, elf_len, + 42, 64, &bindings); + free(dso.bytes); +} + +static void tls_distinct_smoke(void) { + unsigned char* elf; + size_t elf_len; + elf = build_tls_distinct_elf(&elf_len); + EXPECT(elf != NULL, "distinct TLS ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("tls-distinct-rv64", elf, elf_len, 20, 32); +} + +static void signal_perms_red(void) { + unsigned char* elf; + size_t elf_len; + + elf = build_signal_perms_elf(&elf_len); + EXPECT(elf != NULL, "signal/perms ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("signal-perms-rv64", elf, elf_len, 42, 64); +} + +static void signal_load_fault_smoke(void) { + unsigned char* elf; + size_t elf_len; + + elf = build_signal_load_fault_elf(&elf_len); + EXPECT(elf != NULL, "signal/load-fault ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("signal-load-fault-rv64", elf, elf_len, 42, 64); +} + +static void signal_sigreturn_smoke(void) { + unsigned char* elf; + size_t elf_len; + elf = build_signal_sigreturn_elf(&elf_len); + EXPECT(elf != NULL, "signal sigreturn ELF buffer allocation failed"); + if (!elf) return; + emu_fixture_expect_exit("signal-sigreturn-rv64", elf, elf_len, 42, 96); +} + int main(void) { decoder_smoke(); - interp_smoke(); + jit_vertical_smoke(); + dynamic_import_tls_red(); + host_import_bridge_smoke(); + dso_import_reloc_smoke(); + tls_distinct_smoke(); + signal_perms_red(); + 
signal_load_fault_smoke(); + signal_sigreturn_smoke(); + vm_unit_smoke(); + linux_vm_syscall_smoke(); if (g_fail) { fprintf(stderr, "FAILED %d check(s)\n", g_fail); return 1; diff --git a/test/test.mk b/test/test.mk @@ -114,32 +114,32 @@ $(DEBUG_TEST_BIN): test/debug/roundtrip_unit.c $(LIB_AR) # (first-match disasm picks the alias spelling over the canonical # form). Internal arch/ surface — needs -Isrc. AA64_ISA_TEST_BIN = build/test/aa64_isa_test +RV64_DECODE_TEST_BIN = build/test/rv64_decode_test -test-isa: $(AA64_ISA_TEST_BIN) +test-isa: $(AA64_ISA_TEST_BIN) $(RV64_DECODE_TEST_BIN) $(AA64_ISA_TEST_BIN) + $(RV64_DECODE_TEST_BIN) $(AA64_ISA_TEST_BIN): test/arch/aa64_isa_test.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(DRIVER_CFLAGS) -Isrc test/arch/aa64_isa_test.c $(LIB_AR) -o $@ -# test-emu: emulator unit tests. The rv64 lane builds tiny in-memory rv64 -# ELFs and asserts the interpreter exits through the syscall handler with +$(RV64_DECODE_TEST_BIN): test/arch/rv64_decode_test.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(DRIVER_CFLAGS) -Isrc test/arch/rv64_decode_test.c $(LIB_AR) -o $@ + +# test-emu: emulator unit tests. The rv64 lane builds a tiny in-memory rv64 +# ELF and asserts the lifted/JIT path exits through the syscall handler with # the expected code. Internal arch/emu surface — needs -Isrc. 
EMU_RV64_TEST_BIN = build/test/emu_rv64_test -EMU_RV64_EXTRAS_TEST_BIN = build/test/emu_rv64_extras_test -test-emu: $(EMU_RV64_TEST_BIN) $(EMU_RV64_EXTRAS_TEST_BIN) +test-emu: $(EMU_RV64_TEST_BIN) $(EMU_RV64_TEST_BIN) - $(EMU_RV64_EXTRAS_TEST_BIN) $(EMU_RV64_TEST_BIN): test/emu/rv64_smoke_test.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(DRIVER_CFLAGS) -Isrc test/emu/rv64_smoke_test.c $(LIB_AR) -o $@ -$(EMU_RV64_EXTRAS_TEST_BIN): test/emu/rv64_extras_test.c $(LIB_AR) - @mkdir -p $(dir $@) - $(CC) $(DRIVER_CFLAGS) -Isrc test/emu/rv64_extras_test.c $(LIB_AR) -o $@ - CG_API_TEST_BIN = build/test/cg_api_test CG_SWITCH_TEST_BIN = build/test/cg_switch_test ABI_CLASSIFY_TEST_BIN = build/test/abi_classify_test