kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f7300171f7b5734a14ac2248c32c4af8b8f799e8
parent da095c5232e37ba266029d2bbf5b5a0174e47c29
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 10:35:45 -0700

emu: scaffold cfree emu driver and stubbed public API

Adds the CfreeEmu* surface described in doc/EMU.md (guest arch enum,
trace flags, options struct, opaque handle, and the run/new/step/lookup/
free entries) and a complete `cfree emu` driver tool. The driver loads
a guest ELF, auto-detects the guest arch (aarch64/riscv64) via
cfree_detect_target, marshals argv/envp, and calls cfree_emu_run.
libcfree-side entries are panic-free stubs in src/api/stubs.c; the
binary builds and the driver returns a clean error until the emu
subsystem under src/emu/ lands.

Diffstat:
M driver/driver.h | 2 ++
A driver/emu.c | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/main.c | 3 ++-
M include/cfree.h | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/api/stubs.c | 16 ++++++++++++++++
5 files changed, 362 insertions(+), 1 deletion(-)

diff --git a/driver/driver.h b/driver/driver.h @@ -18,6 +18,7 @@ typedef enum DriverTool { DRIVER_TOOL_OBJDUMP, DRIVER_TOOL_DBG, DRIVER_TOOL_RUN, + DRIVER_TOOL_EMU, } DriverTool; /* Multi-call entry: dispatches by argv[0] basename (or argv[1] fallback). */ @@ -31,6 +32,7 @@ int driver_ar (int argc, char** argv); int driver_objdump(int argc, char** argv); int driver_dbg (int argc, char** argv); int driver_run (int argc, char** argv); +int driver_emu (int argc, char** argv); /* Shared host environment used by every tool that calls into libcfree. * driver_env_init wires up the libc-backed heap, the stderr diag sink, and diff --git a/driver/emu.c b/driver/emu.c @@ -0,0 +1,260 @@ +#include "driver.h" + +#include <stddef.h> +#include <stdint.h> + +/* `cfree emu` — run a guest user-mode ELF on the host via libcfree's + * per-basic-block JIT translator. v1 guest archs: aarch64, riscv64. + * + * Argv shape mirrors `cfree run`: a single positional input (the guest + * ELF path) followed by `--` and the guest argv. Flags configure the + * translator (optimize level), tracing (PC / instruction / block), and + * the guest arch (auto-detected from the ELF when -arch is absent). + * + * The freestanding emu core takes guest bytes; this driver handles the + * path -> bytes step and the argv/envp marshalling. The driver returns + * the guest's exit code on a clean exit, or 1 on internal failure. */ + +#define EMU_TOOL "emu" + +typedef struct EmuOptions { + DriverEnv* env; + size_t argv_bound; + + int opt_level; + CfreeEmuTraceFlags trace; + CfreeEmuArch guest_arch; + int guest_arch_set; + + const char* elf_path; /* positional input (required) */ + + /* Guest argv collected after `--`. argv[0] defaults to elf_path + * when the user supplied no `--` segment. The trailing NULL is + * added at marshalling time and is not counted in nguest_argv. 
*/ + const char** guest_argv; + uint32_t nguest_argv; +} EmuOptions; + +static void emu_usage(void) +{ + driver_errf(EMU_TOOL, "%s", + "usage: cfree emu [-O0|-O1|-O2]\n" + " [-arch aarch64|riscv64]\n" + " [-tracepc] [-traceinsn] [-traceblock]\n" + " guest.elf [-- arg...]"); +} + +static int emu_alloc_arrays(EmuOptions* o, int argc) +{ + size_t bound = (size_t)argc; + o->argv_bound = bound; + /* +1 to leave room for the elf_path default at index 0. */ + o->guest_argv = driver_alloc_zeroed(o->env, (bound + 1) * sizeof(*o->guest_argv)); + if (!o->guest_argv) { + driver_errf(EMU_TOOL, "out of memory"); + return 1; + } + return 0; +} + +static int emu_record_arch(EmuOptions* o, const char* val) +{ + if (driver_streq(val, "aarch64") || driver_streq(val, "arm64")) { + o->guest_arch = CFREE_EMU_ARCH_AARCH64; o->guest_arch_set = 1; return 0; + } + if (driver_streq(val, "riscv64") || driver_streq(val, "rv64")) { + o->guest_arch = CFREE_EMU_ARCH_RISCV64; o->guest_arch_set = 1; return 0; + } + driver_errf(EMU_TOOL, "unsupported -arch value: %s", val); + return 1; +} + +static int emu_parse(int argc, char** argv, EmuOptions* o) +{ + int i; + int after_dash_dash = 0; + if (emu_alloc_arrays(o, argc) != 0) return 1; + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + + if (after_dash_dash) { + o->guest_argv[o->nguest_argv++] = argv[i]; + continue; + } + if (driver_streq(a, "--")) { after_dash_dash = 1; continue; } + + if (driver_streq(a, "-O0")) { o->opt_level = 0; continue; } + if (driver_streq(a, "-O1")) { o->opt_level = 1; continue; } + if (driver_streq(a, "-O2")) { o->opt_level = 2; continue; } + + if (driver_streq(a, "-tracepc")) { o->trace |= CFREE_EMU_TRACE_PC; continue; } + if (driver_streq(a, "-traceinsn")) { o->trace |= CFREE_EMU_TRACE_INSN; continue; } + if (driver_streq(a, "-traceblock")) { o->trace |= CFREE_EMU_TRACE_BLOCK; continue; } + + if (driver_streq(a, "-arch")) { + if (++i >= argc) { driver_errf(EMU_TOOL, "-arch requires an argument"); return 1; } + 
if (emu_record_arch(o, argv[i]) != 0) return 1; + continue; + } + + if (a[0] == '-' && a[1] != '\0') { + driver_errf(EMU_TOOL, "unknown flag: %s", a); + return 1; + } + + if (o->elf_path) { + driver_errf(EMU_TOOL, "multiple guest ELF inputs: %s, %s", + o->elf_path, a); + return 1; + } + o->elf_path = a; + } + + if (!o->elf_path) { + driver_errf(EMU_TOOL, "missing guest ELF input"); + emu_usage(); + return 1; + } + return 0; +} + +static void emu_options_release(EmuOptions* o) +{ + size_t bound = o->argv_bound; + if (o->guest_argv) { + driver_free(o->env, o->guest_argv, + (bound + 1) * sizeof(*o->guest_argv)); + } +} + +/* Map a CfreeArchKind from cfree_detect_target onto the emu's guest-arch + * enum. Returns 0 on a supported arch, 1 otherwise. */ +static int emu_arch_from_kind(CfreeArchKind k, CfreeEmuArch* out) +{ + switch (k) { + case CFREE_ARCH_ARM_64: *out = CFREE_EMU_ARCH_AARCH64; return 0; + case CFREE_ARCH_RV64: *out = CFREE_EMU_ARCH_RISCV64; return 0; + default: return 1; + } +} + +static const char* emu_arch_name(CfreeEmuArch a) +{ + switch (a) { + case CFREE_EMU_ARCH_AARCH64: return "aarch64"; + case CFREE_EMU_ARCH_RISCV64: return "riscv64"; + } + return "?"; +} + +/* Auto-detect the guest arch from the ELF magic when -arch was not set. + * Honors a user-supplied -arch verbatim (no cross-check against the ELF; + * mismatches surface as decode failures inside the emu). */ +static int emu_resolve_arch(EmuOptions* o, const CfreeBytesInput* elf) +{ + CfreeTarget detected; + if (o->guest_arch_set) return 0; + if (cfree_detect_target(elf->data, elf->len, &detected) != 0) { + driver_errf(EMU_TOOL, + "could not detect target from %s; pass -arch", o->elf_path); + return 1; + } + if (emu_arch_from_kind(detected.arch, &o->guest_arch) != 0) { + driver_errf(EMU_TOOL, + "unsupported guest arch in %s; v1 supports aarch64 and riscv64", + o->elf_path); + return 1; + } + o->guest_arch_set = 1; + return 0; +} + +/* Build a NULL-terminated argv for the guest. 
argv[0] defaults to the + * guest ELF path if the user supplied no `--` segment, matching Unix + * convention. The returned array points into the caller-owned argv; the + * trailing NULL slot lives in the EmuOptions back-store. */ +static void emu_finalize_argv(EmuOptions* o, const char*** out_argv) +{ + if (o->nguest_argv == 0) { + o->guest_argv[0] = o->elf_path; + o->guest_argv[1] = 0; + } else { + o->guest_argv[o->nguest_argv] = 0; + } + *out_argv = (const char**)o->guest_argv; +} + +int driver_emu(int argc, char** argv) +{ + DriverEnv env; + EmuOptions eo = {0}; + CfreeEnv cenv; + CfreeCompiler* compiler = NULL; + DriverLoad elf_lf = {0}; + CfreeBytesInput elf_in; + CfreeEmuOptions opts; + const char** guest_argv; + int exit_code = 0; + int rc = 1; + + driver_env_init(&env); + eo.env = &env; + + if (emu_parse(argc, argv, &eo) != 0) { + emu_options_release(&eo); + driver_env_fini(&env); + return 2; + } + + cenv = driver_env_to_cfree(&env); + if (!cenv.file_io || !cenv.file_io->read_all) { + driver_errf(EMU_TOOL, "host file I/O unavailable"); + goto out; + } + + if (driver_load_bytes(cenv.file_io, EMU_TOOL, eo.elf_path, + &elf_lf, &elf_in) != 0) { + goto out; + } + + if (emu_resolve_arch(&eo, &elf_in) != 0) goto out; + + /* The emu's host-side compiler runs at the host's native target — + * the JIT image holds host code, the *guest* arch is configured + * through CfreeEmuOptions.guest_arch. 
*/ + compiler = cfree_compiler_new(driver_host_target(), &cenv); + if (!compiler) { + driver_errf(EMU_TOOL, "failed to initialize compiler"); + goto out; + } + + emu_finalize_argv(&eo, &guest_argv); + + { + CfreeEmuOptions z = {0}; + opts = z; + } + opts.guest_arch = eo.guest_arch; + opts.guest_elf_bytes = elf_in.data; + opts.guest_elf_len = elf_in.len; + opts.optimize = eo.opt_level; + opts.trace = eo.trace; + opts.argv = (const char* const*)guest_argv; + opts.envp = 0; + + if (cfree_emu_run(compiler, &opts, &exit_code) != 0) { + driver_errf(EMU_TOOL, "emulation of %s (%s) failed", + eo.elf_path, emu_arch_name(eo.guest_arch)); + goto out; + } + + rc = exit_code; + +out: + if (compiler) cfree_compiler_free(compiler); + if (elf_lf.loaded && cenv.file_io) driver_release_bytes(cenv.file_io, &elf_lf); + emu_options_release(&eo); + driver_env_fini(&env); + return rc; +} diff --git a/driver/main.c b/driver/main.c @@ -1,7 +1,7 @@ #include "driver.h" /* Multi-call dispatch. Looks at argv[0]'s basename for "cc", "as", "ld", - * "ar", "objdump", "dbg", or "run"; if argv[0] is the bare "cfree" + * "ar", "objdump", "dbg", "run", or "emu"; if argv[0] is the bare "cfree" * binary, falls back to argv[1]. Preprocessor-only mode is `cc -E`. 
*/ static int dispatch(const char* name, int argc, char** argv) @@ -13,6 +13,7 @@ static int dispatch(const char* name, int argc, char** argv) if (driver_streq(name, "objdump")) return driver_objdump(argc, argv); if (driver_streq(name, "dbg")) return driver_dbg (argc, argv); if (driver_streq(name, "run")) return driver_run (argc, argv); + if (driver_streq(name, "emu")) return driver_emu (argc, argv); return -1; } diff --git a/include/cfree.h b/include/cfree.h @@ -916,6 +916,88 @@ int cfree_pipeline_link_jit (CfreePipeline*, const CfreeLinkOptions*, CfreeJit** out_jit); /* ============================================================ + * Emulator (cfree emu) + * ============================================================ + * Run a guest user-mode ELF on the host via per-basic-block JIT translation. + * Pipeline shape: guest bytes -> per-ISA decoder -> per-ISA lifter -> CG -> + * (opt?) -> MCEmitter -> ObjBuilder -> link_jit (incremental) -> host code. + * The emu owns a single growing CfreeJit for the session: cold blocks are + * translated and incrementally linked into one image; hot edges are patched + * by the runtime (block chaining) outside the linker. + * + * v1 guest archs: aarch64, riscv64. x86_64 deferred. SIMD/vector ISA + * extensions, full-system emulation, self-modifying code, and foreign-OS + * syscalls are not supported in v1 (see doc/EMU.md). + * + * The freestanding core takes guest ELF bytes; path-shaped helpers live in + * the driver and feed bytes via CfreeFileIO.read_all. Guest memory loads + * and stores route through libcfree's runtime (bounds-checked against the + * mapped guest address space); guest syscalls are forwarded to the host OS + * via per-OS tables. */ + +typedef enum CfreeEmuArch { + CFREE_EMU_ARCH_AARCH64, + CFREE_EMU_ARCH_RISCV64, +} CfreeEmuArch; + +/* Trace flag bitmask. 
PC traces the guest PC at every block entry; INSN + * traces every decoded guest instruction; BLOCK traces each translation + * event (cold-miss into the lifter). All traces are emitted via the env's + * CfreeDiagSink at CFREE_DIAG_NOTE. */ +typedef enum CfreeEmuTraceFlag { + CFREE_EMU_TRACE_PC = 1u << 0, + CFREE_EMU_TRACE_INSN = 1u << 1, + CFREE_EMU_TRACE_BLOCK = 1u << 2, +} CfreeEmuTraceFlag; + +typedef uint32_t CfreeEmuTraceFlags; + +/* Per-invocation emu configuration. `guest_elf_bytes` must outlive the call + * (cfree_emu_run) or the returned CfreeEmu (cfree_emu_new). `argv` and + * `envp`, when non-NULL, are NULL-terminated arrays of NUL-terminated + * strings; the emu copies them into the guest stack at startup, so the + * caller need not keep them alive past the new/run call. argv[0] is + * conventionally the guest program path. envp may be NULL for an empty + * environment. + * + * `optimize` selects the per-block backend: 0 drives a CGTarget directly + * (fast translation, slow execution); 2 wraps with opt_cgtarget (slow + * translation, fast execution). Other levels are reserved. + * + * Guest fd map / sandboxing is not exposed in v1 — guest syscalls are + * forwarded into the host process's fd table verbatim. */ +typedef struct CfreeEmuOptions { + CfreeEmuArch guest_arch; + const uint8_t* guest_elf_bytes; + size_t guest_elf_len; + int optimize; + CfreeEmuTraceFlags trace; + const char* const* argv; /* NULL-terminated; may be NULL */ + const char* const* envp; /* NULL-terminated; may be NULL */ +} CfreeEmuOptions; + +typedef struct CfreeEmu CfreeEmu; + +/* One-shot: load the guest ELF, run until exit/trap, fill *out_exit_code + * with the guest's exit status. Returns 0 on a clean guest exit (including + * a nonzero guest exit_code), nonzero on internal failure (decode/lift + * failure, OOM, unsupported guest arch). */ +int cfree_emu_run(CfreeCompiler*, const CfreeEmuOptions*, + int* out_exit_code); + +/* Lower-level surface for dbg integration. 
Lifecycle: emu_new constructs + * the runtime (reserves the code-cache VA region, maps guest segments, + * builds the initial CPUState); emu_step runs at most `nblocks` translated + * blocks before returning; emu_lookup translates the block at `guest_pc` + * if cold and returns its host entry (NULL on translation failure or an + * unmapped guest_pc). emu_free releases the runtime, the JIT image, and + * the guest address space. */ +CfreeEmu* cfree_emu_new (CfreeCompiler*, const CfreeEmuOptions*); +int cfree_emu_step (CfreeEmu*, uint32_t nblocks); +void* cfree_emu_lookup(CfreeEmu*, uint64_t guest_pc); +void cfree_emu_free (CfreeEmu*); + +/* ============================================================ * Binary format detection * ============================================================ * Sniff the format of a binary blob from its magic bytes. diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -215,3 +215,19 @@ CfreeDwarfParamIter* cfree_dwarf_param_iter_new (CfreeDebugInfo* d, uint64_t pc) int cfree_dwarf_param_iter_next(CfreeDwarfParamIter* it, CfreeDwarfVar* o) { (void)it; (void)o; return 0; } void cfree_dwarf_param_iter_free(CfreeDwarfParamIter* it) { (void)it; } + +/* ============================================================ + * Emulator (cfree emu) + * ============================================================ */ +struct CfreeEmu { int _; }; + +int cfree_emu_run(CfreeCompiler* c, const CfreeEmuOptions* opts, int* out_exit_code) +{ + (void)c; (void)opts; + if (out_exit_code) *out_exit_code = 0; + return 1; +} +CfreeEmu* cfree_emu_new (CfreeCompiler* c, const CfreeEmuOptions* o) { (void)c; (void)o; return 0; } +int cfree_emu_step (CfreeEmu* e, uint32_t n) { (void)e; (void)n; return 1; } +void* cfree_emu_lookup(CfreeEmu* e, uint64_t pc) { (void)e; (void)pc; return 0; } +void cfree_emu_free (CfreeEmu* e) { (void)e; }