commit f7300171f7b5734a14ac2248c32c4af8b8f799e8
parent da095c5232e37ba266029d2bbf5b5a0174e47c29
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 10:35:45 -0700
emu: scaffold cfree emu driver and stubbed public API
Adds the CfreeEmu* surface described in doc/EMU.md (guest arch enum,
trace flags, options struct, opaque handle, and the run/new/step/lookup/
free entries) and a complete `cfree emu` driver tool. The driver loads
a guest ELF, auto-detects the guest arch (aarch64/riscv64) via
cfree_detect_target, marshals argv/envp, and calls cfree_emu_run.
libcfree-side entries are panic-free stubs in src/api/stubs.c; the
binary builds and the driver returns a clean error until the emu
subsystem under src/emu/ lands.
Diffstat:
5 files changed, 362 insertions(+), 1 deletion(-)
diff --git a/driver/driver.h b/driver/driver.h
@@ -18,6 +18,7 @@ typedef enum DriverTool {
DRIVER_TOOL_OBJDUMP,
DRIVER_TOOL_DBG,
DRIVER_TOOL_RUN,
+ DRIVER_TOOL_EMU,
} DriverTool;
/* Multi-call entry: dispatches by argv[0] basename (or argv[1] fallback). */
@@ -31,6 +32,7 @@ int driver_ar (int argc, char** argv);
int driver_objdump(int argc, char** argv);
int driver_dbg (int argc, char** argv);
int driver_run (int argc, char** argv);
+int driver_emu (int argc, char** argv);
/* Shared host environment used by every tool that calls into libcfree.
* driver_env_init wires up the libc-backed heap, the stderr diag sink, and
diff --git a/driver/emu.c b/driver/emu.c
@@ -0,0 +1,260 @@
+#include "driver.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* `cfree emu` — run a guest user-mode ELF on the host via libcfree's
+ * per-basic-block JIT translator. v1 guest archs: aarch64, riscv64.
+ *
+ * Argv shape mirrors `cfree run`: a single positional input (the guest
+ * ELF path) followed by `--` and the guest argv. Flags configure the
+ * translator (optimize level), tracing (PC / instruction / block), and
+ * the guest arch (auto-detected from the ELF when -arch is absent).
+ *
+ * The freestanding emu core takes guest bytes; this driver handles the
+ * path -> bytes step and the argv/envp marshalling. It returns the guest's
+ * exit code on a clean exit, 1 on internal failure, 2 on a usage error. */
+
+#define EMU_TOOL "emu"
+
+/* Parsed `cfree emu` command line plus the bookkeeping needed to release
+ * it. driver_emu zero-initializes the struct, sets `env`, and then hands
+ * it to emu_parse. */
+typedef struct EmuOptions {
+ /* Host environment passed to driver_alloc_zeroed / driver_free. */
+ DriverEnv* env;
+ /* argc as seen by emu_alloc_arrays; guest_argv holds argv_bound + 1
+ * pointers, and emu_options_release recomputes that size from it. */
+ size_t argv_bound;
+
+ /* 0, 1 or 2 from -O0/-O1/-O2; stays 0 when no -O flag is given. */
+ int opt_level;
+ /* OR of CFREE_EMU_TRACE_* bits set by -tracepc/-traceinsn/-traceblock. */
+ CfreeEmuTraceFlags trace;
+ /* Valid only when guest_arch_set is nonzero (set by -arch or by
+ * auto-detection in emu_resolve_arch). */
+ CfreeEmuArch guest_arch;
+ int guest_arch_set;
+
+ const char* elf_path; /* positional input (required) */
+
+ /* Guest argv collected after `--`. argv[0] defaults to elf_path
+ * when the user supplied no `--` segment. The trailing NULL is
+ * added at marshalling time and is not counted in nguest_argv. */
+ const char** guest_argv;
+ uint32_t nguest_argv;
+} EmuOptions;
+
+/* Print the command-line synopsis through the driver's error channel. */
+static void emu_usage(void)
+{
+    static const char synopsis[] =
+        "usage: cfree emu [-O0|-O1|-O2]\n"
+        " [-arch aarch64|riscv64]\n"
+        " [-tracepc] [-traceinsn] [-traceblock]\n"
+        " guest.elf [-- arg...]";
+
+    driver_errf(EMU_TOOL, "%s", synopsis);
+}
+
+/* Allocate the zeroed back-store for the guest argv and record argc in
+ * o->argv_bound so the same size can be recomputed at release time.
+ *
+ * The array holds argc + 1 pointers. emu_parse stores fewer than argc
+ * entries (it starts at argv[1]), so the spare room covers the terminating
+ * NULL and, when no guest arguments were given, the elf_path default that
+ * emu_finalize_argv writes at index 0.
+ *
+ * Returns 0 on success; reports "out of memory" and returns 1 otherwise. */
+static int emu_alloc_arrays(EmuOptions* o, int argc)
+{
+    const size_t slots = (size_t)argc + 1;
+
+    o->argv_bound = (size_t)argc;
+    o->guest_argv = driver_alloc_zeroed(o->env, slots * sizeof(*o->guest_argv));
+    if (o->guest_argv) {
+        return 0;
+    }
+    driver_errf(EMU_TOOL, "out of memory");
+    return 1;
+}
+
+/* Translate the value of `-arch` into a CfreeEmuArch and store it in *o.
+ * Accepted spellings: "aarch64"/"arm64" and "riscv64"/"rv64".
+ * Returns 0 on success; reports the bad value and returns 1 otherwise
+ * (leaving *o untouched). */
+static int emu_record_arch(EmuOptions* o, const char* val)
+{
+    CfreeEmuArch arch;
+
+    if (driver_streq(val, "aarch64") || driver_streq(val, "arm64")) {
+        arch = CFREE_EMU_ARCH_AARCH64;
+    } else if (driver_streq(val, "riscv64") || driver_streq(val, "rv64")) {
+        arch = CFREE_EMU_ARCH_RISCV64;
+    } else {
+        driver_errf(EMU_TOOL, "unsupported -arch value: %s", val);
+        return 1;
+    }
+
+    o->guest_arch = arch;
+    o->guest_arch_set = 1;
+    return 0;
+}
+
+/* Parse the `cfree emu` command line into *o.
+ *
+ * argv[0] is skipped. Before the first `--`: -O0/-O1/-O2 select opt_level,
+ * -tracepc/-traceinsn/-traceblock OR bits into trace, `-arch NAME` fixes
+ * the guest arch, any other argument starting with '-' (a bare "-"
+ * excepted) is rejected, and exactly one remaining argument is accepted as
+ * the guest ELF path. Every argument after `--` is appended to guest_argv
+ * unchanged.
+ *
+ * Returns 0 on success, or 1 after an error has been reported through
+ * driver_errf. The guest_argv back-store is not freed here on failure;
+ * the caller releases it.
+ *
+ * NOTE(review): -O1 ends up as optimize = 1, while the CfreeEmuOptions
+ * documentation describes levels other than 0 and 2 as reserved — confirm
+ * how -O1 is meant to be handled. */
+static int emu_parse(int argc, char** argv, EmuOptions* o)
+{
+    int guest_mode = 0; /* nonzero once `--` has been consumed */
+    int idx;
+
+    if (emu_alloc_arrays(o, argc) != 0) {
+        return 1;
+    }
+
+    for (idx = 1; idx < argc; ++idx) {
+        const char* arg = argv[idx];
+
+        if (guest_mode) {
+            o->guest_argv[o->nguest_argv] = arg;
+            o->nguest_argv++;
+        } else if (driver_streq(arg, "--")) {
+            guest_mode = 1;
+        } else if (driver_streq(arg, "-O0")) {
+            o->opt_level = 0;
+        } else if (driver_streq(arg, "-O1")) {
+            o->opt_level = 1;
+        } else if (driver_streq(arg, "-O2")) {
+            o->opt_level = 2;
+        } else if (driver_streq(arg, "-tracepc")) {
+            o->trace |= CFREE_EMU_TRACE_PC;
+        } else if (driver_streq(arg, "-traceinsn")) {
+            o->trace |= CFREE_EMU_TRACE_INSN;
+        } else if (driver_streq(arg, "-traceblock")) {
+            o->trace |= CFREE_EMU_TRACE_BLOCK;
+        } else if (driver_streq(arg, "-arch")) {
+            /* The value is the next argv entry; consume it here. */
+            ++idx;
+            if (idx >= argc) {
+                driver_errf(EMU_TOOL, "-arch requires an argument");
+                return 1;
+            }
+            if (emu_record_arch(o, argv[idx]) != 0) {
+                return 1;
+            }
+        } else if (arg[0] == '-' && arg[1] != '\0') {
+            driver_errf(EMU_TOOL, "unknown flag: %s", arg);
+            return 1;
+        } else if (o->elf_path) {
+            driver_errf(EMU_TOOL, "multiple guest ELF inputs: %s, %s",
+                        o->elf_path, arg);
+            return 1;
+        } else {
+            o->elf_path = arg;
+        }
+    }
+
+    if (o->elf_path) {
+        return 0;
+    }
+    driver_errf(EMU_TOOL, "missing guest ELF input");
+    emu_usage();
+    return 1;
+}
+
+/* Release the guest_argv back-store allocated by emu_alloc_arrays.
+ *
+ * The size handed to driver_free is recomputed from argv_bound and mirrors
+ * the (argv_bound + 1)-pointer size used at allocation. The strings the
+ * array points at are not freed here; emu_parse only stores pointers taken
+ * from the caller's argv.
+ *
+ * After the call guest_argv is NULL and nguest_argv is 0, so releasing the
+ * same EmuOptions again is a no-op rather than a double free. Safe to call
+ * when the allocation never happened (guest_argv still NULL). */
+static void emu_options_release(EmuOptions* o)
+{
+    if (!o->guest_argv) {
+        return;
+    }
+    driver_free(o->env, o->guest_argv,
+                (o->argv_bound + 1) * sizeof(*o->guest_argv));
+    o->guest_argv = NULL;
+    o->nguest_argv = 0;
+}
+
+/* Convert the CfreeArchKind reported by cfree_detect_target into the
+ * emulator's guest-arch enum. On a supported kind *out is written and 0 is
+ * returned; for any other kind *out is left untouched and 1 is returned. */
+static int emu_arch_from_kind(CfreeArchKind k, CfreeEmuArch* out)
+{
+    if (k == CFREE_ARCH_ARM_64) {
+        *out = CFREE_EMU_ARCH_AARCH64;
+        return 0;
+    }
+    if (k == CFREE_ARCH_RV64) {
+        *out = CFREE_EMU_ARCH_RISCV64;
+        return 0;
+    }
+    return 1;
+}
+
+/* Name of a guest arch for use in diagnostics; "?" for any value that is
+ * not one of the known enumerators. The returned string is a literal. */
+static const char* emu_arch_name(CfreeEmuArch a)
+{
+    if (a == CFREE_EMU_ARCH_AARCH64) {
+        return "aarch64";
+    }
+    if (a == CFREE_EMU_ARCH_RISCV64) {
+        return "riscv64";
+    }
+    return "?";
+}
+
+/* Settle o->guest_arch before the emulator is started.
+ *
+ * A user-supplied -arch wins outright: the ELF is not consulted and no
+ * cross-check is made (a mismatch is left to surface inside the emu).
+ * Otherwise the arch is detected from the loaded bytes with
+ * cfree_detect_target and mapped through emu_arch_from_kind.
+ *
+ * Returns 0 with guest_arch_set == 1 on success; reports an error naming
+ * o->elf_path and returns 1 if detection fails or the detected arch is not
+ * one the emulator supports. */
+static int emu_resolve_arch(EmuOptions* o, const CfreeBytesInput* elf)
+{
+    CfreeTarget target;
+
+    if (!o->guest_arch_set) {
+        if (cfree_detect_target(elf->data, elf->len, &target) != 0) {
+            driver_errf(EMU_TOOL,
+                        "could not detect target from %s; pass -arch",
+                        o->elf_path);
+            return 1;
+        }
+        if (emu_arch_from_kind(target.arch, &o->guest_arch) != 0) {
+            driver_errf(EMU_TOOL,
+                        "unsupported guest arch in %s; v1 supports aarch64 and riscv64",
+                        o->elf_path);
+            return 1;
+        }
+        o->guest_arch_set = 1;
+    }
+    return 0;
+}
+
+/* Terminate the guest argv and hand it out through *out_argv.
+ *
+ * With no arguments collected after `--`, the guest sees a one-element
+ * argv holding the ELF path. Otherwise the collected arguments are used
+ * as-is, so the first of them becomes the guest's argv[0]. In both cases a
+ * NULL terminator is written after the last entry.
+ *
+ * The array handed out is the EmuOptions back-store itself (not a copy);
+ * its string pointers refer to the caller-owned argv. nguest_argv is not
+ * modified. */
+static void emu_finalize_argv(EmuOptions* o, const char*** out_argv)
+{
+    uint32_t count = o->nguest_argv;
+
+    if (count == 0) {
+        o->guest_argv[count++] = o->elf_path;
+    }
+    o->guest_argv[count] = NULL;
+    *out_argv = o->guest_argv;
+}
+
+/* Entry point of the `cfree emu` tool.
+ *
+ * Steps: initialize the driver environment, parse the command line, load
+ * the guest ELF through the environment's file I/O, settle the guest arch,
+ * create a compiler for the host target, and call cfree_emu_run.
+ *
+ * Returns the guest's exit code when cfree_emu_run succeeds, 2 when the
+ * command line cannot be parsed, and 1 for every other failure. All
+ * resources acquired along the way are released before returning. */
+int driver_emu(int argc, char** argv)
+{
+    DriverEnv env;
+    EmuOptions parsed = {0};
+    CfreeEnv cenv;
+    CfreeCompiler* cc = NULL;
+    DriverLoad image = {0};
+    CfreeBytesInput image_bytes;
+    CfreeEmuOptions run_opts = {0};
+    const char** guest_args;
+    int guest_status = 0;
+    int status = 1;
+
+    driver_env_init(&env);
+    parsed.env = &env;
+
+    /* Parse errors return directly: cenv is not set up yet, so the shared
+     * cleanup path below cannot be used. */
+    if (emu_parse(argc, argv, &parsed) != 0) {
+        emu_options_release(&parsed);
+        driver_env_fini(&env);
+        return 2;
+    }
+
+    cenv = driver_env_to_cfree(&env);
+    if (!(cenv.file_io && cenv.file_io->read_all)) {
+        driver_errf(EMU_TOOL, "host file I/O unavailable");
+        goto cleanup;
+    }
+
+    if (driver_load_bytes(cenv.file_io, EMU_TOOL, parsed.elf_path,
+                          &image, &image_bytes) != 0) {
+        goto cleanup;
+    }
+
+    if (emu_resolve_arch(&parsed, &image_bytes) != 0) {
+        goto cleanup;
+    }
+
+    /* The compiler targets the host: the JIT image holds host code, and
+     * the *guest* arch is passed separately in run_opts.guest_arch. */
+    cc = cfree_compiler_new(driver_host_target(), &cenv);
+    if (!cc) {
+        driver_errf(EMU_TOOL, "failed to initialize compiler");
+        goto cleanup;
+    }
+
+    emu_finalize_argv(&parsed, &guest_args);
+
+    run_opts.guest_arch = parsed.guest_arch;
+    run_opts.guest_elf_bytes = image_bytes.data;
+    run_opts.guest_elf_len = image_bytes.len;
+    run_opts.optimize = parsed.opt_level;
+    run_opts.trace = parsed.trace;
+    run_opts.argv = (const char* const*)guest_args;
+    run_opts.envp = NULL; /* guest starts with an empty environment */
+
+    if (cfree_emu_run(cc, &run_opts, &guest_status) == 0) {
+        status = guest_status;
+    } else {
+        driver_errf(EMU_TOOL, "emulation of %s (%s) failed",
+                    parsed.elf_path, emu_arch_name(parsed.guest_arch));
+    }
+
+cleanup:
+    if (cc) {
+        cfree_compiler_free(cc);
+    }
+    if (image.loaded && cenv.file_io) {
+        driver_release_bytes(cenv.file_io, &image);
+    }
+    emu_options_release(&parsed);
+    driver_env_fini(&env);
+    return status;
+}
diff --git a/driver/main.c b/driver/main.c
@@ -1,7 +1,7 @@
#include "driver.h"
/* Multi-call dispatch. Looks at argv[0]'s basename for "cc", "as", "ld",
- * "ar", "objdump", "dbg", or "run"; if argv[0] is the bare "cfree"
+ * "ar", "objdump", "dbg", "run", or "emu"; if argv[0] is the bare "cfree"
* binary, falls back to argv[1]. Preprocessor-only mode is `cc -E`. */
static int dispatch(const char* name, int argc, char** argv)
@@ -13,6 +13,7 @@ static int dispatch(const char* name, int argc, char** argv)
if (driver_streq(name, "objdump")) return driver_objdump(argc, argv);
if (driver_streq(name, "dbg")) return driver_dbg (argc, argv);
if (driver_streq(name, "run")) return driver_run (argc, argv);
+ if (driver_streq(name, "emu")) return driver_emu (argc, argv);
return -1;
}
diff --git a/include/cfree.h b/include/cfree.h
@@ -916,6 +916,88 @@ int cfree_pipeline_link_jit (CfreePipeline*, const CfreeLinkOptions*,
CfreeJit** out_jit);
/* ============================================================
+ * Emulator (cfree emu)
+ * ============================================================
+ * Run a guest user-mode ELF on the host via per-basic-block JIT translation.
+ * Pipeline shape: guest bytes -> per-ISA decoder -> per-ISA lifter -> CG ->
+ * (opt?) -> MCEmitter -> ObjBuilder -> link_jit (incremental) -> host code.
+ * The emu owns a single growing CfreeJit for the session: cold blocks are
+ * translated and incrementally linked into one image; hot edges are patched
+ * by the runtime (block chaining) outside the linker.
+ *
+ * v1 guest archs: aarch64, riscv64. x86_64 deferred. SIMD/vector ISA
+ * extensions, full-system emulation, self-modifying code, and foreign-OS
+ * syscalls are not supported in v1 (see doc/EMU.md).
+ *
+ * The freestanding core takes guest ELF bytes; path-shaped helpers live in
+ * the driver and feed bytes via CfreeFileIO.read_all. Guest memory loads
+ * and stores route through libcfree's runtime (bounds-checked against the
+ * mapped guest address space); guest syscalls are forwarded to the host OS
+ * via per-OS tables. */
+
+/* Guest instruction-set architectures selectable through
+ * CfreeEmuOptions.guest_arch. */
+typedef enum CfreeEmuArch {
+ CFREE_EMU_ARCH_AARCH64,
+ CFREE_EMU_ARCH_RISCV64,
+} CfreeEmuArch;
+
+/* Trace flag bitmask. PC traces the guest PC at every block entry; INSN
+ * traces every decoded guest instruction; BLOCK traces each translation
+ * event (cold-miss into the lifter). All traces are emitted via the env's
+ * CfreeDiagSink at CFREE_DIAG_NOTE. Each flag occupies its own bit, so
+ * flags can be OR-ed together. */
+typedef enum CfreeEmuTraceFlag {
+ CFREE_EMU_TRACE_PC = 1u << 0,
+ CFREE_EMU_TRACE_INSN = 1u << 1,
+ CFREE_EMU_TRACE_BLOCK = 1u << 2,
+} CfreeEmuTraceFlag;
+
+/* Holds an OR-combination of CfreeEmuTraceFlag bits (0 = no tracing
+ * requested). */
+typedef uint32_t CfreeEmuTraceFlags;
+
+/* Per-invocation emu configuration. `guest_elf_bytes` must outlive the call
+ * (cfree_emu_run) or the returned CfreeEmu (cfree_emu_new). `argv` and
+ * `envp`, when non-NULL, are NULL-terminated arrays of NUL-terminated
+ * strings; the emu copies them into the guest stack at startup, so the
+ * caller need not keep them alive past the new/run call. argv[0] is
+ * conventionally the guest program path. envp may be NULL for an empty
+ * environment.
+ *
+ * `optimize` selects the per-block backend: 0 drives a CGTarget directly
+ * (fast translation, slow execution); 2 wraps with opt_cgtarget (slow
+ * translation, fast execution). Other levels are reserved.
+ *
+ * Guest fd map / sandboxing is not exposed in v1 — guest syscalls are
+ * forwarded into the host process's fd table verbatim. */
+typedef struct CfreeEmuOptions {
+ /* ISA the guest image is decoded as. */
+ CfreeEmuArch guest_arch;
+ /* Guest ELF image and its length in bytes; see the lifetime rule above. */
+ const uint8_t* guest_elf_bytes;
+ size_t guest_elf_len;
+ /* Backend selector: 0 or 2 (other values reserved). */
+ int optimize;
+ /* OR of CfreeEmuTraceFlag bits. */
+ CfreeEmuTraceFlags trace;
+ const char* const* argv; /* NULL-terminated; may be NULL */
+ const char* const* envp; /* NULL-terminated; may be NULL */
+} CfreeEmuOptions;
+
+/* Opaque emulator session handle: obtained from cfree_emu_new and
+ * released with cfree_emu_free. */
+typedef struct CfreeEmu CfreeEmu;
+
+/* One-shot: load the guest ELF, run until exit/trap, fill *out_exit_code
+ * with the guest's exit status. Returns 0 on a clean guest exit (including
+ * a nonzero guest exit_code), nonzero on internal failure (decode/lift
+ * failure, OOM, unsupported guest arch).
+ * NOTE(review): the value left in *out_exit_code on a nonzero return is
+ * not specified here — confirm that callers must ignore it. */
+int cfree_emu_run(CfreeCompiler*, const CfreeEmuOptions*,
+ int* out_exit_code);
+
+/* Lower-level surface for dbg integration. Lifecycle: emu_new constructs
+ * the runtime (reserves the code-cache VA region, maps guest segments,
+ * builds the initial CPUState); emu_step runs at most `nblocks` translated
+ * blocks before returning; emu_lookup translates the block at `guest_pc`
+ * if cold and returns its host entry (NULL on translation failure or an
+ * unmapped guest_pc). emu_free releases the runtime, the JIT image, and
+ * the guest address space.
+ * NOTE(review): the failure return of cfree_emu_new and the meaning of
+ * cfree_emu_step's int result are not stated here — document them when
+ * the implementation lands. */
+CfreeEmu* cfree_emu_new (CfreeCompiler*, const CfreeEmuOptions*);
+int cfree_emu_step (CfreeEmu*, uint32_t nblocks);
+void* cfree_emu_lookup(CfreeEmu*, uint64_t guest_pc);
+void cfree_emu_free (CfreeEmu*);
+
+/* ============================================================
* Binary format detection
* ============================================================
* Sniff the format of a binary blob from its magic bytes.
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -215,3 +215,19 @@ CfreeDwarfParamIter* cfree_dwarf_param_iter_new (CfreeDebugInfo* d, uint64_t pc)
int cfree_dwarf_param_iter_next(CfreeDwarfParamIter* it, CfreeDwarfVar* o)
{ (void)it; (void)o; return 0; }
void cfree_dwarf_param_iter_free(CfreeDwarfParamIter* it) { (void)it; }
+
+/* ============================================================
+ * Emulator (cfree emu)
+ * ============================================================ */
+/* Placeholder body for the opaque CfreeEmu handle; the single member is a
+ * dummy so the struct is not empty. */
+struct CfreeEmu { int _; };
+
+/* Stub: performs no emulation. Stores 0 through out_exit_code when it is
+ * non-null and always returns 1, so callers see a failure. */
+int cfree_emu_run(CfreeCompiler* c, const CfreeEmuOptions* opts, int* out_exit_code)
+{
+    (void)c;
+    (void)opts;
+
+    if (out_exit_code != 0) {
+        *out_exit_code = 0;
+    }
+    return 1;
+}
+/* Stub: never constructs an emulator; always returns a null handle. */
+CfreeEmu* cfree_emu_new(CfreeCompiler* c, const CfreeEmuOptions* o)
+{
+    (void)c;
+    (void)o;
+    return 0;
+}
+
+/* Stub: ignores its arguments and always returns 1. */
+int cfree_emu_step(CfreeEmu* e, uint32_t n)
+{
+    (void)e;
+    (void)n;
+    return 1;
+}
+
+/* Stub: ignores its arguments and always returns a null pointer. */
+void* cfree_emu_lookup(CfreeEmu* e, uint64_t pc)
+{
+    (void)e;
+    (void)pc;
+    return 0;
+}
+
+/* Stub: nothing to release. */
+void cfree_emu_free(CfreeEmu* e)
+{
+    (void)e;
+}