commit f95c8fbb62afcf93138dc8dd542c0e89ef107005
parent cb323097a119ba2428b1eb1457cff882a572a43a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 19 May 2026 12:44:49 -0700
Public API rewrite
Diffstat:
20 files changed, 1570 insertions(+), 2006 deletions(-)
diff --git a/include/cfree.h b/include/cfree.h
@@ -1,1763 +0,0 @@
-#ifndef CFREE_H
-#define CFREE_H
-
-/* libcfree's complete public API. The driver and any other consumer of
- * libcfree only includes this single header — internal headers live under
- * src/ and are not part of the stable surface.
- *
- * Every public identifier starts with cfree_, Cfree, or CFREE_. */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
-
-/* ============================================================
- * Opaque handles
- * ============================================================ */
-typedef struct CfreeCompiler CfreeCompiler;
-typedef struct CfreePipeline CfreePipeline;
-typedef struct CfreeObjBuilder CfreeObjBuilder;
-typedef struct CfreeJit CfreeJit;
-typedef struct CfreeJitSession CfreeJitSession;
-typedef struct CfreeObjFile CfreeObjFile;
-typedef struct CfreeDebugInfo CfreeDebugInfo;
-typedef struct CfreeBytesInput CfreeBytesInput;
-typedef struct CfreeCompileOptions CfreeCompileOptions;
-typedef struct CfreePpOptions CfreePpOptions;
-typedef uint32_t CfreeSym;
-
-/* ============================================================
- * Source locations (carried in diagnostics)
- * ============================================================ */
-typedef struct CfreeSrcLoc { /* position attached to a diagnostic */
- uint32_t file_id; /* resolve to a name via cfree_compiler_file_name() */
- uint32_t line; /* NOTE(review): 0- or 1-based is not stated here — confirm */
- uint32_t col; /* NOTE(review): 0- or 1-based is not stated here — confirm */
-} CfreeSrcLoc;
-
-/* ============================================================
- * Frame snapshot
- * ============================================================
- * Canonical register snapshot. Shared by the unwinder (cfree_dwarf_unwind_step)
- * and the JIT session's stop notifications (CfreeStopInfo.regs). `pc` and
- * `cfa` are the program counter and canonical frame address; `regs` uses the
- * target arch's DWARF register numbering. Registers beyond the arch's defined
- * range are zero. */
-typedef struct CfreeUnwindFrame {
- uint64_t pc; /* program counter */
- uint64_t cfa; /* canonical frame address */
- uint64_t regs[32]; /* by DWARF register number; unused slots are zero */
-} CfreeUnwindFrame;
-
-/* ============================================================
- * Host-implemented interfaces (vtables)
- * ============================================================
- * Heap, DiagSink, and Writer are implemented *outside* libcfree, by the
- * host. This keeps libcfree free of stdio, malloc, and POSIX I/O — the
- * host provides them. Subclass by placing the struct as the first field
- * of an enclosing type and casting; libcfree calls the function pointers
- * with the base pointer. */
-
-typedef struct CfreeHeap CfreeHeap;
-struct CfreeHeap { /* allocator vtable implemented by the host */
- void *(*alloc)(CfreeHeap *, size_t size, size_t align);
- void *(*realloc)(CfreeHeap *, void *p, size_t old_size, size_t new_size,
- size_t align); /* caller supplies the old size as well as the new one */
- void (*free)(CfreeHeap *, void *p, size_t size); /* caller supplies the size */
- void *user; /* NOTE(review): presumably host-private state — confirm */
-};
-
-typedef enum CfreeDiagKind { /* kind argument of CfreeDiagSink.emit */
- CFREE_DIAG_NOTE,
- CFREE_DIAG_WARN, /* NOTE(review): presumably tallied in CfreeDiagSink.warnings — confirm */
- CFREE_DIAG_ERROR, /* NOTE(review): presumably tallied in CfreeDiagSink.errors — confirm */
- CFREE_DIAG_FATAL,
-} CfreeDiagKind;
-
-typedef struct CfreeDiagSink CfreeDiagSink;
-struct CfreeDiagSink { /* diagnostic receiver implemented by the host */
- void (*emit)(CfreeDiagSink *, CfreeDiagKind, CfreeSrcLoc, const char *fmt,
- va_list); /* NOTE(review): fmt + va_list look printf-style — confirm */
- void *user; /* NOTE(review): presumably host-private state — confirm */
- /* libcfree maintains these counters; hosts may inspect, must not write. */
- uint32_t errors;
- uint32_t warnings;
-};
-
-typedef struct CfreeWriter CfreeWriter;
-struct CfreeWriter { /* byte-sink vtable; see CfreeFileIO.open_writer and cfree_writer_mem */
- void (*write)(CfreeWriter *, const void *data, size_t n); /* n bytes starting at data */
- void (*seek)(CfreeWriter *, uint64_t offset);
- uint64_t (*tell)(CfreeWriter *);
- int (*error)(CfreeWriter *); /* NOTE(review): presumably nonzero after a failed operation — confirm */
- /* close is responsible for any host-side teardown (closing fds, freeing
- * the enclosing struct). After close the pointer is invalid. */
- void (*close)(CfreeWriter *);
-};
-
-/* ============================================================
- * Target description
- * ============================================================ */
-typedef enum CfreeArchKind { /* CPU architecture; type of CfreeTarget.arch */
- CFREE_ARCH_X86_32,
- CFREE_ARCH_X86_64,
- CFREE_ARCH_ARM_32,
- CFREE_ARCH_ARM_64,
- CFREE_ARCH_RV32,
- CFREE_ARCH_RV64,
- CFREE_ARCH_WASM,
-} CfreeArchKind;
-
-typedef enum CfreeOSKind { /* operating system; type of CfreeTarget.os */
- CFREE_OS_FREESTANDING,
- CFREE_OS_LINUX,
- CFREE_OS_MACOS,
- CFREE_OS_WINDOWS,
- CFREE_OS_WASI,
-} CfreeOSKind;
-
-typedef enum CfreeObjFmt { /* object-file format; type of CfreeTarget.obj */
- CFREE_OBJ_ELF,
- CFREE_OBJ_COFF,
- CFREE_OBJ_MACHO,
- CFREE_OBJ_WASM,
-} CfreeObjFmt;
-
-typedef enum CfreePic { /* stored (as uint8_t) in CfreeTarget.pic */
- CFREE_PIC_NONE, /* the default */
- CFREE_PIC_PIC, /* forced by cfree_link_jit and cfree_jit_from_image */
- CFREE_PIC_PIE,
-} CfreePic;
-
-/* CFREE_CM_DEFAULT is resolved per-arch (small on x86-64/AArch64, medium on
- * RISC-V, etc.). PIC and code-model are independent: -fPIC -mcmodel=small and
- * -fPIE -mcmodel=medium are both coherent. */
-typedef enum CfreeCodeModel { /* stored (as uint8_t) in CfreeTarget.code_model */
- CFREE_CM_DEFAULT, /* resolved per-arch */
- CFREE_CM_SMALL,
- CFREE_CM_MEDIUM,
- CFREE_CM_LARGE,
-} CfreeCodeModel;
-
-typedef struct CfreeTarget { /* passed by value to cfree_compiler_new */
- CfreeArchKind arch;
- CfreeOSKind os;
- CfreeObjFmt obj;
- uint8_t ptr_size; /* 4 or 8 */
- uint8_t ptr_align; /* NOTE(review): presumably bytes, like ptr_size — confirm */
- uint8_t big_endian; /* NOTE(review): presumably nonzero = big-endian — confirm */
- uint8_t pic; /* CfreePic; default CFREE_PIC_NONE */
- uint8_t code_model; /* CfreeCodeModel; default CFREE_CM_DEFAULT */
-} CfreeTarget;
-
-/* JIT note: cfree_link_jit and cfree_jit_from_image force pic = CFREE_PIC_PIC
- * regardless of caller input — the mmap'd image's address is unknown until
- * map time. The override happens at the linker entry, not silently inside the
- * backend. */
-
-/* ============================================================
- * Symbol classification
- * ============================================================
- * Shared by the object inspector and the JIT symbol iterator. Bind
- * captures linkage; Kind captures what the symbol points at. */
-typedef enum CfreeSymBind { /* symbol linkage */
- CFREE_SB_LOCAL,
- CFREE_SB_GLOBAL,
- CFREE_SB_WEAK,
-} CfreeSymBind;
-
-typedef enum CfreeSymKind { /* what the symbol points at */
- CFREE_SK_UNDEF, /* the "undefined external" sentinel */
- CFREE_SK_FUNC,
- CFREE_SK_OBJ,
- CFREE_SK_SECTION,
- CFREE_SK_FILE,
- CFREE_SK_COMMON,
- CFREE_SK_TLS,
- CFREE_SK_ABS,
- /* Defined symbol with no specific type (e.g., assembly label or
- * AArch64 mapping symbol). Distinct from CFREE_SK_UNDEF, which is
- * the "undefined external" sentinel. */
- CFREE_SK_NOTYPE,
- /* GNU IFUNC: function with runtime resolver (STT_GNU_IFUNC). */
- CFREE_SK_IFUNC,
-} CfreeSymKind;
-
-/* ============================================================
- * Architecture: register name mapping
- * ============================================================
- * DWARF register numbering varies per arch (CfreeUnwindFrame.regs is indexed
- * by DWARF register number). These helpers translate between DWARF index and
- * canonical assembler name (e.g. "rax", "x0", "a0") so dbg can render
- * `info registers` and accept `set $rax = ...` syntax.
- *
- * Stateless and allocation-free — name strings are static library data.
- * `cfree_arch_register_name` returns NULL for an unmapped DWARF index;
- * `cfree_arch_register_index` returns 0 on a known name and 1 if the name
- * is unknown. To enumerate every register defined for an arch, loop
- * 0..cfree_arch_register_count(arch) calling cfree_arch_register_at; the
- * iteration indices are dense in `[0, count)` and are unrelated to the
- * DWARF indices, which are sparse (e.g. 32..63 are unused on aarch64). */
-typedef struct CfreeArchReg { /* one entry filled by cfree_arch_register_at */
- uint32_t dwarf_idx; /* DWARF register number (sparse; not the iteration index) */
- const char *name; /* canonical assembler name; static library data */
-} CfreeArchReg;
-
-const char *cfree_arch_register_name(CfreeArchKind, uint32_t dwarf_idx);
-int cfree_arch_register_index(CfreeArchKind, const char *name,
- uint32_t *idx_out);
-
-uint32_t cfree_arch_register_count(CfreeArchKind);
-int cfree_arch_register_at(CfreeArchKind, uint32_t idx, CfreeArchReg *out);
-
-/* ============================================================
- * Host environment
- * ============================================================
- * The host supplies a heap, optional file I/O, and a diag sink. The
- * freestanding core never takes paths; path-shaped helpers in the driver
- * feed bytes/Writers. */
-typedef struct CfreeFileData { /* out-parameter of CfreeFileIO.read_all */
- const uint8_t *data;
- size_t size; /* NOTE(review): presumably the byte length of data — confirm */
- void *token; /* opaque ownership handle for release */
-} CfreeFileData;
-
-typedef struct CfreeFileIO { /* optional host file I/O; CfreeEnv.file_io may be NULL */
- int (*read_all)(void *user, const char *path, CfreeFileData *out); /* NOTE(review): presumably 0 on success — confirm */
- void (*release)(void *user, CfreeFileData *); /* hands back what read_all produced */
- CfreeWriter *(*open_writer)(void *user, const char *path); /* NOTE(review): failure convention (NULL?) not stated — confirm */
- void *user;
-} CfreeFileIO;
-
-/* Executable-memory vtable. Required by the JIT mapper (cfree_jit_from_image)
- * and the emu runtime; consulted by the linker for page-aligned segment
- * layout. May be NULL for hosts that never JIT and never run the emu — link
- * layout falls back to a 16 KiB page in that case.
- *
- * The vtable enforces strict W^X: no virtual page is ever simultaneously
- * writable and executable. For regions that will eventually hold code
- * (CFREE_PROT_EXEC in the requested perms) the host returns a dual mapping —
- * two virtual addresses that alias the same physical memory, where the
- * `write` alias has WRITE but never EXEC, and the `runtime` alias has
- * EXEC after a corresponding protect() call but never WRITE. Callers
- * populate code via the `write` alias and execute / take addresses against
- * the `runtime` alias. For non-EXEC regions a single mapping suffices and
- * write == runtime.
- *
- * reserve — allocate `size` bytes (page_size-aligned) whose final
- * perms will be `prot`. On success returns 0 and fills
- * *out (write/runtime/size/token); returns non-zero on
- * failure. The returned `write` alias is always RW;
- * `runtime` starts read-only and is flipped to final
- * perms by protect().
- * protect — apply final perms via the runtime alias for [addr,
- * addr+size) (page_size-aligned, lying inside the
- * reservation's runtime alias). Returns 0 on success.
- * release — free a prior reservation, including both aliases.
- * flush_icache — make freshly written instructions visible to the CPU
- * at [addr, addr+size) on the runtime alias. May be a
- * no-op on x86; required on aarch64 before transferring
- * control to JITed code.
- *
- * `prot` is a bitmask of CFREE_PROT_*. */
-enum { /* bit flags combined into the `prot` arguments of CfreeExecMem */
- CFREE_PROT_NONE = 0,
- CFREE_PROT_READ = 1 << 0,
- CFREE_PROT_WRITE = 1 << 1,
- CFREE_PROT_EXEC = 1 << 2, /* requesting this makes reserve return a dual mapping */
-};
-
-typedef struct CfreeExecMemRegion { /* one reservation, filled in by CfreeExecMem.reserve */
- void *write; /* RW alias for population; never has EXEC */
- void *runtime; /* runtime/execution alias; never has WRITE.
- For non-EXEC reservations equals `write`. */
- size_t size; /* page-aligned bytes */
- void *token; /* opaque host handle for release() */
-} CfreeExecMemRegion;
-
-typedef struct CfreeExecMem { /* executable-memory vtable; strict W^X */
- size_t page_size; /* alignment unit for reserve sizes and protect ranges */
- int (*reserve)(void *user, size_t size, int prot, CfreeExecMemRegion *out); /* 0 on success, fills *out */
- int (*protect)(void *user, void *addr, size_t size, int prot); /* addr lies on the runtime alias; 0 on success */
- void (*release)(void *user, CfreeExecMemRegion *region); /* frees both aliases */
- void (*flush_icache)(void *user, void *addr, size_t size); /* runtime alias; may be a no-op on x86 */
- void *user; /* NOTE(review): presumably forwarded as each callback's first argument — confirm */
-} CfreeExecMem;
-
-/* Debugger OS vtable. Required by the JIT session (cfree_jit_session_new) so
- * libcfree never includes <pthread.h>, <signal.h>, or platform headers for
- * ucontext / W^X flips. May be NULL for hosts that never enter `dbg`.
- *
- * Threading model: a single worker thread is spawned per session; the REPL
- * thread and worker hand off through two events (stop, resume). Signal
- * handlers run on the worker thread, snapshot the host ucontext into a
- * CfreeUnwindFrame, and call back into the session through on_fault.
- *
- * thread_start / _join — spawn worker, join on session teardown.
- * thread_interrupt — async-signal-safe: deliver `interrupt_signo`
- * to the worker thread (used by session_interrupt).
- * event_* — one-shot binary events. The session creates two
- * per worker; signal/wait must be safe to call
- * from the worker's signal-handler context.
- * signals_install — install handlers for SIGTRAP/SEGV/BUS/ILL/FPE
- * plus `interrupt_signo`. Each handler:
- * 1. snapshots ucontext into a CfreeUnwindFrame;
- * 2. invokes ops->on_fault(session, signo, &regs);
- * 3. on return, writes mutated regs back into
- * ucontext before returning to the kernel.
- * If on_fault returns nonzero the OS layer
- * re-raises the signal to the host default
- * (a fault the session declined to handle).
- * signals_uninstall — restore prior dispositions on session teardown.
- * interrupt_signo — host signal number reserved for STOP_INTERRUPT
- * (e.g. SIGUSR2 on POSIX).
- * code_write_begin/_end — open a write window over [runtime_addr,
- * runtime_addr+n) inside an existing
- * CfreeExecMem reservation. *write_out is the
- * address through which the session writes the
- * BRK / restore bytes. On dual-mapping hosts
- * (Apple silicon) it is the write alias; on
- * Linux it equals runtime_addr and the OS layer
- * mprotect-flips RW<->RX around the window.
- * flush_icache — make freshly patched code visible to the CPU
- * at the runtime alias. Required on aarch64.
- * guarded_copy — read/write `n` bytes between in-process
- * addresses with a TLS sigsetjmp landing slot
- * so SIGSEGV/SIGBUS during the copy returns
- * nonzero instead of stopping the worker. The
- * SEGV/BUS handlers in signals_install check
- * this landing slot before delegating to
- * on_fault. */
-typedef struct CfreeDbgSignalOps { /* callbacks handed to CfreeDbgOs.signals_install */
- int (*on_fault)(void *session, int signo, CfreeUnwindFrame *regs); /* may mutate *regs; nonzero makes the OS layer re-raise the signal */
-} CfreeDbgSignalOps;
-
-typedef struct CfreeDbgOs { /* host threading/signal services for the JIT session */
- int (*thread_start)(void *user, void (*fn)(void *), void *arg,
- void **thread_out); /* spawns the session's worker thread */
- void (*thread_join)(void *user, void *thread); /* used on session teardown */
- int (*thread_interrupt)(void *user, void *thread); /* async-signal-safe; delivers interrupt_signo */
-
- void *(*event_new)(void *user); /* one-shot binary event */
- void (*event_free)(void *user, void *ev);
- void (*event_wait)(void *user, void *ev); /* must be safe in the worker's signal-handler context */
- void (*event_signal)(void *user, void *ev); /* must be safe in the worker's signal-handler context */
- void (*event_reset)(void *user, void *ev);
-
- int (*signals_install)(void *user, const CfreeDbgSignalOps *ops,
- void *session); /* handlers for SIGTRAP/SEGV/BUS/ILL/FPE plus interrupt_signo */
- void (*signals_uninstall)(void *user); /* restores prior dispositions */
- int interrupt_signo; /* reserved for STOP_INTERRUPT, e.g. SIGUSR2 on POSIX */
-
- int (*code_write_begin)(void *user, void *runtime_addr, size_t n,
- void **write_out); /* *write_out = address the patch bytes are written through */
- void (*code_write_end)(void *user, void *runtime_addr, size_t n); /* closes the write window */
- void (*flush_icache)(void *user, void *runtime_addr, size_t n); /* required on aarch64 */
-
- int (*guarded_copy)(void *user, void *dst, const void *src, size_t n); /* nonzero if the copy faulted */
-
- void *user;
-} CfreeDbgOs;
-
-/* Host vtable for the JIT TLV thunk on Mach-O targets.
- *
- * `cfree run` on macOS-aarch64 needs to service Mach-O thread-local
- * descriptor calls — there's no dyld in the JIT image to allocate the
- * pthread key and rewrite descriptor[0] to a per-image thunk. libcfree
- * provides the asm thunk (caller-save-preserving) but cannot itself
- * include <pthread.h>, so the per-thread plumbing is supplied via this
- * vtable. NULL is fine on hosts that never JIT TLV code.
- *
- * ctx_new — called once per JIT image at link time. Receives the
- * TLS image: `image_size` bytes total, `image_filesz`
- * of which are initialized from `init_bytes`, aligned
- * to `align`. Returns an opaque ctx pointer that the
- * thunk reads from descriptor[+8].
- *
- * The returned ctx MUST satisfy a binary contract: the
- * first 8 bytes contain a function pointer of type
- * `void* (*)(void* ctx)` that returns the per-thread
- * TLS block (allocating + seeding on first call from
- * each thread). This is what the thunk calls; placing
- * it inside the ctx lets the thunk avoid loading
- * process-global state.
- *
- * ctx_destroy — called from cfree_jit_free. Implementations should
- * delete the pthread_key (POSIX runs per-thread
- * destructors then) and release the ctx storage. */
-typedef struct CfreeJitTls { /* host hooks behind the Mach-O TLV thunk */
- void *(*ctx_new)(void *user, const void *init_bytes, size_t image_filesz,
- size_t image_size, size_t align); /* first 8 bytes of the returned ctx must hold a void *(*)(void *ctx) */
- void (*ctx_destroy)(void *user, void *ctx); /* called from cfree_jit_free */
- void *user;
-} CfreeJitTls;
-
-typedef struct CfreeMetrics { /* optional metrics sink (CfreeEnv.metrics) */
- void (*scope_begin)(void *user, const char *name); /* NOTE(review): presumably paired with scope_end on the same name — confirm */
- void (*scope_end)(void *user, const char *name);
- void (*count)(void *user, const char *name, uint64_t value); /* NOTE(review): absolute value vs. increment is not stated — confirm */
- void *user;
-} CfreeMetrics;
-
-typedef struct CfreeEnv { /* host services handed to cfree_compiler_new */
- CfreeHeap *heap; /* host-supplied heap */
- const CfreeFileIO *file_io; /* may be NULL for purely in-memory pipelines */
- CfreeDiagSink *diag; /* host-supplied diagnostic sink */
- const CfreeExecMem *execmem; /* NULL ok unless JIT/emu paths run */
- const CfreeDbgOs *dbg_os; /* NULL ok unless `cfree dbg` paths run */
- const CfreeJitTls *jit_tls; /* NULL ok unless JIT TLV paths run */
- const CfreeMetrics *metrics; /* optional scoped metrics sink */
- /* Unix seconds since 1970-01-01 UTC, or negative for "no clock". Used
- * by the preprocessor for __DATE__ / __TIME__ (negative → C11 §6.10.8.1
- * placeholders). The host decides the policy (SOURCE_DATE_EPOCH,
- * wall clock, fixed value); libcfree never reads either. */
- int64_t now;
-} CfreeEnv;
-
-/* ============================================================
- * Compiler lifecycle
- * ============================================================ */
-CfreeCompiler *cfree_compiler_new(CfreeTarget, const CfreeEnv *);
-void cfree_compiler_free(CfreeCompiler *);
-CfreeTarget cfree_compiler_target(CfreeCompiler *);
-
-/* Resolve a CfreeSrcLoc.file_id to the spelling used when the source was
- * registered (typically the path passed to FileIO.read_all, or a memory-
- * input label). Returns NULL when `c` is NULL or `file_id` doesn't name a
- * registered file. The returned pointer is owned by the compiler and is
- * valid until cfree_compiler_free. Diagnostic sinks use this to print
- * `path:line:col` instead of the bare numeric `file_id`. */
-const char *cfree_compiler_file_name(CfreeCompiler *, uint32_t file_id);
-
-/* Intern a string into the compiler's global symbol pool. The returned symbol
- * is stable until cfree_compiler_free and may be passed through public APIs
- * that traffic in pre-interned names. 0 is reserved for "no symbol"; this
- * entry never returns 0 for a non-NULL string. */
-CfreeSym cfree_sym_intern(CfreeCompiler *, const char *str);
-
-/* Returns the diagnostic sink registered at compiler construction. */
-CfreeDiagSink *cfree_compiler_diag_sink(CfreeCompiler *);
-
-/* ============================================================
- * Writer dispatch (inline)
- * ============================================================
- * Callers obtain CfreeWriter*s from CfreeFileIO.open_writer or from
- * cfree_writer_mem. The dispatch helpers below are pure inline thunks
- * over the vtable — libcfree itself uses the vtable directly. */
-static inline void cfree_writer_write(CfreeWriter *w, const void *d, size_t n) {
- w->write(w, d, n);
-}
-static inline void cfree_writer_seek(CfreeWriter *w, uint64_t off) {
- w->seek(w, off);
-}
-static inline uint64_t cfree_writer_tell(CfreeWriter *w) { return w->tell(w); }
-static inline int cfree_writer_error(CfreeWriter *w) { return w->error(w); }
-static inline void cfree_writer_close(CfreeWriter *w) { w->close(w); }
-
-/* In-memory writer backed by the supplied heap. Useful as a building
- * block; the buffer is owned by the Writer and cfree_writer_mem_bytes
- * returns its current contents (valid until the next write or close). */
-CfreeWriter *cfree_writer_mem(CfreeHeap *);
-const uint8_t *cfree_writer_mem_bytes(CfreeWriter *, size_t *len_out);
-
-/* ============================================================
- * JIT
- * ============================================================
- * cfree_link_jit produces a CfreeJit owning its mapped pages and resolved
- * image. Symbol lookup is by name (object-local handles never escape
- * libcfree); dlsym-shaped — the caller casts to whatever function
- * signature the JITed symbol actually has (e.g. int(*)(int, char**) for
- * `main`). Returns NULL on miss. */
-void cfree_jit_free(CfreeJit *);
-void *cfree_jit_lookup(CfreeJit *, const char *name);
-/* Experimental append-only JIT growth. Appends one finalized object into
- * reserved JIT slack without moving existing code/data. Returns nonzero on
- * duplicate strong definitions, unresolved references, capacity exhaustion,
- * or relocation/protection failure. */
-int cfree_jit_append_obj(CfreeJit *, CfreeObjBuilder *);
-uint64_t cfree_jit_generation(CfreeJit *);
-/* Run all fini_array destructors in reverse order. Call after the last
- * use of JITed code, before cfree_jit_free. */
-void cfree_jit_run_dtors(CfreeJit *);
-
-/* ----- JIT image inspection -----
- *
- * cfree_jit_view borrows a CfreeObjFile over the loaded JIT image. Lets the
- * driver feed the JIT to objdump/dwarf consumers without round-tripping the
- * image to bytes. The returned pointer is owned by the CfreeJit and is
- * invalidated by cfree_jit_free; callers must not call cfree_obj_close on it.
- *
- * cfree_jit_addr_to_sym is the reverse of cfree_jit_lookup: maps a runtime
- * PC to the enclosing global symbol. Returns 0 on success and 1 when no
- * symbol contains `addr`. The interned name string is valid until
- * cfree_jit_free. */
-const CfreeObjFile *cfree_jit_view(CfreeJit *);
-int cfree_jit_addr_to_sym(CfreeJit *, uint64_t addr, const char **name_out,
- uint64_t *off_out);
-
-/* PC-space translation between the JIT's runtime address space (where
- * executable code actually lives) and the image-relative vaddr space
- * (the coordinate system the linked image — and any DWARF emitted at
- * compile time — was authored in).
- *
- * The DWARF consumer (cfree_dwarf_addr_to_line, cfree_dwarf_line_to_addr,
- * cfree_dwarf_unwind_step, etc.) operates entirely in image-relative
- * vaddrs; the debugger, host signal handlers, and breakpoint installer
- * work in runtime addresses. Callers translate at every boundary.
- *
- * Both functions return 0 if the input is not contained in any mapped
- * segment. Identity maps for the JIT's iplt / abs-symbol cases are out
- * of scope here — those addresses don't participate in source-level
- * stepping.
- *
- * Stable for the JIT's lifetime; constant-time over jit segment count. */
-uint64_t cfree_jit_runtime_to_image(CfreeJit *, uint64_t runtime_pc);
-uint64_t cfree_jit_image_to_runtime(CfreeJit *, uint64_t image_vaddr);
-
-/* Enumerate every globally visible symbol in the resolved JIT image.
- * Drives `info functions` / `info variables` and tab completion in dbg.
- * `name` is interned and valid until cfree_jit_free; CfreeSymKind is the
- * same enum as the object inspector uses (CFREE_SK_FUNC / CFREE_SK_OBJ /
- * etc.). */
-typedef struct CfreeJitSymIter CfreeJitSymIter;
-typedef struct CfreeJitSym { /* one entry produced by cfree_jit_sym_iter_next */
- const char *name; /* interned; valid until cfree_jit_free */
- uint64_t addr; /* NOTE(review): presumably a runtime (not image-relative) address — confirm */
- uint64_t size;
- CfreeSymKind kind; /* same enum the object inspector uses */
-} CfreeJitSym;
-
-CfreeJitSymIter *cfree_jit_sym_iter_new(CfreeJit *);
-int cfree_jit_sym_iter_next(CfreeJitSymIter *, CfreeJitSym *out);
-void cfree_jit_sym_iter_free(CfreeJitSymIter *);
-
-/* ----- JIT session: controlled execution -----
- *
- * A session wraps the JIT in a worker thread and a per-arch trap engine
- * (software breakpoint patch + single-step / displaced-step trampoline).
- * The library owns all signal handling, ucontext extraction, and per-arch
- * trap-byte / single-step machinery. The driver side uses the session to
- * call into the JIT'd code and is notified of stops (breakpoint, fault,
- * exit) via the blocking session_call/session_resume entries.
- *
- * Threading model: a single worker thread runs the JIT entry. session_call
- * and session_resume block the caller until the worker stops; the worker
- * is parked on stop and resumed from the next session_resume. Only one
- * thread may drive the session at a time.
- *
- * Lifetime: the CfreeJit must outlive the CfreeJitSession. cfree_jit_free
- * tears down a session implicitly if one is still attached, but explicit
- * cfree_jit_session_free is preferred so the worker thread is joined
- * deterministically.
- *
- * Breakpoints: set/clear via session_breakpoint_set/_clear. The trap byte
- * patch and arch-specific single-step trampoline (used to step over the
- * patched instruction during resume) are entirely internal. The session
- * dedupes addresses; setting a breakpoint at an existing address returns
- * the original handle. */
-
-typedef enum CfreeStopKind { /* why the worker stopped; type of CfreeStopInfo.kind */
- CFREE_STOP_BREAKPOINT, /* worker hit a breakpoint we set */
- CFREE_STOP_SIGNAL, /* worker took a fault we did not arm */
- CFREE_STOP_EXIT, /* worker entry returned normally */
- CFREE_STOP_INTERRUPT, /* host requested via session_interrupt */
-} CfreeStopKind;
-
-typedef struct CfreeStopInfo { /* filled through the stop_out parameters of session_call / session_resume */
- CfreeStopKind kind; /* selects which of the fields below is meaningful */
- int signal; /* host signo when kind == CFREE_STOP_SIGNAL */
- int exit_code; /* worker return value when kind == CFREE_STOP_EXIT */
- uint32_t bp_id; /* breakpoint handle when kind == CFREE_STOP_BREAKPOINT */
- CfreeUnwindFrame regs; /* register snapshot at the stop site */
-} CfreeStopInfo;
-
-typedef enum CfreeResumeMode { /* mode argument of cfree_jit_session_resume */
- CFREE_RESUME_CONTINUE, /* run until next stop or exit */
- CFREE_RESUME_STEP_INSN, /* execute one machine instruction */
- CFREE_RESUME_STEP_LINE, /* until source line changes, staying
- * in current function; needs attached DWARF */
- CFREE_RESUME_NEXT_LINE, /* like STEP_LINE but step OVER any
- * function calls; needs attached DWARF */
- CFREE_RESUME_STEP_OUT, /* run until current frame returns; needs attached DWARF */
-} CfreeResumeMode;
-
-/* Entry-point signature dispatched by session_call. The library is
- * responsible for marshalling argv/argc into the worker's ABI; the driver
- * is shape-agnostic. New entry shapes extend the enum. */
-typedef enum CfreeEntryKind { /* shape of the entry point passed to cfree_jit_session_call */
- CFREE_ENTRY_INT_ARGV, /* int(int, char**) */
- CFREE_ENTRY_U64, /* uint64_t(uint64_t, ... up to 8 args) */
-} CfreeEntryKind;
-
-CfreeJitSession *cfree_jit_session_new(CfreeJit *);
-void cfree_jit_session_free(CfreeJitSession *);
-
-/* Bind a DWARF consumer to the session. Required for the source-level
- * resume modes (STEP_LINE, NEXT_LINE, STEP_OUT). The CfreeDebugInfo must
- * outlive every session_resume that uses those modes; the session does
- * not take ownership and will not free it. Passing NULL detaches.
- * Returns 0 on success. */
-int cfree_jit_session_attach_dwarf(CfreeJitSession *, CfreeDebugInfo *);
-
-/* Begin executing `entry` with `argv`. Blocks until the worker stops.
- * `entry` must be a pointer returned by cfree_jit_lookup (or otherwise
- * within the JIT image). Returns 0 on success (including an EXIT stop),
- * nonzero on internal failure (worker spawn, OOM). On success *stop_out is
- * populated. */
-int cfree_jit_session_call(CfreeJitSession *, void *entry, CfreeEntryKind,
- int argc, char **argv, CfreeStopInfo *stop_out);
-int cfree_jit_session_call_u64(CfreeJitSession *, void *entry,
- const uint64_t *args, uint32_t nargs,
- uint64_t *ret_out, CfreeStopInfo *stop_out);
-
-/* Resume the parked worker. Blocks until the next stop. Returns 0 on
- * success, nonzero if no worker is parked. */
-int cfree_jit_session_resume(CfreeJitSession *, CfreeResumeMode,
- CfreeStopInfo *stop_out);
-
-/* Asynchronously interrupt a running worker. Async-signal-safe: callable
- * from a SIGINT handler in the host. The next stop event delivered to the
- * driving thread will be CFREE_STOP_INTERRUPT. Returns 0 on a queued
- * interrupt, nonzero if no worker is currently running. */
-int cfree_jit_session_interrupt(CfreeJitSession *);
-
-/* Read `n` bytes from the worker's address space starting at `addr` into
- * `dst`. Used by `p` and `x` in the dbg driver to dereference globals,
- * frame-relative locals, and arbitrary user memory. Returns 0 on success
- * and nonzero on a bad address or partial read; partial-read attempts do
- * not modify `dst`. Safe to call only while the worker is parked at a
- * stop. */
-int cfree_jit_session_read_mem(CfreeJitSession *, uint64_t addr, void *dst,
- size_t n);
-
-/* Write `n` bytes from `src` into the worker's address space at `addr`.
- * Same constraints as the read variant: caller must be at a stop; partial
- * writes leave the target untouched and return nonzero. */
-int cfree_jit_session_write_mem(CfreeJitSession *, uint64_t addr,
- const void *src, size_t n);
-
-/* Read full register snapshot. Snapshot already lives in CfreeStopInfo;
- * this is for callers that want a refresh outside the stop event (e.g.
- * after a write). Returns 0 on success, nonzero if no worker is parked. */
-int cfree_jit_session_get_regs(CfreeJitSession *, CfreeUnwindFrame *out);
-
-/* Write back a register snapshot. The frame's `regs` are written into the
- * worker; `pc` and `cfa` are honored only when changed. The library
- * validates that `pc` lies inside the JIT image. Returns 0 on success,
- * nonzero on a bad pc or if no worker is parked. */
-int cfree_jit_session_set_regs(CfreeJitSession *, const CfreeUnwindFrame *);
-
-/* Set / clear a breakpoint at `addr` (which must lie within the JIT image).
- * On success, *bp_id_out is the session-local handle that future stop
- * events will report. Idempotent: setting at an address that already has
- * a breakpoint returns its existing handle. cfree_jit_session_breakpoint_clear
- * silently succeeds on an unknown handle. */
-int cfree_jit_session_breakpoint_set(CfreeJitSession *, uint64_t addr,
- uint32_t *bp_id_out);
-int cfree_jit_session_breakpoint_clear(CfreeJitSession *, uint32_t bp_id);
-
-/* Extended breakpoint setter with skip count, hit cap, and an optional
- * in-process predicate. The plain breakpoint_set above is a convenience
- * over this form (skip_count = max_hits = 0, condition = NULL).
- *
- * `condition`, when non-NULL, is invoked by the library on the worker
- * thread between the trap and the stop notification, after `skip_count`
- * silent skips have elapsed. It must return nonzero to deliver a stop
- * and zero to silently resume. The callback runs in a context where
- * calling back into the session is not safe — restrict it to register
- * inspection and pure computation.
- *
- * `max_hits`, when nonzero, auto-clears the breakpoint after that many
- * stops have been delivered (post-skip, post-condition). 0 means
- * unlimited. */
-typedef struct CfreeBreakpointSpec { /* argument of cfree_jit_session_breakpoint_set_spec */
- uint64_t addr; /* NOTE(review): presumably must lie within the JIT image, as for breakpoint_set — confirm */
- uint64_t skip_count; /* silent skips before the first stop */
- uint64_t max_hits; /* 0 = unlimited */
- int (*condition)(void *user, const CfreeUnwindFrame *regs); /* NULL = unconditional; nonzero delivers the stop, zero resumes silently */
- void *condition_user; /* NOTE(review): presumably passed as `user` to condition — confirm */
-} CfreeBreakpointSpec;
-
-int cfree_jit_session_breakpoint_set_spec(CfreeJitSession *,
- const CfreeBreakpointSpec *,
- uint32_t *bp_id_out);
-
-/* Resolver invoked when the linker encounters an undefined symbol. Returning
- * NULL is an error. */
-typedef void *(*CfreeExternResolver)(void *user, const char *name);
-
-/* ============================================================
- * Pipeline
- * ============================================================
- * Layered driver-facing API. Four core operations:
- *
- * cfree_compile_obj       one C TU -> in-memory CfreeObjBuilder (chains into link)
- * cfree_compile_obj_emit  one C TU -> CfreeWriter (cc -c)
- * cfree_link_exe          link inputs -> CfreeWriter (ld)
- * cfree_link_jit          link inputs -> owning CfreeJit handle
- *
- * The CfreePipeline section near the end of this header bundles a
- * CfreeCompiler with these entries for tools that want a single owning
- * handle for a compile-then-link build.
- *
- * The freestanding core takes only byte buffers and Writers — never paths.
- * Path-shaped helpers live in driver-level adapters and feed the byte/
- * Writer APIs after consulting CfreeEnv.file_io.
- *
- * Errors are reported through libcfree's internal panic mechanism. Each
- * top-level function in this header saves and restores the active panic
- * handler around its own boundary, so these functions are safely nestable: a
- * caller that has already installed one keeps it across these calls. On
- * failure the function unwinds its own cleanups, restores the caller's
- * handler, and returns nonzero. */
-
-typedef struct CfreeDefine { /* element type of CfreePpOptions.defines */
- const char *name;
- const char *body; /* NULL means "1" */
-} CfreeDefine;
-
-/* Source language tag carried on CfreeBytesInput when the input is fed to
- * cfree_compile_obj*. Ignored by entries that take bytes for non-source
- * purposes (linker, archive writer/reader, object reader).
- *
- * CFREE_LANG_C is value 0 so a zero-initialized CfreeBytesInput defaults to
- * C, matching the prior contract.
- *
- * `.S` (asm that still needs the C preprocessor) is NOT auto-handled at this
- * layer: the driver runs its C preprocessor first and then submits the
- * result as CFREE_LANG_ASM. */
-typedef enum CfreeLanguage {
- CFREE_LANG_C = 0, /* default for a zero-initialized input */
- CFREE_LANG_ASM = 1, /* GAS-subset assembly */
- CFREE_LANG_TOY = 2, /* `.toy` sources (see cfree_language_for_path) */
- CFREE_LANG_WASM = 3, /* `.wat` / `.wasm` sources (see cfree_language_for_path) */
- CFREE_LANG_COUNT = 4, /* number of tags above; not itself a language */
-} CfreeLanguage;
-
-typedef int (*CfreeCompileFn)(CfreeCompiler *, const CfreeCompileOptions *,
- const CfreeBytesInput *, CfreeObjBuilder *out); /* frontend hook signature; installed via cfree_register_frontend */
-
-/* Register out-of-core language frontend hooks for this compiler instance.
- * The hook is stored in the slot selected by the CfreeLanguage argument.
- * Passing NULL clears the slot. Returns nonzero on bad args.
- * NOTE(review): the hook's return convention is presumably 0 on success,
- * like cfree_compile_obj — confirm. */
-int cfree_register_frontend(CfreeCompiler *, CfreeLanguage, CfreeCompileFn);
-
-/* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects,
- * and archives. `name` is a diagnostic label (typically a path or pseudo-
- * path); the linker interns it on entry. `data` may be any byte-shaped
- * content. `lang` is consulted only by source-consuming entries; other
- * entries ignore it. */
-struct CfreeBytesInput {
- const char *name; /* diagnostic label; typically a path or pseudo-path */
- const uint8_t *data; /* borrowed bytes; any byte-shaped content */
- size_t len; /* NOTE(review): presumably the byte length of `data` — confirm */
- CfreeLanguage lang; /* consulted only by source-consuming entries */
-};
-
-/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM, `.toy` ->
- * CFREE_LANG_TOY, `.wat`/`.wasm` -> CFREE_LANG_WASM, `.c`, `.cc`, `.cpp` and
- * any other suffix (including a path with no suffix) -> CFREE_LANG_C. `.S`
- * (asm that still needs the C preprocessor) is not recognized — drivers must
- * preprocess first and submit the result as CFREE_LANG_ASM.
- * NOTE(review): by the any-other-suffix rule a `.S` path presumably maps to
- * CFREE_LANG_C here — confirm, and make sure drivers never rely on it. */
-CfreeLanguage cfree_language_for_path(const char *path);
-
-/* Preprocessor configuration shared by compile_* and the convenience run. */
-struct CfreePpOptions {
- const char *const *include_dirs; /* user include search dirs (-I) */
- uint32_t ninclude_dirs; /* entries in include_dirs */
- const char *const *system_include_dirs; /* system include search dirs (-isystem) */
- uint32_t nsystem_include_dirs; /* entries in system_include_dirs */
- const CfreeDefine *defines; /* macros to predefine; NULL body means "1" */
- uint32_t ndefines; /* entries in defines */
- const char *const *undefines; /* NOTE(review): presumably macro names to undefine — confirm */
- uint32_t nundefines; /* entries in undefines */
-};
-
-/* Path prefix remap entry. Applied by SourceManager whenever it produces a
- * path for DWARF emission (DW_AT_comp_dir, DW_AT_name, line program). The
- * first match wins. Diagnostic output uses original paths. */
-typedef struct CfreePathPrefixMap {
- const char *old_prefix; /* prefix to match on the original path */
- const char *new_prefix; /* replacement used in emitted DWARF paths */
-} CfreePathPrefixMap;
-
-/* Per-TU compile knobs. */
-struct CfreeCompileOptions {
- int opt_level; /* 0 direct, 1 minimal, 2 full */
- int debug_info; /* NOTE(review): presumably nonzero enables debug-info emission — confirm */
- CfreePpOptions pp; /* preprocessor configuration; C-only */
- /* Reproducible-build knobs. `epoch` (Unix seconds) is consulted by every
- * file emitter that would otherwise have written wall-clock time (COFF
- * header, Mach-O LC_BUILD_VERSION, ar ar_date, DWARF producer). 0 means
- * write no timestamp at all (the default). */
- uint64_t epoch;
- const CfreePathPrefixMap *path_map; /* DWARF path remaps; first match wins */
- uint32_t npath_map; /* entries in path_map */
- /* Diagnostic policy.
- *
- * `warnings_are_errors` (-Werror): warnings emitted to CfreeDiagSink are
- * counted as errors for the compile_* return-value test and against
- * max_errors. The sink's `warnings` counter is unaffected.
- *
- * `max_errors`: 0 means unlimited. When >0, the parser stops emitting
- * after sink.errors reaches the cap (the Nth error is still emitted; the
- * (N+1)th is not), and compile_* returns nonzero. */
- int warnings_are_errors;
- uint32_t max_errors;
-};
-
-/* Compile one source TU (C or GAS-subset asm; selected by input->lang).
- *
- * cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The
- * builder is already finalized; do not write to it further. Pass it to
- * cfree_link_exe / cfree_link_jit. It must be alive until the linker has
- * consumed it. The CfreeCompiler must outlive the returned builder.
- *
- * cfree_compile_obj_emit writes the encoded object to `out` and frees its
- * temporary builder before returning. The Writer is not closed. On nonzero
- * return the Writer may contain partial output and should not be consumed.
- *
- * Diagnostic model: report-all. Every error reachable by the parser's
- * recovery rules is emitted to env.diag before return; the parser does not
- * abort on routine syntax/semantic errors. These functions return 0 iff
- * env.diag->errors == 0 at the end of the call (with warnings counting as
- * errors when CfreeCompileOptions.warnings_are_errors is set). Returns
- * nonzero on internal failures (OOM, invariant violation), where the
- * underlying compiler_panic mechanism unwinds before return.
- *
- * When input->lang == CFREE_LANG_ASM the input bytes are fed straight to the
- * GAS-subset assembler; CfreeCompileOptions fields that are C-only
- * (CfreeCompileOptions.pp, opt_level) are ignored. Inline asm inside C TUs
- * is handled by the C parser internally — no separate entry. */
-int cfree_compile_obj(CfreeCompiler *, const CfreeCompileOptions *,
- const CfreeBytesInput *input, CfreeObjBuilder **out); /* *out: finalized builder owned by the CfreeCompiler */
-int cfree_compile_obj_emit(CfreeCompiler *, const CfreeCompileOptions *,
- const CfreeBytesInput *input, CfreeWriter *out); /* out is written but not closed; discard its contents on nonzero return */
-
-/* ----- Header-dependency iteration -----
- *
- * Walks the include edges recorded by SourceManager during a preceding
- * cfree_compile_obj* call. The library hands out raw
- * edges; formatting (Make rules, ninja, JSON) is the driver's job.
- *
- * cfree_dep_iter_next returns 1 and fills `*out` for each remaining edge,
- * 0 when iteration is exhausted. The strings in CfreeDepEdge alias storage
- * owned by the CfreeCompiler and are valid until the next compile call or
- * compiler_free, whichever comes first.
- *
- * `includer_name` and `included_name` are the *resolved* paths SourceManager
- * actually opened — the same byte sequences passed to CfreeFileIO.read_all.
- * They are not the literal include token text; a `#include "x.h"` resolved
- * via -I to /abs/inc/x.h reports `/abs/inc/x.h`. This is what build systems
- * need: a Make rule emitted from these strings refers to files the build
- * tool will stat on rebuild.
- *
- * `from_system_path` distinguishes headers found through a system include
- * path (-isystem, sysroot, builtin) from user headers found via -I or the
- * source's own directory. This is the GCC `-MM` filter: drop edges whose
- * `from_system_path` is set. It is set by the include-path resolver, NOT by
- * the include syntax — `#include <myheader.h>` resolved through -I is a
- * user header (from_system_path=0); `#include "stdio.h"` resolved through a
- * system path is a system header (from_system_path=1).
- *
- * `bracketed` records the include *syntax* (1 for `<…>`, 0 for `"…"`). Tools
- * that round-trip include directives (formatters, IDE indexers) want the
- * lexical fact; -MM filtering does not.
- *
- * Edges are reported across all TUs processed since compiler_new; callers
- * that want a single TU's edges filter by `includer_name`. */
-typedef struct CfreeDepIter CfreeDepIter; /* opaque iterator over recorded include edges */
-
-typedef struct CfreeDepEdge {
- const char *includer_name; /* resolved path; same string given to read_all */
- const char *included_name; /* resolved path; same string given to read_all */
- CfreeSrcLoc include_loc; /* NOTE(review): presumably the location of the include directive — confirm */
- uint8_t from_system_path; /* 1 if resolved via a system include path */
- uint8_t bracketed; /* 1 if syntax was <…>; 0 for "…" */
- uint8_t pad[2]; /* explicit padding */
-} CfreeDepEdge;
-
-CfreeDepIter *cfree_dep_iter_new(CfreeCompiler *); /* iterates edges recorded since compiler_new; NOTE(review): failure value not documented */
-int cfree_dep_iter_next(CfreeDepIter *, CfreeDepEdge *out); /* 1: *out filled with the next edge; 0: exhausted */
-void cfree_dep_iter_free(CfreeDepIter *); /* releases an iterator from cfree_dep_iter_new */
-
-/* Build-ID emission mode (ELF .note.gnu.build-id and friends). Stored as a
- * uint8_t in CfreeLinkInputs.build_id_mode. */
-typedef enum CfreeBuildIdMode {
- CFREE_BUILDID_NONE, /* no build-id note (default) */
- CFREE_BUILDID_SHA256, /* hash all input section bytes in
- * stable order — reproducible */
- CFREE_BUILDID_UUID, /* random; opt-in, not reproducible */
- CFREE_BUILDID_USER, /* caller-supplied bytes: CfreeLinkInputs
- * build_id_bytes / build_id_len */
-} CfreeBuildIdMode;
-
-/* ============================================================
- * Linker script (structured)
- * ============================================================
- * The linker accepts only the structured form. Programmatic build systems
- * construct a CfreeLinkScript directly; hosts that prefer GNU-ld text feed
- * the optional cfree_link_script_parse helper, which yields the same
- * structured form. The data model makes the supported semantics
- * inspectable rather than implicit in a parser.
- *
- * All pointers in a CfreeLinkScript are borrowed: the script and every
- * sub-object (expressions, regions, sections, assignments, name strings)
- * must outlive the call to cfree_link_exe / cfree_link_jit that consumes
- * it. cfree_link_script_parse arena-owns its result; cfree_link_script_free
- * releases everything reachable from a parser-produced script. */
-
-typedef struct CfreeLinkExpr CfreeLinkExpr; /* forward declaration: expression nodes point at each other */
-
-typedef enum CfreeLinkExprKind { /* stored as a uint8_t in CfreeLinkExpr.kind */
- CFREE_LE_INT, /* int_val */
- CFREE_LE_DOT, /* current location counter */
- CFREE_LE_SYM, /* name */
- CFREE_LE_REGION_ORIGIN, /* name = MEMORY region */
- CFREE_LE_REGION_LENGTH, /* name = MEMORY region */
- CFREE_LE_ADD, /* ADD..SHR, MAX, MIN: NOTE(review) presumably operands in v.bin.lhs / v.bin.rhs — confirm */
- CFREE_LE_SUB,
- CFREE_LE_MUL,
- CFREE_LE_DIV,
- CFREE_LE_AND,
- CFREE_LE_OR,
- CFREE_LE_XOR,
- CFREE_LE_SHL,
- CFREE_LE_SHR,
- CFREE_LE_ALIGN, /* ALIGN(val, align) */
- CFREE_LE_MAX,
- CFREE_LE_MIN,
-} CfreeLinkExprKind;
-
-struct CfreeLinkExpr { /* one node of a linker-script expression tree */
- uint8_t kind; /* CfreeLinkExprKind */
- union { /* active member is selected by `kind` */
- int64_t int_val; /* CFREE_LE_INT */
- const char *name; /* CFREE_LE_SYM and CFREE_LE_REGION_*: symbol or MEMORY region name */
- struct {
- const CfreeLinkExpr *lhs, *rhs;
- } bin; /* NOTE(review): presumably the two-operand kinds — confirm */
- struct {
- const CfreeLinkExpr *val, *align;
- } align; /* CFREE_LE_ALIGN: ALIGN(val, align) */
- } v;
-};
-
-typedef enum CfreeLinkRegionFlag { /* bits of CfreeLinkRegion.flags; NOTE(review): presumably the (rwx) attributes of a MEMORY entry — confirm */
- CFREE_LRF_R = 1u << 0,
- CFREE_LRF_W = 1u << 1,
- CFREE_LRF_X = 1u << 2,
-} CfreeLinkRegionFlag;
-
-typedef struct CfreeLinkRegion { /* one entry of CfreeLinkScript.regions (a MEMORY region) */
- const char *name; /* borrowed, like every pointer in a CfreeLinkScript */
- uint8_t flags; /* CfreeLinkRegionFlag mask */
- uint64_t origin; /* NOTE(review): presumably the already-evaluated ORIGIN value — confirm */
- uint64_t length; /* NOTE(review): presumably the already-evaluated LENGTH value — confirm */
-} CfreeLinkRegion;
-
-typedef struct CfreeLinkInputMatch { /* one input rule of an output section, e.g. `*(.section.glob)` or `file.o(.section)` */
- const char *file_pattern; /* NULL == "*" */
- const char *section_pattern; /* the pattern inside the parentheses of the input rule */
- int keep; /* nonzero: exempt from --gc-sections */
-} CfreeLinkInputMatch;
-
-typedef enum CfreeLinkAsnKind { /* stored as a uint8_t in CfreeLinkAssignment.kind */
- CFREE_LAS_DOT, /* . = expr; sym ignored */
- CFREE_LAS_SYM, /* sym = expr */
- CFREE_LAS_PROVIDE, /* PROVIDE(sym = expr) */
-} CfreeLinkAsnKind;
-
-typedef struct CfreeLinkAssignment { /* one `. = expr`, `sym = expr`, or PROVIDE(sym = expr) */
- uint8_t kind; /* CfreeLinkAsnKind */
- const char *sym; /* unused for CFREE_LAS_DOT */
- const CfreeLinkExpr *expr; /* right-hand side; borrowed */
-} CfreeLinkAssignment;
-
-typedef struct CfreeLinkOutputSection { /* one entry of CfreeLinkScript.sections */
- const char *name;
- const CfreeLinkExpr *vma; /* NULL: from region/dot */
- const CfreeLinkExpr *lma; /* NULL: equal to vma */
- const CfreeLinkInputMatch *inputs; /* input rules for this section */
- uint32_t ninputs; /* entries in inputs */
- const char *region; /* > REGION; NULL if absent */
- const char *load_region; /* AT> REGION; NULL if absent */
- const CfreeLinkAssignment *asns; /* NOTE(review): presumably assignments written inside this section's body — confirm */
- uint32_t nasns; /* entries in asns */
-} CfreeLinkOutputSection;
-
-typedef struct CfreeLinkScript { /* structured linker script; every pointer is borrowed */
- const char *entry; /* NULL: use CfreeLinkOptions.entry */
- const CfreeLinkRegion *regions; /* MEMORY regions */
- uint32_t nregions; /* entries in regions */
- const CfreeLinkOutputSection *sections; /* in declaration order */
- uint32_t nsections; /* entries in sections */
- const CfreeLinkAssignment *top_asns; /* outside any SECTIONS{} */
- uint32_t ntop_asns; /* entries in top_asns */
-} CfreeLinkScript;
-
-/* Parse GNU-ld-subset text into a structured script. The compiler arena
- * owns the result; cfree_link_script_free releases it. The supported v1
- * subset is:
- * ENTRY(symbol)
- * MEMORY { name (rwx) : ORIGIN = expr, LENGTH = expr }
- * SECTIONS { ... } with output sections in declaration order
- * Input rules `*(.section.glob)` or `file.o(.section)`
- * KEEP(...) for --gc-sections opt-out
- * PROVIDE(sym = expr), plain `sym = expr`, `. = expr`
- * `> REGION` and `AT> REGION` placement
- * Operators + - * / & | ^ << >>, ALIGN(expr, align), MAX(a,b), MIN(a,b)
- * slash-star block comments
- * Anything outside the subset (OVERLAY, VERSION, INSERT BEFORE/AFTER,
- * OUTPUT_FORMAT, INPUT, GROUP, elaborate file patterns, other operators)
- * is rejected with a diagnostic and the call returns nonzero with *out
- * unchanged. Returns 0 on success. */
-int cfree_link_script_parse(CfreeCompiler *, const char *text, size_t len,
- const CfreeLinkScript **out); /* 0 on success; on nonzero return *out is unchanged */
-void cfree_link_script_free(CfreeCompiler *, const CfreeLinkScript *); /* only for scripts produced by cfree_link_script_parse */
-
-/* Per-archive resolution mode (mirrors GNU ld's -Bstatic / -Bdynamic /
- * --as-needed positional state). Object-file inputs keep the plain
- * CfreeBytesInput shape — only archives carry these knobs. */
-typedef enum CfreeLinkMode { /* stored as a uint8_t in CfreeBytesInputArchive.link_mode */
- CFREE_LM_DEFAULT, /* output-kind default */
- CFREE_LM_STATIC, /* -Bstatic before this input */
- CFREE_LM_DYNAMIC, /* -Bdynamic */
- CFREE_LM_AS_NEEDED, /* --as-needed */
-} CfreeLinkMode;
-
-/* Archive input with linker-side state.
- *
- * link_mode: CfreeLinkMode (-Bstatic/-Bdynamic/--as-needed positional).
- * whole_archive: nonzero == --whole-archive: pull every member in
- * regardless of whether its symbols satisfy an undef.
- * group_id: clusters archives into a cyclic resolution group;
- * archives sharing a nonzero id are scanned cyclically
- * until no new symbols pull in (--start-group ...
- * --end-group). 0 (default) == linear single-pass.
- *
- * link_mode and whole_archive are orthogonal: --whole-archive applies
- * regardless of --as-needed / -Bstatic / -Bdynamic state. */
-typedef struct CfreeBytesInputArchive {
- CfreeBytesInput input; /* the archive bytes; must stay alive until the cfree_link_* call returns */
- uint8_t link_mode; /* CfreeLinkMode; default CFREE_LM_DEFAULT */
- uint8_t whole_archive; /* nonzero == --whole-archive */
- uint8_t group_id; /* 0 == linear single-pass; archives sharing a nonzero id form a cyclic group */
- uint8_t pad; /* explicit padding */
-} CfreeBytesInputArchive;
-
-/* Common link-input set, embedded in both CfreeLinkOptions and
- * CfreeLinkSharedOptions. Adding a new input shape lands here in one
- * place rather than in every options struct. */
-typedef struct CfreeLinkInputs {
- CfreeObjBuilder *const *objs; /* fresh-compiled, by reference */
- uint32_t nobjs; /* entries in objs */
- const CfreeBytesInput *obj_bytes; /* encoded object files */
- uint32_t nobj_bytes; /* entries in obj_bytes */
- const CfreeBytesInputArchive *archives; /* archives plus their per-archive linker state */
- uint32_t narchives; /* entries in archives */
- /* Shared-object inputs (ELF ET_DYN). Each entry's bytes are parsed
- * via the linker's read_elf_dso path; the DSO contributes no
- * sections to the output image, but its dynsym is searched during
- * undef resolution so references against this DSO bind dynamically.
- * The DSO's DT_SONAME (or its filename if missing) is recorded in
- * the produced image's DT_NEEDED list. */
- const CfreeBytesInput *dso_bytes;
- uint32_t ndso_bytes; /* entries in dso_bytes */
- /* Structured linker script. NULL means no script (target/format default
- * layout). Borrowed: must outlive the cfree_link_* call. */
- const CfreeLinkScript *linker_script;
- const char *entry; /* NULL = format/target default */
- CfreeExternResolver extern_resolver; /* invoked when the linker meets an undefined symbol */
- void *extern_resolver_user; /* NOTE(review): presumably passed to extern_resolver as `user` — confirm */
- /* Build-ID. `build_id_mode` is a CfreeBuildIdMode. `build_id_bytes` /
- * `build_id_len` are consulted only when mode == CFREE_BUILDID_USER. */
- uint8_t build_id_mode;
- const uint8_t *build_id_bytes;
- uint32_t build_id_len;
-} CfreeLinkInputs;
-
-/* Options for executable / JIT link (consumed by cfree_link_exe and
- * cfree_link_jit): the shared input set plus the knobs below.
- *
- * gc_sections: nonzero enables --gc-sections (drop unreferenced sections
- * from the output, transitively from entry / KEEP roots /
- * exported symbols). Default 0. */
-typedef struct CfreeLinkOptions {
- CfreeLinkInputs inputs; /* input set shared with CfreeLinkSharedOptions */
- int gc_sections;
- /* PIE / dynamic-exe shape. When `pie` is set or any DSO input is
- * present the output is ET_DYN; the runtime loader at
- * `interp_path` (default `/lib/ld-musl-aarch64.so.1` for
- * aarch64-linux when not specified) binds DT_NEEDED dependencies
- * before transferring to the entry symbol. NULL `interp_path` with
- * `pie==0` and no DSO inputs preserves the static ET_EXEC path. */
- int pie;
- const char *interp_path;
-} CfreeLinkOptions;
-
-/* Options for shared-library link.
- *
- * soname: recorded in the produced object (DT_SONAME on ELF,
- * LC_ID_DYLIB on Mach-O). NULL == none.
- * rpaths/runpaths: DT_RPATH / DT_RUNPATH entries, written verbatim. The
- * runtime loader expands $ORIGIN and similar tokens. On
- * Mach-O both lists collapse to LC_RPATH in
- * rpaths-then-runpaths order.
- * exports: flat list of symbol names promoted to the dynamic
- * symbol table. v1 has no symbol-version-script support;
- * that lands later as a separate CfreeVersionScript
- * type rather than folded into the linker-script grammar.
- * allow_undefined: default 1 for shared output. 0 forces every external
- * reference to be resolved at link time. */
-typedef struct CfreeLinkSharedOptions {
- CfreeLinkInputs inputs; /* input set shared with CfreeLinkOptions */
- const char *soname; /* NULL == none */
- const char *const *rpaths; /* written verbatim */
- uint32_t nrpaths; /* entries in rpaths */
- const char *const *runpaths; /* written verbatim */
- uint32_t nrunpaths; /* entries in runpaths */
- const char *const *exports; /* symbol names promoted to the dynamic symbol table */
- uint32_t nexports; /* entries in exports */
- int allow_undefined; /* NOTE(review): documented default is 1, but a zero-initialized struct yields 0 — confirm how the default is applied */
- /* Section GC. See CfreeLinkOptions.gc_sections. */
- int gc_sections;
-} CfreeLinkSharedOptions;
-
-/* All bytes inputs (obj_bytes, archives — including the CfreeBytesInput
- * nested inside each CfreeBytesInputArchive) must remain alive until the
- * matching cfree_link_* call returns. */
-
-/* Link to executable. Writer is not closed by the call. On nonzero return
- * the Writer may contain partial output and should not be consumed. */
-int cfree_link_exe(CfreeCompiler *, const CfreeLinkOptions *, CfreeWriter *out);
-
-/* Link to shared library / dylib in the format implied by Compiler.target
- * (ELF .so, Mach-O .dylib, PE .dll). Writer is not closed; on nonzero
- * return the Writer may contain partial output and should not be
- * consumed. */
-int cfree_link_shared(CfreeCompiler *, const CfreeLinkSharedOptions *,
- CfreeWriter *out);
-
-/* Link as JIT. On success, *out_jit owns its image and mapped pages and
- * must be released with cfree_jit_free.
- * NOTE(review): the state of *out_jit after a nonzero return is not
- * documented here — confirm before relying on it. */
-int cfree_link_jit(CfreeCompiler *, const CfreeLinkOptions *,
- CfreeJit **out_jit);
-
-/* ============================================================
- * Pipeline (stateful driver-facing API)
- * ============================================================
- * A CfreePipeline bundles a CfreeCompiler with the lifecycle every
- * compile-then-link tool needs. Tools create a pipeline once per build,
- * feed bytes into pipeline_compile_obj, then drive one of the link entries.
- * `cfree_pipeline_compiler` exposes the underlying compiler so callers can
- * reach into APIs that need it directly (e.g. cfree_dwarf_open against a
- * JIT image, cfree_dep_iter_new for header-dep emission).
- *
- * Ownership: CfreeObjBuilders returned by pipeline_compile_obj are owned by
- * the pipeline's compiler and must be alive at the matching link call;
- * cfree_pipeline_free reaps everything in one shot. Path-shaped source
- * loading is the driver's job — pipeline entries take CfreeBytesInput. */
-
-CfreePipeline *cfree_pipeline_new(CfreeTarget, const CfreeEnv *); /* create once per build; NOTE(review): failure value not documented */
-void cfree_pipeline_free(CfreePipeline *); /* reaps the pipeline and everything it owns in one shot */
-
-/* Borrowed; must not be freed by callers. Valid until cfree_pipeline_free. */
-CfreeCompiler *cfree_pipeline_compiler(CfreePipeline *);
-
-int cfree_pipeline_compile_obj(CfreePipeline *, const CfreeCompileOptions *,
- const CfreeBytesInput *input,
- CfreeObjBuilder **out); /* *out is owned by the pipeline's compiler; must be alive at the matching link call */
-
-int cfree_pipeline_link_exe(CfreePipeline *, const CfreeLinkOptions *,
- CfreeWriter *out); /* NOTE(review): presumably forwards to cfree_link_exe — confirm */
-int cfree_pipeline_link_shared(CfreePipeline *, const CfreeLinkSharedOptions *,
- CfreeWriter *out); /* NOTE(review): presumably forwards to cfree_link_shared — confirm */
-int cfree_pipeline_link_jit(CfreePipeline *, const CfreeLinkOptions *,
- CfreeJit **out_jit); /* NOTE(review): presumably forwards to cfree_link_jit — confirm */
-
-/* ============================================================
- * Emulator (cfree emu)
- * ============================================================
- * Run a guest user-mode ELF on the host via per-basic-block JIT translation.
- * Pipeline shape: guest bytes -> per-ISA decoder -> per-ISA lifter -> CG ->
- * (opt?) -> MCEmitter -> ObjBuilder -> link_jit (incremental) -> host code.
- * The emu owns a single growing CfreeJit for the session: cold blocks are
- * translated and incrementally linked into one image; hot edges are patched
- * by the runtime (block chaining) outside the linker.
- *
- * v1 guest archs: aarch64, riscv64. x86_64 deferred. SIMD/vector ISA
- * extensions, full-system emulation, self-modifying code, and foreign-OS
- * syscalls are not supported in v1 (see doc/EMU.md).
- *
- * The freestanding core takes guest ELF bytes; path-shaped helpers live in
- * the driver and feed bytes via CfreeFileIO.read_all. Guest memory loads
- * and stores route through libcfree's runtime (bounds-checked against the
- * mapped guest address space); guest syscalls are forwarded to the host OS
- * via per-OS tables. */
-
-typedef enum CfreeEmuArch { /* guest ISA for CfreeEmuOptions.guest_arch; x86_64 is deferred in v1 */
- CFREE_EMU_ARCH_AARCH64,
- CFREE_EMU_ARCH_RISCV64,
-} CfreeEmuArch;
-
-/* Trace flag bitmask. PC traces the guest PC at every block entry; INSN
- * traces every decoded guest instruction; BLOCK traces each translation
- * event (cold-miss into the lifter). All traces are emitted via the env's
- * CfreeDiagSink at CFREE_DIAG_NOTE. */
-typedef enum CfreeEmuTraceFlag {
- CFREE_EMU_TRACE_PC = 1u << 0, /* guest PC at every block entry */
- CFREE_EMU_TRACE_INSN = 1u << 1, /* every decoded guest instruction */
- CFREE_EMU_TRACE_BLOCK = 1u << 2, /* each translation event (cold-miss into the lifter) */
-} CfreeEmuTraceFlag;
-
-typedef uint32_t CfreeEmuTraceFlags; /* bitmask of CfreeEmuTraceFlag values */
-
-/* Per-invocation emu configuration. `guest_elf_bytes` must outlive the call
- * (cfree_emu_run) or the returned CfreeEmu (cfree_emu_new). `argv` and
- * `envp`, when non-NULL, are NULL-terminated arrays of NUL-terminated
- * strings; the emu copies them into the guest stack at startup, so the
- * caller need not keep them alive past the new/run call. argv[0] is
- * conventionally the guest program path. envp may be NULL for an empty
- * environment.
- *
- * `optimize` selects the per-block backend: 0 drives a CGTarget directly
- * (fast translation, slow execution); 2 wraps with opt_cgtarget (slow
- * translation, fast execution). Other levels are reserved.
- *
- * Guest fd map / sandboxing is not exposed in v1 — guest syscalls are
- * forwarded into the host process's fd table verbatim. */
-typedef struct CfreeEmuOptions {
- CfreeEmuArch guest_arch;
- const uint8_t *guest_elf_bytes; /* borrowed; must outlive the run call or the returned CfreeEmu */
- size_t guest_elf_len; /* NOTE(review): presumably the byte length of guest_elf_bytes — confirm */
- int optimize; /* 0: direct CGTarget; 2: opt_cgtarget; other levels reserved */
- CfreeEmuTraceFlags trace; /* CfreeEmuTraceFlag bitmask */
- const char *const *argv; /* NULL-terminated; may be NULL */
- const char *const *envp; /* NULL-terminated; may be NULL */
-} CfreeEmuOptions;
-
-typedef struct CfreeEmu CfreeEmu; /* opaque; created by cfree_emu_new, released by cfree_emu_free */
-
-/* One-shot: load the guest ELF, run until exit/trap, fill *out_exit_code
- * with the guest's exit status. Returns 0 on a clean guest exit (including
- * a nonzero guest exit_code), nonzero on internal failure (decode/lift
- * failure, OOM, unsupported guest arch). */
-int cfree_emu_run(CfreeCompiler *, const CfreeEmuOptions *, int *out_exit_code); /* 0 even when the guest exits nonzero; the guest status goes to *out_exit_code */
-
-/* Lower-level surface for dbg integration. Lifecycle: emu_new constructs
- * the runtime (reserves the code-cache VA region, maps guest segments,
- * builds the initial CPUState); emu_step runs at most `nblocks` translated
- * blocks before returning; emu_lookup translates the block at `guest_pc`
- * if cold and returns its host entry (NULL on translation failure or an
- * unmapped guest_pc). emu_free releases the runtime, the JIT image, and
- * the guest address space. */
-CfreeEmu *cfree_emu_new(CfreeCompiler *, const CfreeEmuOptions *); /* NOTE(review): failure value not documented; presumably NULL — confirm */
-int cfree_emu_step(CfreeEmu *, uint32_t nblocks); /* runs at most nblocks translated blocks; NOTE(review): return codes not documented — confirm */
-void *cfree_emu_lookup(CfreeEmu *, uint64_t guest_pc); /* host entry of the block; NULL on translation failure or unmapped guest_pc */
-void cfree_emu_free(CfreeEmu *); /* releases the runtime, the JIT image, and the guest address space */
-
-/* ============================================================
- * Binary format detection
- * ============================================================
- * Sniff the format of a binary blob from its magic bytes.
- * COFF is detected by common machine-type values (x86, x86_64,
- * ARM, ARM64, RISC-V). Returns CFREE_BIN_UNKNOWN if no magic matches. */
-
-typedef enum CfreeBinFmt { /* result of cfree_detect_fmt */
- CFREE_BIN_UNKNOWN = 0, /* no magic matched */
- CFREE_BIN_AR,
- CFREE_BIN_ELF,
- CFREE_BIN_COFF, /* relocatable COFF object; first 2 bytes are machine type */
- CFREE_BIN_PE, /* PE executable/DLL; starts with MZ header */
- CFREE_BIN_MACHO,
- CFREE_BIN_WASM,
-} CfreeBinFmt;
-
-CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len); /* sniffs magic bytes only; CFREE_BIN_UNKNOWN if none match */
-
-/* Derive a CfreeTarget from object-file magic + headers (ELF e_machine /
- * EI_CLASS / EI_DATA, COFF Machine, Mach-O cputype, WASM = wasm32). Returns
- * 0 on success and fills *out; returns 1 when the input is not a recognized
- * relocatable object or its magic carries insufficient information. AR
- * archives are not handled here — open a member to detect its target. */
-int cfree_detect_target(const uint8_t *data, size_t len, CfreeTarget *out); /* 0: *out filled; 1: not a recognized relocatable object, or too little information */
-
-/* ============================================================
- * Object inspection
- * ============================================================
- * Open a relocatable object for inspection. Format and target are both
- * auto-detected from the file (see cfree_detect_fmt / cfree_detect_target).
- * Returns NULL on failure. The input bytes must remain alive until
- * cfree_obj_close.
- *
- * After a successful open, query functions provide read-only access to
- * sections and symbols. Strings returned by query functions are interned
- * and valid until cfree_obj_close. */
-
-typedef struct CfreeObjFile CfreeObjFile; /* repeats the typedef in the opaque-handles list at the top of this header; a repeated typedef is only valid from C11 on */
-typedef struct CfreeObjSymIter CfreeObjSymIter; /* opaque; see cfree_obj_symiter_new */
-
-typedef enum CfreeSecKind { /* section classification reported in CfreeObjSecInfo.kind */
- CFREE_SEC_TEXT,
- CFREE_SEC_RODATA,
- CFREE_SEC_DATA,
- CFREE_SEC_BSS, /* no in-file bytes; see cfree_obj_section_data */
- CFREE_SEC_DEBUG,
- CFREE_SEC_OTHER,
-} CfreeSecKind;
-
-typedef enum CfreeSecFlag { /* bits of CfreeObjSecInfo.flags */
- CFREE_SF_EXEC = 1u << 0,
- CFREE_SF_WRITE = 1u << 1,
- CFREE_SF_ALLOC = 1u << 2,
- CFREE_SF_TLS = 1u << 3,
- CFREE_SF_MERGE = 1u << 4,
- CFREE_SF_STRINGS = 1u << 5,
-} CfreeSecFlag;
-
-/* CfreeSymBind and CfreeSymKind are declared at the top of this header
- * because they are also used by the JIT symbol iterator. */
-
-#define CFREE_SECTION_NONE UINT32_MAX /* "none" sentinel for CfreeObjSymInfo.section; also used for CfreeObjReloc.sym */
-
-typedef struct CfreeObjSecInfo { /* returned by value from cfree_obj_section */
- const char *name; /* interned; valid until cfree_obj_close */
- CfreeSecKind kind;
- uint32_t flags; /* bitmask of CfreeSecFlag */
- uint32_t size; /* bytes; BSS uses virtual size. NOTE(review): 32-bit here while CfreeObjSymInfo.size is 64-bit — confirm intended */
- uint32_t align; /* always a power of 2; 1 means no constraint;
- * 0 is reserved and never appears (ELF's "0 or 1
- * means none" is normalized to 1 on read). */
- uint32_t entsize; /* section entry size, or 0 when not specified */
-} CfreeObjSecInfo;
-
-typedef struct CfreeObjSymInfo { /* filled by cfree_obj_symiter_next */
- const char *name; /* interned; valid until cfree_obj_close */
- CfreeSymBind bind;
- CfreeSymKind kind;
- uint32_t section; /* 0-based index, or CFREE_SECTION_NONE */
- uint64_t value; /* NOTE(review): presumably an offset within `section` — confirm */
- uint64_t size;
-} CfreeObjSymInfo;
-
-CfreeObjFile *cfree_obj_open(const CfreeEnv *, const CfreeBytesInput *); /* NULL on failure; input bytes must stay alive until cfree_obj_close */
-void cfree_obj_close(CfreeObjFile *); /* ends the lifetime of strings and pointers handed out by the queries */
-CfreeObjFmt cfree_obj_fmt(const CfreeObjFile *); /* auto-detected format */
-CfreeTarget cfree_obj_target(const CfreeObjFile *); /* auto-detected target */
-uint32_t cfree_obj_nsections(const CfreeObjFile *);
-CfreeObjSecInfo cfree_obj_section(const CfreeObjFile *, uint32_t idx); /* NOTE(review): result for an out-of-range idx is not documented — confirm */
-
-CfreeObjSymIter *cfree_obj_symiter_new(CfreeObjFile *);
-int cfree_obj_symiter_next(CfreeObjSymIter *, CfreeObjSymInfo *out); /* NOTE(review): presumably 1 while filling *out and 0 when exhausted, like cfree_dep_iter_next — confirm */
-void cfree_obj_symiter_free(CfreeObjSymIter *);
-
-/* Raw bytes of a section. Returns a pointer aliasing storage owned by the
- * CfreeObjFile and valid until cfree_obj_close. For BSS (no in-file bytes),
- * returns NULL with `*len_out = 0`; the section's virtual size is on
- * CfreeObjSecInfo.size. Out-of-range idx returns NULL with `*len_out = 0`. */
-const uint8_t *cfree_obj_section_data(const CfreeObjFile *, uint32_t idx,
- size_t *len_out); /* NULL with *len_out = 0 for BSS and for an out-of-range idx */
-
-/* Expose the underlying CfreeObjBuilder for use with cfree_disasm_iter_new
- * (so the disassembler can consult sym/reloc tables for annotation). The
- * pointer is owned by the CfreeObjFile and is valid until cfree_obj_close. */
-CfreeObjBuilder *cfree_obj_builder(const CfreeObjFile *); /* borrowed: owned by the CfreeObjFile, valid until cfree_obj_close */
-
-/* Relocation iterator. Walks every relocation in the object across all
- * sections in section-then-offset order. Strings are interned and valid
- * until cfree_obj_close. */
-typedef struct CfreeObjReloc { /* filled by cfree_obj_reliter_next */
- uint32_t section; /* 0-based section index the reloc applies to */
- uint64_t offset; /* offset within that section */
- uint32_t sym; /* opaque symbol id; CFREE_SECTION_NONE if none */
- const char *sym_name; /* interned; "" when sym is none/anonymous */
- int64_t addend;
- uint32_t kind; /* arch-specific reloc type code */
- const char *kind_name; /* interned, e.g. "R_X86_64_PC32" */
-} CfreeObjReloc;
-
-typedef struct CfreeObjRelocIter CfreeObjRelocIter; /* opaque; walks relocations in section-then-offset order */
-
-CfreeObjRelocIter *cfree_obj_reliter_new(CfreeObjFile *);
-int cfree_obj_reliter_next(CfreeObjRelocIter *, CfreeObjReloc *out); /* NOTE(review): presumably 1 while filling *out and 0 when exhausted — confirm */
-void cfree_obj_reliter_free(CfreeObjRelocIter *);
-
-/* ============================================================
- * DWARF consumer
- * ============================================================
- * Read DWARF (.debug_info / .debug_line / .debug_aranges / .eh_frame) out
- * of an already-opened CfreeObjFile. The CfreeObjFile must outlive the
- * CfreeDebugInfo. Strings handed back through the query functions are
- * interned and valid until cfree_dwarf_close.
- *
- * cfree_dwarf_open returns NULL when the object has no DWARF, when the
- * object's format doesn't carry DWARF (PE/COFF can; the consumer accepts
- * the standard sections wherever they live), or on internal failure.
- *
- * cfree_dwarf_addr_to_line maps a runtime / image PC to the source file,
- * line, and column that produced it. Return codes:
- * 0 — PC matched a line entry; outputs filled.
- * 1 — PC is inside a CU's address range but no row matched (e.g.
- * compiler scaffolding).
- * 2 — PC is outside every CU's coverage; the caller is in a frame
- * that was compiled without `-g` (REPL renders as "no debug info
- * for this frame").
- *
- * cfree_dwarf_line_to_addr is the inverse. `file` matches a CU's
- * line-table filename exactly, or as a path suffix (`util.c` matches
- * `/proj/util.c` but not `/proj/run/futile.c`). Return codes:
- * 0 — unique match, pc_out filled.
- * 1 — file not present in any CU (REPL: "file not covered").
- * 2 — file present, but no row at `line` (REPL: "no line N in file").
- * 3 — ambiguous: more than one distinct PC matches via suffix;
- * pc_out is the first match. Use cfree_dwarf_line_to_addr_all to
- * enumerate candidates and prompt for disambiguation.
- *
- * cfree_dwarf_func_at returns the enclosing subprogram's name and
- * inclusive PC bounds. Returns 0 on success, 1 if no subprogram contains
- * `pc`. */
-typedef struct CfreeDebugInfo CfreeDebugInfo; /* repeats the typedef in the opaque-handles list at the top of this header; a repeated typedef is only valid from C11 on */
-
-CfreeDebugInfo *cfree_dwarf_open(CfreeCompiler *, const CfreeObjFile *); /* NULL: no DWARF in the object, or internal failure */
-void cfree_dwarf_close(CfreeDebugInfo *); /* ends the lifetime of strings returned by the queries */
-
-int cfree_dwarf_addr_to_line(CfreeDebugInfo *, uint64_t pc,
- const char **file_out, uint32_t *line_out,
- uint32_t *col_out); /* 0: matched; 1: inside a CU but no row; 2: outside every CU */
-int cfree_dwarf_line_to_addr(CfreeDebugInfo *, const char *file, uint32_t line,
- uint64_t *pc_out); /* 0: unique; 1: file not covered; 2: no such line; 3: ambiguous (pc_out = first match) */
-
-/* Disambiguation enumerator paired with cfree_dwarf_line_to_addr's
- * ambiguous return. `out[k]` is filled for the first `cap` distinct
- * candidate PCs; `*n_out` is the total candidate count, which may
- * exceed `cap`. `file` strings are interned in the CfreeDebugInfo and
- * live until cfree_dwarf_close. Returns 0 on success, 1 on invalid args. */
-typedef struct CfreeDwarfLineMatch { /* one candidate reported by cfree_dwarf_line_to_addr_all */
- uint64_t pc;
- const char *file; /* interned in the CfreeDebugInfo; valid until cfree_dwarf_close */
-} CfreeDwarfLineMatch;
-
-int cfree_dwarf_line_to_addr_all(CfreeDebugInfo *, const char *file,
- uint32_t line, CfreeDwarfLineMatch *out,
- uint32_t cap, uint32_t *n_out); /* fills at most cap entries; *n_out is the total and may exceed cap */
-int cfree_dwarf_func_at(CfreeDebugInfo *, uint64_t pc, const char **name_out,
- uint64_t *low_pc_out, uint64_t *high_pc_out); /* 0 on success; 1 if no subprogram contains pc */
-
-/* Richer subprogram description for backtrace rendering. Returns the same
- * name and pc range as cfree_dwarf_func_at, plus the source location of the
- * subprogram DIE (DW_AT_decl_file/line) and an `inlined` flag (set when
- * `pc` resolves to an inlined instance). Returns 0 on success, 1 if no
- * subprogram contains `pc`. cfree_dwarf_func_at is kept as a thin
- * convenience over this entry. */
-typedef struct CfreeDwarfType CfreeDwarfType; /* opaque */
-
-typedef struct CfreeDwarfSubprogram {
- const char *name;
- uint64_t low_pc; /* same pc range as cfree_dwarf_func_at reports */
- uint64_t high_pc;
- const char *decl_file; /* DW_AT_decl_file of the subprogram DIE */
- uint32_t decl_line; /* DW_AT_decl_line of the subprogram DIE */
- const CfreeDwarfType *return_type; /* NOTE(review): NULL-ness for void functions is not documented — confirm */
- uint8_t inlined; /* set when pc resolves to an inlined instance */
-} CfreeDwarfSubprogram;
-
-int cfree_dwarf_subprogram_at(CfreeDebugInfo *, uint64_t pc,
- CfreeDwarfSubprogram *out); /* 0 on success; 1 if no subprogram contains pc */
-int cfree_dwarf_subprogram_named(CfreeDebugInfo *, const char *name,
- CfreeDwarfSubprogram *out); /* NOTE(review): lookup by name; return codes are not documented — presumably 0 found / nonzero not found; confirm */
-
-/* CFI-driven unwind step. The caller seeds `frame->pc` (and any callee-saved
- * registers known at the leaf) and the consumer walks .eh_frame to compute
- * the caller frame in place: pc, cfa, and registers are updated.
- * CfreeUnwindFrame is declared at the top of this header. Register indices
- * follow the DWARF register numbering for the target arch (which matches
- * CfreeArchKind's canonical mapping). Returns 0 on a successful step, 1 at the
- * bottom of the stack (no caller), nonzero on decode error. */
-int cfree_dwarf_unwind_step(CfreeDebugInfo *, CfreeUnwindFrame *);
-
-/* ----- Variable locations -----
- *
- * Decode where a named variable lives at PC. Resolution order: the deepest
- * lexical scope at `pc` whose `name` matches wins; if no local matches, a
- * file-scope global with that name is returned; otherwise 1.
- *
- * `byte_size` is the variable's storage size in bytes, taken from the
- * variable's DIE type. Zero means unknown.
- *
- * cfree_dwarf_loc_read evaluates the location against `frame` (whose `regs`
- * supply register values; the leaf frame's regs come from CfreeStopInfo,
- * deeper frames from cfree_dwarf_unwind_step) and reads the underlying
- * bytes through the supplied JIT session. Up to `cap` bytes are written
- * into `dst`; *read_out reports the number actually read (capped to
- * the variable's byte_size). Returns 0 on success, nonzero on bad
- * arguments or a read fault.
- *
- * EXPR locations carry a DWARF expression byte string; libcfree owns the
- * stack-machine evaluator. Callers should treat the loc as opaque and
- * always go through cfree_dwarf_loc_read. */
-/* ----- Type descriptions -----
- *
- * Type DIEs are exposed as opaque CfreeDwarfType handles owned by the
- * CfreeDebugInfo (interned for the lifetime of the consumer). Callers
- * inspect a type with cfree_dwarf_type_info, which returns a kind tag plus
- * shape data (size, name, element count, inner type). For aggregates
- * (struct/union) and enums, dedicated iterators yield fields and enum
- * values respectively. */
-typedef enum CfreeDwarfTypeKind {
- CFREE_DT_VOID,
- CFREE_DT_SINT, /* base: signed integer */
- CFREE_DT_UINT, /* base: unsigned integer */
- CFREE_DT_BOOL,
- CFREE_DT_FLOAT,
- CFREE_DT_CHAR, /* signed_char / unsigned_char distinguished by SINT/UINT */
- CFREE_DT_PTR, /* points to inner */
- CFREE_DT_ARRAY, /* element type + length */
- CFREE_DT_STRUCT,
- CFREE_DT_UNION,
- CFREE_DT_ENUM, /* base type + named values */
- CFREE_DT_FUNC, /* function type, for function-pointer pretty-print */
- CFREE_DT_TYPEDEF, /* alias name + underlying */
-} CfreeDwarfTypeKind;
-
-typedef struct CfreeDwarfTypeInfo {
- CfreeDwarfTypeKind kind;
- uint32_t byte_size; /* 0 = unknown / void */
- const char *name; /* tag/typedef name; "" if anon */
- /* For ARRAY: element_count == 0 means flexible/unknown. */
- uint32_t element_count;
- /* For PTR/ARRAY/TYPEDEF: the inner type (NULL otherwise). */
- const CfreeDwarfType *inner;
-} CfreeDwarfTypeInfo;
-
-CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType *);
-
-/* Struct/union field iterator. Yields each direct field; nested aggregates
- * are reached by recursing on field.type. */
-typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter;
-typedef struct CfreeDwarfField {
- const char *name; /* "" for anonymous */
- uint32_t byte_offset;
- uint32_t bit_offset; /* for bitfields; 0 otherwise */
- uint32_t bit_size; /* for bitfields; 0 otherwise */
- const CfreeDwarfType *type;
-} CfreeDwarfField;
-
-CfreeDwarfFieldIter *cfree_dwarf_field_iter_new(CfreeDebugInfo *,
- const CfreeDwarfType *);
-int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter *, CfreeDwarfField *out);
-void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter *);
-
-/* Enum value iterator. */
-typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter;
-typedef struct CfreeDwarfEnumVal {
- const char *name;
- int64_t value;
-} CfreeDwarfEnumVal;
-
-CfreeDwarfEnumIter *cfree_dwarf_enum_iter_new(CfreeDebugInfo *,
- const CfreeDwarfType *);
-int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter *, CfreeDwarfEnumVal *out);
-void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter *);
-
-typedef enum CfreeDwarfLocKind {
- CFREE_DLOC_REG, /* value lives in a register */
- CFREE_DLOC_FRAME_OFS, /* [cfa + frame_ofs] */
- CFREE_DLOC_GLOBAL, /* absolute address */
- CFREE_DLOC_EXPR, /* DWARF expression bytes */
-} CfreeDwarfLocKind;
-
-typedef struct CfreeDwarfVarLoc {
- CfreeDwarfLocKind kind;
- uint32_t byte_size; /* 0 = unknown */
- /* DIE type of the variable. NULL when type information was not
- * recovered (e.g. stripped binary, hand-written symbol). When NULL,
- * callers should fall back to byte_size and treat the bytes opaquely. */
- const CfreeDwarfType *type;
- union {
- uint32_t reg;
- int32_t frame_ofs;
- uint64_t global;
- struct {
- const uint8_t *bytes;
- size_t len;
- } expr;
- } v;
-} CfreeDwarfVarLoc;
-
-/* Look up a variable visible at `pc` by name. Return codes:
- * 0 — found; *out filled.
- * 1 — `pc` is inside a known subprogram but no variable named `name`
- * resolves there (typo / out-of-scope).
- * 2 — `pc` is not covered by any subprogram (no debug info for this
- * frame); globals were still consulted before returning. */
-int cfree_dwarf_var_at(CfreeDebugInfo *, uint64_t pc, const char *name,
- CfreeDwarfVarLoc *out);
-int cfree_dwarf_loc_read(CfreeDebugInfo *, const CfreeDwarfVarLoc *,
- const CfreeUnwindFrame *,
- CfreeJitSession *, /* memory provider */
- void *dst, size_t cap, size_t *read_out);
-
-/* ----- Locals, arguments, and parameters -----
- *
- * cfree_dwarf_var_at resolves a single named variable. To enumerate every
- * variable visible at a PC (for `info locals` / `info args`), use the
- * vars-at iterator: it yields locals from the deepest scope outward and
- * then file-scope globals, with a role mask filter.
- *
- * cfree_dwarf_var_at is kept as a convenience over the iterator: it
- * performs deepest-scope-first matching by name and returns the first
- * hit. Both are valid entry points. */
-typedef enum CfreeDwarfVarRole {
- CFREE_DVR_LOCAL,
- CFREE_DVR_ARG,
- CFREE_DVR_GLOBAL,
-} CfreeDwarfVarRole;
-
-typedef struct CfreeDwarfVar {
- const char *name;
- CfreeDwarfVarRole role;
- CfreeDwarfVarLoc loc;
-} CfreeDwarfVar;
-
-typedef struct CfreeDwarfVarIter CfreeDwarfVarIter;
-
-CfreeDwarfVarIter *cfree_dwarf_vars_at_new(CfreeDebugInfo *, uint64_t pc,
- uint32_t role_mask);
-int cfree_dwarf_vars_at_next(CfreeDwarfVarIter *, CfreeDwarfVar *out);
-void cfree_dwarf_vars_at_free(CfreeDwarfVarIter *);
-
-/* Iterate the formal parameters of the subprogram covering `pc`, in
- * declaration order. Drives gdb-style backtrace argument rendering.
- * Returns NULL if `pc` is not inside any subprogram. */
-typedef struct CfreeDwarfParamIter CfreeDwarfParamIter;
-
-CfreeDwarfParamIter *cfree_dwarf_param_iter_new(CfreeDebugInfo *, uint64_t pc);
-CfreeDwarfParamIter *cfree_dwarf_param_iter_new_named(CfreeDebugInfo *,
- const char *name);
-int cfree_dwarf_param_iter_next(CfreeDwarfParamIter *, CfreeDwarfVar *out);
-void cfree_dwarf_param_iter_free(CfreeDwarfParamIter *);
-
-/* ============================================================
- * Disassembler
- * ============================================================
- * Two layers: a high-level convenience that walks a relocatable object's
- * text sections and writes an objdump-style listing, and a low-level
- * iterator that decodes instructions from a byte buffer with vaddr context.
- *
- * Operands are pre-rendered to text on CfreeInsn. Structured operands (per-
- * arch REG/IMM/MEM/SYM_REL enums) are the principled answer but multiply
- * surface per arch (x86 ModR/M, AArch64 vector lanes, RISC-V CSR names)
- * without v1 consumers; adding a structured form later is non-breaking
- * because the text fields remain accurate.
- *
- * Strings on CfreeInsn (mnemonic/operands/annotation) and `bytes` are owned
- * by the iterator and valid only until the next cfree_disasm_iter_next call
- * or cfree_disasm_iter_free, whichever comes first. */
-
-typedef struct CfreeInsn {
- uint64_t vaddr;
- const uint8_t *bytes;
- uint32_t nbytes;
- const char *mnemonic;
- const char *operands; /* pre-rendered; may be "" */
- const char *annotation; /* sym/reloc note; may be "" */
-} CfreeInsn;
-
-/* Walk a relocatable object's text sections and write an objdump-style
- * listing to `out`. Convenience over the iterator. The Writer is not
- * closed. Returns 0 on success, nonzero on failure. */
-int cfree_obj_disasm(CfreeCompiler *, const CfreeBytesInput *,
- CfreeWriter *out);
-
-/* Iterate instructions in a byte buffer at virtual address `vaddr`. If
- * `obj` is non-NULL, the decoder consults its symbol and relocation tables
- * to fill CfreeInsn.annotation; pass NULL for raw decoding. The bytes
- * buffer must remain alive until cfree_disasm_iter_free.
- *
- * cfree_disasm_iter_next returns 1 and fills `*out` for each decoded
- * instruction, 0 when the buffer is exhausted. On an undecodable byte the
- * iterator advances by the arch's minimum unit and emits a placeholder
- * mnemonic so the listing stays in sync. */
-typedef struct CfreeDisasmIter CfreeDisasmIter;
-
-CfreeDisasmIter *cfree_disasm_iter_new(CfreeCompiler *, const uint8_t *bytes,
- size_t len, uint64_t vaddr,
- CfreeObjBuilder *obj /* may be NULL */);
-int cfree_disasm_iter_next(CfreeDisasmIter *, CfreeInsn *out);
-void cfree_disasm_iter_free(CfreeDisasmIter *);
-
-/* ============================================================
- * Archive (ar) file
- * ============================================================
- * Pure format I/O — no compilation context required.
- *
- * cfree_ar_write packs member byte payloads into a POSIX ar archive written
- * to `out`. Options control reproducibility and format extensions:
- * - `epoch` Unix seconds written to ar_date for every member; 0
- * leaves the field as the literal "0" (the default).
- * - `symbol_index` if nonzero, emit a System V `/` symbol-index member
- * as the first member. The index payload is a 4-byte
- * big-endian symbol count, then count 4-byte big-endian
- * offsets pointing at member headers (relative to start
- * of archive), then NUL-terminated symbol names. Symbol
- * names are taken from `member_symbols`; an empty index
- * (count==0) is emitted when no symbols are supplied.
- * - `long_names` if nonzero, emit a `//` long-name table when any
- * member name exceeds 15 characters or contains '/'.
- * With long_names == 0, over-long names are truncated.
- * - `member_symbols` optional; parallel to the `members` array. Entry i
- * lists the global symbols defined by member i. NULL
- * (or per-entry count==0) means that member contributes
- * no symbols. Names point into caller-owned storage and
- * need only outlive the cfree_ar_write call. Ignored
- * when symbol_index == 0.
- * `opts` may be NULL to accept all defaults.
- * The Writer is not closed; I/O errors are detectable via out->error().
- * Returns 0 on success, 1 on bad arguments.
- *
- * cfree_ar_list writes one member name per line to `out` for each non-special
- * member in the archive. Returns 0 on success, 1 on bad arguments or
- * malformed archive.
- *
- * CfreeArIter is a stack-allocated cursor for iterating archive members.
- * cfree_ar_iter_init validates the archive magic and positions the cursor
- * at the first member; returns 1 on success, 0 on bad magic or NULL input.
- * cfree_ar_iter_next advances to the next non-special member and fills *out;
- * returns 1 if a member was returned, 0 at end or on malformed data.
- * Member data pointers alias the original archive bytes and are valid as
- * long as the archive bytes remain alive. CfreeArMember.name is interned
- * in iterator-owned storage and is valid only until the next iter_next
- * call on the same iterator. */
-typedef struct CfreeArMemberSymbols {
- const char *const *names; /* count entries; each NUL-terminated */
- uint32_t count;
-} CfreeArMemberSymbols;
-
-typedef struct CfreeArWriteOptions {
- uint64_t epoch; /* ar_date for every member; 0 = none */
- int symbol_index; /* emit System V '/' symbol-index member */
- int long_names; /* emit '//' long-name table when needed */
- /* Parallel to the `members` array; NULL means "no symbols anywhere".
- * Only consulted when symbol_index is nonzero. */
- const CfreeArMemberSymbols *member_symbols;
-} CfreeArWriteOptions;
-
-int cfree_ar_write(CfreeWriter *out, const CfreeBytesInput *members,
- uint32_t nmembers, const CfreeArWriteOptions *opts);
-int cfree_ar_list(const CfreeBytesInput *archive, CfreeWriter *out);
-
-typedef struct CfreeArIter {
- const uint8_t *_p;
- const uint8_t *_end;
- const uint8_t *_longnames; /* `//` table bytes, NULL until seen */
- size_t _longnames_len;
- char _namebuf[256]; /* iterator-owned scratch for member name */
-} CfreeArIter;
-
-typedef struct CfreeArMember {
- const char *name; /* iterator-owned; valid until next iter_next */
- const uint8_t *data; /* points into archive bytes */
- size_t size;
-} CfreeArMember;
-
-int cfree_ar_iter_init(CfreeArIter *, const CfreeBytesInput *archive);
-int cfree_ar_iter_next(CfreeArIter *, CfreeArMember *out);
-
-#endif
diff --git a/include/cfree/arch.h b/include/cfree/arch.h
@@ -0,0 +1,29 @@
+#ifndef CFREE_ARCH_H
+#define CFREE_ARCH_H
+
+#include <cfree/core.h>
+
+/*
+ * Architecture helpers shared by debuggers, unwinders, disassemblers, and
+ * hosts that render target register state.
+ */
+
+typedef struct CfreeUnwindFrame {
+ uint64_t pc; /* program counter */
+ uint64_t cfa; /* canonical frame address */
+ uint64_t regs[32]; /* DWARF register-numbered, zero for absent registers */
+} CfreeUnwindFrame;
+
+typedef struct CfreeArchReg {
+ uint32_t dwarf_idx;
+ const char *name;
+} CfreeArchReg;
+
+const char *cfree_arch_register_name(CfreeArchKind, uint32_t dwarf_idx);
+CfreeStatus cfree_arch_register_index(CfreeArchKind, const char *name,
+ uint32_t *idx_out);
+uint32_t cfree_arch_register_count(CfreeArchKind);
+CfreeStatus cfree_arch_register_at(CfreeArchKind, uint32_t idx,
+ CfreeArchReg *out);
+
+#endif
diff --git a/include/cfree/archive.h b/include/cfree/archive.h
@@ -0,0 +1,43 @@
+#ifndef CFREE_ARCHIVE_H
+#define CFREE_ARCHIVE_H
+
+#include <cfree/core.h>
+
+/*
+ * POSIX ar archive reader/writer.
+ *
+ * Pure byte-format I/O: no compiler context is required. Member payloads and
+ * archive bytes are borrowed for the duration of each call/iterator.
+ */
+
+typedef struct CfreeArMemberSymbols {
+ const char *const *names; /* count entries; each NUL-terminated */
+ uint32_t count; /* number of entries in names */
+} CfreeArMemberSymbols;
+
+typedef struct CfreeArWriteOptions {
+ uint64_t epoch; /* ar_date for every member; 0 = none */
+ int symbol_index; /* emit System V '/' symbol-index member */
+ int long_names; /* emit '//' long-name table when needed */
+ const CfreeArMemberSymbols *member_symbols; /* parallel to members; NULL ok */
+} CfreeArWriteOptions;
+
+CfreeStatus cfree_ar_write(CfreeWriter *out, const CfreeBytes *members,
+ uint32_t nmembers,
+ const CfreeArWriteOptions *opts);
+CfreeStatus cfree_ar_list(const CfreeBytes *archive, CfreeWriter *out);
+
+typedef struct CfreeArIter CfreeArIter;
+
+typedef struct CfreeArMember {
+ const char *name; /* NOTE(review): lifetime presumably tied to the iterator */
+ const uint8_t *data; /* points into the borrowed archive bytes */
+ size_t size;
+} CfreeArMember;
+
+CfreeStatus cfree_ar_iter_new(const CfreeContext *, const CfreeBytes *archive,
+ CfreeArIter **out);
+CfreeIterResult cfree_ar_iter_next(CfreeArIter *, CfreeArMember *out);
+void cfree_ar_iter_free(CfreeArIter *);
+
+#endif
diff --git a/include/cfree/cg.h b/include/cfree/cg.h
@@ -1,7 +1,8 @@
#ifndef CFREE_PUBLIC_CG_H
#define CFREE_PUBLIC_CG_H
-#include <cfree.h>
+#include <cfree/core.h>
+#include <cfree/objbuild.h>
/* ============================================================
* Handles
@@ -185,9 +186,10 @@ CfreeCgCallConv cfree_cg_type_func_call_conv(CfreeCompiler*, CfreeCgTypeId);
int cfree_cg_type_func_is_variadic(CfreeCompiler*, CfreeCgTypeId);
uint32_t cfree_cg_type_record_nfields(CfreeCompiler*, CfreeCgTypeId);
-/* Returns 0 on success and fills any non-NULL out parameters. */
-int cfree_cg_type_record_field(CfreeCompiler*, CfreeCgTypeId, uint32_t index,
- CfreeCgField* out, uint64_t* offset_out);
+/* Returns CFREE_OK and fills any non-NULL out parameters on success. */
+CfreeStatus cfree_cg_type_record_field(CfreeCompiler*, CfreeCgTypeId,
+ uint32_t index, CfreeCgField* out,
+ uint64_t* offset_out);
typedef enum CfreeCgSymbolFeature {
CFREE_CG_SYMFEAT_WEAK,
@@ -369,8 +371,8 @@ CfreeSym cfree_cg_c_linkage_name(CfreeCompiler*, CfreeSym source_name);
* Lifecycle and Source Locations
* ============================================================ */
-CfreeCg* cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out,
- const CfreeCompileOptions*);
+CfreeStatus cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out,
+ const CfreeCodeOptions*, CfreeCg** cg_out);
void cfree_cg_free(CfreeCg*);
/* Sticky source location. Function, scope, local, param, instruction, and
diff --git a/include/cfree/compile.h b/include/cfree/compile.h
@@ -0,0 +1,111 @@
+#ifndef CFREE_COMPILE_H
+#define CFREE_COMPILE_H
+
+#include <cfree/core.h>
+#include <cfree/objbuild.h>
+
+/*
+ * Source compiler embedding API.
+ *
+ * This layer compiles one source translation unit into a relocatable object
+ * builder or directly emits encoded object bytes. It is the right API for a
+ * cc/as-like driver. Language frontends that want to emit code directly
+ * should use cfree/cg.h instead.
+ */
+
+typedef enum CfreeLanguage {
+ CFREE_LANG_C = 0,
+ CFREE_LANG_ASM = 1,
+ CFREE_LANG_TOY = 2,
+ CFREE_LANG_WASM = 3,
+ CFREE_LANG_COUNT = 4,
+} CfreeLanguage;
+
+typedef struct CfreeSourceInput {
+ CfreeBytes bytes;
+ CfreeLanguage lang;
+} CfreeSourceInput;
+
+typedef struct CfreeDefine {
+ const char *name;
+ const char *body; /* NULL means "1" */
+} CfreeDefine;
+
+typedef struct CfreePreprocessOptions {
+ const char *const *include_dirs;
+ uint32_t ninclude_dirs;
+ const char *const *system_include_dirs;
+ uint32_t nsystem_include_dirs;
+ const CfreeDefine *defines;
+ uint32_t ndefines;
+ const char *const *undefines;
+ uint32_t nundefines;
+} CfreePreprocessOptions;
+
+typedef struct CfreeDiagnosticOptions {
+ int warnings_are_errors;
+ uint32_t max_errors; /* 0 means unlimited */
+} CfreeDiagnosticOptions;
+
+typedef struct CfreeCCompileOptions {
+ CfreeCodeOptions code;
+ CfreePreprocessOptions preprocess;
+ CfreeDiagnosticOptions diagnostics;
+} CfreeCCompileOptions;
+
+typedef struct CfreeAsmCompileOptions {
+ CfreeCodeOptions code;
+ CfreeDiagnosticOptions diagnostics;
+} CfreeAsmCompileOptions;
+
+typedef struct CfreeFrontendCompileOptions {
+ CfreeCodeOptions code;
+ CfreeDiagnosticOptions diagnostics;
+ const void *language_options;
+} CfreeFrontendCompileOptions;
+
+typedef CfreeStatus (*CfreeCompileFn)(CfreeCompiler *,
+ const CfreeFrontendCompileOptions *,
+ const CfreeSourceInput *,
+ CfreeObjBuilder *out);
+
+CfreeLanguage cfree_language_for_path(const char *path);
+CfreeStatus cfree_register_frontend(CfreeCompiler *, CfreeLanguage,
+ CfreeCompileFn);
+
+CfreeStatus cfree_compile_c_obj(CfreeCompiler *, const CfreeCCompileOptions *,
+ const CfreeBytes *, CfreeObjBuilder **out);
+CfreeStatus cfree_compile_c_obj_emit(CfreeCompiler *,
+ const CfreeCCompileOptions *,
+ const CfreeBytes *, CfreeWriter *out);
+CfreeStatus cfree_compile_asm_obj(CfreeCompiler *,
+ const CfreeAsmCompileOptions *,
+ const CfreeBytes *, CfreeObjBuilder **out);
+CfreeStatus cfree_compile_asm_obj_emit(CfreeCompiler *,
+ const CfreeAsmCompileOptions *,
+ const CfreeBytes *, CfreeWriter *out);
+CfreeStatus cfree_compile_source_obj(CfreeCompiler *,
+ const CfreeFrontendCompileOptions *,
+ const CfreeSourceInput *,
+ CfreeObjBuilder **out);
+CfreeStatus cfree_compile_source_obj_emit(CfreeCompiler *,
+ const CfreeFrontendCompileOptions *,
+ const CfreeSourceInput *,
+ CfreeWriter *out);
+
+typedef struct CfreeDepIter CfreeDepIter;
+
+typedef struct CfreeDepEdge {
+ const char *includer_name;
+ const char *included_name;
+ CfreeSrcLoc include_loc;
+ uint8_t from_system_path;
+ uint8_t bracketed;
+ uint8_t pad[2];
+} CfreeDepEdge;
+
+CfreeStatus cfree_dep_iter_new(CfreeCompiler *, CfreeDepIter **out);
+CfreeIterResult cfree_dep_iter_next(CfreeDepIter *, CfreeDepEdge *out);
+void cfree_dep_iter_free(CfreeDepIter *);
+
+#endif
diff --git a/include/cfree/core.h b/include/cfree/core.h
@@ -0,0 +1,242 @@
+#ifndef CFREE_CORE_H
+#define CFREE_CORE_H
+
+/*
+ * Core libcfree API.
+ *
+ * This header is the small substrate shared by every public libcfree
+ * component: target descriptions, host services, diagnostics, compiler
+ * lifetime, interned symbols, and byte/writer helpers. It intentionally
+ * contains no compile, link, codegen, object, JIT, DWARF, or archive entry
+ * points. Include the narrower component header for those.
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Opaque handles shared across component headers. */
+typedef struct CfreeCompiler CfreeCompiler;
+typedef struct CfreeObjBuilder CfreeObjBuilder;
+typedef struct CfreeObjFile CfreeObjFile;
+typedef struct CfreeJit CfreeJit;
+typedef struct CfreeJitSession CfreeJitSession;
+typedef struct CfreeDebugInfo CfreeDebugInfo;
+typedef struct CfreeEmu CfreeEmu;
+
+typedef uint32_t CfreeSym;
+
+typedef enum CfreeStatus {
+ CFREE_OK = 0,
+ CFREE_ERR = 1,
+ CFREE_NOMEM = 2,
+ CFREE_INVALID = 3,
+ CFREE_UNSUPPORTED = 4,
+ CFREE_MALFORMED = 5,
+ CFREE_IO = 6,
+ CFREE_NOT_FOUND = 7,
+ CFREE_AMBIGUOUS = 8,
+} CfreeStatus;
+
+typedef enum CfreeIterResult {
+ CFREE_ITER_ERROR = -1, /* NOTE(review): presumably failure; confirm */
+ CFREE_ITER_END = 0, /* no more items */
+ CFREE_ITER_ITEM = 1, /* an item was produced */
+} CfreeIterResult;
+
+typedef struct CfreeSrcLoc {
+ uint32_t file_id;
+ uint32_t line;
+ uint32_t col;
+} CfreeSrcLoc;
+
+typedef struct CfreeBytes {
+ const char *name; /* diagnostic label; may be NULL */
+ const uint8_t *data;
+ size_t len;
+} CfreeBytes;
+
+typedef enum CfreeArchKind {
+ CFREE_ARCH_X86_32,
+ CFREE_ARCH_X86_64,
+ CFREE_ARCH_ARM_32,
+ CFREE_ARCH_ARM_64,
+ CFREE_ARCH_RV32,
+ CFREE_ARCH_RV64,
+ CFREE_ARCH_WASM,
+} CfreeArchKind;
+
+typedef enum CfreeOSKind {
+ CFREE_OS_FREESTANDING,
+ CFREE_OS_LINUX,
+ CFREE_OS_MACOS,
+ CFREE_OS_WINDOWS,
+ CFREE_OS_FREEBSD,
+ CFREE_OS_WASI,
+} CfreeOSKind;
+
+typedef enum CfreeObjFmt {
+ CFREE_OBJ_ELF,
+ CFREE_OBJ_COFF,
+ CFREE_OBJ_MACHO,
+ CFREE_OBJ_WASM,
+} CfreeObjFmt;
+
+typedef enum CfreePic {
+ CFREE_PIC_NONE,
+ CFREE_PIC_PIC,
+ CFREE_PIC_PIE,
+} CfreePic;
+
+typedef enum CfreeCodeModel {
+ CFREE_CM_DEFAULT,
+ CFREE_CM_SMALL,
+ CFREE_CM_MEDIUM,
+ CFREE_CM_LARGE,
+} CfreeCodeModel;
+
+typedef struct CfreeTarget {
+ CfreeArchKind arch;
+ CfreeOSKind os;
+ CfreeObjFmt obj;
+ uint8_t ptr_size;
+ uint8_t ptr_align;
+ uint8_t big_endian;
+ uint8_t pic; /* CfreePic */
+ uint8_t code_model; /* CfreeCodeModel */
+} CfreeTarget;
+
+typedef enum CfreeSymBind {
+ CFREE_SB_LOCAL,
+ CFREE_SB_GLOBAL,
+ CFREE_SB_WEAK,
+} CfreeSymBind;
+
+typedef enum CfreeSymKind {
+ CFREE_SK_UNDEF,
+ CFREE_SK_FUNC,
+ CFREE_SK_OBJ,
+ CFREE_SK_SECTION,
+ CFREE_SK_FILE,
+ CFREE_SK_COMMON,
+ CFREE_SK_TLS,
+ CFREE_SK_ABS,
+ CFREE_SK_NOTYPE,
+ CFREE_SK_IFUNC,
+} CfreeSymKind;
+
+typedef struct CfreePathPrefixMap {
+ const char *old_prefix;
+ const char *new_prefix;
+} CfreePathPrefixMap;
+
+typedef struct CfreeCodeOptions {
+ int opt_level; /* 0 direct, 1 minimal, 2 full */
+ int debug_info; /* nonzero emits source/debug records when supported */
+ uint64_t epoch; /* reproducible timestamp seed; 0 means no timestamp */
+ const CfreePathPrefixMap *path_map;
+ uint32_t npath_map;
+} CfreeCodeOptions;
+
+typedef struct CfreeHeap CfreeHeap;
+struct CfreeHeap {
+ void *(*alloc)(CfreeHeap *, size_t size, size_t align);
+ void *(*realloc)(CfreeHeap *, void *p, size_t old_size, size_t new_size,
+ size_t align);
+ void (*free)(CfreeHeap *, void *p, size_t size);
+ void *user;
+};
+
+typedef enum CfreeDiagKind {
+ CFREE_DIAG_NOTE,
+ CFREE_DIAG_WARN,
+ CFREE_DIAG_ERROR,
+ CFREE_DIAG_FATAL,
+} CfreeDiagKind;
+
+typedef struct CfreeDiagSink CfreeDiagSink;
+struct CfreeDiagSink {
+ void (*emit)(CfreeDiagSink *, CfreeDiagKind, CfreeSrcLoc, const char *fmt,
+ va_list);
+ void *user;
+ uint32_t errors; /* maintained by libcfree; hosts may read */
+ uint32_t warnings; /* maintained by libcfree; hosts may read */
+};
+
+typedef struct CfreeWriter CfreeWriter;
+struct CfreeWriter {
+ CfreeStatus (*write)(CfreeWriter *, const void *data, size_t n);
+ CfreeStatus (*seek)(CfreeWriter *, uint64_t offset);
+ uint64_t (*tell)(CfreeWriter *);
+ CfreeStatus (*status)(CfreeWriter *);
+ void (*close)(CfreeWriter *);
+};
+
+static inline CfreeStatus cfree_writer_write(CfreeWriter *w, const void *d,
+ size_t n) {
+ return w->write(w, d, n);
+}
+
+static inline CfreeStatus cfree_writer_seek(CfreeWriter *w, uint64_t off) {
+ return w->seek(w, off);
+}
+
+static inline uint64_t cfree_writer_tell(CfreeWriter *w) { return w->tell(w); }
+static inline CfreeStatus cfree_writer_status(CfreeWriter *w) {
+ return w->status(w);
+}
+static inline void cfree_writer_close(CfreeWriter *w) { w->close(w); }
+
+typedef struct CfreeFileData {
+ const uint8_t *data;
+ size_t size;
+ void *token;
+} CfreeFileData;
+
+typedef struct CfreeFileIO {
+ CfreeStatus (*read_all)(void *user, const char *path, CfreeFileData *out);
+ void (*release)(void *user, CfreeFileData *);
+ CfreeStatus (*open_writer)(void *user, const char *path, CfreeWriter **out);
+ void *user;
+} CfreeFileIO;
+
+typedef struct CfreeMetrics {
+ void (*scope_begin)(void *user, const char *name);
+ void (*scope_end)(void *user, const char *name);
+ void (*count)(void *user, const char *name, uint64_t value);
+ void *user;
+} CfreeMetrics;
+
+typedef struct CfreeContext {
+ CfreeHeap *heap;
+ /* Optional. Source compilation uses this for include resolution and output
+ * helpers; pure byte-oriented modules such as object, archive, DWARF, and
+ * disassembly ignore it. */
+ const CfreeFileIO *file_io;
+ CfreeDiagSink *diag;
+ const CfreeMetrics *metrics;
+ /* Unix seconds, or negative when the host provides no clock. */
+ int64_t now;
+} CfreeContext;
+
+CfreeStatus cfree_compiler_new(CfreeTarget, const CfreeContext *,
+ CfreeCompiler **out);
+void cfree_compiler_free(CfreeCompiler *);
+CfreeTarget cfree_compiler_target(CfreeCompiler *);
+
+CfreeHeap *cfree_compiler_heap(CfreeCompiler *);
+const CfreeFileIO *cfree_compiler_file_io(CfreeCompiler *);
+CfreeDiagSink *cfree_compiler_diag_sink(CfreeCompiler *);
+int64_t cfree_compiler_now(CfreeCompiler *);
+CfreeContext cfree_compiler_context(CfreeCompiler *);
+
+const char *cfree_compiler_file_name(CfreeCompiler *, uint32_t file_id);
+
+CfreeSym cfree_sym_intern(CfreeCompiler *, const char *str);
+CfreeSym cfree_sym_intern_len(CfreeCompiler *, const char *str, size_t len);
+const char *cfree_sym_str(CfreeCompiler *, CfreeSym, size_t *len_out);
+
+CfreeStatus cfree_writer_mem(CfreeHeap *, CfreeWriter **out);
+const uint8_t *cfree_writer_mem_bytes(CfreeWriter *, size_t *len_out);
+
+#endif
diff --git a/include/cfree/dbg.h b/include/cfree/dbg.h
@@ -0,0 +1,120 @@
+#ifndef CFREE_DBG_H
+#define CFREE_DBG_H
+
+#include <cfree/arch.h>
+#include <cfree/jit.h>
+
+/*
+ * Controlled in-process JIT execution.
+ *
+ * A session owns the stop/resume state for one JIT image. Hosts provide the
+ * OS hooks through CfreeDbgHost when they need breakpointing, stepping, or
+ * signal-aware memory/register access.
+ */
+
+typedef struct CfreeDbgSignalOps {
+ CfreeStatus (*on_fault)(void *session, int signo, CfreeUnwindFrame *regs);
+} CfreeDbgSignalOps;
+
+typedef struct CfreeDbgOs {
+ CfreeStatus (*thread_start)(void *user, void (*fn)(void *), void *arg,
+ void **thread_out);
+ void (*thread_join)(void *user, void *thread);
+ CfreeStatus (*thread_interrupt)(void *user, void *thread);
+
+ CfreeStatus (*event_new)(void *user, void **event_out);
+ void (*event_free)(void *user, void *ev);
+ CfreeStatus (*event_wait)(void *user, void *ev);
+ CfreeStatus (*event_signal)(void *user, void *ev);
+ CfreeStatus (*event_reset)(void *user, void *ev);
+
+ CfreeStatus (*signals_install)(void *user, const CfreeDbgSignalOps *ops,
+ void *session);
+ void (*signals_uninstall)(void *user);
+ int interrupt_signo;
+
+ CfreeStatus (*code_write_begin)(void *user, void *runtime_addr, size_t n,
+ void **write_out);
+ void (*code_write_end)(void *user, void *runtime_addr, size_t n);
+ void (*flush_icache)(void *user, void *runtime_addr, size_t n);
+
+ CfreeStatus (*guarded_copy)(void *user, void *dst, const void *src,
+ size_t n);
+ void *user;
+} CfreeDbgOs;
+
+typedef struct CfreeDbgHost {
+ const CfreeDbgOs *os;
+} CfreeDbgHost;
+
+typedef enum CfreeStopKind {
+ CFREE_STOP_BREAKPOINT,
+ CFREE_STOP_SIGNAL,
+ CFREE_STOP_EXIT,
+ CFREE_STOP_INTERRUPT,
+} CfreeStopKind;
+
+typedef struct CfreeStopInfo {
+ CfreeStopKind kind;
+ int signal;
+ int exit_code;
+ uint32_t bp_id;
+ CfreeUnwindFrame regs;
+} CfreeStopInfo;
+
+typedef enum CfreeResumeMode {
+ CFREE_RESUME_CONTINUE,
+ CFREE_RESUME_STEP_INSN,
+ CFREE_RESUME_STEP_LINE,
+ CFREE_RESUME_NEXT_LINE,
+ CFREE_RESUME_STEP_OUT,
+} CfreeResumeMode;
+
+typedef enum CfreeEntryKind {
+ CFREE_ENTRY_INT_ARGV,
+ CFREE_ENTRY_U64,
+} CfreeEntryKind;
+
+typedef struct CfreeBreakpointSpec {
+ uint64_t addr;
+ uint64_t skip_count;
+ uint64_t max_hits;
+ int (*condition)(void *user, const CfreeUnwindFrame *regs);
+ void *condition_user;
+} CfreeBreakpointSpec;
+
+CfreeStatus cfree_jit_session_new(CfreeJit *, const CfreeDbgHost *,
+ CfreeJitSession **out);
+void cfree_jit_session_free(CfreeJitSession *);
+CfreeStatus cfree_jit_session_attach_dwarf(CfreeJitSession *,
+ CfreeDebugInfo *);
+
+CfreeStatus cfree_jit_session_call(CfreeJitSession *, void *entry,
+ CfreeEntryKind, int argc, char **argv,
+ CfreeStopInfo *stop_out);
+CfreeStatus cfree_jit_session_call_u64(CfreeJitSession *, void *entry,
+ const uint64_t *args, uint32_t nargs,
+ uint64_t *ret_out,
+ CfreeStopInfo *stop_out);
+CfreeStatus cfree_jit_session_resume(CfreeJitSession *, CfreeResumeMode,
+ CfreeStopInfo *stop_out);
+CfreeStatus cfree_jit_session_interrupt(CfreeJitSession *);
+
+CfreeStatus cfree_jit_session_read_mem(CfreeJitSession *, uint64_t addr,
+ void *dst, size_t n);
+CfreeStatus cfree_jit_session_write_mem(CfreeJitSession *, uint64_t addr,
+ const void *src, size_t n);
+CfreeStatus cfree_jit_session_get_regs(CfreeJitSession *,
+ CfreeUnwindFrame *out);
+CfreeStatus cfree_jit_session_set_regs(CfreeJitSession *,
+ const CfreeUnwindFrame *);
+
+CfreeStatus cfree_jit_session_breakpoint_set(CfreeJitSession *, uint64_t addr,
+ uint32_t *bp_id_out);
+CfreeStatus cfree_jit_session_breakpoint_clear(CfreeJitSession *,
+ uint32_t bp_id);
+CfreeStatus cfree_jit_session_breakpoint_set_spec(CfreeJitSession *,
+ const CfreeBreakpointSpec *,
+ uint32_t *bp_id_out);
+
+#endif
diff --git a/include/cfree/disasm.h b/include/cfree/disasm.h
@@ -0,0 +1,42 @@
+#ifndef CFREE_DISASM_H
+#define CFREE_DISASM_H
+
+#include <cfree/object.h>
+
+/*
+ * Disassembler API.
+ *
+ * The low-level iterator decodes a byte range. Passing an object file lets the
+ * iterator annotate instructions with known symbols/relocations.
+ */
+
+typedef struct CfreeInsn {
+ uint64_t vaddr;
+ const uint8_t *bytes;
+ uint32_t nbytes;
+ const char *mnemonic; /* NOTE(review): strings likely iterator-owned; confirm */
+ const char *operands; /* pre-rendered operand text */
+ const char *annotation; /* symbol/relocation note */
+} CfreeInsn;
+
+typedef struct CfreeDisasmIter CfreeDisasmIter;
+
+typedef struct CfreeDisasmContext {
+ CfreeTarget target;
+ CfreeContext context;
+} CfreeDisasmContext;
+
+CfreeStatus cfree_disasm_iter_new(const CfreeDisasmContext *,
+ const uint8_t *bytes, size_t len,
+ uint64_t vaddr,
+ const CfreeObjFile *annotations,
+ CfreeDisasmIter **out);
+CfreeIterResult cfree_disasm_iter_next(CfreeDisasmIter *, CfreeInsn *out);
+void cfree_disasm_iter_free(CfreeDisasmIter *);
+
+CfreeStatus cfree_disasm_obj(const CfreeContext *, const CfreeObjFile *,
+ CfreeWriter *out);
+CfreeStatus cfree_disasm_obj_bytes(const CfreeContext *, const CfreeBytes *,
+ CfreeWriter *out);
+
+#endif
diff --git a/include/cfree/dwarf.h b/include/cfree/dwarf.h
@@ -0,0 +1,182 @@
+#ifndef CFREE_DWARF_H
+#define CFREE_DWARF_H
+
+#include <cfree/arch.h>
+#include <cfree/object.h>
+
+/*
+ * DWARF consumer API.
+ *
+ * Addresses are image-relative unless a caller explicitly translates them
+ * through the JIT API first. Memory reads for variable materialization are
+ * provided by the caller so this header does not depend on the JIT session API.
+ */
+
+typedef struct CfreeDwarfType CfreeDwarfType;
+
+CfreeStatus cfree_dwarf_open(const CfreeContext *, const CfreeObjFile *,
+ CfreeDebugInfo **out);
+void cfree_dwarf_free(CfreeDebugInfo *);
+
+CfreeStatus cfree_dwarf_addr_to_line(CfreeDebugInfo *, uint64_t pc,
+ const char **file_out,
+ uint32_t *line_out, uint32_t *col_out);
+CfreeStatus cfree_dwarf_line_to_addr(CfreeDebugInfo *, const char *file,
+ uint32_t line, uint64_t *pc_out);
+
+typedef struct CfreeDwarfLineMatch {
+ uint64_t pc;
+ const char *file;
+} CfreeDwarfLineMatch;
+
+CfreeStatus cfree_dwarf_line_to_addr_all(CfreeDebugInfo *, const char *file,
+ uint32_t line,
+ CfreeDwarfLineMatch *out, /* presumably a caller-provided array of `cap` entries */
+ uint32_t cap, uint32_t *n_out); /* NOTE(review): is *n_out the total or the stored count when matches exceed cap? confirm */
+CfreeStatus cfree_dwarf_func_at(CfreeDebugInfo *, uint64_t pc,
+ const char **name_out, uint64_t *low_pc_out,
+ uint64_t *high_pc_out);
+
+typedef struct CfreeDwarfSubprogram { /* `out` of cfree_dwarf_subprogram_at / _named */
+ const char *name;
+ uint64_t low_pc; /* image-relative, following this header's address convention */
+ uint64_t high_pc; /* NOTE(review): inclusive vs. exclusive bound is not stated; confirm */
+ const char *decl_file;
+ uint32_t decl_line;
+ const CfreeDwarfType *return_type; /* NOTE(review): void may be NULL or a CFREE_DT_VOID type; confirm */
+ uint8_t inlined; /* presumably a 0/1 flag */
+} CfreeDwarfSubprogram;
+
+CfreeStatus cfree_dwarf_subprogram_at(CfreeDebugInfo *, uint64_t pc,
+ CfreeDwarfSubprogram *out);
+CfreeStatus cfree_dwarf_subprogram_named(CfreeDebugInfo *, const char *name,
+ CfreeDwarfSubprogram *out);
+CfreeStatus cfree_dwarf_unwind_step(CfreeDebugInfo *, CfreeUnwindFrame *);
+
+typedef enum CfreeDwarfTypeKind {
+ CFREE_DT_VOID,
+ CFREE_DT_SINT,
+ CFREE_DT_UINT,
+ CFREE_DT_BOOL,
+ CFREE_DT_FLOAT,
+ CFREE_DT_CHAR,
+ CFREE_DT_PTR,
+ CFREE_DT_ARRAY,
+ CFREE_DT_STRUCT,
+ CFREE_DT_UNION,
+ CFREE_DT_ENUM,
+ CFREE_DT_FUNC,
+ CFREE_DT_TYPEDEF,
+} CfreeDwarfTypeKind;
+
+typedef struct CfreeDwarfTypeInfo { /* returned by value from cfree_dwarf_type_info */
+ CfreeDwarfTypeKind kind;
+ uint32_t byte_size;
+ const char *name;
+ uint32_t element_count; /* presumably meaningful only for CFREE_DT_ARRAY; confirm */
+ const CfreeDwarfType *inner; /* presumably the pointee/element/aliased type (PTR/ARRAY/TYPEDEF); confirm */
+} CfreeDwarfTypeInfo;
+
+CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType *);
+
+typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter;
+typedef struct CfreeDwarfField { /* `out` of cfree_dwarf_field_iter_next */
+ const char *name;
+ uint32_t byte_offset;
+ uint32_t bit_offset; /* NOTE(review): relative to byte_offset or to the aggregate start? confirm */
+ uint32_t bit_size; /* presumably 0 for members that are not bit-fields; confirm */
+ const CfreeDwarfType *type;
+} CfreeDwarfField;
+
+CfreeStatus cfree_dwarf_field_iter_new(CfreeDebugInfo *,
+ const CfreeDwarfType *,
+ CfreeDwarfFieldIter **out);
+CfreeIterResult cfree_dwarf_field_iter_next(CfreeDwarfFieldIter *,
+ CfreeDwarfField *out);
+void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter *);
+
+typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter;
+typedef struct CfreeDwarfEnumVal {
+ const char *name;
+ int64_t value;
+} CfreeDwarfEnumVal;
+
+CfreeStatus cfree_dwarf_enum_iter_new(CfreeDebugInfo *,
+ const CfreeDwarfType *,
+ CfreeDwarfEnumIter **out);
+CfreeIterResult cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter *,
+ CfreeDwarfEnumVal *out);
+void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter *);
+
+typedef enum CfreeDwarfLocKind {
+ CFREE_DLOC_REG,
+ CFREE_DLOC_FRAME_OFS,
+ CFREE_DLOC_GLOBAL,
+ CFREE_DLOC_EXPR,
+} CfreeDwarfLocKind;
+
+typedef struct CfreeDwarfVarLoc { /* `out` of cfree_dwarf_var_at; input to cfree_dwarf_loc_read */
+ CfreeDwarfLocKind kind; /* presumably selects the active member of `v` */
+ uint32_t byte_size;
+ const CfreeDwarfType *type;
+ union {
+ uint32_t reg; /* presumably used with CFREE_DLOC_REG */
+ int32_t frame_ofs; /* presumably used with CFREE_DLOC_FRAME_OFS; signed, base not stated here */
+ uint64_t global; /* presumably used with CFREE_DLOC_GLOBAL */
+ struct {
+ const uint8_t *bytes;
+ size_t len;
+ } expr; /* presumably used with CFREE_DLOC_EXPR: pointer + length of the expression bytes */
+ } v;
+} CfreeDwarfVarLoc;
+
+typedef CfreeStatus (*CfreeDwarfReadMemFn)(void *user, uint64_t addr, /* caller-provided memory reader */
+ void *dst, size_t n); /* presumably copies n bytes from addr into dst; confirm */
+
+CfreeStatus cfree_dwarf_var_at(CfreeDebugInfo *, uint64_t pc,
+ const char *name, CfreeDwarfVarLoc *out);
+CfreeStatus cfree_dwarf_loc_read(CfreeDebugInfo *, const CfreeDwarfVarLoc *,
+ const CfreeUnwindFrame *,
+ CfreeDwarfReadMemFn read_mem, void *read_user, /* read_user: presumably forwarded as read_mem's `user` */
+ void *dst, size_t cap, size_t *read_out); /* presumably at most cap bytes stored, count in *read_out; confirm */
+
+typedef enum CfreeDwarfVarRole {
+ CFREE_DVR_LOCAL,
+ CFREE_DVR_ARG,
+ CFREE_DVR_GLOBAL,
+} CfreeDwarfVarRole;
+
+typedef enum CfreeDwarfVarRoleMask { /* one bit per CfreeDwarfVarRole enumerator */
+ CFREE_DVRM_LOCAL = 1u << CFREE_DVR_LOCAL,
+ CFREE_DVRM_ARG = 1u << CFREE_DVR_ARG,
+ CFREE_DVRM_GLOBAL = 1u << CFREE_DVR_GLOBAL,
+ CFREE_DVRM_ALL = CFREE_DVRM_LOCAL | CFREE_DVRM_ARG | CFREE_DVRM_GLOBAL, /* union of the three bits above */
+} CfreeDwarfVarRoleMask; /* presumably what cfree_dwarf_vars_at_new expects in role_mask */
+
+typedef struct CfreeDwarfVar {
+ const char *name;
+ CfreeDwarfVarRole role;
+ CfreeDwarfVarLoc loc;
+} CfreeDwarfVar;
+
+typedef struct CfreeDwarfVarIter CfreeDwarfVarIter;
+
+CfreeStatus cfree_dwarf_vars_at_new(CfreeDebugInfo *, uint64_t pc,
+ uint32_t role_mask,
+ CfreeDwarfVarIter **out);
+CfreeIterResult cfree_dwarf_vars_at_next(CfreeDwarfVarIter *,
+ CfreeDwarfVar *out);
+void cfree_dwarf_vars_at_free(CfreeDwarfVarIter *);
+
+typedef struct CfreeDwarfParamIter CfreeDwarfParamIter;
+
+CfreeStatus cfree_dwarf_param_iter_new(CfreeDebugInfo *, uint64_t pc,
+ CfreeDwarfParamIter **out);
+CfreeStatus cfree_dwarf_param_iter_new_named(CfreeDebugInfo *,
+ const char *name,
+ CfreeDwarfParamIter **out);
+CfreeIterResult cfree_dwarf_param_iter_next(CfreeDwarfParamIter *,
+ CfreeDwarfVar *out);
+void cfree_dwarf_param_iter_free(CfreeDwarfParamIter *);
+
+#endif
diff --git a/include/cfree/emu.h b/include/cfree/emu.h
@@ -0,0 +1,45 @@
+#ifndef CFREE_EMU_H
+#define CFREE_EMU_H
+
+#include <cfree/core.h>
+
+/*
+ * User-mode guest ELF emulator.
+ *
+ * The emulator translates guest basic blocks through the cfree backend and
+ * executes them in-process. It is intentionally separate from the JIT API:
+ * embedders that only run native JIT code do not need this surface.
+ */
+
+typedef enum CfreeEmuArch {
+ CFREE_EMU_ARCH_AARCH64,
+ CFREE_EMU_ARCH_RISCV64,
+} CfreeEmuArch;
+
+typedef enum CfreeEmuTraceFlag { /* distinct single-bit values */
+ CFREE_EMU_TRACE_PC = 1u << 0,
+ CFREE_EMU_TRACE_INSN = 1u << 1,
+ CFREE_EMU_TRACE_BLOCK = 1u << 2,
+} CfreeEmuTraceFlag;
+
+typedef uint32_t CfreeEmuTraceFlags; /* type of CfreeEmuOptions.trace; presumably an OR of CfreeEmuTraceFlag bits */
+
+typedef struct CfreeEmuOptions { /* input to cfree_emu_run and cfree_emu_new */
+ CfreeEmuArch guest_arch;
+ const uint8_t *guest_elf_bytes;
+ size_t guest_elf_len;
+ int optimize; /* NOTE(review): boolean or optimization level? not stated; confirm */
+ CfreeEmuTraceFlags trace;
+ const char *const *argv; /* no count field: presumably NULL-terminated; confirm */
+ const char *const *envp; /* no count field: presumably NULL-terminated; confirm */
+} CfreeEmuOptions;
+
+CfreeStatus cfree_emu_run(CfreeCompiler *, const CfreeEmuOptions *,
+ int *out_exit_code);
+CfreeStatus cfree_emu_new(CfreeCompiler *, const CfreeEmuOptions *,
+ CfreeEmu **out);
+CfreeStatus cfree_emu_step(CfreeEmu *, uint32_t nblocks);
+void *cfree_emu_lookup(CfreeEmu *, uint64_t guest_pc);
+void cfree_emu_free(CfreeEmu *);
+
+#endif
diff --git a/include/cfree/frontend.h b/include/cfree/frontend.h
@@ -1,80 +1,43 @@
#ifndef CFREE_FRONTEND_H
#define CFREE_FRONTEND_H
-#include <cfree.h>
+#include <cfree/cg.h>
+#include <cfree/source.h>
+#include <cfree/support/arena.h>
#include <stdarg.h>
-#include <stddef.h>
#include <stdint.h>
-typedef struct CfreeArena CfreeArena;
-
-/* Arena storage for frontends. The arena is opaque; allocation is bump-style
- * and released wholesale by reset/free. */
-CfreeArena* cfree_arena_new(CfreeHeap*, size_t block_size);
-void cfree_arena_free(CfreeArena*);
-void cfree_arena_reset(CfreeArena*);
-void* cfree_arena_alloc(CfreeArena*, size_t size, size_t align);
-void* cfree_arena_zalloc(CfreeArena*, size_t size, size_t align);
-char* cfree_arena_strdup(CfreeArena*, const char* s, size_t len);
-
-#define cfree_arena_new_obj(a, T) \
- ((T*)cfree_arena_alloc((a), sizeof(T), _Alignof(T)))
-#define cfree_arena_znew_obj(a, T) \
- ((T*)cfree_arena_zalloc((a), sizeof(T), _Alignof(T)))
-#define cfree_arena_array(a, T, n) \
- ((T*)cfree_arena_alloc((a), sizeof(T) * (size_t)(n), _Alignof(T)))
-#define cfree_arena_zarray(a, T, n) \
- ((T*)cfree_arena_zalloc((a), sizeof(T) * (size_t)(n), _Alignof(T)))
-
-/* Compiler-attached host services. These expose the vtables already present
- * in CfreeEnv without exposing CfreeCompiler's internal layout. */
-CfreeHeap* cfree_compiler_heap(CfreeCompiler*);
-const CfreeFileIO* cfree_compiler_file_io(CfreeCompiler*);
-int64_t cfree_compiler_now(CfreeCompiler*);
-
-/* Symbol pool helpers. cfree_sym_intern is the c-string convenience in
- * <cfree.h>; the length-taking form is needed by lexers and preprocessors. */
-CfreeSym cfree_sym_intern_len(CfreeCompiler*, const char* str, size_t len);
-const char* cfree_sym_str(CfreeCompiler*, CfreeSym, size_t* len_out);
-
-/* Source-file registration and include-edge recording. */
-uint32_t cfree_source_add_file(CfreeCompiler*, const char* path,
- int system_header);
-uint32_t cfree_source_add_memory(CfreeCompiler*, const char* name);
-uint32_t cfree_source_add_builtin(CfreeCompiler*, const char* name);
-void cfree_source_add_include(CfreeCompiler*, uint32_t includer_file_id,
- uint32_t included_file_id, CfreeSrcLoc loc,
- int system);
-
-typedef struct CfreeSourceFile {
- uint32_t id;
- CfreeSym name;
- CfreeSym path;
- uint8_t kind;
- uint8_t system_header;
- uint16_t pad;
-} CfreeSourceFile;
-
-int cfree_source_file(CfreeCompiler*, uint32_t file_id, CfreeSourceFile* out);
-
-/* Frontend panic boundary. Frontend entry points called by cfree_compile_obj*
- * already run under libcfree's top-level boundary; standalone frontend helpers
- * such as preprocess/token-dump can use cfree_frontend_run to get the same
- * behavior without seeing jmp_buf or CfreeCompiler internals. */
-typedef int (*CfreeFrontendRunFn)(CfreeCompiler*, void* user);
-int cfree_frontend_run(CfreeCompiler*, CfreeFrontendRunFn, void* user);
+/*
+ * Language frontend convenience API.
+ *
+ * This header is the intended one-stop include for source language
+ * implementations. It includes:
+ * - cfree/cg.h for code emission
+ * - cfree/source.h for source file ids and include-edge recording
+ * - cfree/support/arena.h for short-lived frontend allocation
+ *
+ * The declarations below are the frontend execution boundary and host-service
+ * shims that do not belong to codegen, source registry, or allocation.
+ *
+ * Frontend entry points called by cfree_compile_* already run under this
+ * boundary. Standalone frontend helpers such as preprocess/token-dump can use
+ * cfree_frontend_run to get the same behavior without exposing libcfree's
+ * internal panic machinery.
+ */
+typedef CfreeStatus (*CfreeFrontendRunFn)(CfreeCompiler *, void *user);
+CfreeStatus cfree_frontend_run(CfreeCompiler *, CfreeFrontendRunFn, void *user);
/* Optional metrics bridge for frontends. These are no-ops unless the host
- * supplied CfreeEnv.metrics. Frontends use this public shim instead of
+ * supplied CfreeContext.metrics. Frontends use this public shim instead of
* depending on libcfree's internal core headers. */
-void cfree_frontend_metrics_scope_begin(CfreeCompiler*, const char* name);
-void cfree_frontend_metrics_scope_end(CfreeCompiler*, const char* name);
-void cfree_frontend_metrics_count(CfreeCompiler*, const char* name,
+void cfree_frontend_metrics_scope_begin(CfreeCompiler *, const char *name);
+void cfree_frontend_metrics_scope_end(CfreeCompiler *, const char *name);
+void cfree_frontend_metrics_count(CfreeCompiler *, const char *name,
uint64_t value);
-_Noreturn void cfree_frontend_fatal(CfreeCompiler*, CfreeSrcLoc,
- const char* fmt, ...);
-_Noreturn void cfree_frontend_vfatal(CfreeCompiler*, CfreeSrcLoc,
- const char* fmt, va_list);
+_Noreturn void cfree_frontend_fatal(CfreeCompiler *, CfreeSrcLoc,
+ const char *fmt, ...);
+_Noreturn void cfree_frontend_vfatal(CfreeCompiler *, CfreeSrcLoc,
+ const char *fmt, va_list);
#endif
diff --git a/include/cfree/hashmap.h b/include/cfree/hashmap.h
@@ -1,170 +0,0 @@
-#ifndef CFREE_HASHMAP_H
-#define CFREE_HASHMAP_H
-
-#include <cfree.h>
-#include <stdint.h>
-#include <string.h>
-
-static inline uint32_t cfree_hash_u32(uint32_t x) {
- x += 0x9e3779b9u;
- x ^= x >> 16;
- x *= 0x7feb352du;
- x ^= x >> 15;
- x *= 0x846ca68bu;
- x ^= x >> 16;
- return x;
-}
-
-static inline uint32_t cfree_hash_u64(uint64_t x) {
- x ^= x >> 33;
- x *= 0xff51afd7ed558ccdULL;
- x ^= x >> 33;
- x *= 0xc4ceb9fe1a85ec53ULL;
- x ^= x >> 33;
- return (uint32_t)x;
-}
-
-#define CFREE_HASHMAP_LOAD_NUM 3u
-#define CFREE_HASHMAP_LOAD_DEN 4u
-#define CFREE_HASHMAP_INIT_CAP 16u
-
-#define CFREE_HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) \
- typedef struct NAME##Slot { \
- KT k; \
- VT v; \
- } NAME##Slot; \
- typedef struct NAME { \
- CfreeHeap* heap; \
- NAME##Slot* slots; \
- uint32_t cap; \
- uint32_t used; \
- } NAME; \
- \
- __attribute__((unused)) static void NAME##_resize(NAME* m, \
- uint32_t new_cap) { \
- NAME##Slot* fresh; \
- uint32_t i, mask; \
- fresh = (NAME##Slot*)m->heap->alloc(m->heap, sizeof(*fresh) * new_cap, \
- _Alignof(NAME##Slot)); \
- if (!fresh) return; \
- memset(fresh, 0, sizeof(*fresh) * new_cap); \
- mask = new_cap - 1u; \
- for (i = 0; i < m->cap; ++i) { \
- KT k = m->slots[i].k; \
- uint32_t j; \
- if (!(k)) continue; \
- j = HASH_FN(k) & mask; \
- while (fresh[j].k) j = (j + 1u) & mask; \
- fresh[j] = m->slots[i]; \
- } \
- if (m->slots) \
- m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \
- m->slots = fresh; \
- m->cap = new_cap; \
- } \
- \
- __attribute__((unused)) static inline void NAME##_init_cap( \
- NAME* m, CfreeHeap* h, uint32_t cap) { \
- m->heap = h; \
- m->slots = NULL; \
- m->cap = 0; \
- m->used = 0; \
- if (cap) NAME##_resize(m, cap); \
- } \
- \
- __attribute__((unused)) static inline void NAME##_init(NAME* m, \
- CfreeHeap* h) { \
- NAME##_init_cap(m, h, CFREE_HASHMAP_INIT_CAP); \
- } \
- \
- __attribute__((unused)) static inline void NAME##_fini(NAME* m) { \
- if (m->slots) \
- m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \
- m->slots = NULL; \
- m->cap = m->used = 0; \
- } \
- \
- __attribute__((unused)) static inline VT* NAME##_get(const NAME* m, KT k) { \
- uint32_t mask, j; \
- if (m->cap == 0 || !(k)) return NULL; \
- mask = m->cap - 1u; \
- j = HASH_FN(k) & mask; \
- while (m->slots[j].k) { \
- if (m->slots[j].k == (k)) return &m->slots[j].v; \
- j = (j + 1u) & mask; \
- } \
- return NULL; \
- } \
- \
- __attribute__((unused)) static inline int NAME##_set(NAME* m, KT k, VT v) { \
- uint32_t mask, j; \
- if (m->cap == 0 || \
- m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \
- NAME##_resize(m, m->cap ? m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \
- mask = m->cap - 1u; \
- j = HASH_FN(k) & mask; \
- while (m->slots[j].k) { \
- if (m->slots[j].k == (k)) { \
- m->slots[j].v = (v); \
- return 0; \
- } \
- j = (j + 1u) & mask; \
- } \
- m->slots[j].k = (k); \
- m->slots[j].v = (v); \
- m->used++; \
- return 1; \
- } \
- \
- __attribute__((unused)) static inline int NAME##_try_insert( \
- NAME* m, KT k, VT v, VT* existing_out) { \
- uint32_t mask, j; \
- if (m->cap == 0 || \
- m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \
- NAME##_resize(m, m->cap ? m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \
- mask = m->cap - 1u; \
- j = HASH_FN(k) & mask; \
- while (m->slots[j].k) { \
- if (m->slots[j].k == (k)) { \
- if (existing_out) *existing_out = m->slots[j].v; \
- return 0; \
- } \
- j = (j + 1u) & mask; \
- } \
- m->slots[j].k = (k); \
- m->slots[j].v = (v); \
- m->used++; \
- return 1; \
- } \
- \
- __attribute__((unused)) static inline void NAME##_del(NAME* m, KT k) { \
- uint32_t mask, j; \
- if (m->cap == 0 || !(k)) return; \
- mask = m->cap - 1u; \
- j = HASH_FN(k) & mask; \
- while (m->slots[j].k) { \
- if (m->slots[j].k == (k)) { \
- uint32_t i = (j + 1u) & mask; \
- m->slots[j].k = 0; \
- m->used--; \
- while (m->slots[i].k) { \
- KT rk = m->slots[i].k; \
- VT rv = m->slots[i].v; \
- uint32_t nh; \
- m->slots[i].k = 0; \
- m->used--; \
- nh = HASH_FN(rk) & mask; \
- while (m->slots[nh].k) nh = (nh + 1u) & mask; \
- m->slots[nh].k = rk; \
- m->slots[nh].v = rv; \
- m->used++; \
- i = (i + 1u) & mask; \
- } \
- return; \
- } \
- j = (j + 1u) & mask; \
- } \
- } \
- struct NAME
-
-#endif
diff --git a/include/cfree/jit.h b/include/cfree/jit.h
@@ -0,0 +1,76 @@
+#ifndef CFREE_JIT_H
+#define CFREE_JIT_H
+
+#include <cfree/arch.h>
+#include <cfree/object.h>
+#include <cfree/objbuild.h>
+
+/*
+ * JIT image API.
+ *
+ * Linker-produced JIT images own mapped pages, resolved symbols, and a
+ * read-only object view for inspection/debug consumers.
+ */
+
+enum { /* protection bits; presumably OR-ed into the `int prot` of CfreeExecMem.reserve/protect */
+ CFREE_PROT_NONE = 0,
+ CFREE_PROT_READ = 1 << 0,
+ CFREE_PROT_WRITE = 1 << 1,
+ CFREE_PROT_EXEC = 1 << 2,
+};
+
+typedef struct CfreeExecMemRegion { /* `out` of CfreeExecMem.reserve; passed back to CfreeExecMem.release */
+ void *write; /* presumably the address code is written through; confirm */
+ void *runtime; /* presumably the address code executes from; may equal `write`; confirm */
+ size_t size;
+ void *token; /* presumably opaque bookkeeping for the callback provider; confirm */
+} CfreeExecMemRegion;
+
+typedef struct CfreeExecMem { /* callback table referenced by CfreeJitHost.execmem */
+ size_t page_size;
+ CfreeStatus (*reserve)(void *user, size_t size, int prot,
+ CfreeExecMemRegion *out);
+ CfreeStatus (*protect)(void *user, void *addr, size_t size, int prot);
+ void (*release)(void *user, CfreeExecMemRegion *region);
+ void (*flush_icache)(void *user, void *addr, size_t size);
+ void *user; /* presumably forwarded as the first argument of each callback above */
+} CfreeExecMem;
+
+typedef struct CfreeJitTls { /* callback table referenced by CfreeJitHost.tls */
+ void *(*ctx_new)(void *user, const void *init_bytes, size_t image_filesz,
+ size_t image_size, size_t align); /* presumably image_filesz initialized bytes inside an image_size block; confirm */
+ void (*ctx_destroy)(void *user, void *ctx); /* presumably receives a value returned by ctx_new */
+ void *user;
+} CfreeJitTls;
+
+typedef struct CfreeJitHost {
+ const CfreeExecMem *execmem;
+ const CfreeJitTls *tls;
+} CfreeJitHost;
+
+void cfree_jit_free(CfreeJit *);
+void *cfree_jit_lookup(CfreeJit *, const char *name);
+CfreeStatus cfree_jit_append_obj(CfreeJit *, CfreeObjBuilder *);
+uint64_t cfree_jit_generation(CfreeJit *);
+void cfree_jit_run_dtors(CfreeJit *);
+
+const CfreeObjFile *cfree_jit_view(CfreeJit *);
+CfreeStatus cfree_jit_addr_to_sym(CfreeJit *, uint64_t addr,
+ const char **name_out, uint64_t *off_out);
+uint64_t cfree_jit_runtime_to_image(CfreeJit *, uint64_t runtime_pc);
+uint64_t cfree_jit_image_to_runtime(CfreeJit *, uint64_t image_vaddr);
+
+typedef struct CfreeJitSymIter CfreeJitSymIter;
+
+typedef struct CfreeJitSym {
+ const char *name;
+ uint64_t addr;
+ uint64_t size;
+ CfreeSymKind kind;
+} CfreeJitSym;
+
+CfreeStatus cfree_jit_sym_iter_new(CfreeJit *, CfreeJitSymIter **out);
+CfreeIterResult cfree_jit_sym_iter_next(CfreeJitSymIter *, CfreeJitSym *out);
+void cfree_jit_sym_iter_free(CfreeJitSymIter *);
+
+#endif
diff --git a/include/cfree/link.h b/include/cfree/link.h
@@ -0,0 +1,184 @@
+#ifndef CFREE_LINK_H
+#define CFREE_LINK_H
+
+#include <cfree/core.h>
+#include <cfree/objbuild.h>
+
+/*
+ * Linker API.
+ *
+ * Inputs are explicit byte/object arrays. Path lookup, option parsing, and
+ * response-file handling are driver responsibilities.
+ */
+
+typedef void *(*CfreeExternResolver)(void *user, const char *name);
+typedef struct CfreeJitHost CfreeJitHost;
+
+typedef enum CfreeBuildIdMode {
+ CFREE_BUILDID_NONE,
+ CFREE_BUILDID_SHA256,
+ CFREE_BUILDID_UUID,
+ CFREE_BUILDID_USER,
+} CfreeBuildIdMode;
+
+typedef struct CfreeLinkExpr CfreeLinkExpr;
+
+typedef enum CfreeLinkExprKind {
+ CFREE_LE_INT,
+ CFREE_LE_DOT,
+ CFREE_LE_SYM,
+ CFREE_LE_REGION_ORIGIN,
+ CFREE_LE_REGION_LENGTH,
+ CFREE_LE_ADD,
+ CFREE_LE_SUB,
+ CFREE_LE_MUL,
+ CFREE_LE_DIV,
+ CFREE_LE_AND,
+ CFREE_LE_OR,
+ CFREE_LE_XOR,
+ CFREE_LE_SHL,
+ CFREE_LE_SHR,
+ CFREE_LE_ALIGN,
+ CFREE_LE_MAX,
+ CFREE_LE_MIN,
+} CfreeLinkExprKind;
+
+struct CfreeLinkExpr { /* expression node; operands are reached through const pointers */
+ uint8_t kind; /* CfreeLinkExprKind */
+ union {
+ int64_t int_val; /* presumably used with CFREE_LE_INT */
+ const char *name; /* presumably used with CFREE_LE_SYM and the CFREE_LE_REGION_* kinds */
+ struct {
+ const CfreeLinkExpr *lhs;
+ const CfreeLinkExpr *rhs;
+ } bin; /* presumably the two-operand kinds (ADD..SHR, MAX, MIN) */
+ struct {
+ const CfreeLinkExpr *val;
+ const CfreeLinkExpr *align;
+ } align; /* presumably used with CFREE_LE_ALIGN */
+ } v; /* NOTE(review): CFREE_LE_DOT appears to need no payload; confirm */
+};
+
+typedef enum CfreeLinkRegionFlag {
+ CFREE_LRF_R = 1u << 0,
+ CFREE_LRF_W = 1u << 1,
+ CFREE_LRF_X = 1u << 2,
+} CfreeLinkRegionFlag;
+
+typedef struct CfreeLinkRegion {
+ const char *name;
+ uint8_t flags; /* CfreeLinkRegionFlag */
+ uint64_t origin;
+ uint64_t length;
+} CfreeLinkRegion;
+
+typedef struct CfreeLinkInputMatch {
+ const char *file_pattern; /* NULL means "*" */
+ const char *section_pattern;
+ int keep;
+} CfreeLinkInputMatch;
+
+typedef enum CfreeLinkAsnKind {
+ CFREE_LAS_DOT,
+ CFREE_LAS_SYM,
+ CFREE_LAS_PROVIDE,
+} CfreeLinkAsnKind;
+
+typedef struct CfreeLinkAssignment {
+ uint8_t kind; /* CfreeLinkAsnKind */
+ const char *sym;
+ const CfreeLinkExpr *expr;
+} CfreeLinkAssignment;
+
+typedef struct CfreeLinkOutputSection {
+ const char *name;
+ const CfreeLinkExpr *vma;
+ const CfreeLinkExpr *lma;
+ const CfreeLinkInputMatch *inputs;
+ uint32_t ninputs;
+ const char *region;
+ const char *load_region;
+ const CfreeLinkAssignment *asns;
+ uint32_t nasns;
+} CfreeLinkOutputSection;
+
+typedef struct CfreeLinkScript {
+ const char *entry;
+ const CfreeLinkRegion *regions;
+ uint32_t nregions;
+ const CfreeLinkOutputSection *sections;
+ uint32_t nsections;
+ const CfreeLinkAssignment *top_asns;
+ uint32_t ntop_asns;
+} CfreeLinkScript;
+
+CfreeStatus cfree_link_script_parse(const CfreeContext *, const char *text,
+ size_t len, CfreeLinkScript **out);
+void cfree_link_script_free(const CfreeContext *, CfreeLinkScript *);
+
+typedef enum CfreeLinkMode {
+ CFREE_LM_DEFAULT,
+ CFREE_LM_STATIC,
+ CFREE_LM_DYNAMIC,
+ CFREE_LM_AS_NEEDED,
+} CfreeLinkMode;
+
+typedef struct CfreeLinkArchiveInput { /* element type of CfreeLinkInputs.archives */
+ CfreeBytes bytes;
+ uint8_t link_mode; /* CfreeLinkMode */
+ uint8_t whole_archive; /* presumably a 0/1 flag: link every member; confirm */
+ uint8_t group_id; /* NOTE(review): grouping rule and meaning of 0 are not stated here */
+ uint8_t pad;
+} CfreeLinkArchiveInput;
+
+typedef struct CfreeLinkInputs { /* embedded as `inputs` in the Exe/Shared/Jit link option structs */
+ CfreeObjBuilder *const *objs;
+ uint32_t nobjs; /* presumably the element count of `objs`; same pairing for the n* fields below */
+ const CfreeBytes *obj_bytes;
+ uint32_t nobj_bytes;
+ const CfreeLinkArchiveInput *archives;
+ uint32_t narchives;
+ const CfreeBytes *dso_bytes;
+ uint32_t ndso_bytes;
+ const CfreeLinkScript *linker_script;
+ const char *entry; /* NOTE(review): precedence vs. linker_script->entry is not stated */
+ uint8_t build_id_mode; /* CfreeBuildIdMode */
+ const uint8_t *build_id_bytes; /* presumably read only for CFREE_BUILDID_USER; confirm */
+ uint32_t build_id_len;
+} CfreeLinkInputs;
+
+typedef struct CfreeExeLinkOptions {
+ CfreeLinkInputs inputs;
+ int gc_sections;
+ int pie;
+ const char *interp_path;
+} CfreeExeLinkOptions;
+
+typedef struct CfreeSharedLinkOptions {
+ CfreeLinkInputs inputs;
+ const char *soname;
+ const char *const *rpaths;
+ uint32_t nrpaths;
+ const char *const *runpaths;
+ uint32_t nrunpaths;
+ const char *const *exports;
+ uint32_t nexports;
+ int allow_undefined;
+ int gc_sections;
+} CfreeSharedLinkOptions;
+
+typedef struct CfreeJitLinkOptions {
+ CfreeLinkInputs inputs;
+ int gc_sections;
+ CfreeExternResolver extern_resolver;
+ void *extern_resolver_user;
+} CfreeJitLinkOptions;
+
+CfreeStatus cfree_link_exe(CfreeCompiler *, const CfreeExeLinkOptions *,
+ CfreeWriter *out);
+CfreeStatus cfree_link_shared(CfreeCompiler *, const CfreeSharedLinkOptions *,
+ CfreeWriter *out);
+CfreeStatus cfree_link_jit(CfreeCompiler *, const CfreeJitLinkOptions *,
+ const CfreeJitHost *, CfreeJit **out_jit);
+
+#endif
diff --git a/include/cfree/objbuild.h b/include/cfree/objbuild.h
@@ -0,0 +1,82 @@
+#ifndef CFREE_OBJBUILD_H
+#define CFREE_OBJBUILD_H
+
+#include <cfree/objmodel.h>
+
+/*
+ * Format-neutral relocatable object builder.
+ *
+ * This is for embedders that want to synthesize object files directly without
+ * going through the CG API. It is intentionally lower level than cfree/cg.h:
+ * callers choose sections, symbol bindings, raw bytes, and target relocation
+ * kinds themselves.
+ */
+
+typedef struct CfreeObjSectionDesc {
+ CfreeSym name;
+ CfreeSecKind kind;
+ uint32_t flags; /* CfreeSecFlag */
+ uint32_t align; /* 0 means default; otherwise power of two */
+ uint32_t entsize; /* 0 means none */
+} CfreeObjSectionDesc;
+
+typedef struct CfreeObjSymbolDesc {
+ CfreeSym name;
+ CfreeSymBind bind;
+ CfreeSymKind kind;
+ CfreeObjSection section; /* CFREE_SECTION_NONE for undef/abs/common */
+ uint64_t value;
+ uint64_t size;
+} CfreeObjSymbolDesc;
+
+typedef struct CfreeObjRelocDesc {
+ CfreeObjSection section;
+ uint64_t offset;
+ CfreeRelocKind kind;
+ CfreeObjSymbol symbol;
+ int64_t addend;
+} CfreeObjRelocDesc;
+
+CfreeStatus cfree_obj_builder_new(CfreeCompiler *, CfreeObjBuilder **out);
+void cfree_obj_builder_free(CfreeObjBuilder *);
+
+CfreeStatus cfree_obj_builder_section(CfreeObjBuilder *,
+ const CfreeObjSectionDesc *,
+ CfreeObjSection *out);
+CfreeStatus cfree_obj_builder_section_group(CfreeObjBuilder *, CfreeObjSection,
+ CfreeObjGroup);
+
+CfreeStatus cfree_obj_builder_pos(CfreeObjBuilder *, CfreeObjSection,
+ uint64_t *out);
+CfreeStatus cfree_obj_builder_align(CfreeObjBuilder *, CfreeObjSection,
+ uint32_t align, uint64_t *new_pos_out);
+CfreeStatus cfree_obj_builder_write(CfreeObjBuilder *, CfreeObjSection,
+ const void *data, size_t n);
+CfreeStatus cfree_obj_builder_reserve(CfreeObjBuilder *, CfreeObjSection,
+ size_t n, void **out);
+CfreeStatus cfree_obj_builder_reserve_bss(CfreeObjBuilder *, CfreeObjSection,
+ uint64_t size, uint32_t align);
+CfreeStatus cfree_obj_builder_patch(CfreeObjBuilder *, CfreeObjSection,
+ uint64_t offset, const void *data,
+ size_t n);
+
+CfreeStatus cfree_obj_builder_symbol(CfreeObjBuilder *,
+ const CfreeObjSymbolDesc *,
+ CfreeObjSymbol *out);
+CfreeStatus cfree_obj_builder_symbol_define(CfreeObjBuilder *, CfreeObjSymbol,
+ CfreeObjSection, uint64_t value,
+ uint64_t size);
+CfreeStatus cfree_obj_builder_reloc(CfreeObjBuilder *,
+ const CfreeObjRelocDesc *);
+
+CfreeStatus cfree_obj_builder_group(CfreeObjBuilder *, CfreeSym name,
+ CfreeObjSymbol signature, uint32_t flags,
+ CfreeObjGroup *out);
+CfreeStatus cfree_obj_builder_group_add_section(CfreeObjBuilder *,
+ CfreeObjGroup,
+ CfreeObjSection);
+
+CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder *);
+CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder *, CfreeWriter *);
+
+#endif
diff --git a/include/cfree/object.h b/include/cfree/object.h
@@ -0,0 +1,59 @@
+#ifndef CFREE_OBJECT_H
+#define CFREE_OBJECT_H
+
+#include <cfree/objmodel.h>
+
+/*
+ * Object-file detection and read-only inspection.
+ *
+ * Object readers keep the caller's byte storage borrowed for the lifetime of
+ * CfreeObjFile. Strings returned by iterators are owned by the object file and
+ * remain valid until cfree_obj_free.
+ */
+
+typedef enum CfreeBinFmt {
+ CFREE_BIN_UNKNOWN = 0,
+ CFREE_BIN_AR,
+ CFREE_BIN_ELF,
+ CFREE_BIN_COFF,
+ CFREE_BIN_PE,
+ CFREE_BIN_MACHO,
+ CFREE_BIN_WASM,
+} CfreeBinFmt;
+
+typedef struct CfreeObjSymIter CfreeObjSymIter;
+typedef struct CfreeObjRelocIter CfreeObjRelocIter;
+
+CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len);
+CfreeStatus cfree_detect_target(const uint8_t *data, size_t len,
+ CfreeTarget *out);
+
+CfreeStatus cfree_obj_open(const CfreeContext *, const CfreeBytes *,
+ CfreeObjFile **out);
+void cfree_obj_free(CfreeObjFile *);
+
+CfreeObjFmt cfree_obj_fmt(const CfreeObjFile *);
+CfreeTarget cfree_obj_target(const CfreeObjFile *);
+
+uint32_t cfree_obj_nsections(const CfreeObjFile *);
+CfreeStatus cfree_obj_section(const CfreeObjFile *, CfreeObjSection idx,
+ CfreeObjSecInfo *out);
+CfreeStatus cfree_obj_section_data(const CfreeObjFile *, CfreeObjSection idx,
+ const uint8_t **data_out, size_t *len_out);
+CfreeStatus cfree_obj_section_by_name(const CfreeObjFile *, const char *name,
+ CfreeObjSection *out);
+
+CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile *, const char *name,
+ CfreeObjSymInfo *out);
+
+CfreeStatus cfree_obj_symiter_new(CfreeObjFile *, CfreeObjSymIter **out);
+CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter *,
+ CfreeObjSymInfo *out);
+void cfree_obj_symiter_free(CfreeObjSymIter *);
+
+CfreeStatus cfree_obj_reliter_new(CfreeObjFile *, CfreeObjRelocIter **out);
+CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter *,
+ CfreeObjReloc *out);
+void cfree_obj_reliter_free(CfreeObjRelocIter *);
+
+#endif
diff --git a/include/cfree/objmodel.h b/include/cfree/objmodel.h
@@ -0,0 +1,75 @@
+#ifndef CFREE_OBJMODEL_H
+#define CFREE_OBJMODEL_H
+
+#include <cfree/core.h>
+
+/*
+ * Format-neutral object model shared by object builders, readers, linkers,
+ * disassemblers, and JIT image inspection.
+ */
+
+#define CFREE_SECTION_NONE UINT32_MAX /* "no section" value for CfreeObjSection */
+#define CFREE_OBJ_SYMBOL_NONE UINT32_MAX /* presumably "no symbol" for CfreeObjSymbol */
+#define CFREE_OBJ_GROUP_NONE UINT32_MAX /* presumably "no group" for CfreeObjGroup */
+
+typedef uint32_t CfreeObjSection; /* 32-bit handles; UINT32_MAX is taken by the *_NONE macros above */
+typedef uint32_t CfreeObjSymbol;
+typedef uint32_t CfreeObjGroup;
+
+typedef enum CfreeSecKind {
+ CFREE_SEC_TEXT,
+ CFREE_SEC_RODATA,
+ CFREE_SEC_DATA,
+ CFREE_SEC_BSS,
+ CFREE_SEC_DEBUG,
+ CFREE_SEC_OTHER,
+} CfreeSecKind;
+
+typedef enum CfreeSecFlag {
+ CFREE_SF_EXEC = 1u << 0,
+ CFREE_SF_WRITE = 1u << 1,
+ CFREE_SF_ALLOC = 1u << 2,
+ CFREE_SF_TLS = 1u << 3,
+ CFREE_SF_MERGE = 1u << 4,
+ CFREE_SF_STRINGS = 1u << 5,
+} CfreeSecFlag;
+
+typedef enum CfreeObjGroupFlag {
+ CFREE_OBJ_GROUP_COMDAT = 1u << 0,
+} CfreeObjGroupFlag;
+
+typedef struct CfreeRelocKind { /* relocation kind tagged with architecture and object format */
+ CfreeArchKind arch;
+ CfreeObjFmt obj_fmt;
+ uint32_t code; /* presumably the relocation number native to (arch, obj_fmt); confirm */
+} CfreeRelocKind;
+
+typedef struct CfreeObjSecInfo {
+ const char *name;
+ CfreeSecKind kind;
+ uint32_t flags; /* CfreeSecFlag */
+ uint64_t size; /* bytes; BSS uses virtual size */
+ uint32_t align; /* power of two; 1 means no special alignment */
+ uint32_t entsize; /* section entry size, or 0 */
+} CfreeObjSecInfo;
+
+typedef struct CfreeObjSymInfo {
+ const char *name;
+ CfreeSymBind bind;
+ CfreeSymKind kind;
+ CfreeObjSection section;
+ uint64_t value;
+ uint64_t size;
+} CfreeObjSymInfo;
+
+typedef struct CfreeObjReloc {
+ CfreeObjSection section;
+ uint64_t offset;
+ CfreeObjSymbol sym;
+ const char *sym_name;
+ int64_t addend;
+ CfreeRelocKind kind;
+ const char *kind_name; /* diagnostic spelling, when known */
+} CfreeObjReloc;
+
+#endif
diff --git a/include/cfree/source.h b/include/cfree/source.h
@@ -0,0 +1,36 @@
+#ifndef CFREE_SOURCE_H
+#define CFREE_SOURCE_H
+
+#include <cfree/core.h>
+
+/*
+ * Compiler source registry.
+ *
+ * Language frontends use this to assign stable file ids to physical files,
+ * in-memory inputs, and builtin pseudo-files, then record include edges for
+ * dependency reporting and diagnostics.
+ */
+
+CfreeStatus cfree_source_add_file(CfreeCompiler *, const char *path,
+ int system_header, uint32_t *file_id_out);
+CfreeStatus cfree_source_add_memory(CfreeCompiler *, const char *name,
+ uint32_t *file_id_out);
+CfreeStatus cfree_source_add_builtin(CfreeCompiler *, const char *name,
+ uint32_t *file_id_out);
+CfreeStatus cfree_source_add_include(CfreeCompiler *, uint32_t includer_file_id,
+ uint32_t included_file_id,
+ CfreeSrcLoc loc, int system);
+
+typedef struct CfreeSourceFile { /* `out` of cfree_source_file */
+ uint32_t id;
+ CfreeSym name;
+ CfreeSym path;
+ uint8_t kind; /* NOTE(review): enumerators are not declared in this header; presumably file/memory/builtin */
+ uint8_t system_header; /* presumably the flag given to cfree_source_add_file; confirm */
+ uint16_t pad; /* presumably explicit tail padding */
+} CfreeSourceFile;
+
+CfreeStatus cfree_source_file(CfreeCompiler *, uint32_t file_id,
+ CfreeSourceFile *out);
+
+#endif
diff --git a/include/cfree/support/arena.h b/include/cfree/support/arena.h
@@ -0,0 +1,30 @@
+#ifndef CFREE_SUPPORT_ARENA_H
+#define CFREE_SUPPORT_ARENA_H
+
+#include <cfree/core.h>
+
+/*
+ * Opaque bump allocator for frontends and other short-lived public helpers.
+ * Individual allocations are not freed; reset/free releases arena storage in
+ * bulk.
+ */
+
+typedef struct CfreeArena CfreeArena;
+
+CfreeStatus cfree_arena_new(CfreeHeap *, size_t block_size, CfreeArena **out);
+void cfree_arena_free(CfreeArena *);
+void cfree_arena_reset(CfreeArena *);
+void *cfree_arena_alloc(CfreeArena *, size_t size, size_t align); /* NOTE(review): failure result (NULL?) is not stated */
+void *cfree_arena_zalloc(CfreeArena *, size_t size, size_t align); /* presumably cfree_arena_alloc plus zero fill */
+char *cfree_arena_strdup(CfreeArena *, const char *s, size_t len); /* presumably copies len bytes and NUL-terminates; confirm */
+
+#define cfree_arena_new_obj(a, T) \
+ ((T *)cfree_arena_alloc((a), sizeof(T), _Alignof(T))) /* one T via cfree_arena_alloc, cast to T * */
+#define cfree_arena_znew_obj(a, T) \
+ ((T *)cfree_arena_zalloc((a), sizeof(T), _Alignof(T))) /* one T via cfree_arena_zalloc, cast to T * */
+#define cfree_arena_array(a, T, n) \
+ ((T *)cfree_arena_alloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) /* n Ts; the size product is not overflow-checked */
+#define cfree_arena_zarray(a, T, n) \
+ ((T *)cfree_arena_zalloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) /* n Ts; the size product is not overflow-checked */
+
+#endif
diff --git a/include/cfree/support/hashmap.h b/include/cfree/support/hashmap.h
@@ -0,0 +1,176 @@
+#ifndef CFREE_SUPPORT_HASHMAP_H
+#define CFREE_SUPPORT_HASHMAP_H
+
+#include <cfree/core.h>
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Mixes a 32-bit integer key into a 32-bit hash. The input is first offset by
+ * 0x9e3779b9 (so an input of 0 does not hash to 0), then run through two
+ * xor-shift/multiply rounds and a final xor-shift. The multipliers and shift
+ * amounts (16, 15, 16) are those of the "lowbias32" integer hash.
+ */
+static inline uint32_t cfree_hash_u32(uint32_t x) {
+  uint32_t h = x + 0x9e3779b9u;
+  h = (h ^ (h >> 16)) * 0x7feb352du;
+  h = (h ^ (h >> 15)) * 0x846ca68bu;
+  return h ^ (h >> 16);
+}
+
+/*
+ * Mixes a 64-bit integer key and returns the low 32 bits of the result. The
+ * xor-shift-by-33 / multiply sequence and its two constants are those of the
+ * MurmurHash3 64-bit finalizer. An input of 0 yields 0.
+ */
+static inline uint32_t cfree_hash_u64(uint64_t x) {
+  uint64_t h = x;
+  h = (h ^ (h >> 33)) * 0xff51afd7ed558ccdULL;
+  h = (h ^ (h >> 33)) * 0xc4ceb9fe1a85ec53ULL;
+  return (uint32_t)(h ^ (h >> 33));
+}
+
+/*
+ * Growth policy used by CFREE_HASHMAP_DEFINE: a map grows once used / cap
+ * reaches LOAD_NUM / LOAD_DEN (3/4), and a default-initialised map starts
+ * with INIT_CAP slots. Slot indices are computed as `hash & (cap - 1)`, so
+ * INIT_CAP must stay a power of two.
+ */
+#define CFREE_HASHMAP_LOAD_NUM 3u
+#define CFREE_HASHMAP_LOAD_DEN 4u
+#define CFREE_HASHMAP_INIT_CAP 16u
+
+/*
+ * Attribute placed on every helper generated by CFREE_HASHMAP_DEFINE. On
+ * GCC/Clang it marks the function as possibly unused; on other compilers it
+ * expands to nothing.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#define CFREE_HASHMAP_UNUSED __attribute__((unused))
+#else
+#define CFREE_HASHMAP_UNUSED
+#endif
+
+/*
+ * CFREE_HASHMAP_DEFINE(NAME, KT, VT, HASH_FN);
+ *
+ * Expands to an open-addressing, linear-probing hash map type `NAME` from KT
+ * keys to VT values plus static helpers operating on it. Slot storage is
+ * obtained through the CfreeHeap callbacks `alloc(heap, size, align)` and
+ * `free(heap, ptr, size)`.
+ *
+ * Requirements on the parameters:
+ *   - KT must be comparable with `==` and testable for zero. The zero key
+ *     marks an empty slot, so it can never be stored.
+ *   - HASH_FN(k) must yield an integer; only the low bits are used.
+ *
+ * Generated helpers:
+ *   NAME_init(m, heap)          Start with CFREE_HASHMAP_INIT_CAP slots.
+ *   NAME_init_cap(m, heap, cap) Start with `cap` slots, rounded up to a power
+ *                               of two; cap == 0 defers allocation. If the
+ *                               allocation fails the map is left empty and
+ *                               the first insert retries.
+ *   NAME_fini(m)                Release slot storage.
+ *   NAME_get(m, k)              Pointer to the stored value, or NULL.
+ *   NAME_set(m, k, v)           1 = inserted, 0 = overwrote existing value,
+ *                               -1 = k is the zero key or growth failed.
+ *   NAME_try_insert(m, k, v, e) 1 = inserted, 0 = key present (its value is
+ *                               copied to *e when e is non-NULL), -1 = k is
+ *                               the zero key or growth failed.
+ *   NAME_del(m, k)              Remove k if present.
+ *   NAME_resize(m, cap)         Rehash into `cap` slots (rounded up to a
+ *                               power of two); 1 on success, 0 on failure
+ *                               with the map left unchanged.
+ *
+ * The expansion ends in `struct NAME` so that the invocation is terminated by
+ * the caller's semicolon.
+ */
+#define CFREE_HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) \
+  typedef struct NAME##Slot { \
+    KT k; \
+    VT v; \
+  } NAME##Slot; \
+  typedef struct NAME { \
+    CfreeHeap* heap; \
+    NAME##Slot* slots; \
+    uint32_t cap; \
+    uint32_t used; \
+  } NAME; \
+  \
+  CFREE_HASHMAP_UNUSED static int NAME##_resize(NAME* m, uint32_t new_cap) { \
+    NAME##Slot* fresh; \
+    uint32_t i, mask, pow2 = 1u; \
+    /* Masking with cap - 1 only works for power-of-two capacities. */ \
+    while (pow2 < new_cap && pow2 < 0x80000000u) pow2 <<= 1; \
+    new_cap = pow2; \
+    /* Probing relies on at least one slot staying empty after the rehash. */ \
+    if (new_cap <= m->used) return 0; \
+    if (new_cap > SIZE_MAX / sizeof(*fresh)) return 0; \
+    fresh = (NAME##Slot*)m->heap->alloc(m->heap, sizeof(*fresh) * new_cap, \
+                                        _Alignof(NAME##Slot)); \
+    if (!fresh) return 0; \
+    memset(fresh, 0, sizeof(*fresh) * new_cap); \
+    mask = new_cap - 1u; \
+    for (i = 0; i < m->cap; ++i) { \
+      KT k = m->slots[i].k; \
+      uint32_t j; \
+      if (!(k)) continue; \
+      j = HASH_FN(k) & mask; \
+      while (fresh[j].k) j = (j + 1u) & mask; \
+      fresh[j] = m->slots[i]; \
+    } \
+    if (m->slots) \
+      m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \
+    m->slots = fresh; \
+    m->cap = new_cap; \
+    return 1; \
+  } \
+  \
+  /* Grows the table when the load factor is reached. Returns 1 when one */ \
+  /* more key can be inserted, 0 when the required growth failed. */ \
+  CFREE_HASHMAP_UNUSED static inline int NAME##_make_room(NAME* m) { \
+    if (m->cap != 0 && (uint64_t)m->used * CFREE_HASHMAP_LOAD_DEN < \
+                           (uint64_t)m->cap * CFREE_HASHMAP_LOAD_NUM) \
+      return 1; \
+    if (m->cap >= 0x80000000u) return 0; /* doubling would wrap */ \
+    return NAME##_resize(m, m->cap ? m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline void NAME##_init_cap( \
+      NAME* m, CfreeHeap* h, uint32_t cap) { \
+    m->heap = h; \
+    m->slots = NULL; \
+    m->cap = 0; \
+    m->used = 0; \
+    /* On failure the map stays empty; the first insert retries. */ \
+    if (cap) (void)NAME##_resize(m, cap); \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline void NAME##_init(NAME* m, \
+                                                      CfreeHeap* h) { \
+    NAME##_init_cap(m, h, CFREE_HASHMAP_INIT_CAP); \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline void NAME##_fini(NAME* m) { \
+    if (m->slots) \
+      m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \
+    m->slots = NULL; \
+    m->cap = m->used = 0; \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline VT* NAME##_get(const NAME* m, KT k) { \
+    uint32_t mask, j; \
+    if (m->cap == 0 || !(k)) return NULL; \
+    mask = m->cap - 1u; \
+    j = HASH_FN(k) & mask; \
+    while (m->slots[j].k) { \
+      if (m->slots[j].k == (k)) return &m->slots[j].v; \
+      j = (j + 1u) & mask; \
+    } \
+    return NULL; \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline int NAME##_set(NAME* m, KT k, VT v) { \
+    uint32_t mask, j; \
+    /* The zero key is the empty-slot marker and cannot be stored. */ \
+    if (!(k) || !NAME##_make_room(m)) return -1; \
+    mask = m->cap - 1u; \
+    j = HASH_FN(k) & mask; \
+    while (m->slots[j].k) { \
+      if (m->slots[j].k == (k)) { \
+        m->slots[j].v = (v); \
+        return 0; \
+      } \
+      j = (j + 1u) & mask; \
+    } \
+    m->slots[j].k = (k); \
+    m->slots[j].v = (v); \
+    m->used++; \
+    return 1; \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline int NAME##_try_insert( \
+      NAME* m, KT k, VT v, VT* existing_out) { \
+    uint32_t mask, j; \
+    /* The zero key is the empty-slot marker and cannot be stored. */ \
+    if (!(k) || !NAME##_make_room(m)) return -1; \
+    mask = m->cap - 1u; \
+    j = HASH_FN(k) & mask; \
+    while (m->slots[j].k) { \
+      if (m->slots[j].k == (k)) { \
+        if (existing_out) *existing_out = m->slots[j].v; \
+        return 0; \
+      } \
+      j = (j + 1u) & mask; \
+    } \
+    m->slots[j].k = (k); \
+    m->slots[j].v = (v); \
+    m->used++; \
+    return 1; \
+  } \
+  \
+  CFREE_HASHMAP_UNUSED static inline void NAME##_del(NAME* m, KT k) { \
+    uint32_t mask, j, i; \
+    if (m->cap == 0 || !(k)) return; \
+    mask = m->cap - 1u; \
+    j = HASH_FN(k) & mask; \
+    while (m->slots[j].k && !(m->slots[j].k == (k))) j = (j + 1u) & mask; \
+    if (!m->slots[j].k) return; /* key not present */ \
+    m->slots[j].k = 0; \
+    m->used--; \
+    /* Re-place every entry in the rest of the probe run so that the */ \
+    /* freed slot cannot cut a later lookup short. */ \
+    for (i = (j + 1u) & mask; m->slots[i].k; i = (i + 1u) & mask) { \
+      KT rk = m->slots[i].k; \
+      VT rv = m->slots[i].v; \
+      uint32_t nh = HASH_FN(rk) & mask; \
+      m->slots[i].k = 0; \
+      while (m->slots[nh].k) nh = (nh + 1u) & mask; \
+      m->slots[nh].k = rk; \
+      m->slots[nh].v = rv; \
+    } \
+  } \
+  struct NAME
+
+#endif