kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f95c8fbb62afcf93138dc8dd542c0e89ef107005
parent cb323097a119ba2428b1eb1457cff882a572a43a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 12:44:49 -0700

Public API rewrite

Diffstat:
Dinclude/cfree.h | 1763-------------------------------------------------------------------------------
Ainclude/cfree/arch.h | 29+++++++++++++++++++++++++++++
Ainclude/cfree/archive.h | 43+++++++++++++++++++++++++++++++++++++++++++
Minclude/cfree/cg.h | 14++++++++------
Ainclude/cfree/compile.h | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/core.h | 242+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/dbg.h | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/disasm.h | 42++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/dwarf.h | 182+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/emu.h | 45+++++++++++++++++++++++++++++++++++++++++++++
Minclude/cfree/frontend.h | 97+++++++++++++++++++++++++------------------------------------------------------
Dinclude/cfree/hashmap.h | 170-------------------------------------------------------------------------------
Ainclude/cfree/jit.h | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/link.h | 184+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/objbuild.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/object.h | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/objmodel.h | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/cfree/source.h | 36++++++++++++++++++++++++++++++++++++
Ainclude/cfree/support/arena.h | 30++++++++++++++++++++++++++++++
Ainclude/cfree/support/hashmap.h | 176+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
20 files changed, 1570 insertions(+), 2006 deletions(-)

diff --git a/include/cfree.h b/include/cfree.h @@ -1,1763 +0,0 @@ -#ifndef CFREE_H -#define CFREE_H - -/* libcfree's complete public API. The driver and any other consumer of - * libcfree only includes this single header — internal headers live under - * src/ and are not part of the stable surface. - * - * Every public identifier starts with cfree_, Cfree, or CFREE_. */ - -#include <stdarg.h> -#include <stddef.h> -#include <stdint.h> - -/* ============================================================ - * Opaque handles - * ============================================================ */ -typedef struct CfreeCompiler CfreeCompiler; -typedef struct CfreePipeline CfreePipeline; -typedef struct CfreeObjBuilder CfreeObjBuilder; -typedef struct CfreeJit CfreeJit; -typedef struct CfreeJitSession CfreeJitSession; -typedef struct CfreeObjFile CfreeObjFile; -typedef struct CfreeDebugInfo CfreeDebugInfo; -typedef struct CfreeBytesInput CfreeBytesInput; -typedef struct CfreeCompileOptions CfreeCompileOptions; -typedef struct CfreePpOptions CfreePpOptions; -typedef uint32_t CfreeSym; - -/* ============================================================ - * Source locations (carried in diagnostics) - * ============================================================ */ -typedef struct CfreeSrcLoc { - uint32_t file_id; - uint32_t line; - uint32_t col; -} CfreeSrcLoc; - -/* ============================================================ - * Frame snapshot - * ============================================================ - * Canonical register snapshot. Shared by the unwinder (cfree_dwarf_unwind_step) - * and the JIT session's stop notifications (CfreeStopInfo.regs). `pc` and - * `cfa` are the program counter and canonical frame address; `regs` uses the - * target arch's DWARF register numbering. Registers beyond the arch's defined - * range are zero. 
*/ -typedef struct CfreeUnwindFrame { - uint64_t pc; - uint64_t cfa; - uint64_t regs[32]; -} CfreeUnwindFrame; - -/* ============================================================ - * Host-implemented interfaces (vtables) - * ============================================================ - * Heap, DiagSink, and Writer are implemented *outside* libcfree, by the - * host. This keeps libcfree free of stdio, malloc, and POSIX I/O — the - * host provides them. Subclass by placing the struct as the first field - * of an enclosing type and casting; libcfree calls the function pointers - * with the base pointer. */ - -typedef struct CfreeHeap CfreeHeap; -struct CfreeHeap { - void *(*alloc)(CfreeHeap *, size_t size, size_t align); - void *(*realloc)(CfreeHeap *, void *p, size_t old_size, size_t new_size, - size_t align); - void (*free)(CfreeHeap *, void *p, size_t size); - void *user; -}; - -typedef enum CfreeDiagKind { - CFREE_DIAG_NOTE, - CFREE_DIAG_WARN, - CFREE_DIAG_ERROR, - CFREE_DIAG_FATAL, -} CfreeDiagKind; - -typedef struct CfreeDiagSink CfreeDiagSink; -struct CfreeDiagSink { - void (*emit)(CfreeDiagSink *, CfreeDiagKind, CfreeSrcLoc, const char *fmt, - va_list); - void *user; - /* libcfree maintains these counters; hosts may inspect, must not write. */ - uint32_t errors; - uint32_t warnings; -}; - -typedef struct CfreeWriter CfreeWriter; -struct CfreeWriter { - void (*write)(CfreeWriter *, const void *data, size_t n); - void (*seek)(CfreeWriter *, uint64_t offset); - uint64_t (*tell)(CfreeWriter *); - int (*error)(CfreeWriter *); - /* close is responsible for any host-side teardown (closing fds, freeing - * the enclosing struct). After close the pointer is invalid. 
*/ - void (*close)(CfreeWriter *); -}; - -/* ============================================================ - * Target description - * ============================================================ */ -typedef enum CfreeArchKind { - CFREE_ARCH_X86_32, - CFREE_ARCH_X86_64, - CFREE_ARCH_ARM_32, - CFREE_ARCH_ARM_64, - CFREE_ARCH_RV32, - CFREE_ARCH_RV64, - CFREE_ARCH_WASM, -} CfreeArchKind; - -typedef enum CfreeOSKind { - CFREE_OS_FREESTANDING, - CFREE_OS_LINUX, - CFREE_OS_MACOS, - CFREE_OS_WINDOWS, - CFREE_OS_WASI, -} CfreeOSKind; - -typedef enum CfreeObjFmt { - CFREE_OBJ_ELF, - CFREE_OBJ_COFF, - CFREE_OBJ_MACHO, - CFREE_OBJ_WASM, -} CfreeObjFmt; - -typedef enum CfreePic { - CFREE_PIC_NONE, - CFREE_PIC_PIC, - CFREE_PIC_PIE, -} CfreePic; - -/* CFREE_CM_DEFAULT is resolved per-arch (small on x86-64/AArch64, medium on - * RISC-V, etc.). PIC and code-model are independent: -fPIC -mcmodel=small and - * -fPIE -mcmodel=medium are both coherent. */ -typedef enum CfreeCodeModel { - CFREE_CM_DEFAULT, - CFREE_CM_SMALL, - CFREE_CM_MEDIUM, - CFREE_CM_LARGE, -} CfreeCodeModel; - -typedef struct CfreeTarget { - CfreeArchKind arch; - CfreeOSKind os; - CfreeObjFmt obj; - uint8_t ptr_size; /* 4 or 8 */ - uint8_t ptr_align; - uint8_t big_endian; - uint8_t pic; /* CfreePic; default CFREE_PIC_NONE */ - uint8_t code_model; /* CfreeCodeModel; default CFREE_CM_DEFAULT */ -} CfreeTarget; - -/* JIT note: cfree_link_jit and cfree_jit_from_image force pic = CFREE_PIC_PIC - * regardless of caller input — the mmap'd image's address is unknown until - * map time. The override happens at the linker entry, not silently inside the - * backend. */ - -/* ============================================================ - * Symbol classification - * ============================================================ - * Shared by the object inspector and the JIT symbol iterator. Bind - * captures linkage; Kind captures what the symbol points at. 
*/ -typedef enum CfreeSymBind { - CFREE_SB_LOCAL, - CFREE_SB_GLOBAL, - CFREE_SB_WEAK, -} CfreeSymBind; - -typedef enum CfreeSymKind { - CFREE_SK_UNDEF, - CFREE_SK_FUNC, - CFREE_SK_OBJ, - CFREE_SK_SECTION, - CFREE_SK_FILE, - CFREE_SK_COMMON, - CFREE_SK_TLS, - CFREE_SK_ABS, - /* Defined symbol with no specific type (e.g., assembly label or - * AArch64 mapping symbol). Distinct from CFREE_SK_UNDEF, which is - * the "undefined external" sentinel. */ - CFREE_SK_NOTYPE, - /* GNU IFUNC: function with runtime resolver (STT_GNU_IFUNC). */ - CFREE_SK_IFUNC, -} CfreeSymKind; - -/* ============================================================ - * Architecture: register name mapping - * ============================================================ - * DWARF register numbering varies per arch (CfreeUnwindFrame.regs is indexed - * by DWARF register number). These helpers translate between DWARF index and - * canonical assembler name (e.g. "rax", "x0", "a0") so dbg can render - * `info registers` and accept `set $rax = ...` syntax. - * - * Stateless and allocation-free — name strings are static library data. - * `cfree_arch_register_name` returns NULL for an unmapped DWARF index; - * `cfree_arch_register_index` returns 0 on a known name and 1 if the name - * is unknown. To enumerate every register defined for an arch, loop - * 0..cfree_arch_register_count(arch) calling cfree_arch_register_at; the - * iteration indices are dense in `[0, count)` and are unrelated to the - * DWARF indices, which are sparse (e.g. 32..63 are unused on aarch64). 
*/ -typedef struct CfreeArchReg { - uint32_t dwarf_idx; - const char *name; -} CfreeArchReg; - -const char *cfree_arch_register_name(CfreeArchKind, uint32_t dwarf_idx); -int cfree_arch_register_index(CfreeArchKind, const char *name, - uint32_t *idx_out); - -uint32_t cfree_arch_register_count(CfreeArchKind); -int cfree_arch_register_at(CfreeArchKind, uint32_t idx, CfreeArchReg *out); - -/* ============================================================ - * Host environment - * ============================================================ - * The host supplies a heap, optional file I/O, and a diag sink. The - * freestanding core never takes paths; path-shaped helpers in the driver - * feed bytes/Writers. */ -typedef struct CfreeFileData { - const uint8_t *data; - size_t size; - void *token; /* opaque ownership handle for release */ -} CfreeFileData; - -typedef struct CfreeFileIO { - int (*read_all)(void *user, const char *path, CfreeFileData *out); - void (*release)(void *user, CfreeFileData *); - CfreeWriter *(*open_writer)(void *user, const char *path); - void *user; -} CfreeFileIO; - -/* Executable-memory vtable. Required by the JIT mapper (cfree_jit_from_image) - * and the emu runtime; consulted by the linker for page-aligned segment - * layout. May be NULL for hosts that never JIT and never run the emu — link - * layout falls back to a 16 KiB page in that case. - * - * The vtable enforces strict W^X: no virtual page is ever simultaneously - * writable and executable. For regions that will eventually hold code - * (CFREE_PROT_EXEC in the requested perms) the host returns a dual mapping — - * two virtual addresses that alias the same physical memory, where the - * `write` alias has WRITE but never EXEC, and the `runtime` alias has - * EXEC after a corresponding protect() call but never WRITE. Callers - * populate code via the `write` alias and execute / take addresses against - * the `runtime` alias. 
For non-EXEC regions a single mapping suffices and - * write == runtime. - * - * reserve — allocate `size` bytes (page_size-aligned) whose final - * perms will be `prot`. On success returns 0 and fills - * *out (write/runtime/size/token); returns non-zero on - * failure. The returned `write` alias is always RW; - * `runtime` starts read-only and is flipped to final - * perms by protect(). - * protect — apply final perms via the runtime alias for [addr, - * addr+size) (page_size-aligned, lying inside the - * reservation's runtime alias). Returns 0 on success. - * release — free a prior reservation, including both aliases. - * flush_icache — make freshly written instructions visible to the CPU - * at [addr, addr+size) on the runtime alias. May be a - * no-op on x86; required on aarch64 before transferring - * control to JITed code. - * - * `prot` is a bitmask of CFREE_PROT_*. */ -enum { - CFREE_PROT_NONE = 0, - CFREE_PROT_READ = 1 << 0, - CFREE_PROT_WRITE = 1 << 1, - CFREE_PROT_EXEC = 1 << 2, -}; - -typedef struct CfreeExecMemRegion { - void *write; /* RW alias for population; never has EXEC */ - void *runtime; /* runtime/execution alias; never has WRITE. - For non-EXEC reservations equals `write`. */ - size_t size; /* page-aligned bytes */ - void *token; /* opaque host handle for release() */ -} CfreeExecMemRegion; - -typedef struct CfreeExecMem { - size_t page_size; - int (*reserve)(void *user, size_t size, int prot, CfreeExecMemRegion *out); - int (*protect)(void *user, void *addr, size_t size, int prot); - void (*release)(void *user, CfreeExecMemRegion *region); - void (*flush_icache)(void *user, void *addr, size_t size); - void *user; -} CfreeExecMem; - -/* Debugger OS vtable. Required by the JIT session (cfree_jit_session_new) so - * libcfree never includes <pthread.h>, <signal.h>, or platform headers for - * ucontext / W^X flips. May be NULL for hosts that never enter `dbg`. 
- * - * Threading model: a single worker thread is spawned per session; the REPL - * thread and worker hand off through two events (stop, resume). Signal - * handlers run on the worker thread, snapshot the host ucontext into a - * CfreeUnwindFrame, and call back into the session through on_fault. - * - * thread_start / _join — spawn worker, join on session teardown. - * thread_interrupt — async-signal-safe: deliver `interrupt_signo` - * to the worker thread (used by session_interrupt). - * event_* — one-shot binary events. The session creates two - * per worker; signal/wait must be safe to call - * from the worker's signal-handler context. - * signals_install — install handlers for SIGTRAP/SEGV/BUS/ILL/FPE - * plus `interrupt_signo`. Each handler: - * 1. snapshots ucontext into a CfreeUnwindFrame; - * 2. invokes ops->on_fault(session, signo, &regs); - * 3. on return, writes mutated regs back into - * ucontext before returning to the kernel. - * If on_fault returns nonzero the OS layer - * re-raises the signal to the host default - * (a fault the session declined to handle). - * signals_uninstall — restore prior dispositions on session teardown. - * interrupt_signo — host signal number reserved for STOP_INTERRUPT - * (e.g. SIGUSR2 on POSIX). - * code_write_begin/_end — open a write window over [runtime_addr, - * runtime_addr+n) inside an existing - * CfreeExecMem reservation. *write_out is the - * address through which the session writes the - * BRK / restore bytes. On dual-mapping hosts - * (Apple silicon) it is the write alias; on - * Linux it equals runtime_addr and the OS layer - * mprotect-flips RW<->RX around the window. - * flush_icache — make freshly patched code visible to the CPU - * at the runtime alias. Required on aarch64. - * guarded_copy — read/write `n` bytes between in-process - * addresses with a TLS sigsetjmp landing slot - * so SIGSEGV/SIGBUS during the copy returns - * nonzero instead of stopping the worker. 
The - * SEGV/BUS handlers in signals_install check - * this landing slot before delegating to - * on_fault. */ -typedef struct CfreeDbgSignalOps { - int (*on_fault)(void *session, int signo, CfreeUnwindFrame *regs); -} CfreeDbgSignalOps; - -typedef struct CfreeDbgOs { - int (*thread_start)(void *user, void (*fn)(void *), void *arg, - void **thread_out); - void (*thread_join)(void *user, void *thread); - int (*thread_interrupt)(void *user, void *thread); - - void *(*event_new)(void *user); - void (*event_free)(void *user, void *ev); - void (*event_wait)(void *user, void *ev); - void (*event_signal)(void *user, void *ev); - void (*event_reset)(void *user, void *ev); - - int (*signals_install)(void *user, const CfreeDbgSignalOps *ops, - void *session); - void (*signals_uninstall)(void *user); - int interrupt_signo; - - int (*code_write_begin)(void *user, void *runtime_addr, size_t n, - void **write_out); - void (*code_write_end)(void *user, void *runtime_addr, size_t n); - void (*flush_icache)(void *user, void *runtime_addr, size_t n); - - int (*guarded_copy)(void *user, void *dst, const void *src, size_t n); - - void *user; -} CfreeDbgOs; - -/* Host vtable for the JIT TLV thunk on Mach-O targets. - * - * `cfree run` on macOS-aarch64 needs to service Mach-O thread-local - * descriptor calls — there's no dyld in the JIT image to allocate the - * pthread key and rewrite descriptor[0] to a per-image thunk. libcfree - * provides the asm thunk (caller-save-preserving) but cannot itself - * include <pthread.h>, so the per-thread plumbing is supplied via this - * vtable. NULL is fine on hosts that never JIT TLV code. - * - * ctx_new — called once per JIT image at link time. Receives the - * TLS image: `image_size` bytes total, `image_filesz` - * of which are initialized from `init_bytes`, aligned - * to `align`. Returns an opaque ctx pointer that the - * thunk reads from descriptor[+8]. 
- * - * The returned ctx MUST satisfy a binary contract: the - * first 8 bytes contain a function pointer of type - * `void* (*)(void* ctx)` that returns the per-thread - * TLS block (allocating + seeding on first call from - * each thread). This is what the thunk calls; placing - * it inside the ctx lets the thunk avoid loading - * process-global state. - * - * ctx_destroy — called from cfree_jit_free. Implementations should - * delete the pthread_key (POSIX runs per-thread - * destructors then) and release the ctx storage. */ -typedef struct CfreeJitTls { - void *(*ctx_new)(void *user, const void *init_bytes, size_t image_filesz, - size_t image_size, size_t align); - void (*ctx_destroy)(void *user, void *ctx); - void *user; -} CfreeJitTls; - -typedef struct CfreeMetrics { - void (*scope_begin)(void *user, const char *name); - void (*scope_end)(void *user, const char *name); - void (*count)(void *user, const char *name, uint64_t value); - void *user; -} CfreeMetrics; - -typedef struct CfreeEnv { - CfreeHeap *heap; - const CfreeFileIO *file_io; /* may be NULL for purely in-memory pipelines */ - CfreeDiagSink *diag; - const CfreeExecMem *execmem; /* NULL ok unless JIT/emu paths run */ - const CfreeDbgOs *dbg_os; /* NULL ok unless `cfree dbg` paths run */ - const CfreeJitTls *jit_tls; /* NULL ok unless JIT TLV paths run */ - const CfreeMetrics *metrics; /* optional scoped metrics sink */ - /* Unix seconds since 1970-01-01 UTC, or negative for "no clock". Used - * by the preprocessor for __DATE__ / __TIME__ (negative → C11 §6.10.8.1 - * placeholders). The host decides the policy (SOURCE_DATE_EPOCH, - * wall clock, fixed value); libcfree never reads either. 
*/ - int64_t now; -} CfreeEnv; - -/* ============================================================ - * Compiler lifecycle - * ============================================================ */ -CfreeCompiler *cfree_compiler_new(CfreeTarget, const CfreeEnv *); -void cfree_compiler_free(CfreeCompiler *); -CfreeTarget cfree_compiler_target(CfreeCompiler *); - -/* Resolve a CfreeSrcLoc.file_id to the spelling used when the source was - * registered (typically the path passed to FileIO.read_all, or a memory- - * input label). Returns NULL when `c` is NULL or `file_id` doesn't name a - * registered file. The returned pointer is owned by the compiler and is - * valid until cfree_compiler_free. Diagnostic sinks use this to print - * `path:line:col` instead of the bare numeric `file_id`. */ -const char *cfree_compiler_file_name(CfreeCompiler *, uint32_t file_id); - -/* Intern a string into the compiler's global symbol pool. The returned symbol - * is stable until cfree_compiler_free and may be passed through public APIs - * that traffic in pre-interned names. 0 is reserved for "no symbol"; this - * entry never returns 0 for a non-NULL string. */ -CfreeSym cfree_sym_intern(CfreeCompiler *, const char *str); - -/* Returns the diagnostic sink registered at compiler construction. */ -CfreeDiagSink *cfree_compiler_diag_sink(CfreeCompiler *); - -/* ============================================================ - * Writer dispatch (inline) - * ============================================================ - * Callers obtain CfreeWriter*s from CfreeFileIO.open_writer or from - * cfree_writer_mem. The dispatch helpers below are pure inline thunks - * over the vtable — libcfree itself uses the vtable directly. 
*/ -static inline void cfree_writer_write(CfreeWriter *w, const void *d, size_t n) { - w->write(w, d, n); -} -static inline void cfree_writer_seek(CfreeWriter *w, uint64_t off) { - w->seek(w, off); -} -static inline uint64_t cfree_writer_tell(CfreeWriter *w) { return w->tell(w); } -static inline int cfree_writer_error(CfreeWriter *w) { return w->error(w); } -static inline void cfree_writer_close(CfreeWriter *w) { w->close(w); } - -/* In-memory writer backed by the supplied heap. Useful as a building - * block; the buffer is owned by the Writer and cfree_writer_mem_bytes - * returns its current contents (valid until the next write or close). */ -CfreeWriter *cfree_writer_mem(CfreeHeap *); -const uint8_t *cfree_writer_mem_bytes(CfreeWriter *, size_t *len_out); - -/* ============================================================ - * JIT - * ============================================================ - * cfree_link_jit produces a CfreeJit owning its mapped pages and resolved - * image. Symbol lookup is by name (object-local handles never escape - * libcfree); dlsym-shaped — the caller casts to whatever function - * signature the JITed symbol actually has (e.g. int(*)(int, char**) for - * `main`). Returns NULL on miss. */ -void cfree_jit_free(CfreeJit *); -void *cfree_jit_lookup(CfreeJit *, const char *name); -/* Experimental append-only JIT growth. Appends one finalized object into - * reserved JIT slack without moving existing code/data. Returns nonzero on - * duplicate strong definitions, unresolved references, capacity exhaustion, - * or relocation/protection failure. */ -int cfree_jit_append_obj(CfreeJit *, CfreeObjBuilder *); -uint64_t cfree_jit_generation(CfreeJit *); -/* Run all fini_array destructors in reverse order. Call after the last - * use of JITed code, before cfree_jit_free. */ -void cfree_jit_run_dtors(CfreeJit *); - -/* ----- JIT image inspection ----- - * - * cfree_jit_view borrows a CfreeObjFile over the loaded JIT image. 
Lets the - * driver feed the JIT to objdump/dwarf consumers without round-tripping the - * image to bytes. The returned pointer is owned by the CfreeJit and is - * invalidated by cfree_jit_free; callers must not call cfree_obj_close on it. - * - * cfree_jit_addr_to_sym is the reverse of cfree_jit_lookup: maps a runtime - * PC to the enclosing global symbol. Returns 0 on success and 1 when no - * symbol contains `addr`. The interned name string is valid until - * cfree_jit_free. */ -const CfreeObjFile *cfree_jit_view(CfreeJit *); -int cfree_jit_addr_to_sym(CfreeJit *, uint64_t addr, const char **name_out, - uint64_t *off_out); - -/* PC-space translation between the JIT's runtime address space (where - * executable code actually lives) and the image-relative vaddr space - * (the coordinate system the linked image — and any DWARF emitted at - * compile time — was authored in). - * - * The DWARF consumer (cfree_dwarf_addr_to_line, cfree_dwarf_line_to_addr, - * cfree_dwarf_unwind_step, etc.) operates entirely in image-relative - * vaddrs; the debugger, host signal handlers, and breakpoint installer - * work in runtime addresses. Callers translate at every boundary. - * - * Both functions return 0 if the input is not contained in any mapped - * segment. Identity maps for the JIT's iplt / abs-symbol cases are out - * of scope here — those addresses don't participate in source-level - * stepping. - * - * Stable for the JIT's lifetime; constant-time over jit segment count. */ -uint64_t cfree_jit_runtime_to_image(CfreeJit *, uint64_t runtime_pc); -uint64_t cfree_jit_image_to_runtime(CfreeJit *, uint64_t image_vaddr); - -/* Enumerate every globally visible symbol in the resolved JIT image. - * Drives `info functions` / `info variables` and tab completion in dbg. - * `name` is interned and valid until cfree_jit_free; CfreeSymKind is the - * same enum as the object inspector uses (CFREE_SK_FUNC / CFREE_SK_OBJ / - * etc.). 
*/ -typedef struct CfreeJitSymIter CfreeJitSymIter; -typedef struct CfreeJitSym { - const char *name; - uint64_t addr; - uint64_t size; - CfreeSymKind kind; -} CfreeJitSym; - -CfreeJitSymIter *cfree_jit_sym_iter_new(CfreeJit *); -int cfree_jit_sym_iter_next(CfreeJitSymIter *, CfreeJitSym *out); -void cfree_jit_sym_iter_free(CfreeJitSymIter *); - -/* ----- JIT session: controlled execution ----- - * - * A session wraps the JIT in a worker thread and a per-arch trap engine - * (software breakpoint patch + single-step / displaced-step trampoline). - * The library owns all signal handling, ucontext extraction, and per-arch - * trap-byte / single-step machinery. The driver side uses the session to - * call into the JIT'd code and is notified of stops (breakpoint, fault, - * exit) via the blocking session_call/session_resume entries. - * - * Threading model: a single worker thread runs the JIT entry. session_call - * and session_resume block the caller until the worker stops; the worker - * is parked on stop and resumed from the next session_resume. Only one - * thread may drive the session at a time. - * - * Lifetime: the CfreeJit must outlive the CfreeJitSession. cfree_jit_free - * tears down a session implicitly if one is still attached, but explicit - * cfree_jit_session_free is preferred so the worker thread is joined - * deterministically. - * - * Breakpoints: set/clear via session_breakpoint_set/_clear. The trap byte - * patch and arch-specific single-step trampoline (used to step over the - * patched instruction during resume) are entirely internal. The session - * dedupes addresses; setting a breakpoint at an existing address returns - * the original handle. 
*/ - -typedef enum CfreeStopKind { - CFREE_STOP_BREAKPOINT, /* worker hit a breakpoint we set */ - CFREE_STOP_SIGNAL, /* worker took a fault we did not arm */ - CFREE_STOP_EXIT, /* worker entry returned normally */ - CFREE_STOP_INTERRUPT, /* host requested via session_interrupt */ -} CfreeStopKind; - -typedef struct CfreeStopInfo { - CfreeStopKind kind; - int signal; /* host signo when kind == STOP_SIGNAL */ - int exit_code; /* worker return value when kind == EXIT */ - uint32_t bp_id; /* set bp handle when kind == BREAKPOINT */ - CfreeUnwindFrame regs; /* register snapshot at the stop site */ -} CfreeStopInfo; - -typedef enum CfreeResumeMode { - CFREE_RESUME_CONTINUE, /* run until next stop or exit */ - CFREE_RESUME_STEP_INSN, /* execute one machine instruction */ - CFREE_RESUME_STEP_LINE, /* until source line changes, staying - * in current function */ - CFREE_RESUME_NEXT_LINE, /* like STEP_LINE but step OVER any - * function calls */ - CFREE_RESUME_STEP_OUT, /* run until current frame returns */ -} CfreeResumeMode; - -/* Entry-point signature dispatched by session_call. The library is - * responsible for marshalling argv/argc into the worker's ABI; the driver - * is shape-agnostic. New entry shapes extend the enum. */ -typedef enum CfreeEntryKind { - CFREE_ENTRY_INT_ARGV, /* int(int, char**) */ - CFREE_ENTRY_U64, /* uint64_t(uint64_t, ... up to 8 args) */ -} CfreeEntryKind; - -CfreeJitSession *cfree_jit_session_new(CfreeJit *); -void cfree_jit_session_free(CfreeJitSession *); - -/* Bind a DWARF consumer to the session. Required for the source-level - * resume modes (STEP_LINE, NEXT_LINE, STEP_OUT). The CfreeDebugInfo must - * outlive every session_resume that uses those modes; the session does - * not take ownership and will not free it. Passing NULL detaches. - * Returns 0 on success. */ -int cfree_jit_session_attach_dwarf(CfreeJitSession *, CfreeDebugInfo *); - -/* Begin executing `entry` with `argv`. Blocks until the worker stops. 
- * `entry` must be a pointer returned by cfree_jit_lookup (or otherwise - * within the JIT image). Returns 0 on success (including an EXIT stop), - * nonzero on internal failure (worker spawn, OOM). On success *stop_out is - * populated. */ -int cfree_jit_session_call(CfreeJitSession *, void *entry, CfreeEntryKind, - int argc, char **argv, CfreeStopInfo *stop_out); -int cfree_jit_session_call_u64(CfreeJitSession *, void *entry, - const uint64_t *args, uint32_t nargs, - uint64_t *ret_out, CfreeStopInfo *stop_out); - -/* Resume the parked worker. Blocks until the next stop. Returns 0 on - * success, nonzero if no worker is parked. */ -int cfree_jit_session_resume(CfreeJitSession *, CfreeResumeMode, - CfreeStopInfo *stop_out); - -/* Asynchronously interrupt a running worker. Async-signal-safe: callable - * from a SIGINT handler in the host. The next stop event delivered to the - * driving thread will be CFREE_STOP_INTERRUPT. Returns 0 on a queued - * interrupt, nonzero if no worker is currently running. */ -int cfree_jit_session_interrupt(CfreeJitSession *); - -/* Read `n` bytes from the worker's address space starting at `addr` into - * `dst`. Used by `p` and `x` in the dbg driver to dereference globals, - * frame-relative locals, and arbitrary user memory. Returns 0 on success - * and nonzero on a bad address or partial read; partial-read attempts do - * not modify `dst`. Safe to call only while the worker is parked at a - * stop. */ -int cfree_jit_session_read_mem(CfreeJitSession *, uint64_t addr, void *dst, - size_t n); - -/* Write `n` bytes from `src` into the worker's address space at `addr`. - * Same constraints as the read variant: caller must be at a stop; partial - * writes leave the target untouched and return nonzero. */ -int cfree_jit_session_write_mem(CfreeJitSession *, uint64_t addr, - const void *src, size_t n); - -/* Read full register snapshot. 
Snapshot already lives in CfreeStopInfo; - * this is for callers that want a refresh outside the stop event (e.g. - * after a write). Returns 0 on success, nonzero if no worker is parked. */ -int cfree_jit_session_get_regs(CfreeJitSession *, CfreeUnwindFrame *out); - -/* Write back a register snapshot. The frame's `regs` are written into the - * worker; `pc` and `cfa` are honored only when changed. The library - * validates that `pc` lies inside the JIT image. Returns 0 on success, - * nonzero on a bad pc or if no worker is parked. */ -int cfree_jit_session_set_regs(CfreeJitSession *, const CfreeUnwindFrame *); - -/* Set / clear a breakpoint at `addr` (which must lie within the JIT image). - * On success, *bp_id_out is the session-local handle that future stop - * events will report. Idempotent: setting at an address that already has - * a breakpoint returns its existing handle. cfree_jit_session_breakpoint_clear - * silently succeeds on an unknown handle. */ -int cfree_jit_session_breakpoint_set(CfreeJitSession *, uint64_t addr, - uint32_t *bp_id_out); -int cfree_jit_session_breakpoint_clear(CfreeJitSession *, uint32_t bp_id); - -/* Extended breakpoint setter with skip count, hit cap, and an optional - * in-process predicate. The plain breakpoint_set above is a convenience - * over this form (skip_count = max_hits = 0, condition = NULL). - * - * `condition`, when non-NULL, is invoked by the library on the worker - * thread between the trap and the stop notification, after `skip_count` - * silent skips have elapsed. It must return nonzero to deliver a stop - * and zero to silently resume. The callback runs in a context where - * calling back into the session is not safe — restrict it to register - * inspection and pure computation. - * - * `max_hits`, when nonzero, auto-clears the breakpoint after that many - * stops have been delivered (post-skip, post-condition). 0 means - * unlimited. 
*/ -typedef struct CfreeBreakpointSpec { - uint64_t addr; - uint64_t skip_count; /* silent skips before the first stop */ - uint64_t max_hits; /* 0 = unlimited */ - int (*condition)(void *user, const CfreeUnwindFrame *regs); - void *condition_user; -} CfreeBreakpointSpec; - -int cfree_jit_session_breakpoint_set_spec(CfreeJitSession *, - const CfreeBreakpointSpec *, - uint32_t *bp_id_out); - -/* Resolver invoked when the linker encounters an undefined symbol. Returning - * NULL is an error. */ -typedef void *(*CfreeExternResolver)(void *user, const char *name); - -/* ============================================================ - * Pipeline - * ============================================================ - * Layered driver-facing API. Four core operations: - * - * cfree_compile_obj: one C TU -> in-memory CfreeObjBuilder (chains into link) - * cfree_compile_obj_emit: one C TU -> CfreeWriter (cc -c) - * cfree_link_exe: link inputs -> CfreeWriter (ld) - * cfree_link_jit: link inputs -> owning CfreeJit handle - * - * The CfreePipeline section near the end of this header bundles a - * CfreeCompiler with these entries for tools that want a single owning - * handle for a compile-then-link build. - * - * The freestanding core takes only byte buffers and Writers — never paths. - * Path-shaped helpers live in driver-level adapters and feed the byte/ - * Writer APIs after consulting CfreeEnv.file_io. - * - * Errors are reported through libcfree's internal panic mechanism. Each - * top-level function in this header saves and restores the active panic - * handler around its own boundary, so these functions are safely nestable: a - * caller that has already installed one keeps it across these calls. On - * failure the function unwinds its own cleanups, restores the caller's - * handler, and returns nonzero. 
*/ - -typedef struct CfreeDefine { - const char *name; - const char *body; /* NULL means "1" */ -} CfreeDefine; - -/* Source language tag carried on CfreeBytesInput when the input is fed to - * cfree_compile_obj*. Ignored by entries that take bytes - * for non-source purposes (linker, archive writer/reader, object reader). - * - * CFREE_LANG_C is value 0 so a zero-initialized CfreeBytesInput defaults to - * C, matching the prior contract. - * - * `.S` (preprocessed asm) is NOT auto-handled at this layer: the driver runs - * its C preprocessor first and then submits the result as CFREE_LANG_ASM. */ -typedef enum CfreeLanguage { - CFREE_LANG_C = 0, - CFREE_LANG_ASM = 1, - CFREE_LANG_TOY = 2, - CFREE_LANG_WASM = 3, - CFREE_LANG_COUNT = 4, -} CfreeLanguage; - -typedef int (*CfreeCompileFn)(CfreeCompiler *, const CfreeCompileOptions *, - const CfreeBytesInput *, CfreeObjBuilder *out); - -/* Register out-of-core language frontend hooks for this compiler instance. - * Passing NULL clears the slot. Returns nonzero on bad args. */ -int cfree_register_frontend(CfreeCompiler *, CfreeLanguage, CfreeCompileFn); - -/* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects, - * and archives. `name` is a diagnostic label (typically a path or pseudo- - * path); the linker interns it on entry. `data` may be any byte-shaped - * content. `lang` is consulted only by source-consuming entries; other - * entries ignore it. */ -struct CfreeBytesInput { - const char *name; - const uint8_t *data; - size_t len; - CfreeLanguage lang; -}; - -/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM, `.toy` -> - * CFREE_LANG_TOY, `.wat`/`.wasm` -> CFREE_LANG_WASM, `.c`, `.cc`, `.cpp` and - * any other suffix (including a path with no suffix) -> CFREE_LANG_C. `.S` - * (preprocessed asm) is not recognized — drivers must preprocess first and - * submit the result as CFREE_LANG_ASM. 
*/ -CfreeLanguage cfree_language_for_path(const char *path); - -/* Preprocessor configuration shared by compile_* and the convenience run. */ -struct CfreePpOptions { - const char *const *include_dirs; - uint32_t ninclude_dirs; - const char *const *system_include_dirs; - uint32_t nsystem_include_dirs; - const CfreeDefine *defines; - uint32_t ndefines; - const char *const *undefines; - uint32_t nundefines; -}; - -/* Path prefix remap entry. Applied by SourceManager whenever it produces a - * path for DWARF emission (DW_AT_comp_dir, DW_AT_name, line program). The - * first match wins. Diagnostic output uses original paths. */ -typedef struct CfreePathPrefixMap { - const char *old_prefix; - const char *new_prefix; -} CfreePathPrefixMap; - -/* Per-TU compile knobs. */ -struct CfreeCompileOptions { - int opt_level; /* 0 direct, 1 minimal, 2 full */ - int debug_info; - CfreePpOptions pp; - /* Reproducible-build knobs. `epoch` (Unix seconds) is consulted by every - * file emitter that would otherwise have written wall-clock time (COFF - * header, Mach-O LC_BUILD_VERSION, ar ar_date, DWARF producer). 0 means - * write no timestamp at all (the default). */ - uint64_t epoch; - const CfreePathPrefixMap *path_map; - uint32_t npath_map; - /* Diagnostic policy. - * - * `warnings_are_errors` (-Werror): warnings emitted to CfreeDiagSink are - * counted as errors for the compile_* return-value test and against - * max_errors. The sink's `warnings` counter is unaffected. - * - * `max_errors`: 0 means unlimited. When >0, the parser stops emitting - * after sink.errors reaches the cap (the Nth error is still emitted; the - * (N+1)th is not), and compile_* returns nonzero. */ - int warnings_are_errors; - uint32_t max_errors; -}; - -/* Compile one source TU (C or GAS-subset asm; selected by input->lang). - * - * cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The - * builder is already finalized; do not write to it further. 
Pass it to - * cfree_link_exe / cfree_link_jit. It must be alive until the linker has - * consumed it. The CfreeCompiler must outlive the returned builder. - * - * cfree_compile_obj_emit writes the encoded object to `out` and frees its - * temporary builder before returning. The Writer is not closed. On nonzero - * return the Writer may contain partial output and should not be consumed. - * - * Diagnostic model: report-all. Every error reachable by the parser's - * recovery rules is emitted to env.diag before return; the parser does not - * abort on routine syntax/semantic errors. These functions return 0 iff - * env.diag->errors == 0 at the end of the call (with warnings counting as - * errors when CfreeCompileOptions.warnings_are_errors is set). Returns - * nonzero on internal failures (OOM, invariant violation), where the - * underlying compiler_panic mechanism unwinds before return. - * - * When input->lang == CFREE_LANG_ASM the input bytes are fed straight to the - * GAS-subset assembler; CfreeCompileOptions fields that are C-only - * (CfreeCompileOptions.pp, opt_level) are ignored. Inline asm inside C TUs - * is handled by the C parser internally — no separate entry. */ -int cfree_compile_obj(CfreeCompiler *, const CfreeCompileOptions *, - const CfreeBytesInput *input, CfreeObjBuilder **out); -int cfree_compile_obj_emit(CfreeCompiler *, const CfreeCompileOptions *, - const CfreeBytesInput *input, CfreeWriter *out); - -/* ----- Header-dependency iteration ----- - * - * Walks the include edges recorded by SourceManager during a preceding - * cfree_compile_obj* call. The library hands out raw - * edges; formatting (Make rules, ninja, JSON) is the driver's job. - * - * cfree_dep_iter_next returns 1 and fills `*out` for each remaining edge, - * 0 when iteration is exhausted. The strings in CfreeDepEdge alias storage - * owned by the CfreeCompiler and are valid until the next compile call or - * compiler_free, whichever comes first. 
- * - * `includer_name` and `included_name` are the *resolved* paths SourceManager - * actually opened — the same byte sequences passed to CfreeFileIO.read_all. - * They are not the literal include token text; a `#include "x.h"` resolved - * via -I to /abs/inc/x.h reports `/abs/inc/x.h`. This is what build systems - * need: a Make rule emitted from these strings refers to files the build - * tool will stat on rebuild. - * - * `from_system_path` distinguishes headers found through a system include - * path (-isystem, sysroot, builtin) from user headers found via -I or the - * source's own directory. This is the GCC `-MM` filter: drop edges whose - * `from_system_path` is set. It is set by the include-path resolver, NOT by - * the include syntax — `#include <myheader.h>` resolved through -I is a - * user header (from_system_path=0); `#include "stdio.h"` resolved through a - * system path is a system header (from_system_path=1). - * - * `bracketed` records the include *syntax* (1 for `<…>`, 0 for `"…"`). Tools - * that round-trip include directives (formatters, IDE indexers) want the - * lexical fact; -MM filtering does not. - * - * Edges are reported across all TUs processed since compiler_new; callers - * that want a single TU's edges filter by `includer_name`. */ -typedef struct CfreeDepIter CfreeDepIter; - -typedef struct CfreeDepEdge { - const char *includer_name; /* resolved path; same string given to read_all */ - const char *included_name; /* resolved path; same string given to read_all */ - CfreeSrcLoc include_loc; - uint8_t from_system_path; /* 1 if resolved via a system include path */ - uint8_t bracketed; /* 1 if syntax was <…>; 0 for "…" */ - uint8_t pad[2]; -} CfreeDepEdge; - -CfreeDepIter *cfree_dep_iter_new(CfreeCompiler *); -int cfree_dep_iter_next(CfreeDepIter *, CfreeDepEdge *out); -void cfree_dep_iter_free(CfreeDepIter *); - -/* Build-ID emission mode (ELF .note.gnu.build-id and friends). 
*/ -typedef enum CfreeBuildIdMode { - CFREE_BUILDID_NONE, /* no build-id note (default) */ - CFREE_BUILDID_SHA256, /* hash all input section bytes in - * stable order — reproducible */ - CFREE_BUILDID_UUID, /* random; opt-in, not reproducible */ - CFREE_BUILDID_USER, /* caller-supplied bytes */ -} CfreeBuildIdMode; - -/* ============================================================ - * Linker script (structured) - * ============================================================ - * The linker accepts only the structured form. Programmatic build systems - * construct a CfreeLinkScript directly; hosts that prefer GNU-ld text feed - * the optional cfree_link_script_parse helper, which yields the same - * structured form. The data model makes the supported semantics - * inspectable rather than implicit in a parser. - * - * All pointers in a CfreeLinkScript are borrowed: the script and every - * sub-object (expressions, regions, sections, assignments, name strings) - * must outlive the call to cfree_link_exe / cfree_link_jit that consumes - * it. cfree_link_script_parse arena-owns its result; cfree_link_script_free - * releases everything reachable from a parser-produced script. 
*/ - -typedef struct CfreeLinkExpr CfreeLinkExpr; - -typedef enum CfreeLinkExprKind { - CFREE_LE_INT, /* int_val */ - CFREE_LE_DOT, /* current location counter */ - CFREE_LE_SYM, /* name */ - CFREE_LE_REGION_ORIGIN, /* name = MEMORY region */ - CFREE_LE_REGION_LENGTH, /* name = MEMORY region */ - CFREE_LE_ADD, - CFREE_LE_SUB, - CFREE_LE_MUL, - CFREE_LE_DIV, - CFREE_LE_AND, - CFREE_LE_OR, - CFREE_LE_XOR, - CFREE_LE_SHL, - CFREE_LE_SHR, - CFREE_LE_ALIGN, /* ALIGN(val, align) */ - CFREE_LE_MAX, - CFREE_LE_MIN, -} CfreeLinkExprKind; - -struct CfreeLinkExpr { - uint8_t kind; /* CfreeLinkExprKind */ - union { - int64_t int_val; - const char *name; - struct { - const CfreeLinkExpr *lhs, *rhs; - } bin; - struct { - const CfreeLinkExpr *val, *align; - } align; - } v; -}; - -typedef enum CfreeLinkRegionFlag { - CFREE_LRF_R = 1u << 0, - CFREE_LRF_W = 1u << 1, - CFREE_LRF_X = 1u << 2, -} CfreeLinkRegionFlag; - -typedef struct CfreeLinkRegion { - const char *name; - uint8_t flags; /* CfreeLinkRegionFlag mask */ - uint64_t origin; - uint64_t length; -} CfreeLinkRegion; - -typedef struct CfreeLinkInputMatch { - const char *file_pattern; /* NULL == "*" */ - const char *section_pattern; - int keep; /* nonzero: exempt from --gc-sections */ -} CfreeLinkInputMatch; - -typedef enum CfreeLinkAsnKind { - CFREE_LAS_DOT, /* . 
= expr; sym ignored */ - CFREE_LAS_SYM, /* sym = expr */ - CFREE_LAS_PROVIDE, /* PROVIDE(sym = expr) */ -} CfreeLinkAsnKind; - -typedef struct CfreeLinkAssignment { - uint8_t kind; /* CfreeLinkAsnKind */ - const char *sym; /* unused for CFREE_LAS_DOT */ - const CfreeLinkExpr *expr; -} CfreeLinkAssignment; - -typedef struct CfreeLinkOutputSection { - const char *name; - const CfreeLinkExpr *vma; /* NULL: from region/dot */ - const CfreeLinkExpr *lma; /* NULL: equal to vma */ - const CfreeLinkInputMatch *inputs; - uint32_t ninputs; - const char *region; /* > REGION; NULL if absent */ - const char *load_region; /* AT> REGION; NULL if absent */ - const CfreeLinkAssignment *asns; - uint32_t nasns; -} CfreeLinkOutputSection; - -typedef struct CfreeLinkScript { - const char *entry; /* NULL: use CfreeLinkOptions.entry */ - const CfreeLinkRegion *regions; - uint32_t nregions; - const CfreeLinkOutputSection *sections; /* in declaration order */ - uint32_t nsections; - const CfreeLinkAssignment *top_asns; /* outside any SECTIONS{} */ - uint32_t ntop_asns; -} CfreeLinkScript; - -/* Parse GNU-ld-subset text into a structured script. The compiler arena - * owns the result; cfree_link_script_free releases it. The supported v1 - * subset is: - * ENTRY(symbol) - * MEMORY { name (rwx) : ORIGIN = expr, LENGTH = expr } - * SECTIONS { ... } with output sections in declaration order - * Input rules `*(.section.glob)` or `file.o(.section)` - * KEEP(...) for --gc-sections opt-out - * PROVIDE(sym = expr), plain `sym = expr`, `. = expr` - * `> REGION` and `AT> REGION` placement - * Operators + - * / & | ^ << >>, ALIGN(expr, align), MAX(a,b), MIN(a,b) - * slash-star block comments - * Anything outside the subset (OVERLAY, VERSION, INSERT BEFORE/AFTER, - * OUTPUT_FORMAT, INPUT, GROUP, elaborate file patterns, other operators) - * is rejected with a diagnostic and the call returns nonzero with *out - * unchanged. Returns 0 on success. 
*/ -int cfree_link_script_parse(CfreeCompiler *, const char *text, size_t len, - const CfreeLinkScript **out); -void cfree_link_script_free(CfreeCompiler *, const CfreeLinkScript *); - -/* Per-archive resolution mode (mirrors GNU ld's -Bstatic / -Bdynamic / - * --as-needed positional state). Object-file inputs keep the plain - * CfreeBytesInput shape — only archives carry these knobs. */ -typedef enum CfreeLinkMode { - CFREE_LM_DEFAULT, /* output-kind default */ - CFREE_LM_STATIC, /* -Bstatic before this input */ - CFREE_LM_DYNAMIC, /* -Bdynamic */ - CFREE_LM_AS_NEEDED, /* --as-needed */ -} CfreeLinkMode; - -/* Archive input with linker-side state. - * - * link_mode: CfreeLinkMode (-Bstatic/-Bdynamic/--as-needed positional). - * whole_archive: nonzero == --whole-archive: pull every member in - * regardless of whether its symbols satisfy an undef. - * group_id: clusters archives into a cyclic resolution group; - * archives sharing a nonzero id are scanned cyclically - * until no new symbols pull in (--start-group ... - * --end-group). 0 (default) == linear single-pass. - * - * link_mode and whole_archive are orthogonal: --whole-archive applies - * regardless of --as-needed / -Bstatic / -Bdynamic state. */ -typedef struct CfreeBytesInputArchive { - CfreeBytesInput input; - uint8_t link_mode; /* CfreeLinkMode; default CFREE_LM_DEFAULT */ - uint8_t whole_archive; /* nonzero == --whole-archive */ - uint8_t group_id; - uint8_t pad; -} CfreeBytesInputArchive; - -/* Common link-input set, embedded in both CfreeLinkOptions and - * CfreeLinkSharedOptions. Adding a new input shape lands here in one - * place rather than in every options struct. */ -typedef struct CfreeLinkInputs { - CfreeObjBuilder *const *objs; /* fresh-compiled, by reference */ - uint32_t nobjs; - const CfreeBytesInput *obj_bytes; - uint32_t nobj_bytes; - const CfreeBytesInputArchive *archives; - uint32_t narchives; - /* Shared-object inputs (ELF ET_DYN). 
Each entry's bytes are parsed - * via the linker's read_elf_dso path; the DSO contributes no - * sections to the output image, but its dynsym is searched during - * undef resolution so references against this DSO bind dynamically. - * The DSO's DT_SONAME (or its filename if missing) is recorded in - * the produced image's DT_NEEDED list. */ - const CfreeBytesInput *dso_bytes; - uint32_t ndso_bytes; - /* Structured linker script. NULL means no script (target/format default - * layout). Borrowed: must outlive the cfree_link_* call. */ - const CfreeLinkScript *linker_script; - const char *entry; /* NULL = format/target default */ - CfreeExternResolver extern_resolver; - void *extern_resolver_user; - /* Build-ID. `build_id_mode` is a CfreeBuildIdMode. `build_id_bytes` / - * `build_id_len` are consulted only when mode == CFREE_BUILDID_USER. */ - uint8_t build_id_mode; - const uint8_t *build_id_bytes; - uint32_t build_id_len; -} CfreeLinkInputs; - -/* Options for executable / JIT link. Exe-only fields go on this struct - * (currently none beyond the shared input set). - * - * gc_sections: nonzero enables --gc-sections (drop unreferenced sections - * from the output, transitively from entry / KEEP roots / - * exported symbols). Default 0. */ -typedef struct CfreeLinkOptions { - CfreeLinkInputs inputs; - int gc_sections; - /* PIE / dynamic-exe shape. When `pie` is set or any DSO input is - * present the output is ET_DYN; the runtime loader at - * `interp_path` (default `/lib/ld-musl-aarch64.so.1` for - * aarch64-linux when not specified) binds DT_NEEDED dependencies - * before transferring to the entry symbol. NULL `interp_path` with - * `pie==0` and no DSO inputs preserves the static ET_EXEC path. */ - int pie; - const char *interp_path; -} CfreeLinkOptions; - -/* Options for shared-library link. - * - * soname: recorded in the produced object (DT_SONAME on ELF, - * LC_ID_DYLIB on Mach-O). NULL == none. - * rpaths/runpaths: DT_RPATH / DT_RUNPATH entries, written verbatim. 
The - * runtime loader expands $ORIGIN and similar tokens. On - * Mach-O both lists collapse to LC_RPATH in - * rpaths-then-runpaths order. - * exports: flat list of symbol names promoted to the dynamic - * symbol table. v1 has no symbol-version-script support; - * that lands later as a separate CfreeVersionScript - * type rather than folded into the linker-script grammar. - * allow_undefined: default 1 for shared output. 0 forces every external - * reference to be resolved at link time. */ -typedef struct CfreeLinkSharedOptions { - CfreeLinkInputs inputs; - const char *soname; - const char *const *rpaths; - uint32_t nrpaths; - const char *const *runpaths; - uint32_t nrunpaths; - const char *const *exports; - uint32_t nexports; - int allow_undefined; - /* Section GC. See CfreeLinkOptions.gc_sections. */ - int gc_sections; -} CfreeLinkSharedOptions; - -/* All bytes inputs (obj_bytes, archives — including the CfreeBytesInput - * nested inside each CfreeBytesInputArchive) must remain alive until the - * matching cfree_link_* call returns. */ - -/* Link to executable. Writer is not closed by the call. On nonzero return - * the Writer may contain partial output and should not be consumed. */ -int cfree_link_exe(CfreeCompiler *, const CfreeLinkOptions *, CfreeWriter *out); - -/* Link to shared library / dylib in the format implied by Compiler.target - * (ELF .so, Mach-O .dylib, PE .dll). Writer is not closed; on nonzero - * return the Writer may contain partial output and should not be - * consumed. */ -int cfree_link_shared(CfreeCompiler *, const CfreeLinkSharedOptions *, - CfreeWriter *out); - -/* Link as JIT. On success, *out_jit owns its image and mapped pages and - * must be released with cfree_jit_free. 
*/ -int cfree_link_jit(CfreeCompiler *, const CfreeLinkOptions *, - CfreeJit **out_jit); - -/* ============================================================ - * Pipeline (stateful driver-facing API) - * ============================================================ - * A CfreePipeline bundles a CfreeCompiler with the lifecycle every - * compile-then-link tool needs. Tools create a pipeline once per build, - * feed bytes into pipeline_compile_obj, then drive one of the link entries. - * `cfree_pipeline_compiler` exposes the underlying compiler so callers can - * reach into APIs that need it directly (e.g. cfree_dwarf_open against a - * JIT image, cfree_dep_iter_new for header-dep emission). - * - * Ownership: CfreeObjBuilders returned by pipeline_compile_obj are owned by - * the pipeline's compiler and must be alive at the matching link call; - * cfree_pipeline_free reaps everything in one shot. Path-shaped source - * loading is the driver's job — pipeline entries take CfreeBytesInput. */ - -CfreePipeline *cfree_pipeline_new(CfreeTarget, const CfreeEnv *); -void cfree_pipeline_free(CfreePipeline *); - -/* Borrowed; must not be freed by callers. Valid until cfree_pipeline_free. */ -CfreeCompiler *cfree_pipeline_compiler(CfreePipeline *); - -int cfree_pipeline_compile_obj(CfreePipeline *, const CfreeCompileOptions *, - const CfreeBytesInput *input, - CfreeObjBuilder **out); - -int cfree_pipeline_link_exe(CfreePipeline *, const CfreeLinkOptions *, - CfreeWriter *out); -int cfree_pipeline_link_shared(CfreePipeline *, const CfreeLinkSharedOptions *, - CfreeWriter *out); -int cfree_pipeline_link_jit(CfreePipeline *, const CfreeLinkOptions *, - CfreeJit **out_jit); - -/* ============================================================ - * Emulator (cfree emu) - * ============================================================ - * Run a guest user-mode ELF on the host via per-basic-block JIT translation. 
- * Pipeline shape: guest bytes -> per-ISA decoder -> per-ISA lifter -> CG -> - * (opt?) -> MCEmitter -> ObjBuilder -> link_jit (incremental) -> host code. - * The emu owns a single growing CfreeJit for the session: cold blocks are - * translated and incrementally linked into one image; hot edges are patched - * by the runtime (block chaining) outside the linker. - * - * v1 guest archs: aarch64, riscv64. x86_64 deferred. SIMD/vector ISA - * extensions, full-system emulation, self-modifying code, and foreign-OS - * syscalls are not supported in v1 (see doc/EMU.md). - * - * The freestanding core takes guest ELF bytes; path-shaped helpers live in - * the driver and feed bytes via CfreeFileIO.read_all. Guest memory loads - * and stores route through libcfree's runtime (bounds-checked against the - * mapped guest address space); guest syscalls are forwarded to the host OS - * via per-OS tables. */ - -typedef enum CfreeEmuArch { - CFREE_EMU_ARCH_AARCH64, - CFREE_EMU_ARCH_RISCV64, -} CfreeEmuArch; - -/* Trace flag bitmask. PC traces the guest PC at every block entry; INSN - * traces every decoded guest instruction; BLOCK traces each translation - * event (cold-miss into the lifter). All traces are emitted via the env's - * CfreeDiagSink at CFREE_DIAG_NOTE. */ -typedef enum CfreeEmuTraceFlag { - CFREE_EMU_TRACE_PC = 1u << 0, - CFREE_EMU_TRACE_INSN = 1u << 1, - CFREE_EMU_TRACE_BLOCK = 1u << 2, -} CfreeEmuTraceFlag; - -typedef uint32_t CfreeEmuTraceFlags; - -/* Per-invocation emu configuration. `guest_elf_bytes` must outlive the call - * (cfree_emu_run) or the returned CfreeEmu (cfree_emu_new). `argv` and - * `envp`, when non-NULL, are NULL-terminated arrays of NUL-terminated - * strings; the emu copies them into the guest stack at startup, so the - * caller need not keep them alive past the new/run call. argv[0] is - * conventionally the guest program path. envp may be NULL for an empty - * environment. 
- * - * `optimize` selects the per-block backend: 0 drives a CGTarget directly - * (fast translation, slow execution); 2 wraps with opt_cgtarget (slow - * translation, fast execution). Other levels are reserved. - * - * Guest fd map / sandboxing is not exposed in v1 — guest syscalls are - * forwarded into the host process's fd table verbatim. */ -typedef struct CfreeEmuOptions { - CfreeEmuArch guest_arch; - const uint8_t *guest_elf_bytes; - size_t guest_elf_len; - int optimize; - CfreeEmuTraceFlags trace; - const char *const *argv; /* NULL-terminated; may be NULL */ - const char *const *envp; /* NULL-terminated; may be NULL */ -} CfreeEmuOptions; - -typedef struct CfreeEmu CfreeEmu; - -/* One-shot: load the guest ELF, run until exit/trap, fill *out_exit_code - * with the guest's exit status. Returns 0 on a clean guest exit (including - * a nonzero guest exit_code), nonzero on internal failure (decode/lift - * failure, OOM, unsupported guest arch). */ -int cfree_emu_run(CfreeCompiler *, const CfreeEmuOptions *, int *out_exit_code); - -/* Lower-level surface for dbg integration. Lifecycle: emu_new constructs - * the runtime (reserves the code-cache VA region, maps guest segments, - * builds the initial CPUState); emu_step runs at most `nblocks` translated - * blocks before returning; emu_lookup translates the block at `guest_pc` - * if cold and returns its host entry (NULL on translation failure or an - * unmapped guest_pc). emu_free releases the runtime, the JIT image, and - * the guest address space. */ -CfreeEmu *cfree_emu_new(CfreeCompiler *, const CfreeEmuOptions *); -int cfree_emu_step(CfreeEmu *, uint32_t nblocks); -void *cfree_emu_lookup(CfreeEmu *, uint64_t guest_pc); -void cfree_emu_free(CfreeEmu *); - -/* ============================================================ - * Binary format detection - * ============================================================ - * Sniff the format of a binary blob from its magic bytes. 
- * COFF is detected by common machine-type values (x86, x86_64, - * ARM, ARM64, RISC-V). Returns CFREE_BIN_UNKNOWN if no magic matches. */ - -typedef enum CfreeBinFmt { - CFREE_BIN_UNKNOWN = 0, - CFREE_BIN_AR, - CFREE_BIN_ELF, - CFREE_BIN_COFF, /* relocatable COFF object; first 2 bytes are machine type */ - CFREE_BIN_PE, /* PE executable/DLL; starts with MZ header */ - CFREE_BIN_MACHO, - CFREE_BIN_WASM, -} CfreeBinFmt; - -CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len); - -/* Derive a CfreeTarget from object-file magic + headers (ELF e_machine / - * EI_CLASS / EI_DATA, COFF Machine, Mach-O cputype, WASM = wasm32). Returns - * 0 on success and fills *out; returns 1 when the input is not a recognized - * relocatable object or its magic carries insufficient information. AR - * archives are not handled here — open a member to detect its target. */ -int cfree_detect_target(const uint8_t *data, size_t len, CfreeTarget *out); - -/* ============================================================ - * Object inspection - * ============================================================ - * Open a relocatable object for inspection. Format and target are both - * auto-detected from the file (see cfree_detect_fmt / cfree_detect_target). - * Returns NULL on failure. The input bytes must remain alive until - * cfree_obj_close. - * - * After a successful open, query functions provide read-only access to - * sections and symbols. Strings returned by query functions are interned - * and valid until cfree_obj_close. 
*/ - -typedef struct CfreeObjFile CfreeObjFile; -typedef struct CfreeObjSymIter CfreeObjSymIter; - -typedef enum CfreeSecKind { - CFREE_SEC_TEXT, - CFREE_SEC_RODATA, - CFREE_SEC_DATA, - CFREE_SEC_BSS, - CFREE_SEC_DEBUG, - CFREE_SEC_OTHER, -} CfreeSecKind; - -typedef enum CfreeSecFlag { - CFREE_SF_EXEC = 1u << 0, - CFREE_SF_WRITE = 1u << 1, - CFREE_SF_ALLOC = 1u << 2, - CFREE_SF_TLS = 1u << 3, - CFREE_SF_MERGE = 1u << 4, - CFREE_SF_STRINGS = 1u << 5, -} CfreeSecFlag; - -/* CfreeSymBind and CfreeSymKind are declared at the top of this header - * because they are also used by the JIT symbol iterator. */ - -#define CFREE_SECTION_NONE UINT32_MAX - -typedef struct CfreeObjSecInfo { - const char *name; /* interned; valid until cfree_obj_close */ - CfreeSecKind kind; - uint32_t flags; /* bitmask of CfreeSecFlag */ - uint32_t size; /* bytes; BSS uses virtual size */ - uint32_t align; /* always a power of 2; 1 means no constraint; - * 0 is reserved and never appears (ELF's "0 or 1 - * means none" is normalized to 1 on read). */ - uint32_t entsize; /* section entry size, or 0 when not specified */ -} CfreeObjSecInfo; - -typedef struct CfreeObjSymInfo { - const char *name; /* interned; valid until cfree_obj_close */ - CfreeSymBind bind; - CfreeSymKind kind; - uint32_t section; /* 0-based index, or CFREE_SECTION_NONE */ - uint64_t value; - uint64_t size; -} CfreeObjSymInfo; - -CfreeObjFile *cfree_obj_open(const CfreeEnv *, const CfreeBytesInput *); -void cfree_obj_close(CfreeObjFile *); -CfreeObjFmt cfree_obj_fmt(const CfreeObjFile *); -CfreeTarget cfree_obj_target(const CfreeObjFile *); -uint32_t cfree_obj_nsections(const CfreeObjFile *); -CfreeObjSecInfo cfree_obj_section(const CfreeObjFile *, uint32_t idx); - -CfreeObjSymIter *cfree_obj_symiter_new(CfreeObjFile *); -int cfree_obj_symiter_next(CfreeObjSymIter *, CfreeObjSymInfo *out); -void cfree_obj_symiter_free(CfreeObjSymIter *); - -/* Raw bytes of a section. 
Returns a pointer aliasing storage owned by the - * CfreeObjFile and valid until cfree_obj_close. For BSS (no in-file bytes), - * returns NULL with `*len_out = 0`; the section's virtual size is on - * CfreeObjSecInfo.size. Out-of-range idx returns NULL with `*len_out = 0`. */ -const uint8_t *cfree_obj_section_data(const CfreeObjFile *, uint32_t idx, - size_t *len_out); - -/* Expose the underlying CfreeObjBuilder for use with cfree_disasm_iter_new - * (so the disassembler can consult sym/reloc tables for annotation). The - * pointer is owned by the CfreeObjFile and is valid until cfree_obj_close. */ -CfreeObjBuilder *cfree_obj_builder(const CfreeObjFile *); - -/* Relocation iterator. Walks every relocation in the object across all - * sections in section-then-offset order. Strings are interned and valid - * until cfree_obj_close. */ -typedef struct CfreeObjReloc { - uint32_t section; /* 0-based section index the reloc applies to */ - uint64_t offset; /* offset within that section */ - uint32_t sym; /* opaque symbol id; CFREE_SECTION_NONE if none */ - const char *sym_name; /* interned; "" when sym is none/anonymous */ - int64_t addend; - uint32_t kind; /* arch-specific reloc type code */ - const char *kind_name; /* interned, e.g. "R_X86_64_PC32" */ -} CfreeObjReloc; - -typedef struct CfreeObjRelocIter CfreeObjRelocIter; - -CfreeObjRelocIter *cfree_obj_reliter_new(CfreeObjFile *); -int cfree_obj_reliter_next(CfreeObjRelocIter *, CfreeObjReloc *out); -void cfree_obj_reliter_free(CfreeObjRelocIter *); - -/* ============================================================ - * DWARF consumer - * ============================================================ - * Read DWARF (.debug_info / .debug_line / .debug_aranges / .eh_frame) out - * of an already-opened CfreeObjFile. The CfreeObjFile must outlive the - * CfreeDebugInfo. Strings handed back through the query functions are - * interned and valid until cfree_dwarf_close. 
- * - * cfree_dwarf_open returns NULL when the object has no DWARF, when the - * object's format doesn't carry DWARF (PE/COFF can; the consumer accepts - * the standard sections wherever they live), or on internal failure. - * - * cfree_dwarf_addr_to_line maps a runtime / image PC to the source file, - * line, and column that produced it. Return codes: - * 0 — PC matched a line entry; outputs filled. - * 1 — PC is inside a CU's address range but no row matched (e.g. - * compiler scaffolding). - * 2 — PC is outside every CU's coverage; the caller is in a frame - * that was compiled without `-g` (REPL renders as "no debug info - * for this frame"). - * - * cfree_dwarf_line_to_addr is the inverse. `file` matches a CU's - * line-table filename exactly, or as a path suffix (`util.c` matches - * `/proj/util.c` but not `/proj/run/futile.c`). Return codes: - * 0 — unique match, pc_out filled. - * 1 — file not present in any CU (REPL: "file not covered"). - * 2 — file present, but no row at `line` (REPL: "no line N in file"). - * 3 — ambiguous: more than one distinct PC matches via suffix; - * pc_out is the first match. Use cfree_dwarf_line_to_addr_all to - * enumerate candidates and prompt for disambiguation. - * - * cfree_dwarf_func_at returns the enclosing subprogram's name and - * inclusive PC bounds. Returns 0 on success, 1 if no subprogram contains - * `pc`. */ -typedef struct CfreeDebugInfo CfreeDebugInfo; - -CfreeDebugInfo *cfree_dwarf_open(CfreeCompiler *, const CfreeObjFile *); -void cfree_dwarf_close(CfreeDebugInfo *); - -int cfree_dwarf_addr_to_line(CfreeDebugInfo *, uint64_t pc, - const char **file_out, uint32_t *line_out, - uint32_t *col_out); -int cfree_dwarf_line_to_addr(CfreeDebugInfo *, const char *file, uint32_t line, - uint64_t *pc_out); - -/* Disambiguation enumerator paired with cfree_dwarf_line_to_addr's - * ambiguous return. 
`out[k]` is filled for the first `cap` distinct - * candidate PCs; `*n_out` is the total candidate count, which may - * exceed `cap`. `file` strings are interned in the CfreeDebugInfo and - * live until cfree_dwarf_close. Returns 0 on success, 1 on invalid args. */ -typedef struct CfreeDwarfLineMatch { - uint64_t pc; - const char *file; -} CfreeDwarfLineMatch; - -int cfree_dwarf_line_to_addr_all(CfreeDebugInfo *, const char *file, - uint32_t line, CfreeDwarfLineMatch *out, - uint32_t cap, uint32_t *n_out); -int cfree_dwarf_func_at(CfreeDebugInfo *, uint64_t pc, const char **name_out, - uint64_t *low_pc_out, uint64_t *high_pc_out); - -/* Richer subprogram description for backtrace rendering. Returns the same - * name and pc range as cfree_dwarf_func_at, plus the source location of the - * subprogram DIE (DW_AT_decl_file/line) and an `inlined` flag (set when - * `pc` resolves to an inlined instance). Returns 0 on success, 1 if no - * subprogram contains `pc`. cfree_dwarf_func_at is kept as a thin - * convenience over this entry. */ -typedef struct CfreeDwarfType CfreeDwarfType; /* opaque */ - -typedef struct CfreeDwarfSubprogram { - const char *name; - uint64_t low_pc; - uint64_t high_pc; - const char *decl_file; - uint32_t decl_line; - const CfreeDwarfType *return_type; - uint8_t inlined; -} CfreeDwarfSubprogram; - -int cfree_dwarf_subprogram_at(CfreeDebugInfo *, uint64_t pc, - CfreeDwarfSubprogram *out); -int cfree_dwarf_subprogram_named(CfreeDebugInfo *, const char *name, - CfreeDwarfSubprogram *out); - -/* CFI-driven unwind step. The caller seeds `frame->pc` (and any callee-saved - * registers known at the leaf) and the consumer walks .eh_frame to compute - * the caller frame in place: pc, cfa, and registers are updated. - * CfreeUnwindFrame is declared at the top of this header. Register indices - * follow the DWARF register numbering for the target arch (which matches - * CfreeArchKind's canonical mapping). 
Returns 0 on a successful step, 1 at the - * bottom of the stack (no caller), nonzero on decode error. */ -int cfree_dwarf_unwind_step(CfreeDebugInfo *, CfreeUnwindFrame *); - -/* ----- Variable locations ----- - * - * Decode where a named variable lives at PC. Resolution order: the deepest - * lexical scope at `pc` whose `name` matches wins; if no local matches, a - * file-scope global with that name is returned; otherwise 1. - * - * `byte_size` is the variable's storage size in bytes, taken from the - * variable's DIE type. Zero means unknown. - * - * cfree_dwarf_loc_read evaluates the location against `frame` (whose `regs` - * supply register values; the leaf frame's regs come from CfreeStopInfo, - * deeper frames from cfree_dwarf_unwind_step) and reads the underlying - * bytes through the supplied JIT session. Up to `cap` bytes are written - * into `dst`; *read_out reports the number actually read (capped to - * the variable's byte_size). Returns 0 on success, nonzero on bad - * arguments or a read fault. - * - * EXPR locations carry a DWARF expression byte string; libcfree owns the - * stack-machine evaluator. Callers should treat the loc as opaque and - * always go through cfree_dwarf_loc_read. */ -/* ----- Type descriptions ----- - * - * Type DIEs are exposed as opaque CfreeDwarfType handles owned by the - * CfreeDebugInfo (interned for the lifetime of the consumer). Callers - * inspect a type with cfree_dwarf_type_info, which returns a kind tag plus - * shape data (size, name, element count, inner type). For aggregates - * (struct/union) and enums, dedicated iterators yield fields and enum - * values respectively. 
*/ -typedef enum CfreeDwarfTypeKind { - CFREE_DT_VOID, - CFREE_DT_SINT, /* base: signed integer */ - CFREE_DT_UINT, /* base: unsigned integer */ - CFREE_DT_BOOL, - CFREE_DT_FLOAT, - CFREE_DT_CHAR, /* signed_char / unsigned_char distinguished by SINT/UINT */ - CFREE_DT_PTR, /* points to inner */ - CFREE_DT_ARRAY, /* element type + length */ - CFREE_DT_STRUCT, - CFREE_DT_UNION, - CFREE_DT_ENUM, /* base type + named values */ - CFREE_DT_FUNC, /* function type, for function-pointer pretty-print */ - CFREE_DT_TYPEDEF, /* alias name + underlying */ -} CfreeDwarfTypeKind; - -typedef struct CfreeDwarfTypeInfo { - CfreeDwarfTypeKind kind; - uint32_t byte_size; /* 0 = unknown / void */ - const char *name; /* tag/typedef name; "" if anon */ - /* For ARRAY: element_count == 0 means flexible/unknown. */ - uint32_t element_count; - /* For PTR/ARRAY/TYPEDEF: the inner type (NULL otherwise). */ - const CfreeDwarfType *inner; -} CfreeDwarfTypeInfo; - -CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType *); - -/* Struct/union field iterator. Yields each direct field; nested aggregates - * are reached by recursing on field.type. */ -typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter; -typedef struct CfreeDwarfField { - const char *name; /* "" for anonymous */ - uint32_t byte_offset; - uint32_t bit_offset; /* for bitfields; 0 otherwise */ - uint32_t bit_size; /* for bitfields; 0 otherwise */ - const CfreeDwarfType *type; -} CfreeDwarfField; - -CfreeDwarfFieldIter *cfree_dwarf_field_iter_new(CfreeDebugInfo *, - const CfreeDwarfType *); -int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter *, CfreeDwarfField *out); -void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter *); - -/* Enum value iterator. 
*/ -typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter; -typedef struct CfreeDwarfEnumVal { - const char *name; - int64_t value; -} CfreeDwarfEnumVal; - -CfreeDwarfEnumIter *cfree_dwarf_enum_iter_new(CfreeDebugInfo *, - const CfreeDwarfType *); -int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter *, CfreeDwarfEnumVal *out); -void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter *); - -typedef enum CfreeDwarfLocKind { - CFREE_DLOC_REG, /* value lives in a register */ - CFREE_DLOC_FRAME_OFS, /* [cfa + frame_ofs] */ - CFREE_DLOC_GLOBAL, /* absolute address */ - CFREE_DLOC_EXPR, /* DWARF expression bytes */ -} CfreeDwarfLocKind; - -typedef struct CfreeDwarfVarLoc { - CfreeDwarfLocKind kind; - uint32_t byte_size; /* 0 = unknown */ - /* DIE type of the variable. NULL when type information was not - * recovered (e.g. stripped binary, hand-written symbol). When NULL, - * callers should fall back to byte_size and treat the bytes opaquely. */ - const CfreeDwarfType *type; - union { - uint32_t reg; - int32_t frame_ofs; - uint64_t global; - struct { - const uint8_t *bytes; - size_t len; - } expr; - } v; -} CfreeDwarfVarLoc; - -/* Look up a variable visible at `pc` by name. Return codes: - * 0 — found; *out filled. - * 1 — `pc` is inside a known subprogram but no variable named `name` - * resolves there (typo / out-of-scope). - * 2 — `pc` is not covered by any subprogram (no debug info for this - * frame); globals were still consulted before returning. */ -int cfree_dwarf_var_at(CfreeDebugInfo *, uint64_t pc, const char *name, - CfreeDwarfVarLoc *out); -int cfree_dwarf_loc_read(CfreeDebugInfo *, const CfreeDwarfVarLoc *, - const CfreeUnwindFrame *, - CfreeJitSession *, /* memory provider */ - void *dst, size_t cap, size_t *read_out); - -/* ----- Locals, arguments, and parameters ----- - * - * cfree_dwarf_var_at resolves a single named variable. 
To enumerate every - * variable visible at a PC (for `info locals` / `info args`), use the - * vars-at iterator: it yields locals from the deepest scope outward and - * then file-scope globals, with a role mask filter. - * - * cfree_dwarf_var_at is kept as a convenience over the iterator: it - * performs deepest-scope-first matching by name and returns the first - * hit. Both are valid entry points. */ -typedef enum CfreeDwarfVarRole { - CFREE_DVR_LOCAL, - CFREE_DVR_ARG, - CFREE_DVR_GLOBAL, -} CfreeDwarfVarRole; - -typedef struct CfreeDwarfVar { - const char *name; - CfreeDwarfVarRole role; - CfreeDwarfVarLoc loc; -} CfreeDwarfVar; - -typedef struct CfreeDwarfVarIter CfreeDwarfVarIter; - -CfreeDwarfVarIter *cfree_dwarf_vars_at_new(CfreeDebugInfo *, uint64_t pc, - uint32_t role_mask); -int cfree_dwarf_vars_at_next(CfreeDwarfVarIter *, CfreeDwarfVar *out); -void cfree_dwarf_vars_at_free(CfreeDwarfVarIter *); - -/* Iterate the formal parameters of the subprogram covering `pc`, in - * declaration order. Drives gdb-style backtrace argument rendering. - * Returns NULL if `pc` is not inside any subprogram. */ -typedef struct CfreeDwarfParamIter CfreeDwarfParamIter; - -CfreeDwarfParamIter *cfree_dwarf_param_iter_new(CfreeDebugInfo *, uint64_t pc); -CfreeDwarfParamIter *cfree_dwarf_param_iter_new_named(CfreeDebugInfo *, - const char *name); -int cfree_dwarf_param_iter_next(CfreeDwarfParamIter *, CfreeDwarfVar *out); -void cfree_dwarf_param_iter_free(CfreeDwarfParamIter *); - -/* ============================================================ - * Disassembler - * ============================================================ - * Two layers: a high-level convenience that walks a relocatable object's - * text sections and writes an objdump-style listing, and a low-level - * iterator that decodes instructions from a byte buffer with vaddr context. - * - * Operands are pre-rendered to text on CfreeInsn. 
Structured operands (per- - * arch REG/IMM/MEM/SYM_REL enums) are the principled answer but multiply - * surface per arch (x86 ModR/M, AArch64 vector lanes, RISC-V CSR names) - * without v1 consumers; adding a structured form later is non-breaking - * because the text fields remain accurate. - * - * Strings on CfreeInsn (mnemonic/operands/annotation) and `bytes` are owned - * by the iterator and valid only until the next cfree_disasm_iter_next call - * or cfree_disasm_iter_free, whichever comes first. */ - -typedef struct CfreeInsn { - uint64_t vaddr; - const uint8_t *bytes; - uint32_t nbytes; - const char *mnemonic; - const char *operands; /* pre-rendered; may be "" */ - const char *annotation; /* sym/reloc note; may be "" */ -} CfreeInsn; - -/* Walk a relocatable object's text sections and write an objdump-style - * listing to `out`. Convenience over the iterator. The Writer is not - * closed. Returns 0 on success, nonzero on failure. */ -int cfree_obj_disasm(CfreeCompiler *, const CfreeBytesInput *, - CfreeWriter *out); - -/* Iterate instructions in a byte buffer at virtual address `vaddr`. If - * `obj` is non-NULL, the decoder consults its symbol and relocation tables - * to fill CfreeInsn.annotation; pass NULL for raw decoding. The bytes - * buffer must remain alive until cfree_disasm_iter_free. - * - * cfree_disasm_iter_next returns 1 and fills `*out` for each decoded - * instruction, 0 when the buffer is exhausted. On an undecodable byte the - * iterator advances by the arch's minimum unit and emits a placeholder - * mnemonic so the listing stays in sync. 
*/ -typedef struct CfreeDisasmIter CfreeDisasmIter; - -CfreeDisasmIter *cfree_disasm_iter_new(CfreeCompiler *, const uint8_t *bytes, - size_t len, uint64_t vaddr, - CfreeObjBuilder *obj /* may be NULL */); -int cfree_disasm_iter_next(CfreeDisasmIter *, CfreeInsn *out); -void cfree_disasm_iter_free(CfreeDisasmIter *); - -/* ============================================================ - * Archive (ar) file - * ============================================================ - * Pure format I/O — no compilation context required. - * - * cfree_ar_write packs member byte payloads into a POSIX ar archive written - * to `out`. Options control reproducibility and format extensions: - * - `epoch` Unix seconds written to ar_date for every member; 0 - * leaves the field as the literal "0" (the default). - * - `symbol_index` if nonzero, emit a System V `/` symbol-index member - * as the first member. The index payload is a 4-byte - * big-endian symbol count, then count 4-byte big-endian - * offsets pointing at member headers (relative to start - * of archive), then NUL-terminated symbol names. Symbol - * names are taken from `member_symbols`; an empty index - * (count==0) is emitted when no symbols are supplied. - * - `long_names` if nonzero, emit a `//` long-name table when any - * member name exceeds 15 characters or contains '/'. - * With long_names == 0, over-long names are truncated. - * - `member_symbols` optional; parallel to the `members` array. Entry i - * lists the global symbols defined by member i. NULL - * (or per-entry count==0) means that member contributes - * no symbols. Names point into caller-owned storage and - * need only outlive the cfree_ar_write call. Ignored - * when symbol_index == 0. - * `opts` may be NULL to accept all defaults. - * The Writer is not closed; I/O errors are detectable via out->error(). - * Returns 0 on success, 1 on bad arguments. 
- * - * cfree_ar_list writes one member name per line to `out` for each non-special - * member in the archive. Returns 0 on success, 1 on bad arguments or - * malformed archive. - * - * CfreeArIter is a stack-allocated cursor for iterating archive members. - * cfree_ar_iter_init validates the archive magic and positions the cursor - * at the first member; returns 1 on success, 0 on bad magic or NULL input. - * cfree_ar_iter_next advances to the next non-special member and fills *out; - * returns 1 if a member was returned, 0 at end or on malformed data. - * Member data pointers alias the original archive bytes and are valid as - * long as the archive bytes remain alive. CfreeArMember.name is interned - * in iterator-owned storage and is valid only until the next iter_next - * call on the same iterator. */ -typedef struct CfreeArMemberSymbols { - const char *const *names; /* count entries; each NUL-terminated */ - uint32_t count; -} CfreeArMemberSymbols; - -typedef struct CfreeArWriteOptions { - uint64_t epoch; /* ar_date for every member; 0 = none */ - int symbol_index; /* emit System V '/' symbol-index member */ - int long_names; /* emit '//' long-name table when needed */ - /* Parallel to the `members` array; NULL means "no symbols anywhere". - * Only consulted when symbol_index is nonzero. 
*/ - const CfreeArMemberSymbols *member_symbols; -} CfreeArWriteOptions; - -int cfree_ar_write(CfreeWriter *out, const CfreeBytesInput *members, - uint32_t nmembers, const CfreeArWriteOptions *opts); -int cfree_ar_list(const CfreeBytesInput *archive, CfreeWriter *out); - -typedef struct CfreeArIter { - const uint8_t *_p; - const uint8_t *_end; - const uint8_t *_longnames; /* `//` table bytes, NULL until seen */ - size_t _longnames_len; - char _namebuf[256]; /* iterator-owned scratch for member name */ -} CfreeArIter; - -typedef struct CfreeArMember { - const char *name; /* iterator-owned; valid until next iter_next */ - const uint8_t *data; /* points into archive bytes */ - size_t size; -} CfreeArMember; - -int cfree_ar_iter_init(CfreeArIter *, const CfreeBytesInput *archive); -int cfree_ar_iter_next(CfreeArIter *, CfreeArMember *out); - -#endif diff --git a/include/cfree/arch.h b/include/cfree/arch.h @@ -0,0 +1,29 @@ +#ifndef CFREE_ARCH_H +#define CFREE_ARCH_H + +#include <cfree/core.h> + +/* + * Architecture helpers shared by debuggers, unwinders, disassemblers, and + * hosts that render target register state. + */ + +typedef struct CfreeUnwindFrame { + uint64_t pc; + uint64_t cfa; + uint64_t regs[32]; /* DWARF register-numbered, zero for absent registers */ +} CfreeUnwindFrame; + +typedef struct CfreeArchReg { + uint32_t dwarf_idx; + const char *name; +} CfreeArchReg; + +const char *cfree_arch_register_name(CfreeArchKind, uint32_t dwarf_idx); +CfreeStatus cfree_arch_register_index(CfreeArchKind, const char *name, + uint32_t *idx_out); +uint32_t cfree_arch_register_count(CfreeArchKind); +CfreeStatus cfree_arch_register_at(CfreeArchKind, uint32_t idx, + CfreeArchReg *out); + +#endif diff --git a/include/cfree/archive.h b/include/cfree/archive.h @@ -0,0 +1,43 @@ +#ifndef CFREE_ARCHIVE_H +#define CFREE_ARCHIVE_H + +#include <cfree/core.h> + +/* + * POSIX ar archive reader/writer. + * + * Pure byte-format I/O: no compiler context is required. 
Member payloads and + * archive bytes are borrowed for the duration of each call/iterator. + */ + +typedef struct CfreeArMemberSymbols { + const char *const *names; + uint32_t count; +} CfreeArMemberSymbols; + +typedef struct CfreeArWriteOptions { + uint64_t epoch; + int symbol_index; + int long_names; + const CfreeArMemberSymbols *member_symbols; +} CfreeArWriteOptions; + +CfreeStatus cfree_ar_write(CfreeWriter *out, const CfreeBytes *members, + uint32_t nmembers, + const CfreeArWriteOptions *opts); +CfreeStatus cfree_ar_list(const CfreeBytes *archive, CfreeWriter *out); + +typedef struct CfreeArIter CfreeArIter; + +typedef struct CfreeArMember { + const char *name; + const uint8_t *data; + size_t size; +} CfreeArMember; + +CfreeStatus cfree_ar_iter_new(const CfreeContext *, const CfreeBytes *archive, + CfreeArIter **out); +CfreeIterResult cfree_ar_iter_next(CfreeArIter *, CfreeArMember *out); +void cfree_ar_iter_free(CfreeArIter *); + +#endif diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -1,7 +1,8 @@ #ifndef CFREE_PUBLIC_CG_H #define CFREE_PUBLIC_CG_H -#include <cfree.h> +#include <cfree/core.h> +#include <cfree/objbuild.h> /* ============================================================ * Handles @@ -185,9 +186,10 @@ CfreeCgCallConv cfree_cg_type_func_call_conv(CfreeCompiler*, CfreeCgTypeId); int cfree_cg_type_func_is_variadic(CfreeCompiler*, CfreeCgTypeId); uint32_t cfree_cg_type_record_nfields(CfreeCompiler*, CfreeCgTypeId); -/* Returns 0 on success and fills any non-NULL out parameters. */ -int cfree_cg_type_record_field(CfreeCompiler*, CfreeCgTypeId, uint32_t index, - CfreeCgField* out, uint64_t* offset_out); +/* Returns CFREE_OK and fills any non-NULL out parameters on success. 
*/ +CfreeStatus cfree_cg_type_record_field(CfreeCompiler*, CfreeCgTypeId, + uint32_t index, CfreeCgField* out, + uint64_t* offset_out); typedef enum CfreeCgSymbolFeature { CFREE_CG_SYMFEAT_WEAK, @@ -369,8 +371,8 @@ CfreeSym cfree_cg_c_linkage_name(CfreeCompiler*, CfreeSym source_name); * Lifecycle and Source Locations * ============================================================ */ -CfreeCg* cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out, - const CfreeCompileOptions*); +CfreeStatus cfree_cg_new(CfreeCompiler*, CfreeObjBuilder* out, + const CfreeCodeOptions*, CfreeCg** cg_out); void cfree_cg_free(CfreeCg*); /* Sticky source location. Function, scope, local, param, instruction, and diff --git a/include/cfree/compile.h b/include/cfree/compile.h @@ -0,0 +1,111 @@ +#ifndef CFREE_COMPILE_H +#define CFREE_COMPILE_H + +#include <cfree/core.h> +#include <cfree/objbuild.h> + +/* + * Source compiler embedding API. + * + * This layer compiles one source translation unit into a relocatable object + * builder or directly emits encoded object bytes. It is the right API for a + * cc/as-like driver. Language frontends that want to emit code directly + * should use cfree/cg.h instead. 
+ */ + +typedef enum CfreeLanguage { + CFREE_LANG_C = 0, + CFREE_LANG_ASM = 1, + CFREE_LANG_TOY = 2, + CFREE_LANG_WASM = 3, + CFREE_LANG_COUNT = 4, +} CfreeLanguage; + +typedef struct CfreeSourceInput { + CfreeBytes bytes; + CfreeLanguage lang; +} CfreeSourceInput; + +typedef struct CfreeDefine { + const char *name; + const char *body; /* NULL means "1" */ +} CfreeDefine; + +typedef struct CfreePreprocessOptions { + const char *const *include_dirs; + uint32_t ninclude_dirs; + const char *const *system_include_dirs; + uint32_t nsystem_include_dirs; + const CfreeDefine *defines; + uint32_t ndefines; + const char *const *undefines; + uint32_t nundefines; +} CfreePreprocessOptions; + +typedef struct CfreeDiagnosticOptions { + int warnings_are_errors; + uint32_t max_errors; /* 0 means unlimited */ +} CfreeDiagnosticOptions; + +typedef struct CfreeCCompileOptions { + CfreeCodeOptions code; + CfreePreprocessOptions preprocess; + CfreeDiagnosticOptions diagnostics; +} CfreeCCompileOptions; + +typedef struct CfreeAsmCompileOptions { + CfreeCodeOptions code; + CfreeDiagnosticOptions diagnostics; +} CfreeAsmCompileOptions; + +typedef struct CfreeFrontendCompileOptions { + CfreeCodeOptions code; + CfreeDiagnosticOptions diagnostics; + const void *language_options; +} CfreeFrontendCompileOptions; + +typedef CfreeStatus (*CfreeCompileFn)(CfreeCompiler *, + const CfreeFrontendCompileOptions *, + const CfreeSourceInput *, + CfreeObjBuilder *out); + +CfreeLanguage cfree_language_for_path(const char *path); +CfreeStatus cfree_register_frontend(CfreeCompiler *, CfreeLanguage, + CfreeCompileFn); + +CfreeStatus cfree_compile_c_obj(CfreeCompiler *, const CfreeCCompileOptions *, + const CfreeBytes *, CfreeObjBuilder **out); +CfreeStatus cfree_compile_c_obj_emit(CfreeCompiler *, + const CfreeCCompileOptions *, + const CfreeBytes *, CfreeWriter *out); +CfreeStatus cfree_compile_asm_obj(CfreeCompiler *, + const CfreeAsmCompileOptions *, + const CfreeBytes *, CfreeObjBuilder **out); 
+CfreeStatus cfree_compile_asm_obj_emit(CfreeCompiler *, + const CfreeAsmCompileOptions *, + const CfreeBytes *, CfreeWriter *out); +CfreeStatus cfree_compile_source_obj(CfreeCompiler *, + const CfreeFrontendCompileOptions *, + const CfreeSourceInput *, + CfreeObjBuilder **out); +CfreeStatus cfree_compile_source_obj_emit(CfreeCompiler *, + const CfreeFrontendCompileOptions *, + const CfreeSourceInput *, + CfreeWriter *out); + +typedef struct CfreeDepIter CfreeDepIter; + +typedef struct CfreeDepEdge { + const char *includer_name; + const char *included_name; + CfreeSrcLoc include_loc; + uint8_t from_system_path; + uint8_t bracketed; + uint8_t pad[2]; +} CfreeDepEdge; + +CfreeStatus cfree_dep_iter_new(CfreeCompiler *, CfreeDepIter **out); +CfreeIterResult cfree_dep_iter_next(CfreeDepIter *, CfreeDepEdge *out); +void cfree_dep_iter_free(CfreeDepIter *); + +#endif diff --git a/include/cfree/core.h b/include/cfree/core.h @@ -0,0 +1,242 @@ +#ifndef CFREE_CORE_H +#define CFREE_CORE_H + +/* + * Core libcfree API. + * + * This header is the small substrate shared by every public libcfree + * component: target descriptions, host services, diagnostics, compiler + * lifetime, interned symbols, and byte/writer helpers. It intentionally + * contains no compile, link, codegen, object, JIT, DWARF, or archive entry + * points. Include the narrower component header for those. + */ + +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> + +/* Opaque handles shared across component headers. 
*/ +typedef struct CfreeCompiler CfreeCompiler; +typedef struct CfreeObjBuilder CfreeObjBuilder; +typedef struct CfreeObjFile CfreeObjFile; +typedef struct CfreeJit CfreeJit; +typedef struct CfreeJitSession CfreeJitSession; +typedef struct CfreeDebugInfo CfreeDebugInfo; +typedef struct CfreeEmu CfreeEmu; + +typedef uint32_t CfreeSym; + +typedef enum CfreeStatus { + CFREE_OK = 0, + CFREE_ERR = 1, + CFREE_NOMEM = 2, + CFREE_INVALID = 3, + CFREE_UNSUPPORTED = 4, + CFREE_MALFORMED = 5, + CFREE_IO = 6, + CFREE_NOT_FOUND = 7, + CFREE_AMBIGUOUS = 8, +} CfreeStatus; + +typedef enum CfreeIterResult { + CFREE_ITER_ERROR = -1, + CFREE_ITER_END = 0, + CFREE_ITER_ITEM = 1, +} CfreeIterResult; + +typedef struct CfreeSrcLoc { + uint32_t file_id; + uint32_t line; + uint32_t col; +} CfreeSrcLoc; + +typedef struct CfreeBytes { + const char *name; /* diagnostic label; may be NULL */ + const uint8_t *data; + size_t len; +} CfreeBytes; + +typedef enum CfreeArchKind { + CFREE_ARCH_X86_32, + CFREE_ARCH_X86_64, + CFREE_ARCH_ARM_32, + CFREE_ARCH_ARM_64, + CFREE_ARCH_RV32, + CFREE_ARCH_RV64, + CFREE_ARCH_WASM, +} CfreeArchKind; + +typedef enum CfreeOSKind { + CFREE_OS_FREESTANDING, + CFREE_OS_LINUX, + CFREE_OS_MACOS, + CFREE_OS_WINDOWS, + CFREE_OS_FREEBSD, + CFREE_OS_WASI, +} CfreeOSKind; + +typedef enum CfreeObjFmt { + CFREE_OBJ_ELF, + CFREE_OBJ_COFF, + CFREE_OBJ_MACHO, + CFREE_OBJ_WASM, +} CfreeObjFmt; + +typedef enum CfreePic { + CFREE_PIC_NONE, + CFREE_PIC_PIC, + CFREE_PIC_PIE, +} CfreePic; + +typedef enum CfreeCodeModel { + CFREE_CM_DEFAULT, + CFREE_CM_SMALL, + CFREE_CM_MEDIUM, + CFREE_CM_LARGE, +} CfreeCodeModel; + +typedef struct CfreeTarget { + CfreeArchKind arch; + CfreeOSKind os; + CfreeObjFmt obj; + uint8_t ptr_size; + uint8_t ptr_align; + uint8_t big_endian; + uint8_t pic; /* CfreePic */ + uint8_t code_model; /* CfreeCodeModel */ +} CfreeTarget; + +typedef enum CfreeSymBind { + CFREE_SB_LOCAL, + CFREE_SB_GLOBAL, + CFREE_SB_WEAK, +} CfreeSymBind; + +typedef enum CfreeSymKind { + 
CFREE_SK_UNDEF, + CFREE_SK_FUNC, + CFREE_SK_OBJ, + CFREE_SK_SECTION, + CFREE_SK_FILE, + CFREE_SK_COMMON, + CFREE_SK_TLS, + CFREE_SK_ABS, + CFREE_SK_NOTYPE, + CFREE_SK_IFUNC, +} CfreeSymKind; + +typedef struct CfreePathPrefixMap { + const char *old_prefix; + const char *new_prefix; +} CfreePathPrefixMap; + +typedef struct CfreeCodeOptions { + int opt_level; /* 0 direct, 1 minimal, 2 full */ + int debug_info; /* nonzero emits source/debug records when supported */ + uint64_t epoch; /* reproducible timestamp seed; 0 means no timestamp */ + const CfreePathPrefixMap *path_map; + uint32_t npath_map; +} CfreeCodeOptions; + +typedef struct CfreeHeap CfreeHeap; +struct CfreeHeap { + void *(*alloc)(CfreeHeap *, size_t size, size_t align); + void *(*realloc)(CfreeHeap *, void *p, size_t old_size, size_t new_size, + size_t align); + void (*free)(CfreeHeap *, void *p, size_t size); + void *user; +}; + +typedef enum CfreeDiagKind { + CFREE_DIAG_NOTE, + CFREE_DIAG_WARN, + CFREE_DIAG_ERROR, + CFREE_DIAG_FATAL, +} CfreeDiagKind; + +typedef struct CfreeDiagSink CfreeDiagSink; +struct CfreeDiagSink { + void (*emit)(CfreeDiagSink *, CfreeDiagKind, CfreeSrcLoc, const char *fmt, + va_list); + void *user; + uint32_t errors; /* maintained by libcfree; hosts may read */ + uint32_t warnings; /* maintained by libcfree; hosts may read */ +}; + +typedef struct CfreeWriter CfreeWriter; +struct CfreeWriter { + CfreeStatus (*write)(CfreeWriter *, const void *data, size_t n); + CfreeStatus (*seek)(CfreeWriter *, uint64_t offset); + uint64_t (*tell)(CfreeWriter *); + CfreeStatus (*status)(CfreeWriter *); + void (*close)(CfreeWriter *); +}; + +static inline CfreeStatus cfree_writer_write(CfreeWriter *w, const void *d, + size_t n) { + return w->write(w, d, n); +} + +static inline CfreeStatus cfree_writer_seek(CfreeWriter *w, uint64_t off) { + return w->seek(w, off); +} + +static inline uint64_t cfree_writer_tell(CfreeWriter *w) { return w->tell(w); } +static inline CfreeStatus 
cfree_writer_status(CfreeWriter *w) { + return w->status(w); +} +static inline void cfree_writer_close(CfreeWriter *w) { w->close(w); } + +typedef struct CfreeFileData { + const uint8_t *data; + size_t size; + void *token; +} CfreeFileData; + +typedef struct CfreeFileIO { + CfreeStatus (*read_all)(void *user, const char *path, CfreeFileData *out); + void (*release)(void *user, CfreeFileData *); + CfreeStatus (*open_writer)(void *user, const char *path, CfreeWriter **out); + void *user; +} CfreeFileIO; + +typedef struct CfreeMetrics { + void (*scope_begin)(void *user, const char *name); + void (*scope_end)(void *user, const char *name); + void (*count)(void *user, const char *name, uint64_t value); + void *user; +} CfreeMetrics; + +typedef struct CfreeContext { + CfreeHeap *heap; + /* Optional. Source compilation uses this for include resolution and output + * helpers; pure byte-oriented modules such as object, archive, DWARF, and + * disassembly ignore it. */ + const CfreeFileIO *file_io; + CfreeDiagSink *diag; + const CfreeMetrics *metrics; + /* Unix seconds, or negative when the host provides no clock. 
*/ + int64_t now; +} CfreeContext; + +CfreeStatus cfree_compiler_new(CfreeTarget, const CfreeContext *, + CfreeCompiler **out); +void cfree_compiler_free(CfreeCompiler *); +CfreeTarget cfree_compiler_target(CfreeCompiler *); + +CfreeHeap *cfree_compiler_heap(CfreeCompiler *); +const CfreeFileIO *cfree_compiler_file_io(CfreeCompiler *); +CfreeDiagSink *cfree_compiler_diag_sink(CfreeCompiler *); +int64_t cfree_compiler_now(CfreeCompiler *); +CfreeContext cfree_compiler_context(CfreeCompiler *); + +const char *cfree_compiler_file_name(CfreeCompiler *, uint32_t file_id); + +CfreeSym cfree_sym_intern(CfreeCompiler *, const char *str); +CfreeSym cfree_sym_intern_len(CfreeCompiler *, const char *str, size_t len); +const char *cfree_sym_str(CfreeCompiler *, CfreeSym, size_t *len_out); + +CfreeStatus cfree_writer_mem(CfreeHeap *, CfreeWriter **out); +const uint8_t *cfree_writer_mem_bytes(CfreeWriter *, size_t *len_out); + +#endif diff --git a/include/cfree/dbg.h b/include/cfree/dbg.h @@ -0,0 +1,120 @@ +#ifndef CFREE_DBG_H +#define CFREE_DBG_H + +#include <cfree/arch.h> +#include <cfree/jit.h> + +/* + * Controlled in-process JIT execution. + * + * A session owns the stop/resume state for one JIT image. Hosts provide the + * OS hooks through CfreeDbgHost when they need breakpointing, stepping, or + * signal-aware memory/register access. 
+ */ + +typedef struct CfreeDbgSignalOps { + CfreeStatus (*on_fault)(void *session, int signo, CfreeUnwindFrame *regs); +} CfreeDbgSignalOps; + +typedef struct CfreeDbgOs { + CfreeStatus (*thread_start)(void *user, void (*fn)(void *), void *arg, + void **thread_out); + void (*thread_join)(void *user, void *thread); + CfreeStatus (*thread_interrupt)(void *user, void *thread); + + CfreeStatus (*event_new)(void *user, void **event_out); + void (*event_free)(void *user, void *ev); + CfreeStatus (*event_wait)(void *user, void *ev); + CfreeStatus (*event_signal)(void *user, void *ev); + CfreeStatus (*event_reset)(void *user, void *ev); + + CfreeStatus (*signals_install)(void *user, const CfreeDbgSignalOps *ops, + void *session); + void (*signals_uninstall)(void *user); + int interrupt_signo; + + CfreeStatus (*code_write_begin)(void *user, void *runtime_addr, size_t n, + void **write_out); + void (*code_write_end)(void *user, void *runtime_addr, size_t n); + void (*flush_icache)(void *user, void *runtime_addr, size_t n); + + CfreeStatus (*guarded_copy)(void *user, void *dst, const void *src, + size_t n); + void *user; +} CfreeDbgOs; + +typedef struct CfreeDbgHost { + const CfreeDbgOs *os; +} CfreeDbgHost; + +typedef enum CfreeStopKind { + CFREE_STOP_BREAKPOINT, + CFREE_STOP_SIGNAL, + CFREE_STOP_EXIT, + CFREE_STOP_INTERRUPT, +} CfreeStopKind; + +typedef struct CfreeStopInfo { + CfreeStopKind kind; + int signal; + int exit_code; + uint32_t bp_id; + CfreeUnwindFrame regs; +} CfreeStopInfo; + +typedef enum CfreeResumeMode { + CFREE_RESUME_CONTINUE, + CFREE_RESUME_STEP_INSN, + CFREE_RESUME_STEP_LINE, + CFREE_RESUME_NEXT_LINE, + CFREE_RESUME_STEP_OUT, +} CfreeResumeMode; + +typedef enum CfreeEntryKind { + CFREE_ENTRY_INT_ARGV, + CFREE_ENTRY_U64, +} CfreeEntryKind; + +typedef struct CfreeBreakpointSpec { + uint64_t addr; + uint64_t skip_count; + uint64_t max_hits; + int (*condition)(void *user, const CfreeUnwindFrame *regs); + void *condition_user; +} CfreeBreakpointSpec; + 
+CfreeStatus cfree_jit_session_new(CfreeJit *, const CfreeDbgHost *, + CfreeJitSession **out); +void cfree_jit_session_free(CfreeJitSession *); +CfreeStatus cfree_jit_session_attach_dwarf(CfreeJitSession *, + CfreeDebugInfo *); + +CfreeStatus cfree_jit_session_call(CfreeJitSession *, void *entry, + CfreeEntryKind, int argc, char **argv, + CfreeStopInfo *stop_out); +CfreeStatus cfree_jit_session_call_u64(CfreeJitSession *, void *entry, + const uint64_t *args, uint32_t nargs, + uint64_t *ret_out, + CfreeStopInfo *stop_out); +CfreeStatus cfree_jit_session_resume(CfreeJitSession *, CfreeResumeMode, + CfreeStopInfo *stop_out); +CfreeStatus cfree_jit_session_interrupt(CfreeJitSession *); + +CfreeStatus cfree_jit_session_read_mem(CfreeJitSession *, uint64_t addr, + void *dst, size_t n); +CfreeStatus cfree_jit_session_write_mem(CfreeJitSession *, uint64_t addr, + const void *src, size_t n); +CfreeStatus cfree_jit_session_get_regs(CfreeJitSession *, + CfreeUnwindFrame *out); +CfreeStatus cfree_jit_session_set_regs(CfreeJitSession *, + const CfreeUnwindFrame *); + +CfreeStatus cfree_jit_session_breakpoint_set(CfreeJitSession *, uint64_t addr, + uint32_t *bp_id_out); +CfreeStatus cfree_jit_session_breakpoint_clear(CfreeJitSession *, + uint32_t bp_id); +CfreeStatus cfree_jit_session_breakpoint_set_spec(CfreeJitSession *, + const CfreeBreakpointSpec *, + uint32_t *bp_id_out); + +#endif diff --git a/include/cfree/disasm.h b/include/cfree/disasm.h @@ -0,0 +1,42 @@ +#ifndef CFREE_DISASM_H +#define CFREE_DISASM_H + +#include <cfree/object.h> + +/* + * Disassembler API. + * + * The low-level iterator decodes a byte range. Passing an object file lets the + * iterator annotate instructions with known symbols/relocations. 
+ */ + +typedef struct CfreeInsn { + uint64_t vaddr; + const uint8_t *bytes; + uint32_t nbytes; + const char *mnemonic; + const char *operands; + const char *annotation; +} CfreeInsn; + +typedef struct CfreeDisasmIter CfreeDisasmIter; + +typedef struct CfreeDisasmContext { + CfreeTarget target; + CfreeContext context; +} CfreeDisasmContext; + +CfreeStatus cfree_disasm_iter_new(const CfreeDisasmContext *, + const uint8_t *bytes, size_t len, + uint64_t vaddr, + const CfreeObjFile *annotations, + CfreeDisasmIter **out); +CfreeIterResult cfree_disasm_iter_next(CfreeDisasmIter *, CfreeInsn *out); +void cfree_disasm_iter_free(CfreeDisasmIter *); + +CfreeStatus cfree_disasm_obj(const CfreeContext *, const CfreeObjFile *, + CfreeWriter *out); +CfreeStatus cfree_disasm_obj_bytes(const CfreeContext *, const CfreeBytes *, + CfreeWriter *out); + +#endif diff --git a/include/cfree/dwarf.h b/include/cfree/dwarf.h @@ -0,0 +1,182 @@ +#ifndef CFREE_DWARF_H +#define CFREE_DWARF_H + +#include <cfree/arch.h> +#include <cfree/object.h> + +/* + * DWARF consumer API. + * + * Addresses are image-relative unless a caller explicitly translates them + * through the JIT API first. Memory reads for variable materialization are + * provided by the caller so this header does not depend on the JIT session API. 
+ */ + +typedef struct CfreeDwarfType CfreeDwarfType; + +CfreeStatus cfree_dwarf_open(const CfreeContext *, const CfreeObjFile *, + CfreeDebugInfo **out); +void cfree_dwarf_free(CfreeDebugInfo *); + +CfreeStatus cfree_dwarf_addr_to_line(CfreeDebugInfo *, uint64_t pc, + const char **file_out, + uint32_t *line_out, uint32_t *col_out); +CfreeStatus cfree_dwarf_line_to_addr(CfreeDebugInfo *, const char *file, + uint32_t line, uint64_t *pc_out); + +typedef struct CfreeDwarfLineMatch { + uint64_t pc; + const char *file; +} CfreeDwarfLineMatch; + +CfreeStatus cfree_dwarf_line_to_addr_all(CfreeDebugInfo *, const char *file, + uint32_t line, + CfreeDwarfLineMatch *out, + uint32_t cap, uint32_t *n_out); +CfreeStatus cfree_dwarf_func_at(CfreeDebugInfo *, uint64_t pc, + const char **name_out, uint64_t *low_pc_out, + uint64_t *high_pc_out); + +typedef struct CfreeDwarfSubprogram { + const char *name; + uint64_t low_pc; + uint64_t high_pc; + const char *decl_file; + uint32_t decl_line; + const CfreeDwarfType *return_type; + uint8_t inlined; +} CfreeDwarfSubprogram; + +CfreeStatus cfree_dwarf_subprogram_at(CfreeDebugInfo *, uint64_t pc, + CfreeDwarfSubprogram *out); +CfreeStatus cfree_dwarf_subprogram_named(CfreeDebugInfo *, const char *name, + CfreeDwarfSubprogram *out); +CfreeStatus cfree_dwarf_unwind_step(CfreeDebugInfo *, CfreeUnwindFrame *); + +typedef enum CfreeDwarfTypeKind { + CFREE_DT_VOID, + CFREE_DT_SINT, + CFREE_DT_UINT, + CFREE_DT_BOOL, + CFREE_DT_FLOAT, + CFREE_DT_CHAR, + CFREE_DT_PTR, + CFREE_DT_ARRAY, + CFREE_DT_STRUCT, + CFREE_DT_UNION, + CFREE_DT_ENUM, + CFREE_DT_FUNC, + CFREE_DT_TYPEDEF, +} CfreeDwarfTypeKind; + +typedef struct CfreeDwarfTypeInfo { + CfreeDwarfTypeKind kind; + uint32_t byte_size; + const char *name; + uint32_t element_count; + const CfreeDwarfType *inner; +} CfreeDwarfTypeInfo; + +CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType *); + +typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter; +typedef struct CfreeDwarfField { + const 
char *name; + uint32_t byte_offset; + uint32_t bit_offset; + uint32_t bit_size; + const CfreeDwarfType *type; +} CfreeDwarfField; + +CfreeStatus cfree_dwarf_field_iter_new(CfreeDebugInfo *, + const CfreeDwarfType *, + CfreeDwarfFieldIter **out); +CfreeIterResult cfree_dwarf_field_iter_next(CfreeDwarfFieldIter *, + CfreeDwarfField *out); +void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter *); + +typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter; +typedef struct CfreeDwarfEnumVal { + const char *name; + int64_t value; +} CfreeDwarfEnumVal; + +CfreeStatus cfree_dwarf_enum_iter_new(CfreeDebugInfo *, + const CfreeDwarfType *, + CfreeDwarfEnumIter **out); +CfreeIterResult cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter *, + CfreeDwarfEnumVal *out); +void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter *); + +typedef enum CfreeDwarfLocKind { + CFREE_DLOC_REG, + CFREE_DLOC_FRAME_OFS, + CFREE_DLOC_GLOBAL, + CFREE_DLOC_EXPR, +} CfreeDwarfLocKind; + +typedef struct CfreeDwarfVarLoc { + CfreeDwarfLocKind kind; + uint32_t byte_size; + const CfreeDwarfType *type; + union { + uint32_t reg; + int32_t frame_ofs; + uint64_t global; + struct { + const uint8_t *bytes; + size_t len; + } expr; + } v; +} CfreeDwarfVarLoc; + +typedef CfreeStatus (*CfreeDwarfReadMemFn)(void *user, uint64_t addr, + void *dst, size_t n); + +CfreeStatus cfree_dwarf_var_at(CfreeDebugInfo *, uint64_t pc, + const char *name, CfreeDwarfVarLoc *out); +CfreeStatus cfree_dwarf_loc_read(CfreeDebugInfo *, const CfreeDwarfVarLoc *, + const CfreeUnwindFrame *, + CfreeDwarfReadMemFn read_mem, void *read_user, + void *dst, size_t cap, size_t *read_out); + +typedef enum CfreeDwarfVarRole { + CFREE_DVR_LOCAL, + CFREE_DVR_ARG, + CFREE_DVR_GLOBAL, +} CfreeDwarfVarRole; + +typedef enum CfreeDwarfVarRoleMask { + CFREE_DVRM_LOCAL = 1u << CFREE_DVR_LOCAL, + CFREE_DVRM_ARG = 1u << CFREE_DVR_ARG, + CFREE_DVRM_GLOBAL = 1u << CFREE_DVR_GLOBAL, + CFREE_DVRM_ALL = CFREE_DVRM_LOCAL | CFREE_DVRM_ARG | CFREE_DVRM_GLOBAL, +} 
CfreeDwarfVarRoleMask; + +typedef struct CfreeDwarfVar { + const char *name; + CfreeDwarfVarRole role; + CfreeDwarfVarLoc loc; +} CfreeDwarfVar; + +typedef struct CfreeDwarfVarIter CfreeDwarfVarIter; + +CfreeStatus cfree_dwarf_vars_at_new(CfreeDebugInfo *, uint64_t pc, + uint32_t role_mask, + CfreeDwarfVarIter **out); +CfreeIterResult cfree_dwarf_vars_at_next(CfreeDwarfVarIter *, + CfreeDwarfVar *out); +void cfree_dwarf_vars_at_free(CfreeDwarfVarIter *); + +typedef struct CfreeDwarfParamIter CfreeDwarfParamIter; + +CfreeStatus cfree_dwarf_param_iter_new(CfreeDebugInfo *, uint64_t pc, + CfreeDwarfParamIter **out); +CfreeStatus cfree_dwarf_param_iter_new_named(CfreeDebugInfo *, + const char *name, + CfreeDwarfParamIter **out); +CfreeIterResult cfree_dwarf_param_iter_next(CfreeDwarfParamIter *, + CfreeDwarfVar *out); +void cfree_dwarf_param_iter_free(CfreeDwarfParamIter *); + +#endif diff --git a/include/cfree/emu.h b/include/cfree/emu.h @@ -0,0 +1,45 @@ +#ifndef CFREE_EMU_H +#define CFREE_EMU_H + +#include <cfree/core.h> + +/* + * User-mode guest ELF emulator. + * + * The emulator translates guest basic blocks through the cfree backend and + * executes them in-process. It is intentionally separate from the JIT API: + * embedders that only run native JIT code do not need this surface. 
+ */ + +typedef enum CfreeEmuArch { + CFREE_EMU_ARCH_AARCH64, + CFREE_EMU_ARCH_RISCV64, +} CfreeEmuArch; + +typedef enum CfreeEmuTraceFlag { + CFREE_EMU_TRACE_PC = 1u << 0, + CFREE_EMU_TRACE_INSN = 1u << 1, + CFREE_EMU_TRACE_BLOCK = 1u << 2, +} CfreeEmuTraceFlag; + +typedef uint32_t CfreeEmuTraceFlags; + +typedef struct CfreeEmuOptions { + CfreeEmuArch guest_arch; + const uint8_t *guest_elf_bytes; + size_t guest_elf_len; + int optimize; + CfreeEmuTraceFlags trace; + const char *const *argv; + const char *const *envp; +} CfreeEmuOptions; + +CfreeStatus cfree_emu_run(CfreeCompiler *, const CfreeEmuOptions *, + int *out_exit_code); +CfreeStatus cfree_emu_new(CfreeCompiler *, const CfreeEmuOptions *, + CfreeEmu **out); +CfreeStatus cfree_emu_step(CfreeEmu *, uint32_t nblocks); +void *cfree_emu_lookup(CfreeEmu *, uint64_t guest_pc); +void cfree_emu_free(CfreeEmu *); + +#endif diff --git a/include/cfree/frontend.h b/include/cfree/frontend.h @@ -1,80 +1,43 @@ #ifndef CFREE_FRONTEND_H #define CFREE_FRONTEND_H -#include <cfree.h> +#include <cfree/cg.h> +#include <cfree/source.h> +#include <cfree/support/arena.h> #include <stdarg.h> -#include <stddef.h> #include <stdint.h> -typedef struct CfreeArena CfreeArena; - -/* Arena storage for frontends. The arena is opaque; allocation is bump-style - * and released wholesale by reset/free. 
*/ -CfreeArena* cfree_arena_new(CfreeHeap*, size_t block_size); -void cfree_arena_free(CfreeArena*); -void cfree_arena_reset(CfreeArena*); -void* cfree_arena_alloc(CfreeArena*, size_t size, size_t align); -void* cfree_arena_zalloc(CfreeArena*, size_t size, size_t align); -char* cfree_arena_strdup(CfreeArena*, const char* s, size_t len); - -#define cfree_arena_new_obj(a, T) \ - ((T*)cfree_arena_alloc((a), sizeof(T), _Alignof(T))) -#define cfree_arena_znew_obj(a, T) \ - ((T*)cfree_arena_zalloc((a), sizeof(T), _Alignof(T))) -#define cfree_arena_array(a, T, n) \ - ((T*)cfree_arena_alloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) -#define cfree_arena_zarray(a, T, n) \ - ((T*)cfree_arena_zalloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) - -/* Compiler-attached host services. These expose the vtables already present - * in CfreeEnv without exposing CfreeCompiler's internal layout. */ -CfreeHeap* cfree_compiler_heap(CfreeCompiler*); -const CfreeFileIO* cfree_compiler_file_io(CfreeCompiler*); -int64_t cfree_compiler_now(CfreeCompiler*); - -/* Symbol pool helpers. cfree_sym_intern is the c-string convenience in - * <cfree.h>; the length-taking form is needed by lexers and preprocessors. */ -CfreeSym cfree_sym_intern_len(CfreeCompiler*, const char* str, size_t len); -const char* cfree_sym_str(CfreeCompiler*, CfreeSym, size_t* len_out); - -/* Source-file registration and include-edge recording. 
*/ -uint32_t cfree_source_add_file(CfreeCompiler*, const char* path, - int system_header); -uint32_t cfree_source_add_memory(CfreeCompiler*, const char* name); -uint32_t cfree_source_add_builtin(CfreeCompiler*, const char* name); -void cfree_source_add_include(CfreeCompiler*, uint32_t includer_file_id, - uint32_t included_file_id, CfreeSrcLoc loc, - int system); - -typedef struct CfreeSourceFile { - uint32_t id; - CfreeSym name; - CfreeSym path; - uint8_t kind; - uint8_t system_header; - uint16_t pad; -} CfreeSourceFile; - -int cfree_source_file(CfreeCompiler*, uint32_t file_id, CfreeSourceFile* out); - -/* Frontend panic boundary. Frontend entry points called by cfree_compile_obj* - * already run under libcfree's top-level boundary; standalone frontend helpers - * such as preprocess/token-dump can use cfree_frontend_run to get the same - * behavior without seeing jmp_buf or CfreeCompiler internals. */ -typedef int (*CfreeFrontendRunFn)(CfreeCompiler*, void* user); -int cfree_frontend_run(CfreeCompiler*, CfreeFrontendRunFn, void* user); +/* + * Language frontend convenience API. + * + * This header is the intended one-stop include for source language + * implementations. It includes: + * - cfree/cg.h for code emission + * - cfree/source.h for source file ids and include-edge recording + * - cfree/support/arena.h for short-lived frontend allocation + * + * The declarations below are the frontend execution boundary and host-service + * shims that do not belong to codegen, source registry, or allocation. + * + * Frontend entry points called by cfree_compile_* already run under this + * boundary. Standalone frontend helpers such as preprocess/token-dump can use + * cfree_frontend_run to get the same behavior without exposing libcfree's + * internal panic machinery. + */ +typedef CfreeStatus (*CfreeFrontendRunFn)(CfreeCompiler *, void *user); +CfreeStatus cfree_frontend_run(CfreeCompiler *, CfreeFrontendRunFn, void *user); /* Optional metrics bridge for frontends. 
These are no-ops unless the host - * supplied CfreeEnv.metrics. Frontends use this public shim instead of + * supplied CfreeContext.metrics. Frontends use this public shim instead of * depending on libcfree's internal core headers. */ -void cfree_frontend_metrics_scope_begin(CfreeCompiler*, const char* name); -void cfree_frontend_metrics_scope_end(CfreeCompiler*, const char* name); -void cfree_frontend_metrics_count(CfreeCompiler*, const char* name, +void cfree_frontend_metrics_scope_begin(CfreeCompiler *, const char *name); +void cfree_frontend_metrics_scope_end(CfreeCompiler *, const char *name); +void cfree_frontend_metrics_count(CfreeCompiler *, const char *name, uint64_t value); -_Noreturn void cfree_frontend_fatal(CfreeCompiler*, CfreeSrcLoc, - const char* fmt, ...); -_Noreturn void cfree_frontend_vfatal(CfreeCompiler*, CfreeSrcLoc, - const char* fmt, va_list); +_Noreturn void cfree_frontend_fatal(CfreeCompiler *, CfreeSrcLoc, + const char *fmt, ...); +_Noreturn void cfree_frontend_vfatal(CfreeCompiler *, CfreeSrcLoc, + const char *fmt, va_list); #endif diff --git a/include/cfree/hashmap.h b/include/cfree/hashmap.h @@ -1,170 +0,0 @@ -#ifndef CFREE_HASHMAP_H -#define CFREE_HASHMAP_H - -#include <cfree.h> -#include <stdint.h> -#include <string.h> - -static inline uint32_t cfree_hash_u32(uint32_t x) { - x += 0x9e3779b9u; - x ^= x >> 16; - x *= 0x7feb352du; - x ^= x >> 15; - x *= 0x846ca68bu; - x ^= x >> 16; - return x; -} - -static inline uint32_t cfree_hash_u64(uint64_t x) { - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - return (uint32_t)x; -} - -#define CFREE_HASHMAP_LOAD_NUM 3u -#define CFREE_HASHMAP_LOAD_DEN 4u -#define CFREE_HASHMAP_INIT_CAP 16u - -#define CFREE_HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) \ - typedef struct NAME##Slot { \ - KT k; \ - VT v; \ - } NAME##Slot; \ - typedef struct NAME { \ - CfreeHeap* heap; \ - NAME##Slot* slots; \ - uint32_t cap; \ - uint32_t used; \ - } NAME; \ - \ - 
__attribute__((unused)) static void NAME##_resize(NAME* m, \ - uint32_t new_cap) { \ - NAME##Slot* fresh; \ - uint32_t i, mask; \ - fresh = (NAME##Slot*)m->heap->alloc(m->heap, sizeof(*fresh) * new_cap, \ - _Alignof(NAME##Slot)); \ - if (!fresh) return; \ - memset(fresh, 0, sizeof(*fresh) * new_cap); \ - mask = new_cap - 1u; \ - for (i = 0; i < m->cap; ++i) { \ - KT k = m->slots[i].k; \ - uint32_t j; \ - if (!(k)) continue; \ - j = HASH_FN(k) & mask; \ - while (fresh[j].k) j = (j + 1u) & mask; \ - fresh[j] = m->slots[i]; \ - } \ - if (m->slots) \ - m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ - m->slots = fresh; \ - m->cap = new_cap; \ - } \ - \ - __attribute__((unused)) static inline void NAME##_init_cap( \ - NAME* m, CfreeHeap* h, uint32_t cap) { \ - m->heap = h; \ - m->slots = NULL; \ - m->cap = 0; \ - m->used = 0; \ - if (cap) NAME##_resize(m, cap); \ - } \ - \ - __attribute__((unused)) static inline void NAME##_init(NAME* m, \ - CfreeHeap* h) { \ - NAME##_init_cap(m, h, CFREE_HASHMAP_INIT_CAP); \ - } \ - \ - __attribute__((unused)) static inline void NAME##_fini(NAME* m) { \ - if (m->slots) \ - m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ - m->slots = NULL; \ - m->cap = m->used = 0; \ - } \ - \ - __attribute__((unused)) static inline VT* NAME##_get(const NAME* m, KT k) { \ - uint32_t mask, j; \ - if (m->cap == 0 || !(k)) return NULL; \ - mask = m->cap - 1u; \ - j = HASH_FN(k) & mask; \ - while (m->slots[j].k) { \ - if (m->slots[j].k == (k)) return &m->slots[j].v; \ - j = (j + 1u) & mask; \ - } \ - return NULL; \ - } \ - \ - __attribute__((unused)) static inline int NAME##_set(NAME* m, KT k, VT v) { \ - uint32_t mask, j; \ - if (m->cap == 0 || \ - m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \ - NAME##_resize(m, m->cap ? 
m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \ - mask = m->cap - 1u; \ - j = HASH_FN(k) & mask; \ - while (m->slots[j].k) { \ - if (m->slots[j].k == (k)) { \ - m->slots[j].v = (v); \ - return 0; \ - } \ - j = (j + 1u) & mask; \ - } \ - m->slots[j].k = (k); \ - m->slots[j].v = (v); \ - m->used++; \ - return 1; \ - } \ - \ - __attribute__((unused)) static inline int NAME##_try_insert( \ - NAME* m, KT k, VT v, VT* existing_out) { \ - uint32_t mask, j; \ - if (m->cap == 0 || \ - m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \ - NAME##_resize(m, m->cap ? m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \ - mask = m->cap - 1u; \ - j = HASH_FN(k) & mask; \ - while (m->slots[j].k) { \ - if (m->slots[j].k == (k)) { \ - if (existing_out) *existing_out = m->slots[j].v; \ - return 0; \ - } \ - j = (j + 1u) & mask; \ - } \ - m->slots[j].k = (k); \ - m->slots[j].v = (v); \ - m->used++; \ - return 1; \ - } \ - \ - __attribute__((unused)) static inline void NAME##_del(NAME* m, KT k) { \ - uint32_t mask, j; \ - if (m->cap == 0 || !(k)) return; \ - mask = m->cap - 1u; \ - j = HASH_FN(k) & mask; \ - while (m->slots[j].k) { \ - if (m->slots[j].k == (k)) { \ - uint32_t i = (j + 1u) & mask; \ - m->slots[j].k = 0; \ - m->used--; \ - while (m->slots[i].k) { \ - KT rk = m->slots[i].k; \ - VT rv = m->slots[i].v; \ - uint32_t nh; \ - m->slots[i].k = 0; \ - m->used--; \ - nh = HASH_FN(rk) & mask; \ - while (m->slots[nh].k) nh = (nh + 1u) & mask; \ - m->slots[nh].k = rk; \ - m->slots[nh].v = rv; \ - m->used++; \ - i = (i + 1u) & mask; \ - } \ - return; \ - } \ - j = (j + 1u) & mask; \ - } \ - } \ - struct NAME - -#endif diff --git a/include/cfree/jit.h b/include/cfree/jit.h @@ -0,0 +1,76 @@ +#ifndef CFREE_JIT_H +#define CFREE_JIT_H + +#include <cfree/arch.h> +#include <cfree/object.h> +#include <cfree/objbuild.h> + +/* + * JIT image API. + * + * Linker-produced JIT images own mapped pages, resolved symbols, and a + * read-only object view for inspection/debug consumers. 
+ */ + +enum { + CFREE_PROT_NONE = 0, + CFREE_PROT_READ = 1 << 0, + CFREE_PROT_WRITE = 1 << 1, + CFREE_PROT_EXEC = 1 << 2, +}; + +typedef struct CfreeExecMemRegion { + void *write; + void *runtime; + size_t size; + void *token; +} CfreeExecMemRegion; + +typedef struct CfreeExecMem { + size_t page_size; + CfreeStatus (*reserve)(void *user, size_t size, int prot, + CfreeExecMemRegion *out); + CfreeStatus (*protect)(void *user, void *addr, size_t size, int prot); + void (*release)(void *user, CfreeExecMemRegion *region); + void (*flush_icache)(void *user, void *addr, size_t size); + void *user; +} CfreeExecMem; + +typedef struct CfreeJitTls { + void *(*ctx_new)(void *user, const void *init_bytes, size_t image_filesz, + size_t image_size, size_t align); + void (*ctx_destroy)(void *user, void *ctx); + void *user; +} CfreeJitTls; + +typedef struct CfreeJitHost { + const CfreeExecMem *execmem; + const CfreeJitTls *tls; +} CfreeJitHost; + +void cfree_jit_free(CfreeJit *); +void *cfree_jit_lookup(CfreeJit *, const char *name); +CfreeStatus cfree_jit_append_obj(CfreeJit *, CfreeObjBuilder *); +uint64_t cfree_jit_generation(CfreeJit *); +void cfree_jit_run_dtors(CfreeJit *); + +const CfreeObjFile *cfree_jit_view(CfreeJit *); +CfreeStatus cfree_jit_addr_to_sym(CfreeJit *, uint64_t addr, + const char **name_out, uint64_t *off_out); +uint64_t cfree_jit_runtime_to_image(CfreeJit *, uint64_t runtime_pc); +uint64_t cfree_jit_image_to_runtime(CfreeJit *, uint64_t image_vaddr); + +typedef struct CfreeJitSymIter CfreeJitSymIter; + +typedef struct CfreeJitSym { + const char *name; + uint64_t addr; + uint64_t size; + CfreeSymKind kind; +} CfreeJitSym; + +CfreeStatus cfree_jit_sym_iter_new(CfreeJit *, CfreeJitSymIter **out); +CfreeIterResult cfree_jit_sym_iter_next(CfreeJitSymIter *, CfreeJitSym *out); +void cfree_jit_sym_iter_free(CfreeJitSymIter *); + +#endif diff --git a/include/cfree/link.h b/include/cfree/link.h @@ -0,0 +1,184 @@ +#ifndef CFREE_LINK_H +#define CFREE_LINK_H + 
+#include <cfree/core.h> +#include <cfree/objbuild.h> + +/* + * Linker API. + * + * Inputs are explicit byte/object arrays. Path lookup, option parsing, and + * response-file handling are driver responsibilities. + */ + +typedef void *(*CfreeExternResolver)(void *user, const char *name); +typedef struct CfreeJitHost CfreeJitHost; + +typedef enum CfreeBuildIdMode { + CFREE_BUILDID_NONE, + CFREE_BUILDID_SHA256, + CFREE_BUILDID_UUID, + CFREE_BUILDID_USER, +} CfreeBuildIdMode; + +typedef struct CfreeLinkExpr CfreeLinkExpr; + +typedef enum CfreeLinkExprKind { + CFREE_LE_INT, + CFREE_LE_DOT, + CFREE_LE_SYM, + CFREE_LE_REGION_ORIGIN, + CFREE_LE_REGION_LENGTH, + CFREE_LE_ADD, + CFREE_LE_SUB, + CFREE_LE_MUL, + CFREE_LE_DIV, + CFREE_LE_AND, + CFREE_LE_OR, + CFREE_LE_XOR, + CFREE_LE_SHL, + CFREE_LE_SHR, + CFREE_LE_ALIGN, + CFREE_LE_MAX, + CFREE_LE_MIN, +} CfreeLinkExprKind; + +struct CfreeLinkExpr { + uint8_t kind; /* CfreeLinkExprKind */ + union { + int64_t int_val; + const char *name; + struct { + const CfreeLinkExpr *lhs; + const CfreeLinkExpr *rhs; + } bin; + struct { + const CfreeLinkExpr *val; + const CfreeLinkExpr *align; + } align; + } v; +}; + +typedef enum CfreeLinkRegionFlag { + CFREE_LRF_R = 1u << 0, + CFREE_LRF_W = 1u << 1, + CFREE_LRF_X = 1u << 2, +} CfreeLinkRegionFlag; + +typedef struct CfreeLinkRegion { + const char *name; + uint8_t flags; /* CfreeLinkRegionFlag */ + uint64_t origin; + uint64_t length; +} CfreeLinkRegion; + +typedef struct CfreeLinkInputMatch { + const char *file_pattern; /* NULL means "*" */ + const char *section_pattern; + int keep; +} CfreeLinkInputMatch; + +typedef enum CfreeLinkAsnKind { + CFREE_LAS_DOT, + CFREE_LAS_SYM, + CFREE_LAS_PROVIDE, +} CfreeLinkAsnKind; + +typedef struct CfreeLinkAssignment { + uint8_t kind; /* CfreeLinkAsnKind */ + const char *sym; + const CfreeLinkExpr *expr; +} CfreeLinkAssignment; + +typedef struct CfreeLinkOutputSection { + const char *name; + const CfreeLinkExpr *vma; + const CfreeLinkExpr *lma; + const 
CfreeLinkInputMatch *inputs; + uint32_t ninputs; + const char *region; + const char *load_region; + const CfreeLinkAssignment *asns; + uint32_t nasns; +} CfreeLinkOutputSection; + +typedef struct CfreeLinkScript { + const char *entry; + const CfreeLinkRegion *regions; + uint32_t nregions; + const CfreeLinkOutputSection *sections; + uint32_t nsections; + const CfreeLinkAssignment *top_asns; + uint32_t ntop_asns; +} CfreeLinkScript; + +CfreeStatus cfree_link_script_parse(const CfreeContext *, const char *text, + size_t len, CfreeLinkScript **out); +void cfree_link_script_free(const CfreeContext *, CfreeLinkScript *); + +typedef enum CfreeLinkMode { + CFREE_LM_DEFAULT, + CFREE_LM_STATIC, + CFREE_LM_DYNAMIC, + CFREE_LM_AS_NEEDED, +} CfreeLinkMode; + +typedef struct CfreeLinkArchiveInput { + CfreeBytes bytes; + uint8_t link_mode; /* CfreeLinkMode */ + uint8_t whole_archive; + uint8_t group_id; + uint8_t pad; +} CfreeLinkArchiveInput; + +typedef struct CfreeLinkInputs { + CfreeObjBuilder *const *objs; + uint32_t nobjs; + const CfreeBytes *obj_bytes; + uint32_t nobj_bytes; + const CfreeLinkArchiveInput *archives; + uint32_t narchives; + const CfreeBytes *dso_bytes; + uint32_t ndso_bytes; + const CfreeLinkScript *linker_script; + const char *entry; + uint8_t build_id_mode; /* CfreeBuildIdMode */ + const uint8_t *build_id_bytes; + uint32_t build_id_len; +} CfreeLinkInputs; + +typedef struct CfreeExeLinkOptions { + CfreeLinkInputs inputs; + int gc_sections; + int pie; + const char *interp_path; +} CfreeExeLinkOptions; + +typedef struct CfreeSharedLinkOptions { + CfreeLinkInputs inputs; + const char *soname; + const char *const *rpaths; + uint32_t nrpaths; + const char *const *runpaths; + uint32_t nrunpaths; + const char *const *exports; + uint32_t nexports; + int allow_undefined; + int gc_sections; +} CfreeSharedLinkOptions; + +typedef struct CfreeJitLinkOptions { + CfreeLinkInputs inputs; + int gc_sections; + CfreeExternResolver extern_resolver; + void 
*extern_resolver_user; +} CfreeJitLinkOptions; + +CfreeStatus cfree_link_exe(CfreeCompiler *, const CfreeExeLinkOptions *, + CfreeWriter *out); +CfreeStatus cfree_link_shared(CfreeCompiler *, const CfreeSharedLinkOptions *, + CfreeWriter *out); +CfreeStatus cfree_link_jit(CfreeCompiler *, const CfreeJitLinkOptions *, + const CfreeJitHost *, CfreeJit **out_jit); + +#endif diff --git a/include/cfree/objbuild.h b/include/cfree/objbuild.h @@ -0,0 +1,82 @@ +#ifndef CFREE_OBJBUILD_H +#define CFREE_OBJBUILD_H + +#include <cfree/objmodel.h> + +/* + * Format-neutral relocatable object builder. + * + * This is for embedders that want to synthesize object files directly without + * going through the CG API. It is intentionally lower level than cfree/cg.h: + * callers choose sections, symbol bindings, raw bytes, and target relocation + * kinds themselves. + */ + +typedef struct CfreeObjSectionDesc { + CfreeSym name; + CfreeSecKind kind; + uint32_t flags; /* CfreeSecFlag */ + uint32_t align; /* 0 means default; otherwise power of two */ + uint32_t entsize; /* 0 means none */ +} CfreeObjSectionDesc; + +typedef struct CfreeObjSymbolDesc { + CfreeSym name; + CfreeSymBind bind; + CfreeSymKind kind; + CfreeObjSection section; /* CFREE_SECTION_NONE for undef/abs/common */ + uint64_t value; + uint64_t size; +} CfreeObjSymbolDesc; + +typedef struct CfreeObjRelocDesc { + CfreeObjSection section; + uint64_t offset; + CfreeRelocKind kind; + CfreeObjSymbol symbol; + int64_t addend; +} CfreeObjRelocDesc; + +CfreeStatus cfree_obj_builder_new(CfreeCompiler *, CfreeObjBuilder **out); +void cfree_obj_builder_free(CfreeObjBuilder *); + +CfreeStatus cfree_obj_builder_section(CfreeObjBuilder *, + const CfreeObjSectionDesc *, + CfreeObjSection *out); +CfreeStatus cfree_obj_builder_section_group(CfreeObjBuilder *, CfreeObjSection, + CfreeObjGroup); + +CfreeStatus cfree_obj_builder_pos(CfreeObjBuilder *, CfreeObjSection, + uint64_t *out); +CfreeStatus cfree_obj_builder_align(CfreeObjBuilder *, 
CfreeObjSection, + uint32_t align, uint64_t *new_pos_out); +CfreeStatus cfree_obj_builder_write(CfreeObjBuilder *, CfreeObjSection, + const void *data, size_t n); +CfreeStatus cfree_obj_builder_reserve(CfreeObjBuilder *, CfreeObjSection, + size_t n, void **out); +CfreeStatus cfree_obj_builder_reserve_bss(CfreeObjBuilder *, CfreeObjSection, + uint64_t size, uint32_t align); +CfreeStatus cfree_obj_builder_patch(CfreeObjBuilder *, CfreeObjSection, + uint64_t offset, const void *data, + size_t n); + +CfreeStatus cfree_obj_builder_symbol(CfreeObjBuilder *, + const CfreeObjSymbolDesc *, + CfreeObjSymbol *out); +CfreeStatus cfree_obj_builder_symbol_define(CfreeObjBuilder *, CfreeObjSymbol, + CfreeObjSection, uint64_t value, + uint64_t size); +CfreeStatus cfree_obj_builder_reloc(CfreeObjBuilder *, + const CfreeObjRelocDesc *); + +CfreeStatus cfree_obj_builder_group(CfreeObjBuilder *, CfreeSym name, + CfreeObjSymbol signature, uint32_t flags, + CfreeObjGroup *out); +CfreeStatus cfree_obj_builder_group_add_section(CfreeObjBuilder *, + CfreeObjGroup, + CfreeObjSection); + +CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder *); +CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder *, CfreeWriter *); + +#endif diff --git a/include/cfree/object.h b/include/cfree/object.h @@ -0,0 +1,59 @@ +#ifndef CFREE_OBJECT_H +#define CFREE_OBJECT_H + +#include <cfree/objmodel.h> + +/* + * Object-file detection and read-only inspection. + * + * Object readers keep the caller's byte storage borrowed for the lifetime of + * CfreeObjFile. Strings returned by iterators are owned by the object file and + * remain valid until cfree_obj_free. 
+ */ + +typedef enum CfreeBinFmt { + CFREE_BIN_UNKNOWN = 0, + CFREE_BIN_AR, + CFREE_BIN_ELF, + CFREE_BIN_COFF, + CFREE_BIN_PE, + CFREE_BIN_MACHO, + CFREE_BIN_WASM, +} CfreeBinFmt; + +typedef struct CfreeObjSymIter CfreeObjSymIter; +typedef struct CfreeObjRelocIter CfreeObjRelocIter; + +CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len); +CfreeStatus cfree_detect_target(const uint8_t *data, size_t len, + CfreeTarget *out); + +CfreeStatus cfree_obj_open(const CfreeContext *, const CfreeBytes *, + CfreeObjFile **out); +void cfree_obj_free(CfreeObjFile *); + +CfreeObjFmt cfree_obj_fmt(const CfreeObjFile *); +CfreeTarget cfree_obj_target(const CfreeObjFile *); + +uint32_t cfree_obj_nsections(const CfreeObjFile *); +CfreeStatus cfree_obj_section(const CfreeObjFile *, CfreeObjSection idx, + CfreeObjSecInfo *out); +CfreeStatus cfree_obj_section_data(const CfreeObjFile *, CfreeObjSection idx, + const uint8_t **data_out, size_t *len_out); +CfreeStatus cfree_obj_section_by_name(const CfreeObjFile *, const char *name, + CfreeObjSection *out); + +CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile *, const char *name, + CfreeObjSymInfo *out); + +CfreeStatus cfree_obj_symiter_new(CfreeObjFile *, CfreeObjSymIter **out); +CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter *, + CfreeObjSymInfo *out); +void cfree_obj_symiter_free(CfreeObjSymIter *); + +CfreeStatus cfree_obj_reliter_new(CfreeObjFile *, CfreeObjRelocIter **out); +CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter *, + CfreeObjReloc *out); +void cfree_obj_reliter_free(CfreeObjRelocIter *); + +#endif diff --git a/include/cfree/objmodel.h b/include/cfree/objmodel.h @@ -0,0 +1,75 @@ +#ifndef CFREE_OBJMODEL_H +#define CFREE_OBJMODEL_H + +#include <cfree/core.h> + +/* + * Format-neutral object model shared by object builders, readers, linkers, + * disassemblers, and JIT image inspection. 
+ */ + +#define CFREE_SECTION_NONE UINT32_MAX +#define CFREE_OBJ_SYMBOL_NONE UINT32_MAX +#define CFREE_OBJ_GROUP_NONE UINT32_MAX + +typedef uint32_t CfreeObjSection; +typedef uint32_t CfreeObjSymbol; +typedef uint32_t CfreeObjGroup; + +typedef enum CfreeSecKind { + CFREE_SEC_TEXT, + CFREE_SEC_RODATA, + CFREE_SEC_DATA, + CFREE_SEC_BSS, + CFREE_SEC_DEBUG, + CFREE_SEC_OTHER, +} CfreeSecKind; + +typedef enum CfreeSecFlag { + CFREE_SF_EXEC = 1u << 0, + CFREE_SF_WRITE = 1u << 1, + CFREE_SF_ALLOC = 1u << 2, + CFREE_SF_TLS = 1u << 3, + CFREE_SF_MERGE = 1u << 4, + CFREE_SF_STRINGS = 1u << 5, +} CfreeSecFlag; + +typedef enum CfreeObjGroupFlag { + CFREE_OBJ_GROUP_COMDAT = 1u << 0, +} CfreeObjGroupFlag; + +typedef struct CfreeRelocKind { + CfreeArchKind arch; + CfreeObjFmt obj_fmt; + uint32_t code; +} CfreeRelocKind; + +typedef struct CfreeObjSecInfo { + const char *name; + CfreeSecKind kind; + uint32_t flags; /* CfreeSecFlag */ + uint64_t size; /* bytes; BSS uses virtual size */ + uint32_t align; /* power of two; 1 means no special alignment */ + uint32_t entsize; /* section entry size, or 0 */ +} CfreeObjSecInfo; + +typedef struct CfreeObjSymInfo { + const char *name; + CfreeSymBind bind; + CfreeSymKind kind; + CfreeObjSection section; + uint64_t value; + uint64_t size; +} CfreeObjSymInfo; + +typedef struct CfreeObjReloc { + CfreeObjSection section; + uint64_t offset; + CfreeObjSymbol sym; + const char *sym_name; + int64_t addend; + CfreeRelocKind kind; + const char *kind_name; /* diagnostic spelling, when known */ +} CfreeObjReloc; + +#endif diff --git a/include/cfree/source.h b/include/cfree/source.h @@ -0,0 +1,36 @@ +#ifndef CFREE_SOURCE_H +#define CFREE_SOURCE_H + +#include <cfree/core.h> + +/* + * Compiler source registry. + * + * Language frontends use this to assign stable file ids to physical files, + * in-memory inputs, and builtin pseudo-files, then record include edges for + * dependency reporting and diagnostics. 
+ */ + +CfreeStatus cfree_source_add_file(CfreeCompiler *, const char *path, + int system_header, uint32_t *file_id_out); +CfreeStatus cfree_source_add_memory(CfreeCompiler *, const char *name, + uint32_t *file_id_out); +CfreeStatus cfree_source_add_builtin(CfreeCompiler *, const char *name, + uint32_t *file_id_out); +CfreeStatus cfree_source_add_include(CfreeCompiler *, uint32_t includer_file_id, + uint32_t included_file_id, + CfreeSrcLoc loc, int system); + +typedef struct CfreeSourceFile { + uint32_t id; + CfreeSym name; + CfreeSym path; + uint8_t kind; + uint8_t system_header; + uint16_t pad; +} CfreeSourceFile; + +CfreeStatus cfree_source_file(CfreeCompiler *, uint32_t file_id, + CfreeSourceFile *out); + +#endif diff --git a/include/cfree/support/arena.h b/include/cfree/support/arena.h @@ -0,0 +1,30 @@ +#ifndef CFREE_SUPPORT_ARENA_H +#define CFREE_SUPPORT_ARENA_H + +#include <cfree/core.h> + +/* + * Opaque bump allocator for frontends and other short-lived public helpers. + * Individual allocations are not freed; reset/free releases arena storage in + * bulk. 
+ */ + +typedef struct CfreeArena CfreeArena; + +CfreeStatus cfree_arena_new(CfreeHeap *, size_t block_size, CfreeArena **out); +void cfree_arena_free(CfreeArena *); +void cfree_arena_reset(CfreeArena *); +void *cfree_arena_alloc(CfreeArena *, size_t size, size_t align); +void *cfree_arena_zalloc(CfreeArena *, size_t size, size_t align); +char *cfree_arena_strdup(CfreeArena *, const char *s, size_t len); + +#define cfree_arena_new_obj(a, T) \ + ((T *)cfree_arena_alloc((a), sizeof(T), _Alignof(T))) +#define cfree_arena_znew_obj(a, T) \ + ((T *)cfree_arena_zalloc((a), sizeof(T), _Alignof(T))) +#define cfree_arena_array(a, T, n) \ + ((T *)cfree_arena_alloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) +#define cfree_arena_zarray(a, T, n) \ + ((T *)cfree_arena_zalloc((a), sizeof(T) * (size_t)(n), _Alignof(T))) + +#endif diff --git a/include/cfree/support/hashmap.h b/include/cfree/support/hashmap.h @@ -0,0 +1,176 @@ +#ifndef CFREE_SUPPORT_HASHMAP_H +#define CFREE_SUPPORT_HASHMAP_H + +#include <cfree/core.h> +#include <stdint.h> +#include <string.h> + +static inline uint32_t cfree_hash_u32(uint32_t x) { + x += 0x9e3779b9u; + x ^= x >> 16; + x *= 0x7feb352du; + x ^= x >> 15; + x *= 0x846ca68bu; + x ^= x >> 16; + return x; +} + +static inline uint32_t cfree_hash_u64(uint64_t x) { + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return (uint32_t)x; +} + +#define CFREE_HASHMAP_LOAD_NUM 3u +#define CFREE_HASHMAP_LOAD_DEN 4u +#define CFREE_HASHMAP_INIT_CAP 16u + +#if defined(__GNUC__) || defined(__clang__) +#define CFREE_HASHMAP_UNUSED __attribute__((unused)) +#else +#define CFREE_HASHMAP_UNUSED +#endif + +#define CFREE_HASHMAP_DEFINE(NAME, KT, VT, HASH_FN) \ + typedef struct NAME##Slot { \ + KT k; \ + VT v; \ + } NAME##Slot; \ + typedef struct NAME { \ + CfreeHeap* heap; \ + NAME##Slot* slots; \ + uint32_t cap; \ + uint32_t used; \ + } NAME; \ + \ + CFREE_HASHMAP_UNUSED static void NAME##_resize(NAME* m, \ + uint32_t 
new_cap) { \ + NAME##Slot* fresh; \ + uint32_t i, mask; \ + fresh = (NAME##Slot*)m->heap->alloc(m->heap, sizeof(*fresh) * new_cap, \ + _Alignof(NAME##Slot)); \ + if (!fresh) return; \ + memset(fresh, 0, sizeof(*fresh) * new_cap); \ + mask = new_cap - 1u; \ + for (i = 0; i < m->cap; ++i) { \ + KT k = m->slots[i].k; \ + uint32_t j; \ + if (!(k)) continue; \ + j = HASH_FN(k) & mask; \ + while (fresh[j].k) j = (j + 1u) & mask; \ + fresh[j] = m->slots[i]; \ + } \ + if (m->slots) \ + m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ + m->slots = fresh; \ + m->cap = new_cap; \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline void NAME##_init_cap( \ + NAME* m, CfreeHeap* h, uint32_t cap) { \ + m->heap = h; \ + m->slots = NULL; \ + m->cap = 0; \ + m->used = 0; \ + if (cap) NAME##_resize(m, cap); \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline void NAME##_init(NAME* m, \ + CfreeHeap* h) { \ + NAME##_init_cap(m, h, CFREE_HASHMAP_INIT_CAP); \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline void NAME##_fini(NAME* m) { \ + if (m->slots) \ + m->heap->free(m->heap, m->slots, sizeof(*m->slots) * m->cap); \ + m->slots = NULL; \ + m->cap = m->used = 0; \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline VT* NAME##_get(const NAME* m, KT k) { \ + uint32_t mask, j; \ + if (m->cap == 0 || !(k)) return NULL; \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) return &m->slots[j].v; \ + j = (j + 1u) & mask; \ + } \ + return NULL; \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline int NAME##_set(NAME* m, KT k, VT v) { \ + uint32_t mask, j; \ + if (m->cap == 0 || \ + m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \ + NAME##_resize(m, m->cap ? 
m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + m->slots[j].v = (v); \ + return 0; \ + } \ + j = (j + 1u) & mask; \ + } \ + m->slots[j].k = (k); \ + m->slots[j].v = (v); \ + m->used++; \ + return 1; \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline int NAME##_try_insert( \ + NAME* m, KT k, VT v, VT* existing_out) { \ + uint32_t mask, j; \ + if (m->cap == 0 || \ + m->used * CFREE_HASHMAP_LOAD_DEN >= m->cap * CFREE_HASHMAP_LOAD_NUM) \ + NAME##_resize(m, m->cap ? m->cap * 2u : CFREE_HASHMAP_INIT_CAP); \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + if (existing_out) *existing_out = m->slots[j].v; \ + return 0; \ + } \ + j = (j + 1u) & mask; \ + } \ + m->slots[j].k = (k); \ + m->slots[j].v = (v); \ + m->used++; \ + return 1; \ + } \ + \ + CFREE_HASHMAP_UNUSED static inline void NAME##_del(NAME* m, KT k) { \ + uint32_t mask, j; \ + if (m->cap == 0 || !(k)) return; \ + mask = m->cap - 1u; \ + j = HASH_FN(k) & mask; \ + while (m->slots[j].k) { \ + if (m->slots[j].k == (k)) { \ + uint32_t i = (j + 1u) & mask; \ + m->slots[j].k = 0; \ + m->used--; \ + while (m->slots[i].k) { \ + KT rk = m->slots[i].k; \ + VT rv = m->slots[i].v; \ + uint32_t nh; \ + m->slots[i].k = 0; \ + m->used--; \ + nh = HASH_FN(rk) & mask; \ + while (m->slots[nh].k) nh = (nh + 1u) & mask; \ + m->slots[nh].k = rk; \ + m->slots[nh].v = rv; \ + m->used++; \ + i = (i + 1u) & mask; \ + } \ + return; \ + } \ + j = (j + 1u) & mask; \ + } \ + } \ + struct NAME + +#endif