commit 106346286e80596ca1f647a9ec69c2120494d34a
parent b212b95462ccf02a5f87c6ee4ee1bd24a8e6cb6e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 8 May 2026 16:59:16 -0700
include/cfree.h: JIT sessions, debug info, target pic/code-model
Diffstat:
| M | include/cfree.h | | | 654 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
1 file changed, 637 insertions(+), 17 deletions(-)
diff --git a/include/cfree.h b/include/cfree.h
@@ -17,6 +17,9 @@
typedef struct CfreeCompiler CfreeCompiler;
typedef struct CfreeObjBuilder CfreeObjBuilder;
typedef struct CfreeJit CfreeJit;
+typedef struct CfreeJitSession CfreeJitSession;
+typedef struct CfreeObjFile CfreeObjFile;
+typedef struct CfreeDebugInfo CfreeDebugInfo;
/* ============================================================
* Source locations (carried in diagnostics)
@@ -28,6 +31,20 @@ typedef struct CfreeSrcLoc {
} CfreeSrcLoc;
/* ============================================================
+ * Frame snapshot
+ * ============================================================
+ * Canonical register snapshot. Shared by the unwinder (cfree_dwarf_unwind_step)
+ * and the JIT session's stop notifications (CfreeStopInfo.regs). `pc` and
+ * `cfa` are the program counter and canonical frame address; `regs` uses the
+ * target arch's DWARF register numbering. Registers beyond the arch's defined
+ * range are zero. */
+typedef struct CfreeUnwindFrame {
+ uint64_t pc; /* program counter */
+ uint64_t cfa; /* canonical frame address */
+ uint64_t regs[32]; /* indexed by the target arch's DWARF register number; entries beyond the arch's defined range are zero */
+} CfreeUnwindFrame;
+
+/* ============================================================
* Host-implemented interfaces (vtables)
* ============================================================
* Heap, DiagSink, and Writer are implemented *outside* libcfree, by the
@@ -100,6 +117,22 @@ typedef enum CfreeObjFmt {
CFREE_OBJ_WASM,
} CfreeObjFmt;
+typedef enum CfreePic {
+ CFREE_PIC_NONE, /* CfreeTarget.pic default */
+ CFREE_PIC_PIC, /* forced by cfree_link_jit / cfree_jit_from_image; presumably the -fPIC mode - TODO confirm */
+ CFREE_PIC_PIE, /* presumably the -fPIE mode - TODO confirm */
+} CfreePic;
+
+/* CFREE_CM_DEFAULT is resolved per-arch (small on x86-64/AArch64, medium on
+ * RISC-V, etc.). PIC and code-model are independent: -fPIC -mcmodel=small and
+ * -fPIE -mcmodel=medium are both coherent. */
+typedef enum CfreeCodeModel {
+ CFREE_CM_DEFAULT, /* CfreeTarget.code_model default; resolved per-arch */
+ CFREE_CM_SMALL, /* presumably -mcmodel=small - TODO confirm */
+ CFREE_CM_MEDIUM, /* presumably -mcmodel=medium - TODO confirm */
+ CFREE_CM_LARGE, /* presumably -mcmodel=large - TODO confirm */
+} CfreeCodeModel;
+
typedef struct CfreeTarget {
CfreeArchKind arch;
CfreeOSKind os;
@@ -107,8 +140,15 @@ typedef struct CfreeTarget {
uint8_t ptr_size; /* 4 or 8 */
uint8_t ptr_align;
uint8_t big_endian;
+ uint8_t pic; /* CfreePic; default CFREE_PIC_NONE */
+ uint8_t code_model; /* CfreeCodeModel; default CFREE_CM_DEFAULT */
} CfreeTarget;
+/* JIT note: cfree_link_jit and cfree_jit_from_image force pic = CFREE_PIC_PIC
+ * regardless of caller input — the mmap'd image's address is unknown until
+ * map time. The override happens at the linker entry, not silently inside the
+ * backend. */
+
/* ============================================================
* Host environment
* ============================================================
@@ -169,6 +209,115 @@ const uint8_t* cfree_writer_mem_bytes(CfreeWriter*, size_t* len_out);
void cfree_jit_free (CfreeJit*);
void* cfree_jit_lookup(CfreeJit*, const char* name);
+/* ----- JIT image inspection -----
+ *
+ * cfree_jit_view borrows a CfreeObjFile over the loaded JIT image. Lets the
+ * driver feed the JIT to objdump/dwarf consumers without round-tripping the
+ * image to bytes. The returned pointer is owned by the CfreeJit and is
+ * invalidated by cfree_jit_free; callers must not call cfree_obj_close on it.
+ *
+ * cfree_jit_addr_to_sym is the reverse of cfree_jit_lookup: maps a runtime
+ * PC to the enclosing global symbol. Returns 0 on success and 1 when no
+ * symbol contains `addr`. The interned name string is valid until
+ * cfree_jit_free. */
+const CfreeObjFile* cfree_jit_view (CfreeJit*);
+int cfree_jit_addr_to_sym (CfreeJit*, uint64_t addr,
+ const char** name_out,
+ uint64_t* off_out);
+
+/* ----- JIT session: controlled execution -----
+ *
+ * A session wraps the JIT in a worker thread and a per-arch trap engine
+ * (software breakpoint patch + single-step / displaced-step trampoline).
+ * The library owns all signal handling, ucontext extraction, and per-arch
+ * trap-byte / single-step machinery. The driver side uses the session to
+ * call into the JIT'd code and is notified of stops (breakpoint, fault,
+ * exit) via the blocking session_call/session_resume entries.
+ *
+ * Threading model: a single worker thread runs the JIT entry. session_call
+ * and session_resume block the caller until the worker stops; the worker
+ * is parked on stop and resumed from the next session_resume. Only one
+ * thread may drive the session at a time.
+ *
+ * Lifetime: the CfreeJit must outlive the CfreeJitSession. cfree_jit_free
+ * tears down a session implicitly if one is still attached, but explicit
+ * cfree_jit_session_free is preferred so the worker thread is joined
+ * deterministically.
+ *
+ * Breakpoints: set/clear via session_breakpoint_set/_clear. The trap byte
+ * patch and arch-specific single-step trampoline (used to step over the
+ * patched instruction during resume) are entirely internal. The session
+ * dedupes addresses; setting a breakpoint at an existing address returns
+ * the original handle. */
+
+typedef enum CfreeStopKind {
+ CFREE_STOP_BREAKPOINT, /* worker hit a breakpoint we set */
+ CFREE_STOP_SIGNAL, /* worker took a fault we did not arm */
+ CFREE_STOP_EXIT, /* worker entry returned normally */
+ CFREE_STOP_INTERRUPT, /* host requested via session_interrupt */
+} CfreeStopKind;
+
+typedef struct CfreeStopInfo {
+ CfreeStopKind kind;
+ int signal; /* host signo when kind == CFREE_STOP_SIGNAL */
+ int exit_code; /* worker return value when kind == CFREE_STOP_EXIT */
+ uint32_t bp_id; /* handle from cfree_jit_session_breakpoint_set when kind == CFREE_STOP_BREAKPOINT */
+ CfreeUnwindFrame regs; /* register snapshot at the stop site */
+} CfreeStopInfo;
+
+typedef enum CfreeResumeMode {
+ CFREE_RESUME_CONTINUE, /* run until next stop or exit */
+ CFREE_RESUME_STEP_INSN, /* execute one machine instruction */
+} CfreeResumeMode;
+
+/* Entry-point signature dispatched by session_call. The library is
+ * responsible for marshalling argv/argc into the worker's ABI; the driver
+ * is shape-agnostic. New entry shapes extend the enum. */
+typedef enum CfreeEntryKind {
+ CFREE_ENTRY_INT_ARGV, /* int(int, char**) */
+} CfreeEntryKind;
+
+CfreeJitSession* cfree_jit_session_new (CfreeJit*);
+void cfree_jit_session_free(CfreeJitSession*);
+
+/* Begin executing `entry` with `argv`. Blocks until the worker stops.
+ * `entry` must be a pointer returned by cfree_jit_lookup (or otherwise
+ * within the JIT image). Returns 0 on success (including an EXIT stop),
+ * nonzero on internal failure (worker spawn, OOM). On success *stop_out is
+ * populated. */
+int cfree_jit_session_call (CfreeJitSession*, void* entry,
+ CfreeEntryKind, int argc, char** argv,
+ CfreeStopInfo* stop_out);
+
+/* Resume the parked worker. Blocks until the next stop. Returns 0 on
+ * success, nonzero if no worker is parked. */
+int cfree_jit_session_resume(CfreeJitSession*, CfreeResumeMode,
+ CfreeStopInfo* stop_out);
+
+/* Asynchronously interrupt a running worker. Async-signal-safe: callable
+ * from a SIGINT handler in the host. The next stop event delivered to the
+ * driving thread will be CFREE_STOP_INTERRUPT. Returns 0 on a queued
+ * interrupt, nonzero if no worker is currently running. */
+int cfree_jit_session_interrupt(CfreeJitSession*);
+
+/* Read `n` bytes from the worker's address space starting at `addr` into
+ * `dst`. Used by `p` and `x` in the dbg driver to dereference globals,
+ * frame-relative locals, and arbitrary user memory. Returns 0 on success
+ * and nonzero on a bad address or partial read; partial-read attempts do
+ * not modify `dst`. Safe to call only while the worker is parked at a
+ * stop. */
+int cfree_jit_session_read_mem(CfreeJitSession*, uint64_t addr,
+ void* dst, size_t n);
+
+/* Set / clear a breakpoint at `addr` (which must lie within the JIT image).
+ * On success, *bp_id_out is the session-local handle that future stop
+ * events will report. Idempotent: setting at an address that already has
+ * a breakpoint returns its existing handle. cfree_jit_session_breakpoint_clear
+ * silently succeeds on an unknown handle. */
+int cfree_jit_session_breakpoint_set (CfreeJitSession*, uint64_t addr,
+ uint32_t* bp_id_out);
+int cfree_jit_session_breakpoint_clear(CfreeJitSession*, uint32_t bp_id);
+
/* Resolver invoked when the linker encounters an undefined symbol. Returning
* NULL is an error. */
typedef void* (*CfreeExternResolver)(void* user, const char* name);
@@ -222,11 +371,37 @@ typedef struct CfreePpOptions {
uint32_t nundefines;
} CfreePpOptions;
+/* Path prefix remap entry. Applied by SourceManager whenever it produces a
+ * path for DWARF emission (DW_AT_comp_dir, DW_AT_name, line program). The
+ * first match wins. Diagnostic output uses original paths. */
+typedef struct CfreePathPrefixMap {
+ const char* old_prefix; /* presumably the leading path text to match - TODO confirm */
+ const char* new_prefix; /* presumably what a matched old_prefix is rewritten to - TODO confirm */
+} CfreePathPrefixMap;
+
/* Per-TU compile knobs. */
typedef struct CfreeCompileOptions {
int opt_level; /* 0 direct, 1 minimal, 2 full */
int debug_info;
CfreePpOptions pp;
+ /* Reproducible-build knobs. `epoch` (Unix seconds) is consulted by every
+ * file emitter that would otherwise have written wall-clock time (COFF
+ * header, Mach-O LC_BUILD_VERSION, ar ar_date, DWARF producer). 0 means
+ * write no timestamp at all (the default). */
+ uint64_t epoch;
+ const CfreePathPrefixMap* path_map;
+ uint32_t npath_map;
+ /* Diagnostic policy.
+ *
+ * `warnings_are_errors` (-Werror): warnings emitted to CfreeDiagSink are
+ * counted as errors for the compile_* return-value test and against
+ * max_errors. The sink's `warnings` counter is unaffected.
+ *
+ * `max_errors`: 0 means unlimited. When >0, the parser stops emitting
+ * after sink.errors reaches the cap (the Nth error is still emitted; the
+ * (N+1)th is not), and compile_* returns nonzero. */
+ int warnings_are_errors;
+ uint32_t max_errors;
} CfreeCompileOptions;
/* Preprocess one C input.
@@ -249,29 +424,265 @@ int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp,
* temporary builder before returning. The Writer is not closed. On nonzero
* return the Writer may contain partial output and should not be consumed.
*
- * Returns 0 on success, nonzero on failure. */
+ * Diagnostic model: report-all. Every error reachable by the parser's
+ * recovery rules is emitted to env.diag before return; the parser does not
+ * abort on routine syntax/semantic errors. These functions return 0 iff
+ * env.diag->errors == 0 at the end of the call (with warnings counting as
+ * errors when CfreeCompileOptions.warnings_are_errors is set). Returns
+ * nonzero on internal failures (OOM, invariant violation), where the
+ * underlying compiler_panic mechanism unwinds before return. */
int cfree_compile_obj (CfreeCompiler*, const CfreeCompileOptions*,
const CfreeBytesInput* input, CfreeObjBuilder** out);
int cfree_compile_obj_emit(CfreeCompiler*, const CfreeCompileOptions*,
const CfreeBytesInput* input, CfreeWriter* out);
+/* ----- Assembly entries (GAS subset) -----
+ *
+ * Assemble one text source into a relocatable object. Diagnostic and panic
+ * semantics match cfree_compile_obj / cfree_compile_obj_emit: report-all to
+ * env.diag, return 0 iff env.diag->errors == 0 at end of call (warnings count
+ * as errors when warnings_are_errors is set), nonzero on internal failures.
+ *
+ * cfree_assemble_obj returns a finalized CfreeObjBuilder owned by the
+ * CfreeCompiler; pass it to cfree_link_exe / cfree_link_jit. The builder must
+ * be alive until the linker has consumed it; the CfreeCompiler must outlive
+ * it.
+ *
+ * cfree_assemble_obj_emit writes the encoded object to `out` and frees its
+ * temporary builder before returning. The Writer is not closed. On nonzero
+ * return the Writer may contain partial output and should not be consumed.
+ *
+ * Inline asm in the C parser reuses the same parser internally; no separate
+ * public entry. */
+typedef struct CfreeAsmOptions {
+ int debug_info; /* generate .debug_line from .file/.loc */
+ /* Reproducible-build knobs; semantics match CfreeCompileOptions. */
+ uint64_t epoch;
+ const CfreePathPrefixMap* path_map;
+ uint32_t npath_map;
+ /* Diagnostic policy; semantics match CfreeCompileOptions. */
+ int warnings_are_errors;
+ uint32_t max_errors;
+} CfreeAsmOptions;
+
+int cfree_assemble_obj (CfreeCompiler*, const CfreeAsmOptions*,
+ const CfreeBytesInput* input,
+ CfreeObjBuilder** out);
+int cfree_assemble_obj_emit(CfreeCompiler*, const CfreeAsmOptions*,
+ const CfreeBytesInput* input,
+ CfreeWriter* out);
+
+/* ----- Header-dependency iteration -----
+ *
+ * Walks the include edges recorded by SourceManager during a preceding
+ * cfree_preprocess or cfree_compile_obj* call. The library hands out raw
+ * edges; formatting (Make rules, ninja, JSON) is the driver's job.
+ *
+ * cfree_dep_iter_next returns 1 and fills `*out` for each remaining edge,
+ * 0 when iteration is exhausted. The strings in CfreeDepEdge alias storage
+ * owned by the CfreeCompiler and are valid until the next preprocess/
+ * compile call or compiler_free, whichever comes first.
+ *
+ * `includer_name` and `included_name` are the *resolved* paths SourceManager
+ * actually opened — the same byte sequences passed to CfreeFileIO.read_all.
+ * They are not the literal include token text; a `#include "x.h"` resolved
+ * via -I to /abs/inc/x.h reports `/abs/inc/x.h`. This is what build systems
+ * need: a Make rule emitted from these strings refers to files the build
+ * tool will stat on rebuild.
+ *
+ * `from_system_path` distinguishes headers found through a system include
+ * path (-isystem, sysroot, builtin) from user headers found via -I or the
+ * source's own directory. This is the GCC `-MM` filter: drop edges whose
+ * `from_system_path` is set. It is set by the include-path resolver, NOT by
+ * the include syntax — `#include <myheader.h>` resolved through -I is a
+ * user header (from_system_path=0); `#include "stdio.h"` resolved through a
+ * system path is a system header (from_system_path=1).
+ *
+ * `bracketed` records the include *syntax* (1 for `<…>`, 0 for `"…"`). Tools
+ * that round-trip include directives (formatters, IDE indexers) want the
+ * lexical fact; -MM filtering does not.
+ *
+ * Edges are reported across all TUs processed since compiler_new; callers
+ * that want a single TU's edges filter by `includer_name`. */
+typedef struct CfreeDepIter CfreeDepIter;
+
+typedef struct CfreeDepEdge {
+ const char* includer_name; /* resolved path; same string given to read_all */
+ const char* included_name; /* resolved path; same string given to read_all */
+ CfreeSrcLoc include_loc;
+ uint8_t from_system_path; /* 1 if resolved via a system include path */
+ uint8_t bracketed; /* 1 if syntax was <…>; 0 for "…" */
+ uint8_t pad[2];
+} CfreeDepEdge;
+
+CfreeDepIter* cfree_dep_iter_new (CfreeCompiler*);
+int cfree_dep_iter_next(CfreeDepIter*, CfreeDepEdge* out);
+void cfree_dep_iter_free(CfreeDepIter*);
+
+/* Build-ID emission mode (ELF .note.gnu.build-id and friends). */
+typedef enum CfreeBuildIdMode {
+ CFREE_BUILDID_NONE, /* no build-id note (default) */
+ CFREE_BUILDID_SHA256, /* hash all input section bytes in
+ * stable order — reproducible */
+ CFREE_BUILDID_UUID, /* random; opt-in, not reproducible */
+ CFREE_BUILDID_USER, /* caller-supplied bytes */
+} CfreeBuildIdMode;
+
+/* ============================================================
+ * Linker script (structured)
+ * ============================================================
+ * The linker accepts only the structured form. Programmatic build systems
+ * construct a CfreeLinkScript directly; hosts that prefer GNU-ld text feed
+ * the optional cfree_link_script_parse helper, which yields the same
+ * structured form. The data model makes the supported semantics
+ * inspectable rather than implicit in a parser.
+ *
+ * All pointers in a CfreeLinkScript are borrowed: the script and every
+ * sub-object (expressions, regions, sections, assignments, name strings)
+ * must outlive the call to cfree_link_exe / cfree_link_jit that consumes
+ * it. cfree_link_script_parse arena-owns its result; cfree_link_script_free
+ * releases everything reachable from a parser-produced script. */
+
+typedef struct CfreeLinkExpr CfreeLinkExpr;
+
+typedef enum CfreeLinkExprKind {
+ CFREE_LE_INT, /* int_val */
+ CFREE_LE_DOT, /* current location counter */
+ CFREE_LE_SYM, /* name */
+ CFREE_LE_REGION_ORIGIN, /* name = MEMORY region */
+ CFREE_LE_REGION_LENGTH, /* name = MEMORY region */
+ CFREE_LE_ADD, CFREE_LE_SUB, CFREE_LE_MUL, CFREE_LE_DIV,
+ CFREE_LE_AND, CFREE_LE_OR, CFREE_LE_XOR,
+ CFREE_LE_SHL, CFREE_LE_SHR,
+ CFREE_LE_ALIGN, /* ALIGN(val, align) */
+ CFREE_LE_MAX, CFREE_LE_MIN,
+} CfreeLinkExprKind;
+
+struct CfreeLinkExpr {
+ uint8_t kind; /* CfreeLinkExprKind; determines which member of `v` is meaningful */
+ union {
+ int64_t int_val; /* CFREE_LE_INT */
+ const char* name; /* CFREE_LE_SYM, CFREE_LE_REGION_ORIGIN, CFREE_LE_REGION_LENGTH */
+ struct { const CfreeLinkExpr *lhs, *rhs; } bin; /* presumably the two-operand kinds (ADD..SHR, MAX, MIN) - TODO confirm */
+ struct { const CfreeLinkExpr *val, *align; } align; /* CFREE_LE_ALIGN: ALIGN(val, align) */
+ } v;
+};
+
+typedef enum CfreeLinkRegionFlag {
+ CFREE_LRF_R = 1u << 0,
+ CFREE_LRF_W = 1u << 1,
+ CFREE_LRF_X = 1u << 2,
+} CfreeLinkRegionFlag;
+
+typedef struct CfreeLinkRegion {
+ const char* name; /* MEMORY region name */
+ uint8_t flags; /* CfreeLinkRegionFlag mask */
+ uint64_t origin; /* ORIGIN of the region */
+ uint64_t length; /* LENGTH of the region */
+} CfreeLinkRegion;
+
+typedef struct CfreeLinkInputMatch {
+ const char* file_pattern; /* NULL == "*" */
+ const char* section_pattern; /* the parenthesized part of an input rule, e.g. ".section.glob" in `*(.section.glob)` */
+ int keep; /* nonzero: exempt from --gc-sections */
+} CfreeLinkInputMatch;
+
+typedef enum CfreeLinkAsnKind {
+ CFREE_LAS_DOT, /* . = expr; sym ignored */
+ CFREE_LAS_SYM, /* sym = expr */
+ CFREE_LAS_PROVIDE, /* PROVIDE(sym = expr) */
+} CfreeLinkAsnKind;
+
+typedef struct CfreeLinkAssignment {
+ uint8_t kind; /* CfreeLinkAsnKind */
+ const char* sym; /* unused for CFREE_LAS_DOT */
+ const CfreeLinkExpr* expr;
+} CfreeLinkAssignment;
+
+typedef struct CfreeLinkOutputSection {
+ const char* name;
+ const CfreeLinkExpr* vma; /* NULL: from region/dot */
+ const CfreeLinkExpr* lma; /* NULL: equal to vma */
+ const CfreeLinkInputMatch* inputs;
+ uint32_t ninputs;
+ const char* region; /* > REGION; NULL if absent */
+ const char* load_region; /* AT> REGION; NULL if absent */
+ const CfreeLinkAssignment* asns;
+ uint32_t nasns;
+} CfreeLinkOutputSection;
+
+typedef struct CfreeLinkScript {
+ const char* entry; /* NULL: use CfreeLinkOptions.entry */
+ const CfreeLinkRegion* regions;
+ uint32_t nregions;
+ const CfreeLinkOutputSection* sections; /* in declaration order */
+ uint32_t nsections;
+ const CfreeLinkAssignment* top_asns; /* outside any SECTIONS{} */
+ uint32_t ntop_asns;
+} CfreeLinkScript;
+
+/* Parse GNU-ld-subset text into a structured script. The compiler arena
+ * owns the result; cfree_link_script_free releases it. The supported v1
+ * subset is:
+ * ENTRY(symbol)
+ * MEMORY { name (rwx) : ORIGIN = expr, LENGTH = expr }
+ * SECTIONS { ... } with output sections in declaration order
+ * Input rules `*(.section.glob)` or `file.o(.section)`
+ * KEEP(...) for --gc-sections opt-out
+ * PROVIDE(sym = expr), plain `sym = expr`, `. = expr`
+ * `> REGION` and `AT> REGION` placement
+ * Operators + - * / & | ^ << >>, ALIGN(expr, align), MAX(a,b), MIN(a,b)
+ * slash-star block comments
+ * Anything outside the subset (OVERLAY, VERSION, INSERT BEFORE/AFTER,
+ * OUTPUT_FORMAT, INPUT, GROUP, elaborate file patterns, other operators)
+ * is rejected with a diagnostic and the call returns nonzero with *out
+ * unchanged. Returns 0 on success. */
+int cfree_link_script_parse(CfreeCompiler*, const char* text, size_t len,
+ const CfreeLinkScript** out);
+void cfree_link_script_free (CfreeCompiler*, const CfreeLinkScript*);
+
+/* Per-archive flags. Object-file inputs (obj_bytes) keep the plain
+ * CfreeBytesInput[] shape — only archives carry these knobs. */
+typedef enum CfreeLinkArchFlag {
+ CFREE_LAF_NONE = 0,
+ /* Pull every member of the archive in regardless of whether its symbols
+ * satisfy an undef. Equivalent to GNU ld --whole-archive. */
+ CFREE_LAF_WHOLE_ARCHIVE = 1u << 0,
+} CfreeLinkArchFlag;
+
+/* Archive input with linker-side flags. `group_id` clusters archives into a
+ * cyclic resolution group: archives sharing a nonzero id are scanned
+ * cyclically until no new symbols are pulled in (equivalent to GNU ld
+ * --start-group ... --end-group). `group_id == 0` (default) means linear
+ * single-pass. */
+typedef struct CfreeBytesInputArchive {
+ CfreeBytesInput input;
+ uint8_t flags; /* bitmask of CfreeLinkArchFlag */
+ uint8_t group_id; /* 0 = none; same nonzero = same cyclic group */
+} CfreeBytesInputArchive;
+
typedef struct CfreeLinkOptions {
- CfreeObjBuilder* const* objs; /* fresh-compiled, by reference */
- uint32_t nobjs;
- const CfreeBytesInput* obj_bytes;
- uint32_t nobj_bytes;
- const CfreeBytesInput* archives;
- uint32_t narchives;
- const char* linker_script_text; /* NULL = no script.
- * Non-NULL: linker_script_len
- * must match the buffer. */
- size_t linker_script_len;
+ CfreeObjBuilder* const* objs; /* fresh-compiled, by reference */
+ uint32_t nobjs;
+ const CfreeBytesInput* obj_bytes;
+ uint32_t nobj_bytes;
+ const CfreeBytesInputArchive* archives;
+ uint32_t narchives;
+ /* Structured linker script. NULL means no script (target/format default
+ * layout). Borrowed: must outlive the cfree_link_* call. */
+ const CfreeLinkScript* linker_script;
const char* entry; /* NULL = format/target default */
CfreeExternResolver extern_resolver;
void* extern_resolver_user;
+ /* Build-ID. `build_id_mode` is a CfreeBuildIdMode. `build_id_bytes` /
+ * `build_id_len` are consulted only when mode == CFREE_BUILDID_USER. */
+ uint8_t build_id_mode;
+ const uint8_t* build_id_bytes;
+ uint32_t build_id_len;
} CfreeLinkOptions;
-/* All bytes inputs (obj_bytes, archives) must remain alive until the
+/* All bytes inputs (obj_bytes, archives — including the CfreeBytesInput
+ * nested inside each CfreeBytesInputArchive) must remain alive until the
* matching cfree_link_* call returns. */
/* Link to executable. Writer is not closed by the call. On nonzero return
@@ -324,6 +735,20 @@ typedef struct CfreeOptions {
CfreeExternResolver extern_resolver;
void* extern_resolver_user;
+ /* Reproducibility — forwarded to the underlying compile/link options. */
+ uint64_t epoch;
+ const CfreePathPrefixMap* path_map;
+ uint32_t npath_map;
+ uint8_t build_id_mode;
+ const uint8_t* build_id_bytes;
+ uint32_t build_id_len;
+
+ /* Diagnostic policy — forwarded to CfreeCompileOptions. cfree_run
+ * returns nonzero when any per-TU compile reports errors (or when a
+ * subsequent link/jit step fails). */
+ int warnings_are_errors;
+ uint32_t max_errors;
+
CfreeJit** out_jit; /* JIT only: caller owns on success */
} CfreeOptions;
@@ -428,14 +853,197 @@ CfreeObjSymIter* cfree_obj_symiter_new (CfreeObjFile*);
int cfree_obj_symiter_next(CfreeObjSymIter*, CfreeObjSymInfo* out);
void cfree_obj_symiter_free(CfreeObjSymIter*);
+/* Raw bytes of a section. Returns a pointer aliasing storage owned by the
+ * CfreeObjFile and valid until cfree_obj_close. For BSS (no in-file bytes),
+ * returns NULL with `*len_out = 0`; the section's virtual size is on
+ * CfreeObjSecInfo.size. Out-of-range idx returns NULL with `*len_out = 0`. */
+const uint8_t* cfree_obj_section_data(const CfreeObjFile*, uint32_t idx,
+ size_t* len_out);
+
+/* Expose the underlying CfreeObjBuilder for use with cfree_disasm_iter_new
+ * (so the disassembler can consult sym/reloc tables for annotation). The
+ * pointer is owned by the CfreeObjFile and is valid until cfree_obj_close. */
+CfreeObjBuilder* cfree_obj_builder(const CfreeObjFile*);
+
+/* Relocation iterator. Walks every relocation in the object across all
+ * sections in section-then-offset order. Strings are interned and valid
+ * until cfree_obj_close. */
+typedef struct CfreeObjReloc {
+ uint32_t section; /* 0-based section index the reloc applies to */
+ uint64_t offset; /* offset within that section */
+ uint32_t sym; /* opaque symbol id; CFREE_SECTION_NONE if none */
+ const char* sym_name; /* interned; "" when sym is none/anonymous */
+ int64_t addend;
+ uint32_t kind; /* arch-specific reloc type code */
+ const char* kind_name; /* interned, e.g. "R_X86_64_PC32" */
+} CfreeObjReloc;
+
+typedef struct CfreeObjRelocIter CfreeObjRelocIter;
+
+CfreeObjRelocIter* cfree_obj_reliter_new (CfreeObjFile*);
+int cfree_obj_reliter_next(CfreeObjRelocIter*, CfreeObjReloc* out);
+void cfree_obj_reliter_free(CfreeObjRelocIter*);
+
+/* ============================================================
+ * DWARF consumer
+ * ============================================================
+ * Read DWARF (.debug_info / .debug_line / .debug_aranges / .eh_frame) out
+ * of an already-opened CfreeObjFile. The CfreeObjFile must outlive the
+ * CfreeDebugInfo. Strings handed back through the query functions are
+ * interned and valid until cfree_dwarf_close.
+ *
+ * cfree_dwarf_open returns NULL when the object has no DWARF, when the
+ * object's format doesn't carry DWARF (PE/COFF can; the consumer accepts
+ * the standard sections wherever they live), or on internal failure.
+ *
+ * cfree_dwarf_addr_to_line maps a runtime / image PC to the source file,
+ * line, and column that produced it. Returns 0 on success and 1 when the
+ * PC has no matching .debug_line entry (e.g. compiler scaffolding).
+ *
+ * cfree_dwarf_line_to_addr is the inverse: returns 0 on success, 1 when no
+ * statement-flagged row matches the (file, line) pair. The first matching
+ * row wins.
+ *
+ * cfree_dwarf_func_at returns the enclosing subprogram's name and
+ * inclusive PC bounds. Returns 0 on success, 1 if no subprogram contains
+ * `pc`. */
+/* CfreeDebugInfo is typedef'd with the other opaque handles at the top of this header; repeating the typedef here is an error before C11. */
+
+CfreeDebugInfo* cfree_dwarf_open (CfreeCompiler*, const CfreeObjFile*);
+void cfree_dwarf_close(CfreeDebugInfo*);
+
+int cfree_dwarf_addr_to_line(CfreeDebugInfo*, uint64_t pc,
+ const char** file_out,
+ uint32_t* line_out,
+ uint32_t* col_out);
+int cfree_dwarf_line_to_addr(CfreeDebugInfo*, const char* file, uint32_t line,
+ uint64_t* pc_out);
+int cfree_dwarf_func_at (CfreeDebugInfo*, uint64_t pc,
+ const char** name_out,
+ uint64_t* low_pc_out,
+ uint64_t* high_pc_out);
+
+/* CFI-driven unwind step. The caller seeds `frame->pc` (and any callee-saved
+ * registers known at the leaf) and the consumer walks .eh_frame to compute
+ * the caller frame in place: pc, cfa, and registers are updated. CfreeUnwindFrame
+ * is declared at the top of this header. Register indices follow the DWARF
+ * register numbering for the target arch (which matches CfreeArchKind's
+ * canonical mapping). Returns 0 on a successful step, 1 at the bottom of the
+ * stack (no caller), and a nonzero value other than 1 on decode error. */
+int cfree_dwarf_unwind_step(CfreeDebugInfo*, CfreeUnwindFrame*);
+
+/* ----- Variable locations -----
+ *
+ * Decode where a named variable lives at PC. Resolution order: the deepest
+ * lexical scope at `pc` whose `name` matches wins; if no local matches, a
+ * file-scope global with that name is returned; otherwise 1.
+ *
+ * `byte_size` is the variable's storage size in bytes, taken from the
+ * variable's DIE type. Zero means unknown.
+ *
+ * cfree_dwarf_loc_read evaluates the location against `frame` (whose `regs`
+ * supply register values; the leaf frame's regs come from CfreeStopInfo,
+ * deeper frames from cfree_dwarf_unwind_step) and reads the underlying
+ * bytes through the supplied JIT session. Up to `cap` bytes are written
+ * into `dst`; *read_out reports the number actually read (capped to
+ * the variable's byte_size). Returns 0 on success, nonzero on bad
+ * arguments or a read fault.
+ *
+ * EXPR locations carry a DWARF expression byte string; libcfree owns the
+ * stack-machine evaluator. Callers should treat the loc as opaque and
+ * always go through cfree_dwarf_loc_read. */
+typedef enum CfreeDwarfLocKind {
+ CFREE_DLOC_REG, /* value lives in a register */
+ CFREE_DLOC_FRAME_OFS, /* [cfa + frame_ofs] */
+ CFREE_DLOC_GLOBAL, /* absolute address */
+ CFREE_DLOC_EXPR, /* DWARF expression bytes */
+} CfreeDwarfLocKind;
+
+typedef struct CfreeDwarfVarLoc {
+ CfreeDwarfLocKind kind;
+ uint32_t byte_size; /* 0 = unknown */
+ union {
+ uint32_t reg; /* CFREE_DLOC_REG: value lives in this register */
+ int32_t frame_ofs; /* CFREE_DLOC_FRAME_OFS: [cfa + frame_ofs] */
+ uint64_t global; /* CFREE_DLOC_GLOBAL: absolute address */
+ struct { const uint8_t* bytes; size_t len; } expr; /* CFREE_DLOC_EXPR: DWARF expression bytes */
+ } v;
+} CfreeDwarfVarLoc;
+
+int cfree_dwarf_var_at (CfreeDebugInfo*, uint64_t pc, const char* name,
+ CfreeDwarfVarLoc* out);
+int cfree_dwarf_loc_read(CfreeDebugInfo*, const CfreeDwarfVarLoc*,
+ const CfreeUnwindFrame*,
+ CfreeJitSession*, /* memory provider */
+ void* dst, size_t cap, size_t* read_out);
+
+/* ============================================================
+ * Disassembler
+ * ============================================================
+ * Two layers: a high-level convenience that walks a relocatable object's
+ * text sections and writes an objdump-style listing, and a low-level
+ * iterator that decodes instructions from a byte buffer with vaddr context.
+ *
+ * Operands are pre-rendered to text on CfreeInsn. Structured operands (per-
+ * arch REG/IMM/MEM/SYM_REL enums) are the principled answer but multiply
+ * surface per arch (x86 ModR/M, AArch64 vector lanes, RISC-V CSR names)
+ * without v1 consumers; adding a structured form later is non-breaking
+ * because the text fields remain accurate.
+ *
+ * Strings on CfreeInsn (mnemonic/operands/annotation) and `bytes` are owned
+ * by the iterator and valid only until the next cfree_disasm_iter_next call
+ * or cfree_disasm_iter_free, whichever comes first. */
+
+typedef struct CfreeInsn {
+ uint64_t vaddr; /* presumably this instruction's virtual address - TODO confirm */
+ const uint8_t* bytes; /* iterator-owned; valid until the next cfree_disasm_iter_next or cfree_disasm_iter_free */
+ uint32_t nbytes; /* presumably the length of `bytes` - TODO confirm */
+ const char* mnemonic; /* iterator-owned; same lifetime as `bytes` */
+ const char* operands; /* pre-rendered; may be "" */
+ const char* annotation; /* sym/reloc note; may be "" */
+} CfreeInsn;
+
+/* Walk a relocatable object's text sections and write an objdump-style
+ * listing to `out`. Convenience over the iterator. The Writer is not
+ * closed. Returns 0 on success, nonzero on failure. */
+int cfree_obj_disasm(CfreeCompiler*,
+ const CfreeBytesInput*, CfreeWriter* out);
+
+/* Iterate instructions in a byte buffer at virtual address `vaddr`. If
+ * `obj` is non-NULL, the decoder consults its symbol and relocation tables
+ * to fill CfreeInsn.annotation; pass NULL for raw decoding. The bytes
+ * buffer must remain alive until cfree_disasm_iter_free.
+ *
+ * cfree_disasm_iter_next returns 1 and fills `*out` for each decoded
+ * instruction, 0 when the buffer is exhausted. On an undecodable byte the
+ * iterator advances by the arch's minimum unit and emits a placeholder
+ * mnemonic so the listing stays in sync. */
+typedef struct CfreeDisasmIter CfreeDisasmIter;
+
+CfreeDisasmIter* cfree_disasm_iter_new (CfreeCompiler*,
+ const uint8_t* bytes, size_t len,
+ uint64_t vaddr,
+ CfreeObjBuilder* obj /* may be NULL */);
+int cfree_disasm_iter_next(CfreeDisasmIter*, CfreeInsn* out);
+void cfree_disasm_iter_free(CfreeDisasmIter*);
+
/* ============================================================
* Archive (ar) file
* ============================================================
* Pure format I/O — no compilation context required.
*
* cfree_ar_write packs member byte payloads into a POSIX ar archive written
- * to `out`. The Writer is not closed; I/O errors are detectable via
- * out->error(). Returns 0 on success, 1 on bad arguments.
+ * to `out`. Options control reproducibility and format extensions:
+ * - `epoch` Unix seconds written to ar_date for every member; 0
+ * leaves the field as the literal "0" (the default).
+ * - `symbol_index` if nonzero, emit a System V `/` symbol-index member.
+ * Not yet implemented; currently ignored.
+ * - `long_names` if nonzero, emit a `//` long-name table when any
+ * member name exceeds 15 characters or contains '/'.
+ * With long_names == 0, over-long names are truncated.
+ * `opts` may be NULL to accept all defaults.
+ * The Writer is not closed; I/O errors are detectable via out->error().
+ * Returns 0 on success, 1 on bad arguments.
*
* cfree_ar_list writes one member name per line to `out` for each non-special
* member in the archive. Returns 0 on success, 1 on bad arguments or
@@ -447,18 +1055,30 @@ void cfree_obj_symiter_free(CfreeObjSymIter*);
* cfree_ar_iter_next advances to the next non-special member and fills *out;
* returns 1 if a member was returned, 0 at end or on malformed data.
* Member data pointers alias the original archive bytes and are valid as
- * long as the archive bytes remain alive. */
+ * long as the archive bytes remain alive. CfreeArMember.name is interned
+ * in iterator-owned storage and is valid only until the next iter_next
+ * call on the same iterator. */
+typedef struct CfreeArWriteOptions {
+ uint64_t epoch; /* Unix seconds written to ar_date for every member; 0 leaves the literal "0" */
+ int symbol_index; /* nonzero requests a System V '/' symbol-index member; not yet implemented, currently ignored */
+ int long_names; /* nonzero: emit '//' long-name table when needed; zero: over-long names are truncated */
+} CfreeArWriteOptions;
+
int cfree_ar_write(CfreeWriter* out,
- const CfreeBytesInput* members, uint32_t nmembers);
+ const CfreeBytesInput* members, uint32_t nmembers,
+ const CfreeArWriteOptions* opts);
int cfree_ar_list (const CfreeBytesInput* archive, CfreeWriter* out);
typedef struct CfreeArIter {
const uint8_t* _p;
const uint8_t* _end;
+ const uint8_t* _longnames; /* `//` table bytes, NULL until seen */
+ size_t _longnames_len;
+ char _namebuf[256]; /* iterator-owned scratch for member name */
} CfreeArIter;
typedef struct CfreeArMember {
- char name[17]; /* null-terminated, max 16 chars */
+ const char* name; /* iterator-owned; valid until next iter_next */
const uint8_t* data; /* points into archive bytes */
size_t size;
} CfreeArMember;