commit 4bcdfd87a185d107e9685f63bdee5a4968e63763
parent 4b346e744cca9faad6fd986ceb5f1d7e78e3d067
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 8 May 2026 17:19:42 -0700
include: API updates
Diffstat:
| M | include/cfree.h | | | 537 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------- |
1 file changed, 393 insertions(+), 144 deletions(-)
diff --git a/include/cfree.h b/include/cfree.h
@@ -15,6 +15,7 @@
* Opaque handles
* ============================================================ */
typedef struct CfreeCompiler CfreeCompiler;
+typedef struct CfreePipeline CfreePipeline;
typedef struct CfreeObjBuilder CfreeObjBuilder;
typedef struct CfreeJit CfreeJit;
typedef struct CfreeJitSession CfreeJitSession;
@@ -150,6 +151,51 @@ typedef struct CfreeTarget {
* backend. */
/* ============================================================
+ * Symbol classification
+ * ============================================================
+ * Shared by the object inspector and the JIT symbol iterator. Bind
+ * captures linkage; Kind captures what the symbol points at. */
+typedef enum CfreeSymBind {
+ CFREE_SB_LOCAL,
+ CFREE_SB_GLOBAL,
+ CFREE_SB_WEAK,
+} CfreeSymBind;
+
+typedef enum CfreeSymKind {
+ CFREE_SK_UNDEF,
+ CFREE_SK_FUNC,
+ CFREE_SK_OBJ,
+ CFREE_SK_SECTION,
+ CFREE_SK_FILE,
+ CFREE_SK_COMMON,
+ CFREE_SK_TLS,
+ CFREE_SK_ABS,
+} CfreeSymKind;
+
+/* ============================================================
+ * Architecture: register name mapping
+ * ============================================================
+ * DWARF register numbering varies per arch (CfreeUnwindFrame.regs is indexed
+ * by DWARF register number). These helpers translate between DWARF index and
+ * canonical assembler name (e.g. "rax", "x0", "a0") so dbg can render
+ * `info registers` and accept `set $rax = ...` syntax.
+ *
+ * Stateless and allocation-free in the lookup paths — name strings are static
+ * library data. `cfree_arch_register_name` returns NULL for an unmapped
+ * index; `cfree_arch_register_index` returns 0 on a known name and 1 if the
+ * name is unknown. The iterator yields named registers in DWARF index order. */
+typedef struct CfreeArchRegIter CfreeArchRegIter;
+typedef struct CfreeArchReg { uint32_t dwarf_idx; const char* name; } CfreeArchReg;
+
+const char* cfree_arch_register_name (CfreeArchKind, uint32_t dwarf_idx);
+int cfree_arch_register_index(CfreeArchKind, const char* name,
+ uint32_t* idx_out);
+
+CfreeArchRegIter* cfree_arch_reg_iter_new (CfreeArchKind);
+int cfree_arch_reg_iter_next(CfreeArchRegIter*, CfreeArchReg* out);
+void cfree_arch_reg_iter_free(CfreeArchRegIter*);
+
+/* ============================================================
* Host environment
* ============================================================
* The host supplies a heap, optional file I/O, and a diag sink. The
@@ -225,6 +271,23 @@ int cfree_jit_addr_to_sym (CfreeJit*, uint64_t addr,
const char** name_out,
uint64_t* off_out);
+/* Enumerate every globally visible symbol in the resolved JIT image.
+ * Drives `info functions` / `info variables` and tab completion in dbg.
+ * `name` is interned and valid until cfree_jit_free; CfreeSymKind is the
+ * same enum as the object inspector uses (CFREE_SK_FUNC / CFREE_SK_OBJ /
+ * etc.). */
+typedef struct CfreeJitSymIter CfreeJitSymIter;
+typedef struct CfreeJitSym {
+ const char* name; /* interned; valid until cfree_jit_free */
+ uint64_t addr;
+ uint64_t size;
+ CfreeSymKind kind; /* same enum the object inspector reports */
+} CfreeJitSym;
+
+CfreeJitSymIter* cfree_jit_sym_iter_new (CfreeJit*);
+int cfree_jit_sym_iter_next(CfreeJitSymIter*, CfreeJitSym* out);
+void cfree_jit_sym_iter_free(CfreeJitSymIter*);
+
/* ----- JIT session: controlled execution -----
*
* A session wraps the JIT in a worker thread and a per-arch trap engine
@@ -268,6 +331,11 @@ typedef struct CfreeStopInfo {
typedef enum CfreeResumeMode {
CFREE_RESUME_CONTINUE, /* run until next stop or exit */
CFREE_RESUME_STEP_INSN, /* execute one machine instruction */
+ CFREE_RESUME_STEP_LINE, /* until source line changes, staying
+ * in current function (NOTE(review): confirm whether calls are entered) */
+ CFREE_RESUME_NEXT_LINE, /* like STEP_LINE but step OVER any
+ * function calls */
+ CFREE_RESUME_STEP_OUT, /* run until current frame returns */
} CfreeResumeMode;
/* Entry-point signature dispatched by session_call. The library is
@@ -309,6 +377,23 @@ int cfree_jit_session_interrupt(CfreeJitSession*);
int cfree_jit_session_read_mem(CfreeJitSession*, uint64_t addr,
void* dst, size_t n);
+/* Write `n` bytes from `src` into the worker's address space at `addr`.
+ * Same constraints as the read variant: caller must be at a stop; a write
+ * that cannot complete in full leaves the target untouched and returns nonzero. */
+int cfree_jit_session_write_mem(CfreeJitSession*, uint64_t addr,
+ const void* src, size_t n);
+
+/* Read full register snapshot. Snapshot already lives in CfreeStopInfo;
+ * this is for callers that want a refresh outside the stop event (e.g.
+ * after a write). Returns 0 on success, nonzero if no worker is parked. */
+int cfree_jit_session_get_regs(CfreeJitSession*, CfreeUnwindFrame* out);
+
+/* Write back a register snapshot. The frame's `regs` are written into the
+ * worker; `pc` and `cfa` are honored only when changed. The library
+ * validates that `pc` lies inside the JIT image. Returns 0 on success,
+ * nonzero on a bad pc or if no worker is parked. */
+int cfree_jit_session_set_regs(CfreeJitSession*, const CfreeUnwindFrame*);
+
/* Set / clear a breakpoint at `addr` (which must lie within the JIT image).
* On success, *bp_id_out is the session-local handle that future stop
* events will report. Idempotent: setting at an address that already has
@@ -318,6 +403,32 @@ int cfree_jit_session_breakpoint_set (CfreeJitSession*, uint64_t addr,
uint32_t* bp_id_out);
int cfree_jit_session_breakpoint_clear(CfreeJitSession*, uint32_t bp_id);
+/* Extended breakpoint setter with skip count, hit cap, and an optional
+ * in-process predicate. The plain breakpoint_set above is a convenience
+ * over this form (skip_count = max_hits = 0, condition = NULL).
+ *
+ * `condition`, when non-NULL, is invoked by the library on the worker
+ * thread between the trap and the stop notification, after `skip_count`
+ * silent skips have elapsed. It must return nonzero to deliver a stop
+ * and zero to silently resume. The callback runs in a context where
+ * calling back into the session is not safe — restrict it to register
+ * inspection and pure computation.
+ *
+ * `max_hits`, when nonzero, auto-clears the breakpoint after that many
+ * stops have been delivered (post-skip, post-condition). 0 means
+ * unlimited. */
+typedef struct CfreeBreakpointSpec {
+ uint64_t addr;
+ uint64_t skip_count; /* silent skips before the first stop */
+ uint64_t max_hits; /* 0 = unlimited */
+ int (*condition)(void* user, const CfreeUnwindFrame* regs); /* NULL = unconditional; nonzero return delivers the stop */
+ void* condition_user; /* presumably handed to `condition` as `user` — confirm */
+} CfreeBreakpointSpec;
+
+int cfree_jit_session_breakpoint_set_spec(CfreeJitSession*,
+ const CfreeBreakpointSpec*,
+ uint32_t* bp_id_out);
+
/* Resolver invoked when the linker encounters an undefined symbol. Returning
* NULL is an error. */
typedef void* (*CfreeExternResolver)(void* user, const char* name);
@@ -325,18 +436,20 @@ typedef void* (*CfreeExternResolver)(void* user, const char* name);
/* ============================================================
* Pipeline
* ============================================================
- * Layered driver-facing API. Three core operations + one convenience:
+ * Layered driver-facing API. Four core operations:
*
* cfree_compile_obj one C TU -> in-memory CfreeObjBuilder (chains into link)
* cfree_compile_obj_emit one C TU -> CfreeWriter (cc -c)
* cfree_link_exe link inputs -> CfreeWriter (ld)
* cfree_link_jit link inputs -> owning CfreeJit handle
- * cfree_run convenience: compile N inputs and link/jit
+ *
+ * The CfreePipeline section near the end of this header bundles a
+ * CfreeCompiler with these entries for tools that want a single owning
+ * handle for a compile-then-link build.
*
* The freestanding core takes only byte buffers and Writers — never paths.
- * Path-shaped helpers live in driver-level adapters (and in cfree_run, which
- * is itself a driver convenience), and feed the byte/Writer APIs after
- * consulting CfreeEnv.file_io.
+ * Path-shaped helpers live in driver-level adapters and feed the byte/
+ * Writer APIs after consulting CfreeEnv.file_io.
*
* Errors are reported through libcfree's internal panic mechanism. Each
* top-level function in this header saves and restores the active panic
@@ -350,15 +463,38 @@ typedef struct CfreeDefine {
const char* body; /* NULL means "1" */
} CfreeDefine;
-/* Generic byte-buffer input. Used for C source TUs, encoded objects, and
- * archives. `name` is a diagnostic label (typically a path or pseudo-path);
- * the linker interns it on entry. `data` may be any byte-shaped content. */
+/* Source language tag carried on CfreeBytesInput when the input is fed to
+ * cfree_compile_obj* or cfree_preprocess. Ignored by entries that take bytes
+ * for non-source purposes (linker, archive writer/reader, object reader).
+ *
+ * CFREE_LANG_C is value 0 so a zero-initialized CfreeBytesInput defaults to
+ * C, matching the prior contract.
+ *
+ * `.S` (asm that needs preprocessing) is NOT auto-handled at this layer: the
+ * driver runs cfree_preprocess first and submits the result as CFREE_LANG_ASM. */
+typedef enum CfreeLanguage {
+ CFREE_LANG_C = 0,
+ CFREE_LANG_ASM = 1,
+} CfreeLanguage;
+
+/* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects,
+ * and archives. `name` is a diagnostic label (typically a path or pseudo-
+ * path); the linker interns it on entry. `data` may be any byte-shaped
+ * content. `lang` is consulted only by source-consuming entries; other
+ * entries ignore it. */
typedef struct CfreeBytesInput {
const char* name;
const uint8_t* data;
size_t len;
+ CfreeLanguage lang; /* read only by source-consuming entries; zero-init == CFREE_LANG_C */
} CfreeBytesInput;
+/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM; `.c`,
+ * `.cc`, `.cpp` and any other suffix (including a path with no suffix) ->
+ * CFREE_LANG_C. `.S` (asm that needs preprocessing) is not recognized —
+ * drivers must preprocess first and submit the result as CFREE_LANG_ASM. */
+CfreeLanguage cfree_language_for_path(const char* path);
+
/* Preprocessor configuration shared by compile_* and the convenience run. */
typedef struct CfreePpOptions {
const char* const* include_dirs;
@@ -413,7 +549,7 @@ typedef struct CfreeCompileOptions {
int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp,
const CfreeBytesInput* input, CfreeWriter* out);
-/* Compile one C input (memory bytes).
+/* Compile one source TU (C or GAS-subset asm; selected by input->lang).
*
* cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The
* builder is already finalized; do not write to it further. Pass it to
@@ -430,48 +566,17 @@ int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp,
* env.diag->errors == 0 at the end of the call (with warnings counting as
* errors when CfreeCompileOptions.warnings_are_errors is set). Returns
* nonzero on internal failures (OOM, invariant violation), where the
- * underlying compiler_panic mechanism unwinds before return. */
+ * underlying compiler_panic mechanism unwinds before return.
+ *
+ * When input->lang == CFREE_LANG_ASM the input bytes are fed straight to the
+ * GAS-subset assembler; CfreeCompileOptions fields that are C-only
+ * (CfreeCompileOptions.pp, opt_level) are ignored. Inline asm inside C TUs
+ * is handled by the C parser internally — no separate entry. */
int cfree_compile_obj (CfreeCompiler*, const CfreeCompileOptions*,
const CfreeBytesInput* input, CfreeObjBuilder** out);
int cfree_compile_obj_emit(CfreeCompiler*, const CfreeCompileOptions*,
const CfreeBytesInput* input, CfreeWriter* out);
-/* ----- Assembly entries (GAS subset) -----
- *
- * Assemble one text source into a relocatable object. Diagnostic and panic
- * semantics match cfree_compile_obj / cfree_compile_obj_emit: report-all to
- * env.diag, return 0 iff env.diag->errors == 0 at end of call (warnings count
- * as errors when warnings_are_errors is set), nonzero on internal failures.
- *
- * cfree_assemble_obj returns a finalized CfreeObjBuilder owned by the
- * CfreeCompiler; pass it to cfree_link_exe / cfree_link_jit. The builder must
- * be alive until the linker has consumed it; the CfreeCompiler must outlive
- * it.
- *
- * cfree_assemble_obj_emit writes the encoded object to `out` and frees its
- * temporary builder before returning. The Writer is not closed. On nonzero
- * return the Writer may contain partial output and should not be consumed.
- *
- * Inline asm in the C parser reuses the same parser internally; no separate
- * public entry. */
-typedef struct CfreeAsmOptions {
- int debug_info; /* generate .debug_line from .file/.loc */
- /* Reproducible-build knobs; semantics match CfreeCompileOptions. */
- uint64_t epoch;
- const CfreePathPrefixMap* path_map;
- uint32_t npath_map;
- /* Diagnostic policy; semantics match CfreeCompileOptions. */
- int warnings_are_errors;
- uint32_t max_errors;
-} CfreeAsmOptions;
-
-int cfree_assemble_obj (CfreeCompiler*, const CfreeAsmOptions*,
- const CfreeBytesInput* input,
- CfreeObjBuilder** out);
-int cfree_assemble_obj_emit(CfreeCompiler*, const CfreeAsmOptions*,
- const CfreeBytesInput* input,
- CfreeWriter* out);
-
/* ----- Header-dependency iteration -----
*
* Walks the include edges recorded by SourceManager during a preceding
@@ -641,27 +746,40 @@ int cfree_link_script_parse(CfreeCompiler*, const char* text, size_t len,
const CfreeLinkScript** out);
void cfree_link_script_free (CfreeCompiler*, const CfreeLinkScript*);
-/* Per-archive flags. Object-file inputs (obj_bytes) keep the plain
- * CfreeBytesInput[] shape — only archives carry these knobs. */
-typedef enum CfreeLinkArchFlag {
- CFREE_LAF_NONE = 0,
- /* Pull every member of the archive in regardless of whether its symbols
- * satisfy an undef. Equivalent to GNU ld --whole-archive. */
- CFREE_LAF_WHOLE_ARCHIVE = 1u << 0,
-} CfreeLinkArchFlag;
-
-/* Archive input with linker-side flags. `group_id` clusters archives into a
- * cyclic resolution group: archives sharing a nonzero id are scanned
- * cyclically until no new symbols are pulled in (equivalent to GNU ld
- * --start-group ... --end-group). `group_id == 0` (default) means linear
- * single-pass. */
+/* Per-archive resolution mode (mirrors GNU ld's -Bstatic / -Bdynamic /
+ * --as-needed positional state). Object-file inputs keep the plain
+ * CfreeBytesInput shape — only archives carry these knobs. */
+typedef enum CfreeLinkMode {
+ CFREE_LM_DEFAULT, /* output-kind default */
+ CFREE_LM_STATIC, /* -Bstatic before this input */
+ CFREE_LM_DYNAMIC, /* -Bdynamic */
+ CFREE_LM_AS_NEEDED, /* --as-needed */
+} CfreeLinkMode;
+
+/* Archive input with linker-side state.
+ *
+ * link_mode: CfreeLinkMode (-Bstatic/-Bdynamic/--as-needed positional).
+ * whole_archive: nonzero == --whole-archive: pull every member in
+ * regardless of whether its symbols satisfy an undef.
+ * group_id: clusters archives into a cyclic resolution group;
+ * archives sharing a nonzero id are scanned cyclically
+ * until no new symbols are pulled in (--start-group ...
+ * --end-group). 0 (default) == linear single-pass.
+ *
+ * link_mode and whole_archive are orthogonal: --whole-archive applies
+ * regardless of --as-needed / -Bstatic / -Bdynamic state. */
typedef struct CfreeBytesInputArchive {
CfreeBytesInput input;
- uint8_t flags; /* bitmask of CfreeLinkArchFlag */
- uint8_t group_id; /* 0 = none; same nonzero = same cyclic group */
+ uint8_t link_mode; /* CfreeLinkMode; default CFREE_LM_DEFAULT */
+ uint8_t whole_archive; /* nonzero == --whole-archive */
+ uint8_t group_id; /* 0 = none; same nonzero = same cyclic group */
+ uint8_t pad; /* NOTE(review): looks like explicit padding — confirm whether it must be zero */
} CfreeBytesInputArchive;
-typedef struct CfreeLinkOptions {
+/* Common link-input set, embedded in both CfreeLinkOptions and
+ * CfreeLinkSharedOptions. Adding a new input shape lands here in one
+ * place rather than in every options struct. */
+typedef struct CfreeLinkInputs {
CfreeObjBuilder* const* objs; /* fresh-compiled, by reference */
uint32_t nobjs;
const CfreeBytesInput* obj_bytes;
@@ -679,80 +797,93 @@ typedef struct CfreeLinkOptions {
uint8_t build_id_mode;
const uint8_t* build_id_bytes;
uint32_t build_id_len;
+} CfreeLinkInputs;
+
+/* Options for executable / JIT link. Exe-only fields go on this struct
+ * (currently none; gc_sections below also exists on CfreeLinkSharedOptions).
+ *
+ * gc_sections: nonzero enables --gc-sections (drop unreferenced sections
+ * from the output, transitively from entry / KEEP roots /
+ * exported symbols). Default 0. */
+typedef struct CfreeLinkOptions {
+ CfreeLinkInputs inputs;
+ int gc_sections; /* nonzero enables --gc-sections; default 0 */
+} CfreeLinkOptions;
+/* Options for shared-library link.
+ *
+ * soname: recorded in the produced object (DT_SONAME on ELF,
+ * LC_ID_DYLIB on Mach-O). NULL == none.
+ * rpaths/runpaths: DT_RPATH / DT_RUNPATH entries, written verbatim. The
+ * runtime loader expands $ORIGIN and similar tokens. On
+ * Mach-O both lists collapse to LC_RPATH in
+ * rpaths-then-runpaths order.
+ * exports: flat list of symbol names promoted to the dynamic
+ * symbol table. v1 has no symbol-version-script support;
+ * that lands later as a separate CfreeVersionScript
+ * type rather than folded into the linker-script grammar.
+ * allow_undefined: default 1 for shared output. 0 forces every external
+ * reference to be resolved at link time. */
+typedef struct CfreeLinkSharedOptions {
+ CfreeLinkInputs inputs;
+ const char* soname; /* NULL == none */
+ const char* const* rpaths; uint32_t nrpaths;
+ const char* const* runpaths; uint32_t nrunpaths;
+ const char* const* exports; uint32_t nexports;
+ int allow_undefined; /* NOTE(review): documented default is 1, but a zero-initialized struct yields 0 — confirm how the default is applied */
+ /* Section GC. See CfreeLinkOptions.gc_sections. */
+ int gc_sections;
+} CfreeLinkSharedOptions;
+
/* All bytes inputs (obj_bytes, archives — including the CfreeBytesInput
* nested inside each CfreeBytesInputArchive) must remain alive until the
* matching cfree_link_* call returns. */
/* Link to executable. Writer is not closed by the call. On nonzero return
* the Writer may contain partial output and should not be consumed. */
-int cfree_link_exe(CfreeCompiler*, const CfreeLinkOptions*, CfreeWriter* out);
+int cfree_link_exe (CfreeCompiler*, const CfreeLinkOptions*, CfreeWriter* out);
+
+/* Link to shared library / dylib in the format implied by Compiler.target
+ * (ELF .so, Mach-O .dylib, PE .dll). Writer is not closed; on nonzero
+ * return the Writer may contain partial output and should not be
+ * consumed. */
+int cfree_link_shared(CfreeCompiler*, const CfreeLinkSharedOptions*, CfreeWriter* out);
/* Link as JIT. On success, *out_jit owns its image and mapped pages and
* must be released with cfree_jit_free. */
-int cfree_link_jit(CfreeCompiler*, const CfreeLinkOptions*, CfreeJit** out_jit);
-
-/* ----- Convenience: compose compile + link/jit for the common case. ----- */
-
-typedef enum CfreeOutputKind {
- CFREE_OUTPUT_OBJ, /* exactly one source input total */
- CFREE_OUTPUT_EXE,
- CFREE_OUTPUT_JIT,
-} CfreeOutputKind;
-
-typedef struct CfreeOptions {
- CfreeTarget target;
- CfreeEnv env;
- CfreeOutputKind output_kind;
-
- int opt_level;
- int debug_info;
+int cfree_link_jit (CfreeCompiler*, const CfreeLinkOptions*, CfreeJit** out_jit);
- const char* output_path; /* OBJ/EXE: required, used via env.file_io */
-
- /* C source inputs. Path-shaped sources are read via env.file_io; memory
- * sources are passed directly. The combined sequence (paths first, then
- * memory) is compiled in order. For OBJ output exactly one total source
- * is required. */
- const char* const* source_files;
- uint32_t nsource_files;
- const CfreeBytesInput* source_memory;
- uint32_t nsource_memory;
-
- CfreePpOptions pp;
-
- /* Path-shaped link inputs: driver reads via env.file_io. Library
- * resolution (-lfoo against -L paths) is the CLI driver's job; by the
- * time options reach cfree_run, archives must be concrete paths. */
- const char* const* object_files;
- uint32_t nobject_files;
- const char* const* archives;
- uint32_t narchives;
- const char* linker_script; /* path; driver reads via env.file_io */
-
- const char* entry; /* NULL = format/target default */
- CfreeExternResolver extern_resolver;
- void* extern_resolver_user;
+/* ============================================================
+ * Pipeline (stateful driver-facing API)
+ * ============================================================
+ * A CfreePipeline bundles a CfreeCompiler with the lifecycle every
+ * compile-then-link tool needs. Tools create a pipeline once per build,
+ * feed bytes into pipeline_compile_obj, then drive one of the link entries.
+ * `cfree_pipeline_compiler` exposes the underlying compiler so callers can
+ * reach into APIs that need it directly (e.g. cfree_dwarf_open against a
+ * JIT image, cfree_dep_iter_new for header-dep emission).
+ *
+ * Ownership: CfreeObjBuilders returned by pipeline_compile_obj are owned by
+ * the pipeline's compiler and must be alive at the matching link call;
+ * cfree_pipeline_free reaps everything in one shot. Path-shaped source
+ * loading is the driver's job — pipeline entries take CfreeBytesInput. */
- /* Reproducibility — forwarded to the underlying compile/link options. */
- uint64_t epoch;
- const CfreePathPrefixMap* path_map;
- uint32_t npath_map;
- uint8_t build_id_mode;
- const uint8_t* build_id_bytes;
- uint32_t build_id_len;
+CfreePipeline* cfree_pipeline_new (CfreeTarget, const CfreeEnv*);
+void cfree_pipeline_free(CfreePipeline*);
- /* Diagnostic policy — forwarded to CfreeCompileOptions. cfree_run
- * returns nonzero when any per-TU compile reports errors (or when a
- * subsequent link/jit step fails). */
- int warnings_are_errors;
- uint32_t max_errors;
+/* Borrowed; must not be freed by callers. Valid until cfree_pipeline_free. */
+CfreeCompiler* cfree_pipeline_compiler(CfreePipeline*);
- CfreeJit** out_jit; /* JIT only: caller owns on success */
-} CfreeOptions;
+int cfree_pipeline_compile_obj(CfreePipeline*, const CfreeCompileOptions*,
+ const CfreeBytesInput* input,
+ CfreeObjBuilder** out);
-int cfree_run(const CfreeOptions*);
+int cfree_pipeline_link_exe (CfreePipeline*, const CfreeLinkOptions*,
+ CfreeWriter* out);
+int cfree_pipeline_link_shared(CfreePipeline*, const CfreeLinkSharedOptions*,
+ CfreeWriter* out);
+int cfree_pipeline_link_jit (CfreePipeline*, const CfreeLinkOptions*,
+ CfreeJit** out_jit);
/* ============================================================
* Binary format detection
@@ -773,12 +904,20 @@ typedef enum CfreeBinFmt {
CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len);
+/* Derive a CfreeTarget from object-file magic + headers (ELF e_machine /
+ * EI_CLASS / EI_DATA, COFF Machine, Mach-O cputype, WASM = wasm32). Returns
+ * 0 on success and fills *out; returns 1 when the input is not a recognized
+ * relocatable object or its magic carries insufficient information. AR
+ * archives are not handled here — open a member to detect its target. */
+int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out);
+
/* ============================================================
* Object inspection
* ============================================================
- * Open a relocatable object for inspection. Format is auto-detected from
- * magic bytes. Returns NULL on failure. The input bytes must remain alive
- * until cfree_obj_close.
+ * Open a relocatable object for inspection. Format and target are both
+ * auto-detected from the file (see cfree_detect_fmt / cfree_detect_target).
+ * Returns NULL on failure. The input bytes must remain alive until
+ * cfree_obj_close.
*
* After a successful open, query functions provide read-only access to
* sections and symbols. Strings returned by query functions are interned
@@ -805,22 +944,8 @@ typedef enum CfreeSecFlag {
CFREE_SF_STRINGS = 1u << 5,
} CfreeSecFlag;
-typedef enum CfreeSymBind {
- CFREE_SB_LOCAL,
- CFREE_SB_GLOBAL,
- CFREE_SB_WEAK,
-} CfreeSymBind;
-
-typedef enum CfreeSymKind {
- CFREE_SK_UNDEF,
- CFREE_SK_FUNC,
- CFREE_SK_OBJ,
- CFREE_SK_SECTION,
- CFREE_SK_FILE,
- CFREE_SK_COMMON,
- CFREE_SK_TLS,
- CFREE_SK_ABS,
-} CfreeSymKind;
+/* CfreeSymBind and CfreeSymKind are declared earlier in this header (see
+ * "Symbol classification") because the JIT symbol iterator also uses them. */
#define CFREE_SECTION_NONE UINT32_MAX
@@ -829,7 +954,9 @@ typedef struct CfreeObjSecInfo {
CfreeSecKind kind;
uint32_t flags; /* bitmask of CfreeSecFlag */
uint32_t size; /* bytes; BSS uses virtual size */
- uint32_t align;
+ uint32_t align; /* always a power of 2; 1 means no constraint;
+ * 0 is reserved and never appears (ELF's "0 or 1
+ * means none" is normalized to 1 on read). */
} CfreeObjSecInfo;
typedef struct CfreeObjSymInfo {
@@ -841,7 +968,7 @@ typedef struct CfreeObjSymInfo {
uint64_t size;
} CfreeObjSymInfo;
-CfreeObjFile* cfree_obj_open (const CfreeEnv*, CfreeTarget,
+CfreeObjFile* cfree_obj_open (const CfreeEnv*,
const CfreeBytesInput*);
void cfree_obj_close (CfreeObjFile*);
CfreeObjFmt cfree_obj_fmt (const CfreeObjFile*);
@@ -923,6 +1050,24 @@ int cfree_dwarf_func_at (CfreeDebugInfo*, uint64_t pc,
uint64_t* low_pc_out,
uint64_t* high_pc_out);
+/* Richer subprogram description for backtrace rendering. Returns the same
+ * name and pc range as cfree_dwarf_func_at, plus the source location of the
+ * subprogram DIE (DW_AT_decl_file/line) and an `inlined` flag (set when
+ * `pc` resolves to an inlined instance). Returns 0 on success, 1 if no
+ * subprogram contains `pc`. cfree_dwarf_func_at is kept as a thin
+ * convenience over this entry. */
+typedef struct CfreeDwarfSubprogram {
+ const char* name; /* same name cfree_dwarf_func_at reports */
+ uint64_t low_pc; /* pc range, as in cfree_dwarf_func_at */
+ uint64_t high_pc;
+ const char* decl_file; /* DW_AT_decl_file of the subprogram DIE */
+ uint32_t decl_line; /* DW_AT_decl_line of the subprogram DIE */
+ uint8_t inlined; /* set when `pc` resolves to an inlined instance */
+} CfreeDwarfSubprogram;
+
+int cfree_dwarf_subprogram_at(CfreeDebugInfo*, uint64_t pc,
+ CfreeDwarfSubprogram* out);
+
/* CFI-driven unwind step. The caller seeds `frame->pc` (and any callee-saved
* registers known at the leaf) and the consumer walks .eh_frame to compute
* the caller frame in place: pc, cfa, and registers are updated. CfreeUnwindFrame
@@ -952,6 +1097,67 @@ int cfree_dwarf_unwind_step(CfreeDebugInfo*, CfreeUnwindFrame*);
* EXPR locations carry a DWARF expression byte string; libcfree owns the
* stack-machine evaluator. Callers should treat the loc as opaque and
* always go through cfree_dwarf_loc_read. */
+/* ----- Type descriptions -----
+ *
+ * Type DIEs are exposed as opaque CfreeDwarfType handles owned by the
+ * CfreeDebugInfo (interned for the lifetime of the consumer). Callers
+ * inspect a type with cfree_dwarf_type_info, which returns a kind tag plus
+ * shape data (size, name, element count, inner type). For aggregates
+ * (struct/union) and enums, dedicated iterators yield fields and enum
+ * values respectively. */
+typedef enum CfreeDwarfTypeKind {
+ CFREE_DT_VOID,
+ CFREE_DT_SINT, /* base: signed integer */
+ CFREE_DT_UINT, /* base: unsigned integer */
+ CFREE_DT_BOOL,
+ CFREE_DT_FLOAT,
+ CFREE_DT_CHAR, /* signed_char / unsigned_char distinguished by SINT/UINT */
+ CFREE_DT_PTR, /* points to inner */
+ CFREE_DT_ARRAY, /* element type + length */
+ CFREE_DT_STRUCT,
+ CFREE_DT_UNION,
+ CFREE_DT_ENUM, /* base type + named values */
+ CFREE_DT_FUNC, /* function type, for function-pointer pretty-print */
+ CFREE_DT_TYPEDEF, /* alias name + underlying */
+} CfreeDwarfTypeKind;
+
+typedef struct CfreeDwarfType CfreeDwarfType; /* opaque */
+
+typedef struct CfreeDwarfTypeInfo {
+ CfreeDwarfTypeKind kind;
+ uint32_t byte_size; /* 0 = unknown / void */
+ const char* name; /* tag/typedef name; "" if anon */
+ /* For ARRAY: element_count == 0 means flexible/unknown. */
+ uint32_t element_count;
+ /* For PTR/ARRAY/TYPEDEF: the inner type (NULL otherwise). */
+ const CfreeDwarfType* inner;
+} CfreeDwarfTypeInfo;
+
+CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType*);
+
+/* Struct/union field iterator. Yields each direct field; nested aggregates
+ * are reached by recursing on field.type. */
+typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter;
+typedef struct CfreeDwarfField {
+ const char* name; /* "" for anonymous */
+ uint32_t byte_offset;
+ uint32_t bit_offset; /* for bitfields; 0 otherwise */
+ uint32_t bit_size; /* for bitfields; 0 otherwise */
+ const CfreeDwarfType* type;
+} CfreeDwarfField;
+
+CfreeDwarfFieldIter* cfree_dwarf_field_iter_new (CfreeDebugInfo*, const CfreeDwarfType*);
+int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter*, CfreeDwarfField* out);
+void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter*);
+
+/* Enum value iterator. */
+typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter;
+typedef struct CfreeDwarfEnumVal { const char* name; int64_t value; } CfreeDwarfEnumVal;
+
+CfreeDwarfEnumIter* cfree_dwarf_enum_iter_new (CfreeDebugInfo*, const CfreeDwarfType*);
+int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter*, CfreeDwarfEnumVal* out);
+void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter*);
+
typedef enum CfreeDwarfLocKind {
CFREE_DLOC_REG, /* value lives in a register */
CFREE_DLOC_FRAME_OFS, /* [cfa + frame_ofs] */
@@ -960,8 +1166,12 @@ typedef enum CfreeDwarfLocKind {
} CfreeDwarfLocKind;
typedef struct CfreeDwarfVarLoc {
- CfreeDwarfLocKind kind;
- uint32_t byte_size; /* 0 = unknown */
+ CfreeDwarfLocKind kind;
+ uint32_t byte_size; /* 0 = unknown */
+ /* DIE type of the variable. NULL when type information was not
+ * recovered (e.g. stripped binary, hand-written symbol). When NULL,
+ * callers should fall back to byte_size and treat the bytes opaquely. */
+ const CfreeDwarfType* type;
union {
uint32_t reg;
int32_t frame_ofs;
@@ -977,6 +1187,45 @@ int cfree_dwarf_loc_read(CfreeDebugInfo*, const CfreeDwarfVarLoc*,
CfreeJitSession*, /* memory provider */
void* dst, size_t cap, size_t* read_out);
+/* ----- Locals, arguments, and parameters -----
+ *
+ * cfree_dwarf_var_at resolves a single named variable. To enumerate every
+ * variable visible at a PC (for `info locals` / `info args`), use the
+ * vars-at iterator: it yields locals from the deepest scope outward and
+ * then file-scope globals, with a role mask filter.
+ *
+ * cfree_dwarf_var_at is kept as a convenience over the iterator: it
+ * performs deepest-scope-first matching by name and returns the first
+ * hit. Both are valid entry points. */
+typedef enum CfreeDwarfVarRole {
+ CFREE_DVR_LOCAL,
+ CFREE_DVR_ARG,
+ CFREE_DVR_GLOBAL,
+} CfreeDwarfVarRole;
+
+typedef struct CfreeDwarfVar {
+ const char* name;
+ CfreeDwarfVarRole role;
+ CfreeDwarfVarLoc loc;
+} CfreeDwarfVar;
+
+typedef struct CfreeDwarfVarIter CfreeDwarfVarIter;
+
+CfreeDwarfVarIter* cfree_dwarf_vars_at_new (CfreeDebugInfo*, uint64_t pc,
+ uint32_t role_mask); /* NOTE(review): CfreeDwarfVarRole values are sequential, not bit flags; presumably bit (1u << role) selects a role — confirm */
+int cfree_dwarf_vars_at_next(CfreeDwarfVarIter*, CfreeDwarfVar* out);
+void cfree_dwarf_vars_at_free(CfreeDwarfVarIter*);
+
+/* Iterate the formal parameters of the subprogram covering `pc`, in
+ * declaration order. Drives gdb-style backtrace argument rendering.
+ * Returns NULL if `pc` is not inside any subprogram. */
+typedef struct CfreeDwarfParamIter CfreeDwarfParamIter;
+
+CfreeDwarfParamIter* cfree_dwarf_param_iter_new (CfreeDebugInfo*, uint64_t pc);
+int cfree_dwarf_param_iter_next(CfreeDwarfParamIter*,
+ CfreeDwarfVar* out);
+void cfree_dwarf_param_iter_free(CfreeDwarfParamIter*);
+
/* ============================================================
* Disassembler
* ============================================================