kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4bcdfd87a185d107e9685f63bdee5a4968e63763
parent 4b346e744cca9faad6fd986ceb5f1d7e78e3d067
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  8 May 2026 17:19:42 -0700

include: API updates

Diffstat:
M include/cfree.h | 537 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 393 insertions(+), 144 deletions(-)

diff --git a/include/cfree.h b/include/cfree.h @@ -15,6 +15,7 @@ * Opaque handles * ============================================================ */ typedef struct CfreeCompiler CfreeCompiler; +typedef struct CfreePipeline CfreePipeline; typedef struct CfreeObjBuilder CfreeObjBuilder; typedef struct CfreeJit CfreeJit; typedef struct CfreeJitSession CfreeJitSession; @@ -150,6 +151,51 @@ typedef struct CfreeTarget { * backend. */ /* ============================================================ + * Symbol classification + * ============================================================ + * Shared by the object inspector and the JIT symbol iterator. Bind + * captures linkage; Kind captures what the symbol points at. */ +typedef enum CfreeSymBind { + CFREE_SB_LOCAL, + CFREE_SB_GLOBAL, + CFREE_SB_WEAK, +} CfreeSymBind; + +typedef enum CfreeSymKind { + CFREE_SK_UNDEF, + CFREE_SK_FUNC, + CFREE_SK_OBJ, + CFREE_SK_SECTION, + CFREE_SK_FILE, + CFREE_SK_COMMON, + CFREE_SK_TLS, + CFREE_SK_ABS, +} CfreeSymKind; + +/* ============================================================ + * Architecture: register name mapping + * ============================================================ + * DWARF register numbering varies per arch (CfreeUnwindFrame.regs is indexed + * by DWARF register number). These helpers translate between DWARF index and + * canonical assembler name (e.g. "rax", "x0", "a0") so dbg can render + * `info registers` and accept `set $rax = ...` syntax. + * + * Stateless and allocation-free in the lookup paths — name strings are static + * library data. `cfree_arch_register_name` returns NULL for an unmapped + * index; `cfree_arch_register_index` returns 0 on a known name and 1 if the + * name is unknown. The iterator yields named registers in DWARF index order. 
*/ +typedef struct CfreeArchRegIter CfreeArchRegIter; +typedef struct CfreeArchReg { uint32_t dwarf_idx; const char* name; } CfreeArchReg; + +const char* cfree_arch_register_name (CfreeArchKind, uint32_t dwarf_idx); +int cfree_arch_register_index(CfreeArchKind, const char* name, + uint32_t* idx_out); + +CfreeArchRegIter* cfree_arch_reg_iter_new (CfreeArchKind); +int cfree_arch_reg_iter_next(CfreeArchRegIter*, CfreeArchReg* out); +void cfree_arch_reg_iter_free(CfreeArchRegIter*); + +/* ============================================================ * Host environment * ============================================================ * The host supplies a heap, optional file I/O, and a diag sink. The @@ -225,6 +271,23 @@ int cfree_jit_addr_to_sym (CfreeJit*, uint64_t addr, const char** name_out, uint64_t* off_out); +/* Enumerate every globally visible symbol in the resolved JIT image. + * Drives `info functions` / `info variables` and tab completion in dbg. + * `name` is interned and valid until cfree_jit_free; CfreeSymKind is the + * same enum as the object inspector uses (CFREE_SK_FUNC / CFREE_SK_OBJ / + * etc.). 
*/ +typedef struct CfreeJitSymIter CfreeJitSymIter; +typedef struct CfreeJitSym { + const char* name; + uint64_t addr; + uint64_t size; + CfreeSymKind kind; +} CfreeJitSym; + +CfreeJitSymIter* cfree_jit_sym_iter_new (CfreeJit*); +int cfree_jit_sym_iter_next(CfreeJitSymIter*, CfreeJitSym* out); +void cfree_jit_sym_iter_free(CfreeJitSymIter*); + /* ----- JIT session: controlled execution ----- * * A session wraps the JIT in a worker thread and a per-arch trap engine @@ -268,6 +331,11 @@ typedef struct CfreeStopInfo { typedef enum CfreeResumeMode { CFREE_RESUME_CONTINUE, /* run until next stop or exit */ CFREE_RESUME_STEP_INSN, /* execute one machine instruction */ + CFREE_RESUME_STEP_LINE, /* until source line changes, staying + * in current function */ + CFREE_RESUME_NEXT_LINE, /* like STEP_LINE but step OVER any + * function calls */ + CFREE_RESUME_STEP_OUT, /* run until current frame returns */ } CfreeResumeMode; /* Entry-point signature dispatched by session_call. The library is @@ -309,6 +377,23 @@ int cfree_jit_session_interrupt(CfreeJitSession*); int cfree_jit_session_read_mem(CfreeJitSession*, uint64_t addr, void* dst, size_t n); +/* Write `n` bytes from `src` into the worker's address space at `addr`. + * Same constraints as the read variant: caller must be at a stop; partial + * writes leave the target untouched and return nonzero. */ +int cfree_jit_session_write_mem(CfreeJitSession*, uint64_t addr, + const void* src, size_t n); + +/* Read full register snapshot. Snapshot already lives in CfreeStopInfo; + * this is for callers that want a refresh outside the stop event (e.g. + * after a write). Returns 0 on success, nonzero if no worker is parked. */ +int cfree_jit_session_get_regs(CfreeJitSession*, CfreeUnwindFrame* out); + +/* Write back a register snapshot. The frame's `regs` are written into the + * worker; `pc` and `cfa` are honored only when changed. The library + * validates that `pc` lies inside the JIT image. 
Returns 0 on success, + * nonzero on a bad pc or if no worker is parked. */ +int cfree_jit_session_set_regs(CfreeJitSession*, const CfreeUnwindFrame*); + /* Set / clear a breakpoint at `addr` (which must lie within the JIT image). * On success, *bp_id_out is the session-local handle that future stop * events will report. Idempotent: setting at an address that already has @@ -318,6 +403,32 @@ int cfree_jit_session_breakpoint_set (CfreeJitSession*, uint64_t addr, uint32_t* bp_id_out); int cfree_jit_session_breakpoint_clear(CfreeJitSession*, uint32_t bp_id); +/* Extended breakpoint setter with skip count, hit cap, and an optional + * in-process predicate. The plain breakpoint_set above is a convenience + * over this form (skip_count = max_hits = 0, condition = NULL). + * + * `condition`, when non-NULL, is invoked by the library on the worker + * thread between the trap and the stop notification, after `skip_count` + * silent skips have elapsed. It must return nonzero to deliver a stop + * and zero to silently resume. The callback runs in a context where + * calling back into the session is not safe — restrict it to register + * inspection and pure computation. + * + * `max_hits`, when nonzero, auto-clears the breakpoint after that many + * stops have been delivered (post-skip, post-condition). 0 means + * unlimited. */ +typedef struct CfreeBreakpointSpec { + uint64_t addr; + uint64_t skip_count; /* silent skips before the first stop */ + uint64_t max_hits; /* 0 = unlimited */ + int (*condition)(void* user, const CfreeUnwindFrame* regs); + void* condition_user; +} CfreeBreakpointSpec; + +int cfree_jit_session_breakpoint_set_spec(CfreeJitSession*, + const CfreeBreakpointSpec*, + uint32_t* bp_id_out); + /* Resolver invoked when the linker encounters an undefined symbol. Returning * NULL is an error. 
*/ typedef void* (*CfreeExternResolver)(void* user, const char* name); @@ -325,18 +436,20 @@ typedef void* (*CfreeExternResolver)(void* user, const char* name); /* ============================================================ * Pipeline * ============================================================ - * Layered driver-facing API. Three core operations + one convenience: + * Layered driver-facing API. Four core operations: * * cfree_compile_obj one C TU -> in-memory CfreeObjBuilder (chains into link) * cfree_compile_obj_emit one C TU -> CfreeWriter (cc -c) * cfree_link_exe link inputs -> CfreeWriter (ld) * cfree_link_jit link inputs -> owning CfreeJit handle - * cfree_run convenience: compile N inputs and link/jit + * + * The CfreePipeline section near the end of this header bundles a + * CfreeCompiler with these entries for tools that want a single owning + * handle for a compile-then-link build. * * The freestanding core takes only byte buffers and Writers — never paths. - * Path-shaped helpers live in driver-level adapters (and in cfree_run, which - * is itself a driver convenience), and feed the byte/Writer APIs after - * consulting CfreeEnv.file_io. + * Path-shaped helpers live in driver-level adapters and feed the byte/ + * Writer APIs after consulting CfreeEnv.file_io. * * Errors are reported through libcfree's internal panic mechanism. Each * top-level function in this header saves and restores the active panic @@ -350,15 +463,38 @@ typedef struct CfreeDefine { const char* body; /* NULL means "1" */ } CfreeDefine; -/* Generic byte-buffer input. Used for C source TUs, encoded objects, and - * archives. `name` is a diagnostic label (typically a path or pseudo-path); - * the linker interns it on entry. `data` may be any byte-shaped content. */ +/* Source language tag carried on CfreeBytesInput when the input is fed to + * cfree_compile_obj* or cfree_preprocess. 
Ignored by entries that take bytes + * for non-source purposes (linker, archive writer/reader, object reader). + * + * CFREE_LANG_C is value 0 so a zero-initialized CfreeBytesInput defaults to + * C, matching the prior contract. + * + * `.S` (preprocessed asm) is NOT auto-handled at this layer: the driver runs + * cfree_preprocess first and then submits the result as CFREE_LANG_ASM. */ +typedef enum CfreeLanguage { + CFREE_LANG_C = 0, + CFREE_LANG_ASM = 1, +} CfreeLanguage; + +/* Generic byte-buffer input. Used for source TUs (C/asm), encoded objects, + * and archives. `name` is a diagnostic label (typically a path or pseudo- + * path); the linker interns it on entry. `data` may be any byte-shaped + * content. `lang` is consulted only by source-consuming entries; other + * entries ignore it. */ typedef struct CfreeBytesInput { const char* name; const uint8_t* data; size_t len; + CfreeLanguage lang; } CfreeBytesInput; +/* Suffix-based language inference helper. `.s` -> CFREE_LANG_ASM; `.c`, + * `.cc`, `.cpp` and any other suffix (including a path with no suffix) -> + * CFREE_LANG_C. `.S` (preprocessed asm) is not recognized — drivers must + * preprocess first and submit the result as CFREE_LANG_ASM. */ +CfreeLanguage cfree_language_for_path(const char* path); + /* Preprocessor configuration shared by compile_* and the convenience run. */ typedef struct CfreePpOptions { const char* const* include_dirs; @@ -413,7 +549,7 @@ typedef struct CfreeCompileOptions { int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp, const CfreeBytesInput* input, CfreeWriter* out); -/* Compile one C input (memory bytes). +/* Compile one source TU (C or GAS-subset asm; selected by input->lang). * * cfree_compile_obj returns a CfreeObjBuilder owned by the CfreeCompiler. The * builder is already finalized; do not write to it further. 
Pass it to @@ -430,48 +566,17 @@ int cfree_preprocess (CfreeCompiler*, const CfreePpOptions* pp, * env.diag->errors == 0 at the end of the call (with warnings counting as * errors when CfreeCompileOptions.warnings_are_errors is set). Returns * nonzero on internal failures (OOM, invariant violation), where the - * underlying compiler_panic mechanism unwinds before return. */ + * underlying compiler_panic mechanism unwinds before return. + * + * When input->lang == CFREE_LANG_ASM the input bytes are fed straight to the + * GAS-subset assembler; CfreeCompileOptions fields that are C-only + * (CfreeCompileOptions.pp, opt_level) are ignored. Inline asm inside C TUs + * is handled by the C parser internally — no separate entry. */ int cfree_compile_obj (CfreeCompiler*, const CfreeCompileOptions*, const CfreeBytesInput* input, CfreeObjBuilder** out); int cfree_compile_obj_emit(CfreeCompiler*, const CfreeCompileOptions*, const CfreeBytesInput* input, CfreeWriter* out); -/* ----- Assembly entries (GAS subset) ----- - * - * Assemble one text source into a relocatable object. Diagnostic and panic - * semantics match cfree_compile_obj / cfree_compile_obj_emit: report-all to - * env.diag, return 0 iff env.diag->errors == 0 at end of call (warnings count - * as errors when warnings_are_errors is set), nonzero on internal failures. - * - * cfree_assemble_obj returns a finalized CfreeObjBuilder owned by the - * CfreeCompiler; pass it to cfree_link_exe / cfree_link_jit. The builder must - * be alive until the linker has consumed it; the CfreeCompiler must outlive - * it. - * - * cfree_assemble_obj_emit writes the encoded object to `out` and frees its - * temporary builder before returning. The Writer is not closed. On nonzero - * return the Writer may contain partial output and should not be consumed. - * - * Inline asm in the C parser reuses the same parser internally; no separate - * public entry. 
*/ -typedef struct CfreeAsmOptions { - int debug_info; /* generate .debug_line from .file/.loc */ - /* Reproducible-build knobs; semantics match CfreeCompileOptions. */ - uint64_t epoch; - const CfreePathPrefixMap* path_map; - uint32_t npath_map; - /* Diagnostic policy; semantics match CfreeCompileOptions. */ - int warnings_are_errors; - uint32_t max_errors; -} CfreeAsmOptions; - -int cfree_assemble_obj (CfreeCompiler*, const CfreeAsmOptions*, - const CfreeBytesInput* input, - CfreeObjBuilder** out); -int cfree_assemble_obj_emit(CfreeCompiler*, const CfreeAsmOptions*, - const CfreeBytesInput* input, - CfreeWriter* out); - /* ----- Header-dependency iteration ----- * * Walks the include edges recorded by SourceManager during a preceding @@ -641,27 +746,40 @@ int cfree_link_script_parse(CfreeCompiler*, const char* text, size_t len, const CfreeLinkScript** out); void cfree_link_script_free (CfreeCompiler*, const CfreeLinkScript*); -/* Per-archive flags. Object-file inputs (obj_bytes) keep the plain - * CfreeBytesInput[] shape — only archives carry these knobs. */ -typedef enum CfreeLinkArchFlag { - CFREE_LAF_NONE = 0, - /* Pull every member of the archive in regardless of whether its symbols - * satisfy an undef. Equivalent to GNU ld --whole-archive. */ - CFREE_LAF_WHOLE_ARCHIVE = 1u << 0, -} CfreeLinkArchFlag; - -/* Archive input with linker-side flags. `group_id` clusters archives into a - * cyclic resolution group: archives sharing a nonzero id are scanned - * cyclically until no new symbols are pulled in (equivalent to GNU ld - * --start-group ... --end-group). `group_id == 0` (default) means linear - * single-pass. */ +/* Per-archive resolution mode (mirrors GNU ld's -Bstatic / -Bdynamic / + * --as-needed positional state). Object-file inputs keep the plain + * CfreeBytesInput shape — only archives carry these knobs. 
*/ +typedef enum CfreeLinkMode { + CFREE_LM_DEFAULT, /* output-kind default */ + CFREE_LM_STATIC, /* -Bstatic before this input */ + CFREE_LM_DYNAMIC, /* -Bdynamic */ + CFREE_LM_AS_NEEDED, /* --as-needed */ +} CfreeLinkMode; + +/* Archive input with linker-side state. + * + * link_mode: CfreeLinkMode (-Bstatic/-Bdynamic/--as-needed positional). + * whole_archive: nonzero == --whole-archive: pull every member in + * regardless of whether its symbols satisfy an undef. + * group_id: clusters archives into a cyclic resolution group; + * archives sharing a nonzero id are scanned cyclically + * until no new symbols pull in (--start-group ... + * --end-group). 0 (default) == linear single-pass. + * + * link_mode and whole_archive are orthogonal: --whole-archive applies + * regardless of --as-needed / -Bstatic / -Bdynamic state. */ typedef struct CfreeBytesInputArchive { CfreeBytesInput input; - uint8_t flags; /* bitmask of CfreeLinkArchFlag */ - uint8_t group_id; /* 0 = none; same nonzero = same cyclic group */ + uint8_t link_mode; /* CfreeLinkMode; default CFREE_LM_DEFAULT */ + uint8_t whole_archive; /* nonzero == --whole-archive */ + uint8_t group_id; + uint8_t pad; } CfreeBytesInputArchive; -typedef struct CfreeLinkOptions { +/* Common link-input set, embedded in both CfreeLinkOptions and + * CfreeLinkSharedOptions. Adding a new input shape lands here in one + * place rather than in every options struct. */ +typedef struct CfreeLinkInputs { CfreeObjBuilder* const* objs; /* fresh-compiled, by reference */ uint32_t nobjs; const CfreeBytesInput* obj_bytes; @@ -679,80 +797,93 @@ typedef struct CfreeLinkOptions { uint8_t build_id_mode; const uint8_t* build_id_bytes; uint32_t build_id_len; +} CfreeLinkInputs; + +/* Options for executable / JIT link. Exe-only fields go on this struct + * (currently none beyond the shared input set). 
+ * + * gc_sections: nonzero enables --gc-sections (drop unreferenced sections + * from the output, transitively from entry / KEEP roots / + * exported symbols). Default 0. */ +typedef struct CfreeLinkOptions { + CfreeLinkInputs inputs; + int gc_sections; } CfreeLinkOptions; +/* Options for shared-library link. + * + * soname: recorded in the produced object (DT_SONAME on ELF, + * LC_ID_DYLIB on Mach-O). NULL == none. + * rpaths/runpaths: DT_RPATH / DT_RUNPATH entries, written verbatim. The + * runtime loader expands $ORIGIN and similar tokens. On + * Mach-O both lists collapse to LC_RPATH in + * rpaths-then-runpaths order. + * exports: flat list of symbol names promoted to the dynamic + * symbol table. v1 has no symbol-version-script support; + * that lands later as a separate CfreeVersionScript + * type rather than folded into the linker-script grammar. + * allow_undefined: default 1 for shared output. 0 forces every external + * reference to be resolved at link time. */ +typedef struct CfreeLinkSharedOptions { + CfreeLinkInputs inputs; + const char* soname; + const char* const* rpaths; uint32_t nrpaths; + const char* const* runpaths; uint32_t nrunpaths; + const char* const* exports; uint32_t nexports; + int allow_undefined; + /* Section GC. See CfreeLinkOptions.gc_sections. */ + int gc_sections; +} CfreeLinkSharedOptions; + /* All bytes inputs (obj_bytes, archives — including the CfreeBytesInput * nested inside each CfreeBytesInputArchive) must remain alive until the * matching cfree_link_* call returns. */ /* Link to executable. Writer is not closed by the call. On nonzero return * the Writer may contain partial output and should not be consumed. */ -int cfree_link_exe(CfreeCompiler*, const CfreeLinkOptions*, CfreeWriter* out); +int cfree_link_exe (CfreeCompiler*, const CfreeLinkOptions*, CfreeWriter* out); + +/* Link to shared library / dylib in the format implied by Compiler.target + * (ELF .so, Mach-O .dylib, PE .dll). 
Writer is not closed; on nonzero + * return the Writer may contain partial output and should not be + * consumed. */ +int cfree_link_shared(CfreeCompiler*, const CfreeLinkSharedOptions*, CfreeWriter* out); /* Link as JIT. On success, *out_jit owns its image and mapped pages and * must be released with cfree_jit_free. */ -int cfree_link_jit(CfreeCompiler*, const CfreeLinkOptions*, CfreeJit** out_jit); - -/* ----- Convenience: compose compile + link/jit for the common case. ----- */ - -typedef enum CfreeOutputKind { - CFREE_OUTPUT_OBJ, /* exactly one source input total */ - CFREE_OUTPUT_EXE, - CFREE_OUTPUT_JIT, -} CfreeOutputKind; - -typedef struct CfreeOptions { - CfreeTarget target; - CfreeEnv env; - CfreeOutputKind output_kind; - - int opt_level; - int debug_info; +int cfree_link_jit (CfreeCompiler*, const CfreeLinkOptions*, CfreeJit** out_jit); - const char* output_path; /* OBJ/EXE: required, used via env.file_io */ - - /* C source inputs. Path-shaped sources are read via env.file_io; memory - * sources are passed directly. The combined sequence (paths first, then - * memory) is compiled in order. For OBJ output exactly one total source - * is required. */ - const char* const* source_files; - uint32_t nsource_files; - const CfreeBytesInput* source_memory; - uint32_t nsource_memory; - - CfreePpOptions pp; - - /* Path-shaped link inputs: driver reads via env.file_io. Library - * resolution (-lfoo against -L paths) is the CLI driver's job; by the - * time options reach cfree_run, archives must be concrete paths. 
*/ - const char* const* object_files; - uint32_t nobject_files; - const char* const* archives; - uint32_t narchives; - const char* linker_script; /* path; driver reads via env.file_io */ - - const char* entry; /* NULL = format/target default */ - CfreeExternResolver extern_resolver; - void* extern_resolver_user; +/* ============================================================ + * Pipeline (stateful driver-facing API) + * ============================================================ + * A CfreePipeline bundles a CfreeCompiler with the lifecycle every + * compile-then-link tool needs. Tools create a pipeline once per build, + * feed bytes into pipeline_compile_obj, then drive one of the link entries. + * `cfree_pipeline_compiler` exposes the underlying compiler so callers can + * reach into APIs that need it directly (e.g. cfree_dwarf_open against a + * JIT image, cfree_dep_iter_new for header-dep emission). + * + * Ownership: CfreeObjBuilders returned by pipeline_compile_obj are owned by + * the pipeline's compiler and must be alive at the matching link call; + * cfree_pipeline_free reaps everything in one shot. Path-shaped source + * loading is the driver's job — pipeline entries take CfreeBytesInput. */ - /* Reproducibility — forwarded to the underlying compile/link options. */ - uint64_t epoch; - const CfreePathPrefixMap* path_map; - uint32_t npath_map; - uint8_t build_id_mode; - const uint8_t* build_id_bytes; - uint32_t build_id_len; +CfreePipeline* cfree_pipeline_new (CfreeTarget, const CfreeEnv*); +void cfree_pipeline_free(CfreePipeline*); - /* Diagnostic policy — forwarded to CfreeCompileOptions. cfree_run - * returns nonzero when any per-TU compile reports errors (or when a - * subsequent link/jit step fails). */ - int warnings_are_errors; - uint32_t max_errors; +/* Borrowed; must not be freed by callers. Valid until cfree_pipeline_free. 
*/ +CfreeCompiler* cfree_pipeline_compiler(CfreePipeline*); - CfreeJit** out_jit; /* JIT only: caller owns on success */ -} CfreeOptions; +int cfree_pipeline_compile_obj(CfreePipeline*, const CfreeCompileOptions*, + const CfreeBytesInput* input, + CfreeObjBuilder** out); -int cfree_run(const CfreeOptions*); +int cfree_pipeline_link_exe (CfreePipeline*, const CfreeLinkOptions*, + CfreeWriter* out); +int cfree_pipeline_link_shared(CfreePipeline*, const CfreeLinkSharedOptions*, + CfreeWriter* out); +int cfree_pipeline_link_jit (CfreePipeline*, const CfreeLinkOptions*, + CfreeJit** out_jit); /* ============================================================ * Binary format detection @@ -773,12 +904,20 @@ typedef enum CfreeBinFmt { CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len); +/* Derive a CfreeTarget from object-file magic + headers (ELF e_machine / + * EI_CLASS / EI_DATA, COFF Machine, Mach-O cputype, WASM = wasm32). Returns + * 0 on success and fills *out; returns 1 when the input is not a recognized + * relocatable object or its magic carries insufficient information. AR + * archives are not handled here — open a member to detect its target. */ +int cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out); + /* ============================================================ * Object inspection * ============================================================ - * Open a relocatable object for inspection. Format is auto-detected from - * magic bytes. Returns NULL on failure. The input bytes must remain alive - * until cfree_obj_close. + * Open a relocatable object for inspection. Format and target are both + * auto-detected from the file (see cfree_detect_fmt / cfree_detect_target). + * Returns NULL on failure. The input bytes must remain alive until + * cfree_obj_close. * * After a successful open, query functions provide read-only access to * sections and symbols. 
Strings returned by query functions are interned @@ -805,22 +944,8 @@ typedef enum CfreeSecFlag { CFREE_SF_STRINGS = 1u << 5, } CfreeSecFlag; -typedef enum CfreeSymBind { - CFREE_SB_LOCAL, - CFREE_SB_GLOBAL, - CFREE_SB_WEAK, -} CfreeSymBind; - -typedef enum CfreeSymKind { - CFREE_SK_UNDEF, - CFREE_SK_FUNC, - CFREE_SK_OBJ, - CFREE_SK_SECTION, - CFREE_SK_FILE, - CFREE_SK_COMMON, - CFREE_SK_TLS, - CFREE_SK_ABS, -} CfreeSymKind; +/* CfreeSymBind and CfreeSymKind are declared at the top of this header + * because they are also used by the JIT symbol iterator. */ #define CFREE_SECTION_NONE UINT32_MAX @@ -829,7 +954,9 @@ typedef struct CfreeObjSecInfo { CfreeSecKind kind; uint32_t flags; /* bitmask of CfreeSecFlag */ uint32_t size; /* bytes; BSS uses virtual size */ - uint32_t align; + uint32_t align; /* always a power of 2; 1 means no constraint; + * 0 is reserved and never appears (ELF's "0 or 1 + * means none" is normalized to 1 on read). */ } CfreeObjSecInfo; typedef struct CfreeObjSymInfo { @@ -841,7 +968,7 @@ typedef struct CfreeObjSymInfo { uint64_t size; } CfreeObjSymInfo; -CfreeObjFile* cfree_obj_open (const CfreeEnv*, CfreeTarget, +CfreeObjFile* cfree_obj_open (const CfreeEnv*, const CfreeBytesInput*); void cfree_obj_close (CfreeObjFile*); CfreeObjFmt cfree_obj_fmt (const CfreeObjFile*); @@ -923,6 +1050,24 @@ int cfree_dwarf_func_at (CfreeDebugInfo*, uint64_t pc, uint64_t* low_pc_out, uint64_t* high_pc_out); +/* Richer subprogram description for backtrace rendering. Returns the same + * name and pc range as cfree_dwarf_func_at, plus the source location of the + * subprogram DIE (DW_AT_decl_file/line) and an `inlined` flag (set when + * `pc` resolves to an inlined instance). Returns 0 on success, 1 if no + * subprogram contains `pc`. cfree_dwarf_func_at is kept as a thin + * convenience over this entry. 
*/ +typedef struct CfreeDwarfSubprogram { + const char* name; + uint64_t low_pc; + uint64_t high_pc; + const char* decl_file; + uint32_t decl_line; + uint8_t inlined; +} CfreeDwarfSubprogram; + +int cfree_dwarf_subprogram_at(CfreeDebugInfo*, uint64_t pc, + CfreeDwarfSubprogram* out); + /* CFI-driven unwind step. The caller seeds `frame->pc` (and any callee-saved * registers known at the leaf) and the consumer walks .eh_frame to compute * the caller frame in place: pc, cfa, and registers are updated. CfreeUnwindFrame @@ -952,6 +1097,67 @@ int cfree_dwarf_unwind_step(CfreeDebugInfo*, CfreeUnwindFrame*); * EXPR locations carry a DWARF expression byte string; libcfree owns the * stack-machine evaluator. Callers should treat the loc as opaque and * always go through cfree_dwarf_loc_read. */ +/* ----- Type descriptions ----- + * + * Type DIEs are exposed as opaque CfreeDwarfType handles owned by the + * CfreeDebugInfo (interned for the lifetime of the consumer). Callers + * inspect a type with cfree_dwarf_type_info, which returns a kind tag plus + * shape data (size, name, element count, inner type). For aggregates + * (struct/union) and enums, dedicated iterators yield fields and enum + * values respectively. 
*/ +typedef enum CfreeDwarfTypeKind { + CFREE_DT_VOID, + CFREE_DT_SINT, /* base: signed integer */ + CFREE_DT_UINT, /* base: unsigned integer */ + CFREE_DT_BOOL, + CFREE_DT_FLOAT, + CFREE_DT_CHAR, /* signed_char / unsigned_char distinguished by SINT/UINT */ + CFREE_DT_PTR, /* points to inner */ + CFREE_DT_ARRAY, /* element type + length */ + CFREE_DT_STRUCT, + CFREE_DT_UNION, + CFREE_DT_ENUM, /* base type + named values */ + CFREE_DT_FUNC, /* function type, for function-pointer pretty-print */ + CFREE_DT_TYPEDEF, /* alias name + underlying */ +} CfreeDwarfTypeKind; + +typedef struct CfreeDwarfType CfreeDwarfType; /* opaque */ + +typedef struct CfreeDwarfTypeInfo { + CfreeDwarfTypeKind kind; + uint32_t byte_size; /* 0 = unknown / void */ + const char* name; /* tag/typedef name; "" if anon */ + /* For ARRAY: element_count == 0 means flexible/unknown. */ + uint32_t element_count; + /* For PTR/ARRAY/TYPEDEF: the inner type (NULL otherwise). */ + const CfreeDwarfType* inner; +} CfreeDwarfTypeInfo; + +CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType*); + +/* Struct/union field iterator. Yields each direct field; nested aggregates + * are reached by recursing on field.type. */ +typedef struct CfreeDwarfFieldIter CfreeDwarfFieldIter; +typedef struct CfreeDwarfField { + const char* name; /* "" for anonymous */ + uint32_t byte_offset; + uint32_t bit_offset; /* for bitfields; 0 otherwise */ + uint32_t bit_size; /* for bitfields; 0 otherwise */ + const CfreeDwarfType* type; +} CfreeDwarfField; + +CfreeDwarfFieldIter* cfree_dwarf_field_iter_new (CfreeDebugInfo*, const CfreeDwarfType*); +int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter*, CfreeDwarfField* out); +void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter*); + +/* Enum value iterator. 
*/ +typedef struct CfreeDwarfEnumIter CfreeDwarfEnumIter; +typedef struct CfreeDwarfEnumVal { const char* name; int64_t value; } CfreeDwarfEnumVal; + +CfreeDwarfEnumIter* cfree_dwarf_enum_iter_new (CfreeDebugInfo*, const CfreeDwarfType*); +int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter*, CfreeDwarfEnumVal* out); +void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter*); + typedef enum CfreeDwarfLocKind { CFREE_DLOC_REG, /* value lives in a register */ CFREE_DLOC_FRAME_OFS, /* [cfa + frame_ofs] */ @@ -960,8 +1166,12 @@ typedef enum CfreeDwarfLocKind { } CfreeDwarfLocKind; typedef struct CfreeDwarfVarLoc { - CfreeDwarfLocKind kind; - uint32_t byte_size; /* 0 = unknown */ + CfreeDwarfLocKind kind; + uint32_t byte_size; /* 0 = unknown */ + /* DIE type of the variable. NULL when type information was not + * recovered (e.g. stripped binary, hand-written symbol). When NULL, + * callers should fall back to byte_size and treat the bytes opaquely. */ + const CfreeDwarfType* type; union { uint32_t reg; int32_t frame_ofs; @@ -977,6 +1187,45 @@ int cfree_dwarf_loc_read(CfreeDebugInfo*, const CfreeDwarfVarLoc*, CfreeJitSession*, /* memory provider */ void* dst, size_t cap, size_t* read_out); +/* ----- Locals, arguments, and parameters ----- + * + * cfree_dwarf_var_at resolves a single named variable. To enumerate every + * variable visible at a PC (for `info locals` / `info args`), use the + * vars-at iterator: it yields locals from the deepest scope outward and + * then file-scope globals, with a role mask filter. + * + * cfree_dwarf_var_at is kept as a convenience over the iterator: it + * performs deepest-scope-first matching by name and returns the first + * hit. Both are valid entry points. 
*/ +typedef enum CfreeDwarfVarRole { + CFREE_DVR_LOCAL, + CFREE_DVR_ARG, + CFREE_DVR_GLOBAL, +} CfreeDwarfVarRole; + +typedef struct CfreeDwarfVar { + const char* name; + CfreeDwarfVarRole role; + CfreeDwarfVarLoc loc; +} CfreeDwarfVar; + +typedef struct CfreeDwarfVarIter CfreeDwarfVarIter; + +CfreeDwarfVarIter* cfree_dwarf_vars_at_new (CfreeDebugInfo*, uint64_t pc, + uint32_t role_mask); +int cfree_dwarf_vars_at_next(CfreeDwarfVarIter*, CfreeDwarfVar* out); +void cfree_dwarf_vars_at_free(CfreeDwarfVarIter*); + +/* Iterate the formal parameters of the subprogram covering `pc`, in + * declaration order. Drives gdb-style backtrace argument rendering. + * Returns NULL if `pc` is not inside any subprogram. */ +typedef struct CfreeDwarfParamIter CfreeDwarfParamIter; + +CfreeDwarfParamIter* cfree_dwarf_param_iter_new (CfreeDebugInfo*, uint64_t pc); +int cfree_dwarf_param_iter_next(CfreeDwarfParamIter*, + CfreeDwarfVar* out); +void cfree_dwarf_param_iter_free(CfreeDwarfParamIter*); + /* ============================================================ * Disassembler * ============================================================