kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 41070170b1823155ec56a416ca5569adf3c5a456
parent 9bd61e8158c1156c1282a4265d7d31a53e5dfbd3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 12:44:10 -0700

obj/objdump: add linked-image inspection (segments, dynamic info)

Extend the object reader from relocatable-only to executables and shared
objects, exposed through a format-neutral image view on CfreeObjFile:

- obj API: ObjImage model (kind, entry, image base, interp, soname,
  segments, deps, rpaths, dynsyms, dynrelocs) hung off ObjBuilder, with
  builder-side setters/appenders and read-side queries. Public surface:
  cfree_obj_kind, cfree_obj_image_info, segment/dependency iterators, and
  dynsym/dynrel iterators that reuse the existing sym/reloc handles.
  Relocatable inputs report CFREE_OBJ_KIND_REL with empty image views, so
  the section/symbol path is unchanged.
- ELF: read_elf accepts ET_EXEC/ET_DYN and populates the image from the
  program headers (and dynamic info where present).
- objdump: -p (program/dynamic headers, format-neutral), -T (dynamic
  symbols), -R (dynamic relocations); -f now reports EXEC_P/DYNAMIC/D_PAGED
  and the real entry point. Section/symbol dumps now work on executables.

Locked in by a committed aarch64 ELF-exec fixture under test/objdump/.

Diffstat:
M driver/objdump.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M include/cfree/object.h | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/api/object_file.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/obj/elf/read.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/obj/obj.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/obj.h | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/objdump/aarch64/cases/01-image-headers.expected | 23 +++++++++++++++++++++++
A test/objdump/aarch64/cases/01-image-headers.sh | 10 ++++++++++
A test/objdump/aarch64/cases/exec.c | 14 ++++++++++++++
A test/objdump/aarch64/cases/exec.elf | 0
10 files changed, 1030 insertions(+), 38 deletions(-)

diff --git a/driver/objdump.c b/driver/objdump.c @@ -25,7 +25,9 @@ typedef struct ObjdumpOpts { int D; /* -D: disasm all sections */ int r; /* -r: relocations */ int s; /* -s: hex section contents */ - int p; /* -p / --private-headers: PE optional header + data dirs */ + int p; /* -p / --private-headers: program/dynamic headers (image) */ + int T; /* -T / --dynamic-syms: dynamic symbol table */ + int R; /* -R / --dynamic-reloc: dynamic relocations */ unsigned dwarf; /* --dwarf: bitmask of OBJDUMP_DWARF_* (0 = off) */ const char* j[MAX_J_FILTERS]; int nj; @@ -43,8 +45,8 @@ typedef struct ObjdumpOpts { static void objdump_usage(void) { driver_errf(OBJDUMP_TOOL, "%.*s", CFREE_SLICE_ARG(CFREE_SLICE_LIT( - "usage: cfree objdump [-h] [-t] [-d] [-D] [-r] [-s] [-j NAME " - "...] input...\n" + "usage: cfree objdump [-h] [-t] [-d] [-D] [-r] [-s] [-p] " + "[-T] [-R] [-j NAME ...] input...\n" " cfree objdump --help for full option reference"))); } @@ -86,8 +88,16 @@ void driver_help_objdump(void) { " -r Print relocation records\n" " -s Print section contents as a hex+ASCII dump\n" " -p, --private-headers\n" - " Print PE optional header, data directories,\n" - " and per-DLL import lists (PE images only)\n" + " Print the linked-image view: entry point, load\n" + " segments (program headers), and dynamic\n" + " dependencies. For PE images, the optional\n" + " header, data directories, and import lists.\n" + " -T, --dynamic-syms\n" + " Print the dynamic symbol table (.dynsym /\n" + " export table). Empty for relocatable objects.\n" + " -R, --dynamic-reloc\n" + " Print dynamic relocation records. Empty for\n" + " relocatable objects.\n" " -x Aggregate: -f -h -r -t\n" " --dwarf[=LIST] Dump DWARF debug sections. 
LIST is a comma-\n" " separated subset of info, abbrev, line, str;\n" @@ -800,12 +810,19 @@ static void dump_groups(CfreeObjFile* f, const ObjdumpOpts* opts) { if (printed_header) driver_printf("\n"); } -static void dump_symbols(CfreeObjFile* f, const ObjdumpOpts* opts) { +/* `dynamic` selects the dynamic symbol table (.dynsym / export trie) via + * cfree_obj_dynsymiter_new instead of the static .symtab. Both share the + * CfreeObjSymInfo shape and the same _next/_free, so the body is identical. */ +static void dump_symbols(CfreeObjFile* f, const ObjdumpOpts* opts, + int dynamic) { CfreeObjSymIter* it = NULL; CfreeObjSymInfo sym; + CfreeStatus st; - driver_printf("SYMBOL TABLE:\n"); - if (cfree_obj_symiter_new(f, &it) != CFREE_OK) return; + driver_printf(dynamic ? "DYNAMIC SYMBOL TABLE:\n" : "SYMBOL TABLE:\n"); + st = dynamic ? cfree_obj_dynsymiter_new(f, &it) + : cfree_obj_symiter_new(f, &it); + if (st != CFREE_OK) return; for (;;) { CfreeIterResult r = cfree_obj_symiter_next(it, &sym); CfreeSlice secname; @@ -1006,6 +1023,10 @@ static void dump_file_header(CfreeObjFile* f, const char* label) { uint32_t nsym = 0; int has_relocs = 0; unsigned flags = 0; + CfreeObjKind kind = cfree_obj_kind(f); + CfreeObjImageInfo info; + int have_info = cfree_obj_image_info(f, &info) == CFREE_OK; + const char* sep = ""; if (cfree_obj_symiter_new(f, &sit) == CFREE_OK) { CfreeObjSymInfo s; @@ -1017,19 +1038,33 @@ static void dump_file_header(CfreeObjFile* f, const char* label) { if (cfree_obj_reliter_next(rit, &r) == CFREE_ITER_ITEM) has_relocs = 1; cfree_obj_reliter_free(rit); } - /* GNU objdump's flag bits: 0x01 HAS_RELOC, 0x10 HAS_SYMS, 0x40 D_PAGED. - * cfree's reader handles relocatable .o-shaped inputs only, so we - * never see EXEC_P here; D_PAGED is irrelevant. */ + /* GNU objdump's BFD flag bits: 0x01 HAS_RELOC, 0x02 EXEC_P, 0x10 HAS_SYMS, + * 0x40 DYNAMIC, 0x100 D_PAGED. 
*/ if (has_relocs) flags |= 0x0001u; + if (kind == CFREE_OBJ_KIND_EXEC) flags |= 0x0002u; if (nsym) flags |= 0x0010u; + if (kind == CFREE_OBJ_KIND_DYN) flags |= 0x0040u; + if (kind != CFREE_OBJ_KIND_REL) flags |= 0x0100u; driver_printf("architecture: %.*s, flags 0x%08x:\n", CFREE_SLICE_ARG(cfree_slice_cstr(arch_str(target.arch))), flags); - if (has_relocs) driver_printf("HAS_RELOC, "); - if (nsym) driver_printf("HAS_SYMS"); - if (has_relocs || nsym) driver_printf("\n"); - driver_printf("start address 0x%016llx\n", 0ull); +#define OBJDUMP_FLAG(bit, name) \ + do { \ + if (flags & (bit)) { \ + driver_printf("%s%s", sep, name); \ + sep = ", "; \ + } \ + } while (0) + OBJDUMP_FLAG(0x0001u, "HAS_RELOC"); + OBJDUMP_FLAG(0x0002u, "EXEC_P"); + OBJDUMP_FLAG(0x0010u, "HAS_SYMS"); + OBJDUMP_FLAG(0x0040u, "DYNAMIC"); + OBJDUMP_FLAG(0x0100u, "D_PAGED"); +#undef OBJDUMP_FLAG + if (flags) driver_printf("\n"); + driver_printf("start address 0x%016llx\n", + have_info ? (unsigned long long)info.entry : 0ull); driver_printf( "format: %.*s, sections: %u, symbols: %u\n\n", CFREE_SLICE_ARG(cfree_slice_cstr(fmt_str(fmt, target.ptr_size))), nsec, @@ -1300,6 +1335,124 @@ static void dump_dwarf(const CfreeContext* ctx, CfreeObjFile* f, cfree_dwarf_free(dbg); } +/* Dynamic relocations (-R). Unlike section relocations these aren't grouped + * by section, so we print one flat table in GNU `objdump -R` style. */ +static void dump_dynrelocs(CfreeObjFile* f) { + CfreeObjRelocIter* it = NULL; + CfreeObjReloc r; + int any = 0; + + if (cfree_obj_dynreliter_new(f, &it) != CFREE_OK) return; + for (;;) { + CfreeIterResult res = cfree_obj_reliter_next(it, &r); + if (res != CFREE_ITER_ITEM) break; + if (!any) { + driver_printf("DYNAMIC RELOCATION RECORDS\n"); + driver_printf("OFFSET TYPE VALUE\n"); + any = 1; + } + if (r.addend) { + driver_printf("%016llx %-17s %.*s%c0x%llx\n", + (unsigned long long)r.offset, + r.kind_name.len ? r.kind_name.s : "?", + CFREE_SLICE_ARG(r.sym_name.len ? 
r.sym_name + : CFREE_SLICE_LIT("*ABS*")), + r.addend < 0 ? '-' : '+', + (unsigned long long)(r.addend < 0 ? -r.addend : r.addend)); + } else { + driver_printf("%016llx %-17s %.*s\n", (unsigned long long)r.offset, + r.kind_name.len ? r.kind_name.s : "?", + CFREE_SLICE_ARG(r.sym_name.len ? r.sym_name + : CFREE_SLICE_LIT("*ABS*"))); + } + } + cfree_obj_reliter_free(it); + driver_printf(any ? "\n" : "DYNAMIC RELOCATION RECORDS (none)\n\n"); +} + +static const char* seg_perms_str(uint32_t perms) { + static char b[4]; + b[0] = (perms & CFREE_SEG_R) ? 'r' : '-'; + b[1] = (perms & CFREE_SEG_W) ? 'w' : '-'; + b[2] = (perms & CFREE_SEG_X) ? 'x' : '-'; + b[3] = '\0'; + return b; +} + +/* align is a power of two; report it as 2**N like GNU objdump. */ +static unsigned u32_log2(uint32_t v) { + unsigned n = 0; + while (v > 1) { + v >>= 1; + ++n; + } + return n; +} + +/* Private/program headers (-p): the linked-image view — entry point, load + * segments, and dynamic dependencies. Format-neutral across ELF / Mach-O / + * (eventually) PE via the cfree_obj image API. Relocatable objects have no + * image and report so. 
*/ +static void dump_private(CfreeObjFile* f) { + CfreeObjImageInfo info; + CfreeObjSegIter* sit = NULL; + CfreeObjDepIter* dit = NULL; + CfreeObjSegInfo seg; + CfreeObjDepInfo dep; + int have_info; + + if (cfree_obj_kind(f) == CFREE_OBJ_KIND_REL) { + driver_printf( + "Private headers:\n" + " relocatable object — no program or dynamic headers\n\n"); + return; + } + + have_info = cfree_obj_image_info(f, &info) == CFREE_OK; + if (have_info) { + driver_printf("Image:\n"); + driver_printf(" entry point 0x%016llx\n", (unsigned long long)info.entry); + driver_printf(" image base 0x%016llx\n", + (unsigned long long)info.image_base); + if (info.interp.len) + driver_printf(" interpreter %.*s\n", CFREE_SLICE_ARG(info.interp)); + driver_printf("\n"); + } + + driver_printf("Program Header:\n"); + if (cfree_obj_segiter_new(f, &sit) == CFREE_OK) { + int any = 0; + while (cfree_obj_segiter_next(sit, &seg) == CFREE_ITER_ITEM) { + any = 1; + driver_printf( + " %-12.*s off 0x%016llx vaddr 0x%016llx align 2**%u\n" + " filesz 0x%016llx memsz 0x%016llx flags %s\n", + CFREE_SLICE_ARG(seg.name.len ? 
seg.name : CFREE_SLICE_LIT("LOAD")), + (unsigned long long)seg.file_off, (unsigned long long)seg.vaddr, + u32_log2(seg.align), (unsigned long long)seg.file_size, + (unsigned long long)seg.vsize, seg_perms_str(seg.perms)); + } + cfree_obj_segiter_free(sit); + if (!any) driver_printf(" (none)\n"); + } + driver_printf("\n"); + + driver_printf("Dynamic Section:\n"); + if (have_info && info.soname.len) + driver_printf(" SONAME %.*s\n", CFREE_SLICE_ARG(info.soname)); + if (cfree_obj_depiter_new(f, &dit) == CFREE_OK) { + while (cfree_obj_depiter_next(dit, &dep) == CFREE_ITER_ITEM) { + uint32_t k; + driver_printf(" NEEDED %.*s\n", CFREE_SLICE_ARG(dep.name)); + for (k = 0; k < dep.nimports; ++k) + driver_printf(" %.*s\n", + CFREE_SLICE_ARG(dep.imports[k])); + } + cfree_obj_depiter_free(dit); + } + driver_printf("\n"); +} + static void dump_obj(const CfreeContext* ctx, const CfreeDisasmContext* dctx, const char* label, CfreeObjFile* f, const ObjdumpOpts* opts) { @@ -1315,10 +1468,13 @@ static void dump_obj(const CfreeContext* ctx, const CfreeDisasmContext* dctx, if (opts->f) dump_file_header(f, label); if (opts->h) dump_sections(f, opts); if (opts->h) dump_groups(f, opts); - if (opts->t) dump_symbols(f, opts); + if (opts->t) dump_symbols(f, opts, 0); + if (opts->T) dump_symbols(f, opts, 1); + if (opts->p) dump_private(f); if (opts->s) dump_hex(f, opts); if (opts->d || opts->D) dump_disasm(dctx, f, opts); if (opts->r) dump_relocs(f, opts); + if (opts->R) dump_dynrelocs(f); if (opts->dwarf) dump_dwarf(ctx, f, opts); } @@ -1398,6 +1554,12 @@ static int parse_short_flags(const char* arg, ObjdumpOpts* o) { case 'p': o->p = 1; break; + case 'T': + o->T = 1; + break; + case 'R': + o->R = 1; + break; case 'x': o->f = 1; o->h = 1; @@ -1448,6 +1610,14 @@ static int parse_long_flag(const char* arg, ObjdumpOpts* o) { o->p = 1; return 1; } + if (driver_streq(arg, "--dynamic-syms")) { + o->T = 1; + return 1; + } + if (driver_streq(arg, "--dynamic-reloc")) { + o->R = 1; + return 1; + } 
return 0; } @@ -1546,7 +1716,7 @@ int driver_objdump(int argc, char** argv) { } saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s || - opts.p || opts.dwarf != 0; + opts.p || opts.T || opts.R || opts.dwarf != 0; if (!saw_op) { /* Default = -h -t (matches the prior behavior). */ opts.h = 1; opts.t = 1; diff --git a/include/cfree/object.h b/include/cfree/object.h @@ -265,6 +265,56 @@ typedef struct CfreeObjGroupInfo { const CfreeObjSection* sections; } CfreeObjGroupInfo; +/* ---- linked-image view (executables / shared objects) ---- + * + * Relocatable inputs (.o / .obj) report CFREE_OBJ_KIND_REL and empty image + * iterators. Executables and shared objects additionally expose segments + * (load layout), dependencies, dynamic symbols, and dynamic relocations — + * the dimension the section / symbol view doesn't carry. The section and + * symbol iterators above keep working where the format still has them. */ + +typedef enum CfreeObjKind { + CFREE_OBJ_KIND_REL = 0, /* relocatable object */ + CFREE_OBJ_KIND_EXEC, /* executable */ + CFREE_OBJ_KIND_DYN, /* shared object / dylib / DLL */ + CFREE_OBJ_KIND_CORE, /* core dump — detected, not parsed (reserved) */ +} CfreeObjKind; + +enum { /* CfreeObjSegInfo.perms bits */ + CFREE_SEG_X = 1u << 0, + CFREE_SEG_W = 1u << 1, + CFREE_SEG_R = 1u << 2 +}; + +typedef struct CfreeObjImageInfo { + uint64_t entry; /* entry-point vaddr; 0 if none (e.g. 
a DSO) */ + uint64_t image_base; /* preferred load address */ + CfreeSlice interp; /* PT_INTERP / dylinker path; empty if none */ + CfreeSlice soname; /* DT_SONAME / install-name / PE export name; empty + if none */ +} CfreeObjImageInfo; + +typedef struct CfreeObjSegInfo { + CfreeSlice name; /* PT_* spelling / Mach-O segname; empty if unnamed */ + uint64_t vaddr; + uint64_t vsize; /* size in memory */ + uint64_t file_off; /* offset of contents in the file */ + uint64_t file_size; /* size on disk (< vsize when the segment has bss) */ + uint32_t perms; /* CFREE_SEG_R | _W | _X */ + uint32_t align; /* power of two; 1 if none */ +} CfreeObjSegInfo; + +typedef struct CfreeObjDepInfo { + CfreeSlice name; /* needed library / imported DLL / dylib install-name */ + /* Imported symbol names (PE / Mach-O). Borrowed; valid until the next + * depiter_next or _free. Empty (nimports == 0) for ELF DT_NEEDED. */ + const CfreeSlice* imports; + uint32_t nimports; +} CfreeObjDepInfo; + +typedef struct CfreeObjSegIter CfreeObjSegIter; +typedef struct CfreeObjDepIter CfreeObjDepIter; + CFREE_API CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len); CFREE_API CfreeStatus cfree_detect_target(const uint8_t* data, size_t len, CfreeTarget* out); @@ -330,6 +380,43 @@ CFREE_API CfreeIterResult cfree_obj_groupiter_next(CfreeObjGroupIter*, CfreeObjGroupInfo* out); CFREE_API void cfree_obj_groupiter_free(CfreeObjGroupIter*); +/* Image kind. CFREE_OBJ_KIND_REL for relocatable objects (and for any + * format whose reader hasn't yet learned the image view). */ +CFREE_API CfreeObjKind cfree_obj_kind(const CfreeObjFile*); + +/* Whole-image scalars. Returns CFREE_NOT_FOUND on a relocatable object + * (no image). */ +CFREE_API CfreeStatus cfree_obj_image_info(const CfreeObjFile*, + CfreeObjImageInfo* out); + +/* Load segments (ELF program headers / Mach-O LC_SEGMENT / PE sections as + * mapped). Empty on relocatable objects. 
*/ +CFREE_API CfreeStatus cfree_obj_segiter_new(CfreeObjFile*, + CfreeObjSegIter** out); +CFREE_API CfreeIterResult cfree_obj_segiter_next(CfreeObjSegIter*, + CfreeObjSegInfo* out); +CFREE_API void cfree_obj_segiter_free(CfreeObjSegIter*); + +/* Dependencies (ELF DT_NEEDED / Mach-O LC_LOAD_DYLIB / PE imports). Empty + * on relocatable objects. */ +CFREE_API CfreeStatus cfree_obj_depiter_new(CfreeObjFile*, + CfreeObjDepIter** out); +CFREE_API CfreeIterResult cfree_obj_depiter_next(CfreeObjDepIter*, + CfreeObjDepInfo* out); +CFREE_API void cfree_obj_depiter_free(CfreeObjDepIter*); + +/* Dynamic symbol table (.dynsym / dyld export trie / PE export table). + * Reuses the CfreeObjSymInfo shape and the CfreeObjSymIter handle — drive + * it with cfree_obj_symiter_next / _free. Empty on relocatable objects. */ +CFREE_API CfreeStatus cfree_obj_dynsymiter_new(CfreeObjFile*, + CfreeObjSymIter** out); + +/* Dynamic relocations (.rela.dyn / .rela.plt / dyld binds / PE base relocs). + * Reuses CfreeObjReloc and the CfreeObjRelocIter handle — drive it with + * cfree_obj_reliter_next / _free. Empty on relocatable objects. */ +CFREE_API CfreeStatus cfree_obj_dynreliter_new(CfreeObjFile*, + CfreeObjRelocIter** out); + /* Roundtrip: open an object via cfree_obj_open, then hand its underlying * builder back. The builder is the same one the reader populated; it is * already finalized, so callers may inspect it (e.g. 
iterate sections via diff --git a/src/api/object_file.c b/src/api/object_file.c @@ -9,6 +9,7 @@ #include "core/core.h" #include "core/heap.h" #include "core/pool.h" +#include "core/vec.h" #include "core/slice.h" #include "obj/format.h" #include "obj/obj.h" @@ -234,10 +235,16 @@ CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile* f, CfreeSlice name, struct CfreeObjSymIter { CfreeObjFile* file; - ObjSymIter* inner; + ObjSymIter* inner; /* .symtab walk; NULL when iterating the dynamic table */ + u32 dyn_idx; /* next index into obj_image dynsyms (dynamic mode) */ + int dynamic; }; -CfreeStatus cfree_obj_symiter_new(CfreeObjFile* f, CfreeObjSymIter** out) { +/* Shared by cfree_obj_symiter_new (.symtab) and cfree_obj_dynsymiter_new + * (.dynsym). When dynamic, the inner ObjSymIter is unused and we walk the + * image's dynamic symbol table by index. */ +static CfreeStatus symiter_make(CfreeObjFile* f, int dynamic, + CfreeObjSymIter** out) { Heap* h; CfreeObjSymIter* it; if (!f || !out) return CFREE_INVALID; @@ -245,19 +252,48 @@ CfreeStatus cfree_obj_symiter_new(CfreeObjFile* f, CfreeObjSymIter** out) { it = (CfreeObjSymIter*)h->alloc(h, sizeof(*it), _Alignof(CfreeObjSymIter)); if (!it) return CFREE_NOMEM; it->file = f; - it->inner = obj_symiter_new(f->ob); - if (!it->inner) { - h->free(h, it, sizeof(*it)); - return CFREE_NOMEM; + it->inner = NULL; + it->dyn_idx = 0; + it->dynamic = dynamic; + if (!dynamic) { + it->inner = obj_symiter_new(f->ob); + if (!it->inner) { + h->free(h, it, sizeof(*it)); + return CFREE_NOMEM; + } } *out = it; return CFREE_OK; } +CfreeStatus cfree_obj_symiter_new(CfreeObjFile* f, CfreeObjSymIter** out) { + return symiter_make(f, 0, out); +} + +CfreeStatus cfree_obj_dynsymiter_new(CfreeObjFile* f, CfreeObjSymIter** out) { + return symiter_make(f, 1, out); +} + CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter* it, CfreeObjSymInfo* out) { ObjSymEntry entry; if (!it || !out) return CFREE_ITER_ERROR; + if (it->dynamic) { + const ObjImage* im 
= obj_image(it->file->ob); + const ObjImageSym* s; + if (it->dyn_idx >= obj_image_ndynsyms(im)) return CFREE_ITER_END; + s = obj_image_dynsym(im, it->dyn_idx++); + out->name = pool_slice(it->file->compiler.global, s->name); + out->id = CFREE_OBJ_SYMBOL_NONE; + out->bind = (CfreeSymBind)s->bind; + out->kind = (CfreeSymKind)s->kind; + out->section = s->section != OBJ_SEC_NONE + ? (CfreeObjSection)(s->section - 1) + : CFREE_SECTION_NONE; + out->value = s->value; + out->size = s->size; + return CFREE_ITER_ITEM; + } if (!obj_symiter_next(it->inner, &entry)) return CFREE_ITER_END; fill_syminfo(it->file, entry.id, entry.sym, out); return CFREE_ITER_ITEM; @@ -266,7 +302,7 @@ CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter* it, void cfree_obj_symiter_free(CfreeObjSymIter* it) { Heap* h; if (!it) return; - obj_symiter_free(it->inner); + if (it->inner) obj_symiter_free(it->inner); h = it->file->ctx->heap; h->free(h, it, sizeof(*it)); } @@ -275,9 +311,11 @@ struct CfreeObjRelocIter { CfreeObjFile* file; u32 idx; u32 total; + int dynamic; /* iterate obj_image dynamic relocs instead of section relocs */ }; -CfreeStatus cfree_obj_reliter_new(CfreeObjFile* f, CfreeObjRelocIter** out) { +static CfreeStatus reliter_make(CfreeObjFile* f, int dynamic, + CfreeObjRelocIter** out) { Heap* h; CfreeObjRelocIter* it; if (!f || !out) return CFREE_INVALID; @@ -287,11 +325,21 @@ CfreeStatus cfree_obj_reliter_new(CfreeObjFile* f, CfreeObjRelocIter** out) { if (!it) return CFREE_NOMEM; it->file = f; it->idx = 0; - it->total = obj_reloc_total(f->ob); + it->dynamic = dynamic; + it->total = dynamic ? 
obj_image_ndynrelocs(obj_image(f->ob)) + : obj_reloc_total(f->ob); *out = it; return CFREE_OK; } +CfreeStatus cfree_obj_reliter_new(CfreeObjFile* f, CfreeObjRelocIter** out) { + return reliter_make(f, 0, out); +} + +CfreeStatus cfree_obj_dynreliter_new(CfreeObjFile* f, CfreeObjRelocIter** out) { + return reliter_make(f, 1, out); +} + static const char* cfree_obj_reloc_kind_name(CfreeArchKind arch, CfreeObjFmt fmt, u32 kind) { if (fmt == CFREE_OBJ_ELF && arch == CFREE_ARCH_X86_64) { @@ -363,6 +411,26 @@ CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter* it, if (!it || !out) return CFREE_ITER_ERROR; if (it->idx >= it->total) return CFREE_ITER_END; + if (it->dynamic) { + const ObjImageReloc* dr = obj_image_dynreloc(obj_image(it->file->ob), + it->idx++); + const char* kn; + out->section = dr->section ? (CfreeObjSection)(dr->section - 1) + : CFREE_SECTION_NONE; + out->offset = dr->offset; + out->addend = dr->addend; + out->kind.arch = it->file->target.arch; + out->kind.obj_fmt = it->file->fmt; + out->kind.code = (uint32_t)dr->kind; + kn = cfree_obj_reloc_kind_name(it->file->target.arch, it->file->fmt, + dr->kind); + if (!kn) kn = reloc_kind_name(dr->kind); + out->kind_name = kn ? slice_from_cstr(kn) : SLICE_NULL; + out->sym = CFREE_OBJ_SYMBOL_NONE; + out->sym_name = pool_slice(it->file->compiler.global, dr->sym_name); + return CFREE_ITER_ITEM; + } + r = obj_reloc_at(it->file->ob, it->idx++); out->section = r->section_id ? (CfreeObjSection)(r->section_id - 1) : CFREE_SECTION_NONE; @@ -526,3 +594,138 @@ void cfree_objfile_internal_free(CfreeObjFile* f) { * section data, so we route through the same path. 
*/ cfree_obj_free(f); } + +/* ============================================================ + * Linked-image view + * ============================================================ */ + +CfreeObjKind cfree_obj_kind(const CfreeObjFile* f) { + const ObjImage* im; + if (!f) return CFREE_OBJ_KIND_REL; + im = obj_image(f->ob); + if (!im) return CFREE_OBJ_KIND_REL; + switch (obj_image_kind(im)) { + case OBJ_KIND_EXEC: + return CFREE_OBJ_KIND_EXEC; + case OBJ_KIND_DYN: + return CFREE_OBJ_KIND_DYN; + case OBJ_KIND_CORE: + return CFREE_OBJ_KIND_CORE; + case OBJ_KIND_REL: + default: + return CFREE_OBJ_KIND_REL; + } +} + +CfreeStatus cfree_obj_image_info(const CfreeObjFile* f, + CfreeObjImageInfo* out) { + const ObjImage* im; + if (!f || !out) return CFREE_INVALID; + im = obj_image(f->ob); + if (!im) return CFREE_NOT_FOUND; + out->entry = obj_image_entry(im); + out->image_base = obj_image_base(im); + out->interp = pool_slice(f->compiler.global, obj_image_interp(im)); + out->soname = pool_slice(f->compiler.global, obj_image_soname(im)); + return CFREE_OK; +} + +struct CfreeObjSegIter { + CfreeObjFile* file; + u32 idx; +}; + +CfreeStatus cfree_obj_segiter_new(CfreeObjFile* f, CfreeObjSegIter** out) { + Heap* h; + CfreeObjSegIter* it; + if (!f || !out) return CFREE_INVALID; + h = f->ctx->heap; + it = (CfreeObjSegIter*)h->alloc(h, sizeof(*it), _Alignof(CfreeObjSegIter)); + if (!it) return CFREE_NOMEM; + it->file = f; + it->idx = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_obj_segiter_next(CfreeObjSegIter* it, + CfreeObjSegInfo* out) { + const ObjImage* im; + const ObjSegment* s; + if (!it || !out) return CFREE_ITER_ERROR; + im = obj_image(it->file->ob); + if (it->idx >= obj_image_nsegments(im)) return CFREE_ITER_END; + s = obj_image_segment(im, it->idx++); + out->name = pool_slice(it->file->compiler.global, s->name); + out->vaddr = s->vaddr; + out->vsize = s->vsize; + out->file_off = s->file_off; + out->file_size = s->file_size; + out->perms = s->perms; /* 
OBJ_SEG_* and CFREE_SEG_* share bit values */ + out->align = s->align; + return CFREE_ITER_ITEM; +} + +void cfree_obj_segiter_free(CfreeObjSegIter* it) { + Heap* h; + if (!it) return; + h = it->file->ctx->heap; + h->free(h, it, sizeof(*it)); +} + +struct CfreeObjDepIter { + CfreeObjFile* file; + u32 idx; + CfreeSlice* import_buf; /* scratch for the current dep's import names */ + u32 import_cap; +}; + +CfreeStatus cfree_obj_depiter_new(CfreeObjFile* f, CfreeObjDepIter** out) { + Heap* h; + CfreeObjDepIter* it; + if (!f || !out) return CFREE_INVALID; + h = f->ctx->heap; + it = (CfreeObjDepIter*)h->alloc(h, sizeof(*it), _Alignof(CfreeObjDepIter)); + if (!it) return CFREE_NOMEM; + it->file = f; + it->idx = 0; + it->import_buf = NULL; + it->import_cap = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_obj_depiter_next(CfreeObjDepIter* it, + CfreeObjDepInfo* out) { + const ObjImage* im; + const ObjImageDep* d; + Heap* h; + u32 i; + if (!it || !out) return CFREE_ITER_ERROR; + im = obj_image(it->file->ob); + if (it->idx >= obj_image_ndeps(im)) return CFREE_ITER_END; + d = obj_image_dep(im, it->idx++); + out->name = pool_slice(it->file->compiler.global, d->name); + out->imports = NULL; + out->nimports = d->nimports; + if (d->nimports) { + h = it->file->ctx->heap; + if (it->import_cap < d->nimports) { + if (VEC_GROW(h, it->import_buf, it->import_cap, d->nimports)) + return CFREE_ITER_ERROR; + } + for (i = 0; i < d->nimports; ++i) + it->import_buf[i] = pool_slice(it->file->compiler.global, d->imports[i]); + out->imports = it->import_buf; + } + return CFREE_ITER_ITEM; +} + +void cfree_obj_depiter_free(CfreeObjDepIter* it) { + Heap* h; + if (!it) return; + h = it->file->ctx->heap; + if (it->import_buf) + h->free(h, it->import_buf, sizeof(*it->import_buf) * it->import_cap); + h->free(h, it, sizeof(*it)); +} diff --git a/src/obj/elf/read.c b/src/obj/elf/read.c @@ -1,9 +1,16 @@ -/* ELF ET_REL reader. 
Parses a 64-bit little-endian relocatable object - * back into a fresh ObjBuilder. The post-finalize ObjBuilder shape is - * the canonical superset doc/DESIGN.md §5.5 promises: read_elf of an - * emit_elf output produces an ObjBuilder equivalent to the writer's - * input, modulo (a) section ordering and (b) STT_SECTION symbols - * synthesized by the writer. +/* ELF reader. Parses a 64-bit little-endian ELF object back into a fresh + * ObjBuilder. ET_REL produces the section/symbol/reloc view; the + * post-finalize shape is the canonical superset doc/DESIGN.md §5.5 + * promises: read_elf of an emit_elf output produces an ObjBuilder + * equivalent to the writer's input, modulo (a) section ordering and + * (b) STT_SECTION symbols synthesized by the writer. + * + * ET_EXEC / ET_DYN additionally attach the linked-image view via + * read_elf_image (program-header segments, .dynamic dependencies, + * .dynsym dynamic symbols, and allocatable dynamic relocations) — see + * doc/IMAGE_INSPECT.md. Their section tables still parse through the same + * passes. The standalone read_elf_dso (below) remains the linker's + * exports-only DSO-input path. * * Scope: AArch64 little-endian. Other archs / endianness produce a * compiler_panic with a diagnostic. 
*/ @@ -197,6 +204,233 @@ static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off, return s; } +static const char* pt_type_name(u32 t) { + switch (t) { + case PT_NULL: + return "NULL"; + case PT_LOAD: + return "LOAD"; + case PT_DYNAMIC: + return "DYNAMIC"; + case PT_INTERP: + return "INTERP"; + case PT_NOTE: + return "NOTE"; + case PT_PHDR: + return "PHDR"; + case PT_TLS: + return "TLS"; + case PT_GNU_EH_FRAME: + return "GNU_EH_FRAME"; + case PT_GNU_STACK: + return "GNU_STACK"; + case PT_GNU_RELRO: + return "GNU_RELRO"; + default: + return "UNKNOWN"; + } +} + +static Sym intern_cstr(Compiler* c, const char* s) { + return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)}); +} + +/* Populate the builder's ObjImage from an ET_EXEC / ET_DYN input: the + * program-header segment table (+ interp + image base), the .dynamic + * dependency view (DT_NEEDED / DT_SONAME / DT_RPATH / DT_RUNPATH), the + * .dynsym dynamic symbols, and the allocatable .rela.* / .rel.* dynamic + * relocations. The section / symbol tables are parsed by read_elf's normal + * passes; this adds the orthogonal image dimension. Lenient where a + * malformed sub-table would otherwise abort a useful inspection: a bad + * .dynamic / .dynsym / dyn-reloc table is skipped rather than panicked. */ +static void read_elf_image(Compiler* c, ObjBuilder* ob, const u8* data, + size_t len, u16 e_type, const ShdrRec* shdrs, + u16 e_shnum, const u32* elf_to_obj, + u32 (*reloc_from)(u32)) { + ObjImage* im = + obj_image_ensure(ob, e_type == ET_DYN ? OBJ_KIND_DYN : OBJ_KIND_EXEC); + if (!im) compiler_panic(c, no_loc(), "read_elf: obj_image_ensure failed"); + + obj_image_set_entry(im, elf_rd_u64(data + 24)); + + /* Program headers -> segments (+ PT_INTERP string, image base). 
*/ + { + u64 e_phoff = elf_rd_u64(data + 32); + u16 e_phentsize = elf_rd_u16(data + 54); + u16 e_phnum = elf_rd_u16(data + 56); + int have_base = 0; + u64 image_base = 0; + if (e_phnum) { + if (e_phentsize != ELF64_PHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf: unexpected e_phentsize %u", + (u32)e_phentsize); + if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) + compiler_panic(c, no_loc(), + "read_elf: program header table out of range"); + for (u16 i = 0; i < e_phnum; ++i) { + const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE; + u32 p_type = elf_rd_u32(p + 0); + u32 p_flags = elf_rd_u32(p + 4); + u64 p_offset = elf_rd_u64(p + 8); + u64 p_vaddr = elf_rd_u64(p + 16); + u64 p_filesz = elf_rd_u64(p + 32); + u64 p_memsz = elf_rd_u64(p + 40); + u64 p_align = elf_rd_u64(p + 48); + ObjSegment seg; + seg.name = intern_cstr(c, pt_type_name(p_type)); + seg.vaddr = p_vaddr; + seg.vsize = p_memsz; + seg.file_off = p_offset; + seg.file_size = p_filesz; + /* PF_R/W/X share bit values with OBJ_SEG_R/W/X. */ + seg.perms = p_flags & (PF_R | PF_W | PF_X); + seg.align = (u32)(p_align ? p_align : 1); + obj_image_add_segment(im, &seg); + + if (p_type == PT_LOAD && (!have_base || p_vaddr < image_base)) { + image_base = p_vaddr; + have_base = 1; + } + if (p_type == PT_INTERP && p_filesz && p_offset + p_filesz <= len) { + u32 ilen = (u32)p_filesz; + while (ilen && data[p_offset + ilen - 1] == '\0') --ilen; + if (ilen) + obj_image_set_interp( + im, pool_intern_slice( + c->global, + (Slice){.s = (const char*)(data + p_offset), + .len = ilen})); + } + } + } + if (have_base) obj_image_set_base(im, image_base); + } + + /* Locate .dynamic and .dynsym. */ + u32 dynamic_idx = 0, dynsym_idx = 0; + for (u16 i = 1; i < e_shnum; ++i) { + if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i; + if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i; + } + + /* .dynamic -> dependency view. 
*/ + if (dynamic_idx) { + const ShdrRec* dsh = &shdrs[dynamic_idx]; + if (dsh->sh_link < e_shnum) { + const ShdrRec* str_sh = &shdrs[dsh->sh_link]; + if (str_sh->sh_offset + str_sh->sh_size <= len && + dsh->sh_offset + dsh->sh_size <= len) { + const u8* dynstr = data + str_sh->sh_offset; + u64 dynstr_sz = str_sh->sh_size; + const u8* dynp = data + dsh->sh_offset; + u64 dynsz = dsh->sh_size; + for (u64 off = 0; off + 16 <= dynsz; off += 16) { + u64 tag = elf_rd_u64(dynp + off); + u64 val = elf_rd_u64(dynp + off + 8); + if (tag == DT_NULL) break; + if (tag != DT_NEEDED && tag != DT_SONAME && tag != DT_RPATH && + tag != DT_RUNPATH) + continue; + { + u32 nlen; + const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen); + Sym s = nlen ? pool_intern_slice( + c->global, (Slice){.s = nm, .len = nlen}) + : 0; + if (!s) continue; + if (tag == DT_NEEDED) { + ObjImageDep d; + d.name = s; + d.imports = NULL; + d.nimports = 0; + obj_image_add_dep(im, &d); + } else if (tag == DT_SONAME) { + obj_image_set_soname(im, s); + } else { + obj_image_add_rpath(im, s); + } + } + } + } + } + } + + /* .dynsym -> dynamic symbols, plus an index->name table for dyn relocs. */ + Sym* dynsym_names = NULL; + u32 ndynsym = 0; + if (dynsym_idx) { + const ShdrRec* sh = &shdrs[dynsym_idx]; + if (sh->sh_entsize == ELF64_SYM_SIZE && (sh->sh_size % ELF64_SYM_SIZE) == 0 && + sh->sh_link < e_shnum && sh->sh_offset + sh->sh_size <= len) { + const ShdrRec* str_sh = &shdrs[sh->sh_link]; + if (str_sh->sh_offset + str_sh->sh_size <= len) { + const u8* strtab = data + str_sh->sh_offset; + u64 strtab_sz = str_sh->sh_size; + const u8* base = data + sh->sh_offset; + ndynsym = (u32)(sh->sh_size / ELF64_SYM_SIZE); + dynsym_names = arena_zarray(c->scratch, Sym, ndynsym ? 
ndynsym : 1); + for (u32 i = 1; i < ndynsym; ++i) { + const u8* p = base + (u64)i * ELF64_SYM_SIZE; + u32 st_name = elf_rd_u32(p + 0); + u8 st_info = p[4]; + u16 st_shndx = elf_rd_u16(p + 6); + u64 st_value = elf_rd_u64(p + 8); + u64 st_size = elf_rd_u64(p + 16); + u32 nlen; + const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); + Sym sn = nlen ? pool_intern_slice(c->global, + (Slice){.s = nm, .len = nlen}) + : 0; + ObjImageSym ds; + dynsym_names[i] = sn; + ds.name = sn; + ds.bind = (SymBind)elf_bind_to_obj(ELF64_ST_BIND(st_info)); + ds.kind = (SymKind)elf_type_to_kind(ELF64_ST_TYPE(st_info), st_shndx); + ds.section = (st_shndx == SHN_UNDEF || st_shndx == SHN_ABS || + st_shndx == SHN_COMMON || st_shndx >= e_shnum) + ? OBJ_SEC_NONE + : elf_to_obj[st_shndx]; + ds.value = st_value; + ds.size = st_size; + obj_image_add_dynsym(im, &ds); + } + } + } + } + + /* Allocatable .rela.* / .rel.* -> dynamic relocations. */ + for (u16 i = 1; i < e_shnum; ++i) { + const ShdrRec* sh = &shdrs[i]; + int is_rela = (sh->sh_type == SHT_RELA); + int is_rel = (sh->sh_type == SHT_REL); + u32 entsize, nrec, j; + const u8* base; + if (!is_rela && !is_rel) continue; + if (!(sh->sh_flags & SHF_ALLOC)) continue; /* link-time relocs: not dynamic */ + entsize = is_rela ? ELF64_RELA_SIZE : 16; + if (sh->sh_entsize != entsize || (sh->sh_size % entsize) != 0) continue; + if (sh->sh_offset + sh->sh_size > len) continue; + nrec = (u32)(sh->sh_size / entsize); + base = data + sh->sh_offset; + for (j = 0; j < nrec; ++j) { + const u8* p = base + (u64)j * entsize; + u64 r_offset = elf_rd_u64(p + 0); + u64 r_info = elf_rd_u64(p + 8); + i64 r_addend = is_rela ? 
(i64)elf_rd_u64(p + 16) : 0; + u32 esym = ELF64_R_SYM(r_info); + u32 kind = reloc_from(ELF64_R_TYPE(r_info)); + ObjImageReloc dr; + if (kind == (u32)-1) continue; /* unmodeled dyn reloc type: skip */ + dr.section = OBJ_SEC_NONE; /* offset is a vaddr, not section-relative */ + dr.offset = r_offset; + dr.sym_name = (dynsym_names && esym < ndynsym) ? dynsym_names[esym] : 0; + dr.addend = r_addend; + dr.kind = (RelocKind)kind; + obj_image_add_dynreloc(im, &dr); + } + } +} + ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, size_t len) { (void)name; @@ -216,12 +450,15 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, data[EI_DATA]); u16 e_type = elf_rd_u16(data + 16); - if (e_type != ET_REL) - compiler_panic( - c, no_loc(), - "read_elf: only ET_REL inputs are accepted by read_elf " - "(got e_type=%u); use read_elf_dso for ET_DYN shared objects", - (u32)e_type); + /* ET_REL parses to the section/symbol/reloc view only. ET_EXEC/ET_DYN + * additionally get the linked-image view (read_elf_image, below); their + * section tables still parse through the same passes. ET_CORE and other + * types are out of scope (see doc/IMAGE_INSPECT.md). */ + if (e_type != ET_REL && e_type != ET_EXEC && e_type != ET_DYN) + compiler_panic(c, no_loc(), + "read_elf: unsupported e_type=%u (expected ET_REL, " + "ET_EXEC, or ET_DYN)", + (u32)e_type); u16 e_machine = elf_rd_u16(data + 18); const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); @@ -424,6 +661,12 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, int is_rela = (sh->sh_type == SHT_RELA); int is_rel = (sh->sh_type == SHT_REL); if (!is_rela && !is_rel) continue; + /* Allocatable rela/rel in ET_EXEC/ET_DYN are loader (dynamic) + * relocations — sh_info is 0 or a .got index, not a target section. + * They belong to the image's dynamic-reloc view (read_elf_image), not + * the section-relocation table. 
ET_REL link-time relocs are never + * SHF_ALLOC, so this is a no-op for relocatable objects. */ + if (sh->sh_flags & SHF_ALLOC) continue; u32 entsize = is_rela ? ELF64_RELA_SIZE : 16; if (sh->sh_entsize != entsize) @@ -489,6 +732,11 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, } } + /* ET_EXEC / ET_DYN: attach the linked-image view (segments + dynamic). */ + if (e_type != ET_REL) + read_elf_image(c, ob, data, len, e_type, shdrs, e_shnum, elf_to_obj, + reloc_from); + obj_finalize(ob); return ob; } diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -14,6 +14,7 @@ #include "core/heap.h" #include "core/pool.h" #include "core/segvec.h" +#include "core/vec.h" SEGVEC_DEFINE(Sections, Section, 5); /* 32 entries per segment */ SEGVEC_DEFINE(Symbols, ObjSym, 6); /* 64 entries per segment */ @@ -53,6 +54,9 @@ struct CfreeObjBuilder { ObjSymId tlv_bootstrap_sym; /* Format-specific extension payloads keyed by ObjExtKind. */ ObjExtSlot ext[OBJ_EXT_SLOT_COUNT]; + /* Linked-image view (segments + dynamic info). NULL on relocatable + * inputs; lazily created by obj_image_ensure. See obj.h. 
*/ + ObjImage* image; }; struct ObjSymIter { @@ -60,6 +64,8 @@ struct ObjSymIter { u32 idx; /* next index to return */ }; +static void obj_image_free_(ObjBuilder*); + /* ---- lifecycle ---- */ ObjBuilder* obj_new(Compiler* c) { @@ -124,6 +130,7 @@ void obj_free(ObjBuilder* ob) { Relocs_fini(&ob->relocs); Groups_fini(&ob->groups); Atoms_fini(&ob->atoms); + obj_image_free_(ob); ob->heap->free(ob->heap, ob, sizeof(*ob)); } @@ -151,6 +158,133 @@ int obj_get_coff_import_dll(const ObjBuilder* ob, Sym* out) { return 1; } +/* ---- linked-image view ---- */ + +struct ObjImage { + Heap* heap; + ObjKind kind; + u64 entry; + u64 image_base; + Sym interp; + Sym soname; + ObjSegment* segs; + u32 nsegs, cap_segs; + ObjImageDep* deps; + u32 ndeps, cap_deps; + Sym* rpaths; + u32 nrpaths, cap_rpaths; + ObjImageSym* dynsyms; + u32 ndynsyms, cap_dynsyms; + ObjImageReloc* dynrelocs; + u32 ndynrelocs, cap_dynrelocs; +}; + +static void obj_image_free_(ObjBuilder* ob) { + ObjImage* im; + if (!ob || !ob->image) return; + im = ob->image; + /* Dep import-name arrays are caller-owned (interned in the global pool); + * only the deps vector itself is ours to release. */ + if (im->segs) im->heap->free(im->heap, im->segs, sizeof(*im->segs) * im->cap_segs); + if (im->deps) im->heap->free(im->heap, im->deps, sizeof(*im->deps) * im->cap_deps); + if (im->rpaths) + im->heap->free(im->heap, im->rpaths, sizeof(*im->rpaths) * im->cap_rpaths); + if (im->dynsyms) + im->heap->free(im->heap, im->dynsyms, sizeof(*im->dynsyms) * im->cap_dynsyms); + if (im->dynrelocs) + im->heap->free(im->heap, im->dynrelocs, + sizeof(*im->dynrelocs) * im->cap_dynrelocs); + ob->heap->free(ob->heap, im, sizeof(*im)); + ob->image = NULL; +} + +const ObjImage* obj_image(const ObjBuilder* ob) { + return ob ? 
ob->image : NULL; +} + +ObjImage* obj_image_ensure(ObjBuilder* ob, ObjKind kind) { + ObjImage* im; + if (!ob) return NULL; + if (ob->image) { + ob->image->kind = kind; + return ob->image; + } + im = (ObjImage*)ob->heap->alloc(ob->heap, sizeof(*im), _Alignof(ObjImage)); + if (!im) return NULL; + memset(im, 0, sizeof(*im)); + im->heap = ob->heap; + im->kind = kind; + ob->image = im; + return im; +} + +void obj_image_set_entry(ObjImage* im, u64 entry) { + if (im) im->entry = entry; +} +void obj_image_set_base(ObjImage* im, u64 image_base) { + if (im) im->image_base = image_base; +} +void obj_image_set_interp(ObjImage* im, Sym interp) { + if (im) im->interp = interp; +} +void obj_image_set_soname(ObjImage* im, Sym soname) { + if (im) im->soname = soname; +} + +void obj_image_add_segment(ObjImage* im, const ObjSegment* seg) { + if (!im || !seg) return; + if (VEC_GROW(im->heap, im->segs, im->cap_segs, im->nsegs + 1)) return; + im->segs[im->nsegs++] = *seg; +} +void obj_image_add_dep(ObjImage* im, const ObjImageDep* dep) { + if (!im || !dep) return; + if (VEC_GROW(im->heap, im->deps, im->cap_deps, im->ndeps + 1)) return; + im->deps[im->ndeps++] = *dep; +} +void obj_image_add_rpath(ObjImage* im, Sym rpath) { + if (!im) return; + if (VEC_GROW(im->heap, im->rpaths, im->cap_rpaths, im->nrpaths + 1)) return; + im->rpaths[im->nrpaths++] = rpath; +} +void obj_image_add_dynsym(ObjImage* im, const ObjImageSym* sym) { + if (!im || !sym) return; + if (VEC_GROW(im->heap, im->dynsyms, im->cap_dynsyms, im->ndynsyms + 1)) return; + im->dynsyms[im->ndynsyms++] = *sym; +} +void obj_image_add_dynreloc(ObjImage* im, const ObjImageReloc* rel) { + if (!im || !rel) return; + if (VEC_GROW(im->heap, im->dynrelocs, im->cap_dynrelocs, im->ndynrelocs + 1)) + return; + im->dynrelocs[im->ndynrelocs++] = *rel; +} + +ObjKind obj_image_kind(const ObjImage* im) { return im ? im->kind : OBJ_KIND_REL; } +u64 obj_image_entry(const ObjImage* im) { return im ? 
im->entry : 0; } +u64 obj_image_base(const ObjImage* im) { return im ? im->image_base : 0; } +Sym obj_image_interp(const ObjImage* im) { return im ? im->interp : 0; } +Sym obj_image_soname(const ObjImage* im) { return im ? im->soname : 0; } + +u32 obj_image_nsegments(const ObjImage* im) { return im ? im->nsegs : 0; } +const ObjSegment* obj_image_segment(const ObjImage* im, u32 idx) { + return (im && idx < im->nsegs) ? &im->segs[idx] : NULL; +} +u32 obj_image_ndeps(const ObjImage* im) { return im ? im->ndeps : 0; } +const ObjImageDep* obj_image_dep(const ObjImage* im, u32 idx) { + return (im && idx < im->ndeps) ? &im->deps[idx] : NULL; +} +u32 obj_image_nrpaths(const ObjImage* im) { return im ? im->nrpaths : 0; } +Sym obj_image_rpath(const ObjImage* im, u32 idx) { + return (im && idx < im->nrpaths) ? im->rpaths[idx] : 0; +} +u32 obj_image_ndynsyms(const ObjImage* im) { return im ? im->ndynsyms : 0; } +const ObjImageSym* obj_image_dynsym(const ObjImage* im, u32 idx) { + return (im && idx < im->ndynsyms) ? &im->dynsyms[idx] : NULL; +} +u32 obj_image_ndynrelocs(const ObjImage* im) { return im ? im->ndynrelocs : 0; } +const ObjImageReloc* obj_image_dynreloc(const ObjImage* im, u32 idx) { + return (im && idx < im->ndynrelocs) ? &im->dynrelocs[idx] : NULL; +} + void obj_ext_set(ObjBuilder* ob, ObjExtKind kind, void* payload, ObjExtFreeFn free_fn) { if (!ob || (u32)kind >= OBJ_EXT_SLOT_COUNT) return; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -696,6 +696,109 @@ void obj_ext_set(ObjBuilder*, ObjExtKind, void* payload, ObjExtFreeFn); void* obj_ext_get(const ObjBuilder*, ObjExtKind); void obj_ext_clear(ObjBuilder*, ObjExtKind); +/* ============================================================ + * Linked-image view (executables / shared objects) + * + * Relocatable inputs (ET_REL / MH_OBJECT / COFF .obj) have no image: + * obj_image() returns NULL. 
The ET_EXEC / ET_DYN (and Mach-O / PE peer) + * readers attach an ObjImage carrying the segment + dynamic view that the + * section / symbol tables don't model. The section and symbol tables stay + * populated where the format still carries them; the image is the extra + * dimension. The builder owns the image; obj_free releases it. + * ============================================================ */ + +typedef enum ObjKind { + OBJ_KIND_REL, /* relocatable object — no image */ + OBJ_KIND_EXEC, /* executable */ + OBJ_KIND_DYN, /* shared object / dylib / DLL */ + OBJ_KIND_CORE, /* core dump — detected, not parsed (reserved) */ +} ObjKind; + +enum { /* ObjSegment.perms bits */ + OBJ_SEG_X = 1u << 0, + OBJ_SEG_W = 1u << 1, + OBJ_SEG_R = 1u << 2 +}; + +typedef struct ObjSegment { + Sym name; /* PT_* spelling / Mach-O segname, or 0 */ + u64 vaddr; /* virtual address */ + u64 vsize; /* size in memory */ + u64 file_off; /* offset of segment contents in the file */ + u64 file_size; /* size on disk (< vsize when the segment carries bss) */ + u32 perms; /* OBJ_SEG_R | _W | _X */ + u32 align; /* power of two; 1 if none */ +} ObjSegment; + +typedef struct ObjImageDep { + Sym name; /* DT_NEEDED / imported DLL / dylib install-name */ + const Sym* imports; /* imported symbol names (PE/Mach-O); NULL for ELF */ + u32 nimports; +} ObjImageDep; + +/* Dynamic-table symbol. Distinct from the .symtab entries in the Symbols + * table — these come from .dynsym / dyld export trie / PE export table. */ +typedef struct ObjImageSym { + Sym name; + SymBind bind; + SymKind kind; + ObjSecId section; /* OBJ_SEC_NONE for undefined imports */ + u64 value; + u64 size; +} ObjImageSym; + +/* Dynamic relocation (.rela.dyn / .rela.plt, dyld binds, PE base relocs). + * References the dynamic symbol by interned name; the sym index is implicit + * in the dynamic table and not preserved here. 
*/ +typedef struct ObjImageReloc { + ObjSecId section; /* OBJ_SEC_NONE when the file has no section table */ + u64 offset; + Sym sym_name; /* 0 for symbol-less relative relocs */ + i64 addend; + RelocKind kind; +} ObjImageReloc; + +typedef struct ObjImage ObjImage; /* defined in obj.c */ + +/* Accessor — NULL on relocatable inputs. */ +const ObjImage* obj_image(const ObjBuilder*); +/* Lazily create (and return) the builder's image with the given kind. + * Readers call this once they know the input is EXEC/DYN. Idempotent; + * a second call updates the kind and returns the existing image. */ +ObjImage* obj_image_ensure(ObjBuilder*, ObjKind); + +/* Image scalar setters (readers). */ +void obj_image_set_entry(ObjImage*, u64 entry); +void obj_image_set_base(ObjImage*, u64 image_base); +void obj_image_set_interp(ObjImage*, Sym interp); +void obj_image_set_soname(ObjImage*, Sym soname); + +/* Image table appenders (readers). Each copies its argument by value into a + * builder-heap-owned vector. The Sym array behind ObjImageDep.imports must + * outlive the builder (intern into the compiler's global pool). */ +void obj_image_add_segment(ObjImage*, const ObjSegment*); +void obj_image_add_dep(ObjImage*, const ObjImageDep*); +void obj_image_add_rpath(ObjImage*, Sym rpath); +void obj_image_add_dynsym(ObjImage*, const ObjImageSym*); +void obj_image_add_dynreloc(ObjImage*, const ObjImageReloc*); + +/* Image read-side queries (object_file.c glue, objdump). 
*/ +ObjKind obj_image_kind(const ObjImage*); +u64 obj_image_entry(const ObjImage*); +u64 obj_image_base(const ObjImage*); +Sym obj_image_interp(const ObjImage*); +Sym obj_image_soname(const ObjImage*); +u32 obj_image_nsegments(const ObjImage*); +const ObjSegment* obj_image_segment(const ObjImage*, u32 idx); +u32 obj_image_ndeps(const ObjImage*); +const ObjImageDep* obj_image_dep(const ObjImage*, u32 idx); +u32 obj_image_nrpaths(const ObjImage*); +Sym obj_image_rpath(const ObjImage*, u32 idx); +u32 obj_image_ndynsyms(const ObjImage*); +const ObjImageSym* obj_image_dynsym(const ObjImage*, u32 idx); +u32 obj_image_ndynrelocs(const ObjImage*); +const ObjImageReloc* obj_image_dynreloc(const ObjImage*, u32 idx); + /* ---- file format emitters ---- */ void emit_elf(Compiler*, ObjBuilder*, Writer*); void emit_coff(Compiler*, ObjBuilder*, Writer*); diff --git a/test/objdump/aarch64/cases/01-image-headers.expected b/test/objdump/aarch64/cases/01-image-headers.expected @@ -0,0 +1,23 @@ +exec.elf: file format elf64-arm64 + +architecture: arm64, flags 0x00000112: +EXEC_P, HAS_SYMS, D_PAGED +start address 0x00000000004041b4 +format: elf64, sections: 4, symbols: 15 + +Image: + entry point 0x00000000004041b4 + image base 0x0000000000400000 + +Program Header: + LOAD off 0x0000000000000000 vaddr 0x0000000000400000 align 2**14 + filesz 0x0000000000000140 memsz 0x0000000000000140 flags r-- + LOAD off 0x0000000000004000 vaddr 0x0000000000404000 align 2**14 + filesz 0x0000000000000264 memsz 0x0000000000000264 flags r-x + LOAD off 0x0000000000008000 vaddr 0x0000000000408000 align 2**14 + filesz 0x000000000000006c memsz 0x000000000000006c flags r-- + NOTE off 0x0000000000000120 vaddr 0x0000000000400120 align 2**2 + filesz 0x0000000000000020 memsz 0x0000000000000020 flags r-- + +Dynamic Section: + diff --git a/test/objdump/aarch64/cases/01-image-headers.sh b/test/objdump/aarch64/cases/01-image-headers.sh @@ -0,0 +1,10 @@ +# Golden: `-f` file header + `-p` private/program headers over a 
committed +# freestanding aarch64 ELF *executable* (exec.elf; source + regen recipe in +# exec.c). Locks in the linked-image inspection added for ET_EXEC/ET_DYN: +# -f EXEC_P / D_PAGED flag bits and the real entry point (no longer 0) +# -p entry point, image base, and the LOAD/NOTE program headers w/ perms +# Committed as a binary (like test/objdump/dwarf/dwarf.o) so the golden is +# stable and decoupled from the code generator. Copied to a fixed name so +# the file-format label line is deterministic. +cp "$(dirname "$0")/exec.elf" exec.elf +"$CFREE" objdump -f -p exec.elf diff --git a/test/objdump/aarch64/cases/exec.c b/test/objdump/aarch64/cases/exec.c @@ -0,0 +1,14 @@ +/* Fixture source for exec.elf — a freestanding static aarch64 ELF + * executable. Regenerate with: + * cfree cc -target aarch64-linux -ffreestanding -nostdlib -e _start \ + * exec.c -o exec.elf + * Committed as a binary so the -f / -p golden is decoupled from the + * code generator (mirrors test/objdump/dwarf/dwarf.o). */ +static long sys_exit(long code) { + register long x8 __asm__("x8") = 93; /* __NR_exit */ + register long x0 __asm__("x0") = code; + __asm__ volatile("svc #0" : "+r"(x0) : "r"(x8) : "memory"); + return x0; +} +int compute(int a, int b) { return a * b + 7; } +void _start(void) { sys_exit(compute(3, 5)); } diff --git a/test/objdump/aarch64/cases/exec.elf b/test/objdump/aarch64/cases/exec.elf Binary files differ.