commit 57579de1edd9188b89bd36ff9a223373b31339b0 parent 012b76ad1bca94a710e3848c71d7f61c471c1f8f Author: Ryan Sepassi <rsepassi@gmail.com> Date: Wed, 27 May 2026 13:36:32 -0700 obj/objdump: extend image inspection — dynsym, rpath, stripped-segment disasm - nm: add -D/--dynamic to iterate .dynsym instead of .symtab - objdump -p: emit RPATH lines from the dynamic segment (ELF DT_RUNPATH, Mach-O LC_RPATH); new cfree_obj_rpathiter_new/next/free API - objdump -d: fall back to executable PT_LOAD segments when the section table is absent (stripped images / packers); disasm_buffer refactor eliminates the duplicated disassembly loop - size -A: use sec.addr so SysV output reports real load vaddrs for linked images instead of all-zero addresses - Mach-O reader: parse LC_SEGMENT_64 into the segment iterator, LC_LOAD_DYLIB into the dep iterator, and LC_RPATH into the rpath iterator; wire up the image-kind flags (EXEC_P, DYNAMIC) - ELF reader: populate sec.addr from sh_addr; segment iterator covers PT_LOAD with file_off/file_size; dynsym iterator over SHT_DYNSYM - New test cases 02-04 for aarch64 (size SysV image, rpath via rpath.so fixture, stripped-segment disasm); aarch64-darwin case 01 image headers Diffstat:
25 files changed, 1093 insertions(+), 67 deletions(-)
diff --git a/doc/IMAGE_INSPECT.md b/doc/IMAGE_INSPECT.md @@ -0,0 +1,255 @@ +# Executable / Image Inspection + +Status: design proposal (2026-05-27) + +## Goal + +Extend the library and `objdump` driver to inspect *linked images* — +executables and shared objects — not just relocatable objects. Cover ELF, +Mach-O, and PE. ELF lands first; Mach-O and PE follow on the same API. + +## Why this is a real extension, not a flag + +The reader today is relocatable-object-shaped. `cfree_obj_open` → +`cfree_detect_target` → `impl->read()`, and the ELF backend hard-rejects +anything but `ET_REL`: + + /* src/obj/elf/read.c:218 */ + if (e_type != ET_REL) + compiler_panic(c, ..., "read_elf: only ET_REL inputs are accepted ..."); + +There are DSO readers (`read_elf_dso`, `read_coff_dso`, a Mach-O dylib stub), +but they're wired only into the *linker's* input path, not into the public +`impl->read` / `cfree_obj_open` surface. `ET_EXEC` has no reader at all. + +The canonical in-memory model (`ObjBuilder`) is section / symbol / reloc +oriented. It has no notion of a **segment** (PT_LOAD), the **dynamic table** +(DT_NEEDED / SONAME / RPATH), an **entry point**, **image base**, **imports**, +or **data directories** — which is exactly what image inspection is about. + +Consequence today: `objdump` carries a hand-rolled `pe_parse_image` raw-byte +walker (`driver/objdump.c:1597`) that bypasses the library to serve `-f`/`-h`/ +`-p` for PE, and emits a soft error for `-t`/`-d`/`-r`/`-s`. nm, size, strip, +objcopy, addr2line all share `cfree_obj_open` and inherit the relocatable-only +limit. + +## Decisions + +- **API:** extend `CfreeObjFile` / `cfree_obj_open` (not a parallel + `CfreeImage` type). One open call; sections/symbols keep working where the + format still carries them; other tools inherit it for free. +- **Rollout:** ELF exec + DSO first (program headers, `.dynamic`, dynsym, + dynamic relocs), de-risking the neutral API on the best-understood format, + then Mach-O and PE. 
+- **Driver:** extend `objdump` with binutils-style `-p` / `-T` / `-R`, + format-neutral. Delete the PE special-case. + +## Design + +### The image dimension + +Add an internal `ObjImage` populated by the format backends and hung off +`CfreeObjFile` (NULL for pure relocatables). It holds the format-neutral +*common denominator* of a linked image: + + struct ObjImage { + ObjKind kind; /* REL / EXEC / DYN / CORE */ + u64 entry; /* entry point vaddr, 0 if none */ + u64 image_base; /* preferred load address / __TEXT base */ + Slice interp; /* PT_INTERP / LC_LOAD_DYLINKER, empty if none */ + Slice soname; /* DT_SONAME / LC_ID_DYLIB / PE export name */ + Vec segments; /* ImageSegment */ + Vec deps; /* ImageDep: needed lib + (PE/Mach-O) imports */ + Vec rpaths; /* DT_RPATH/RUNPATH, LC_RPATH, @rpath */ + Vec dynsyms; /* dynamic symbol table */ + Vec dynrelocs; /* dynamic relocations */ + const void* raw; /* per-format escape hatch (see below) */ + }; + +`ImageSegment` is the load-layout unit absent from `ObjBuilder`: + + { u64 vaddr; u64 vsize; u64 file_off; u64 file_size; + u32 perms; /* R/W/X bits */ + u32 align; + Slice name; /* PT_* spelling / segname / "" */ } + +Sections continue to map through the existing `ObjBuilder` view where present +(ELF execs usually retain section headers; PE always; Mach-O sections live +inside segments). When section headers are absent (stripped ELF), the section +view is simply empty and the segment view carries the load picture. + +### Reader wiring + +`ObjFormatImpl` gains the ability to populate an image. Cleanest is for each +backend's existing entry to branch on kind and fill both views: + +- `cfree_obj_open` detects kind (reuse `cfree_detect_target` + `e_type` / + `filetype` / PE characteristics) and routes to the backend, which fills + `ObjBuilder` (sections/symbols where present) and, for EXEC/DYN, `ObjImage`. 
+- **ELF** reuses `read_elf_dso`'s machinery: walk program headers for + segments + PT_INTERP, parse `.dynamic` for DT_NEEDED/SONAME/RPATH/RUNPATH + and the dynsym/dynstr/relocation pointers, read `.dynsym`, and + `.rela.dyn` / `.rela.plt` for dynamic relocs. `ET_EXEC` and `ET_DYN` share + one path; the `e_type != ET_REL` guard is replaced by a kind switch. +- **PE** gets a real library reader (DOS/NT headers, optional header → entry + + image base + subsystem, data directories, section table, import + + export directories, base relocations), replacing the driver's + `pe_parse_image` walker, which is then deleted. +- **Mach-O** parses the load commands currently skipped: LC_SEGMENT_64 vmaddr + (already read for sections — promote to segments), LC_LOAD_DYLIB / + LC_ID_DYLIB, LC_LOAD_DYLINKER, LC_MAIN / LC_UNIXTHREAD (entry), LC_RPATH, + and exports/binds (LC_DYLD_INFO / chained fixups / exports-trie). + +### Public API additions (`include/cfree/object.h`) + +Mirrors the existing reader iterators (`CfreeObjSymInfo`, `CfreeObjReloc`, +`CfreeObjGroupInfo` patterns): + + typedef enum CfreeObjKind { + CFREE_OBJ_KIND_REL, CFREE_OBJ_KIND_EXEC, + CFREE_OBJ_KIND_DYN, CFREE_OBJ_KIND_CORE, + } CfreeObjKind; + CfreeObjKind cfree_obj_kind(const CfreeObjFile*); + + typedef struct CfreeObjImageInfo { /* whole-image scalars */ + uint64_t entry; uint64_t image_base; + CfreeSlice interp; CfreeSlice soname; + } CfreeObjImageInfo; + CfreeStatus cfree_obj_image_info(const CfreeObjFile*, CfreeObjImageInfo*); + + /* segment iterator */ + typedef struct CfreeObjSegInfo { + CfreeSlice name; uint64_t vaddr, vsize, file_off, file_size; + uint32_t perms; /* CFREE_SEG_R | _W | _X */ + uint32_t align; + } CfreeObjSegInfo; + CfreeStatus cfree_obj_segiter_new(CfreeObjFile*, CfreeObjSegIter**); + CfreeIterResult cfree_obj_segiter_next(CfreeObjSegIter*, CfreeObjSegInfo*); + void cfree_obj_segiter_free(CfreeObjSegIter*); + + /* dependency / rpath iterators (deps carry imported names for PE/Mach-O) */ 
+ CfreeStatus cfree_obj_depiter_new(CfreeObjFile*, CfreeObjDepIter**); + CfreeIterResult cfree_obj_depiter_next(CfreeObjDepIter*, CfreeObjDepInfo*); + void cfree_obj_depiter_free(CfreeObjDepIter*); + + /* dynamic symbols + dynamic relocations: reuse CfreeObjSymInfo / + * CfreeObjReloc shapes via parallel iterators */ + CfreeStatus cfree_obj_dynsymiter_new(CfreeObjFile*, CfreeObjSymIter**); + CfreeStatus cfree_obj_dynreliter_new(CfreeObjFile*, CfreeObjRelocIter**); + +Format-specific raw fields (DT_* values, raw load commands, PE data +directories) are surfaced through an escape hatch in the spirit of the +existing `cfree_obj_section_format_flags` — keep the neutral API clean. + +### Driver (`objdump`), binutils-aligned + +- `-f` file header: add entry point + image base for EXEC/DYN. +- `-h` section headers: works once exec sections parse. +- `-p` / `--private-headers`: ELF program headers + dynamic section; Mach-O + load commands; PE optional header + data dirs (matches binutils `objdump -p` + semantics; folds in today's PE path). +- `-T` / `--dynamic-syms`: dynamic symbol table. +- `-R` / `--dynamic-reloc`: dynamic relocations. +- `-d`: already disassembles by vaddr — point it at PT_LOAD/text for execs. + +The `CFREE_BIN_PE` special-case in `driver_objdump` collapses into the normal +`dump_obj` path once PE images open via `cfree_obj_open`. + +## Landed API surface (phase 1 contract) + +The neutral API + internal model are in place and build green (relocatable +inputs report `CFREE_OBJ_KIND_REL` with empty image iterators; the section/ +symbol path is unchanged). 
Impl and driver work parallelize against this: + +- **Public** (`include/cfree/object.h`): `CfreeObjKind` + `cfree_obj_kind`; + `CfreeObjImageInfo` + `cfree_obj_image_info`; `cfree_obj_segiter_*` over + `CfreeObjSegInfo` (`CFREE_SEG_R/W/X`); `cfree_obj_depiter_*` over + `CfreeObjDepInfo`; `cfree_obj_dynsymiter_new` (reuses `CfreeObjSymIter` / + `cfree_obj_symiter_next`); `cfree_obj_dynreliter_new` (reuses + `CfreeObjRelocIter` / `cfree_obj_reliter_next`). +- **Internal model** (`src/obj/obj.h`, impl in `src/obj/obj.c`): `ObjImage` + hung off `ObjBuilder`. Readers call `obj_image_ensure(ob, OBJ_KIND_*)` then + the setters (`obj_image_set_entry/base/interp/soname`) and appenders + (`obj_image_add_segment/dep/rpath/dynsym/dynreloc`). `obj_image(ob)` is NULL + on relocatables. `obj_free` releases the image. +- **Glue** (`src/api/object_file.c`): maps `ObjImage` → the public iterators. + +**Driver (landed):** objdump grew `-p` (program/dynamic headers, format- +neutral via the image API), `-T` (dynamic symbols), and `-R` (dynamic +relocations); `-f` now reports EXEC_P/DYNAMIC/D_PAGED and the real entry +point. Section/symbol dumps now work on executables too (previously +`cfree_obj_open` rejected ET_EXEC). Verified on a real `elf64-arm64` +executable: `-p` renders the LOAD segments (perms, vaddr, align) + entry + +image base; static execs correctly show empty dynamic tables. Populated +NEEDED/SONAME/dynsym/dynrel can't be exercised from cfree yet — it only emits +static executables (no `-shared`); the empty-path rendering is verified and +the populated paths will light up when dynamic linking lands. + +**Impl (ELF, landed):** `read_elf` accepts `ET_EXEC`/`ET_DYN` and populates +`ObjImage` from program headers (+ dynamic info where present). 
+ +**Impl (Mach-O, landed):** `read_macho` accepts `MH_EXECUTE`/`MH_DYLIB` +alongside `MH_OBJECT`; its existing section/symbol passes run unchanged and +`read_macho_image` (`src/obj/macho/read.c`) adds the image dimension by +re-walking the load commands: `LC_SEGMENT_64` → segments (+ `__TEXT` base, +`VM_PROT_*`→`OBJ_SEG_*` perms), `LC_LOAD_DYLINKER` → interp, `LC_ID_DYLIB` → +soname, `LC_LOAD_DYLIB`/`WEAK`/`REEXPORT` → deps, `LC_RPATH` → rpaths, +`LC_MAIN` (`+__TEXT` base) / `LC_UNIXTHREAD` → entry, the `LC_SYMTAB` external +nlist entries → dynamic symbols, and `LC_DYLD_CHAINED_FIXUPS` binds/rebases +(`DYLD_CHAINED_PTR_64`) → dynamic relocations (bind names resolved through the +chained-imports table). Verified end-to-end: cfree-linked static execs/dylibs +(segments, entry, base, interp, soname, `_main`/exports) and host clang arm64 +binaries (`NEEDED libSystem.B.dylib`, `RPATH`, and `ABS64 _printf` binds). +Classic `LC_DYLD_INFO` and the exports trie are not read (the symbol table is +the authoritative dynamic-symbol source); chained pointer formats other than +the 64-bit family are skipped leniently. + +## Phasing + +1. **Neutral API + ELF.** Add `ObjImage`, the public iterators, and ELF + EXEC/DYN reading (segments, dynamic, dynsym, dynrel). objdump `-p`/`-T`/`-R` + for ELF. Red-green tests on cfree-linked ELF executables and `.so`s. +2. **PE.** Library reader replacing `pe_parse_image`; delete the driver + walker. objdump `-p`/`-T`/`-R` for PE imports/exports/data dirs. +3. **Mach-O (landed).** Load-command parsing for execs/dylibs; chained-fixup + binds + rebases; dynamic symbols from `LC_SYMTAB`. objdump `-f`/`-p`/`-T`/ + `-R` for Mach-O. Committed golden on a cfree-linked aarch64 exec under + `test/objdump/aarch64-darwin/`. +4. **Inherited tools.** Confirm nm `-D`, size, addr2line, strings behave on + images; lift the relocatable-only assumptions where they block image input. 
+ +## Test strategy + +The compiler links its own ELF/Mach-O/PE images, so tests round-trip: link a +small program, open it via `cfree_obj_open`, assert kind/entry/segments/deps/ +dynsyms against what the linker emitted. Cross-check against the host +`readelf`/`objdump` in smoke tests where available. New corpora under +`test/elf/` (image cases), later `test/macho/` and `test/{coff,pe}/`. + +## Resolved decisions + +- **Stripped ELF (no section header table):** do not synthesize pseudo-sections + from segments. This matches GNU `objdump` and `llvm-objdump`, which are + section-header-driven and report "no sections" when the table is absent + (`readelf -l` still shows segments). Note the common `strip` keeps the + section table and `.text`, so this only bites genuinely table-stripped + images. The section view is empty in that case; as a value-add beyond GNU + objdump, `-d` may disassemble executable `PT_LOAD` segments directly by + vaddr, so a fully-stripped image is still disassemblable. *(Implemented: + `read_elf` now accepts a zeroed section-header table and surfaces an empty + section view; `dump_disasm` falls back to iterating X-perm segments via + `cfree_obj_segiter_*` when the section walk yields nothing on an image, + using each segment's vaddr as the base. Format-agnostic — no ELF + special-casing in the driver. The section-driven path also now uses + `sec.addr` as the base, so `-d` on a non-stripped image resolves real + vaddrs and symbol labels too. Golden: + `test/objdump/aarch64/cases/04-disasm-stripped-segment`.)* + +- **Core files (`ET_CORE` / Mach-O `MH_CORE`):** out of scope. Keep + `CFREE_OBJ_KIND_CORE` defined but unimplemented; detect and reject cleanly. + Note/register-state parsing is a separate feature. + +- **Mach-O exports/binds (phase 3):** support only `LC_DYLD_CHAINED_FIXUPS` + + `LC_DYLD_EXPORTS_TRIE` (modern). Classic `LC_DYLD_INFO` opcode/trie reading + is not supported; reading older dylibs is out of scope.
Confirm cfree's own + linker emits chained fixups when phase 3 begins. diff --git a/driver/nm.c b/driver/nm.c @@ -28,6 +28,7 @@ typedef struct NmOpts { int reverse_sort; /* -r */ int no_sort; /* -p / --no-sort */ int print_file; /* -A */ + int dynamic; /* -D / --dynamic: list dynamic (.dynsym) symbols */ } NmOpts; static int nm_symbol_visible(const CfreeObjSymInfo* si, const NmOpts* opts) { @@ -133,7 +134,9 @@ static int nm_collect_obj(CfreeObjFile* of, const NmOpts* opts, CfreeTarget t = cfree_obj_target(of); CfreeObjSymIter* it = NULL; int rc = 0; - if (cfree_obj_symiter_new(of, &it) != CFREE_OK) return 1; + CfreeStatus st = opts->dynamic ? cfree_obj_dynsymiter_new(of, &it) + : cfree_obj_symiter_new(of, &it); + if (st != CFREE_OK) return 1; for (;;) { CfreeObjSymInfo si; if (cfree_obj_symiter_next(it, &si) != CFREE_ITER_ITEM) break; @@ -255,6 +258,8 @@ void driver_help_nm(void) { " -g, --extern-only show only external (global) symbols\n" " -u, --undefined-only show only undefined symbols\n" " --defined-only show only defined symbols\n" + " -D, --dynamic list dynamic symbols (.dynsym), not " + ".symtab\n" " -n, --numeric-sort sort by address\n" " -r, --reverse-sort reverse the sort order\n" " --no-sort, -p do not sort; print in file order\n" @@ -314,6 +319,10 @@ int driver_nm(int argc, char** argv) { opts.print_file = 1; continue; } + if (driver_streq(a, "-D") || driver_streq(a, "--dynamic")) { + opts.dynamic = 1; + continue; + } if (a[0] == '-') { driver_errf(NM_TOOL, "unknown option: %s", a); rc = 2; diff --git a/driver/objdump.c b/driver/objdump.c @@ -931,6 +931,12 @@ static void dump_relocs(CfreeObjFile* f, const ObjdumpOpts* opts) { if (emitted_any) driver_printf("\n"); } +/* Find a symbol whose value is exactly `value`. 
When `section_idx` is a real + * index the match is scoped to that section; pass OBJDUMP_SEC_ANY to match by + * address across the whole symbol table — used by the segment-fallback path, + * where the disassembled bytes have no owning section. */ +#define OBJDUMP_SEC_ANY UINT32_MAX + static CfreeSlice objdump_sym_at(CfreeObjFile* f, uint32_t section_idx, uint64_t value) { CfreeObjSymIter* it = NULL; @@ -941,7 +947,8 @@ static CfreeSlice objdump_sym_at(CfreeObjFile* f, uint32_t section_idx, for (;;) { CfreeIterResult r = cfree_obj_symiter_next(it, &sym); if (r != CFREE_ITER_ITEM) break; - if (sym.section != section_idx || sym.value != value) continue; + if (section_idx != OBJDUMP_SEC_ANY && sym.section != section_idx) continue; + if (sym.value != value) continue; if (!sym.name.len) continue; if (sym.kind == CFREE_SK_SECTION) continue; best = sym.name; @@ -951,10 +958,80 @@ static CfreeSlice objdump_sym_at(CfreeObjFile* f, uint32_t section_idx, return best; } +/* Disassemble `len` bytes at `data`, treating `vaddr` as the address of the + * first byte. Symbol labels are looked up via `sym_section` (a real section + * index, or OBJDUMP_SEC_ANY for the segment-fallback path). 
*/ +static void disasm_buffer(const CfreeDisasmContext* dctx, CfreeObjFile* f, + const uint8_t* data, size_t len, uint64_t vaddr, + uint32_t sym_section) { + CfreeDisasmIter* dis = NULL; + CfreeInsn insn; + + if (cfree_disasm_iter_new(dctx, data, len, vaddr, f, &dis) != CFREE_OK) + return; + for (;;) { + CfreeIterResult r = cfree_disasm_iter_next(dis, &insn); + uint32_t b; + CfreeSlice label; + if (r != CFREE_ITER_ITEM) break; + label = objdump_sym_at(f, sym_section, insn.vaddr); + if (label.len) + driver_printf("%016llx <%.*s>:\n", (unsigned long long)insn.vaddr, + CFREE_SLICE_ARG(label)); + driver_printf("%8llx:\t", (unsigned long long)insn.vaddr); + for (b = 0; b < insn.nbytes; ++b) driver_printf("%02x ", insn.bytes[b]); + for (b = insn.nbytes; b < 8; ++b) driver_printf(" "); + driver_printf("\t%.*s", CFREE_SLICE_ARG(insn.mnemonic)); + if (insn.operands.len) { + driver_printf(" %.*s", CFREE_SLICE_ARG(insn.operands)); + } + if (insn.annotation.len) { + driver_printf(" # %.*s", CFREE_SLICE_ARG(insn.annotation)); + } + driver_printf("\n"); + } + cfree_disasm_iter_free(dis); +} + +/* Fallback for fully section-stripped images (objcopy --strip-sections, + * packers): the section table is gone, but the code still lives in the + * executable PT_LOAD segments. We disassemble each such segment's file + * contents directly, using its vaddr as the base. `image` is the raw file + * bytes; segment file_off/file_size index into it. Returns the number of + * segments disassembled. Format-agnostic — driven entirely by the segment + * iterator, no ELF/Mach-O special-casing. 
*/ +static uint32_t dump_disasm_segments(const CfreeDisasmContext* dctx, + CfreeObjFile* f, const ObjdumpOpts* opts, + const CfreeSlice* image) { + CfreeObjSegIter* sit = NULL; + CfreeObjSegInfo seg; + uint32_t emitted = 0; + + if (!image || cfree_obj_segiter_new(f, &sit) != CFREE_OK) return 0; + while (cfree_obj_segiter_next(sit, &seg) == CFREE_ITER_ITEM) { + if (!(seg.perms & CFREE_SEG_X)) continue; + if (!j_match(opts, seg.name)) continue; + if (seg.file_size == 0) continue; + if (seg.file_off > image->len || seg.file_size > image->len - seg.file_off) + continue; + + driver_printf( + "Disassembly of segment %.*s:\n\n", + CFREE_SLICE_ARG(seg.name.len ? seg.name : CFREE_SLICE_LIT("LOAD"))); + disasm_buffer(dctx, f, (const uint8_t*)image->data + seg.file_off, + (size_t)seg.file_size, seg.vaddr, OBJDUMP_SEC_ANY); + driver_printf("\n"); + emitted++; + } + cfree_obj_segiter_free(sit); + return emitted; +} + static void dump_disasm(const CfreeDisasmContext* dctx, CfreeObjFile* f, - const ObjdumpOpts* opts) { + const ObjdumpOpts* opts, const CfreeSlice* image) { uint32_t nsec = cfree_obj_nsections(f); uint32_t i; + uint32_t emitted = 0; CfreeDisasmContext file_dctx; if (!dctx) return; @@ -965,8 +1042,6 @@ static void dump_disasm(const CfreeDisasmContext* dctx, CfreeObjFile* f, CfreeObjSecInfo sec; size_t len = 0; const uint8_t* data = NULL; - CfreeDisasmIter* dis = NULL; - CfreeInsn insn; int want; if (cfree_obj_section(f, i, &sec) != CFREE_OK) continue; @@ -980,33 +1055,17 @@ static void dump_disasm(const CfreeDisasmContext* dctx, CfreeObjFile* f, driver_printf( "Disassembly of section %.*s:\n\n", CFREE_SLICE_ARG(sec.name.len ? 
sec.name : CFREE_SLICE_LIT("(anon)"))); - - if (cfree_disasm_iter_new(&file_dctx, data, len, 0, f, &dis) != CFREE_OK) - continue; - for (;;) { - CfreeIterResult r = cfree_disasm_iter_next(dis, &insn); - uint32_t b; - CfreeSlice label; - if (r != CFREE_ITER_ITEM) break; - label = objdump_sym_at(f, i, insn.vaddr); - if (label.len) - driver_printf("%016llx <%.*s>:\n", (unsigned long long)insn.vaddr, - CFREE_SLICE_ARG(label)); - driver_printf("%8llx:\t", (unsigned long long)insn.vaddr); - for (b = 0; b < insn.nbytes; ++b) driver_printf("%02x ", insn.bytes[b]); - for (b = insn.nbytes; b < 8; ++b) driver_printf(" "); - driver_printf("\t%.*s", CFREE_SLICE_ARG(insn.mnemonic)); - if (insn.operands.len) { - driver_printf(" %.*s", CFREE_SLICE_ARG(insn.operands)); - } - if (insn.annotation.len) { - driver_printf(" # %.*s", CFREE_SLICE_ARG(insn.annotation)); - } - driver_printf("\n"); - } - cfree_disasm_iter_free(dis); + /* sec.addr is the load vaddr for a linked image, 0 for a relocatable + * object — so branch/call targets resolve correctly in both. */ + disasm_buffer(&file_dctx, f, data, len, sec.addr, i); driver_printf("\n"); + emitted++; } + + /* No disassemblable sections, but this is a linked image: the section table + * was stripped. Fall back to the executable load segments. */ + if (emitted == 0 && cfree_obj_kind(f) != CFREE_OBJ_KIND_REL) + dump_disasm_segments(&file_dctx, f, opts, image); } /* `-f`: GNU objdump-style file header summary. 
Object files have no @@ -1450,12 +1509,21 @@ static void dump_private(CfreeObjFile* f) { } cfree_obj_depiter_free(dit); } + { + CfreeObjRpathIter* rit = NULL; + CfreeSlice rpath; + if (cfree_obj_rpathiter_new(f, &rit) == CFREE_OK) { + while (cfree_obj_rpathiter_next(rit, &rpath) == CFREE_ITER_ITEM) + driver_printf(" RPATH %.*s\n", CFREE_SLICE_ARG(rpath)); + cfree_obj_rpathiter_free(rit); + } + } driver_printf("\n"); } static void dump_obj(const CfreeContext* ctx, const CfreeDisasmContext* dctx, const char* label, CfreeObjFile* f, - const ObjdumpOpts* opts) { + const ObjdumpOpts* opts, const CfreeSlice* image) { CfreeTarget target = cfree_obj_target(f); CfreeObjFmt fmt = cfree_obj_fmt(f); @@ -1472,7 +1540,7 @@ static void dump_obj(const CfreeContext* ctx, const CfreeDisasmContext* dctx, if (opts->T) dump_symbols(f, opts, 1); if (opts->p) dump_private(f); if (opts->s) dump_hex(f, opts); - if (opts->d || opts->D) dump_disasm(dctx, f, opts); + if (opts->d || opts->D) dump_disasm(dctx, f, opts, image); if (opts->r) dump_relocs(f, opts); if (opts->R) dump_dynrelocs(f); if (opts->dwarf) dump_dwarf(ctx, f, opts); @@ -1518,7 +1586,7 @@ static int dump_archive(const char* path, const CfreeSlice* input, CFREE_SLICE_ARG(cfree_slice_cstr(label))); continue; } - dump_obj(ctx, dctx, label, f, opts); + dump_obj(ctx, dctx, label, f, opts, &min); cfree_obj_free(f); } cfree_ar_iter_free(it); @@ -1799,7 +1867,7 @@ int driver_objdump(int argc, char** argv) { rc = 1; } } else { - dump_obj(&ctx, dctx_p, a, f, &opts); + dump_obj(&ctx, dctx_p, a, f, &opts, &input); if (opts.p && bin == CFREE_BIN_PE) { dump_pe_private(a, input.data, input.len); } diff --git a/driver/size.c b/driver/size.c @@ -142,16 +142,13 @@ static void size_print_sysv(CfreeObjFile* of, const char* name, ns = cfree_obj_nsections(of); for (i = 0; i < ns; ++i) { CfreeObjSecInfo sec; - uint64_t addr = 0; if (cfree_obj_section(of, i, &sec) != CFREE_OK) continue; if (!(sec.flags & CFREE_SF_ALLOC)) continue; if (sec.kind == 
CFREE_SEC_DEBUG) continue; - if (cfree_obj_section_data(of, i, NULL, NULL) == CFREE_OK) { - /* section has content; try to get its vaddr */ - } + /* addr is the load vaddr in a linked image, 0 for relocatables. */ driver_printf("%-16.16s %08llx %08llx\n", sec.name.len ? sec.name.s : "(anon)", - (unsigned long long)sec.size, (unsigned long long)addr); + (unsigned long long)sec.size, (unsigned long long)sec.addr); } { SizeAgg a = size_compute_obj(of, opts); diff --git a/include/cfree/object.h b/include/cfree/object.h @@ -80,6 +80,7 @@ typedef struct CfreeObjSecInfo { CfreeSecKind kind; uint32_t flags; /* CfreeSecFlag */ uint64_t size; /* bytes; BSS uses virtual size */ + uint64_t addr; /* load vaddr in a linked image; 0 for relocatables */ uint32_t align; /* power of two; 1 means no special alignment */ uint32_t entsize; /* section entry size, or 0 */ } CfreeObjSecInfo; @@ -314,6 +315,7 @@ typedef struct CfreeObjDepInfo { typedef struct CfreeObjSegIter CfreeObjSegIter; typedef struct CfreeObjDepIter CfreeObjDepIter; +typedef struct CfreeObjRpathIter CfreeObjRpathIter; CFREE_API CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len); CFREE_API CfreeStatus cfree_detect_target(const uint8_t* data, size_t len, @@ -405,6 +407,16 @@ CFREE_API CfreeIterResult cfree_obj_depiter_next(CfreeObjDepIter*, CfreeObjDepInfo* out); CFREE_API void cfree_obj_depiter_free(CfreeObjDepIter*); +/* Runtime library search paths (ELF DT_RPATH / DT_RUNPATH, Mach-O LC_RPATH). + * Each item is one path string (may contain $ORIGIN / @rpath tokens). The + * DT_RPATH vs DT_RUNPATH distinction is not preserved. Empty on relocatable + * objects. */ +CFREE_API CfreeStatus cfree_obj_rpathiter_new(CfreeObjFile*, + CfreeObjRpathIter** out); +CFREE_API CfreeIterResult cfree_obj_rpathiter_next(CfreeObjRpathIter*, + CfreeSlice* out); +CFREE_API void cfree_obj_rpathiter_free(CfreeObjRpathIter*); + /* Dynamic symbol table (.dynsym / dyld export trie / PE export table). 
* Reuses the CfreeObjSymInfo shape and the CfreeObjSymIter handle — drive * it with cfree_obj_symiter_next / _free. Empty on relocatable objects. */ diff --git a/src/api/object_file.c b/src/api/object_file.c @@ -107,6 +107,7 @@ CfreeStatus cfree_obj_section(const CfreeObjFile* f, CfreeObjSection idx, out->kind = (CfreeSecKind)sec->kind; out->flags = (uint32_t)sec->flags; out->size = sec->bss_size ? sec->bss_size : sec->bytes.total; + out->addr = sec->addr; out->align = sec->align > 1u ? sec->align : 1u; out->entsize = sec->entsize; return CFREE_OK; @@ -729,3 +730,39 @@ void cfree_obj_depiter_free(CfreeObjDepIter* it) { h->free(h, it->import_buf, sizeof(*it->import_buf) * it->import_cap); h->free(h, it, sizeof(*it)); } + +struct CfreeObjRpathIter { + CfreeObjFile* file; + u32 idx; +}; + +CfreeStatus cfree_obj_rpathiter_new(CfreeObjFile* f, CfreeObjRpathIter** out) { + Heap* h; + CfreeObjRpathIter* it; + if (!f || !out) return CFREE_INVALID; + h = f->ctx->heap; + it = + (CfreeObjRpathIter*)h->alloc(h, sizeof(*it), _Alignof(CfreeObjRpathIter)); + if (!it) return CFREE_NOMEM; + it->file = f; + it->idx = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_obj_rpathiter_next(CfreeObjRpathIter* it, + CfreeSlice* out) { + const ObjImage* im; + if (!it || !out) return CFREE_ITER_ERROR; + im = obj_image(it->file->ob); + if (it->idx >= obj_image_nrpaths(im)) return CFREE_ITER_END; + *out = pool_slice(it->file->compiler.global, obj_image_rpath(im, it->idx++)); + return CFREE_ITER_ITEM; +} + +void cfree_obj_rpathiter_free(CfreeObjRpathIter* it) { + Heap* h; + if (!it) return; + h = it->file->ctx->heap; + h->free(h, it, sizeof(*it)); +} diff --git a/src/obj/elf/read.c b/src/obj/elf/read.c @@ -477,25 +477,45 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, u16 e_shnum = elf_rd_u16(data + 60); u16 e_shstrndx = elf_rd_u16(data + 62); - if (e_shentsize != ELF64_SHDR_SIZE) - compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u", - 
(u32)e_shentsize); - if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) - compiler_panic(c, no_loc(), "read_elf: section header table out of range"); - if (e_shstrndx >= e_shnum) - compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u", - (u32)e_shstrndx, (u32)e_shnum); - - /* Parse all shdrs into scratch. */ - ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); - for (u32 i = 0; i < e_shnum; ++i) - parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); + /* A fully section-stripped image (objcopy --strip-sections, packers, + * some release binaries) sets e_shoff/e_shnum to zero: the section + * header table is gone, but the load segments still describe the file. + * That's valid for ET_EXEC/ET_DYN — parse the image view (segments + + * dynamic) and present an empty section view, matching GNU/LLVM. An + * ET_REL with no sections carries no model state, so still reject it. */ + int has_sht = (e_shoff != 0 && e_shnum != 0); + if (has_sht) { + if (e_shentsize != ELF64_SHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u", + (u32)e_shentsize); + if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) + compiler_panic(c, no_loc(), + "read_elf: section header table out of range"); + if (e_shstrndx >= e_shnum) + compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u", + (u32)e_shstrndx, (u32)e_shnum); + } else { + if (e_type == ET_REL) + compiler_panic(c, no_loc(), + "read_elf: ET_REL with no section header table"); + e_shnum = 0; /* normalize so the section/symbol/reloc passes are no-ops */ + } - const ShdrRec* shstr_sh = &shdrs[e_shstrndx]; - if (shstr_sh->sh_offset + shstr_sh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range"); - const u8* shstrtab = data + shstr_sh->sh_offset; - u64 shstrtab_sz = shstr_sh->sh_size; + /* Parse all shdrs into scratch. NULL when the table is absent. 
*/ + ShdrRec* shdrs = NULL; + const u8* shstrtab = NULL; + u64 shstrtab_sz = 0; + if (has_sht) { + shdrs = arena_array(c->scratch, ShdrRec, e_shnum); + for (u32 i = 0; i < e_shnum; ++i) + parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); + + const ShdrRec* shstr_sh = &shdrs[e_shstrndx]; + if (shstr_sh->sh_offset + shstr_sh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range"); + shstrtab = data + shstr_sh->sh_offset; + shstrtab_sz = shstr_sh->sh_size; + } /* Build the ObjBuilder. */ ObjBuilder* ob = obj_new(c); @@ -503,7 +523,7 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, obj_set_elf_e_flags(ob, e_flags); /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */ - u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum); + u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum ? e_shnum : 1); /* Pass 1: create obj sections for every non-NULL shdr that carries * load-bearing model state. SYMTAB / STRTAB / RELA / REL are @@ -542,6 +562,10 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, SLICE_ARG(((Slice){.s = nm, .len = nlen}))); elf_to_obj[i] = id; + /* Load address: 0 for ET_REL, the assigned vaddr for linked images. + * Lets the section view carry the load picture for execs/DSOs. */ + if (sh->sh_addr) obj_section_set_addr(ob, id, sh->sh_addr); + /* Preserve format-specific bits the canonical SecSem/SecFlag * mapping can't represent so emit_elf can write them back * verbatim. ext_type only set when the sh_type fell through diff --git a/src/obj/macho/link.c b/src/obj/macho/link.c @@ -1940,15 +1940,27 @@ static void build_symtab(MCtx* x) { * file actually contains (post-coalesce), matching what we emit * in emit_load_command_segment. */ u8 n_sect = 0; + /* Prefer the section whose half-open [vaddr, vaddr+size) range contains + * the symbol. 
This must win over the end-boundary fallback below: when + * two sections abut (A ends exactly where B begins), a symbol at the + * boundary is the *start* of B, not the end of A. */ for (u32 k = 0; k < x->nouts; ++k) { OutSec* o = &x->outs[k]; if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) { n_sect = (u8)(k + 1u); break; } - if (s->vaddr == o->vaddr + o->size) { - n_sect = (u8)(k + 1u); - break; + } + /* Fallback: a symbol sitting exactly one-past-the-end of a section with + * no following section covering it (e.g. an end-of-section marker) is + * attributed to the section that ends there. */ + if (n_sect == 0) { + for (u32 k = 0; k < x->nouts; ++k) { + OutSec* o = &x->outs[k]; + if (s->vaddr == o->vaddr + o->size) { + n_sect = (u8)(k + 1u); + break; + } } } if (n_sect == 0) continue; diff --git a/src/obj/macho/macho.h b/src/obj/macho/macho.h @@ -50,10 +50,14 @@ #define LC_REQ_DYLD 0x80000000u #define LC_SEGMENT_64 0x19u #define LC_SYMTAB 0x02u +#define LC_UNIXTHREAD 0x05u #define LC_DYSYMTAB 0x0bu #define LC_LOAD_DYLIB 0x0cu #define LC_ID_DYLIB 0x0du #define LC_LOAD_DYLINKER 0x0eu +#define LC_LOAD_WEAK_DYLIB (0x18u | LC_REQ_DYLD) +#define LC_RPATH (0x1cu | LC_REQ_DYLD) +#define LC_REEXPORT_DYLIB (0x1fu | LC_REQ_DYLD) #define LC_UUID 0x1bu #define LC_FUNCTION_STARTS 0x26u #define LC_DATA_IN_CODE 0x29u @@ -63,6 +67,15 @@ #define LC_DYLD_CHAINED_FIXUPS (0x34u | LC_REQ_DYLD) #define LC_MAIN (0x28u | LC_REQ_DYLD) +/* ---- VM protection bits (segment maxprot / initprot) ---- */ +#define VM_PROT_READ 0x1u +#define VM_PROT_WRITE 0x2u +#define VM_PROT_EXECUTE 0x4u + +/* ---- LC_DYLD_CHAINED_FIXUPS encodings (subset cfree emits/reads) ---- */ +#define DYLD_CHAINED_PTR_64 2u /* pointer_format */ +#define DYLD_CHAINED_IMPORT 1u /* imports_format */ + /* ---- header sizes ---- */ #define MACHO_HDR64_SIZE 32u #define MACHO_SEGCMD64_SIZE 72u diff --git a/src/obj/macho/read.c b/src/obj/macho/read.c @@ -5,9 +5,11 @@ * shape-equivalent to the writer's input, modulo the 
synthesized * "__SEG,__sect"-form section names. * - * Scope: AArch64 little-endian, MH_OBJECT only (MH_DYLIB is a stub — - * the linker has no consumer for it yet). Other archs / endianness - * produce a compiler_panic with a diagnostic. */ + * Scope: AArch64 little-endian. MH_OBJECT parses to the section/symbol/ + * reloc view; MH_EXECUTE / MH_DYLIB additionally get the linked-image view + * (read_macho_image: segments, dylibs, entry, dynamic symbols + relocs). + * read_macho_dso remains the linker's DSO-only input path. Other archs / + * endianness produce a compiler_panic with a diagnostic. */ #include <stdlib.h> #include <string.h> @@ -119,6 +121,285 @@ static u16 sec_sem_from(u32 mflags, u16 sec_kind) { return SSEM_PROGBITS; } +/* Intern a Mach-O lc_str (NUL-terminated string embedded inside a load + * command at `cmd_pos + str_off`, bounded by the command's cmdsize). + * Returns 0 if the offset/string is malformed. */ +static Sym macho_lc_str(Compiler* c, const u8* data, u64 cmd_pos, u32 cmdsize, + u32 str_off) { + if (str_off < 8 || str_off >= cmdsize) return 0; + const char* p = (const char*)(data + cmd_pos + str_off); + u32 maxlen = cmdsize - str_off; + u32 nlen = 0; + while (nlen < maxlen && p[nlen]) ++nlen; + if (!nlen) return 0; + return pool_intern_slice(c->global, (Slice){.s = p, .len = nlen}); +} + +/* ---- read_macho_image ---- + * + * Linked-image (MH_EXECUTE / MH_DYLIB) view, the Mach-O peer of + * read_elf_image. Walks the load commands a second time to populate the + * ObjImage: LC_SEGMENT_64 -> segments (+ __TEXT base), LC_LOAD_DYLINKER -> + * interp, LC_ID_DYLIB -> soname, LC_LOAD_DYLIB/WEAK/REEXPORT -> deps, + * LC_RPATH -> rpaths, LC_MAIN/LC_UNIXTHREAD -> entry, the LC_SYMTAB external + * nlist entries -> dynamic symbols, and LC_DYLD_CHAINED_FIXUPS binds/rebases + * -> dynamic relocations. The section / symbol / reloc views are parsed by + * read_macho's normal passes; this adds the orthogonal image dimension. 
+ * Lenient: a malformed sub-table is skipped rather than panicked, so a + * partially-damaged image still yields a useful inspection. + * + * `msecs`/`nmsecs` carry the section table read in read_macho's pass 1 so a + * defined dynamic symbol's n_sect maps back to its ObjSecId. */ +static void read_macho_image(Compiler* c, ObjBuilder* ob, const u8* data, + size_t len, u32 filetype, u32 cputype, + const MSecRec* msecs, u32 nmsecs) { + ObjImage* im = + obj_image_ensure(ob, filetype == MH_DYLIB ? OBJ_KIND_DYN : OBJ_KIND_EXEC); + if (!im) compiler_panic(c, no_loc(), "read_macho: obj_image_ensure failed"); + + u32 ncmds = rd_u32_le(data + 16); + u32 sizeofcmds = rd_u32_le(data + 20); + + /* Per-segment (vmaddr, file_off) recorded for chained-fixup vaddr + * resolution below; sized to ncmds (segments are a subset of commands). */ + u64* seg_vaddr = arena_array(c->scratch, u64, ncmds ? ncmds : 1); + u64* seg_fileoff = arena_array(c->scratch, u64, ncmds ? ncmds : 1); + u32 nseg = 0; + + int have_text = 0; + u64 text_vmaddr = 0; + int have_main = 0; + u64 main_entryoff = 0; + u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; + u32 cf_off = 0, cf_size = 0; + + u64 pos = MACHO_HDR64_SIZE; + u64 end = pos + sizeofcmds; + for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { + u32 cmd = rd_u32_le(data + pos); + u32 cmdsize = rd_u32_le(data + pos + 4); + if (cmdsize < 8 || pos + cmdsize > end) break; + + if (cmd == LC_SEGMENT_64 && cmdsize >= MACHO_SEGCMD64_SIZE) { + const char* segname = (const char*)(data + pos + 8); + u32 seg_len = fixed16_len(segname); + u64 vmaddr = rd_u64_le(data + pos + 24); + u64 vmsize = rd_u64_le(data + pos + 32); + u64 fileoff = rd_u64_le(data + pos + 40); + u64 filesize = rd_u64_le(data + pos + 48); + u32 initprot = rd_u32_le(data + pos + 60); + ObjSegment seg; + seg.name = + seg_len ? 
pool_intern_slice(c->global, + (Slice){.s = segname, .len = seg_len}) + : 0; + seg.vaddr = vmaddr; + seg.vsize = vmsize; + seg.file_off = fileoff; + seg.file_size = filesize; + /* VM_PROT_* bits differ from OBJ_SEG_* — remap explicitly. */ + seg.perms = ((initprot & VM_PROT_READ) ? OBJ_SEG_R : 0) | + ((initprot & VM_PROT_WRITE) ? OBJ_SEG_W : 0) | + ((initprot & VM_PROT_EXECUTE) ? OBJ_SEG_X : 0); + seg.align = 1; /* Mach-O segments don't carry an explicit p_align */ + obj_image_add_segment(im, &seg); + + seg_vaddr[nseg] = vmaddr; + seg_fileoff[nseg] = fileoff; + ++nseg; + if (!have_text && seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) { + have_text = 1; + text_vmaddr = vmaddr; + } + } else if (cmd == LC_LOAD_DYLINKER) { + Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); + if (s) obj_image_set_interp(im, s); + } else if (cmd == LC_ID_DYLIB) { + Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); + if (s) obj_image_set_soname(im, s); + } else if (cmd == LC_LOAD_DYLIB || cmd == LC_LOAD_WEAK_DYLIB || + cmd == LC_REEXPORT_DYLIB) { + Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); + if (s) { + ObjImageDep d; + d.name = s; + d.imports = NULL; + d.nimports = 0; + obj_image_add_dep(im, &d); + } + } else if (cmd == LC_RPATH) { + Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); + if (s) obj_image_add_rpath(im, s); + } else if (cmd == LC_MAIN && cmdsize >= 16) { + have_main = 1; + main_entryoff = rd_u64_le(data + pos + 8); + } else if (cmd == LC_UNIXTHREAD && cmdsize >= 16 && !have_main) { + /* thread_command: flavor (u32) + count (u32) + register state. Pull + * the program counter out of the arch's state. 
*/ + u32 flavor = rd_u32_le(data + pos + 8); + u64 pc_off = 0; + int have_pc = 0; + if (cputype == CPU_TYPE_ARM64 && flavor == 6 /* ARM_THREAD_STATE64 */) { + pc_off = pos + 16 + 32u * 8u; /* x0..x28,fp,lr,sp,pc */ + have_pc = 1; + } else if (cputype == CPU_TYPE_X86_64 && + flavor == 4 /* x86_THREAD_STATE64 */) { + pc_off = pos + 16 + 16u * 8u; /* rax..r15, then rip */ + have_pc = 1; + } + if (have_pc && pc_off + 8 <= pos + cmdsize) + obj_image_set_entry(im, rd_u64_le(data + pc_off)); + } else if (cmd == LC_SYMTAB && cmdsize >= MACHO_SYMTAB_CMD_SIZE) { + symoff = rd_u32_le(data + pos + 8); + nsyms = rd_u32_le(data + pos + 12); + stroff = rd_u32_le(data + pos + 16); + strsize = rd_u32_le(data + pos + 20); + } else if (cmd == LC_DYLD_CHAINED_FIXUPS && cmdsize >= 16) { + cf_off = rd_u32_le(data + pos + 8); + cf_size = rd_u32_le(data + pos + 12); + } + pos += cmdsize; + } + + if (have_text) obj_image_set_base(im, text_vmaddr); + /* LC_MAIN entryoff is a file offset within __TEXT (which maps file 0 to + * its vmaddr); the entry vaddr is __TEXT base + entryoff. */ + if (have_main && have_text) obj_image_set_entry(im, text_vmaddr + main_entryoff); + + /* LC_SYMTAB external nlist entries -> dynamic symbols (Mach-O's analog of + * .dynsym: the dynamically-visible exports and undefined imports). 
*/ + if (nsyms && stroff + (u64)strsize <= len && + symoff + (u64)nsyms * MACHO_NLIST64_SIZE <= len) { + const u8* strtab = data + stroff; + const u8* sbase = data + symoff; + for (u32 i = 0; i < nsyms; ++i) { + const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; + u32 strx = rd_u32_le(p + 0); + u8 n_type = p[4]; + u8 n_sect = p[5]; + u16 n_desc = rd_u16_le(p + 6); + u64 n_value = rd_u64_le(p + 8); + if (n_type & N_STAB) continue; /* debug stab, not dynamic */ + if (!(n_type & N_EXT)) continue; /* locals aren't dynamic */ + if (strx >= strsize) continue; + const char* nm = (const char*)(strtab + strx); + u32 nlen = 0; + while (strx + nlen < strsize && nm[nlen]) ++nlen; + if (!nlen) continue; + + u8 type_field = (u8)(n_type & N_TYPE); + ObjImageSym ds; + ds.name = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + ds.bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL; + ds.value = (type_field == N_SECT || type_field == N_ABS) ? n_value : 0; + ds.size = 0; + if (type_field == N_SECT && n_sect >= 1 && n_sect <= nmsecs) { + ds.section = msecs[n_sect - 1].obj_sec; + ds.kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC + : SK_OBJ; + } else { + ds.section = OBJ_SEC_NONE; /* undefined import / absolute */ + ds.kind = SK_NOTYPE; + } + obj_image_add_dynsym(im, &ds); + } + } + + /* LC_DYLD_CHAINED_FIXUPS binds/rebases -> dynamic relocations. */ + if (cf_size >= 28 && (u64)cf_off + cf_size <= len) { + const u8* cf = data + cf_off; + u32 starts_offset = rd_u32_le(cf + 4); + u32 imports_offset = rd_u32_le(cf + 8); + u32 symbols_offset = rd_u32_le(cf + 12); + u32 imports_count = rd_u32_le(cf + 16); + u32 imports_format = rd_u32_le(cf + 20); + u32 relative_kind = + (cputype == CPU_TYPE_X86_64) ? R_X64_RELATIVE : R_AARCH64_RELATIVE; + + /* Import symbol names, indexed by 0-based bind ordinal. */ + Sym* imp_names = arena_zarray(c->scratch, Sym, imports_count ? 
imports_count : 1); + if (imports_format == DYLD_CHAINED_IMPORT && + (u64)imports_offset + (u64)imports_count * 4u <= cf_size) { + for (u32 i = 0; i < imports_count; ++i) { + u32 packed = rd_u32_le(cf + imports_offset + i * 4u); + u32 name_off = (packed >> 9) & 0x7fffffu; + u64 so = (u64)symbols_offset + name_off; + if (so >= cf_size) continue; + const char* nm = (const char*)(cf + so); + u32 maxn = (u32)(cf_size - so); + u32 nlen = 0; + while (nlen < maxn && nm[nlen]) ++nlen; + if (nlen) + imp_names[i] = + pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + } + } + + if ((u64)starts_offset + 4u <= cf_size) { + const u8* sib = cf + starts_offset; + u32 seg_count = rd_u32_le(sib + 0); + for (u32 si = 0; si < seg_count; ++si) { + if ((u64)starts_offset + 4u + (u64)si * 4u + 4u > cf_size) break; + u32 seg_info_offset = rd_u32_le(sib + 4 + si * 4u); + if (!seg_info_offset) continue; + if ((u64)starts_offset + seg_info_offset + 22u > cf_size) continue; + const u8* sis = cf + starts_offset + seg_info_offset; + u16 pointer_format = rd_u16_le(sis + 6); + u64 segment_offset = rd_u64_le(sis + 8); /* file offset of segment */ + u16 page_count = rd_u16_le(sis + 20); + /* Only the DYLD_CHAINED_PTR_64 family shares the bit layout below. */ + if (pointer_format != DYLD_CHAINED_PTR_64 && pointer_format != 6u) + continue; + u16 page_size = rd_u16_le(sis + 4); + if (!page_size) continue; + /* Resolve this segment's vmaddr from its file offset. 
*/ + u64 seg_va = 0; + int found_seg = 0; + for (u32 k = 0; k < nseg; ++k) { + if (seg_fileoff[k] == segment_offset) { + seg_va = seg_vaddr[k]; + found_seg = 1; + break; + } + } + if (!found_seg) continue; + for (u32 pg = 0; pg < page_count; ++pg) { + u64 ps_pos = (u64)starts_offset + seg_info_offset + 22u + pg * 2u; + if (ps_pos + 2u > cf_size) break; + u16 ps = rd_u16_le(cf + ps_pos); + if (ps == 0xFFFFu) continue; + u32 cur = ps; + for (;;) { + u64 file_loc = segment_offset + (u64)pg * page_size + cur; + if (file_loc + 8u > len) break; + u64 v = rd_u64_le(data + file_loc); + u64 vaddr = seg_va + (u64)pg * page_size + cur; + int is_bind = (int)((v >> 63) & 1u); + ObjImageReloc dr; + dr.section = OBJ_SEC_NONE; + dr.offset = vaddr; + if (is_bind) { + u32 ordinal = (u32)(v & 0xffffffu); + dr.sym_name = (ordinal < imports_count) ? imp_names[ordinal] : 0; + dr.addend = (i64)((v >> 24) & 0xffu); + dr.kind = R_ABS64; + } else { + dr.sym_name = 0; + dr.addend = (i64)(v & (((u64)1 << 36) - 1u)); + dr.kind = (RelocKind)relative_kind; + } + obj_image_add_dynreloc(im, &dr); + u32 next = (u32)((v >> 51) & 0xfffu); + if (!next) break; + cur += next * 4u; + if (cur >= page_size) break; + } + } + } + } + } +} + ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, size_t len) { (void)name; @@ -141,9 +422,13 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, if (!macho || !macho->reloc_from) compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x", cputype); - if (filetype != MH_OBJECT) + /* MH_OBJECT parses to the section/symbol/reloc view only. MH_EXECUTE / + * MH_DYLIB additionally get the linked-image view (read_macho_image, at + * the end); their sections still parse through the same passes. 
*/ + if (filetype != MH_OBJECT && filetype != MH_EXECUTE && filetype != MH_DYLIB) compiler_panic(c, no_loc(), - "read_macho: only MH_OBJECT supported, got filetype %u", + "read_macho: unsupported filetype %u (expected MH_OBJECT, " + "MH_EXECUTE, or MH_DYLIB)", filetype); if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) @@ -574,6 +859,11 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, } } + /* MH_EXECUTE / MH_DYLIB: attach the linked-image view (segments, dylibs, + * entry, dynamic symbols + relocations). */ + if (filetype != MH_OBJECT) + read_macho_image(c, ob, data, len, filetype, cputype, msecs, nmsecs); + obj_finalize(ob); return ob; } diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -366,10 +366,16 @@ ObjSecId obj_section_ex(ObjBuilder* ob, Sym name, SecKind kind, SecSem sem, s->info = info; s->group_id = OBJ_GROUP_NONE; s->bss_size = 0; + s->addr = 0; buf_init(&s->bytes, ob->heap); return (ObjSecId)id; } +void obj_section_set_addr(ObjBuilder* ob, ObjSecId id, u64 addr) { + Section* s = Sections_at(&ob->sections, id); + if (s && id != OBJ_SEC_NONE) s->addr = addr; +} + void obj_section_set_flags(ObjBuilder* ob, ObjSecId id, u16 flags) { Section* s = Sections_at(&ob->sections, id); if (s && id != OBJ_SEC_NONE) s->flags = flags; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -280,6 +280,7 @@ typedef struct Section { u32 info; /* section-format dependent, typed by sem/ext_kind */ ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a COMDAT/group */ u32 bss_size; /* nonzero only for SEC_BSS */ + u64 addr; /* load vaddr (sh_addr); 0 for relocatable inputs */ /* Format-specific raw section type (ELF sh_type, COFF Characteristics * subfield, etc.). 
Set by .o readers when the canonical SecSem * mapping is lossy — e.g., SHT_LLVM_ADDRSIG (0x6FFF4C03) and @@ -403,6 +404,7 @@ void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags); void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align); void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId); void obj_section_set_link_info(ObjBuilder*, ObjSecId, ObjSecId link, u32 info); +void obj_section_set_addr(ObjBuilder*, ObjSecId, u64 addr); /* Set format-specific raw sh_type/sh_flags overrides (see Section.ext_type * comment). Zero ext_type means "no override". */ void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind, u32 ext_type, diff --git a/test/objdump/aarch64-darwin/cases/01-image-headers.expected b/test/objdump/aarch64-darwin/cases/01-image-headers.expected @@ -0,0 +1,22 @@ +exec.macho: file format macho64-arm64 + +architecture: arm64, flags 0x00000112: +EXEC_P, HAS_SYMS, D_PAGED +start address 0x00000001000003b8 +format: macho64, sections: 3, symbols: 2 + +Image: + entry point 0x00000001000003b8 + image base 0x0000000100000000 + interpreter /usr/lib/dyld + +Program Header: + __PAGEZERO off 0x0000000000000000 vaddr 0x0000000000000000 align 2**0 + filesz 0x0000000000000000 memsz 0x0000000100000000 flags --- + __TEXT off 0x0000000000000000 vaddr 0x0000000100000000 align 2**0 + filesz 0x0000000000000450 memsz 0x0000000000004000 flags r-x + __LINKEDIT off 0x0000000000004000 vaddr 0x0000000100004000 align 2**0 + filesz 0x0000000000000192 memsz 0x0000000000004000 flags r-- + +Dynamic Section: + diff --git a/test/objdump/aarch64-darwin/cases/01-image-headers.sh b/test/objdump/aarch64-darwin/cases/01-image-headers.sh @@ -0,0 +1,12 @@ +# Golden: `-f` file header + `-p` private/program headers over a committed +# freestanding aarch64 Mach-O *executable* (exec.macho; source + regen recipe +# in execm.c). 
Locks in the linked-image inspection added for the Mach-O +# MH_EXECUTE / MH_DYLIB readers: +# -f EXEC_P / D_PAGED flag bits and the real entry point (LC_MAIN) +# -p entry point, __TEXT image base, interpreter (LC_LOAD_DYLINKER), and the +# __PAGEZERO / __TEXT / __LINKEDIT segments with remapped R/W/X perms +# Committed as a binary (like the ELF exec.elf fixture) so the golden is stable +# and decoupled from the linker. Copied to a fixed name so the file-format +# label line is deterministic. +cp "$(dirname "$0")/exec.macho" exec.macho +"$CFREE" objdump -f -p exec.macho diff --git a/test/objdump/aarch64-darwin/cases/exec.macho b/test/objdump/aarch64-darwin/cases/exec.macho Binary files differ. diff --git a/test/objdump/aarch64-darwin/cases/execm.c b/test/objdump/aarch64-darwin/cases/execm.c @@ -0,0 +1,6 @@ +/* Fixture source for exec.macho — a freestanding aarch64 Mach-O executable. + Regenerate with: + cfree cc -target arm64-darwin -ffreestanding -nostdlib -e _start \ + execm.c -o exec.macho */ +int compute(int a, int b) { return a * b + 7; } +void _start(void) { (void)compute(3, 5); } diff --git a/test/objdump/aarch64/cases/02-size-sysv-image.expected b/test/objdump/aarch64/cases/02-size-sysv-image.expected @@ -0,0 +1,6 @@ +exec.elf : +section size addr +.text 00000264 00404000 +.eh_frame 0000006c 00408000 +.note.gnu.build- 00000020 00400120 +Total 000002f0 diff --git a/test/objdump/aarch64/cases/02-size-sysv-image.sh b/test/objdump/aarch64/cases/02-size-sysv-image.sh @@ -0,0 +1,6 @@ +# Golden: `size -A` (SysV) over the committed aarch64 ELF *executable*. +# Locks in the inherited-tool image support: the section `addr` column now +# carries each section's load vaddr (0 for relocatables), so SysV `size` +# reports the real layout of a linked image rather than all-zero addresses. 
+cp "$(dirname "$0")/exec.elf" exec.elf +"$CFREE" size -A exec.elf diff --git a/test/objdump/aarch64/cases/03-dynamic-rpath.expected b/test/objdump/aarch64/cases/03-dynamic-rpath.expected @@ -0,0 +1,17 @@ +rpath.so: file format elf64-arm64 + +architecture: arm64, flags 0x00000140: +DYNAMIC, D_PAGED +start address 0x0000000000000000 +format: elf64, sections: 2, symbols: 0 + +Image: + entry point 0x0000000000000000 + image base 0x0000000000000000 + +Program Header: + (none) + +Dynamic Section: + RPATH /opt/lib:$ORIGIN/../lib + diff --git a/test/objdump/aarch64/cases/03-dynamic-rpath.sh b/test/objdump/aarch64/cases/03-dynamic-rpath.sh @@ -0,0 +1,9 @@ +# Golden: `-f` + `-p` over a minimal aarch64 ET_DYN shared object carrying a +# DT_RUNPATH (rpath.so; regenerate with gen_rpath.py). Locks in: +# -f the DYNAMIC kind flag (vs EXEC_P in 01-image-headers) +# -p the RPATH line in the Dynamic Section +# cfree can't emit a shared object yet, so the fixture is hand-built and +# committed (like test/dwarf/dwarf.o). Copied to a fixed name so the +# file-format label is deterministic. 
+cp "$(dirname "$0")/rpath.so" rpath.so +"$CFREE" objdump -f -p rpath.so diff --git a/test/objdump/aarch64/cases/04-disasm-stripped-segment.expected b/test/objdump/aarch64/cases/04-disasm-stripped-segment.expected @@ -0,0 +1,158 @@ +exec.stripped.elf: file format elf64-arm64 + +Disassembly of segment LOAD: + + 404000: ff c3 01 d1 sub sp, sp, #112 + 404004: f1 83 01 91 add x17, sp, #96 + 404008: 3d 02 00 f8 stur x29, [x17] + 40400c: 3e 82 00 f8 stur x30, [x17, #8] + 404010: fd c3 01 91 add x29, sp, #112 + 404014: 1f 20 03 d5 nop + 404018: 1f 20 03 d5 nop + 40401c: 1f 20 03 d5 nop + 404020: 1f 20 03 d5 nop + 404024: 1f 20 03 d5 nop + 404028: 1f 20 03 d5 nop + 40402c: 1f 20 03 d5 nop + 404030: 1f 20 03 d5 nop + 404034: 1f 20 03 d5 nop + 404038: 1f 20 03 d5 nop + 40403c: 1f 20 03 d5 nop + 404040: 1f 20 03 d5 nop + 404044: 1f 20 03 d5 nop + 404048: 1f 20 03 d5 nop + 40404c: 1f 20 03 d5 nop + 404050: 1f 20 03 d5 nop + 404054: 1f 20 03 d5 nop + 404058: 1f 20 03 d5 nop + 40405c: 1f 20 03 d5 nop + 404060: a0 83 1e f8 stur x0, [x29, #-24] + 404064: a8 0b 80 d2 movz x8, 0x5d + 404068: a9 83 5e f8 ldur x9, [x29, #-24] + 40406c: eb 03 09 aa mov x11, x9 + 404070: a9 83 5e f8 ldur x9, [x29, #-24] + 404074: ec 03 09 aa mov x12, x9 + 404078: ac 83 1d f8 stur x12, [x29, #-40] + 40407c: a9 a3 00 d1 sub x9, x29, #40 + 404080: a9 83 1c f8 stur x9, [x29, #-56] + 404084: a9 83 5c f8 ldur x9, [x29, #-56] + 404088: ec 03 09 aa mov x12, x9 + 40408c: ed 03 0c aa mov x13, x12 + 404090: a9 01 40 f9 ldr x9, [x13] + 404094: a9 03 1b f8 stur x9, [x29, #-80] + 404098: a9 03 5b f8 ldur x9, [x29, #-80] + 40409c: ee 03 09 aa mov x14, x9 + 4040a0: a8 03 1e f8 stur x8, [x29, #-32] + 4040a4: ab 03 1d f8 stur x11, [x29, #-48] + 4040a8: ad 03 1c f8 stur x13, [x29, #-64] + 4040ac: ac 83 1b f8 stur x12, [x29, #-72] + 4040b0: ae 83 1a f8 stur x14, [x29, #-88] + 4040b4: a1 03 5e f8 ldur x1, [x29, #-32] + 4040b8: a0 83 5a f8 ldur x0, [x29, #-88] + 4040bc: 01 00 00 d4 svc #0x0 + 4040c0: a0 83 1a f8 stur x0, 
[x29, #-88] + 4040c4: a9 83 5a f8 ldur x9, [x29, #-88] + 4040c8: e8 03 09 aa mov x8, x9 + 4040cc: a9 03 5c f8 ldur x9, [x29, #-64] + 4040d0: 28 01 00 f9 str x8, [x9] + 4040d4: a8 03 1a f8 stur x8, [x29, #-96] + 4040d8: a0 83 5d f8 ldur x0, [x29, #-40] + 4040dc: 05 00 00 14 b 0x4040f0 + 4040e0: 08 00 80 d2 movz x8, 0x0 + 4040e4: a8 83 19 f8 stur x8, [x29, #-104] + 4040e8: a0 83 59 f8 ldur x0, [x29, #-104] + 4040ec: 01 00 00 14 b 0x4040f0 + 4040f0: b0 03 00 91 add x16, x29, #0 + 4040f4: 1d 02 5f f8 ldur x29, [x16, #-16] + 4040f8: 1e 82 5f f8 ldur x30, [x16, #-8] + 4040fc: 1f 02 00 91 add sp, x16, #0 + 404100: c0 03 5f d6 ret + 404104: ff c3 00 d1 sub sp, sp, #48 + 404108: f1 83 00 91 add x17, sp, #32 + 40410c: 3d 02 00 f8 stur x29, [x17] + 404110: 3e 82 00 f8 stur x30, [x17, #8] + 404114: fd c3 00 91 add x29, sp, #48 + 404118: 1f 20 03 d5 nop + 40411c: 1f 20 03 d5 nop + 404120: 1f 20 03 d5 nop + 404124: 1f 20 03 d5 nop + 404128: 1f 20 03 d5 nop + 40412c: 1f 20 03 d5 nop + 404130: 1f 20 03 d5 nop + 404134: 1f 20 03 d5 nop + 404138: 1f 20 03 d5 nop + 40413c: 1f 20 03 d5 nop + 404140: 1f 20 03 d5 nop + 404144: 1f 20 03 d5 nop + 404148: 1f 20 03 d5 nop + 40414c: 1f 20 03 d5 nop + 404150: 1f 20 03 d5 nop + 404154: 1f 20 03 d5 nop + 404158: 1f 20 03 d5 nop + 40415c: 1f 20 03 d5 nop + 404160: 1f 20 03 d5 nop + 404164: a0 c3 1e b8 stur w0, [x29, #-20] + 404168: a1 83 1e b8 stur w1, [x29, #-24] + 40416c: a9 c3 5e b8 ldur w9, [x29, #-20] + 404170: aa 83 5e b8 ldur w10, [x29, #-24] + 404174: 28 7d 0a 1b mul w8, w9, w10 + 404178: e9 00 80 52 movz w9, 0x7 + 40417c: 0b 01 09 0b add w11, w8, w9 + 404180: a8 43 1e b8 stur w8, [x29, #-28] + 404184: ab 03 1e b8 stur w11, [x29, #-32] + 404188: a0 03 5e b8 ldur w0, [x29, #-32] + 40418c: 05 00 00 14 b 0x4041a0 + 404190: 08 00 80 52 movz w8, 0x0 + 404194: a8 c3 1d b8 stur w8, [x29, #-36] + 404198: a0 c3 5d b8 ldur w0, [x29, #-36] + 40419c: 01 00 00 14 b 0x4041a0 + 4041a0: b0 03 00 91 add x16, x29, #0 + 4041a4: 1d 02 5f f8 ldur x29, [x16, 
#-16] + 4041a8: 1e 82 5f f8 ldur x30, [x16, #-8] + 4041ac: 1f 02 00 91 add sp, x16, #0 + 4041b0: c0 03 5f d6 ret + 4041b4: ff c3 00 d1 sub sp, sp, #48 + 4041b8: f1 83 00 91 add x17, sp, #32 + 4041bc: 3d 02 00 f8 stur x29, [x17] + 4041c0: 3e 82 00 f8 stur x30, [x17, #8] + 4041c4: fd c3 00 91 add x29, sp, #48 + 4041c8: 1f 20 03 d5 nop + 4041cc: 1f 20 03 d5 nop + 4041d0: 1f 20 03 d5 nop + 4041d4: 1f 20 03 d5 nop + 4041d8: 1f 20 03 d5 nop + 4041dc: 1f 20 03 d5 nop + 4041e0: 1f 20 03 d5 nop + 4041e4: 1f 20 03 d5 nop + 4041e8: 1f 20 03 d5 nop + 4041ec: 1f 20 03 d5 nop + 4041f0: 1f 20 03 d5 nop + 4041f4: 1f 20 03 d5 nop + 4041f8: 1f 20 03 d5 nop + 4041fc: 1f 20 03 d5 nop + 404200: 1f 20 03 d5 nop + 404204: 1f 20 03 d5 nop + 404208: 1f 20 03 d5 nop + 40420c: 1f 20 03 d5 nop + 404210: 1f 20 03 d5 nop + 404214: a8 00 80 52 movz w8, 0x5 + 404218: 6b 00 80 52 movz w11, 0x3 + 40421c: a8 c3 1e b8 stur w8, [x29, #-20] + 404220: ab 83 1e b8 stur w11, [x29, #-24] + 404224: a0 83 5e b8 ldur w0, [x29, #-24] + 404228: a1 c3 5e b8 ldur w1, [x29, #-20] + 40422c: b6 ff ff 97 bl 0x404104 + 404230: a0 43 1e b8 stur w0, [x29, #-28] + 404234: a9 43 5e b8 ldur w9, [x29, #-28] + 404238: 28 7d 40 93 .inst 0x93407d28 + 40423c: a8 83 1d f8 stur x8, [x29, #-40] + 404240: a0 83 5d f8 ldur x0, [x29, #-40] + 404244: 6f ff ff 97 bl 0x404000 + 404248: a0 03 1d f8 stur x0, [x29, #-48] + 40424c: 01 00 00 14 b 0x404250 + 404250: b0 03 00 91 add x16, x29, #0 + 404254: 1d 02 5f f8 ldur x29, [x16, #-16] + 404258: 1e 82 5f f8 ldur x30, [x16, #-8] + 40425c: 1f 02 00 91 add sp, x16, #0 + 404260: c0 03 5f d6 ret + diff --git a/test/objdump/aarch64/cases/04-disasm-stripped-segment.sh b/test/objdump/aarch64/cases/04-disasm-stripped-segment.sh @@ -0,0 +1,17 @@ +# Golden: `-d` over a *fully section-stripped* aarch64 ELF executable. 
We +# derive the fixture from the committed exec.elf by zeroing the section-header +# table fields (e_shoff / e_shnum / e_shstrndx) — the same shape objcopy +# --strip-sections / packers produce. With no section table, objdump's normal +# section-driven disassembly finds nothing, so this locks in the fallback: +# disassemble the executable PT_LOAD segment directly, using its vaddr as the +# base (note the `bl 0x4041xx` targets resolve against that base, not 0). The +# stripping is format-agnostic ELF-header surgery; the disasm path is not. +python3 - "$(dirname "$0")/exec.elf" exec.stripped.elf <<'PY' +import sys +data = bytearray(open(sys.argv[1], "rb").read()) +data[0x28:0x30] = b"\x00" * 8 # e_shoff +data[0x3c:0x3e] = b"\x00" * 2 # e_shnum +data[0x3e:0x40] = b"\x00" * 2 # e_shstrndx +open(sys.argv[2], "wb").write(data) +PY +"$CFREE" objdump -d exec.stripped.elf diff --git a/test/objdump/aarch64/cases/gen_rpath.py b/test/objdump/aarch64/cases/gen_rpath.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Regenerates rpath.so: a minimal aarch64 ET_DYN ELF carrying a DT_RUNPATH +# entry, used to golden objdump's -p rpath rendering. cfree can't emit a +# shared object yet, so this is hand-built (like test/dwarf/dwarf.o). It has +# only .shstrtab / .dynstr / .dynamic — just enough for read_elf's image +# pass to surface the runpath. 
Run: python3 gen_rpath.py +import struct +EM_AARCH64 = 183 +def u16(x): return struct.pack('<H', x) +def u32(x): return struct.pack('<I', x) +def u64(x): return struct.pack('<Q', x) + +shstr = b'\x00.shstrtab\x00.dynstr\x00.dynamic\x00' +n_shstr = shstr.index(b'.shstrtab') +n_dynstr = shstr.index(b'.dynstr') +n_dyn = shstr.index(b'.dynamic') +dynstr = b'\x00/opt/lib:$ORIGIN/../lib\x00' +DT_RUNPATH, DT_NULL = 29, 0 +dynamic = struct.pack('<QQ', DT_RUNPATH, 1) + struct.pack('<QQ', DT_NULL, 0) + +EH = SH = 64 +off_shstr = EH +off_dynstr = off_shstr + len(shstr) +off_dyn = (off_dynstr + len(dynstr) + 7) & ~7 +off_sht = (off_dyn + len(dynamic) + 7) & ~7 + +def shdr(name, typ, off, size, link, align, ent): + return (u32(name) + u32(typ) + u64(0) + u64(0) + u64(off) + u64(size) + + u32(link) + u32(0) + u64(align) + u64(ent)) + +sht = (shdr(0, 0, 0, 0, 0, 0, 0) + + shdr(n_shstr, 3, off_shstr, len(shstr), 0, 1, 0) + + shdr(n_dynstr, 3, off_dynstr, len(dynstr), 0, 1, 0) + + shdr(n_dyn, 6, off_dyn, len(dynamic), 2, 8, 16)) + +eh = (b'\x7fELF' + bytes([2, 1, 1, 0]) + b'\x00' * 8 + u16(3) + u16(EM_AARCH64) + + u32(1) + u64(0) + u64(0) + u64(off_sht) + u32(0) + u16(EH) + u16(0) + + u16(0) + u16(SH) + u16(4) + u16(1)) + +buf = bytearray(off_sht + len(sht)) +buf[0:64] = eh +buf[off_shstr:off_shstr + len(shstr)] = shstr +buf[off_dynstr:off_dynstr + len(dynstr)] = dynstr +buf[off_dyn:off_dyn + len(dynamic)] = dynamic +buf[off_sht:off_sht + len(sht)] = sht +open('rpath.so', 'wb').write(buf) diff --git a/test/objdump/aarch64/cases/rpath.so b/test/objdump/aarch64/cases/rpath.so Binary files differ. diff --git a/test/objdump/run.sh b/test/objdump/run.sh @@ -1,5 +1,7 @@ #!/bin/sh -# Driver-level `cfree objdump` golden tests. +# Driver-level golden tests for linked-image inspection. Mostly `cfree +# objdump`, plus the inherited tools (e.g. `size`) that read the same +# image view; each case script picks its own subcommand. 
# # Per-arch subdirectories (test/objdump/<arch>/cases/) hold: # <name>.sh — script invoked with CFREE and a per-case sandbox