kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 704ea4520ffd3e01d0d9f37a02f12119f6e38edc
parent d8f401b016a594425174caeacea85b888c39a1fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 12:23:49 -0700

obj: builder mutators + strip/objcopy drivers

Adds the post-finalize builder mutator surface (remove, rename,
set-bind/vis, replace-bytes) for relocatable .o files. The mutators are
driven by tombstone bits, which are filtered by a new obj_sweep_dead
pass that emitters call at the top of emit_elf / emit_macho. The
historical spurious-UNDEF prune collapses into the same sweep.

Closes CTOOLCHAIN.md gap #1 (ObjGroup reader iterator) and unblocks
strip + objcopy. Both ship as driver tools covering the high-traffic
build-system subset; archive paths reuse a shared per-member
global-symbol collector now hosted in driver/inputs.c.

Scope follows the doc's first-cut plan: relocatable .o and .a only.
ET_EXEC/ET_DYN, --only-keep-debug + --add-gnu-debuglink, --extract-symbol,
--change-section-address, and srec/ihex/binary outputs are deferred.

Diffstat:
M driver/ar.c | 92 +++++++++----------------------------------------------------------------------
M driver/driver.h | 4 ++++
M driver/inputs.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/inputs.h | 31 +++++++++++++++++++++++++++++++
M driver/main.c | 12 ++++++++++++
A driver/objcopy.c | 739 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/ranlib.c | 109 +++++--------------------------------------------------------------------------
A driver/strip.c | 661 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/cfree/core.h | 7 +++++++
M include/cfree/object.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M src/api/object_builder.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/api/object_file.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/obj/elf_emit.c | 24 ++++++++++++++++--------
M src/obj/macho_emit.c | 22 +++++++++-------------
M src/obj/obj.c | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/obj.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A test/elf/unit/groupiter.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/elf/unit/mutate.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/objcopy/cases/01-rename-section.expected | 1 +
A test/objcopy/cases/01-rename-section.sh | 6 ++++++
A test/objcopy/cases/02-redefine-sym.expected | 2 ++
A test/objcopy/cases/02-redefine-sym.sh | 7 +++++++
A test/objcopy/cases/03-localize-symbol.expected | 2 ++
A test/objcopy/cases/03-localize-symbol.sh | 8 ++++++++
A test/objcopy/cases/04-add-section.expected | 2 ++
A test/objcopy/cases/04-add-section.sh | 7 +++++++
A test/objcopy/run.sh | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/strip/cases/01-strip-debug.expected | 5 +++++
A test/strip/cases/01-strip-debug.sh | 14 ++++++++++++++
A test/strip/cases/02-strip-all-keeps-reloc-targets.expected | 4 ++++
A test/strip/cases/02-strip-all-keeps-reloc-targets.sh | 15 +++++++++++++++
A test/strip/cases/03-keep-symbol.expected | 3 +++
A test/strip/cases/03-keep-symbol.sh | 12 ++++++++++++
A test/strip/cases/04-archive-strip-debug.expected | 7 +++++++
A test/strip/cases/04-archive-strip-debug.sh | 24 ++++++++++++++++++++++++
A test/strip/run.sh | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/test.mk | 10 ++++++++--
37 files changed, 2820 insertions(+), 218 deletions(-)

diff --git a/driver/ar.c b/driver/ar.c @@ -4,6 +4,8 @@ #include <cfree/core.h> #include <cfree/object.h> +#include "inputs.h" + /* `cfree ar` — POSIX ar archive front-end. * * Supported operations (mutually exclusive): @@ -467,99 +469,25 @@ static int ar_do_write(DriverEnv* env, const char* archive_path, int nmembers, } for (i = 0; i < nm; ++i) { CfreeBytes in; - CfreeObjFile* of = NULL; - CfreeObjSymIter* it = NULL; - CfreeObjSymInfo si; + void* blob = NULL; + size_t blob_size = 0; + const char** names = NULL; uint32_t count = 0; - size_t name_bytes = 0; - size_t alloc_sz; - char* blob; - const char** name_arr; - char* name_storage; - size_t cursor = 0; in.name = members[i].name; in.data = members[i].data; in.len = members[i].len; - if (cfree_obj_open(&ctx, &in, &of) != CFREE_OK) - continue; /* not an object file → no symbols */ - - /* Pass A: count globally-defined symbols and total name bytes. */ - if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { - cfree_obj_free(of); - driver_errf(AR_TOOL, "out of memory"); - rc = 1; - goto done; - } - for (;;) { - CfreeIterResult r = cfree_obj_symiter_next(it, &si); - if (r != CFREE_ITER_ITEM) break; - if (si.bind != CFREE_SB_GLOBAL) continue; - if (si.section == CFREE_SECTION_NONE) continue; - if (!si.name || !si.name[0]) continue; - count += 1; - { - const char* p = si.name; - while (*p++) ++name_bytes; - name_bytes += 1; /* NUL */ - } - } - cfree_obj_symiter_free(it); - - if (count == 0) { - cfree_obj_free(of); - continue; - } - - /* Single allocation: [name_arr][name_storage]. Names are copied - * out of the obj file before close so they outlive cfree_obj_free. 
*/ - alloc_sz = (size_t)count * sizeof(const char*) + name_bytes; - blob = (char*)driver_alloc_zeroed(env, alloc_sz); - if (!blob) { - cfree_obj_free(of); - driver_errf(AR_TOOL, "out of memory"); + if (driver_collect_obj_global_syms(env, &ctx, AR_TOOL, &in, &blob, + &blob_size, &names, &count) != 0) { rc = 1; goto done; } - name_arr = (const char**)blob; - name_storage = blob + (size_t)count * sizeof(const char*); - - /* Pass B: copy names. */ - if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { - driver_free(env, blob, alloc_sz); - cfree_obj_free(of); - driver_errf(AR_TOOL, "out of memory"); - rc = 1; - goto done; - } - { - uint32_t k = 0; - for (;;) { - CfreeIterResult r; - const char* p; - char* dst; - if (k >= count) break; - r = cfree_obj_symiter_next(it, &si); - if (r != CFREE_ITER_ITEM) break; - if (si.bind != CFREE_SB_GLOBAL) continue; - if (si.section == CFREE_SECTION_NONE) continue; - if (!si.name || !si.name[0]) continue; - dst = name_storage + cursor; - name_arr[k] = dst; - for (p = si.name; *p; ++p) *dst++ = *p; - *dst++ = '\0'; - cursor = (size_t)(dst - name_storage); - k++; - } - count = k; - } - cfree_obj_symiter_free(it); - cfree_obj_free(of); + if (count == 0) continue; sym_allocs[i] = blob; - sym_alloc_szs[i] = alloc_sz; - msyms[i].names = name_arr; + sym_alloc_szs[i] = blob_size; + msyms[i].names = names; msyms[i].count = count; } opts.symbol_index = 1; diff --git a/driver/driver.h b/driver/driver.h @@ -36,6 +36,8 @@ int driver_as(int argc, char **argv); int driver_ld(int argc, char **argv); int driver_ar(int argc, char **argv); int driver_ranlib(int argc, char **argv); +int driver_strip(int argc, char **argv); +int driver_objcopy(int argc, char **argv); int driver_objdump(int argc, char **argv); int driver_dbg(int argc, char **argv); int driver_run(int argc, char **argv); @@ -51,6 +53,8 @@ void driver_help_as(void); void driver_help_ld(void); void driver_help_ar(void); void driver_help_ranlib(void); +void driver_help_strip(void); +void 
driver_help_objcopy(void); void driver_help_objdump(void); void driver_help_dbg(void); void driver_help_run(void); diff --git a/driver/inputs.c b/driver/inputs.c @@ -261,3 +261,112 @@ out: driver_free(env, objs, nsrc * sizeof(*objs)); return rc; } + +/* ---------------------------------------------------------------------- + * Per-object global-symbol collection (shared by ar / ranlib / strip). + * ---------------------------------------------------------------------- */ + +int driver_collect_obj_global_syms(DriverEnv* env, const CfreeContext* ctx, + const char* tool, const CfreeBytes* member, + void** blob_out, size_t* blob_size_out, + const char*** names_out, + uint32_t* count_out) { + CfreeObjFile* of = NULL; + CfreeObjSymIter* it = NULL; + CfreeObjSymInfo si; + uint32_t count = 0; + size_t name_bytes = 0; + size_t alloc_sz; + char* blob; + const char** name_arr; + char* name_storage; + size_t cursor = 0; + + *blob_out = NULL; + *blob_size_out = 0; + *names_out = NULL; + *count_out = 0; + + if (cfree_obj_open(ctx, member, &of) != CFREE_OK) { + /* Not a recognized object — caller treats as "no symbols". */ + return 0; + } + + /* Pass A: count + measure name bytes. 
*/ + if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { + cfree_obj_free(of); + driver_errf(tool, "out of memory"); + return 1; + } + for (;;) { + CfreeIterResult r = cfree_obj_symiter_next(it, &si); + if (r != CFREE_ITER_ITEM) break; + if (si.bind != CFREE_SB_GLOBAL) continue; + if (si.section == CFREE_SECTION_NONE) continue; + if (!si.name || !si.name[0]) continue; + count += 1; + { + const char* p = si.name; + while (*p++) ++name_bytes; + name_bytes += 1; /* NUL */ + } + } + cfree_obj_symiter_free(it); + + if (count == 0) { + cfree_obj_free(of); + return 0; + } + + alloc_sz = (size_t)count * sizeof(const char*) + name_bytes; + blob = (char*)driver_alloc_zeroed(env, alloc_sz); + if (!blob) { + cfree_obj_free(of); + driver_errf(tool, "out of memory"); + return 1; + } + name_arr = (const char**)blob; + name_storage = blob + (size_t)count * sizeof(const char*); + + /* Pass B: copy names. */ + if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { + driver_free(env, blob, alloc_sz); + cfree_obj_free(of); + driver_errf(tool, "out of memory"); + return 1; + } + { + uint32_t k = 0; + for (;;) { + CfreeIterResult r; + const char* p; + char* dst; + if (k >= count) break; + r = cfree_obj_symiter_next(it, &si); + if (r != CFREE_ITER_ITEM) break; + if (si.bind != CFREE_SB_GLOBAL) continue; + if (si.section == CFREE_SECTION_NONE) continue; + if (!si.name || !si.name[0]) continue; + dst = name_storage + cursor; + name_arr[k] = dst; + for (p = si.name; *p; ++p) *dst++ = *p; + *dst++ = '\0'; + cursor = (size_t)(dst - name_storage); + k++; + } + count = k; + } + cfree_obj_symiter_free(it); + cfree_obj_free(of); + + *blob_out = blob; + *blob_size_out = alloc_sz; + *names_out = name_arr; + *count_out = count; + return 0; +} + +void driver_collect_obj_global_syms_free(DriverEnv* env, void* blob, + size_t blob_size) { + if (blob) driver_free(env, blob, blob_size); +} diff --git a/driver/inputs.h b/driver/inputs.h @@ -5,6 +5,7 @@ #include <cfree/compile.h> #include <cfree/link.h> +#include 
<cfree/object.h> /* Shared input handling for tools that take a mixed list of C sources, * stdin source, object files, and static archives — `cfree run` and @@ -93,4 +94,34 @@ int driver_inputs_compile_and_jit(DriverInputs *, CfreeCompiler *, void *extern_resolver_user, CfreeJit **out_jit); +/* ---------------------------------------------------------------------- + * Per-object global-symbol collection + * + * ar / ranlib / strip all need to enumerate the globally-defined symbols + * of an object file so the archive symbol index (ar/ranlib) or the + * --strip-unneeded keep-set (strip) can be populated. The shape is + * always the same: open the object, walk symbols, copy out the names + * of every SB_GLOBAL symbol with a defining section. + * + * driver_collect_obj_global_syms allocates a single heap block laid out + * as [const char* names[count]][NUL-separated name bytes]. The caller + * frees the block via driver_collect_obj_global_syms_free. + * + * Returns: + * 0 member parsed, output filled (count may be 0 if the object has + * no globally-defined symbols, or the member is not a recognized + * object file at all — in that case *blob_out is NULL). + * 1 fatal failure (out of memory, etc.); an error has been reported + * via driver_errf using the supplied tool tag. 
+ */ +int driver_collect_obj_global_syms(DriverEnv *env, const CfreeContext *ctx, + const char *tool, + const CfreeBytes *member, void **blob_out, + size_t *blob_size_out, + const char ***names_out, + uint32_t *count_out); + +void driver_collect_obj_global_syms_free(DriverEnv *env, void *blob, + size_t blob_size); + #endif diff --git a/driver/main.c b/driver/main.c @@ -26,6 +26,8 @@ static int dispatch(const char* name, int argc, char** argv) { if (driver_streq(name, "ld")) return driver_ld(argc, argv); if (driver_streq(name, "ar")) return driver_ar(argc, argv); if (driver_streq(name, "ranlib")) return driver_ranlib(argc, argv); + if (driver_streq(name, "strip")) return driver_strip(argc, argv); + if (driver_streq(name, "objcopy")) return driver_objcopy(argc, argv); if (driver_streq(name, "objdump")) return driver_objdump(argc, argv); if (driver_streq(name, "dbg")) return driver_dbg(argc, argv); if (driver_streq(name, "run")) return driver_run(argc, argv); @@ -60,6 +62,14 @@ static int print_tool_help(const char* name) { driver_help_ranlib(); return 0; } + if (driver_streq(name, "strip")) { + driver_help_strip(); + return 0; + } + if (driver_streq(name, "objcopy")) { + driver_help_objcopy(); + return 0; + } if (driver_streq(name, "objdump")) { driver_help_objdump(); return 0; @@ -118,6 +128,8 @@ void driver_help_top(void) { " ld Link objects/archives into an executable or shared library\n" " ar Create / modify / list / extract POSIX `ar` archives\n" " ranlib Refresh the symbol index of an `ar` archive\n" + " strip Drop debug sections and/or symbols from a .o or .a\n" + " objcopy Copy and transform an object file (rename / remove / format)\n" " objdump Dump sections, symbols, disassembly, hex, and relocations\n" " run JIT-compile inputs and invoke the entry symbol in-process\n" " dbg Interactive JIT debugger (REPL on top of the JIT image)\n" diff --git a/driver/objcopy.c b/driver/objcopy.c @@ -0,0 +1,739 @@ +#include "driver.h" + +#include <stdint.h> +#include 
<string.h> + +#include <cfree/archive.h> +#include <cfree/core.h> +#include <cfree/object.h> + +#include "inputs.h" + +/* `cfree objcopy` — copy + transform an object file. v1 scope is the + * high-traffic build-system subset called out in CTOOLCHAIN.md: + * + * --remove-section=NAME drop the named section + * --only-section=NAME drop every section except NAME (may repeat) + * --rename-section=OLD=NEW rename a section + * --redefine-sym=OLD=NEW rename a symbol + * --globalize-symbol=NAME promote a symbol to SB_GLOBAL + * --localize-symbol=NAME demote a symbol to SB_LOCAL + * --weaken-symbol=NAME flip a symbol to SB_WEAK + * --strip-debug drop CFREE_SEC_DEBUG sections + * --strip-all drop debug + every non-essential symbol + * --strip-unneeded drop debug + symbols not needed by relocs + * --add-section=NAME=FILE append a new section with FILE's bytes + * --update-section=NAME=FILE replace NAME's bytes with FILE's bytes + * -O <bfdname> emit in a different object format + * + * Linked ELF (ET_EXEC / ET_DYN) input is out of scope for v1. + * + * Usage: cfree objcopy [OPTIONS] INPUT [OUTPUT] + * If OUTPUT is omitted, INPUT is rewritten in place. 
*/ + +#define OBJCOPY_TOOL "objcopy" + +void driver_help_objcopy(void) { + driver_printf( + "%s", + "cfree objcopy — copy and transform an object file\n" + "\n" + "USAGE\n" + " cfree objcopy [OPTIONS] INPUT [OUTPUT]\n" + "\n" + "SECTION OPS\n" + " --remove-section=NAME drop section NAME (may repeat)\n" + " --only-section=NAME keep only section NAME (may repeat)\n" + " --rename-section=OLD=NEW rename section OLD to NEW\n" + " --add-section=NAME=FILE append a new section with FILE's bytes\n" + " --update-section=NAME=FILE replace NAME's bytes with FILE's bytes\n" + "\n" + "SYMBOL OPS\n" + " --redefine-sym=OLD=NEW rename a symbol (may repeat)\n" + " --globalize-symbol=NAME set NAME's binding to global\n" + " --localize-symbol=NAME set NAME's binding to local\n" + " --weaken-symbol=NAME set NAME's binding to weak\n" + "\n" + "STRIP OPS\n" + " --strip-debug, --strip-unneeded, --strip-all\n" + " same semantics as `cfree strip`\n" + "\n" + "FORMAT\n" + " -O BFDNAME emit as a different format. Recognized\n" + " names: elf*, mach-o / macho*, coff*, wasm*\n" + "\n" + "EXIT CODES\n" + " 0 success 1 I/O or strip error 2 bad usage\n"); +} + +typedef enum CopyOp { + COPY_OP_NONE, + COPY_OP_STRIP_DEBUG, + COPY_OP_STRIP_UNNEEDED, + COPY_OP_STRIP_ALL, +} CopyOp; + +typedef struct NamePair { + const char* old_name; + const char* new_name; +} NamePair; + +typedef struct CopyOpts { + CopyOp op; + /* Section ops */ + const char** remove_sections; + uint32_t nremove; + uint32_t cap_remove; + const char** only_sections; + uint32_t nonly; + uint32_t cap_only; + NamePair* rename_sections; + uint32_t nrename_sec; + uint32_t cap_rename_sec; + NamePair* add_sections; + uint32_t nadd; + uint32_t cap_add; + NamePair* update_sections; + uint32_t nupdate; + uint32_t cap_update; + /* Symbol ops */ + NamePair* redefine_syms; + uint32_t nredef; + uint32_t cap_redef; + const char** globalize; + uint32_t nglob; + uint32_t cap_glob; + const char** localize; + uint32_t nloc; + uint32_t cap_loc; + const 
char** weaken; + uint32_t nweak; + uint32_t cap_weak; + /* Format conversion */ + int have_output_fmt; + CfreeObjFmt output_fmt; + /* I/O */ + const char* input; + const char* output; +} CopyOpts; + +static int name_in_list(const char* name, const char* const* list, uint32_t n) { + uint32_t i; + if (!name) return 0; + for (i = 0; i < n; ++i) { + if (list[i] && strcmp(list[i], name) == 0) return 1; + } + return 0; +} + +static int push_str(DriverEnv* env, const char*** arr, uint32_t* n, + uint32_t* cap, const char* s) { + if (*n >= *cap) { + uint32_t newcap = *cap ? *cap * 2u : 4u; + const char** nb = (const char**)driver_alloc_zeroed( + env, (size_t)newcap * sizeof(*nb)); + if (!nb) return -1; + if (*arr) { + memcpy(nb, *arr, (size_t)(*n) * sizeof(*nb)); + driver_free(env, (void*)*arr, (size_t)(*cap) * sizeof(*nb)); + } + *arr = nb; + *cap = newcap; + } + (*arr)[(*n)++] = s; + return 0; +} + +static int push_pair(DriverEnv* env, NamePair** arr, uint32_t* n, uint32_t* cap, + const char* old_name, const char* new_name) { + if (*n >= *cap) { + uint32_t newcap = *cap ? *cap * 2u : 4u; + NamePair* nb = + (NamePair*)driver_alloc_zeroed(env, (size_t)newcap * sizeof(*nb)); + if (!nb) return -1; + if (*arr) { + memcpy(nb, *arr, (size_t)(*n) * sizeof(*nb)); + driver_free(env, *arr, (size_t)(*cap) * sizeof(*nb)); + } + *arr = nb; + *cap = newcap; + } + (*arr)[*n].old_name = old_name; + (*arr)[*n].new_name = new_name; + (*n)++; + return 0; +} + +/* Parse VAL of `--flag=VAL` or take the next argv. */ +static int take_value(int* i, int argc, char** argv, const char* flag, + const char** out) { + const char* a = argv[*i]; + size_t flen = strlen(flag); + if (strncmp(a, flag, flen) == 0 && a[flen] == '=') { + *out = a + flen + 1; + return 1; + } + if (strcmp(a, flag) == 0) { + if (*i + 1 >= argc) return -1; + *out = argv[++(*i)]; + return 1; + } + return 0; +} + +/* Split "old=new" / "name=file" at the first '='. 
*/ +static int split_pair(DriverEnv* env, const char* spec, const char** out_left, + const char** out_right) { + const char* eq = strchr(spec, '='); + size_t llen; + char* left; + if (!eq || eq == spec || !eq[1]) return -1; + llen = (size_t)(eq - spec); + left = (char*)driver_alloc_zeroed(env, llen + 1u); + if (!left) return -1; + memcpy(left, spec, llen); + left[llen] = '\0'; + *out_left = left; + *out_right = eq + 1; + return 0; +} + +static int parse_fmt_name(const char* name, CfreeObjFmt* out) { + if (!name) return -1; + if (strncmp(name, "elf", 3) == 0) { + *out = CFREE_OBJ_ELF; + return 0; + } + if (strncmp(name, "mach", 4) == 0) { + *out = CFREE_OBJ_MACHO; + return 0; + } + if (strncmp(name, "coff", 4) == 0 || strncmp(name, "pe-", 3) == 0) { + *out = CFREE_OBJ_COFF; + return 0; + } + if (strncmp(name, "wasm", 4) == 0) { + *out = CFREE_OBJ_WASM; + return 0; + } + return -1; +} + +/* Lookup a symbol by name; CFREE_OBJ_SYMBOL_NONE if not found. */ +static CfreeObjSymbol find_sym_id(CfreeObjFile* of, const char* name) { + CfreeObjSymInfo si; + if (cfree_obj_symbol_by_name(of, name, &si) != CFREE_OK) { + return CFREE_OBJ_SYMBOL_NONE; + } + return si.id; +} + +/* Lookup a section by name; CFREE_SECTION_NONE if not found. */ +static CfreeObjSection find_sec_id(CfreeObjFile* of, const char* name) { + CfreeObjSection s = CFREE_SECTION_NONE; + if (cfree_obj_section_by_name(of, name, &s) != CFREE_OK) { + return CFREE_SECTION_NONE; + } + return s; +} + +static int apply_strip_pass(DriverEnv* env, CfreeObjFile* of, + CfreeObjBuilder* b, const CopyOpts* opts) { + uint32_t i, nsec; + CfreeObjSymbol* needed = NULL; + uint32_t nneeded = 0, cap_needed = 0; + CfreeObjSymIter* sit = NULL; + int filter_syms = (opts->op == COPY_OP_STRIP_UNNEEDED || + opts->op == COPY_OP_STRIP_ALL); + int rc = 1; + + if (opts->op == COPY_OP_NONE) return 0; + + /* Always drop debug sections for any strip op. 
*/ + nsec = cfree_obj_nsections(of); + for (i = 0; i < nsec; ++i) { + CfreeObjSecInfo si; + if (cfree_obj_section(of, i, &si) != CFREE_OK) continue; + if (si.kind == CFREE_SEC_DEBUG) cfree_obj_builder_remove_section(b, i); + } + if (!filter_syms) return 0; + + /* Collect reloc-targeted sym ids, skipping relocs in debug sections. */ + { + CfreeObjRelocIter* rit = NULL; + if (cfree_obj_reliter_new(of, &rit) != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "out of memory"); + return 1; + } + for (;;) { + CfreeObjReloc r; + CfreeIterResult ir = cfree_obj_reliter_next(rit, &r); + uint32_t k; + int seen = 0; + if (ir != CFREE_ITER_ITEM) break; + if (r.sym == CFREE_OBJ_SYMBOL_NONE) continue; + if (r.section != CFREE_SECTION_NONE) { + CfreeObjSecInfo hi; + if (cfree_obj_section(of, r.section, &hi) == CFREE_OK && + hi.kind == CFREE_SEC_DEBUG) { + continue; + } + } + for (k = 0; k < nneeded; ++k) { + if (needed[k] == r.sym) { + seen = 1; + break; + } + } + if (seen) continue; + if (nneeded >= cap_needed) { + uint32_t newcap = cap_needed ? cap_needed * 2u : 32u; + CfreeObjSymbol* nb = (CfreeObjSymbol*)driver_alloc_zeroed( + env, (size_t)newcap * sizeof(*nb)); + if (!nb) { + cfree_obj_reliter_free(rit); + if (needed) + driver_free(env, needed, (size_t)cap_needed * sizeof(*needed)); + driver_errf(OBJCOPY_TOOL, "out of memory"); + return 1; + } + if (needed) { + memcpy(nb, needed, (size_t)nneeded * sizeof(*needed)); + driver_free(env, needed, (size_t)cap_needed * sizeof(*needed)); + } + needed = nb; + cap_needed = newcap; + } + needed[nneeded++] = r.sym; + } + cfree_obj_reliter_free(rit); + } + + /* Walk syms and drop unneeded ones. 
*/ + if (cfree_obj_symiter_new(of, &sit) != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "out of memory"); + goto done; + } + for (;;) { + CfreeObjSymInfo si; + CfreeIterResult ir = cfree_obj_symiter_next(sit, &si); + uint32_t k; + int in_needed = 0; + if (ir != CFREE_ITER_ITEM) break; + if (si.kind == CFREE_SK_UNDEF) continue; + for (k = 0; k < nneeded; ++k) { + if (needed[k] == si.id) { + in_needed = 1; + break; + } + } + if (!in_needed) cfree_obj_builder_remove_symbol(b, si.id); + } + cfree_obj_symiter_free(sit); + rc = 0; +done: + if (needed) driver_free(env, needed, (size_t)cap_needed * sizeof(*needed)); + return rc; +} + +/* Apply --only-section: every section whose name isn't on the list is + * dropped. Sections of CFREE_SEC_TEXT/RODATA/DATA/BSS are user-visible; + * symbol-table / strtab / etc. are also affected. */ +static void apply_only_sections(CfreeObjFile* of, CfreeObjBuilder* b, + const CopyOpts* opts) { + uint32_t i, n; + if (!opts->nonly) return; + n = cfree_obj_nsections(of); + for (i = 0; i < n; ++i) { + CfreeObjSecInfo si; + if (cfree_obj_section(of, i, &si) != CFREE_OK) continue; + if (!name_in_list(si.name, opts->only_sections, opts->nonly)) { + cfree_obj_builder_remove_section(b, i); + } + } +} + +static int run_transforms(DriverEnv* env, const CfreeContext* ctx, + CfreeObjFile* of, CfreeObjBuilder* b, + const CopyOpts* opts) { + uint32_t i; + + /* --strip-* */ + if (apply_strip_pass(env, of, b, opts) != 0) return 1; + + /* --only-section overrides --remove-section because they're an inverse + * pair; if both are passed, --only-section's keep-set is authoritative. 
*/ + if (opts->nonly) { + apply_only_sections(of, b, opts); + } else if (opts->nremove) { + for (i = 0; i < opts->nremove; ++i) { + CfreeObjSection sid = find_sec_id(of, opts->remove_sections[i]); + if (sid != CFREE_SECTION_NONE) + cfree_obj_builder_remove_section(b, sid); + } + } + + /* --rename-section */ + for (i = 0; i < opts->nrename_sec; ++i) { + CfreeObjSection sid = find_sec_id(of, opts->rename_sections[i].old_name); + if (sid == CFREE_SECTION_NONE) { + driver_errf(OBJCOPY_TOOL, "rename-section: '%s' not found", + opts->rename_sections[i].old_name); + return 1; + } + CfreeSym ns = cfree_sym_intern(cfree_obj_builder_compiler(b), + opts->rename_sections[i].new_name); + cfree_obj_builder_rename_section(b, sid, ns); + } + + /* --update-section */ + for (i = 0; i < opts->nupdate; ++i) { + CfreeObjSection sid = find_sec_id(of, opts->update_sections[i].old_name); + CfreeFileData fd = {0}; + if (sid == CFREE_SECTION_NONE) { + driver_errf(OBJCOPY_TOOL, "update-section: '%s' not found", + opts->update_sections[i].old_name); + return 1; + } + if (ctx->file_io->read_all(ctx->file_io->user, + opts->update_sections[i].new_name, + &fd) != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "update-section: cannot read %s", + opts->update_sections[i].new_name); + return 1; + } + cfree_obj_builder_section_replace_bytes(b, sid, fd.data, fd.size); + ctx->file_io->release(ctx->file_io->user, &fd); + } + + /* --add-section: create a new SEC_OTHER PROGBITS section and write its + * contents from the on-disk file. 
*/ + for (i = 0; i < opts->nadd; ++i) { + CfreeObjSectionDesc desc; + CfreeObjSection nsid; + CfreeFileData fd = {0}; + if (ctx->file_io->read_all(ctx->file_io->user, + opts->add_sections[i].new_name, &fd) != + CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "add-section: cannot read %s", + opts->add_sections[i].new_name); + return 1; + } + memset(&desc, 0, sizeof desc); + desc.name = cfree_sym_intern(cfree_obj_builder_compiler(b), + opts->add_sections[i].old_name); + desc.kind = CFREE_SEC_OTHER; + desc.flags = 0; + desc.align = 1; + desc.entsize = 0; + if (cfree_obj_builder_section(b, &desc, &nsid) != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "add-section: failed to create '%s'", + opts->add_sections[i].old_name); + ctx->file_io->release(ctx->file_io->user, &fd); + return 1; + } + cfree_obj_builder_write(b, nsid, fd.data, fd.size); + ctx->file_io->release(ctx->file_io->user, &fd); + } + + /* --redefine-sym */ + for (i = 0; i < opts->nredef; ++i) { + CfreeObjSymbol sid = find_sym_id(of, opts->redefine_syms[i].old_name); + if (sid == CFREE_OBJ_SYMBOL_NONE) continue; /* tolerate missing */ + cfree_obj_builder_rename_symbol( + b, sid, + cfree_sym_intern(cfree_obj_builder_compiler(b), + opts->redefine_syms[i].new_name)); + } + + /* --globalize-symbol / --localize-symbol / --weaken-symbol */ + for (i = 0; i < opts->nglob; ++i) { + CfreeObjSymbol sid = find_sym_id(of, opts->globalize[i]); + if (sid != CFREE_OBJ_SYMBOL_NONE) + cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_GLOBAL); + } + for (i = 0; i < opts->nloc; ++i) { + CfreeObjSymbol sid = find_sym_id(of, opts->localize[i]); + if (sid != CFREE_OBJ_SYMBOL_NONE) + cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_LOCAL); + } + for (i = 0; i < opts->nweak; ++i) { + CfreeObjSymbol sid = find_sym_id(of, opts->weaken[i]); + if (sid != CFREE_OBJ_SYMBOL_NONE) + cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_WEAK); + } + + return 0; +} + +static int copy_one_object(DriverEnv* env, const CfreeContext* ctx, + const CfreeBytes* 
input, const CopyOpts* opts, + const char* output_path) { + CfreeObjFile* of = NULL; + CfreeObjBuilder* b; + CfreeWriter* w = NULL; + CfreeStatus st; + int rc = 1; + + if (cfree_obj_open(ctx, input, &of) != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "%s: not a recognized object", input->name); + return 1; + } + b = cfree_obj_file_builder(of); + if (!b) { + driver_errf(OBJCOPY_TOOL, "%s: no builder", input->name); + cfree_obj_free(of); + return 1; + } + if (run_transforms(env, ctx, of, b, opts) != 0) { + cfree_obj_free(of); + return 1; + } + if (ctx->file_io->open_writer(ctx->file_io->user, output_path, &w) != + CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "cannot open %s", output_path); + cfree_obj_free(of); + return 1; + } + if (opts->have_output_fmt) { + st = cfree_obj_builder_emit_as(b, opts->output_fmt, w); + } else { + st = cfree_obj_builder_emit(b, w); + } + if (st != CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "emit failed"); + cfree_writer_close(w); + cfree_obj_free(of); + return 1; + } + cfree_writer_close(w); + cfree_obj_free(of); + rc = 0; + return rc; +} + +int driver_objcopy(int argc, char** argv) { + DriverEnv env; + CfreeContext ctx; + CopyOpts opts; + CfreeFileData in_fd = {0}; + CfreeBytes input; + int have_in = 0; + int rc = 1; + int i; + const char* out_path; + + if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { + driver_help_objcopy(); + return 0; + } + + memset(&opts, 0, sizeof opts); + opts.op = COPY_OP_NONE; + driver_env_init(&env); + ctx = driver_env_to_context(&env); + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + const char* val = NULL; + int matched; + if (driver_streq(a, "--strip-debug")) { + opts.op = COPY_OP_STRIP_DEBUG; + continue; + } + if (driver_streq(a, "--strip-unneeded")) { + opts.op = COPY_OP_STRIP_UNNEEDED; + continue; + } + if (driver_streq(a, "--strip-all") || driver_streq(a, "-S")) { + opts.op = COPY_OP_STRIP_ALL; + continue; + } + if (driver_streq(a, "-O")) { + if (i + 1 >= argc) { + driver_errf(OBJCOPY_TOOL, "-O 
requires a format name"); + rc = 2; + goto done; + } + if (parse_fmt_name(argv[++i], &opts.output_fmt) != 0) { + driver_errf(OBJCOPY_TOOL, "unknown output format: %s", argv[i]); + rc = 2; + goto done; + } + opts.have_output_fmt = 1; + continue; + } + matched = take_value(&i, argc, argv, "--remove-section", &val); + if (matched < 0) goto missing_value; + if (matched) { + if (push_str(&env, &opts.remove_sections, &opts.nremove, &opts.cap_remove, + val) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--only-section", &val); + if (matched < 0) goto missing_value; + if (matched) { + if (push_str(&env, &opts.only_sections, &opts.nonly, &opts.cap_only, + val) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--rename-section", &val); + if (matched < 0) goto missing_value; + if (matched) { + const char *left, *right; + if (split_pair(&env, val, &left, &right) != 0) { + driver_errf(OBJCOPY_TOOL, "rename-section: expected OLD=NEW (got %s)", + val); + rc = 2; + goto done; + } + if (push_pair(&env, &opts.rename_sections, &opts.nrename_sec, + &opts.cap_rename_sec, left, right) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--add-section", &val); + if (matched < 0) goto missing_value; + if (matched) { + const char *left, *right; + if (split_pair(&env, val, &left, &right) != 0) { + driver_errf(OBJCOPY_TOOL, "add-section: expected NAME=FILE (got %s)", + val); + rc = 2; + goto done; + } + if (push_pair(&env, &opts.add_sections, &opts.nadd, &opts.cap_add, left, + right) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--update-section", &val); + if (matched < 0) goto missing_value; + if (matched) { + const char *left, *right; + if (split_pair(&env, val, &left, &right) != 0) { + driver_errf(OBJCOPY_TOOL, "update-section: expected NAME=FILE (got %s)", + val); + rc = 2; + goto done; + } + if (push_pair(&env, &opts.update_sections, &opts.nupdate, + &opts.cap_update, left, right) != 0) + 
goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--redefine-sym", &val); + if (matched < 0) goto missing_value; + if (matched) { + const char *left, *right; + if (split_pair(&env, val, &left, &right) != 0) { + driver_errf(OBJCOPY_TOOL, "redefine-sym: expected OLD=NEW (got %s)", + val); + rc = 2; + goto done; + } + if (push_pair(&env, &opts.redefine_syms, &opts.nredef, &opts.cap_redef, + left, right) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--globalize-symbol", &val); + if (matched < 0) goto missing_value; + if (matched) { + if (push_str(&env, &opts.globalize, &opts.nglob, &opts.cap_glob, val) != + 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--localize-symbol", &val); + if (matched < 0) goto missing_value; + if (matched) { + if (push_str(&env, &opts.localize, &opts.nloc, &opts.cap_loc, val) != 0) + goto oom; + continue; + } + matched = take_value(&i, argc, argv, "--weaken-symbol", &val); + if (matched < 0) goto missing_value; + if (matched) { + if (push_str(&env, &opts.weaken, &opts.nweak, &opts.cap_weak, val) != 0) + goto oom; + continue; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(OBJCOPY_TOOL, "unknown option: %s", a); + rc = 2; + goto done; + } + if (!opts.input) { + opts.input = a; + } else if (!opts.output) { + opts.output = a; + } else { + driver_errf(OBJCOPY_TOOL, "unexpected argument: %s", a); + rc = 2; + goto done; + } + } + + if (!opts.input) { + driver_errf(OBJCOPY_TOOL, "missing input file"); + rc = 2; + goto done; + } + out_path = opts.output ? 
opts.output : opts.input; + + if (ctx.file_io->read_all(ctx.file_io->user, opts.input, &in_fd) != + CFREE_OK) { + driver_errf(OBJCOPY_TOOL, "cannot read %s", opts.input); + goto done; + } + have_in = 1; + input.name = opts.input; + input.data = in_fd.data; + input.len = in_fd.size; + + rc = copy_one_object(&env, &ctx, &input, &opts, out_path); + +done: + if (have_in) ctx.file_io->release(ctx.file_io->user, &in_fd); + if (opts.remove_sections) + driver_free(&env, (void*)opts.remove_sections, + (size_t)opts.cap_remove * sizeof(*opts.remove_sections)); + if (opts.only_sections) + driver_free(&env, (void*)opts.only_sections, + (size_t)opts.cap_only * sizeof(*opts.only_sections)); + if (opts.rename_sections) + driver_free(&env, opts.rename_sections, + (size_t)opts.cap_rename_sec * sizeof(*opts.rename_sections)); + if (opts.add_sections) + driver_free(&env, opts.add_sections, + (size_t)opts.cap_add * sizeof(*opts.add_sections)); + if (opts.update_sections) + driver_free(&env, opts.update_sections, + (size_t)opts.cap_update * sizeof(*opts.update_sections)); + if (opts.redefine_syms) + driver_free(&env, opts.redefine_syms, + (size_t)opts.cap_redef * sizeof(*opts.redefine_syms)); + if (opts.globalize) + driver_free(&env, (void*)opts.globalize, + (size_t)opts.cap_glob * sizeof(*opts.globalize)); + if (opts.localize) + driver_free(&env, (void*)opts.localize, + (size_t)opts.cap_loc * sizeof(*opts.localize)); + if (opts.weaken) + driver_free(&env, (void*)opts.weaken, + (size_t)opts.cap_weak * sizeof(*opts.weaken)); + driver_env_fini(&env); + return rc; + +missing_value: + driver_errf(OBJCOPY_TOOL, "%s requires a value", argv[i]); + rc = 2; + goto done; +oom: + driver_errf(OBJCOPY_TOOL, "out of memory"); + rc = 1; + goto done; +} diff --git a/driver/ranlib.c b/driver/ranlib.c @@ -5,6 +5,7 @@ #include <cfree/object.h> #include "driver.h" +#include "inputs.h" /* `cfree ranlib` — refresh / add a System-V `/` symbol-index member at the * head of an existing POSIX `ar` archive. 
Equivalent to `cfree ar s ARCHIVE`, @@ -16,9 +17,8 @@ * member names are preserved via the `//` extended-name table. Reproducible * output via SOURCE_DATE_EPOCH (same epoch handling as `cfree ar`). * - * Note: the per-member symbol-collection loop duplicates the body of - * ar_do_write's `has_s` block (driver/ar.c). Factor into a shared helper - * when adding strip / objcopy. */ + * Per-member symbol collection lives in driver/inputs.c + * (driver_collect_obj_global_syms) — shared with ar / strip. */ #define RANLIB_TOOL "ranlib" @@ -59,104 +59,6 @@ static uint64_t ranlib_epoch_from_env(void) { return v; } -/* Walk an object member's symbol iterator and produce a heap-allocated - * (name_arr[count], name_bytes) blob for CfreeArMemberSymbols. *blob_out is - * NULL when the member is not a recognised object or has no exported - * global symbols; *out_count is then 0. Mirrors driver/ar.c. */ -static int ranlib_collect_symbols(DriverEnv* env, const CfreeContext* ctx, - const CfreeBytes* member, void** blob_out, - size_t* blob_size_out, const char*** names_out, - uint32_t* count_out) { - CfreeObjFile* of = NULL; - CfreeObjSymIter* it = NULL; - CfreeObjSymInfo si; - uint32_t count = 0; - size_t name_bytes = 0; - size_t alloc_sz; - char* blob; - const char** name_arr; - char* name_storage; - size_t cursor = 0; - - *blob_out = NULL; - *blob_size_out = 0; - *names_out = NULL; - *count_out = 0; - - if (cfree_obj_open(ctx, member, &of) != CFREE_OK) return 0; - - if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { - cfree_obj_free(of); - driver_errf(RANLIB_TOOL, "out of memory"); - return 1; - } - for (;;) { - CfreeIterResult r = cfree_obj_symiter_next(it, &si); - if (r != CFREE_ITER_ITEM) break; - if (si.bind != CFREE_SB_GLOBAL) continue; - if (si.section == CFREE_SECTION_NONE) continue; - if (!si.name || !si.name[0]) continue; - count += 1; - { - const char* p = si.name; - while (*p++) ++name_bytes; - name_bytes += 1; - } - } - cfree_obj_symiter_free(it); - - if (count == 0) { 
- cfree_obj_free(of); - return 0; - } - - alloc_sz = (size_t)count * sizeof(const char*) + name_bytes; - blob = (char*)driver_alloc_zeroed(env, alloc_sz); - if (!blob) { - cfree_obj_free(of); - driver_errf(RANLIB_TOOL, "out of memory"); - return 1; - } - name_arr = (const char**)blob; - name_storage = blob + (size_t)count * sizeof(const char*); - - if (cfree_obj_symiter_new(of, &it) != CFREE_OK) { - driver_free(env, blob, alloc_sz); - cfree_obj_free(of); - driver_errf(RANLIB_TOOL, "out of memory"); - return 1; - } - { - uint32_t k = 0; - for (;;) { - CfreeIterResult r; - const char* p; - char* dst; - if (k >= count) break; - r = cfree_obj_symiter_next(it, &si); - if (r != CFREE_ITER_ITEM) break; - if (si.bind != CFREE_SB_GLOBAL) continue; - if (si.section == CFREE_SECTION_NONE) continue; - if (!si.name || !si.name[0]) continue; - dst = name_storage + cursor; - name_arr[k] = dst; - for (p = si.name; *p; ++p) *dst++ = *p; - *dst++ = '\0'; - cursor = (size_t)(dst - name_storage); - k++; - } - count = k; - } - cfree_obj_symiter_free(it); - cfree_obj_free(of); - - *blob_out = blob; - *blob_size_out = alloc_sz; - *names_out = name_arr; - *count_out = count; - return 0; -} - int driver_ranlib(int argc, char** argv) { DriverEnv env; CfreeContext ctx; @@ -288,8 +190,9 @@ int driver_ranlib(int argc, char** argv) { size_t blob_size = 0; const char** names = NULL; uint32_t count = 0; - if (ranlib_collect_symbols(&env, &ctx, &members[i], &blob, &blob_size, - &names, &count) != 0) { + if (driver_collect_obj_global_syms(&env, &ctx, RANLIB_TOOL, &members[i], + &blob, &blob_size, &names, + &count) != 0) { goto out; } sym_allocs[i] = blob; diff --git a/driver/strip.c b/driver/strip.c @@ -0,0 +1,661 @@ +#include "driver.h" + +#include <stdint.h> +#include <string.h> + +#include <cfree/archive.h> +#include <cfree/core.h> +#include <cfree/object.h> + +#include "inputs.h" + +/* `cfree strip` — drop debug sections and / or unwanted symbols from a + * relocatable object or static archive, 
then write the result back. Scope + * for the first cut matches the CTOOLCHAIN.md plan: relocatable .o and + * .a inputs only — linked ELF (ET_EXEC / ET_DYN) is rejected. + * + * Operations (the last one wins; default is --strip-all): + * --strip-debug drop sections whose kind is CFREE_SEC_DEBUG + * --strip-unneeded drop debug + symbols not referenced by any reloc + * --strip-all drop debug + every non-essential symbol (default) + * + * Filters applied on top of the operation: + * --keep-symbol=NAME, -K NAME keep NAME even if the operation would drop it + * --strip-symbol=NAME, -N NAME always drop NAME + * + * I/O: + * -o PATH write output to PATH (else rewrite the input in place) + */ + +#define STRIP_TOOL "strip" + +void driver_help_strip(void) { + driver_printf( + "%s", + "cfree strip — drop debug sections and/or symbols\n" + "\n" + "USAGE\n" + " cfree strip [OPTIONS] FILE\n" + "\n" + "OPERATIONS (last one wins; default is --strip-all)\n" + " --strip-debug remove debug-info sections\n" + " --strip-unneeded remove debug + symbols not needed by relocs\n" + " --strip-all remove debug + all non-essential symbols\n" + "\n" + "SYMBOL FILTERS (may repeat)\n" + " --keep-symbol=NAME, -K NAME keep NAME even when the operation\n" + " would otherwise drop it\n" + " --strip-symbol=NAME, -N NAME always drop NAME\n" + "\n" + "OUTPUT\n" + " -o PATH write to PATH (default: rewrite FILE in place)\n" + "\n" + "INPUTS\n" + " FILE may be a relocatable .o or a static .a archive. 
Linked\n" + " executables / shared libraries are not supported yet.\n" + "\n" + "EXIT CODES\n" + " 0 success 1 I/O or strip error 2 bad usage\n"); +} + +typedef enum StripOp { + STRIP_OP_DEBUG, + STRIP_OP_UNNEEDED, + STRIP_OP_ALL, +} StripOp; + +typedef struct StripOpts { + StripOp op; + const char** keep; + uint32_t nkeep; + uint32_t cap_keep; + const char** strip; + uint32_t nstrip; + uint32_t cap_strip; + const char* output; + const char* input; +} StripOpts; + +static int name_in_list(const char* name, const char* const* list, uint32_t n) { + uint32_t i; + if (!name) return 0; + for (i = 0; i < n; ++i) { + if (list[i] && strcmp(list[i], name) == 0) return 1; + } + return 0; +} + +static int push_name(DriverEnv* env, const char*** arr, uint32_t* n, + uint32_t* cap, const char* name) { + if (*n >= *cap) { + uint32_t newcap = *cap ? *cap * 2u : 8u; + const char** nb = (const char**)driver_alloc_zeroed( + env, (size_t)newcap * sizeof(*nb)); + if (!nb) { + driver_errf(STRIP_TOOL, "out of memory"); + return -1; + } + if (*arr) { + memcpy(nb, *arr, (size_t)(*n) * sizeof(*nb)); + driver_free(env, (void*)*arr, (size_t)(*cap) * sizeof(*nb)); + } + *arr = nb; + *cap = newcap; + } + (*arr)[(*n)++] = name; + return 0; +} + +static int parse_name_arg(int* i, int argc, char** argv, const char* flag, + const char* short_flag, const char** out) { + const char* a = argv[*i]; + size_t flen = strlen(flag); + /* --flag=NAME */ + if (strncmp(a, flag, flen) == 0 && a[flen] == '=') { + *out = a + flen + 1; + return 1; + } + /* --flag NAME (no '='): the value is taken from the next argv entry; + * returns -1 when there is none. 
*/ + if (strcmp(a, flag) == 0) { + if (*i + 1 >= argc) return -1; + *out = argv[++(*i)]; + return 1; + } + /* -K NAME / -N NAME */ + if (short_flag && strcmp(a, short_flag) == 0) { + if (*i + 1 >= argc) return -1; + *out = argv[++(*i)]; + return 1; + } + return 0; +} + +/* Collect the set of CfreeObjSymbol ids targeted by any reloc whose + * containing section will survive emit — relocs inside the + * about-to-be-removed CFREE_SEC_DEBUG sections don't count. Otherwise a + * symbol that's referenced only from DWARF (e.g. main's debug_info entry) + * keeps every function symbol alive even though the on-disk relocs + * holding it won't make it to the output. */ +static int collect_needed_syms(DriverEnv* env, CfreeObjFile* of, + CfreeObjSymbol** needed_out, uint32_t* n_out, + uint32_t* cap_out) { + CfreeObjRelocIter* rit = NULL; + CfreeObjSymbol* arr = NULL; + uint32_t n = 0, cap = 0; + + if (cfree_obj_reliter_new(of, &rit) != CFREE_OK) { + driver_errf(STRIP_TOOL, "out of memory"); + return 1; + } + for (;;) { + CfreeObjReloc r; + CfreeIterResult ir = cfree_obj_reliter_next(rit, &r); + uint32_t k; + int seen = 0; + if (ir != CFREE_ITER_ITEM) break; + if (r.sym == CFREE_OBJ_SYMBOL_NONE) continue; + /* Skip relocs hosted in a debug section — that section is being + * dropped, so its relocs don't actually "need" their targets. */ + if (r.section != CFREE_SECTION_NONE) { + CfreeObjSecInfo hi; + if (cfree_obj_section(of, r.section, &hi) == CFREE_OK && + hi.kind == CFREE_SEC_DEBUG) { + continue; + } + } + for (k = 0; k < n; ++k) { + if (arr[k] == r.sym) { + seen = 1; + break; + } + } + if (seen) continue; + if (n >= cap) { + uint32_t newcap = cap ? 
cap * 2u : 32u; + CfreeObjSymbol* nb = (CfreeObjSymbol*)driver_alloc_zeroed( + env, (size_t)newcap * sizeof(*nb)); + if (!nb) { + cfree_obj_reliter_free(rit); + if (arr) driver_free(env, arr, (size_t)cap * sizeof(*arr)); + driver_errf(STRIP_TOOL, "out of memory"); + return 1; + } + if (arr) { + memcpy(nb, arr, (size_t)n * sizeof(*arr)); + driver_free(env, arr, (size_t)cap * sizeof(*arr)); + } + arr = nb; + cap = newcap; + } + arr[n++] = r.sym; + } + cfree_obj_reliter_free(rit); + *needed_out = arr; + *n_out = n; + *cap_out = cap; + return 0; +} + +static int id_in_set(CfreeObjSymbol id, const CfreeObjSymbol* arr, uint32_t n) { + uint32_t i; + for (i = 0; i < n; ++i) { + if (arr[i] == id) return 1; + } + return 0; +} + +/* The core strip pass: drop debug sections, then walk symbols and apply + * keep/strip lists and the operation policy. Mutations are issued + * against the builder; emit-time sweep cleans up cascades (orphan + * relocs against removed sections, dropped group memberships, etc.). */ +static int strip_one_builder(DriverEnv* env, CfreeObjFile* of, + CfreeObjBuilder* b, const StripOpts* opts) { + uint32_t i, nsec; + int filter_syms = (opts->op == STRIP_OP_UNNEEDED || + opts->op == STRIP_OP_ALL); + CfreeObjSymbol* needed = NULL; + uint32_t nneeded = 0, cap_needed = 0; + CfreeObjSymIter* sit = NULL; + int rc = 1; + + /* Step 1: drop debug sections (every supported op does this). */ + nsec = cfree_obj_nsections(of); + for (i = 0; i < nsec; ++i) { + CfreeObjSecInfo si; + if (cfree_obj_section(of, i, &si) != CFREE_OK) continue; + if (si.kind == CFREE_SEC_DEBUG) { + cfree_obj_builder_remove_section(b, i); + } + } + + /* Step 2: compute the needed-sym set. */ + if (filter_syms) { + if (collect_needed_syms(env, of, &needed, &nneeded, &cap_needed) != 0) { + return 1; + } + } + + /* Step 3: walk symbols and apply filters. 
*/ + if (cfree_obj_symiter_new(of, &sit) != CFREE_OK) { + driver_errf(STRIP_TOOL, "out of memory"); + goto done; + } + for (;;) { + CfreeObjSymInfo si; + CfreeIterResult ir = cfree_obj_symiter_next(sit, &si); + int drop = 0; + if (ir != CFREE_ITER_ITEM) break; + /* --strip-symbol wins over --keep-symbol if both list the same name. */ + if (opts->nstrip && name_in_list(si.name, opts->strip, opts->nstrip)) { + drop = 1; + } else if (opts->nkeep && name_in_list(si.name, opts->keep, opts->nkeep)) { + drop = 0; + } else if (filter_syms) { + /* Keep undefined externals so the .o stays linkable; keep symbols + * targeted by a surviving reloc; drop everything else. Note that + * section symbols defined in removed debug sections are already + * tombstoned by the emit-time sweep cascade — no explicit handling + * needed here. */ + if (si.kind == CFREE_SK_UNDEF) { + drop = 0; + } else if (id_in_set(si.id, needed, nneeded)) { + drop = 0; + } else { + drop = 1; + } + } + if (drop) { + cfree_obj_builder_remove_symbol(b, si.id); + } + } + cfree_obj_symiter_free(sit); + rc = 0; + +done: + if (needed) driver_free(env, needed, (size_t)cap_needed * sizeof(*needed)); + return rc; +} + +static int strip_object_bytes(DriverEnv* env, const CfreeContext* ctx, + const CfreeBytes* input, const StripOpts* opts, + uint8_t** out_data, size_t* out_size) { + CfreeObjFile* of = NULL; + CfreeObjBuilder* b; + CfreeWriter* w = NULL; + size_t n = 0; + const uint8_t* data; + uint8_t* copy; + int rc = 1; + + *out_data = NULL; + *out_size = 0; + + if (cfree_obj_open(ctx, input, &of) != CFREE_OK) { + driver_errf(STRIP_TOOL, "%s: not a recognized object", input->name); + return 1; + } + b = cfree_obj_file_builder(of); + if (!b) { + driver_errf(STRIP_TOOL, "%s: no builder for object", input->name); + cfree_obj_free(of); + return 1; + } + + if (strip_one_builder(env, of, b, opts) != 0) { + cfree_obj_free(of); + return 1; + } + + if (cfree_writer_mem(env->heap, &w) != CFREE_OK || !w) { + 
driver_errf(STRIP_TOOL, "out of memory"); + cfree_obj_free(of); + return 1; + } + if (cfree_obj_builder_emit(b, w) != CFREE_OK) { + driver_errf(STRIP_TOOL, "%s: emit failed", input->name); + cfree_writer_close(w); + cfree_obj_free(of); + return 1; + } + data = cfree_writer_mem_bytes(w, &n); + copy = (uint8_t*)driver_alloc(env, n ? n : 1u); + if (!copy) { + driver_errf(STRIP_TOOL, "out of memory"); + cfree_writer_close(w); + cfree_obj_free(of); + return 1; + } + if (n) memcpy(copy, data, n); + cfree_writer_close(w); + cfree_obj_free(of); + + *out_data = copy; + *out_size = n; + rc = 0; + return rc; +} + +static uint64_t strip_epoch_from_env(void) { + const char* s = driver_getenv("SOURCE_DATE_EPOCH"); + uint64_t v = 0; + if (!s || !*s) return 0; + for (; *s; ++s) { + if (*s < '0' || *s > '9') return 0; + v = v * 10 + (uint64_t)(*s - '0'); + } + return v; +} + +/* Strip every object member of an archive, write a fresh archive with + * a refreshed System-V symbol index. Non-object members pass through + * unchanged. */ +static int strip_archive(DriverEnv* env, const CfreeContext* ctx, + const CfreeBytes* input, const StripOpts* opts, + const char* output_path) { + CfreeArIter* it = NULL; + CfreeArMember m; + CfreeBytes* members = NULL; + uint8_t** owned_data = NULL; + size_t* owned_size = NULL; + char* name_storage = NULL; + size_t name_bytes_total = 0; + uint32_t nmembers = 0, k; + CfreeArMemberSymbols* msyms = NULL; + void** sym_allocs = NULL; + size_t* sym_alloc_szs = NULL; + CfreeWriter* out = NULL; + CfreeArWriteOptions opts_ar = {0}; + int rc = 1; + + /* Pass 1: count members + total name bytes. 
*/ + if (cfree_ar_iter_new(ctx, input, &it) != CFREE_OK) { + driver_errf(STRIP_TOOL, "%s: not an archive", input->name); + return 1; + } + for (;;) { + CfreeIterResult r = cfree_ar_iter_next(it, &m); + if (r != CFREE_ITER_ITEM) break; + nmembers++; + name_bytes_total += driver_strlen(m.name) + 1; + } + cfree_ar_iter_free(it); + it = NULL; + + if (nmembers) { + members = (CfreeBytes*)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*members)); + owned_data = (uint8_t**)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*owned_data)); + owned_size = (size_t*)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*owned_size)); + if (!members || !owned_data || !owned_size) { + driver_errf(STRIP_TOOL, "out of memory"); + goto done; + } + } + if (name_bytes_total) { + name_storage = (char*)driver_alloc_zeroed(env, name_bytes_total); + if (!name_storage) { + driver_errf(STRIP_TOOL, "out of memory"); + goto done; + } + } + + /* Pass 2: walk members; strip object members, pass others through. 
*/ + if (cfree_ar_iter_new(ctx, input, &it) != CFREE_OK) { + driver_errf(STRIP_TOOL, "iter re-open failed"); + goto done; + } + { + size_t cursor = 0; + k = 0; + while (k < nmembers) { + CfreeIterResult r = cfree_ar_iter_next(it, &m); + const char* p; + char* dst; + CfreeBinFmt fmt; + CfreeBytes mbytes; + if (r != CFREE_ITER_ITEM) break; + dst = name_storage + cursor; + for (p = m.name; *p; ++p) *dst++ = *p; + *dst++ = '\0'; + members[k].name = name_storage + cursor; + cursor = (size_t)(dst - name_storage); + + mbytes.name = members[k].name; + mbytes.data = m.data; + mbytes.len = m.size; + fmt = cfree_detect_fmt(m.data, m.size); + if (fmt == CFREE_BIN_ELF || fmt == CFREE_BIN_COFF || + fmt == CFREE_BIN_MACHO || fmt == CFREE_BIN_WASM) { + uint8_t* sd = NULL; + size_t ss = 0; + if (strip_object_bytes(env, ctx, &mbytes, opts, &sd, &ss) != 0) { + cfree_ar_iter_free(it); + it = NULL; + goto done; + } + owned_data[k] = sd; + owned_size[k] = ss; + members[k].data = sd; + members[k].len = ss; + } else { + members[k].data = m.data; + members[k].len = m.size; + } + k++; + } + } + cfree_ar_iter_free(it); + it = NULL; + + /* Pass 3: rebuild the System-V symbol index from the new bytes. 
*/ + if (nmembers) { + msyms = (CfreeArMemberSymbols*)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*msyms)); + sym_allocs = (void**)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*sym_allocs)); + sym_alloc_szs = (size_t*)driver_alloc_zeroed( + env, (size_t)nmembers * sizeof(*sym_alloc_szs)); + if (!msyms || !sym_allocs || !sym_alloc_szs) { + driver_errf(STRIP_TOOL, "out of memory"); + goto done; + } + for (k = 0; k < nmembers; ++k) { + void* blob = NULL; + size_t blob_size = 0; + const char** names = NULL; + uint32_t count = 0; + if (driver_collect_obj_global_syms(env, ctx, STRIP_TOOL, &members[k], + &blob, &blob_size, &names, + &count) != 0) { + goto done; + } + sym_allocs[k] = blob; + sym_alloc_szs[k] = blob_size; + msyms[k].names = names; + msyms[k].count = count; + } + } + + if (ctx->file_io->open_writer(ctx->file_io->user, output_path, &out) != + CFREE_OK) { + driver_errf(STRIP_TOOL, "failed to open: %s", output_path); + goto done; + } + opts_ar.epoch = strip_epoch_from_env(); + opts_ar.long_names = 1; + opts_ar.symbol_index = 1; + opts_ar.member_symbols = msyms; + rc = cfree_ar_write(out, members, nmembers, &opts_ar) == CFREE_OK ? 
0 : 1; + if (rc == 0 && cfree_writer_status(out) != CFREE_OK) rc = 1; + +done: + if (out) cfree_writer_close(out); + if (it) cfree_ar_iter_free(it); + if (sym_allocs) { + for (k = 0; k < nmembers; ++k) { + if (sym_allocs[k]) + driver_collect_obj_global_syms_free(env, sym_allocs[k], + sym_alloc_szs[k]); + } + driver_free(env, sym_allocs, (size_t)nmembers * sizeof(*sym_allocs)); + } + if (sym_alloc_szs) + driver_free(env, sym_alloc_szs, + (size_t)nmembers * sizeof(*sym_alloc_szs)); + if (msyms) driver_free(env, msyms, (size_t)nmembers * sizeof(*msyms)); + if (owned_data) { + for (k = 0; k < nmembers; ++k) { + if (owned_data[k]) driver_free(env, owned_data[k], owned_size[k]); + } + driver_free(env, owned_data, (size_t)nmembers * sizeof(*owned_data)); + } + if (owned_size) + driver_free(env, owned_size, (size_t)nmembers * sizeof(*owned_size)); + if (members) driver_free(env, members, (size_t)nmembers * sizeof(*members)); + if (name_storage) driver_free(env, name_storage, name_bytes_total); + return rc; +} + +int driver_strip(int argc, char** argv) { + DriverEnv env; + CfreeContext ctx; + StripOpts opts; + CfreeFileData input_fd = {0}; + CfreeBytes input; + CfreeWriter* w = NULL; + uint8_t* out_data = NULL; + size_t out_size = 0; + int have_input = 0; + int rc = 1; + int i; + + if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { + driver_help_strip(); + return 0; + } + + memset(&opts, 0, sizeof opts); + opts.op = STRIP_OP_ALL; + driver_env_init(&env); + ctx = driver_env_to_context(&env); + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + const char* val = NULL; + int matched; + if (driver_streq(a, "--strip-debug")) { + opts.op = STRIP_OP_DEBUG; + continue; + } + if (driver_streq(a, "--strip-unneeded")) { + opts.op = STRIP_OP_UNNEEDED; + continue; + } + if (driver_streq(a, "--strip-all") || driver_streq(a, "-s")) { + opts.op = STRIP_OP_ALL; + continue; + } + if (driver_streq(a, "-o")) { + if (i + 1 >= argc) { + driver_errf(STRIP_TOOL, "-o requires a 
path"); + rc = 2; + goto done; + } + opts.output = argv[++i]; + continue; + } + matched = parse_name_arg(&i, argc, argv, "--keep-symbol", "-K", &val); + if (matched < 0) { + driver_errf(STRIP_TOOL, "%s requires a symbol name", a); + rc = 2; + goto done; + } + if (matched) { + if (push_name(&env, &opts.keep, &opts.nkeep, &opts.cap_keep, val) != 0) { + rc = 1; + goto done; + } + continue; + } + matched = parse_name_arg(&i, argc, argv, "--strip-symbol", "-N", &val); + if (matched < 0) { + driver_errf(STRIP_TOOL, "%s requires a symbol name", a); + rc = 2; + goto done; + } + if (matched) { + if (push_name(&env, &opts.strip, &opts.nstrip, &opts.cap_strip, val) != + 0) { + rc = 1; + goto done; + } + continue; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(STRIP_TOOL, "unknown option: %s", a); + rc = 2; + goto done; + } + if (opts.input) { + driver_errf(STRIP_TOOL, "only one input file is supported"); + rc = 2; + goto done; + } + opts.input = a; + } + + if (!opts.input) { + driver_errf(STRIP_TOOL, "missing input file"); + rc = 2; + goto done; + } + + if (ctx.file_io->read_all(ctx.file_io->user, opts.input, &input_fd) != + CFREE_OK) { + driver_errf(STRIP_TOOL, "failed to read: %s", opts.input); + goto done; + } + have_input = 1; + input.name = opts.input; + input.data = input_fd.data; + input.len = input_fd.size; + + { + CfreeBinFmt fmt = cfree_detect_fmt(input.data, input.len); + const char* out_path = opts.output ? 
opts.output : opts.input; + if (fmt == CFREE_BIN_AR) { + rc = strip_archive(&env, &ctx, &input, &opts, out_path); + goto done; + } + if (strip_object_bytes(&env, &ctx, &input, &opts, &out_data, &out_size) != + 0) { + goto done; + } + if (ctx.file_io->open_writer(ctx.file_io->user, out_path, &w) != + CFREE_OK) { + driver_errf(STRIP_TOOL, "failed to open: %s", out_path); + goto done; + } + cfree_writer_write(w, out_data, out_size); + if (cfree_writer_status(w) != CFREE_OK) { + driver_errf(STRIP_TOOL, "write failed: %s", out_path); + goto done; + } + rc = 0; + } + +done: + if (w) cfree_writer_close(w); + if (out_data) driver_free(&env, out_data, out_size); + if (have_input) ctx.file_io->release(ctx.file_io->user, &input_fd); + if (opts.keep) + driver_free(&env, (void*)opts.keep, (size_t)opts.cap_keep * sizeof(*opts.keep)); + if (opts.strip) + driver_free(&env, (void*)opts.strip, + (size_t)opts.cap_strip * sizeof(*opts.strip)); + driver_env_fini(&env); + return rc; +} diff --git a/include/cfree/core.h b/include/cfree/core.h @@ -126,6 +126,13 @@ typedef enum CfreeSymKind { CFREE_SK_IFUNC, } CfreeSymKind; +typedef enum CfreeSymVis { + CFREE_SV_DEFAULT, + CFREE_SV_HIDDEN, + CFREE_SV_PROTECTED, + CFREE_SV_INTERNAL, +} CfreeSymVis; + typedef struct CfreePathPrefixMap { const char *old_prefix; const char *new_prefix; diff --git a/include/cfree/object.h b/include/cfree/object.h @@ -70,6 +70,9 @@ typedef struct CfreeObjSecInfo { typedef struct CfreeObjSymInfo { const char *name; + CfreeObjSymbol id; /* stable handle within this CfreeObjFile / builder; + usable as the target of cfree_obj_builder_remove_symbol, + rename_symbol, etc. */ CfreeSymBind bind; CfreeSymKind kind; CfreeObjSection section; @@ -119,6 +122,12 @@ typedef struct CfreeObjRelocDesc { CfreeStatus cfree_obj_builder_new(CfreeCompiler *, CfreeObjBuilder **out); void cfree_obj_builder_free(CfreeObjBuilder *); +/* Returns the CfreeCompiler this builder is bound to. 
Used by callers + * that hold a builder via cfree_obj_file_builder (the reader-side path) + * and need to intern strings into the matching Sym pool before issuing + * mutator calls. */ +CfreeCompiler *cfree_obj_builder_compiler(CfreeObjBuilder *); + CfreeStatus cfree_obj_builder_section(CfreeObjBuilder *, const CfreeObjSectionDesc *, CfreeObjSection *out); @@ -158,6 +167,44 @@ CfreeStatus cfree_obj_builder_group_add_section(CfreeObjBuilder *, CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder *); CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder *, CfreeWriter *); +/* Emit using a caller-specified output format instead of the builder's + * own target.obj. Used by `objcopy -O <bfdname>` to convert between + * ELF / Mach-O / COFF / Wasm at the same arch. Returns + * CFREE_UNSUPPORTED when the active arch doesn't have a backend for + * the requested format (e.g. RISC-V → Mach-O). */ +CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder *, CfreeObjFmt, + CfreeWriter *); + +/* ============================================================ + * Mutators (strip / objcopy support) + * ============================================================ + * + * Mark a section/symbol/group as removed, rename it, change a symbol's + * bind or visibility, or replace a section's bytes wholesale. Removed + * entries (and any cascade fallout) are filtered at emit time by the + * internal sweep; ids stay stable across mutations. + * + * Pre-existing readers obtained via cfree_obj_file_builder remain valid + * mutation targets — the reader produces an already-finalized builder + * and mutators are legal on it. 
+ */ +CfreeStatus cfree_obj_builder_remove_section(CfreeObjBuilder *, + CfreeObjSection); +CfreeStatus cfree_obj_builder_remove_symbol(CfreeObjBuilder *, CfreeObjSymbol); +CfreeStatus cfree_obj_builder_remove_group(CfreeObjBuilder *, CfreeObjGroup); +CfreeStatus cfree_obj_builder_rename_section(CfreeObjBuilder *, + CfreeObjSection, CfreeSym new_name); +CfreeStatus cfree_obj_builder_rename_symbol(CfreeObjBuilder *, CfreeObjSymbol, + CfreeSym new_name); +CfreeStatus cfree_obj_builder_symbol_set_bind(CfreeObjBuilder *, + CfreeObjSymbol, CfreeSymBind); +CfreeStatus cfree_obj_builder_symbol_set_vis(CfreeObjBuilder *, CfreeObjSymbol, + CfreeSymVis); +CfreeStatus cfree_obj_builder_section_replace_bytes(CfreeObjBuilder *, + CfreeObjSection, + const void *data, + size_t n); + /* ============================================================ * Reader / inspection * ============================================================ */ @@ -174,6 +221,17 @@ typedef enum CfreeBinFmt { typedef struct CfreeObjSymIter CfreeObjSymIter; typedef struct CfreeObjRelocIter CfreeObjRelocIter; +typedef struct CfreeObjGroupIter CfreeObjGroupIter; + +typedef struct CfreeObjGroupInfo { + const char *name; + CfreeObjSymbol signature; /* COMDAT key, or CFREE_OBJ_SYMBOL_NONE */ + uint32_t flags; /* CfreeObjGroupFlag */ + uint32_t nsections; + /* Borrowed; valid until the next groupiter_next call or _free. Members + * pointing at the OBJ_SEC_NONE sentinel are reported as CFREE_SECTION_NONE. */ + const CfreeObjSection *sections; +} CfreeObjGroupInfo; CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len); CfreeStatus cfree_detect_target(const uint8_t *data, size_t len, @@ -207,6 +265,13 @@ CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter *, CfreeObjReloc *out); void cfree_obj_reliter_free(CfreeObjRelocIter *); +/* Section-group iteration (ELF SHT_GROUP / COMDAT and friends). Empty + * for formats / objects that carry no groups. 
*/ +CfreeStatus cfree_obj_groupiter_new(CfreeObjFile *, CfreeObjGroupIter **out); +CfreeIterResult cfree_obj_groupiter_next(CfreeObjGroupIter *, + CfreeObjGroupInfo *out); +void cfree_obj_groupiter_free(CfreeObjGroupIter *); + /* Roundtrip: open an object via cfree_obj_open, then hand its underlying * builder back. The builder is the same one the reader populated; it is * already finalized, so callers may inspect it (e.g. iterate sections via @@ -214,11 +279,10 @@ void cfree_obj_reliter_free(CfreeObjRelocIter *); * cfree_obj_builder_emit to re-serialize the file. The builder lifetime is * tied to the CfreeObjFile; do not call cfree_obj_builder_free on it. * - * Mutation after open (add section, redefine symbol, etc.) is not currently - * supported — the read path closes the builder via obj_finalize, and the - * builder API rejects post-finalize writes. Filtered roundtrip (strip / - * objcopy --remove-section / --redefine-sym) needs a separate mutator - * surface that does not yet exist. */ + * Mutation is supported via the cfree_obj_builder_remove_* / rename_* / + * symbol_set_* / section_replace_bytes calls above. Drops and renames + * are cheap field writes; emit applies the cascade (drop relocs against + * removed sections, etc.) automatically. */ CfreeObjBuilder *cfree_obj_file_builder(const CfreeObjFile *); #endif diff --git a/src/api/object_builder.c b/src/api/object_builder.c @@ -50,6 +50,10 @@ void cfree_obj_builder_free(CfreeObjBuilder* b) { if (b) obj_free(b); } +CfreeCompiler* cfree_obj_builder_compiler(CfreeObjBuilder* b) { + return b ? 
(CfreeCompiler*)obj_compiler(b) : NULL; +} + CfreeStatus cfree_obj_builder_section(CfreeObjBuilder* b, const CfreeObjSectionDesc* desc, CfreeObjSection* out) { @@ -189,12 +193,85 @@ CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder* b) { return CFREE_OK; } +/* ---- mutators ---- */ + +CfreeStatus cfree_obj_builder_remove_section(CfreeObjBuilder* b, + CfreeObjSection sec) { + if (!b) return CFREE_INVALID; + obj_section_remove(b, pub_to_intern_sec(sec)); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_remove_symbol(CfreeObjBuilder* b, + CfreeObjSymbol sym) { + if (!b) return CFREE_INVALID; + obj_symbol_remove(b, pub_to_intern_sym(sym)); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_remove_group(CfreeObjBuilder* b, + CfreeObjGroup grp) { + if (!b) return CFREE_INVALID; + obj_group_remove(b, pub_to_intern_group(grp)); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_rename_section(CfreeObjBuilder* b, + CfreeObjSection sec, + CfreeSym new_name) { + if (!b) return CFREE_INVALID; + obj_section_rename(b, pub_to_intern_sec(sec), (Sym)new_name); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_rename_symbol(CfreeObjBuilder* b, + CfreeObjSymbol sym, + CfreeSym new_name) { + if (!b) return CFREE_INVALID; + obj_symbol_rename(b, pub_to_intern_sym(sym), (Sym)new_name); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_symbol_set_bind(CfreeObjBuilder* b, + CfreeObjSymbol sym, + CfreeSymBind bind) { + if (!b) return CFREE_INVALID; + obj_symbol_set_bind(b, pub_to_intern_sym(sym), (SymBind)bind); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_symbol_set_vis(CfreeObjBuilder* b, + CfreeObjSymbol sym, + CfreeSymVis vis) { + if (!b) return CFREE_INVALID; + obj_symbol_set_vis(b, pub_to_intern_sym(sym), (SymVis)vis); + return CFREE_OK; +} + +CfreeStatus cfree_obj_builder_section_replace_bytes(CfreeObjBuilder* b, + CfreeObjSection sec, + const void* data, + size_t n) { + if (!b) return CFREE_INVALID; + obj_section_replace_bytes(b, 
pub_to_intern_sec(sec), (const u8*)data, n); + return CFREE_OK; +} + CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder* b, CfreeWriter* w) { Compiler* c; if (!b || !w) return CFREE_INVALID; c = obj_compiler(b); if (!c) return CFREE_INVALID; - switch (c->target.obj) { + return cfree_obj_builder_emit_as(b, c->target.obj, w); +} + +CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder* b, CfreeObjFmt fmt, + CfreeWriter* w) { + Compiler* c; + if (!b || !w) return CFREE_INVALID; + c = obj_compiler(b); + if (!c) return CFREE_INVALID; + switch (fmt) { case CFREE_OBJ_ELF: emit_elf(c, b, w); break; diff --git a/src/api/object_file.c b/src/api/object_file.c @@ -197,9 +197,10 @@ CfreeStatus cfree_obj_section_by_name(const CfreeObjFile* f, const char* name, return CFREE_NOT_FOUND; } -static void fill_syminfo(const CfreeObjFile* f, const ObjSym* sym, +static void fill_syminfo(const CfreeObjFile* f, ObjSymId id, const ObjSym* sym, CfreeObjSymInfo* out) { out->name = sym->name ? pool_str(f->compiler.global, sym->name, NULL) : ""; + out->id = (id != OBJ_SYM_NONE) ? 
(CfreeObjSymbol)id : CFREE_OBJ_SYMBOL_NONE; out->bind = (CfreeSymBind)sym->bind; out->kind = (CfreeSymKind)sym->kind; out->section = sym->section_id != OBJ_SEC_NONE @@ -221,7 +222,7 @@ CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile* f, const char* name, if (!e.sym || !e.sym->name) continue; nm = pool_str(f->compiler.global, e.sym->name, NULL); if (nm && strcmp(nm, name) == 0) { - fill_syminfo(f, e.sym, out); + fill_syminfo(f, e.id, e.sym, out); obj_symiter_free(it); return CFREE_OK; } @@ -257,7 +258,7 @@ CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter* it, ObjSymEntry entry; if (!it || !out) return CFREE_ITER_ERROR; if (!obj_symiter_next(it->inner, &entry)) return CFREE_ITER_END; - fill_syminfo(it->file, entry.sym, out); + fill_syminfo(it->file, entry.id, entry.sym, out); return CFREE_ITER_ITEM; } @@ -327,6 +328,77 @@ void cfree_obj_reliter_free(CfreeObjRelocIter* it) { h->free(h, it, sizeof(*it)); } +struct CfreeObjGroupIter { + CfreeObjFile* file; + ObjGroupIter* inner; + /* Translation scratch for the borrowed `sections` slice handed back to + * the caller. Lazily grown to the largest group's nsections. 
*/ + CfreeObjSection* secbuf; + u32 seccap; +}; + +CfreeStatus cfree_obj_groupiter_new(CfreeObjFile* f, CfreeObjGroupIter** out) { + Heap* h; + CfreeObjGroupIter* it; + if (!f || !out) return CFREE_INVALID; + h = f->ctx->heap; + it = (CfreeObjGroupIter*)h->alloc(h, sizeof(*it), + _Alignof(CfreeObjGroupIter)); + if (!it) return CFREE_NOMEM; + memset(it, 0, sizeof(*it)); + it->file = f; + it->inner = obj_groupiter_new(f->ob); + if (!it->inner) { + h->free(h, it, sizeof(*it)); + return CFREE_NOMEM; + } + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_obj_groupiter_next(CfreeObjGroupIter* it, + CfreeObjGroupInfo* out) { + ObjGroupEntry entry; + Heap* h; + u32 i; + if (!it || !out) return CFREE_ITER_ERROR; + if (!obj_groupiter_next(it->inner, &entry)) return CFREE_ITER_END; + h = it->file->ctx->heap; + if (entry.group->nsections > it->seccap) { + CfreeObjSection* nb; + nb = (CfreeObjSection*)h->alloc( + h, sizeof(*nb) * entry.group->nsections, _Alignof(CfreeObjSection)); + if (!nb) return CFREE_ITER_ERROR; + if (it->secbuf) h->free(h, it->secbuf, sizeof(*it->secbuf) * it->seccap); + it->secbuf = nb; + it->seccap = entry.group->nsections; + } + for (i = 0; i < entry.group->nsections; ++i) { + ObjSecId sid = entry.group->sections[i]; + it->secbuf[i] = (sid != OBJ_SEC_NONE) ? (CfreeObjSection)(sid - 1) + : CFREE_SECTION_NONE; + } + out->name = entry.group->name + ? pool_str(it->file->compiler.global, entry.group->name, NULL) + : ""; + out->signature = (entry.group->signature != OBJ_SYM_NONE) + ? 
(CfreeObjSymbol)entry.group->signature + : CFREE_OBJ_SYMBOL_NONE; + out->flags = entry.group->flags; + out->nsections = entry.group->nsections; + out->sections = it->secbuf; + return CFREE_ITER_ITEM; +} + +void cfree_obj_groupiter_free(CfreeObjGroupIter* it) { + Heap* h; + if (!it) return; + h = it->file->ctx->heap; + if (it->secbuf) h->free(h, it->secbuf, sizeof(*it->secbuf) * it->seccap); + obj_groupiter_free(it->inner); + h->free(h, it, sizeof(*it)); +} + /* Accessor for disasm/jit to access the underlying ObjBuilder when both * are inside libcfree. Internal name kept stable for existing callers * (src/link/link_jit.c, src/api/disasm.c). */ diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -245,6 +245,13 @@ static u32 strtab_add(Buf* b, const char* s, u32 len) { void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { Heap* h = (Heap*)c->ctx->heap; + /* Run the tombstone sweep before any iteration: cascades removed + * sections into their defining symbols, drops dangling relocs, + * compacts groups, and absorbs the historical UNDEF prune. After this + * call every direct ID-based access below must skip entries whose + * `removed` bit is set. */ + obj_sweep_dead(ob); + /* ---- target validation ------------------------------------------ */ const ArchImpl* arch = arch_for_compiler(c); const ArchElfOps* elf = arch ? 
arch->elf : NULL; @@ -288,6 +295,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { for (u32 i = 1; i < nobjsec; ++i) { const Section* s = obj_section_get(ob, i); + if (s->removed) continue; /* tombstone — see obj_sweep_dead */ ElfSec* es = &secs[nsecs]; memset(es, 0, sizeof *es); u32 nlen; @@ -378,14 +386,9 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { ObjSymEntry e; while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; + if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ int is_local = (s->bind == SB_LOCAL); if ((pass == 0) != is_local) continue; - /* Prune unreferenced UNDEF externals — they came from header - * `extern` decls the TU never touched. See ObjSym::referenced. */ - if (s->kind == SK_UNDEF && !s->referenced && - (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { - continue; - } u32 nlen; const char* nm = sym_to_str(c, s->name, &nlen); u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0; @@ -409,6 +412,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { ObjSymIter* it = obj_symiter_new(ob); ObjSymEntry e; while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; if (e.sym->bind == SB_LOCAL) ++nlocals; } obj_symiter_free(it); @@ -430,7 +434,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp); for (u32 gi = 1; gi < nobjgrp; ++gi) { const ObjGroup* g = obj_group_get(ob, gi); - if (!g) continue; + if (!g || g->removed) continue; u32 body_size = 4u + 4u * g->nsections; u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32)); @@ -489,6 +493,8 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { u32 nrela_plans = 0; for (u32 si = 1; si < nobjsec; ++si) { + const Section* host = obj_section_get(ob, si); + if (!host || host->removed) continue; u32 nr = obj_reloc_count(ob, si); if (!nr) continue; u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr, @@ -496,6 +502,7 @@ void 
emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { u32 j = 0; for (u32 i = 0; i < total_relocs; ++i) { const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; if (r->section_id != si) continue; u32 etype = reloc_to(r->kind); if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ && @@ -668,6 +675,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { ObjSymEntry e; u32 nsec = obj_section_count(ob), si; while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; if (e.sym->kind == SK_IFUNC) { ident[EI_OSABI] = ELFOSABI_GNU; break; @@ -677,7 +685,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { if (ident[EI_OSABI] != ELFOSABI_GNU) { for (si = 1; si < nsec; ++si) { const Section* sec = obj_section_get(ob, si); - if (sec && (sec->flags & SF_RETAIN)) { + if (sec && !sec->removed && (sec->flags & SF_RETAIN)) { ident[EI_OSABI] = ELFOSABI_GNU; break; } diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c @@ -218,6 +218,11 @@ static int sym_is_extdef(const ObjSym* s) { void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { Heap* h = (Heap*)c->ctx->heap; + /* Tombstone sweep first — strip/objcopy mutations and the historical + * UNDEF prune are both expressed via Section.removed / ObjSym.removed + * post-sweep. See obj_sweep_dead. */ + obj_sweep_dead(ob); + /* ---- target validation ---------------------------------------- */ const ArchImpl* arch = arch_for_compiler(c); const ArchMachoOps* macho = arch ? arch->macho : NULL; @@ -251,6 +256,7 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { for (u32 i = 1; i < nobjsec; ++i) { const Section* s = obj_section_get(ob, i); + if (s->removed) continue; /* see obj_sweep_dead */ /* Skip ELF-style synthetic sections that read_elf would have * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O * representation as data sections. 
*/ @@ -353,21 +359,13 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { ObjSymEntry e; while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; + if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ int undef = sym_is_undef(s); int extdef = sym_is_extdef(s); int local = !undef && !extdef; int want = (pass == 0 && local) || (pass == 1 && extdef) || (pass == 2 && undef); if (!want) continue; - /* Prune unreferenced UNDEF externals: the C frontend mints an - * ObjSym for every header-supplied `extern` declaration whether - * or not the TU references it. obj_reloc_ex flags the ones we - * actually depend on; the rest never reach the output symtab. */ - if (undef && !s->referenced && - (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { - continue; - } - MSym* ms = &msyms[nmsyms]; ms->obj_id = e.id; @@ -475,11 +473,8 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { ObjSymEntry e; while (obj_symiter_next(it, &e)) { const ObjSym* s = e.sym; + if (s->removed) continue; int undef = sym_is_undef(s); - if (undef && !s->referenced && - (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { - continue; - } if (undef) ++nundefs; else if (sym_is_extdef(s)) @@ -503,6 +498,7 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { u32 j = 0; for (u32 ri = 0; ri < total_relocs; ++ri) { const Reloc* r = obj_reloc_at(ob, ri); + if (r->removed) continue; if (r->section_id != m->obj_sec) continue; if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 || r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) && diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -393,6 +393,171 @@ void obj_finalize(ObjBuilder* ob) { (void)ob; } +/* ---- mutators (strip / objcopy support) ---- */ + +void obj_section_remove(ObjBuilder* ob, ObjSecId id) { + Section* s; + if (!ob || id == OBJ_SEC_NONE) return; + s = Sections_at(&ob->sections, id); + if (!s) return; + s->removed = 1; +} + +void obj_symbol_remove(ObjBuilder* ob, ObjSymId id) { + ObjSym* s; + if (!ob || id == OBJ_SYM_NONE) 
return; + s = Symbols_at(&ob->symbols, id); + if (!s) return; + s->removed = 1; +} + +void obj_group_remove(ObjBuilder* ob, ObjGroupId id) { + ObjGroup* g; + if (!ob || id == OBJ_GROUP_NONE) return; + g = Groups_at(&ob->groups, id); + if (!g) return; + g->removed = 1; +} + +void obj_section_rename(ObjBuilder* ob, ObjSecId id, Sym new_name) { + Section* s; + if (!ob || id == OBJ_SEC_NONE) return; + s = Sections_at(&ob->sections, id); + if (!s) return; + s->name = new_name; +} + +void obj_symbol_rename(ObjBuilder* ob, ObjSymId id, Sym new_name) { + ObjSym* s; + if (!ob || id == OBJ_SYM_NONE) return; + s = Symbols_at(&ob->symbols, id); + if (!s) return; + s->name = new_name; +} + +void obj_symbol_set_bind(ObjBuilder* ob, ObjSymId id, SymBind bind) { + ObjSym* s; + if (!ob || id == OBJ_SYM_NONE) return; + s = Symbols_at(&ob->symbols, id); + if (!s) return; + s->bind = (u16)bind; +} + +void obj_symbol_set_vis(ObjBuilder* ob, ObjSymId id, SymVis vis) { + ObjSym* s; + if (!ob || id == OBJ_SYM_NONE) return; + s = Symbols_at(&ob->symbols, id); + if (!s) return; + s->vis = (u8)vis; +} + +void obj_section_replace_bytes(ObjBuilder* ob, ObjSecId id, const u8* data, + size_t n) { + Section* s; + if (!ob || id == OBJ_SEC_NONE) return; + s = Sections_at(&ob->sections, id); + if (!s) return; + /* Drop the old chunked Buf and reinitialize empty, then write the new + * bytes. Cheaper than scanning + patching when the replacement is + * different-sized — which it usually is (objcopy --update-section). */ + buf_fini(&s->bytes); + buf_init(&s->bytes, ob->heap); + s->bss_size = 0; + if (data && n) buf_write(&s->bytes, data, n); +} + +void obj_sweep_dead(ObjBuilder* ob) { + u32 nsec = Sections_count(&ob->sections); + u32 nsym = Symbols_count(&ob->symbols); + u32 nrel = Relocs_count(&ob->relocs); + u32 ngrp = Groups_count(&ob->groups); + u32 i; + + /* Pass 1: cascade removed sections into their defining symbols. 
Also + * absorbs the historical UNDEF-prune predicate: any non-referenced + * global/weak symbol that lacks a defining section (and isn't an ABS + * or COMMON definition, both of which legitimately have section_id == + * OBJ_SEC_NONE) is a spurious extern from a header — drop it. + * + * The "no defining section" test matches macho_emit's sym_is_undef, + * which is stronger than `kind == SK_UNDEF`: frontends mint SK_OBJ / + * SK_TLS / SK_FUNC entries for extern decls and only set them to + * SK_UNDEF for true references, so checking section_id catches both. */ + for (i = 1; i < nsym; ++i) { + ObjSym* s = Symbols_at(&ob->symbols, i); + if (!s || s->removed) continue; + if (s->section_id != OBJ_SEC_NONE) { + const Section* sec = Sections_at(&ob->sections, s->section_id); + if (sec && sec->removed) { + s->removed = 1; + continue; + } + } + if (s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && + s->kind != SK_COMMON && !s->referenced && + (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) { + s->removed = 1; + } + } + + /* Pass 2: drop relocs that became dangling. A reloc is dead if its + * containing section, its target symbol, or the symbol's defining + * section is gone. */ + for (i = 0; i < nrel; ++i) { + Reloc* r = Relocs_at(&ob->relocs, i); + if (!r || r->removed) continue; + if (r->section_id != OBJ_SEC_NONE) { + const Section* sec = Sections_at(&ob->sections, r->section_id); + if (!sec || sec->removed) { + r->removed = 1; + continue; + } + } + if (r->sym != OBJ_SYM_NONE) { + const ObjSym* ts = Symbols_at(&ob->symbols, r->sym); + if (!ts || ts->removed) r->removed = 1; + } + } + + /* Pass 3: compact each group's member list to drop removed sections; + * tombstone the group if its list empties out or its signature symbol + * is removed. Member list is rewritten in place — the storage stays + * the same size, the trailing slots just become unused. 
*/ + for (i = 1; i < ngrp; ++i) { + ObjGroup* g = Groups_at(&ob->groups, i); + u32 w, r; + if (!g || g->removed) continue; + if (g->signature != OBJ_SYM_NONE) { + const ObjSym* sig = Symbols_at(&ob->symbols, g->signature); + if (!sig || sig->removed) { + g->removed = 1; + continue; + } + } + w = 0; + for (r = 0; r < g->nsections; ++r) { + ObjSecId sid = g->sections[r]; + const Section* sec = + (sid != OBJ_SEC_NONE) ? Sections_at(&ob->sections, sid) : NULL; + if (sec && !sec->removed) g->sections[w++] = sid; + } + g->nsections = w; + if (w == 0) g->removed = 1; + } + + /* Pass 4: clear Section.link if it now points at a removed section. + * (Section.info is type-dependent — leave it to the emitter, which + * already inspects the sem to interpret it.) */ + for (i = 1; i < nsec; ++i) { + Section* s = Sections_at(&ob->sections, i); + if (!s || s->removed) continue; + if (s->link != OBJ_SEC_NONE) { + const Section* lk = Sections_at(&ob->sections, s->link); + if (!lk || lk->removed) s->link = OBJ_SEC_NONE; + } + } +} + /* ---- read side ---- */ u32 obj_section_count(const ObjBuilder* ob) { @@ -408,6 +573,7 @@ u32 obj_reloc_count(const ObjBuilder* ob, ObjSecId id) { u32 i, total = Relocs_count(&ob->relocs), n = 0; for (i = 0; i < total; ++i) { const Reloc* r = Relocs_at(&ob->relocs, i); + if (r->removed) continue; if (r->section_id == id) ++n; } return n; @@ -455,3 +621,33 @@ void obj_symiter_free(ObjSymIter* it) { if (!it) return; ((Heap*)it->ob->heap)->free((Heap*)it->ob->heap, it, sizeof(*it)); } + +struct ObjGroupIter { + const ObjBuilder* ob; + u32 idx; /* next index to return */ +}; + +ObjGroupIter* obj_groupiter_new(const ObjBuilder* ob) { + ObjGroupIter* it = (ObjGroupIter*)ob->heap->alloc(ob->heap, sizeof(*it), + _Alignof(ObjGroupIter)); + if (!it) return NULL; + it->ob = ob; + it->idx = 1; /* skip the id-0 sentinel */ + return it; +} + +int obj_groupiter_next(ObjGroupIter* it, ObjGroupEntry* out) { + const ObjGroup* g; + if (!it) return 0; + g = 
Groups_at(&it->ob->groups, it->idx); + if (!g) return 0; + out->id = it->idx; + out->group = g; + it->idx++; + return 1; +} + +void obj_groupiter_free(ObjGroupIter* it) { + if (!it) return; + ((Heap*)it->ob->heap)->free((Heap*)it->ob->heap, it, sizeof(*it)); +} diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -245,6 +245,11 @@ typedef struct Section { u32 ext_type; u32 ext_flags; /* same idea for format-specific sh_flags bits not represented in SecFlag (e.g. SHF_EXCLUDE) */ + /* Tombstone for strip/objcopy-style mutations. Set by + * obj_section_remove; honored by obj_sweep_dead and the emitters. + * Iterators / direct ID-based access on the builder must consult this + * bit and skip removed entries. */ + u8 removed; Buf bytes; } Section; @@ -254,6 +259,10 @@ typedef struct Reloc { u16 kind; u8 has_explicit_addend; u8 pair; /* paired/following relocation, format-specific */ + /* Tombstone set by obj_sweep_dead when the reloc points at a removed + * section or symbol. Lives in the slack between `pair` and `sym` — no + * struct-size change. */ + u8 removed; ObjSymId sym; i64 addend; } Reloc; @@ -284,6 +293,12 @@ typedef struct ObjSym { * mark every read-in symbol referenced=1 so a roundtrip preserves * UNDEFs that came from another tool's output. */ u8 referenced; + /* Tombstone for strip/objcopy. Set by obj_symbol_remove or cascaded + * by obj_sweep_dead when this symbol is defined in a removed section. + * The UNDEF-prune predicate (was: !referenced && SK_UNDEF && global/weak) + * is also folded into the sweep, so emit-time symbol loops only need to + * check `removed`. */ + u8 removed; } ObjSym; typedef struct ObjGroup { @@ -292,6 +307,10 @@ typedef struct ObjGroup { ObjSecId* sections; u32 nsections; u32 flags; + /* Tombstone — set by obj_group_remove, or cascaded by obj_sweep_dead + * when every member section has been removed (or the signature symbol + * has been removed). */ + u8 removed; } ObjGroup; /* The single concrete in-memory object representation. 
@@ -379,6 +398,52 @@ void obj_group_add_section(ObjBuilder*, ObjGroupId group_id, void obj_finalize(ObjBuilder*); +/* ---- post-finalize mutators (strip / objcopy support) ---- + * + * Mutators flip per-entry fields and / or `removed` tombstones. Cascading + * cleanup (drop relocs against removed sections, etc.) is deferred to + * obj_sweep_dead, which the emitters call automatically. Mutators are + * cheap individual field writes; they do not re-index or compact storage, + * so ObjSecId / ObjSymId / ObjGroupId remain stable. + * + * No-ops when given OBJ_SEC_NONE / OBJ_SYM_NONE / OBJ_GROUP_NONE, and + * silently ignore ids that are out of range or already removed (the + * driver tools call these in bulk and benefit from idempotency). */ +void obj_section_remove(ObjBuilder*, ObjSecId); +void obj_symbol_remove(ObjBuilder*, ObjSymId); +void obj_group_remove(ObjBuilder*, ObjGroupId); +void obj_section_rename(ObjBuilder*, ObjSecId, Sym new_name); +void obj_symbol_rename(ObjBuilder*, ObjSymId, Sym new_name); +void obj_symbol_set_bind(ObjBuilder*, ObjSymId, SymBind); +void obj_symbol_set_vis(ObjBuilder*, ObjSymId, SymVis); +/* Replace `section_id`'s contents wholesale with `n` bytes from `data`. + * Resets bss_size (so a former NOBITS section gains real bytes) and + * preserves the section's other attributes (name, kind, flags, align). + * Existing relocations against the section are kept — caller is + * responsible for issuing obj_symbol_remove on any defined symbols whose + * (value, size) no longer fits, etc. */ +void obj_section_replace_bytes(ObjBuilder*, ObjSecId, const u8* data, size_t n); + +/* Tombstone-driven consistency sweep. Called by each file-format emitter + * at the top of emit; consumers that walk a builder by raw section/symbol/ + * reloc/group ID after sweep must respect the `removed` bit on each entry. + * + * Does the following passes: + * 1. Cascade: any symbol defined in a removed section becomes removed. + * 2. 
UNDEF prune: any non-referenced global/weak symbol with no defining section + * (SK_ABS / SK_COMMON excepted, not just SK_UNDEF) becomes removed (folds the historical "spurious extern from a header" filter). + * 3. Reloc cleanup: any reloc whose containing section, defining section, + * or target symbol is removed becomes removed. + * 4. Group compaction: each group's section list is filtered in place to + * drop removed members; a group whose list empties out (or whose + * signature symbol has been removed) is itself marked removed. + * 5. Section link cleanup: Section.link cleared if it points at a + * removed section. + * + * Idempotent — safe to call multiple times. On a never-mutated builder + * only pass 2 has any effect. */ +void obj_sweep_dead(ObjBuilder*); + /* Format-specific ELF e_flags (per-arch ABI bits, e.g. EF_RISCV_RVC | * EF_RISCV_FLOAT_ABI_DOUBLE on RV64). Set by read_elf during input * parsing; consumed by emit_elf for round-trip. The setter records @@ -408,7 +473,13 @@ const ObjGroup* obj_group_get(const ObjBuilder*, ObjGroupId id); /* Symbol iteration: ObjSymId is scoped to this builder, but callers should not * assume dense contiguous ids or direct indexing. The builder may store symbols - * in segments internally; use the cursor. */ + * in segments internally; use the cursor. + * + * The iterator is raw — it visits every symbol slot including those whose + * `removed` tombstone is set. Callers that want post-sweep semantics must + * check ObjSym::removed themselves. (Consistent with Section.removed and + * Reloc.removed: tombstones live as a per-entry field, not behind the + * iterator.) */ typedef struct ObjSymIter ObjSymIter; typedef struct ObjSymEntry { ObjSymId id; @@ -418,6 +489,19 @@ ObjSymIter* obj_symiter_new(const ObjBuilder*); int obj_symiter_next(ObjSymIter*, ObjSymEntry* out); /* returns 0 at end */ void obj_symiter_free(ObjSymIter*); +/* Group iteration: peer of obj_symiter for groups (COMDAT and friends). + * Same segmented-storage caveat — use the cursor, don't index directly. 
+ * Like obj_symiter, this is raw: tombstoned groups are still returned; + * callers consult ObjGroup::removed. */ +typedef struct ObjGroupIter ObjGroupIter; +typedef struct ObjGroupEntry { + ObjGroupId id; + const ObjGroup* group; +} ObjGroupEntry; +ObjGroupIter* obj_groupiter_new(const ObjBuilder*); +int obj_groupiter_next(ObjGroupIter*, ObjGroupEntry* out); /* 0 at end */ +void obj_groupiter_free(ObjGroupIter*); + /* Writer is the public CfreeWriter type aliased to Writer inside libcfree * (see src/core/core.h). The streaming API lives in <cfree/core.h> as * cfree_writer_*. */ diff --git a/test/elf/unit/groupiter.c b/test/elf/unit/groupiter.c @@ -0,0 +1,217 @@ +/* Hand-built ObjBuilder with a COMDAT group, exercised via both the + * internal obj_groupiter and the public cfree_obj_groupiter. + * + * Steps: + * 1. Build an ELF with two sections wired into one COMDAT group. + * 2. Read internal iter on the freshly built builder; verify shape. + * 3. Emit ELF, reopen via cfree_obj_open, walk the public iter; verify + * group survives the on-disk roundtrip and section IDs are + * remapped to the public 0-based space. */ + +#include <cfree/core.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/core.h" +#include "core/pool.h" +#include "lib/cfree_test_target.h" +#include "obj/obj.h" + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +static int g_failures; +#define CHECK(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* mov w0, #1 ; ret */ +static const uint8_t TEXT_BYTES[8] = { + 0x20, 0x00, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6, +}; + +static int name_eq(const char* s, const char* want) { + return s && strcmp(s, want) == 0; +} + +int main(void) { + CfreeTarget target; + if (cfree_test_target_init(&target) != 0) { + fprintf(stderr, "FAIL: cfree_test_target_init\n"); + return 1; + } + + CfreeContext ctx = {.heap = &g_heap, + .file_io = NULL, + .diag = &g_diag, + .metrics = NULL, + .now = -1}; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(target, &ctx, &cc) != CFREE_OK || !cc) { + fprintf(stderr, "FAIL: cfree_compiler_new\n"); + return 1; + } + Compiler* c = (Compiler*)cc; + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free(cc); + fprintf(stderr, "FAIL: compiler_panic\n"); + return 1; + } + + /* ---- build ---- */ + ObjBuilder* in = obj_new(c); + Pool* p = c->global; + + Sym sig_nm = pool_intern_cstr(p, "comdat_sig"); + Sym text_nm = pool_intern_cstr(p, ".text.comdat_fn"); + Sym data_nm = pool_intern_cstr(p, ".data.comdat_fn"); + + ObjSecId sec_text 
= obj_section(in, text_nm, SEC_TEXT, + SF_ALLOC | SF_EXEC | SF_GROUP, 4); + ObjSecId sec_data = obj_section(in, data_nm, SEC_DATA, + SF_ALLOC | SF_WRITE | SF_GROUP, 8); + obj_write(in, sec_text, TEXT_BYTES, sizeof TEXT_BYTES); + static const uint8_t zero8[8] = {0}; + obj_write(in, sec_data, zero8, sizeof zero8); + + ObjSymId sig_sym = + obj_symbol(in, sig_nm, SB_WEAK, SK_FUNC, sec_text, 0, sizeof TEXT_BYTES); + + ObjGroupId gid = obj_group(in, sig_nm, sig_sym, CFREE_OBJ_GROUP_COMDAT); + obj_group_add_section(in, gid, sec_text); + obj_group_add_section(in, gid, sec_data); + + obj_finalize(in); + + /* ---- internal iter ---- */ + { + ObjGroupIter* it = obj_groupiter_new(in); + ObjGroupEntry e; + int seen = 0; + while (obj_groupiter_next(it, &e)) { + ++seen; + CHECK(e.id == gid, "internal iter: id=%u, want %u", e.id, gid); + CHECK(e.group->nsections == 2, "internal iter: nsections=%u, want 2", + e.group->nsections); + CHECK(e.group->signature == sig_sym, "internal iter: signature=%u", + e.group->signature); + CHECK((e.group->flags & CFREE_OBJ_GROUP_COMDAT) != 0, + "internal iter: missing COMDAT flag (flags=0x%x)", e.group->flags); + CHECK(e.group->sections[0] == sec_text, + "internal iter: sections[0]=%u, want %u", e.group->sections[0], + sec_text); + CHECK(e.group->sections[1] == sec_data, + "internal iter: sections[1]=%u, want %u", e.group->sections[1], + sec_data); + } + CHECK(seen == 1, "internal iter: saw %d groups, want 1", seen); + obj_groupiter_free(it); + } + + /* ---- emit + public-iter readback ---- */ + CfreeWriter* w = NULL; + (void)cfree_writer_mem(&g_heap, &w); + emit_elf(c, in, w); + size_t out_len = 0; + const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len); + uint8_t* roundtrip = (uint8_t*)malloc(out_len ? 
out_len : 1); + memcpy(roundtrip, out_data, out_len); + cfree_writer_close(w); + + CfreeBytes input = {.name = "groupiter", .data = roundtrip, .len = out_len}; + CfreeObjFile* f = NULL; + CHECK(cfree_obj_open(&ctx, &input, &f) == CFREE_OK && f, + "cfree_obj_open failed"); + + if (f) { + /* Resolve section IDs by name so the test isn't coupled to ordering. */ + CfreeObjSection text_pub = CFREE_SECTION_NONE; + CfreeObjSection data_pub = CFREE_SECTION_NONE; + CHECK(cfree_obj_section_by_name(f, ".text.comdat_fn", &text_pub) == + CFREE_OK, + "section_by_name .text.comdat_fn"); + CHECK(cfree_obj_section_by_name(f, ".data.comdat_fn", &data_pub) == + CFREE_OK, + "section_by_name .data.comdat_fn"); + + CfreeObjGroupIter* git = NULL; + CHECK(cfree_obj_groupiter_new(f, &git) == CFREE_OK, + "cfree_obj_groupiter_new"); + int seen = 0; + CfreeObjGroupInfo gi; + while (cfree_obj_groupiter_next(git, &gi) == CFREE_ITER_ITEM) { + ++seen; + CHECK(name_eq(gi.name, "comdat_sig"), "public iter: name=%s", + gi.name ? gi.name : "(null)"); + CHECK((gi.flags & CFREE_OBJ_GROUP_COMDAT) != 0, + "public iter: missing COMDAT flag (flags=0x%x)", gi.flags); + CHECK(gi.nsections == 2, "public iter: nsections=%u, want 2", + gi.nsections); + CHECK(gi.sections != NULL, "public iter: NULL sections"); + if (gi.sections && gi.nsections == 2) { + /* Order is preserved by ELF SHT_GROUP, so members[0] should be + * sec_text and members[1] sec_data. */ + CHECK(gi.sections[0] == text_pub, + "public iter: sections[0]=%u, want %u (text)", gi.sections[0], + text_pub); + CHECK(gi.sections[1] == data_pub, + "public iter: sections[1]=%u, want %u (data)", gi.sections[1], + data_pub); + } + /* Signature should be a valid public symbol id, not NONE. 
*/ + CHECK(gi.signature != CFREE_OBJ_SYMBOL_NONE, + "public iter: signature is NONE"); + } + CHECK(seen == 1, "public iter: saw %d groups, want 1", seen); + cfree_obj_groupiter_free(git); + cfree_obj_free(f); + } + + free(roundtrip); + obj_free(in); + cfree_compiler_free(cc); + + if (g_failures) { + fprintf(stderr, "%d failure(s)\n", g_failures); + return 1; + } + fputs("groupiter: OK\n", stderr); + return 0; +} diff --git a/test/elf/unit/mutate.c b/test/elf/unit/mutate.c @@ -0,0 +1,231 @@ +/* Exercises the post-finalize mutator API: + * + * 1. Build a small ELF: .text + .data + two UNDEF externs (only one referenced). + * 2. After finalize, remove .data and rename the entry symbol in .text; + * also flip a SB_GLOBAL symbol (keep_global) to SB_LOCAL. + * 3. Emit + reopen, verify: + * - .data is gone + * - entry shows up under its new name and the old name is gone + * - the localized symbol round-trips as SB_LOCAL + * - relocs that lived in .data are dropped, yet foo_ref stays as UNDEF + * - the spurious UNDEF (kind=SK_UNDEF, !referenced) is pruned by the + * sweep that mutators now share with the historical UNDEF prune. */ + +#include <cfree/core.h> +#include <cfree/object.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "core/core.h" +#include "core/pool.h" +#include "lib/cfree_test_target.h" +#include "obj/obj.h" + +static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void heap_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +static int g_failures; +#define CHECK(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* AArch64 encoding of: mov w0, #7 ; ret */ +static const uint8_t TEXT_BYTES[8] = { + 0xe0, 0x00, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6, +}; + +int main(void) { + CfreeTarget target; + if (cfree_test_target_init(&target) != 0) { + fprintf(stderr, "FAIL: cfree_test_target_init\n"); + return 1; + } + CfreeContext ctx = {.heap = &g_heap, + .file_io = NULL, + .diag = &g_diag, + .metrics = NULL, + .now = -1}; + CfreeCompiler* cc = NULL; + if (cfree_compiler_new(target, &ctx, &cc) != CFREE_OK || !cc) { + fprintf(stderr, "FAIL: cfree_compiler_new\n"); + return 1; + } + Compiler* c = (Compiler*)cc; + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free(cc); + fprintf(stderr, "FAIL: compiler_panic\n"); + return 1; + } + + /* ---- build ---- */ + ObjBuilder* ob = obj_new(c); + Pool* p = c->global; + + Sym text_nm = pool_intern_cstr(p, ".text"); + Sym data_nm = pool_intern_cstr(p, ".data"); + Sym entry_nm = pool_intern_cstr(p, "entry"); + Sym entry_new_nm = pool_intern_cstr(p, "renamed_entry"); + Sym keep_nm = pool_intern_cstr(p, "keep_global"); + Sym foo_nm = 
pool_intern_cstr(p, "foo_ref"); + Sym spurious_nm = pool_intern_cstr(p, "spurious_extern"); + + ObjSecId sec_text = + obj_section(ob, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 4); + ObjSecId sec_data = + obj_section(ob, data_nm, SEC_DATA, SF_ALLOC | SF_WRITE, 8); + + obj_write(ob, sec_text, TEXT_BYTES, sizeof TEXT_BYTES); + static const uint8_t zero8[8] = {0}; + obj_write(ob, sec_data, zero8, sizeof zero8); + + ObjSymId sym_entry = obj_symbol(ob, entry_nm, SB_GLOBAL, SK_FUNC, sec_text, 0, + sizeof TEXT_BYTES); + ObjSymId sym_keep = + obj_symbol(ob, keep_nm, SB_GLOBAL, SK_FUNC, sec_text, 0, 0); + ObjSymId sym_foo = + obj_symbol(ob, foo_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + /* Spurious extern: no reloc targets it, so it stays !referenced and the sweep tombstones it. */ + ObjSymId sym_spurious = + obj_symbol(ob, spurious_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + (void)sym_spurious; + + /* Two relocs, both placed in .data: one against foo_ref (this marks the + * UNDEF as referenced) and one against entry (renamed below). Both must + * die via cascade with .data. NOTE(review): offset 8 == sizeof zero8. */ + obj_reloc(ob, sec_data, 0, R_ABS64, sym_foo, 0); + obj_reloc(ob, sec_data, 8, R_ABS64, sym_entry, 0); + + obj_finalize(ob); + + /* ---- mutate via the public API ---- */ + /* The builder we got from obj_new is also a CfreeObjBuilder — same + * handle type — so we can drive the public mutator surface directly. */ + CfreeObjBuilder* pb = (CfreeObjBuilder*)ob; + + /* Convert internal ids to public ids the same way object_builder.c does: the public section id is the internal id minus one, symbol ids are cast unchanged. 
*/ + CfreeObjSection pub_sec_data = (CfreeObjSection)(sec_data - 1); + CfreeObjSymbol pub_sym_entry = (CfreeObjSymbol)sym_entry; + CfreeObjSymbol pub_sym_keep = (CfreeObjSymbol)sym_keep; + + CHECK(cfree_obj_builder_remove_section(pb, pub_sec_data) == CFREE_OK, + "remove_section .data"); + CHECK(cfree_obj_builder_rename_symbol(pb, pub_sym_entry, + (CfreeSym)entry_new_nm) == CFREE_OK, + "rename_symbol entry -> renamed_entry"); + CHECK(cfree_obj_builder_symbol_set_bind(pb, pub_sym_keep, CFREE_SB_LOCAL) == + CFREE_OK, + "set_bind keep_global -> local"); + + /* ---- emit, copy the bytes out before closing the writer, reopen ---- */ + CfreeWriter* w = NULL; + (void)cfree_writer_mem(&g_heap, &w); + CHECK(cfree_obj_builder_emit(pb, w) == CFREE_OK, "emit after mutate"); + + size_t out_len = 0; + const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len); + uint8_t* roundtrip = (uint8_t*)malloc(out_len ? out_len : 1); + memcpy(roundtrip, out_data, out_len); + cfree_writer_close(w); + + CfreeBytes input = {.name = "mutate", .data = roundtrip, .len = out_len}; + CfreeObjFile* f = NULL; + CHECK(cfree_obj_open(&ctx, &input, &f) == CFREE_OK && f, "reopen"); + + if (f) { + /* .data should be gone. */ + CfreeObjSection s_data = CFREE_SECTION_NONE; + CfreeStatus st_data = + cfree_obj_section_by_name(f, ".data", &s_data); + CHECK(st_data == CFREE_NOT_FOUND, ".data still present after removal"); + + /* .text should remain. */ + CfreeObjSection s_text = CFREE_SECTION_NONE; + CHECK(cfree_obj_section_by_name(f, ".text", &s_text) == CFREE_OK, + ".text missing after roundtrip"); + + /* Renamed entry symbol is present; old name is gone. */ + CfreeObjSymInfo si; + CHECK(cfree_obj_symbol_by_name(f, "renamed_entry", &si) == CFREE_OK, + "renamed_entry not found"); + CHECK(cfree_obj_symbol_by_name(f, "entry", &si) == CFREE_NOT_FOUND, + "old 'entry' symbol survived rename"); + + /* keep_global was localized; reads back as SB_LOCAL. 
*/ + CHECK(cfree_obj_symbol_by_name(f, "keep_global", &si) == CFREE_OK, + "keep_global lost"); + CHECK(si.bind == CFREE_SB_LOCAL, "keep_global bind=%d, want LOCAL=%d", + (int)si.bind, (int)CFREE_SB_LOCAL); + + /* Spurious extern was pruned by the sweep. */ + CHECK(cfree_obj_symbol_by_name(f, "spurious_extern", &si) == + CFREE_NOT_FOUND, + "spurious_extern survived sweep"); + + /* foo_ref was the target of a reloc that lived in .data. Removing the + * section dropped that reloc, but the symbol itself must survive: + * its `referenced` flag was set when the reloc was recorded at build + * time (obj_reloc / obj_reloc_ex), and the sweep does not clear the + * flag when it drops the reloc. foo_ref is therefore still emitted, + * as a plain UNDEF. */ + CHECK(cfree_obj_symbol_by_name(f, "foo_ref", &si) == CFREE_OK, + "foo_ref UNDEF should survive"); + + /* No relocs should remain — all were in the removed .data. */ + CfreeObjRelocIter* rit = NULL; + CHECK(cfree_obj_reliter_new(f, &rit) == CFREE_OK, "reliter_new"); + int nrel = 0; + CfreeObjReloc r; + while (cfree_obj_reliter_next(rit, &r) == CFREE_ITER_ITEM) ++nrel; + cfree_obj_reliter_free(rit); + CHECK(nrel == 0, "expected 0 relocs after removing .data, got %d", nrel); + + cfree_obj_free(f); + } + + free(roundtrip); + obj_free(ob); + cfree_compiler_free(cc); + + if (g_failures) { + fprintf(stderr, "%d failure(s)\n", g_failures); + return 1; + } + fputs("mutate: OK\n", stderr); + return 0; +} diff --git a/test/objcopy/cases/01-rename-section.expected b/test/objcopy/cases/01-rename-section.expected @@ -0,0 +1 @@ +__TEXT,__mytext diff --git a/test/objcopy/cases/01-rename-section.sh b/test/objcopy/cases/01-rename-section.sh @@ -0,0 +1,6 @@ +cat > smoke.c <<'EOF' +int foo(void) { return 1; } +EOF +"$CFREE" cc -c smoke.c -o smoke.o +"$CFREE" objcopy --rename-section=__TEXT,__text=__TEXT,__mytext smoke.o smoke.r.o +"$CFREE" objdump -h smoke.r.o | awk '/^ *[0-9]+ /{print $2}' diff --git 
a/test/objcopy/cases/02-redefine-sym.expected b/test/objcopy/cases/02-redefine-sym.expected @@ -0,0 +1,2 @@ +_bar +_renamed_foo diff --git a/test/objcopy/cases/02-redefine-sym.sh b/test/objcopy/cases/02-redefine-sym.sh @@ -0,0 +1,7 @@ +cat > smoke.c <<'EOF' +int foo(void) { return 1; } +int bar(void) { return foo(); } +EOF +"$CFREE" cc -c smoke.c -o smoke.o +"$CFREE" objcopy --redefine-sym=_foo=_renamed_foo smoke.o smoke.r.o +"$CFREE" objdump -t smoke.r.o | awk '$NF ~ /^_/{print $NF}' | sort diff --git a/test/objcopy/cases/03-localize-symbol.expected b/test/objcopy/cases/03-localize-symbol.expected @@ -0,0 +1,2 @@ +g _bar +l _foo diff --git a/test/objcopy/cases/03-localize-symbol.sh b/test/objcopy/cases/03-localize-symbol.sh @@ -0,0 +1,8 @@ +cat > smoke.c <<'EOF' +int foo(void) { return 1; } +int bar(void) { return foo(); } +EOF +"$CFREE" cc -c smoke.c -o smoke.o +"$CFREE" objcopy --localize-symbol=_foo smoke.o smoke.l.o +# Print each symbol's bind ('l' = local, 'g' = global) plus name. +"$CFREE" objdump -t smoke.l.o | awk '$NF ~ /^_/{print $2, $NF}' | sort diff --git a/test/objcopy/cases/04-add-section.expected b/test/objcopy/cases/04-add-section.expected @@ -0,0 +1,2 @@ +__DATA,__custom +__TEXT,__text diff --git a/test/objcopy/cases/04-add-section.sh b/test/objcopy/cases/04-add-section.sh @@ -0,0 +1,7 @@ +cat > smoke.c <<'EOF' +int foo(void) { return 1; } +EOF +printf 'hello\n' > payload.bin +"$CFREE" cc -c smoke.c -o smoke.o +"$CFREE" objcopy --add-section=__DATA,__custom=payload.bin smoke.o smoke.a.o +"$CFREE" objdump -h smoke.a.o | awk '/^ *[0-9]+ /{print $2}' | sort diff --git a/test/objcopy/run.sh b/test/objcopy/run.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# Driver-level `cfree objcopy` test harness: runs each case script in its own sandbox dir and diffs the output against the sibling .expected file. Same shape as test/ar/run.sh. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." && pwd) +cases_dir="$script_dir/cases" + +CFREE="${CFREE:-$repo_root/build/cfree}" +export CFREE + +if [ ! 
-x "$CFREE" ]; then + echo "objcopy-driver: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +work_root=$(mktemp -d "${TMPDIR:-/tmp}/cfree-objcopy-test.XXXXXX") +trap 'rm -rf "$work_root"' EXIT + +pass=0 +fail=0 +failures= + +for sh in "$cases_dir"/*.sh; do + [ -e "$sh" ] || continue + name=$(basename "${sh%.sh}") + expected="${sh%.sh}.expected" + actual="$work_root/$name.actual" + + if [ ! -e "$expected" ]; then + printf 'FAIL %s (missing %s)\n' "$name" "$(basename "$expected")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + sandbox="$work_root/$name" + mkdir -p "$sandbox" + ( cd "$sandbox" && sh "$sh" ) > "$actual" 2>&1 + case_rc=$? + + if [ "$case_rc" -ne 0 ]; then + printf 'FAIL %s (script exit=%d)\n' "$name" "$case_rc" + diff -u "$expected" "$actual" || true + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if diff -u "$expected" "$actual" >/dev/null 2>&1; then + printf 'PASS %s\n' "$name" + pass=$((pass + 1)) + else + printf 'FAIL %s\n' "$name" + diff -u "$expected" "$actual" || true + cp "$actual" "$cases_dir/$name.actual" 2>/dev/null || true + fail=$((fail + 1)) + failures="$failures $name" + fi +done + +total=$((pass + fail)) +if [ "$fail" -gt 0 ]; then + printf '\nobjcopy-driver: failures:%s\n' "$failures" + printf 'objcopy-driver: %d/%d passed\n' "$pass" "$total" + exit 1 +fi +printf '\nobjcopy-driver: %d/%d passed\n' "$pass" "$total" diff --git a/test/strip/cases/01-strip-debug.expected b/test/strip/cases/01-strip-debug.expected @@ -0,0 +1,5 @@ +== sections == +__TEXT,__text +== symbols == +_helper +_main diff --git a/test/strip/cases/01-strip-debug.sh b/test/strip/cases/01-strip-debug.sh @@ -0,0 +1,14 @@ +# --strip-debug drops every CFREE_SEC_DEBUG section but leaves the +# symbol table untouched. 
+ +cat > smoke.c <<'EOF' +int helper(void) { return 42; } +int main(void) { return helper(); } +EOF +"$CFREE" cc -g -c smoke.c -o smoke.o +"$CFREE" strip --strip-debug smoke.o -o smoke.stripped.o + +echo "== sections ==" +"$CFREE" objdump -h smoke.stripped.o | awk '/^ *[0-9]+ /{print $2}' | sort +echo "== symbols ==" +"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort diff --git a/test/strip/cases/02-strip-all-keeps-reloc-targets.expected b/test/strip/cases/02-strip-all-keeps-reloc-targets.expected @@ -0,0 +1,4 @@ +== symbols == +_helper +== sections == +__TEXT,__text diff --git a/test/strip/cases/02-strip-all-keeps-reloc-targets.sh b/test/strip/cases/02-strip-all-keeps-reloc-targets.sh @@ -0,0 +1,15 @@ +# --strip-all drops every symbol that isn't referenced by a surviving +# reloc (and isn't an UNDEF extern). _helper is the target of main's call, so +# it survives; _main is unreferenced and is dropped. Only __TEXT,__text is expected to remain. + +cat > smoke.c <<'EOF' +int helper(void) { return 42; } +int main(void) { return helper(); } +EOF +"$CFREE" cc -g -c smoke.c -o smoke.o +"$CFREE" strip --strip-all smoke.o -o smoke.stripped.o + +echo "== symbols ==" +"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort +echo "== sections ==" +"$CFREE" objdump -h smoke.stripped.o | awk '/^ *[0-9]+ /{print $2}' | sort diff --git a/test/strip/cases/03-keep-symbol.expected b/test/strip/cases/03-keep-symbol.expected @@ -0,0 +1,3 @@ +== symbols == +_helper +_unused diff --git a/test/strip/cases/03-keep-symbol.sh b/test/strip/cases/03-keep-symbol.sh @@ -0,0 +1,12 @@ +# --keep-symbol overrides the operation's drop decision: _unused is never called, yet it is expected to survive --strip-all. 
+ +cat > smoke.c <<'EOF' +int unused(void) { return 1; } +int helper(void) { return 42; } +int main(void) { return helper(); } +EOF +"$CFREE" cc -c smoke.c -o smoke.o +"$CFREE" strip --strip-all --keep-symbol=_unused smoke.o -o smoke.stripped.o + +echo "== symbols ==" +"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort diff --git a/test/strip/cases/04-archive-strip-debug.expected b/test/strip/cases/04-archive-strip-debug.expected @@ -0,0 +1,7 @@ +== members == +a.o +b.o +== a.o sections == +__TEXT,__text +== b.o sections == +__TEXT,__text diff --git a/test/strip/cases/04-archive-strip-debug.sh b/test/strip/cases/04-archive-strip-debug.sh @@ -0,0 +1,24 @@ +# Archive: --strip-debug runs on each object member and the symbol index is +# refreshed. This case checks only the member list and each member's sections. + +cat > a.c <<'EOF' +int aaa(void) { return 1; } +EOF +cat > b.c <<'EOF' +int bbb(void) { return 2; } +EOF +"$CFREE" cc -g -c a.c -o a.o +"$CFREE" cc -g -c b.c -o b.o +"$CFREE" ar rcs lib.a a.o b.o +"$CFREE" strip --strip-debug lib.a -o lib.stripped.a + +echo "== members ==" +"$CFREE" ar t lib.stripped.a + +# Extract each member and confirm the debug sections are gone. +mkdir x +( cd x && "$CFREE" ar x ../lib.stripped.a ) +echo "== a.o sections ==" +"$CFREE" objdump -h x/a.o | awk '/^ *[0-9]+ /{print $2}' | sort +echo "== b.o sections ==" +"$CFREE" objdump -h x/b.o | awk '/^ *[0-9]+ /{print $2}' | sort diff --git a/test/strip/run.sh b/test/strip/run.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# Driver-level `cfree strip` test harness: runs each case script in its own sandbox dir and diffs the output against the sibling .expected file. Same shape as test/ar/run.sh. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." && pwd) +cases_dir="$script_dir/cases" + +CFREE="${CFREE:-$repo_root/build/cfree}" +export CFREE + +if [ ! 
-x "$CFREE" ]; then + echo "strip-driver: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +work_root=$(mktemp -d "${TMPDIR:-/tmp}/cfree-strip-test.XXXXXX") +trap 'rm -rf "$work_root"' EXIT + +pass=0 +fail=0 +failures= + +for sh in "$cases_dir"/*.sh; do + [ -e "$sh" ] || continue + name=$(basename "${sh%.sh}") + expected="${sh%.sh}.expected" + actual="$work_root/$name.actual" + + if [ ! -e "$expected" ]; then + printf 'FAIL %s (missing %s)\n' "$name" "$(basename "$expected")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + sandbox="$work_root/$name" + mkdir -p "$sandbox" + ( cd "$sandbox" && sh "$sh" ) > "$actual" 2>&1 + case_rc=$? + + if [ "$case_rc" -ne 0 ]; then + printf 'FAIL %s (script exit=%d)\n' "$name" "$case_rc" + diff -u "$expected" "$actual" || true + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if diff -u "$expected" "$actual" >/dev/null 2>&1; then + printf 'PASS %s\n' "$name" + pass=$((pass + 1)) + else + printf 'FAIL %s\n' "$name" + diff -u "$expected" "$actual" || true + cp "$actual" "$cases_dir/$name.actual" 2>/dev/null || true + fail=$((fail + 1)) + failures="$failures $name" + fi +done + +total=$((pass + fail)) +if [ "$fail" -gt 0 ]; then + printf '\nstrip-driver: failures:%s\n' "$failures" + printf 'strip-driver: %d/%d passed\n' "$pass" "$total" + exit 1 +fi +printf '\nstrip-driver: %d/%d passed\n' "$pass" "$total" diff --git a/test/test.mk b/test/test.mk @@ -27,9 +27,9 @@ # asm_parse / cfree_disasm_iter_* are still stubs; the harness builds # and runs end-to-end so the wiring stays exercised. See doc/ASM.md. 
-.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rt-headers test-rt-runtime test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend +.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rt-headers test-rt-runtime test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend -test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rt-headers test-lib-deps +test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rt-headers test-lib-deps # `test-cbackend` is intentionally not in the default `test` target: the # Phase 1 C backend skips most fixtures pending later phases, which would # add noise to the default summary. Run it explicitly to gate progress. @@ -77,6 +77,12 @@ $(AR_TEST_BIN): test/ar_test.c $(LIB_AR) test-ar-driver: bin @CFREE=$(abspath $(BIN)) test/ar/run.sh +test-strip-driver: bin + @CFREE=$(abspath $(BIN)) test/strip/run.sh + +test-objcopy-driver: bin + @CFREE=$(abspath $(BIN)) test/objcopy/run.sh + # DWARF consumer unit test: builds a hand-crafted DWARF-bearing ELF in # memory and exercises every cfree_dwarf_* entry. 
Depends only on # libcfree.a — the consumer reads bytes; producer involvement isn't