commit 704ea4520ffd3e01d0d9f37a02f12119f6e38edc
parent d8f401b016a594425174caeacea85b888c39a1fa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 12:23:49 -0700
obj: builder mutators + strip/objcopy drivers
Adds the post-finalize builder mutator surface (remove, rename,
set-bind/vis, replace-bytes) for relocatable .o, driven by tombstone
bits filtered by a new obj_sweep_dead pass that emitters call at the
top of emit_elf / emit_macho. The historical spurious-UNDEF prune
collapses into the same sweep.
Closes CTOOLCHAIN.md gap #1 (ObjGroup reader iterator) and unblocks
strip + objcopy. Both ship as driver tools covering the high-traffic
build-system subset; archive paths reuse a shared per-member
global-symbol collector now hosted in driver/inputs.c.
Scope follows the doc's first-cut plan: relocatable .o and .a only.
ET_EXEC/ET_DYN, --only-keep-debug + --add-gnu-debuglink, --extract-symbol,
--change-section-address, and srec/ihex/binary outputs are deferred.
Diffstat:
37 files changed, 2820 insertions(+), 218 deletions(-)
diff --git a/driver/ar.c b/driver/ar.c
@@ -4,6 +4,8 @@
#include <cfree/core.h>
#include <cfree/object.h>
+#include "inputs.h"
+
/* `cfree ar` — POSIX ar archive front-end.
*
* Supported operations (mutually exclusive):
@@ -467,99 +469,25 @@ static int ar_do_write(DriverEnv* env, const char* archive_path, int nmembers,
}
for (i = 0; i < nm; ++i) {
CfreeBytes in;
- CfreeObjFile* of = NULL;
- CfreeObjSymIter* it = NULL;
- CfreeObjSymInfo si;
+ void* blob = NULL;
+ size_t blob_size = 0;
+ const char** names = NULL;
uint32_t count = 0;
- size_t name_bytes = 0;
- size_t alloc_sz;
- char* blob;
- const char** name_arr;
- char* name_storage;
- size_t cursor = 0;
in.name = members[i].name;
in.data = members[i].data;
in.len = members[i].len;
- if (cfree_obj_open(&ctx, &in, &of) != CFREE_OK)
- continue; /* not an object file → no symbols */
-
- /* Pass A: count globally-defined symbols and total name bytes. */
- if (cfree_obj_symiter_new(of, &it) != CFREE_OK) {
- cfree_obj_free(of);
- driver_errf(AR_TOOL, "out of memory");
- rc = 1;
- goto done;
- }
- for (;;) {
- CfreeIterResult r = cfree_obj_symiter_next(it, &si);
- if (r != CFREE_ITER_ITEM) break;
- if (si.bind != CFREE_SB_GLOBAL) continue;
- if (si.section == CFREE_SECTION_NONE) continue;
- if (!si.name || !si.name[0]) continue;
- count += 1;
- {
- const char* p = si.name;
- while (*p++) ++name_bytes;
- name_bytes += 1; /* NUL */
- }
- }
- cfree_obj_symiter_free(it);
-
- if (count == 0) {
- cfree_obj_free(of);
- continue;
- }
-
- /* Single allocation: [name_arr][name_storage]. Names are copied
- * out of the obj file before close so they outlive cfree_obj_free. */
- alloc_sz = (size_t)count * sizeof(const char*) + name_bytes;
- blob = (char*)driver_alloc_zeroed(env, alloc_sz);
- if (!blob) {
- cfree_obj_free(of);
- driver_errf(AR_TOOL, "out of memory");
+ if (driver_collect_obj_global_syms(env, &ctx, AR_TOOL, &in, &blob,
+ &blob_size, &names, &count) != 0) {
rc = 1;
goto done;
}
- name_arr = (const char**)blob;
- name_storage = blob + (size_t)count * sizeof(const char*);
-
- /* Pass B: copy names. */
- if (cfree_obj_symiter_new(of, &it) != CFREE_OK) {
- driver_free(env, blob, alloc_sz);
- cfree_obj_free(of);
- driver_errf(AR_TOOL, "out of memory");
- rc = 1;
- goto done;
- }
- {
- uint32_t k = 0;
- for (;;) {
- CfreeIterResult r;
- const char* p;
- char* dst;
- if (k >= count) break;
- r = cfree_obj_symiter_next(it, &si);
- if (r != CFREE_ITER_ITEM) break;
- if (si.bind != CFREE_SB_GLOBAL) continue;
- if (si.section == CFREE_SECTION_NONE) continue;
- if (!si.name || !si.name[0]) continue;
- dst = name_storage + cursor;
- name_arr[k] = dst;
- for (p = si.name; *p; ++p) *dst++ = *p;
- *dst++ = '\0';
- cursor = (size_t)(dst - name_storage);
- k++;
- }
- count = k;
- }
- cfree_obj_symiter_free(it);
- cfree_obj_free(of);
+ if (count == 0) continue;
sym_allocs[i] = blob;
- sym_alloc_szs[i] = alloc_sz;
- msyms[i].names = name_arr;
+ sym_alloc_szs[i] = blob_size;
+ msyms[i].names = names;
msyms[i].count = count;
}
opts.symbol_index = 1;
diff --git a/driver/driver.h b/driver/driver.h
@@ -36,6 +36,8 @@ int driver_as(int argc, char **argv);
int driver_ld(int argc, char **argv);
int driver_ar(int argc, char **argv);
int driver_ranlib(int argc, char **argv);
+int driver_strip(int argc, char **argv);
+int driver_objcopy(int argc, char **argv);
int driver_objdump(int argc, char **argv);
int driver_dbg(int argc, char **argv);
int driver_run(int argc, char **argv);
@@ -51,6 +53,8 @@ void driver_help_as(void);
void driver_help_ld(void);
void driver_help_ar(void);
void driver_help_ranlib(void);
+void driver_help_strip(void);
+void driver_help_objcopy(void);
void driver_help_objdump(void);
void driver_help_dbg(void);
void driver_help_run(void);
diff --git a/driver/inputs.c b/driver/inputs.c
@@ -261,3 +261,112 @@ out:
driver_free(env, objs, nsrc * sizeof(*objs));
return rc;
}
+
+/* ----------------------------------------------------------------------
+ * Per-object global-symbol collection (shared by ar / ranlib / strip).
+ * ---------------------------------------------------------------------- */
+
+/* True when `si` is a named symbol with SB_GLOBAL binding and a defining
+ * section. Both passes of driver_collect_obj_global_syms use this single
+ * predicate so the counted set and the copied set cannot drift apart. */
+static int collect_sym_wanted(const CfreeObjSymInfo* si) {
+  if (si->bind != CFREE_SB_GLOBAL) return 0;
+  if (si->section == CFREE_SECTION_NONE) return 0;
+  return si->name != NULL && si->name[0] != '\0';
+}
+
+/* Collect the names of every globally-defined symbol in `member`.
+ *
+ * On success the names live in one heap block laid out as
+ * [const char* names[count]][NUL-terminated name bytes]; the strings are
+ * copied out of the object so they stay valid after it is closed.
+ *
+ * Outputs (*blob_out, *blob_size_out, *names_out, *count_out) are always
+ * initialised. They stay NULL/0 when the member is not a recognised object
+ * or contributes no symbols, so a zero count never comes with a live blob
+ * that the caller would have to remember to free.
+ *
+ * Returns 0 on success (including the "no symbols" cases) and 1 on a fatal
+ * failure, after reporting it through driver_errf with the `tool` tag. */
+int driver_collect_obj_global_syms(DriverEnv* env, const CfreeContext* ctx,
+                                   const char* tool, const CfreeBytes* member,
+                                   void** blob_out, size_t* blob_size_out,
+                                   const char*** names_out,
+                                   uint32_t* count_out) {
+  CfreeObjFile* of = NULL;
+  CfreeObjSymIter* it = NULL;
+  CfreeObjSymInfo si;
+  uint32_t count = 0;
+  uint32_t k = 0;
+  size_t name_bytes = 0;
+  size_t alloc_sz = 0;
+  char* blob = NULL;
+  const char** name_arr = NULL;
+  char* name_storage = NULL;
+  size_t cursor = 0;
+  int rc = 1;
+
+  *blob_out = NULL;
+  *blob_size_out = 0;
+  *names_out = NULL;
+  *count_out = 0;
+
+  if (cfree_obj_open(ctx, member, &of) != CFREE_OK) {
+    /* Not a recognized object — caller treats as "no symbols". */
+    return 0;
+  }
+
+  /* Pass A: count + measure name bytes (each name plus its NUL). */
+  if (cfree_obj_symiter_new(of, &it) != CFREE_OK) goto oom;
+  while (cfree_obj_symiter_next(it, &si) == CFREE_ITER_ITEM) {
+    const char* p;
+    if (!collect_sym_wanted(&si)) continue;
+    count += 1;
+    for (p = si.name; *p; ++p) ++name_bytes;
+    name_bytes += 1; /* NUL */
+  }
+  cfree_obj_symiter_free(it);
+
+  if (count == 0) {
+    rc = 0;
+    goto out;
+  }
+
+  alloc_sz = (size_t)count * sizeof(const char*) + name_bytes;
+  blob = (char*)driver_alloc_zeroed(env, alloc_sz);
+  if (!blob) goto oom;
+  name_arr = (const char**)blob;
+  name_storage = blob + (size_t)count * sizeof(const char*);
+
+  /* Pass B: copy names. Never writes more than `count` entries. */
+  if (cfree_obj_symiter_new(of, &it) != CFREE_OK) goto oom;
+  while (k < count && cfree_obj_symiter_next(it, &si) == CFREE_ITER_ITEM) {
+    const char* p;
+    char* dst;
+    if (!collect_sym_wanted(&si)) continue;
+    dst = name_storage + cursor;
+    name_arr[k] = dst;
+    for (p = si.name; *p; ++p) *dst++ = *p;
+    *dst++ = '\0';
+    cursor = (size_t)(dst - name_storage);
+    k++;
+  }
+  cfree_obj_symiter_free(it);
+
+  rc = 0;
+  if (k == 0) goto out; /* nothing copied: keep outputs NULL, drop the blob */
+
+  *blob_out = blob;
+  *blob_size_out = alloc_sz;
+  *names_out = name_arr;
+  *count_out = k;
+  blob = NULL; /* ownership moved to the caller */
+  goto out;
+
+oom:
+  driver_errf(tool, "out of memory");
+out:
+  if (blob) driver_free(env, blob, alloc_sz);
+  cfree_obj_free(of);
+  return rc;
+}
+
+/* Release a block obtained from driver_collect_obj_global_syms. A NULL
+ * `blob` is ignored; otherwise `blob` and `blob_size` are handed to
+ * driver_free unchanged. */
+void driver_collect_obj_global_syms_free(DriverEnv* env, void* blob,
+                                         size_t blob_size) {
+  if (blob == NULL) return;
+  driver_free(env, blob, blob_size);
+}
diff --git a/driver/inputs.h b/driver/inputs.h
@@ -5,6 +5,7 @@
#include <cfree/compile.h>
#include <cfree/link.h>
+#include <cfree/object.h>
/* Shared input handling for tools that take a mixed list of C sources,
* stdin source, object files, and static archives — `cfree run` and
@@ -93,4 +94,34 @@ int driver_inputs_compile_and_jit(DriverInputs *, CfreeCompiler *,
void *extern_resolver_user,
CfreeJit **out_jit);
+/* ----------------------------------------------------------------------
+ * Per-object global-symbol collection
+ *
+ * ar / ranlib / strip all need to enumerate the globally-defined symbols
+ * of an object file so the archive symbol index (ar/ranlib) or the
+ * --strip-unneeded keep-set (strip) can be populated. The shape is
+ * always the same: open the object, walk symbols, copy out the names
+ * of every SB_GLOBAL symbol with a defining section.
+ *
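+ * driver_collect_obj_global_syms allocates a single heap block laid out
+ * as [const char* names[count]][NUL-terminated name strings]; every
+ * names[i] points into the string area of that same block. The caller
+ * frees the block via driver_collect_obj_global_syms_free, passing back
+ * the blob pointer and blob size it was given.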
+ *
+ * Returns:
+ * 0 member parsed, output filled (count may be 0 if the object has
+ * no globally-defined symbols, or the member is not a recognized
+ * object file at all — in that case *blob_out is NULL).
+ * 1 fatal failure (out of memory, etc.); an error has been reported
+ * via driver_errf using the supplied tool tag.
+ */
+int driver_collect_obj_global_syms(DriverEnv *env, const CfreeContext *ctx,
+ const char *tool,
+ const CfreeBytes *member, void **blob_out,
+ size_t *blob_size_out,
+ const char ***names_out,
+ uint32_t *count_out);
+
+void driver_collect_obj_global_syms_free(DriverEnv *env, void *blob,
+ size_t blob_size);
+
#endif
diff --git a/driver/main.c b/driver/main.c
@@ -26,6 +26,8 @@ static int dispatch(const char* name, int argc, char** argv) {
if (driver_streq(name, "ld")) return driver_ld(argc, argv);
if (driver_streq(name, "ar")) return driver_ar(argc, argv);
if (driver_streq(name, "ranlib")) return driver_ranlib(argc, argv);
+ if (driver_streq(name, "strip")) return driver_strip(argc, argv);
+ if (driver_streq(name, "objcopy")) return driver_objcopy(argc, argv);
if (driver_streq(name, "objdump")) return driver_objdump(argc, argv);
if (driver_streq(name, "dbg")) return driver_dbg(argc, argv);
if (driver_streq(name, "run")) return driver_run(argc, argv);
@@ -60,6 +62,14 @@ static int print_tool_help(const char* name) {
driver_help_ranlib();
return 0;
}
+ if (driver_streq(name, "strip")) {
+ driver_help_strip();
+ return 0;
+ }
+ if (driver_streq(name, "objcopy")) {
+ driver_help_objcopy();
+ return 0;
+ }
if (driver_streq(name, "objdump")) {
driver_help_objdump();
return 0;
@@ -118,6 +128,8 @@ void driver_help_top(void) {
" ld Link objects/archives into an executable or shared library\n"
" ar Create / modify / list / extract POSIX `ar` archives\n"
" ranlib Refresh the symbol index of an `ar` archive\n"
+ " strip Drop debug sections and/or symbols from a .o or .a\n"
+ " objcopy Copy and transform an object file (rename / remove / format)\n"
" objdump Dump sections, symbols, disassembly, hex, and relocations\n"
" run JIT-compile inputs and invoke the entry symbol in-process\n"
" dbg Interactive JIT debugger (REPL on top of the JIT image)\n"
diff --git a/driver/objcopy.c b/driver/objcopy.c
@@ -0,0 +1,739 @@
+#include "driver.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <cfree/archive.h>
+#include <cfree/core.h>
+#include <cfree/object.h>
+
+#include "inputs.h"
+
+/* `cfree objcopy` — copy + transform an object file. v1 scope is the
+ * high-traffic build-system subset called out in CTOOLCHAIN.md:
+ *
+ * --remove-section=NAME drop the named section
+ * --only-section=NAME drop every section except NAME (may repeat)
+ * --rename-section=OLD=NEW rename a section
+ * --redefine-sym=OLD=NEW rename a symbol
+ * --globalize-symbol=NAME promote a symbol to SB_GLOBAL
+ * --localize-symbol=NAME demote a symbol to SB_LOCAL
+ * --weaken-symbol=NAME flip a symbol to SB_WEAK
+ * --strip-debug drop CFREE_SEC_DEBUG sections
+ * --strip-all drop debug + every non-essential symbol
+ * --strip-unneeded drop debug + symbols not needed by relocs
+ * --add-section=NAME=FILE append a new section with FILE's bytes
+ * --update-section=NAME=FILE replace NAME's bytes with FILE's bytes
+ * -O <bfdname> emit in a different object format
+ *
+ * Linked ELF (ET_EXEC / ET_DYN) input is out of scope for v1.
+ *
+ * Usage: cfree objcopy [OPTIONS] INPUT [OUTPUT]
+ * If OUTPUT is omitted, INPUT is rewritten in place. */
+
+#define OBJCOPY_TOOL "objcopy"
+
+/* Print the `cfree objcopy` usage summary through driver_printf. */
+void driver_help_objcopy(void) {
+  static const char help_text[] =
+      "cfree objcopy — copy and transform an object file\n"
+      "\n"
+      "USAGE\n"
+      " cfree objcopy [OPTIONS] INPUT [OUTPUT]\n"
+      "\n"
+      "SECTION OPS\n"
+      " --remove-section=NAME drop section NAME (may repeat)\n"
+      " --only-section=NAME keep only section NAME (may repeat)\n"
+      " --rename-section=OLD=NEW rename section OLD to NEW\n"
+      " --add-section=NAME=FILE append a new section with FILE's bytes\n"
+      " --update-section=NAME=FILE replace NAME's bytes with FILE's bytes\n"
+      "\n"
+      "SYMBOL OPS\n"
+      " --redefine-sym=OLD=NEW rename a symbol (may repeat)\n"
+      " --globalize-symbol=NAME set NAME's binding to global\n"
+      " --localize-symbol=NAME set NAME's binding to local\n"
+      " --weaken-symbol=NAME set NAME's binding to weak\n"
+      "\n"
+      "STRIP OPS\n"
+      " --strip-debug, --strip-unneeded, --strip-all\n"
+      " same semantics as `cfree strip`\n"
+      "\n"
+      "FORMAT\n"
+      " -O BFDNAME emit as a different format. Recognized\n"
+      " names: elf*, mach-o / macho*, coff*, wasm*\n"
+      "\n"
+      "EXIT CODES\n"
+      " 0 success 1 I/O or strip error 2 bad usage\n";
+
+  driver_printf("%s", help_text);
+}
+
+/* Strip mode requested on the command line. Only one is stored; the
+ * option parser overwrites it each time a --strip-* flag is seen. */
+typedef enum CopyOp {
+ COPY_OP_NONE, /* no --strip-* flag given */
+ COPY_OP_STRIP_DEBUG, /* --strip-debug */
+ COPY_OP_STRIP_UNNEEDED, /* --strip-unneeded */
+ COPY_OP_STRIP_ALL, /* --strip-all / -S */
+} CopyOp;
+
+/* One "LEFT=RIGHT" option value after splitting at the first '='. */
+typedef struct NamePair {
+ const char* old_name; /* left side: the OLD name, or the section NAME */
+ const char* new_name; /* right side: the NEW name, or the FILE path */
+} NamePair;
+
+/* Parsed `cfree objcopy` command line. Each repeatable option is stored
+ * as a growable array described by three fields: the array pointer, the
+ * number of entries in use (n*), and the allocated capacity (cap_*). */
+typedef struct CopyOpts {
+ CopyOp op; /* selected --strip-* mode, COPY_OP_NONE if absent */
+ /* Section ops */
+ const char** remove_sections; /* --remove-section names */
+ uint32_t nremove;
+ uint32_t cap_remove;
+ const char** only_sections; /* --only-section names (keep-set) */
+ uint32_t nonly;
+ uint32_t cap_only;
+ NamePair* rename_sections; /* --rename-section OLD=NEW */
+ uint32_t nrename_sec;
+ uint32_t cap_rename_sec;
+ NamePair* add_sections; /* --add-section NAME=FILE */
+ uint32_t nadd;
+ uint32_t cap_add;
+ NamePair* update_sections; /* --update-section NAME=FILE */
+ uint32_t nupdate;
+ uint32_t cap_update;
+ /* Symbol ops */
+ NamePair* redefine_syms; /* --redefine-sym OLD=NEW */
+ uint32_t nredef;
+ uint32_t cap_redef;
+ const char** globalize; /* --globalize-symbol names */
+ uint32_t nglob;
+ uint32_t cap_glob;
+ const char** localize; /* --localize-symbol names */
+ uint32_t nloc;
+ uint32_t cap_loc;
+ const char** weaken; /* --weaken-symbol names */
+ uint32_t nweak;
+ uint32_t cap_weak;
+ /* Format conversion */
+ int have_output_fmt; /* non-zero once -O parsed successfully */
+ CfreeObjFmt output_fmt; /* meaningful only when have_output_fmt is set */
+ /* I/O */
+ const char* input; /* first positional argument */
+ const char* output; /* second positional argument, NULL if omitted */
+} CopyOpts;
+
+/* Report whether `name` equals one of the first `n` entries of `list`.
+ * NULL entries are skipped and a NULL `name` never matches.
+ * Returns 1 on a match, 0 otherwise. */
+static int name_in_list(const char* name, const char* const* list, uint32_t n) {
+  uint32_t idx = 0;
+  if (name == NULL) return 0;
+  while (idx < n) {
+    const char* candidate = list[idx++];
+    if (candidate != NULL && strcmp(candidate, name) == 0) return 1;
+  }
+  return 0;
+}
+
+/* Append `s` to the growable string-pointer array (*arr, *n, *cap).
+ * When the array is full a larger zeroed block is allocated (4 entries
+ * initially, doubling afterwards), the used prefix is copied over and the
+ * old block is released. Only the pointer `s` is stored, not a copy.
+ * Returns 0 on success, -1 if the allocation fails (array left as-is). */
+static int push_str(DriverEnv* env, const char*** arr, uint32_t* n,
+                    uint32_t* cap, const char* s) {
+  const char** items = *arr;
+  uint32_t used = *n;
+  uint32_t room = *cap;
+
+  if (used >= room) {
+    uint32_t grown = (room != 0) ? room * 2u : 4u;
+    const char** fresh = (const char**)driver_alloc_zeroed(
+        env, (size_t)grown * sizeof(*fresh));
+    if (fresh == NULL) return -1;
+    if (items != NULL) {
+      memcpy(fresh, items, (size_t)used * sizeof(*fresh));
+      driver_free(env, (void*)items, (size_t)room * sizeof(*fresh));
+    }
+    items = fresh;
+    *arr = fresh;
+    *cap = grown;
+  }
+
+  items[used] = s;
+  *n = used + 1u;
+  return 0;
+}
+
+/* Append the pair (old_name, new_name) to the growable NamePair array
+ * (*arr, *n, *cap), using the same growth policy as push_str: 4 entries
+ * initially, doubling afterwards. The two pointers are stored as given.
+ * Returns 0 on success, -1 if the allocation fails (array left as-is). */
+static int push_pair(DriverEnv* env, NamePair** arr, uint32_t* n, uint32_t* cap,
+                     const char* old_name, const char* new_name) {
+  NamePair* items = *arr;
+  uint32_t used = *n;
+  uint32_t room = *cap;
+  NamePair* slot;
+
+  if (used >= room) {
+    uint32_t grown = (room != 0) ? room * 2u : 4u;
+    NamePair* fresh =
+        (NamePair*)driver_alloc_zeroed(env, (size_t)grown * sizeof(*fresh));
+    if (fresh == NULL) return -1;
+    if (items != NULL) {
+      memcpy(fresh, items, (size_t)used * sizeof(*fresh));
+      driver_free(env, items, (size_t)room * sizeof(*fresh));
+    }
+    items = fresh;
+    *arr = fresh;
+    *cap = grown;
+  }
+
+  slot = &items[used];
+  slot->old_name = old_name;
+  slot->new_name = new_name;
+  *n = used + 1u;
+  return 0;
+}
+
+/* Try to read the value of option `flag` at argv[*i].
+ *
+ * Accepts both spellings: `flag=VAL` (value is the text after '=') and
+ * `flag VAL` (value is the next argv entry, and *i is advanced onto it).
+ *
+ * Returns 1 with *out set when the flag matched, 0 when argv[*i] is some
+ * other argument, and -1 when the bare flag is the last argument so no
+ * value is available (*i is left untouched in that case). */
+static int take_value(int* i, int argc, char** argv, const char* flag,
+                      const char** out) {
+  const char* arg = argv[*i];
+  size_t flag_len = strlen(flag);
+
+  if (strncmp(arg, flag, flag_len) != 0) return 0;
+
+  switch (arg[flag_len]) {
+    case '=':
+      *out = arg + flag_len + 1;
+      return 1;
+    case '\0':
+      if (*i + 1 >= argc) return -1;
+      *i += 1;
+      *out = argv[*i];
+      return 1;
+    default:
+      /* Longer option that merely shares the prefix. */
+      return 0;
+  }
+}
+
+/* Split "old=new" / "name=file" at the first '='.
+ *
+ * *out_left receives a NUL-terminated heap copy of the text before the
+ * '=' (allocated with driver_alloc_zeroed, length + 1 bytes); *out_right
+ * points into `spec` just past the '='.
+ *
+ * Returns 0 on success, -1 when there is no '=', when either side is
+ * empty, or when the allocation fails. Outputs are untouched on failure. */
+static int split_pair(DriverEnv* env, const char* spec, const char** out_left,
+                      const char** out_right) {
+  const char* sep = strchr(spec, '=');
+  size_t left_len;
+  char* copy;
+
+  if (sep == NULL) return -1;     /* no separator */
+  if (sep == spec) return -1;     /* empty left side */
+  if (sep[1] == '\0') return -1;  /* empty right side */
+
+  left_len = (size_t)(sep - spec);
+  copy = (char*)driver_alloc_zeroed(env, left_len + 1u);
+  if (copy == NULL) return -1;
+  memcpy(copy, spec, left_len);
+  copy[left_len] = '\0';
+
+  *out_left = copy;
+  *out_right = sep + 1;
+  return 0;
+}
+
+/* Map a BFD-style format name given to -O onto a CfreeObjFmt by prefix:
+ * "elf…" → ELF, "mach…" → Mach-O, "coff…" or "pe-…" → COFF,
+ * "wasm…" → WASM. Returns 0 with *out set on a match, -1 for a NULL or
+ * unrecognised name (*out untouched). */
+static int parse_fmt_name(const char* name, CfreeObjFmt* out) {
+  const struct {
+    const char* prefix;
+    size_t len;
+    CfreeObjFmt fmt;
+  } known[] = {
+      {"elf", 3, CFREE_OBJ_ELF},   {"mach", 4, CFREE_OBJ_MACHO},
+      {"coff", 4, CFREE_OBJ_COFF}, {"pe-", 3, CFREE_OBJ_COFF},
+      {"wasm", 4, CFREE_OBJ_WASM},
+  };
+  size_t k;
+
+  if (name == NULL) return -1;
+  for (k = 0; k < sizeof known / sizeof known[0]; ++k) {
+    if (strncmp(name, known[k].prefix, known[k].len) == 0) {
+      *out = known[k].fmt;
+      return 0;
+    }
+  }
+  return -1;
+}
+
+/* Resolve `name` to a symbol id via cfree_obj_symbol_by_name.
+ * Yields CFREE_OBJ_SYMBOL_NONE when the lookup does not return CFREE_OK. */
+static CfreeObjSymbol find_sym_id(CfreeObjFile* of, const char* name) {
+  CfreeObjSymInfo info;
+  return cfree_obj_symbol_by_name(of, name, &info) == CFREE_OK
+             ? info.id
+             : CFREE_OBJ_SYMBOL_NONE;
+}
+
+/* Resolve `name` to a section id via cfree_obj_section_by_name.
+ * Yields CFREE_SECTION_NONE when the lookup does not return CFREE_OK. */
+static CfreeObjSection find_sec_id(CfreeObjFile* of, const char* name) {
+  CfreeObjSection found = CFREE_SECTION_NONE;
+  if (cfree_obj_section_by_name(of, name, &found) == CFREE_OK) return found;
+  return CFREE_SECTION_NONE;
+}
+
+/* True when `id` is already among the first `n` entries of `set`. */
+static int strip_keep_has(const CfreeObjSymbol* set, uint32_t n,
+                          CfreeObjSymbol id) {
+  uint32_t k;
+  for (k = 0; k < n; ++k) {
+    if (set[k] == id) return 1;
+  }
+  return 0;
+}
+
+/* Append `id` to the growable keep-set (*set, *n, *cap): 32 entries
+ * initially, doubling afterwards. Returns 0, or -1 when the allocation
+ * fails, in which case the existing set is left intact. */
+static int strip_keep_add(DriverEnv* env, CfreeObjSymbol** set, uint32_t* n,
+                          uint32_t* cap, CfreeObjSymbol id) {
+  if (*n >= *cap) {
+    uint32_t newcap = *cap ? *cap * 2u : 32u;
+    CfreeObjSymbol* nb = (CfreeObjSymbol*)driver_alloc_zeroed(
+        env, (size_t)newcap * sizeof(*nb));
+    if (!nb) return -1;
+    if (*set) {
+      memcpy(nb, *set, (size_t)(*n) * sizeof(*nb));
+      driver_free(env, *set, (size_t)(*cap) * sizeof(*nb));
+    }
+    *set = nb;
+    *cap = newcap;
+  }
+  (*set)[(*n)++] = id;
+  return 0;
+}
+
+/* Apply the --strip-* mode in `opts->op` to builder `b`.
+ *
+ *   COPY_OP_NONE            nothing to do.
+ *   COPY_OP_STRIP_DEBUG     remove every CFREE_SEC_DEBUG section.
+ *   COPY_OP_STRIP_UNNEEDED  as above, plus remove defined symbols that are
+ *                           neither targeted by a relocation in a non-debug
+ *                           section nor bound GLOBAL/WEAK.
+ *   COPY_OP_STRIP_ALL       as above, plus remove every defined symbol not
+ *                           targeted by a relocation in a non-debug section.
+ *
+ * Undefined symbols (CFREE_SK_UNDEF) are never removed here.
+ *
+ * --strip-unneeded keeps GLOBAL/WEAK definitions because in a relocatable
+ * object they are what other objects link against; removing them (for
+ * example `main`) just because no local relocation names them would leave
+ * the file unusable as a link input.
+ *
+ * Returns 0 on success, 1 on failure after reporting via driver_errf. */
+static int apply_strip_pass(DriverEnv* env, CfreeObjFile* of,
+                            CfreeObjBuilder* b, const CopyOpts* opts) {
+  uint32_t i, nsec;
+  CfreeObjSymbol* needed = NULL;
+  uint32_t nneeded = 0, cap_needed = 0;
+  CfreeObjRelocIter* rit = NULL;
+  CfreeObjSymIter* sit = NULL;
+  int keep_exported = (opts->op == COPY_OP_STRIP_UNNEEDED);
+  int rc = 1;
+
+  if (opts->op == COPY_OP_NONE) return 0;
+
+  /* Always drop debug sections for any strip op. */
+  nsec = cfree_obj_nsections(of);
+  for (i = 0; i < nsec; ++i) {
+    CfreeObjSecInfo si;
+    if (cfree_obj_section(of, i, &si) != CFREE_OK) continue;
+    if (si.kind == CFREE_SEC_DEBUG) cfree_obj_builder_remove_section(b, i);
+  }
+  if (opts->op != COPY_OP_STRIP_UNNEEDED && opts->op != COPY_OP_STRIP_ALL)
+    return 0;
+
+  /* Collect reloc-targeted sym ids, skipping relocs in debug sections
+   * (those sections were just removed, so their relocs pin nothing). */
+  if (cfree_obj_reliter_new(of, &rit) != CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "out of memory");
+    return 1;
+  }
+  for (;;) {
+    CfreeObjReloc r;
+    if (cfree_obj_reliter_next(rit, &r) != CFREE_ITER_ITEM) break;
+    if (r.sym == CFREE_OBJ_SYMBOL_NONE) continue;
+    if (r.section != CFREE_SECTION_NONE) {
+      CfreeObjSecInfo hi;
+      if (cfree_obj_section(of, r.section, &hi) == CFREE_OK &&
+          hi.kind == CFREE_SEC_DEBUG) {
+        continue;
+      }
+    }
+    if (strip_keep_has(needed, nneeded, r.sym)) continue;
+    if (strip_keep_add(env, &needed, &nneeded, &cap_needed, r.sym) != 0) {
+      cfree_obj_reliter_free(rit);
+      driver_errf(OBJCOPY_TOOL, "out of memory");
+      goto done;
+    }
+  }
+  cfree_obj_reliter_free(rit);
+
+  /* Walk syms and drop unneeded ones. */
+  if (cfree_obj_symiter_new(of, &sit) != CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "out of memory");
+    goto done;
+  }
+  for (;;) {
+    CfreeObjSymInfo si;
+    if (cfree_obj_symiter_next(sit, &si) != CFREE_ITER_ITEM) break;
+    if (si.kind == CFREE_SK_UNDEF) continue;
+    if (keep_exported &&
+        (si.bind == CFREE_SB_GLOBAL || si.bind == CFREE_SB_WEAK)) {
+      continue;
+    }
+    if (!strip_keep_has(needed, nneeded, si.id))
+      cfree_obj_builder_remove_symbol(b, si.id);
+  }
+  cfree_obj_symiter_free(sit);
+  rc = 0;
+
+done:
+  if (needed) driver_free(env, needed, (size_t)cap_needed * sizeof(*needed));
+  return rc;
+}
+
+/* Apply --only-section: walk every section index of `of` and remove from
+ * the builder each section whose name is not on opts->only_sections.
+ * No section kind is exempt, so symbol-table / strtab style sections are
+ * affected as well as TEXT/RODATA/DATA/BSS. Sections whose info cannot be
+ * read are left alone. Does nothing when the keep-list is empty. */
+static void apply_only_sections(CfreeObjFile* of, CfreeObjBuilder* b,
+                                const CopyOpts* opts) {
+  uint32_t idx = 0;
+  uint32_t total;
+
+  if (opts->nonly == 0) return;
+
+  total = cfree_obj_nsections(of);
+  while (idx < total) {
+    CfreeObjSecInfo info;
+    uint32_t cur = idx++;
+    if (cfree_obj_section(of, cur, &info) != CFREE_OK) continue;
+    if (name_in_list(info.name, opts->only_sections, opts->nonly)) continue;
+    cfree_obj_builder_remove_section(b, cur);
+  }
+}
+
+/* Apply every requested transformation to builder `b`, in this fixed
+ * order: strip pass, --only-section / --remove-section, --rename-section,
+ * --update-section, --add-section, --redefine-sym, then the three
+ * binding changes (globalize, localize, weaken).
+ *
+ * Name lookups go through `of`, while all mutations go through `b`.
+ * NOTE(review): presumably lookups therefore see the names as they were
+ * in the input, not names introduced by an earlier step — confirm.
+ *
+ * A missing section is an error for --rename-section and
+ * --update-section; missing names are silently skipped for
+ * --remove-section, --redefine-sym and the binding options.
+ *
+ * Returns 0 on success, 1 on failure (an error has been reported). */
+static int run_transforms(DriverEnv* env, const CfreeContext* ctx,
+ CfreeObjFile* of, CfreeObjBuilder* b,
+ const CopyOpts* opts) {
+ uint32_t i;
+
+ /* --strip-* */
+ if (apply_strip_pass(env, of, b, opts) != 0) return 1;
+
+ /* --only-section overrides --remove-section because they're an inverse
+ * pair; if both are passed, --only-section's keep-set is authoritative. */
+ if (opts->nonly) {
+ apply_only_sections(of, b, opts);
+ } else if (opts->nremove) {
+ for (i = 0; i < opts->nremove; ++i) {
+ CfreeObjSection sid = find_sec_id(of, opts->remove_sections[i]);
+ if (sid != CFREE_SECTION_NONE)
+ cfree_obj_builder_remove_section(b, sid);
+ }
+ }
+
+ /* --rename-section */
+ for (i = 0; i < opts->nrename_sec; ++i) {
+ CfreeObjSection sid = find_sec_id(of, opts->rename_sections[i].old_name);
+ if (sid == CFREE_SECTION_NONE) {
+ driver_errf(OBJCOPY_TOOL, "rename-section: '%s' not found",
+ opts->rename_sections[i].old_name);
+ return 1;
+ }
+ /* The new name is interned before being handed to the builder. */
+ CfreeSym ns = cfree_sym_intern(cfree_obj_builder_compiler(b),
+ opts->rename_sections[i].new_name);
+ cfree_obj_builder_rename_section(b, sid, ns);
+ }
+
+ /* --update-section */
+ for (i = 0; i < opts->nupdate; ++i) {
+ CfreeObjSection sid = find_sec_id(of, opts->update_sections[i].old_name);
+ CfreeFileData fd = {0};
+ if (sid == CFREE_SECTION_NONE) {
+ driver_errf(OBJCOPY_TOOL, "update-section: '%s' not found",
+ opts->update_sections[i].old_name);
+ return 1;
+ }
+ /* new_name holds the FILE half of NAME=FILE. */
+ if (ctx->file_io->read_all(ctx->file_io->user,
+ opts->update_sections[i].new_name,
+ &fd) != CFREE_OK) {
+ driver_errf(OBJCOPY_TOOL, "update-section: cannot read %s",
+ opts->update_sections[i].new_name);
+ return 1;
+ }
+ /* The file data is released right after the call.
+ * NOTE(review): assumes the builder copies the bytes — confirm. */
+ cfree_obj_builder_section_replace_bytes(b, sid, fd.data, fd.size);
+ ctx->file_io->release(ctx->file_io->user, &fd);
+ }
+
+ /* --add-section: create a new SEC_OTHER PROGBITS section and write its
+ * contents from the on-disk file. */
+ for (i = 0; i < opts->nadd; ++i) {
+ CfreeObjSectionDesc desc;
+ CfreeObjSection nsid;
+ CfreeFileData fd = {0};
+ /* Read the file first so a read failure leaves no section behind. */
+ if (ctx->file_io->read_all(ctx->file_io->user,
+ opts->add_sections[i].new_name, &fd) !=
+ CFREE_OK) {
+ driver_errf(OBJCOPY_TOOL, "add-section: cannot read %s",
+ opts->add_sections[i].new_name);
+ return 1;
+ }
+ memset(&desc, 0, sizeof desc);
+ desc.name = cfree_sym_intern(cfree_obj_builder_compiler(b),
+ opts->add_sections[i].old_name);
+ desc.kind = CFREE_SEC_OTHER;
+ desc.flags = 0;
+ desc.align = 1;
+ desc.entsize = 0;
+ if (cfree_obj_builder_section(b, &desc, &nsid) != CFREE_OK) {
+ driver_errf(OBJCOPY_TOOL, "add-section: failed to create '%s'",
+ opts->add_sections[i].old_name);
+ ctx->file_io->release(ctx->file_io->user, &fd);
+ return 1;
+ }
+ cfree_obj_builder_write(b, nsid, fd.data, fd.size);
+ ctx->file_io->release(ctx->file_io->user, &fd);
+ }
+
+ /* --redefine-sym */
+ for (i = 0; i < opts->nredef; ++i) {
+ CfreeObjSymbol sid = find_sym_id(of, opts->redefine_syms[i].old_name);
+ if (sid == CFREE_OBJ_SYMBOL_NONE) continue; /* tolerate missing */
+ cfree_obj_builder_rename_symbol(
+ b, sid,
+ cfree_sym_intern(cfree_obj_builder_compiler(b),
+ opts->redefine_syms[i].new_name));
+ }
+
+ /* --globalize-symbol / --localize-symbol / --weaken-symbol */
+ /* Applied in this order, so a name given to several of these options
+ * ends up with the binding set by the last loop that names it. */
+ for (i = 0; i < opts->nglob; ++i) {
+ CfreeObjSymbol sid = find_sym_id(of, opts->globalize[i]);
+ if (sid != CFREE_OBJ_SYMBOL_NONE)
+ cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_GLOBAL);
+ }
+ for (i = 0; i < opts->nloc; ++i) {
+ CfreeObjSymbol sid = find_sym_id(of, opts->localize[i]);
+ if (sid != CFREE_OBJ_SYMBOL_NONE)
+ cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_LOCAL);
+ }
+ for (i = 0; i < opts->nweak; ++i) {
+ CfreeObjSymbol sid = find_sym_id(of, opts->weaken[i]);
+ if (sid != CFREE_OBJ_SYMBOL_NONE)
+ cfree_obj_builder_symbol_set_bind(b, sid, CFREE_SB_WEAK);
+ }
+
+ return 0;
+}
+
+/* Open `input` as an object, run the requested transforms on its builder
+ * and emit the result to `output_path`, in opts->output_fmt when -O was
+ * given and through cfree_obj_builder_emit otherwise.
+ *
+ * The output writer is opened only after the transforms succeed.
+ * Returns 0 on success, 1 on any failure (reported via driver_errf). */
+static int copy_one_object(DriverEnv* env, const CfreeContext* ctx,
+                           const CfreeBytes* input, const CopyOpts* opts,
+                           const char* output_path) {
+  CfreeObjFile* obj = NULL;
+  CfreeObjBuilder* builder;
+  CfreeWriter* sink = NULL;
+  CfreeStatus emit_st;
+  int rc = 1;
+
+  if (cfree_obj_open(ctx, input, &obj) != CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "%s: not a recognized object", input->name);
+    return 1;
+  }
+
+  builder = cfree_obj_file_builder(obj);
+  if (builder == NULL) {
+    driver_errf(OBJCOPY_TOOL, "%s: no builder", input->name);
+    goto free_obj;
+  }
+
+  if (run_transforms(env, ctx, obj, builder, opts) != 0) goto free_obj;
+
+  if (ctx->file_io->open_writer(ctx->file_io->user, output_path, &sink) !=
+      CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "cannot open %s", output_path);
+    goto free_obj;
+  }
+
+  emit_st = opts->have_output_fmt
+                ? cfree_obj_builder_emit_as(builder, opts->output_fmt, sink)
+                : cfree_obj_builder_emit(builder, sink);
+  if (emit_st != CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "emit failed");
+  } else {
+    rc = 0;
+  }
+  cfree_writer_close(sink);
+
+free_obj:
+  cfree_obj_free(obj);
+  return rc;
+}
+
+/* Outcome codes shared by take_name_opt / take_pair_opt. The first three
+ * mirror take_value's own return values. */
+enum {
+  ARG_MISSING = -1, /* flag matched but no value followed */
+  ARG_NOMATCH = 0,  /* argv[*i] is some other argument */
+  ARG_TAKEN = 1,    /* flag consumed and its value recorded */
+  ARG_BAD = -2,     /* value malformed; error already reported */
+  ARG_OOM = -3      /* allocation failed while recording the value */
+};
+
+/* Parse a `--flag NAME` / `--flag=NAME` option and append NAME to the
+ * given string list. Returns one of the ARG_* codes. */
+static int take_name_opt(DriverEnv* env, int* i, int argc, char** argv,
+                         const char* flag, const char*** arr, uint32_t* n,
+                         uint32_t* cap) {
+  const char* val = NULL;
+  int m = take_value(i, argc, argv, flag, &val);
+  if (m <= 0) return m;
+  return push_str(env, arr, n, cap, val) == 0 ? ARG_TAKEN : ARG_OOM;
+}
+
+/* Parse a `--flag LEFT=RIGHT` option and append the pair to the given
+ * list. `shape` is the placeholder shown in the error message ("OLD=NEW"
+ * or "NAME=FILE"); the message label is `flag` without its leading "--".
+ * The heap copy of LEFT made by split_pair is released again if the pair
+ * cannot be stored. Returns one of the ARG_* codes. */
+static int take_pair_opt(DriverEnv* env, int* i, int argc, char** argv,
+                         const char* flag, const char* shape, NamePair** arr,
+                         uint32_t* n, uint32_t* cap) {
+  const char* val = NULL;
+  const char* left = NULL;
+  const char* right = NULL;
+  int m = take_value(i, argc, argv, flag, &val);
+  if (m <= 0) return m;
+  if (split_pair(env, val, &left, &right) != 0) {
+    driver_errf(OBJCOPY_TOOL, "%s: expected %s (got %s)", flag + 2, shape,
+                val);
+    return ARG_BAD;
+  }
+  if (push_pair(env, arr, n, cap, left, right) != 0) {
+    driver_free(env, (void*)left, strlen(left) + 1u);
+    return ARG_OOM;
+  }
+  return ARG_TAKEN;
+}
+
+/* Free a string list built by push_str. The strings themselves point into
+ * argv and are not freed. */
+static void free_name_list(DriverEnv* env, const char** list, uint32_t cap) {
+  if (list) driver_free(env, (void*)list, (size_t)cap * sizeof(*list));
+}
+
+/* Free a pair list built by push_pair, including the heap copy of each
+ * left-hand name that split_pair allocated (strlen + 1 bytes). The
+ * right-hand names point into argv and are not freed. */
+static void free_pair_list(DriverEnv* env, NamePair* list, uint32_t n,
+                           uint32_t cap) {
+  uint32_t k;
+  if (!list) return;
+  for (k = 0; k < n; ++k) {
+    if (list[k].old_name)
+      driver_free(env, (void*)list[k].old_name,
+                  strlen(list[k].old_name) + 1u);
+  }
+  driver_free(env, list, (size_t)cap * sizeof(*list));
+}
+
+/* Entry point for `cfree objcopy [OPTIONS] INPUT [OUTPUT]`.
+ *
+ * Parses the command line into a CopyOpts, reads INPUT, and hands it to
+ * copy_one_object. When OUTPUT is omitted the result is written back to
+ * INPUT's path. With no arguments, or when help is requested, the usage
+ * text is printed and 0 is returned.
+ *
+ * Returns 0 on success, 1 on I/O, transform or allocation failure, and 2
+ * on bad usage (unknown option, missing or malformed value, extra
+ * positional argument, missing input). */
+int driver_objcopy(int argc, char** argv) {
+  DriverEnv env;
+  CfreeContext ctx;
+  CopyOpts opts;
+  CfreeFileData in_fd = {0};
+  CfreeBytes input;
+  int have_in = 0;
+  int rc = 1;
+  int i;
+  const char* out_path;
+
+  if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_objcopy();
+    return 0;
+  }
+
+  memset(&opts, 0, sizeof opts);
+  opts.op = COPY_OP_NONE;
+  driver_env_init(&env);
+  ctx = driver_env_to_context(&env);
+
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    int m;
+
+    /* Strip modes: the last one given wins. */
+    if (driver_streq(a, "--strip-debug")) {
+      opts.op = COPY_OP_STRIP_DEBUG;
+      continue;
+    }
+    if (driver_streq(a, "--strip-unneeded")) {
+      opts.op = COPY_OP_STRIP_UNNEEDED;
+      continue;
+    }
+    if (driver_streq(a, "--strip-all") || driver_streq(a, "-S")) {
+      opts.op = COPY_OP_STRIP_ALL;
+      continue;
+    }
+    if (driver_streq(a, "-O")) {
+      if (i + 1 >= argc) {
+        driver_errf(OBJCOPY_TOOL, "-O requires a format name");
+        rc = 2;
+        goto done;
+      }
+      if (parse_fmt_name(argv[++i], &opts.output_fmt) != 0) {
+        driver_errf(OBJCOPY_TOOL, "unknown output format: %s", argv[i]);
+        rc = 2;
+        goto done;
+      }
+      opts.have_output_fmt = 1;
+      continue;
+    }
+
+    /* Value-taking options. Each helper returns ARG_NOMATCH when `a` is a
+     * different argument, so the first one that recognises it decides. */
+    m = take_name_opt(&env, &i, argc, argv, "--remove-section",
+                      &opts.remove_sections, &opts.nremove, &opts.cap_remove);
+    if (m == ARG_NOMATCH)
+      m = take_name_opt(&env, &i, argc, argv, "--only-section",
+                        &opts.only_sections, &opts.nonly, &opts.cap_only);
+    if (m == ARG_NOMATCH)
+      m = take_pair_opt(&env, &i, argc, argv, "--rename-section", "OLD=NEW",
+                        &opts.rename_sections, &opts.nrename_sec,
+                        &opts.cap_rename_sec);
+    if (m == ARG_NOMATCH)
+      m = take_pair_opt(&env, &i, argc, argv, "--add-section", "NAME=FILE",
+                        &opts.add_sections, &opts.nadd, &opts.cap_add);
+    if (m == ARG_NOMATCH)
+      m = take_pair_opt(&env, &i, argc, argv, "--update-section", "NAME=FILE",
+                        &opts.update_sections, &opts.nupdate,
+                        &opts.cap_update);
+    if (m == ARG_NOMATCH)
+      m = take_pair_opt(&env, &i, argc, argv, "--redefine-sym", "OLD=NEW",
+                        &opts.redefine_syms, &opts.nredef, &opts.cap_redef);
+    if (m == ARG_NOMATCH)
+      m = take_name_opt(&env, &i, argc, argv, "--globalize-symbol",
+                        &opts.globalize, &opts.nglob, &opts.cap_glob);
+    if (m == ARG_NOMATCH)
+      m = take_name_opt(&env, &i, argc, argv, "--localize-symbol",
+                        &opts.localize, &opts.nloc, &opts.cap_loc);
+    if (m == ARG_NOMATCH)
+      m = take_name_opt(&env, &i, argc, argv, "--weaken-symbol", &opts.weaken,
+                        &opts.nweak, &opts.cap_weak);
+
+    if (m == ARG_TAKEN) continue;
+    if (m == ARG_MISSING) {
+      /* `i` was not advanced, so argv[i] is still the flag itself. */
+      driver_errf(OBJCOPY_TOOL, "%s requires a value", argv[i]);
+      rc = 2;
+      goto done;
+    }
+    if (m == ARG_BAD) {
+      rc = 2;
+      goto done;
+    }
+    if (m == ARG_OOM) {
+      driver_errf(OBJCOPY_TOOL, "out of memory");
+      rc = 1;
+      goto done;
+    }
+
+    if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(OBJCOPY_TOOL, "unknown option: %s", a);
+      rc = 2;
+      goto done;
+    }
+    if (!opts.input) {
+      opts.input = a;
+    } else if (!opts.output) {
+      opts.output = a;
+    } else {
+      driver_errf(OBJCOPY_TOOL, "unexpected argument: %s", a);
+      rc = 2;
+      goto done;
+    }
+  }
+
+  if (!opts.input) {
+    driver_errf(OBJCOPY_TOOL, "missing input file");
+    rc = 2;
+    goto done;
+  }
+  out_path = opts.output ? opts.output : opts.input;
+
+  if (ctx.file_io->read_all(ctx.file_io->user, opts.input, &in_fd) !=
+      CFREE_OK) {
+    driver_errf(OBJCOPY_TOOL, "cannot read %s", opts.input);
+    goto done;
+  }
+  have_in = 1;
+  input.name = opts.input;
+  input.data = in_fd.data;
+  input.len = in_fd.size;
+
+  rc = copy_one_object(&env, &ctx, &input, &opts, out_path);
+
+done:
+  if (have_in) ctx.file_io->release(ctx.file_io->user, &in_fd);
+  free_name_list(&env, opts.remove_sections, opts.cap_remove);
+  free_name_list(&env, opts.only_sections, opts.cap_only);
+  free_pair_list(&env, opts.rename_sections, opts.nrename_sec,
+                 opts.cap_rename_sec);
+  free_pair_list(&env, opts.add_sections, opts.nadd, opts.cap_add);
+  free_pair_list(&env, opts.update_sections, opts.nupdate, opts.cap_update);
+  free_pair_list(&env, opts.redefine_syms, opts.nredef, opts.cap_redef);
+  free_name_list(&env, opts.globalize, opts.cap_glob);
+  free_name_list(&env, opts.localize, opts.cap_loc);
+  free_name_list(&env, opts.weaken, opts.cap_weak);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/ranlib.c b/driver/ranlib.c
@@ -5,6 +5,7 @@
#include <cfree/object.h>
#include "driver.h"
+#include "inputs.h"
/* `cfree ranlib` — refresh / add a System-V `/` symbol-index member at the
* head of an existing POSIX `ar` archive. Equivalent to `cfree ar s ARCHIVE`,
@@ -16,9 +17,8 @@
* member names are preserved via the `//` extended-name table. Reproducible
* output via SOURCE_DATE_EPOCH (same epoch handling as `cfree ar`).
*
- * Note: the per-member symbol-collection loop duplicates the body of
- * ar_do_write's `has_s` block (driver/ar.c). Factor into a shared helper
- * when adding strip / objcopy. */
+ * Per-member symbol collection lives in driver/inputs.c
+ * (driver_collect_obj_global_syms) — shared with ar / strip. */
#define RANLIB_TOOL "ranlib"
@@ -59,104 +59,6 @@ static uint64_t ranlib_epoch_from_env(void) {
return v;
}
-/* Walk an object member's symbol iterator and produce a heap-allocated
- * (name_arr[count], name_bytes) blob for CfreeArMemberSymbols. *blob_out is
- * NULL when the member is not a recognised object or has no exported
- * global symbols; *out_count is then 0. Mirrors driver/ar.c. */
-static int ranlib_collect_symbols(DriverEnv* env, const CfreeContext* ctx,
- const CfreeBytes* member, void** blob_out,
- size_t* blob_size_out, const char*** names_out,
- uint32_t* count_out) {
- CfreeObjFile* of = NULL;
- CfreeObjSymIter* it = NULL;
- CfreeObjSymInfo si;
- uint32_t count = 0;
- size_t name_bytes = 0;
- size_t alloc_sz;
- char* blob;
- const char** name_arr;
- char* name_storage;
- size_t cursor = 0;
-
- *blob_out = NULL;
- *blob_size_out = 0;
- *names_out = NULL;
- *count_out = 0;
-
- if (cfree_obj_open(ctx, member, &of) != CFREE_OK) return 0;
-
- if (cfree_obj_symiter_new(of, &it) != CFREE_OK) {
- cfree_obj_free(of);
- driver_errf(RANLIB_TOOL, "out of memory");
- return 1;
- }
- for (;;) {
- CfreeIterResult r = cfree_obj_symiter_next(it, &si);
- if (r != CFREE_ITER_ITEM) break;
- if (si.bind != CFREE_SB_GLOBAL) continue;
- if (si.section == CFREE_SECTION_NONE) continue;
- if (!si.name || !si.name[0]) continue;
- count += 1;
- {
- const char* p = si.name;
- while (*p++) ++name_bytes;
- name_bytes += 1;
- }
- }
- cfree_obj_symiter_free(it);
-
- if (count == 0) {
- cfree_obj_free(of);
- return 0;
- }
-
- alloc_sz = (size_t)count * sizeof(const char*) + name_bytes;
- blob = (char*)driver_alloc_zeroed(env, alloc_sz);
- if (!blob) {
- cfree_obj_free(of);
- driver_errf(RANLIB_TOOL, "out of memory");
- return 1;
- }
- name_arr = (const char**)blob;
- name_storage = blob + (size_t)count * sizeof(const char*);
-
- if (cfree_obj_symiter_new(of, &it) != CFREE_OK) {
- driver_free(env, blob, alloc_sz);
- cfree_obj_free(of);
- driver_errf(RANLIB_TOOL, "out of memory");
- return 1;
- }
- {
- uint32_t k = 0;
- for (;;) {
- CfreeIterResult r;
- const char* p;
- char* dst;
- if (k >= count) break;
- r = cfree_obj_symiter_next(it, &si);
- if (r != CFREE_ITER_ITEM) break;
- if (si.bind != CFREE_SB_GLOBAL) continue;
- if (si.section == CFREE_SECTION_NONE) continue;
- if (!si.name || !si.name[0]) continue;
- dst = name_storage + cursor;
- name_arr[k] = dst;
- for (p = si.name; *p; ++p) *dst++ = *p;
- *dst++ = '\0';
- cursor = (size_t)(dst - name_storage);
- k++;
- }
- count = k;
- }
- cfree_obj_symiter_free(it);
- cfree_obj_free(of);
-
- *blob_out = blob;
- *blob_size_out = alloc_sz;
- *names_out = name_arr;
- *count_out = count;
- return 0;
-}
-
int driver_ranlib(int argc, char** argv) {
DriverEnv env;
CfreeContext ctx;
@@ -288,8 +190,9 @@ int driver_ranlib(int argc, char** argv) {
size_t blob_size = 0;
const char** names = NULL;
uint32_t count = 0;
- if (ranlib_collect_symbols(&env, &ctx, &members[i], &blob, &blob_size,
- &names, &count) != 0) {
+ if (driver_collect_obj_global_syms(&env, &ctx, RANLIB_TOOL, &members[i],
+ &blob, &blob_size, &names,
+ &count) != 0) {
goto out;
}
sym_allocs[i] = blob;
diff --git a/driver/strip.c b/driver/strip.c
@@ -0,0 +1,661 @@
+#include "driver.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <cfree/archive.h>
+#include <cfree/core.h>
+#include <cfree/object.h>
+
+#include "inputs.h"
+
+/* `cfree strip` — drop debug sections and / or unwanted symbols from a
+ * relocatable object or static archive, then write the result back. Scope
+ * for the first cut matches the CTOOLCHAIN.md plan: relocatable .o and
+ * .a inputs only — linked ELF (ET_EXEC / ET_DYN) is rejected.
+ *
+ * Operations (the last one wins; default is --strip-all):
+ * --strip-debug drop sections whose kind is CFREE_SEC_DEBUG
+ * --strip-unneeded drop debug + symbols not referenced by any surviving reloc
+ * --strip-all drop debug + every non-essential symbol (default); in this
+ * first cut it applies the same symbol filter as --strip-unneeded
+ *
+ * Filters applied on top of the operation:
+ * --keep-symbol=NAME, -K NAME keep NAME even if the operation would drop it
+ * --strip-symbol=NAME, -N NAME always drop NAME
+ *
+ * I/O:
+ * -o PATH write output to PATH (else rewrite the input in place)
+ */
+
+#define STRIP_TOOL "strip"
+
+/* Prints the `cfree strip` usage text via driver_printf. The text is handed
+ * over as the argument of a "%s" format, never as the format itself. */
+void driver_help_strip(void) {
+  static const char usage[] =
+      "cfree strip — drop debug sections and/or symbols\n"
+      "\n"
+      "USAGE\n"
+      " cfree strip [OPTIONS] FILE\n"
+      "\n"
+      "OPERATIONS (last one wins; default is --strip-all)\n"
+      " --strip-debug remove debug-info sections\n"
+      " --strip-unneeded remove debug + symbols not needed by relocs\n"
+      " --strip-all remove debug + all non-essential symbols\n"
+      "\n"
+      "SYMBOL FILTERS (may repeat)\n"
+      " --keep-symbol=NAME, -K NAME keep NAME even when the operation\n"
+      " would otherwise drop it\n"
+      " --strip-symbol=NAME, -N NAME always drop NAME\n"
+      "\n"
+      "OUTPUT\n"
+      " -o PATH write to PATH (default: rewrite FILE in place)\n"
+      "\n"
+      "INPUTS\n"
+      " FILE may be a relocatable .o or a static .a archive. Linked\n"
+      " executables / shared libraries are not supported yet.\n"
+      "\n"
+      "EXIT CODES\n"
+      " 0 success 1 I/O or strip error 2 bad usage\n";
+
+  driver_printf("%s", usage);
+}
+
+typedef enum StripOp { /* Which --strip-* operation the command line selected. */
+ STRIP_OP_DEBUG, /* --strip-debug: debug sections only (explicit --strip-symbol names still apply) */
+ STRIP_OP_UNNEEDED, /* --strip-unneeded: debug sections + policy-driven symbol filtering */
+ STRIP_OP_ALL, /* --strip-all / -s: debug sections + symbol filtering; driver_strip's default */
+} StripOp;
+
+typedef struct StripOpts { /* Parsed command line for one `cfree strip` run. */
+ StripOp op; /* selected operation; driver_strip initialises it to STRIP_OP_ALL */
+ const char** keep; /* --keep-symbol / -K names; the strings are borrowed from argv */
+ uint32_t nkeep; /* entries in use in `keep` */
+ uint32_t cap_keep; /* entries allocated for `keep`; also used to size its free */
+ const char** strip; /* --strip-symbol / -N names; the strings are borrowed from argv */
+ uint32_t nstrip; /* entries in use in `strip` */
+ uint32_t cap_strip; /* entries allocated for `strip`; also used to size its free */
+ const char* output; /* -o PATH, or NULL to write back over `input` */
+ const char* input; /* the single positional FILE argument */
+} StripOpts;
+
+/* Reports whether `name` equals one of the first `n` entries of `list`.
+ * A NULL `name` never matches and NULL list slots are skipped. `list` is
+ * not touched when `n` is zero. Returns 1 on a match, 0 otherwise. */
+static int name_in_list(const char* name, const char* const* list, uint32_t n) {
+  uint32_t idx = 0;
+  int found = 0;
+
+  if (name == NULL) {
+    return 0;
+  }
+  while (!found && idx < n) {
+    const char* candidate = list[idx++];
+    found = (candidate != NULL) && (strcmp(candidate, name) == 0);
+  }
+  return found;
+}
+
+/* Appends `name` (pointer only, no copy) to the growable array *arr.
+ * When the array is full it is reallocated at double the capacity (8 slots
+ * the first time), the used entries are copied across and the old buffer is
+ * released. Returns 0 on success; on allocation failure prints
+ * "out of memory", leaves the array untouched and returns -1. */
+static int push_name(DriverEnv* env, const char*** arr, uint32_t* n,
+                     uint32_t* cap, const char* name) {
+  const char** slots = *arr;
+
+  if (*n >= *cap) {
+    const uint32_t grown = (*cap != 0u) ? (*cap * 2u) : 8u;
+    const char** fresh = (const char**)driver_alloc_zeroed(
+        env, (size_t)grown * sizeof(*fresh));
+    if (fresh == NULL) {
+      driver_errf(STRIP_TOOL, "out of memory");
+      return -1;
+    }
+    if (slots != NULL) {
+      memcpy(fresh, slots, (size_t)(*n) * sizeof(*fresh));
+      driver_free(env, (void*)slots, (size_t)(*cap) * sizeof(*fresh));
+    }
+    slots = fresh;
+    *arr = fresh;
+    *cap = grown;
+  }
+  slots[*n] = name;
+  *n += 1u;
+  return 0;
+}
+
+/* Tries to read argv[*i] as a name-valued option.
+ *
+ * Accepted spellings:
+ *   FLAG=NAME     e.g. --keep-symbol=foo
+ *   FLAG NAME     value taken from the next argv slot
+ *   SHORT NAME    e.g. -K foo (only when short_flag is non-NULL)
+ *   SHORTNAME     e.g. -Kfoo, the getopt-style attached value
+ *
+ * Returns 1 and stores the value in *out on a match; *i is advanced only
+ * when the value lives in its own argv slot. Returns 0 when argv[*i] is not
+ * this option, and -1 when the option matched but the separate value slot
+ * is missing. *out always points into argv; nothing is copied. */
+static int parse_name_arg(int* i, int argc, char** argv, const char* flag,
+                          const char* short_flag, const char** out) {
+  const char* a = argv[*i];
+  size_t flen = strlen(flag);
+  int value_in_next_slot = 0;
+
+  /* --flag=NAME */
+  if (strncmp(a, flag, flen) == 0 && a[flen] == '=') {
+    *out = a + flen + 1;
+    return 1;
+  }
+
+  if (strcmp(a, flag) == 0) {
+    /* --flag NAME */
+    value_in_next_slot = 1;
+  } else if (short_flag) {
+    size_t slen = strlen(short_flag);
+    if (strcmp(a, short_flag) == 0) {
+      /* -K NAME / -N NAME */
+      value_in_next_slot = 1;
+    } else if (slen != 0 && strncmp(a, short_flag, slen) == 0) {
+      /* -KNAME / -NNAME: strcmp above failed, so at least one character
+       * follows the short flag and forms the value. */
+      *out = a + slen;
+      return 1;
+    }
+  }
+
+  if (!value_in_next_slot) return 0;
+  if (*i + 1 >= argc) return -1;
+  *out = argv[++(*i)];
+  return 1;
+}
+
+/* Builds the de-duplicated list of symbol ids that are the target of at
+ * least one relocation hosted outside a CFREE_SEC_DEBUG section. Relocs
+ * that live in debug sections are ignored because those sections are about
+ * to be removed; counting them would keep alive every symbol that is only
+ * mentioned from DWARF.
+ *
+ * On success returns 0 and hands the array, its used length and its
+ * allocated length back through needed_out / n_out / cap_out (the array is
+ * NULL when nothing was collected). On failure prints "out of memory",
+ * releases everything it allocated and returns 1 without touching the
+ * output parameters. */
+static int collect_needed_syms(DriverEnv* env, CfreeObjFile* of,
+                               CfreeObjSymbol** needed_out, uint32_t* n_out,
+                               uint32_t* cap_out) {
+  CfreeObjRelocIter* relocs = NULL;
+  CfreeObjSymbol* ids = NULL;
+  uint32_t used = 0;
+  uint32_t capacity = 0;
+  CfreeObjReloc rel;
+
+  if (cfree_obj_reliter_new(of, &relocs) != CFREE_OK) {
+    driver_errf(STRIP_TOOL, "out of memory");
+    return 1;
+  }
+
+  while (cfree_obj_reliter_next(relocs, &rel) == CFREE_ITER_ITEM) {
+    uint32_t pos;
+
+    if (rel.sym == CFREE_OBJ_SYMBOL_NONE) {
+      continue;
+    }
+    /* A reloc hosted by a debug section disappears with that section. */
+    if (rel.section != CFREE_SECTION_NONE) {
+      CfreeObjSecInfo host;
+      if (cfree_obj_section(of, rel.section, &host) == CFREE_OK &&
+          host.kind == CFREE_SEC_DEBUG) {
+        continue;
+      }
+    }
+    /* Linear de-duplication: stop at the first equal id. */
+    pos = 0;
+    while (pos < used && ids[pos] != rel.sym) {
+      ++pos;
+    }
+    if (pos < used) {
+      continue;
+    }
+    if (used >= capacity) {
+      const uint32_t grown = (capacity != 0u) ? (capacity * 2u) : 32u;
+      CfreeObjSymbol* fresh = (CfreeObjSymbol*)driver_alloc_zeroed(
+          env, (size_t)grown * sizeof(*fresh));
+      if (fresh == NULL) {
+        cfree_obj_reliter_free(relocs);
+        if (ids != NULL) {
+          driver_free(env, ids, (size_t)capacity * sizeof(*ids));
+        }
+        driver_errf(STRIP_TOOL, "out of memory");
+        return 1;
+      }
+      if (ids != NULL) {
+        memcpy(fresh, ids, (size_t)used * sizeof(*ids));
+        driver_free(env, ids, (size_t)capacity * sizeof(*ids));
+      }
+      ids = fresh;
+      capacity = grown;
+    }
+    ids[used] = rel.sym;
+    used += 1u;
+  }
+
+  cfree_obj_reliter_free(relocs);
+  *needed_out = ids;
+  *n_out = used;
+  *cap_out = capacity;
+  return 0;
+}
+
+/* Linear membership test: returns 1 when `id` occurs among the first `n`
+ * entries of `arr`, 0 otherwise. `arr` is not read when `n` is zero. */
+static int id_in_set(CfreeObjSymbol id, const CfreeObjSymbol* arr, uint32_t n) {
+  uint32_t pos = 0;
+
+  while (pos < n && arr[pos] != id) {
+    ++pos;
+  }
+  return pos < n;
+}
+
+/* The core strip pass for one object. It reads from the reader `of` and
+ * issues removals against the builder `b`:
+ *   1. every section whose kind is CFREE_SEC_DEBUG is removed;
+ *   2. for --strip-unneeded / --strip-all, the set of symbols targeted by
+ *      relocs outside debug sections is collected;
+ *   3. each symbol is dropped or kept according to the --strip-symbol list,
+ *      then the --keep-symbol list, then the operation policy.
+ * Return values of the remove calls are not inspected. Per the original
+ * note, cascades (orphan relocs, group memberships, section symbols of
+ * removed sections) are left to the emit-time sweep.
+ * Returns 0 on success, 1 after printing a diagnostic on failure. */
+static int strip_one_builder(DriverEnv* env, CfreeObjFile* of,
+                             CfreeObjBuilder* b, const StripOpts* opts) {
+  const int filter_syms =
+      (opts->op == STRIP_OP_ALL || opts->op == STRIP_OP_UNNEEDED);
+  CfreeObjSymbol* needed = NULL;
+  uint32_t nneeded = 0;
+  uint32_t cap_needed = 0;
+  CfreeObjSymIter* syms = NULL;
+  uint32_t sec;
+  uint32_t nsec;
+  int rc = 1;
+
+  /* Step 1: debug sections go away under every supported operation. */
+  nsec = cfree_obj_nsections(of);
+  for (sec = 0; sec < nsec; ++sec) {
+    CfreeObjSecInfo info;
+    if (cfree_obj_section(of, sec, &info) == CFREE_OK &&
+        info.kind == CFREE_SEC_DEBUG) {
+      cfree_obj_builder_remove_section(b, sec);
+    }
+  }
+
+  /* Step 2: only the symbol-filtering operations need the reloc targets. */
+  if (filter_syms &&
+      collect_needed_syms(env, of, &needed, &nneeded, &cap_needed) != 0) {
+    return 1;
+  }
+
+  /* Step 3: decide each symbol's fate. */
+  if (cfree_obj_symiter_new(of, &syms) != CFREE_OK) {
+    driver_errf(STRIP_TOOL, "out of memory");
+  } else {
+    CfreeObjSymInfo sym;
+    while (cfree_obj_symiter_next(syms, &sym) == CFREE_ITER_ITEM) {
+      int drop;
+      if (opts->nstrip && name_in_list(sym.name, opts->strip, opts->nstrip)) {
+        /* --strip-symbol beats --keep-symbol when both name the symbol. */
+        drop = 1;
+      } else if (opts->nkeep &&
+                 name_in_list(sym.name, opts->keep, opts->nkeep)) {
+        drop = 0;
+      } else if (!filter_syms) {
+        drop = 0;
+      } else {
+        /* Undefined externals stay so the object remains linkable, and so
+         * does anything a surviving reloc points at. */
+        drop = !(sym.kind == CFREE_SK_UNDEF ||
+                 id_in_set(sym.id, needed, nneeded));
+      }
+      if (drop) {
+        cfree_obj_builder_remove_symbol(b, sym.id);
+      }
+    }
+    cfree_obj_symiter_free(syms);
+    rc = 0;
+  }
+
+  if (needed) {
+    driver_free(env, needed, (size_t)cap_needed * sizeof(*needed));
+  }
+  return rc;
+}
+
+/* Opens `input` as an object, applies the strip pass to its builder,
+ * re-emits it into an in-memory writer and returns a private copy of the
+ * result. On success returns 0 with *out_data / *out_size set; the caller
+ * releases the buffer with driver_free. Note that the buffer is allocated
+ * with at least one byte while *out_size reports the true length. On
+ * failure returns 1 and leaves *out_data NULL and *out_size 0. */
+static int strip_object_bytes(DriverEnv* env, const CfreeContext* ctx,
+                              const CfreeBytes* input, const StripOpts* opts,
+                              uint8_t** out_data, size_t* out_size) {
+  CfreeObjFile* obj = NULL;
+  CfreeObjBuilder* builder;
+  CfreeWriter* sink = NULL;
+  int sink_open = 0;
+  int status = 1;
+
+  *out_data = NULL;
+  *out_size = 0;
+
+  if (cfree_obj_open(ctx, input, &obj) != CFREE_OK) {
+    driver_errf(STRIP_TOOL, "%s: not a recognized object", input->name);
+    return 1;
+  }
+
+  builder = cfree_obj_file_builder(obj);
+  if (builder == NULL) {
+    driver_errf(STRIP_TOOL, "%s: no builder for object", input->name);
+  } else if (strip_one_builder(env, obj, builder, opts) != 0) {
+    /* strip_one_builder prints its own diagnostic. */
+  } else if (cfree_writer_mem(env->heap, &sink) != CFREE_OK || sink == NULL) {
+    driver_errf(STRIP_TOOL, "out of memory");
+  } else {
+    sink_open = 1;
+    if (cfree_obj_builder_emit(builder, sink) != CFREE_OK) {
+      driver_errf(STRIP_TOOL, "%s: emit failed", input->name);
+    } else {
+      size_t len = 0;
+      const uint8_t* bytes = cfree_writer_mem_bytes(sink, &len);
+      uint8_t* dup = (uint8_t*)driver_alloc(env, len ? len : 1u);
+      if (dup == NULL) {
+        driver_errf(STRIP_TOOL, "out of memory");
+      } else {
+        if (len) {
+          memcpy(dup, bytes, len);
+        }
+        *out_data = dup;
+        *out_size = len;
+        status = 0;
+      }
+    }
+  }
+
+  /* The writer is closed only once it was created successfully. */
+  if (sink_open) {
+    cfree_writer_close(sink);
+  }
+  cfree_obj_free(obj);
+  return status;
+}
+
+/* Reads SOURCE_DATE_EPOCH through driver_getenv and parses it as an
+ * unsigned decimal number. Returns 0 when the variable is unset, empty or
+ * contains any non-digit character. Overflow is not detected: the
+ * accumulator is a uint64_t and simply wraps. */
+static uint64_t strip_epoch_from_env(void) {
+  const char* text = driver_getenv("SOURCE_DATE_EPOCH");
+  uint64_t epoch = 0;
+
+  if (text == NULL || text[0] == '\0') {
+    return 0;
+  }
+  while (*text != '\0') {
+    const char ch = *text++;
+    if (ch < '0' || ch > '9') {
+      return 0;
+    }
+    epoch = epoch * 10 + (uint64_t)(ch - '0');
+  }
+  return epoch;
+}
+
+/* Strip every object member of an archive and write a fresh archive with
+ * a refreshed System-V symbol index. Non-object members pass through
+ * unchanged.
+ *
+ *   env          driver environment used for all allocations
+ *   ctx          supplies file_io for opening the output writer
+ *   input        raw archive bytes; input->name appears in diagnostics
+ *   opts         strip policy applied to each object member
+ *   output_path  destination path (driver_strip passes the input path when
+ *                no -o was given)
+ *
+ * Returns 0 on success and 1 on failure. An iterator result other than
+ * ITEM or END, or a second pass that yields fewer members than the first,
+ * aborts before the output is opened, so a damaged archive is never
+ * replaced by a silently truncated one.
+ *
+ * NOTE(review): pass-through members still point into `input` while the
+ * output is written. This assumes that opening output_path (possibly the
+ * same file) does not invalidate those bytes; confirm against the
+ * file_io->read_all implementation. */
+static int strip_archive(DriverEnv* env, const CfreeContext* ctx,
+                         const CfreeBytes* input, const StripOpts* opts,
+                         const char* output_path) {
+  CfreeArIter* it = NULL;
+  CfreeArMember m;
+  CfreeBytes* members = NULL;
+  uint8_t** owned_data = NULL;
+  size_t* owned_size = NULL;
+  char* name_storage = NULL;
+  size_t name_bytes_total = 0;
+  uint32_t nmembers = 0, k;
+  CfreeArMemberSymbols* msyms = NULL;
+  void** sym_allocs = NULL;
+  size_t* sym_alloc_szs = NULL;
+  CfreeWriter* out = NULL;
+  CfreeArWriteOptions opts_ar = {0};
+  int rc = 1;
+
+  /* Pass 1: count members + total name bytes. Nothing is allocated yet, so
+   * failures here can return directly. */
+  if (cfree_ar_iter_new(ctx, input, &it) != CFREE_OK) {
+    driver_errf(STRIP_TOOL, "%s: not an archive", input->name);
+    return 1;
+  }
+  for (;;) {
+    CfreeIterResult r = cfree_ar_iter_next(it, &m);
+    if (r == CFREE_ITER_END) break;
+    if (r != CFREE_ITER_ITEM) {
+      /* Do not mistake an iteration error for the end of the archive. */
+      driver_errf(STRIP_TOOL, "%s: malformed archive", input->name);
+      cfree_ar_iter_free(it);
+      return 1;
+    }
+    nmembers++;
+    name_bytes_total += driver_strlen(m.name) + 1;
+  }
+  cfree_ar_iter_free(it);
+  it = NULL;
+
+  if (nmembers) {
+    members = (CfreeBytes*)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*members));
+    owned_data = (uint8_t**)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*owned_data));
+    owned_size = (size_t*)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*owned_size));
+    if (!members || !owned_data || !owned_size) {
+      driver_errf(STRIP_TOOL, "out of memory");
+      goto done;
+    }
+  }
+  if (name_bytes_total) {
+    name_storage = (char*)driver_alloc_zeroed(env, name_bytes_total);
+    if (!name_storage) {
+      driver_errf(STRIP_TOOL, "out of memory");
+      goto done;
+    }
+  }
+
+  /* Pass 2: walk members; strip object members, pass others through. */
+  if (cfree_ar_iter_new(ctx, input, &it) != CFREE_OK) {
+    driver_errf(STRIP_TOOL, "iter re-open failed");
+    goto done;
+  }
+  {
+    size_t cursor = 0;
+    for (k = 0; k < nmembers; ++k) {
+      CfreeIterResult r = cfree_ar_iter_next(it, &m);
+      const char* p;
+      char* dst;
+      CfreeBinFmt fmt;
+      CfreeBytes mbytes;
+      if (r != CFREE_ITER_ITEM) break;
+
+      /* Member names are copied because `m` is overwritten by the next
+       * iterator step. */
+      dst = name_storage + cursor;
+      for (p = m.name; *p; ++p) *dst++ = *p;
+      *dst++ = '\0';
+      members[k].name = name_storage + cursor;
+      cursor = (size_t)(dst - name_storage);
+
+      mbytes.name = members[k].name;
+      mbytes.data = m.data;
+      mbytes.len = m.size;
+      fmt = cfree_detect_fmt(m.data, m.size);
+      if (fmt == CFREE_BIN_ELF || fmt == CFREE_BIN_COFF ||
+          fmt == CFREE_BIN_MACHO || fmt == CFREE_BIN_WASM) {
+        uint8_t* sd = NULL;
+        size_t ss = 0;
+        if (strip_object_bytes(env, ctx, &mbytes, opts, &sd, &ss) != 0) {
+          goto done; /* `it` is released in the shared cleanup */
+        }
+        owned_data[k] = sd;
+        owned_size[k] = ss;
+        members[k].data = sd;
+        members[k].len = ss;
+      } else {
+        members[k].data = m.data;
+        members[k].len = m.size;
+      }
+    }
+  }
+  if (k != nmembers) {
+    /* The second walk came up short; the remaining slots are still zeroed
+     * and must not be written out as members. */
+    driver_errf(STRIP_TOOL, "%s: malformed archive", input->name);
+    goto done;
+  }
+  cfree_ar_iter_free(it);
+  it = NULL;
+
+  /* Pass 3: rebuild the System-V symbol index from the new bytes. */
+  if (nmembers) {
+    msyms = (CfreeArMemberSymbols*)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*msyms));
+    sym_allocs = (void**)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*sym_allocs));
+    sym_alloc_szs = (size_t*)driver_alloc_zeroed(
+        env, (size_t)nmembers * sizeof(*sym_alloc_szs));
+    if (!msyms || !sym_allocs || !sym_alloc_szs) {
+      driver_errf(STRIP_TOOL, "out of memory");
+      goto done;
+    }
+    for (k = 0; k < nmembers; ++k) {
+      void* blob = NULL;
+      size_t blob_size = 0;
+      const char** names = NULL;
+      uint32_t count = 0;
+      if (driver_collect_obj_global_syms(env, ctx, STRIP_TOOL, &members[k],
+                                         &blob, &blob_size, &names,
+                                         &count) != 0) {
+        goto done;
+      }
+      sym_allocs[k] = blob;
+      sym_alloc_szs[k] = blob_size;
+      msyms[k].names = names;
+      msyms[k].count = count;
+    }
+  }
+
+  if (ctx->file_io->open_writer(ctx->file_io->user, output_path, &out) !=
+      CFREE_OK) {
+    driver_errf(STRIP_TOOL, "failed to open: %s", output_path);
+    goto done;
+  }
+  opts_ar.epoch = strip_epoch_from_env();
+  opts_ar.long_names = 1;
+  opts_ar.symbol_index = 1;
+  opts_ar.member_symbols = msyms;
+  if (cfree_ar_write(out, members, nmembers, &opts_ar) != CFREE_OK ||
+      cfree_writer_status(out) != CFREE_OK) {
+    /* Same wording as the single-object path in driver_strip. */
+    driver_errf(STRIP_TOOL, "write failed: %s", output_path);
+    goto done;
+  }
+  rc = 0;
+
+done:
+  if (out) cfree_writer_close(out);
+  if (it) cfree_ar_iter_free(it);
+  if (sym_allocs) {
+    for (k = 0; k < nmembers; ++k) {
+      if (sym_allocs[k])
+        driver_collect_obj_global_syms_free(env, sym_allocs[k],
+                                            sym_alloc_szs[k]);
+    }
+    driver_free(env, sym_allocs, (size_t)nmembers * sizeof(*sym_allocs));
+  }
+  if (sym_alloc_szs)
+    driver_free(env, sym_alloc_szs,
+                (size_t)nmembers * sizeof(*sym_alloc_szs));
+  if (msyms) driver_free(env, msyms, (size_t)nmembers * sizeof(*msyms));
+  if (owned_data) {
+    for (k = 0; k < nmembers; ++k) {
+      if (owned_data[k]) driver_free(env, owned_data[k], owned_size[k]);
+    }
+    driver_free(env, owned_data, (size_t)nmembers * sizeof(*owned_data));
+  }
+  if (owned_size)
+    driver_free(env, owned_size, (size_t)nmembers * sizeof(*owned_size));
+  if (members) driver_free(env, members, (size_t)nmembers * sizeof(*members));
+  if (name_storage) driver_free(env, name_storage, name_bytes_total);
+  return rc;
+}
+
+int driver_strip(int argc, char** argv) { /* `cfree strip` entry point; the
+ * return value is the exit code: 0 success, 1 I/O or strip error, 2 bad
+ * usage. */
+ DriverEnv env;
+ CfreeContext ctx;
+ StripOpts opts;
+ CfreeFileData input_fd = {0};
+ CfreeBytes input;
+ CfreeWriter* w = NULL;
+ uint8_t* out_data = NULL;
+ size_t out_size = 0;
+ int have_input = 0;
+ int rc = 1; /* failure by default; usage errors overwrite it with 2 */
+ int i;
+ /* No arguments, or help requested per driver_argv_wants_help: print the
+ * usage text and succeed. This is the only exit that skips `done`. */
+ if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+ driver_help_strip();
+ return 0;
+ }
+ /* Defaults: --strip-all, empty keep/strip lists, no -o, no input yet. */
+ memset(&opts, 0, sizeof opts);
+ opts.op = STRIP_OP_ALL;
+ driver_env_init(&env);
+ ctx = driver_env_to_context(&env);
+ /* Argument scan. Operation flags overwrite opts.op, so the last one wins.
+ * -o and the name-valued options may consume the following argv slot. */
+ for (i = 1; i < argc; ++i) {
+ const char* a = argv[i];
+ const char* val = NULL;
+ int matched;
+ if (driver_streq(a, "--strip-debug")) {
+ opts.op = STRIP_OP_DEBUG;
+ continue;
+ }
+ if (driver_streq(a, "--strip-unneeded")) {
+ opts.op = STRIP_OP_UNNEEDED;
+ continue;
+ }
+ if (driver_streq(a, "--strip-all") || driver_streq(a, "-s")) {
+ opts.op = STRIP_OP_ALL;
+ continue;
+ }
+ if (driver_streq(a, "-o")) {
+ if (i + 1 >= argc) {
+ driver_errf(STRIP_TOOL, "-o requires a path");
+ rc = 2;
+ goto done;
+ }
+ opts.output = argv[++i];
+ continue;
+ }
+ matched = parse_name_arg(&i, argc, argv, "--keep-symbol", "-K", &val); /* <0: value missing, 0: not this option, >0: val set */
+ if (matched < 0) {
+ driver_errf(STRIP_TOOL, "%s requires a symbol name", a);
+ rc = 2;
+ goto done;
+ }
+ if (matched) {
+ if (push_name(&env, &opts.keep, &opts.nkeep, &opts.cap_keep, val) != 0) {
+ rc = 1; /* push_name already printed "out of memory" */
+ goto done;
+ }
+ continue;
+ }
+ matched = parse_name_arg(&i, argc, argv, "--strip-symbol", "-N", &val);
+ if (matched < 0) {
+ driver_errf(STRIP_TOOL, "%s requires a symbol name", a);
+ rc = 2;
+ goto done;
+ }
+ if (matched) {
+ if (push_name(&env, &opts.strip, &opts.nstrip, &opts.cap_strip, val) !=
+ 0) {
+ rc = 1;
+ goto done;
+ }
+ continue;
+ }
+ if (a[0] == '-' && a[1] != '\0') { /* a lone "-" falls through and is taken as the input name */
+ driver_errf(STRIP_TOOL, "unknown option: %s", a);
+ rc = 2;
+ goto done;
+ }
+ if (opts.input) {
+ driver_errf(STRIP_TOOL, "only one input file is supported");
+ rc = 2;
+ goto done;
+ }
+ opts.input = a;
+ }
+ /* Exactly one positional FILE is required. */
+ if (!opts.input) {
+ driver_errf(STRIP_TOOL, "missing input file");
+ rc = 2;
+ goto done;
+ }
+ /* Read the whole input up front. rc is still 1 at this point, so the
+ * failure exits below that do not set rc return 1. */
+ if (ctx.file_io->read_all(ctx.file_io->user, opts.input, &input_fd) !=
+ CFREE_OK) {
+ driver_errf(STRIP_TOOL, "failed to read: %s", opts.input);
+ goto done;
+ }
+ have_input = 1;
+ input.name = opts.input;
+ input.data = input_fd.data;
+ input.len = input_fd.size;
+ /* Archives are rebuilt by strip_archive; anything else is handed to
+ * strip_object_bytes as a single object. Without -o the input path itself
+ * is the output path. */
+ {
+ CfreeBinFmt fmt = cfree_detect_fmt(input.data, input.len);
+ const char* out_path = opts.output ? opts.output : opts.input;
+ if (fmt == CFREE_BIN_AR) {
+ rc = strip_archive(&env, &ctx, &input, &opts, out_path);
+ goto done;
+ }
+ if (strip_object_bytes(&env, &ctx, &input, &opts, &out_data, &out_size) !=
+ 0) {
+ goto done;
+ }
+ if (ctx.file_io->open_writer(ctx.file_io->user, out_path, &w) !=
+ CFREE_OK) {
+ driver_errf(STRIP_TOOL, "failed to open: %s", out_path);
+ goto done;
+ }
+ cfree_writer_write(w, out_data, out_size);
+ if (cfree_writer_status(w) != CFREE_OK) { /* status is sampled before the writer is closed in `done` */
+ driver_errf(STRIP_TOOL, "write failed: %s", out_path);
+ goto done;
+ }
+ rc = 0;
+ }
+ /* Shared cleanup: every exit after driver_env_init funnels through here. */
+done:
+ if (w) cfree_writer_close(w);
+ if (out_data) driver_free(&env, out_data, out_size);
+ if (have_input) ctx.file_io->release(ctx.file_io->user, &input_fd);
+ if (opts.keep)
+ driver_free(&env, (void*)opts.keep, (size_t)opts.cap_keep * sizeof(*opts.keep));
+ if (opts.strip)
+ driver_free(&env, (void*)opts.strip,
+ (size_t)opts.cap_strip * sizeof(*opts.strip));
+ driver_env_fini(&env);
+ return rc;
+}
diff --git a/include/cfree/core.h b/include/cfree/core.h
@@ -126,6 +126,13 @@ typedef enum CfreeSymKind {
CFREE_SK_IFUNC,
} CfreeSymKind;
+typedef enum CfreeSymVis { /* Symbol visibility; the argument type of cfree_obj_builder_symbol_set_vis. */
+ CFREE_SV_DEFAULT, /* first enumerator, so a zero-initialised value means DEFAULT */
+ CFREE_SV_HIDDEN, /* NOTE(review): names look like ELF STV_HIDDEN / PROTECTED / INTERNAL; confirm the emitter mapping */
+ CFREE_SV_PROTECTED,
+ CFREE_SV_INTERNAL,
+} CfreeSymVis;
+
typedef struct CfreePathPrefixMap {
const char *old_prefix;
const char *new_prefix;
diff --git a/include/cfree/object.h b/include/cfree/object.h
@@ -70,6 +70,9 @@ typedef struct CfreeObjSecInfo {
typedef struct CfreeObjSymInfo {
const char *name;
+ CfreeObjSymbol id; /* stable handle within this CfreeObjFile / builder;
+ usable as the target of cfree_obj_builder_remove_symbol,
+ rename_symbol, etc. */
CfreeSymBind bind;
CfreeSymKind kind;
CfreeObjSection section;
@@ -119,6 +122,12 @@ typedef struct CfreeObjRelocDesc {
CfreeStatus cfree_obj_builder_new(CfreeCompiler *, CfreeObjBuilder **out);
void cfree_obj_builder_free(CfreeObjBuilder *);
+/* Returns the CfreeCompiler this builder is bound to. Used by callers
+ * that hold a builder via cfree_obj_file_builder (the reader-side path)
+ * and need to intern strings into the matching Sym pool before issuing
+ * mutator calls. */
+CfreeCompiler *cfree_obj_builder_compiler(CfreeObjBuilder *);
+
CfreeStatus cfree_obj_builder_section(CfreeObjBuilder *,
const CfreeObjSectionDesc *,
CfreeObjSection *out);
@@ -158,6 +167,44 @@ CfreeStatus cfree_obj_builder_group_add_section(CfreeObjBuilder *,
CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder *);
CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder *, CfreeWriter *);
+/* Emit using a caller-specified output format instead of the builder's
+ * own target.obj. Used by `objcopy -O <bfdname>` to convert between
+ * ELF / Mach-O / COFF / Wasm at the same arch. Returns
+ * CFREE_UNSUPPORTED when the active arch doesn't have a backend for
+ * the requested format (e.g. RISC-V → Mach-O). */
+CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder *, CfreeObjFmt,
+ CfreeWriter *);
+
+/* ============================================================
+ * Mutators (strip / objcopy support)
+ * ============================================================
+ *
+ * Mark a section/symbol/group as removed, rename it, change a symbol's
+ * bind or visibility, or replace a section's bytes wholesale. Removed
+ * entries (and any cascade fallout) are filtered at emit time by the
+ * internal sweep; ids stay stable across mutations.
+ *
+ * Builders obtained from a reader via cfree_obj_file_builder are valid
+ * mutation targets — the reader produces an already-finalized builder
+ * and mutators are legal on it.
+ */
+CfreeStatus cfree_obj_builder_remove_section(CfreeObjBuilder *,
+ CfreeObjSection);
+CfreeStatus cfree_obj_builder_remove_symbol(CfreeObjBuilder *, CfreeObjSymbol);
+CfreeStatus cfree_obj_builder_remove_group(CfreeObjBuilder *, CfreeObjGroup);
+CfreeStatus cfree_obj_builder_rename_section(CfreeObjBuilder *,
+ CfreeObjSection, CfreeSym new_name);
+CfreeStatus cfree_obj_builder_rename_symbol(CfreeObjBuilder *, CfreeObjSymbol,
+ CfreeSym new_name);
+CfreeStatus cfree_obj_builder_symbol_set_bind(CfreeObjBuilder *,
+ CfreeObjSymbol, CfreeSymBind);
+CfreeStatus cfree_obj_builder_symbol_set_vis(CfreeObjBuilder *, CfreeObjSymbol,
+ CfreeSymVis);
+CfreeStatus cfree_obj_builder_section_replace_bytes(CfreeObjBuilder *,
+ CfreeObjSection,
+ const void *data,
+ size_t n);
+
/* ============================================================
* Reader / inspection
* ============================================================ */
@@ -174,6 +221,17 @@ typedef enum CfreeBinFmt {
typedef struct CfreeObjSymIter CfreeObjSymIter;
typedef struct CfreeObjRelocIter CfreeObjRelocIter;
+typedef struct CfreeObjGroupIter CfreeObjGroupIter;
+
+typedef struct CfreeObjGroupInfo { /* One section group, as filled in by cfree_obj_groupiter_next. */
+ const char *name; /* group name; NOTE(review): string lifetime is not visible here, confirm before caching */
+ CfreeObjSymbol signature; /* COMDAT key, or CFREE_OBJ_SYMBOL_NONE */
+ uint32_t flags; /* CfreeObjGroupFlag */
+ uint32_t nsections; /* member count of the group; presumably the length of `sections` */
+ /* Borrowed; valid until the next groupiter_next call or _free. Members
+ * pointing at the OBJ_SEC_NONE sentinel are reported as CFREE_SECTION_NONE. */
+ const CfreeObjSection *sections;
+} CfreeObjGroupInfo;
CfreeBinFmt cfree_detect_fmt(const uint8_t *data, size_t len);
CfreeStatus cfree_detect_target(const uint8_t *data, size_t len,
@@ -207,6 +265,13 @@ CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter *,
CfreeObjReloc *out);
void cfree_obj_reliter_free(CfreeObjRelocIter *);
+/* Section-group iteration (ELF SHT_GROUP / COMDAT and friends). Empty
+ * for formats / objects that carry no groups. */
+CfreeStatus cfree_obj_groupiter_new(CfreeObjFile *, CfreeObjGroupIter **out);
+CfreeIterResult cfree_obj_groupiter_next(CfreeObjGroupIter *,
+ CfreeObjGroupInfo *out);
+void cfree_obj_groupiter_free(CfreeObjGroupIter *);
+
/* Roundtrip: open an object via cfree_obj_open, then hand its underlying
* builder back. The builder is the same one the reader populated; it is
* already finalized, so callers may inspect it (e.g. iterate sections via
@@ -214,11 +279,10 @@ void cfree_obj_reliter_free(CfreeObjRelocIter *);
* cfree_obj_builder_emit to re-serialize the file. The builder lifetime is
* tied to the CfreeObjFile; do not call cfree_obj_builder_free on it.
*
- * Mutation after open (add section, redefine symbol, etc.) is not currently
- * supported — the read path closes the builder via obj_finalize, and the
- * builder API rejects post-finalize writes. Filtered roundtrip (strip /
- * objcopy --remove-section / --redefine-sym) needs a separate mutator
- * surface that does not yet exist. */
+ * Mutation is supported via the cfree_obj_builder_remove_* / rename_* /
+ * symbol_set_* / section_replace_bytes calls above. Drops and renames
+ * are cheap field writes; emit applies the cascade (drop relocs against
+ * removed sections, etc.) automatically. */
CfreeObjBuilder *cfree_obj_file_builder(const CfreeObjFile *);
#endif
diff --git a/src/api/object_builder.c b/src/api/object_builder.c
@@ -50,6 +50,10 @@ void cfree_obj_builder_free(CfreeObjBuilder* b) {
if (b) obj_free(b);
}
+/* Returns the compiler that builder `b` is bound to, as reported by
+ * obj_compiler and cast to the public type. A NULL builder yields NULL. */
+CfreeCompiler* cfree_obj_builder_compiler(CfreeObjBuilder* b) {
+  if (b == NULL) {
+    return NULL;
+  }
+  return (CfreeCompiler*)obj_compiler(b);
+}
+
CfreeStatus cfree_obj_builder_section(CfreeObjBuilder* b,
const CfreeObjSectionDesc* desc,
CfreeObjSection* out) {
@@ -189,12 +193,85 @@ CfreeStatus cfree_obj_builder_finalize(CfreeObjBuilder* b) {
return CFREE_OK;
}
+/* ---- mutators ---- */
+
+/* Public wrapper: translates `sec` to the internal id and forwards the
+ * removal to obj_section_remove. Returns CFREE_INVALID for a NULL builder
+ * and CFREE_OK otherwise; the id is not validated at this layer. */
+CfreeStatus cfree_obj_builder_remove_section(CfreeObjBuilder* b,
+                                             CfreeObjSection sec) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_section_remove(b, pub_to_intern_sec(sec));
+  return CFREE_OK;
+}
+
+/* Public wrapper: translates `sym` to the internal id and forwards the
+ * removal to obj_symbol_remove. Returns CFREE_INVALID for a NULL builder
+ * and CFREE_OK otherwise; the id is not validated at this layer. */
+CfreeStatus cfree_obj_builder_remove_symbol(CfreeObjBuilder* b,
+                                            CfreeObjSymbol sym) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_symbol_remove(b, pub_to_intern_sym(sym));
+  return CFREE_OK;
+}
+
+/* Public wrapper: translates `grp` to the internal id and forwards the
+ * removal to obj_group_remove. Returns CFREE_INVALID for a NULL builder
+ * and CFREE_OK otherwise; the id is not validated at this layer. */
+CfreeStatus cfree_obj_builder_remove_group(CfreeObjBuilder* b,
+                                           CfreeObjGroup grp) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_group_remove(b, pub_to_intern_group(grp));
+  return CFREE_OK;
+}
+
+/* Public wrapper: hands section `sec` and the already-interned `new_name`
+ * to obj_section_rename. Returns CFREE_INVALID for a NULL builder and
+ * CFREE_OK otherwise; neither argument is validated at this layer. */
+CfreeStatus cfree_obj_builder_rename_section(CfreeObjBuilder* b,
+                                             CfreeObjSection sec,
+                                             CfreeSym new_name) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_section_rename(b, pub_to_intern_sec(sec), (Sym)new_name);
+  return CFREE_OK;
+}
+
+/* Public wrapper: hands symbol `sym` and the already-interned `new_name`
+ * to obj_symbol_rename. Returns CFREE_INVALID for a NULL builder and
+ * CFREE_OK otherwise; neither argument is validated at this layer. */
+CfreeStatus cfree_obj_builder_rename_symbol(CfreeObjBuilder* b,
+                                            CfreeObjSymbol sym,
+                                            CfreeSym new_name) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_symbol_rename(b, pub_to_intern_sym(sym), (Sym)new_name);
+  return CFREE_OK;
+}
+
+/* Public wrapper: forwards a binding change for `sym` to
+ * obj_symbol_set_bind, casting the public enum to the internal SymBind.
+ * Returns CFREE_INVALID for a NULL builder and CFREE_OK otherwise. */
+CfreeStatus cfree_obj_builder_symbol_set_bind(CfreeObjBuilder* b,
+                                              CfreeObjSymbol sym,
+                                              CfreeSymBind bind) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_symbol_set_bind(b, pub_to_intern_sym(sym), (SymBind)bind);
+  return CFREE_OK;
+}
+
+/* Public wrapper: forwards a visibility change for `sym` to
+ * obj_symbol_set_vis, casting the public enum to the internal SymVis.
+ * Returns CFREE_INVALID for a NULL builder and CFREE_OK otherwise. */
+CfreeStatus cfree_obj_builder_symbol_set_vis(CfreeObjBuilder* b,
+                                             CfreeObjSymbol sym,
+                                             CfreeSymVis vis) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_symbol_set_vis(b, pub_to_intern_sym(sym), (SymVis)vis);
+  return CFREE_OK;
+}
+
+/* Public wrapper: passes `n` bytes at `data` to obj_section_replace_bytes
+ * as the new contents of section `sec`. Returns CFREE_INVALID for a NULL
+ * builder and CFREE_OK otherwise; `data` and `n` are forwarded unchecked. */
+CfreeStatus cfree_obj_builder_section_replace_bytes(CfreeObjBuilder* b,
+                                                    CfreeObjSection sec,
+                                                    const void* data,
+                                                    size_t n) {
+  if (b == NULL) {
+    return CFREE_INVALID;
+  }
+  obj_section_replace_bytes(b, pub_to_intern_sec(sec), (const u8*)data, n);
+  return CFREE_OK;
+}
+
CfreeStatus cfree_obj_builder_emit(CfreeObjBuilder* b, CfreeWriter* w) {
Compiler* c;
if (!b || !w) return CFREE_INVALID;
c = obj_compiler(b);
if (!c) return CFREE_INVALID;
- switch (c->target.obj) {
+ return cfree_obj_builder_emit_as(b, c->target.obj, w);
+}
+
+CfreeStatus cfree_obj_builder_emit_as(CfreeObjBuilder* b, CfreeObjFmt fmt,
+ CfreeWriter* w) {
+ Compiler* c;
+ if (!b || !w) return CFREE_INVALID;
+ c = obj_compiler(b);
+ if (!c) return CFREE_INVALID;
+ switch (fmt) {
case CFREE_OBJ_ELF:
emit_elf(c, b, w);
break;
diff --git a/src/api/object_file.c b/src/api/object_file.c
@@ -197,9 +197,10 @@ CfreeStatus cfree_obj_section_by_name(const CfreeObjFile* f, const char* name,
return CFREE_NOT_FOUND;
}
-static void fill_syminfo(const CfreeObjFile* f, const ObjSym* sym,
+static void fill_syminfo(const CfreeObjFile* f, ObjSymId id, const ObjSym* sym,
CfreeObjSymInfo* out) {
out->name = sym->name ? pool_str(f->compiler.global, sym->name, NULL) : "";
+ out->id = (id != OBJ_SYM_NONE) ? (CfreeObjSymbol)id : CFREE_OBJ_SYMBOL_NONE;
out->bind = (CfreeSymBind)sym->bind;
out->kind = (CfreeSymKind)sym->kind;
out->section = sym->section_id != OBJ_SEC_NONE
@@ -221,7 +222,7 @@ CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile* f, const char* name,
if (!e.sym || !e.sym->name) continue;
nm = pool_str(f->compiler.global, e.sym->name, NULL);
if (nm && strcmp(nm, name) == 0) {
- fill_syminfo(f, e.sym, out);
+ fill_syminfo(f, e.id, e.sym, out);
obj_symiter_free(it);
return CFREE_OK;
}
@@ -257,7 +258,7 @@ CfreeIterResult cfree_obj_symiter_next(CfreeObjSymIter* it,
ObjSymEntry entry;
if (!it || !out) return CFREE_ITER_ERROR;
if (!obj_symiter_next(it->inner, &entry)) return CFREE_ITER_END;
- fill_syminfo(it->file, entry.sym, out);
+ fill_syminfo(it->file, entry.id, entry.sym, out);
return CFREE_ITER_ITEM;
}
@@ -327,6 +328,77 @@ void cfree_obj_reliter_free(CfreeObjRelocIter* it) {
h->free(h, it, sizeof(*it));
}
+struct CfreeObjGroupIter {
+ CfreeObjFile* file; /* file being iterated; supplies the heap and name pool */
+ ObjGroupIter* inner; /* internal cursor over file->ob's groups */
+ /* Translation scratch for the borrowed `sections` slice handed back to
+ * the caller. Lazily grown to the largest group's nsections, and
+ * overwritten by every cfree_obj_groupiter_next call, so a returned
+ * slice is only valid until the next call or until the iterator is
+ * freed. Stays NULL until a group with at least one section is seen. */
+ CfreeObjSection* secbuf;
+ u32 seccap; /* capacity of secbuf, in elements */
+};
+
+/* Create a group iterator over `f` and hand it back through `*out`.
+ *
+ * Returns CFREE_INVALID if either argument is NULL, CFREE_NOMEM if the
+ * wrapper or the internal cursor cannot be allocated (`*out` is left
+ * untouched in both cases), and CFREE_OK on success. The iterator is
+ * allocated from the file's context heap. */
+CfreeStatus cfree_obj_groupiter_new(CfreeObjFile* f, CfreeObjGroupIter** out) {
+  Heap* heap;
+  CfreeObjGroupIter* iter;
+  ObjGroupIter* cursor;
+
+  if (f == NULL || out == NULL) return CFREE_INVALID;
+
+  heap = f->ctx->heap;
+  iter = (CfreeObjGroupIter*)heap->alloc(heap, sizeof(*iter),
+                                         _Alignof(CfreeObjGroupIter));
+  if (iter == NULL) return CFREE_NOMEM;
+
+  cursor = obj_groupiter_new(f->ob);
+  if (cursor == NULL) {
+    heap->free(heap, iter, sizeof(*iter));
+    return CFREE_NOMEM;
+  }
+
+  /* Zero everything first so secbuf / seccap start out empty. */
+  memset(iter, 0, sizeof(*iter));
+  iter->file = f;
+  iter->inner = cursor;
+
+  *out = iter;
+  return CFREE_OK;
+}
+
+/* Advance the iterator and describe the next group in `*out`.
+ *
+ * Returns CFREE_ITER_ITEM when `*out` was filled, CFREE_ITER_END when the
+ * internal cursor is exhausted, and CFREE_ITER_ERROR for NULL arguments or
+ * when the scratch buffer cannot be grown. Note that the internal cursor
+ * has already advanced by the time the growth allocation is attempted.
+ *
+ * `out->sections` points into the iterator's scratch buffer; internal
+ * section ids are translated to the public 0-based space (internal - 1),
+ * with OBJ_SEC_NONE mapped to CFREE_SECTION_NONE. */
+CfreeIterResult cfree_obj_groupiter_next(CfreeObjGroupIter* it,
+                                         CfreeObjGroupInfo* out) {
+  ObjGroupEntry cur;
+  const ObjGroup* grp;
+  u32 count;
+  u32 k;
+
+  if (it == NULL || out == NULL) return CFREE_ITER_ERROR;
+  if (!obj_groupiter_next(it->inner, &cur)) return CFREE_ITER_END;
+
+  grp = cur.group;
+  count = grp->nsections;
+
+  /* Grow the scratch buffer only when this group does not fit. */
+  if (it->seccap < count) {
+    Heap* heap = it->file->ctx->heap;
+    CfreeObjSection* grown = (CfreeObjSection*)heap->alloc(
+        heap, sizeof(*grown) * count, _Alignof(CfreeObjSection));
+    if (grown == NULL) return CFREE_ITER_ERROR;
+    if (it->secbuf != NULL) {
+      heap->free(heap, it->secbuf, sizeof(*it->secbuf) * it->seccap);
+    }
+    it->secbuf = grown;
+    it->seccap = count;
+  }
+
+  for (k = 0; k < count; ++k) {
+    ObjSecId internal = grp->sections[k];
+    if (internal == OBJ_SEC_NONE) {
+      it->secbuf[k] = CFREE_SECTION_NONE;
+    } else {
+      it->secbuf[k] = (CfreeObjSection)(internal - 1);
+    }
+  }
+
+  if (grp->name) {
+    out->name = pool_str(it->file->compiler.global, grp->name, NULL);
+  } else {
+    out->name = "";
+  }
+
+  if (grp->signature == OBJ_SYM_NONE) {
+    out->signature = CFREE_OBJ_SYMBOL_NONE;
+  } else {
+    out->signature = (CfreeObjSymbol)grp->signature;
+  }
+
+  out->flags = grp->flags;
+  out->nsections = count;
+  out->sections = it->secbuf;
+  return CFREE_ITER_ITEM;
+}
+
+/* Release a group iterator: its section scratch buffer, the internal
+ * cursor, and finally the wrapper itself. A NULL `it` is a no-op. */
+void cfree_obj_groupiter_free(CfreeObjGroupIter* it) {
+  if (it != NULL) {
+    Heap* heap = it->file->ctx->heap;
+    if (it->secbuf != NULL) {
+      heap->free(heap, it->secbuf, sizeof(*it->secbuf) * it->seccap);
+    }
+    obj_groupiter_free(it->inner);
+    heap->free(heap, it, sizeof(*it));
+  }
+}
+
/* Accessor for disasm/jit to access the underlying ObjBuilder when both
* are inside libcfree. Internal name kept stable for existing callers
* (src/link/link_jit.c, src/api/disasm.c). */
diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c
@@ -245,6 +245,13 @@ static u32 strtab_add(Buf* b, const char* s, u32 len) {
void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
Heap* h = (Heap*)c->ctx->heap;
+ /* Run the tombstone sweep before any iteration: cascades removed
+ * sections into their defining symbols, drops dangling relocs,
+ * compacts groups, and absorbs the historical UNDEF prune. After this
+ * call every direct ID-based access below must skip entries whose
+ * `removed` bit is set. */
+ obj_sweep_dead(ob);
+
/* ---- target validation ------------------------------------------ */
const ArchImpl* arch = arch_for_compiler(c);
const ArchElfOps* elf = arch ? arch->elf : NULL;
@@ -288,6 +295,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
for (u32 i = 1; i < nobjsec; ++i) {
const Section* s = obj_section_get(ob, i);
+ if (s->removed) continue; /* tombstone — see obj_sweep_dead */
ElfSec* es = &secs[nsecs];
memset(es, 0, sizeof *es);
u32 nlen;
@@ -378,14 +386,9 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymEntry e;
while (obj_symiter_next(it, &e)) {
const ObjSym* s = e.sym;
+ if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
int is_local = (s->bind == SB_LOCAL);
if ((pass == 0) != is_local) continue;
- /* Prune unreferenced UNDEF externals — they came from header
- * `extern` decls the TU never touched. See ObjSym::referenced. */
- if (s->kind == SK_UNDEF && !s->referenced &&
- (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
- continue;
- }
u32 nlen;
const char* nm = sym_to_str(c, s->name, &nlen);
u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0;
@@ -409,6 +412,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymIter* it = obj_symiter_new(ob);
ObjSymEntry e;
while (obj_symiter_next(it, &e)) {
+ if (e.sym->removed) continue;
if (e.sym->bind == SB_LOCAL) ++nlocals;
}
obj_symiter_free(it);
@@ -430,7 +434,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp);
for (u32 gi = 1; gi < nobjgrp; ++gi) {
const ObjGroup* g = obj_group_get(ob, gi);
- if (!g) continue;
+ if (!g || g->removed) continue;
u32 body_size = 4u + 4u * g->nsections;
u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32));
@@ -489,6 +493,8 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
u32 nrela_plans = 0;
for (u32 si = 1; si < nobjsec; ++si) {
+ const Section* host = obj_section_get(ob, si);
+ if (!host || host->removed) continue;
u32 nr = obj_reloc_count(ob, si);
if (!nr) continue;
u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr,
@@ -496,6 +502,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
u32 j = 0;
for (u32 i = 0; i < total_relocs; ++i) {
const Reloc* r = obj_reloc_at(ob, i);
+ if (r->removed) continue;
if (r->section_id != si) continue;
u32 etype = reloc_to(r->kind);
if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ &&
@@ -668,6 +675,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymEntry e;
u32 nsec = obj_section_count(ob), si;
while (obj_symiter_next(it, &e)) {
+ if (e.sym->removed) continue;
if (e.sym->kind == SK_IFUNC) {
ident[EI_OSABI] = ELFOSABI_GNU;
break;
@@ -677,7 +685,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
if (ident[EI_OSABI] != ELFOSABI_GNU) {
for (si = 1; si < nsec; ++si) {
const Section* sec = obj_section_get(ob, si);
- if (sec && (sec->flags & SF_RETAIN)) {
+ if (sec && !sec->removed && (sec->flags & SF_RETAIN)) {
ident[EI_OSABI] = ELFOSABI_GNU;
break;
}
diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c
@@ -218,6 +218,11 @@ static int sym_is_extdef(const ObjSym* s) {
void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
Heap* h = (Heap*)c->ctx->heap;
+ /* Tombstone sweep first — strip/objcopy mutations and the historical
+ * UNDEF prune are both expressed via Section.removed / ObjSym.removed
+ * post-sweep. See obj_sweep_dead. */
+ obj_sweep_dead(ob);
+
/* ---- target validation ---------------------------------------- */
const ArchImpl* arch = arch_for_compiler(c);
const ArchMachoOps* macho = arch ? arch->macho : NULL;
@@ -251,6 +256,7 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
for (u32 i = 1; i < nobjsec; ++i) {
const Section* s = obj_section_get(ob, i);
+ if (s->removed) continue; /* see obj_sweep_dead */
/* Skip ELF-style synthetic sections that read_elf would have
* filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O
* representation as data sections. */
@@ -353,21 +359,13 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymEntry e;
while (obj_symiter_next(it, &e)) {
const ObjSym* s = e.sym;
+ if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
int undef = sym_is_undef(s);
int extdef = sym_is_extdef(s);
int local = !undef && !extdef;
int want = (pass == 0 && local) || (pass == 1 && extdef) ||
(pass == 2 && undef);
if (!want) continue;
- /* Prune unreferenced UNDEF externals: the C frontend mints an
- * ObjSym for every header-supplied `extern` declaration whether
- * or not the TU references it. obj_reloc_ex flags the ones we
- * actually depend on; the rest never reach the output symtab. */
- if (undef && !s->referenced &&
- (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
- continue;
- }
-
MSym* ms = &msyms[nmsyms];
ms->obj_id = e.id;
@@ -475,11 +473,8 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymEntry e;
while (obj_symiter_next(it, &e)) {
const ObjSym* s = e.sym;
+ if (s->removed) continue;
int undef = sym_is_undef(s);
- if (undef && !s->referenced &&
- (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
- continue;
- }
if (undef)
++nundefs;
else if (sym_is_extdef(s))
@@ -503,6 +498,7 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
u32 j = 0;
for (u32 ri = 0; ri < total_relocs; ++ri) {
const Reloc* r = obj_reloc_at(ob, ri);
+ if (r->removed) continue;
if (r->section_id != m->obj_sec) continue;
if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 ||
r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) &&
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -393,6 +393,171 @@ void obj_finalize(ObjBuilder* ob) {
(void)ob;
}
+/* ---- mutators (strip / objcopy support) ---- */
+
+/* Tombstone section `id`. Only the `removed` bit is written; nothing is
+ * cascaded here. No-op for a NULL builder, OBJ_SEC_NONE, or an id that
+ * Sections_at does not resolve. */
+void obj_section_remove(ObjBuilder* ob, ObjSecId id) {
+  if (ob && id != OBJ_SEC_NONE) {
+    Section* sec = Sections_at(&ob->sections, id);
+    if (sec) sec->removed = 1;
+  }
+}
+
+/* Tombstone symbol `id`. Only the `removed` bit is written. No-op for a
+ * NULL builder, OBJ_SYM_NONE, or an id that Symbols_at does not resolve. */
+void obj_symbol_remove(ObjBuilder* ob, ObjSymId id) {
+  if (ob && id != OBJ_SYM_NONE) {
+    ObjSym* sym = Symbols_at(&ob->symbols, id);
+    if (sym) sym->removed = 1;
+  }
+}
+
+/* Tombstone group `id`. Only the `removed` bit is written; member sections
+ * are left alone. No-op for a NULL builder, OBJ_GROUP_NONE, or an id that
+ * Groups_at does not resolve. */
+void obj_group_remove(ObjBuilder* ob, ObjGroupId id) {
+  if (ob && id != OBJ_GROUP_NONE) {
+    ObjGroup* grp = Groups_at(&ob->groups, id);
+    if (grp) grp->removed = 1;
+  }
+}
+
+/* Point section `id` at the interned name `new_name`. No-op for a NULL
+ * builder, OBJ_SEC_NONE, or an id that Sections_at does not resolve. */
+void obj_section_rename(ObjBuilder* ob, ObjSecId id, Sym new_name) {
+  Section* sec = NULL;
+  if (ob != NULL && id != OBJ_SEC_NONE) {
+    sec = Sections_at(&ob->sections, id);
+  }
+  if (sec != NULL) {
+    sec->name = new_name;
+  }
+}
+
+/* Point symbol `id` at the interned name `new_name`. No-op for a NULL
+ * builder, OBJ_SYM_NONE, or an id that Symbols_at does not resolve. */
+void obj_symbol_rename(ObjBuilder* ob, ObjSymId id, Sym new_name) {
+  ObjSym* sym = NULL;
+  if (ob != NULL && id != OBJ_SYM_NONE) {
+    sym = Symbols_at(&ob->symbols, id);
+  }
+  if (sym != NULL) {
+    sym->name = new_name;
+  }
+}
+
+/* Overwrite the binding of symbol `id` (stored narrowed to u16). No-op for
+ * a NULL builder, OBJ_SYM_NONE, or an id that Symbols_at does not
+ * resolve. */
+void obj_symbol_set_bind(ObjBuilder* ob, ObjSymId id, SymBind bind) {
+  ObjSym* sym = NULL;
+  if (ob != NULL && id != OBJ_SYM_NONE) {
+    sym = Symbols_at(&ob->symbols, id);
+  }
+  if (sym != NULL) {
+    sym->bind = (u16)bind;
+  }
+}
+
+/* Overwrite the visibility of symbol `id` (stored narrowed to u8). No-op
+ * for a NULL builder, OBJ_SYM_NONE, or an id that Symbols_at does not
+ * resolve. */
+void obj_symbol_set_vis(ObjBuilder* ob, ObjSymId id, SymVis vis) {
+  ObjSym* sym = NULL;
+  if (ob != NULL && id != OBJ_SYM_NONE) {
+    sym = Symbols_at(&ob->symbols, id);
+  }
+  if (sym != NULL) {
+    sym->vis = (u8)vis;
+  }
+}
+
+/* Replace the contents of section `id` with `n` bytes copied from `data`.
+ *
+ * The old buffer is torn down and a fresh one initialised on the builder's
+ * heap rather than patched in place, because the replacement is usually a
+ * different size (objcopy --update-section). bss_size is reset to 0. When
+ * `data` is NULL or `n` is 0 the section is simply left empty. No-op for a
+ * NULL builder, OBJ_SEC_NONE, or an id that Sections_at does not resolve. */
+void obj_section_replace_bytes(ObjBuilder* ob, ObjSecId id, const u8* data,
+                               size_t n) {
+  Section* sec = NULL;
+
+  if (ob != NULL && id != OBJ_SEC_NONE) {
+    sec = Sections_at(&ob->sections, id);
+  }
+  if (sec == NULL) return;
+
+  buf_fini(&sec->bytes);
+  buf_init(&sec->bytes, ob->heap);
+  sec->bss_size = 0;
+
+  if (data == NULL || n == 0) return;
+  buf_write(&sec->bytes, data, n);
+}
+
+/* Tombstone-driven consistency sweep over the whole builder.
+ *
+ * Propagates `removed` bits so that no surviving entry refers to a
+ * removed one: symbols follow their defining section, relocs follow their
+ * host section and target symbol, groups follow their signature symbol and
+ * member sections, and Section.link is cleared when its target is gone.
+ * The same symbol walk also prunes unreferenced section-less externs.
+ * Only tombstone bits, group member lists and Section.link are written;
+ * nothing is re-indexed, so ids stay stable. A NULL builder is a no-op,
+ * matching the obj_*_remove / rename / set-* mutators. */
+void obj_sweep_dead(ObjBuilder* ob) {
+  u32 nsec, nsym, nrel, ngrp;
+  u32 i;
+
+  /* Guard before touching ob->... below. */
+  if (!ob) return;
+
+  nsec = Sections_count(&ob->sections);
+  nsym = Symbols_count(&ob->symbols);
+  nrel = Relocs_count(&ob->relocs);
+  ngrp = Groups_count(&ob->groups);
+
+  /* Pass 1: cascade removed sections into their defining symbols. Also
+   * absorbs the historical UNDEF-prune predicate: any non-referenced
+   * global/weak symbol that lacks a defining section (and isn't an ABS
+   * or COMMON definition, both of which legitimately have section_id ==
+   * OBJ_SEC_NONE) is a spurious extern from a header — drop it.
+   *
+   * The "no defining section" test matches macho_emit's sym_is_undef,
+   * which is stronger than `kind == SK_UNDEF`: frontends mint SK_OBJ /
+   * SK_TLS / SK_FUNC entries for extern decls and only set them to
+   * SK_UNDEF for true references, so checking section_id catches both. */
+  for (i = 1; i < nsym; ++i) {
+    ObjSym* s = Symbols_at(&ob->symbols, i);
+    if (!s || s->removed) continue;
+    if (s->section_id != OBJ_SEC_NONE) {
+      const Section* sec = Sections_at(&ob->sections, s->section_id);
+      if (sec && sec->removed) {
+        s->removed = 1;
+        continue;
+      }
+    }
+    if (s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS &&
+        s->kind != SK_COMMON && !s->referenced &&
+        (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+      s->removed = 1;
+    }
+  }
+
+  /* Pass 2: drop relocs that became dangling. A reloc is dead if its
+   * containing section or its target symbol is gone; a target whose
+   * defining section was removed is covered because pass 1 has already
+   * tombstoned that symbol. */
+  for (i = 0; i < nrel; ++i) {
+    Reloc* r = Relocs_at(&ob->relocs, i);
+    if (!r || r->removed) continue;
+    if (r->section_id != OBJ_SEC_NONE) {
+      const Section* sec = Sections_at(&ob->sections, r->section_id);
+      if (!sec || sec->removed) {
+        r->removed = 1;
+        continue;
+      }
+    }
+    if (r->sym != OBJ_SYM_NONE) {
+      const ObjSym* ts = Symbols_at(&ob->symbols, r->sym);
+      if (!ts || ts->removed) r->removed = 1;
+    }
+  }
+
+  /* Pass 3: compact each group's member list to drop removed sections;
+   * tombstone the group if its list empties out or its signature symbol
+   * is removed. Member list is rewritten in place — the storage stays
+   * the same size, the trailing slots just become unused. */
+  for (i = 1; i < ngrp; ++i) {
+    ObjGroup* g = Groups_at(&ob->groups, i);
+    u32 w, r;
+    if (!g || g->removed) continue;
+    if (g->signature != OBJ_SYM_NONE) {
+      const ObjSym* sig = Symbols_at(&ob->symbols, g->signature);
+      if (!sig || sig->removed) {
+        g->removed = 1;
+        continue;
+      }
+    }
+    w = 0; /* write cursor; r is the read cursor */
+    for (r = 0; r < g->nsections; ++r) {
+      ObjSecId sid = g->sections[r];
+      const Section* sec =
+          (sid != OBJ_SEC_NONE) ? Sections_at(&ob->sections, sid) : NULL;
+      if (sec && !sec->removed) g->sections[w++] = sid;
+    }
+    g->nsections = w;
+    if (w == 0) g->removed = 1;
+  }
+
+  /* Pass 4: clear Section.link if it now points at a removed section.
+   * (Section.info is type-dependent — leave it to the emitter, which
+   * already inspects the sem to interpret it.) */
+  for (i = 1; i < nsec; ++i) {
+    Section* s = Sections_at(&ob->sections, i);
+    if (!s || s->removed) continue;
+    if (s->link != OBJ_SEC_NONE) {
+      const Section* lk = Sections_at(&ob->sections, s->link);
+      if (!lk || lk->removed) s->link = OBJ_SEC_NONE;
+    }
+  }
+}
+
/* ---- read side ---- */
u32 obj_section_count(const ObjBuilder* ob) {
@@ -408,6 +573,7 @@ u32 obj_reloc_count(const ObjBuilder* ob, ObjSecId id) {
u32 i, total = Relocs_count(&ob->relocs), n = 0;
for (i = 0; i < total; ++i) {
const Reloc* r = Relocs_at(&ob->relocs, i);
+ if (r->removed) continue;
if (r->section_id == id) ++n;
}
return n;
@@ -455,3 +621,33 @@ void obj_symiter_free(ObjSymIter* it) {
if (!it) return;
((Heap*)it->ob->heap)->free((Heap*)it->ob->heap, it, sizeof(*it));
}
+
+struct ObjGroupIter {
+ const ObjBuilder* ob; /* builder whose groups are walked; its heap
+ * allocates and frees this cursor */
+ u32 idx; /* next index to return */
+};
+
+/* Allocate a group cursor on `ob`'s heap, positioned at the first real
+ * group slot. Returns NULL if the allocation fails. `ob` must not be
+ * NULL (it is dereferenced unconditionally). */
+ObjGroupIter* obj_groupiter_new(const ObjBuilder* ob) {
+  ObjGroupIter* cursor;
+
+  cursor = (ObjGroupIter*)ob->heap->alloc(ob->heap, sizeof(*cursor),
+                                          _Alignof(ObjGroupIter));
+  if (cursor) {
+    cursor->ob = ob;
+    cursor->idx = 1; /* slot 0 is the sentinel, never returned */
+  }
+  return cursor;
+}
+
+/* Fetch the group at the cursor into `*out` and step forward.
+ *
+ * Returns 1 when `*out` was filled and 0 when `it` is NULL or Groups_at
+ * has no entry at the current index (end of iteration). Tombstoned groups
+ * are returned like any other; callers check ObjGroup::removed. */
+int obj_groupiter_next(ObjGroupIter* it, ObjGroupEntry* out) {
+  const ObjGroup* cur = it ? Groups_at(&it->ob->groups, it->idx) : NULL;
+
+  if (cur == NULL) return 0;
+
+  out->group = cur;
+  out->id = it->idx++;
+  return 1;
+}
+
+/* Return the cursor to the heap of the builder it was created from.
+ * A NULL `it` is a no-op. */
+void obj_groupiter_free(ObjGroupIter* it) {
+  Heap* heap;
+
+  if (it == NULL) return;
+  heap = (Heap*)it->ob->heap;
+  heap->free(heap, it, sizeof(*it));
+}
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -245,6 +245,11 @@ typedef struct Section {
u32 ext_type;
u32 ext_flags; /* same idea for format-specific sh_flags bits
not represented in SecFlag (e.g. SHF_EXCLUDE) */
+ /* Tombstone for strip/objcopy-style mutations. Set by
+ * obj_section_remove; honored by obj_sweep_dead and the emitters.
+ * Iterators / direct ID-based access on the builder must consult this
+ * bit and skip removed entries. */
+ u8 removed;
Buf bytes;
} Section;
@@ -254,6 +259,10 @@ typedef struct Reloc {
u16 kind;
u8 has_explicit_addend;
u8 pair; /* paired/following relocation, format-specific */
+ /* Tombstone set by obj_sweep_dead when the reloc points at a removed
+ * section or symbol. Lives in the slack between `pair` and `sym` — no
+ * struct-size change. */
+ u8 removed;
ObjSymId sym;
i64 addend;
} Reloc;
@@ -284,6 +293,12 @@ typedef struct ObjSym {
* mark every read-in symbol referenced=1 so a roundtrip preserves
* UNDEFs that came from another tool's output. */
u8 referenced;
+ /* Tombstone for strip/objcopy. Set by obj_symbol_remove or cascaded
+ * by obj_sweep_dead when this symbol is defined in a removed section.
+ * The UNDEF-prune predicate (was: !referenced && SK_UNDEF && global/weak)
+ * is also folded into the sweep, so emit-time symbol loops only need to
+ * check `removed`. */
+ u8 removed;
} ObjSym;
typedef struct ObjGroup {
@@ -292,6 +307,10 @@ typedef struct ObjGroup {
ObjSecId* sections;
u32 nsections;
u32 flags;
+ /* Tombstone — set by obj_group_remove, or cascaded by obj_sweep_dead
+ * when every member section has been removed (or the signature symbol
+ * has been removed). */
+ u8 removed;
} ObjGroup;
/* The single concrete in-memory object representation.
@@ -379,6 +398,52 @@ void obj_group_add_section(ObjBuilder*, ObjGroupId group_id,
void obj_finalize(ObjBuilder*);
+/* ---- post-finalize mutators (strip / objcopy support) ----
+ *
+ * Mutators flip per-entry fields and / or `removed` tombstones. Cascading
+ * cleanup (drop relocs against removed sections, etc.) is deferred to
+ * obj_sweep_dead, which the emitters call automatically. Apart from
+ * obj_section_replace_bytes, mutators are cheap individual field writes;
+ * none of them re-index or compact storage, so ObjSecId / ObjSymId /
+ * ObjGroupId remain stable.
+ *
+ * No-ops when given a NULL builder or OBJ_SEC_NONE / OBJ_SYM_NONE /
+ * OBJ_GROUP_NONE, and ids for which the builder has no entry are silently
+ * ignored. Removing an already-removed entry just sets the bit again, so
+ * the driver tools can call the remove functions in bulk and rely on
+ * idempotency. The rename / set-bind / set-vis / replace-bytes mutators
+ * do not look at `removed`: they still write the field on a tombstoned
+ * entry. */
+void obj_section_remove(ObjBuilder*, ObjSecId);
+void obj_symbol_remove(ObjBuilder*, ObjSymId);
+void obj_group_remove(ObjBuilder*, ObjGroupId);
+void obj_section_rename(ObjBuilder*, ObjSecId, Sym new_name);
+void obj_symbol_rename(ObjBuilder*, ObjSymId, Sym new_name);
+void obj_symbol_set_bind(ObjBuilder*, ObjSymId, SymBind);
+void obj_symbol_set_vis(ObjBuilder*, ObjSymId, SymVis);
+/* Replace `section_id`'s contents wholesale with `n` bytes from `data`.
+ * Resets bss_size (so a former NOBITS section gains real bytes) and
+ * preserves the section's other attributes (name, kind, flags, align).
+ * A NULL `data` or n == 0 leaves the section empty.
+ * Existing relocations against the section are kept — caller is
+ * responsible for issuing obj_symbol_remove on any defined symbols whose
+ * (value, size) no longer fits, etc. */
+void obj_section_replace_bytes(ObjBuilder*, ObjSecId, const u8* data, size_t n);
+
+/* Tombstone-driven consistency sweep. Called by each file-format emitter
+ * at the top of emit; consumers that walk a builder by raw section/symbol/
+ * reloc/group ID after sweep must respect the `removed` bit on each entry.
+ *
+ * Performs the following steps (1 and 2 share a single walk over the
+ * symbols):
+ *   1. Cascade: any symbol defined in a removed section becomes removed.
+ *   2. UNDEF prune: any non-referenced global/weak symbol with no defining
+ *      section (section_id == OBJ_SEC_NONE) whose kind is neither SK_ABS
+ *      nor SK_COMMON becomes removed. This is broader than
+ *      `kind == SK_UNDEF` and folds the historical "spurious extern from
+ *      a header" filter.
+ *   3. Reloc cleanup: any reloc whose containing section or target symbol
+ *      is removed becomes removed. A target defined in a removed section
+ *      is covered because step 1 has already tombstoned that symbol.
+ *   4. Group compaction: each group's section list is filtered in place to
+ *      drop removed members; a group whose list empties out (or whose
+ *      signature symbol has been removed) is itself marked removed.
+ *   5. Section link cleanup: Section.link cleared if it points at a
+ *      removed section.
+ *
+ * Idempotent — safe to call multiple times. On a never-mutated builder
+ * step 2 is normally the only one with any effect (step 4 also tombstones
+ * a group that has no member sections at all). */
+void obj_sweep_dead(ObjBuilder*);
+
/* Format-specific ELF e_flags (per-arch ABI bits, e.g. EF_RISCV_RVC |
* EF_RISCV_FLOAT_ABI_DOUBLE on RV64). Set by read_elf during input
* parsing; consumed by emit_elf for round-trip. The setter records
@@ -408,7 +473,13 @@ const ObjGroup* obj_group_get(const ObjBuilder*, ObjGroupId id);
/* Symbol iteration: ObjSymId is scoped to this builder, but callers should not
* assume dense contiguous ids or direct indexing. The builder may store symbols
- * in segments internally; use the cursor. */
+ * in segments internally; use the cursor.
+ *
+ * The iterator is raw — it visits every symbol slot including those whose
+ * `removed` tombstone is set. Callers that want post-sweep semantics must
+ * check ObjSym::removed themselves. (Consistent with Section.removed and
+ * Reloc.removed: tombstones live as a per-entry field, not behind the
+ * iterator.) */
typedef struct ObjSymIter ObjSymIter;
typedef struct ObjSymEntry {
ObjSymId id;
@@ -418,6 +489,19 @@ ObjSymIter* obj_symiter_new(const ObjBuilder*);
int obj_symiter_next(ObjSymIter*, ObjSymEntry* out); /* returns 0 at end */
void obj_symiter_free(ObjSymIter*);
+/* Group iteration: peer of obj_symiter for groups (COMDAT and friends).
+ * Same segmented-storage caveat — use the cursor, don't index directly.
+ * Like obj_symiter, this is raw: tombstoned groups are still returned;
+ * callers consult ObjGroup::removed. obj_groupiter_new returns NULL when
+ * the cursor cannot be allocated; obj_groupiter_next and
+ * obj_groupiter_free both accept a NULL cursor. */
+typedef struct ObjGroupIter ObjGroupIter;
+typedef struct ObjGroupEntry {
+ ObjGroupId id; /* id of the group just returned */
+ const ObjGroup* group; /* borrowed pointer into the builder's storage */
+} ObjGroupEntry;
+ObjGroupIter* obj_groupiter_new(const ObjBuilder*);
+int obj_groupiter_next(ObjGroupIter*, ObjGroupEntry* out); /* 0 at end */
+void obj_groupiter_free(ObjGroupIter*);
+
/* Writer is the public CfreeWriter type aliased to Writer inside libcfree
* (see src/core/core.h). The streaming API lives in <cfree/core.h> as
* cfree_writer_*. */
diff --git a/test/elf/unit/groupiter.c b/test/elf/unit/groupiter.c
@@ -0,0 +1,217 @@
+/* Hand-built ObjBuilder with a COMDAT group, exercised via both the
+ * internal obj_groupiter and the public cfree_obj_groupiter.
+ *
+ * Steps:
+ * 1. Build an ELF with two sections wired into one COMDAT group.
+ * 2. Read internal iter on the freshly built builder; verify shape.
+ * 3. Emit ELF, reopen via cfree_obj_open, walk the public iter; verify
+ * group survives the on-disk roundtrip and section IDs are
+ * remapped to the public 0-based space. */
+
+#include <cfree/core.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "lib/cfree_test_target.h"
+#include "obj/obj.h"
+
+/* CfreeHeap alloc hook backed by malloc. The heap handle and the requested
+ * alignment are ignored; a zero-byte request yields NULL without calling
+ * malloc. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)a;
+  (void)h;
+  if (n == 0) return NULL;
+  return malloc(n);
+}
+/* CfreeHeap realloc hook backed by realloc. The heap handle, the old size
+ * and the alignment are ignored.
+ *
+ * A request to shrink to zero bytes frees `p` and returns NULL, mirroring
+ * heap_alloc's "zero bytes -> NULL" rule; realloc(p, 0) itself is
+ * implementation-defined, so it is never issued. */
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h;
+  (void)o;
+  (void)a;
+  if (n == 0) {
+    free(p);
+    return NULL;
+  }
+  return realloc(p, n);
+}
+/* CfreeHeap free hook backed by free(); the heap handle and the size hint
+ * are ignored. */
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  (void)n;
+  (void)h;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; /* malloc-backed heap handed to the library as ctx.heap */
+
+/* Diagnostic sink callback: prints "<kind>: <formatted message>\n" to
+ * stderr. The sink handle and source location are ignored.
+ *
+ * `k` indexes the label table; a value outside the table (for example a
+ * diagnostic kind added later) is printed as "diag" instead of reading
+ * past the end of the array. */
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* const names[] = {"note", "warning", "error", "fatal"};
+  const size_t nnames = sizeof names / sizeof names[0];
+  const char* label = ((size_t)k < nnames) ? names[k] : "diag";
+  (void)s;
+  (void)loc;
+  fprintf(stderr, "%s: ", label);
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; /* stderr diagnostic sink, installed as ctx.diag */
+
+static int g_failures; /* incremented by every failed CHECK; a failed CHECK reports and continues */
+#define CHECK(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* AArch64 `mov w0, #1 ; ret`, stored as two little-endian 32-bit
+ * instruction words (0x52800020, 0xd65f03c0). */
+static const uint8_t TEXT_BYTES[8] = {
+ 0x20, 0x00, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6,
+};
+
+/* NULL-tolerant string equality: returns 1 when `s` is non-NULL and equal
+ * to `want`, 0 otherwise. `want` must not be NULL. */
+static int name_eq(const char* s, const char* want) {
+  if (s == NULL) return 0;
+  return strcmp(s, want) == 0;
+}
+
+/* Test driver: builds a two-section COMDAT group, checks it through the
+ * internal iterator, emits ELF, reopens the bytes and checks the group
+ * again through the public iterator. Returns 0 when every CHECK passed and
+ * 1 on any failed CHECK or setup failure. */
+int main(void) {
+  CfreeTarget target;
+  if (cfree_test_target_init(&target) != 0) {
+    fprintf(stderr, "FAIL: cfree_test_target_init\n");
+    return 1;
+  }
+
+  CfreeContext ctx = {.heap = &g_heap,
+                      .file_io = NULL,
+                      .diag = &g_diag,
+                      .metrics = NULL,
+                      .now = -1};
+  CfreeCompiler* cc = NULL;
+  if (cfree_compiler_new(target, &ctx, &cc) != CFREE_OK || !cc) {
+    fprintf(stderr, "FAIL: cfree_compiler_new\n");
+    return 1;
+  }
+  Compiler* c = (Compiler*)cc;
+  /* Landing pad for a compiler panic raised anywhere below. */
+  if (setjmp(c->panic)) {
+    compiler_run_cleanups(c);
+    cfree_compiler_free(cc);
+    fprintf(stderr, "FAIL: compiler_panic\n");
+    return 1;
+  }
+
+  /* ---- build ---- */
+  ObjBuilder* in = obj_new(c);
+  Pool* p = c->global;
+
+  Sym sig_nm = pool_intern_cstr(p, "comdat_sig");
+  Sym text_nm = pool_intern_cstr(p, ".text.comdat_fn");
+  Sym data_nm = pool_intern_cstr(p, ".data.comdat_fn");
+
+  ObjSecId sec_text = obj_section(in, text_nm, SEC_TEXT,
+                                  SF_ALLOC | SF_EXEC | SF_GROUP, 4);
+  ObjSecId sec_data = obj_section(in, data_nm, SEC_DATA,
+                                  SF_ALLOC | SF_WRITE | SF_GROUP, 8);
+  obj_write(in, sec_text, TEXT_BYTES, sizeof TEXT_BYTES);
+  static const uint8_t zero8[8] = {0};
+  obj_write(in, sec_data, zero8, sizeof zero8);
+
+  ObjSymId sig_sym =
+      obj_symbol(in, sig_nm, SB_WEAK, SK_FUNC, sec_text, 0, sizeof TEXT_BYTES);
+
+  ObjGroupId gid = obj_group(in, sig_nm, sig_sym, CFREE_OBJ_GROUP_COMDAT);
+  obj_group_add_section(in, gid, sec_text);
+  obj_group_add_section(in, gid, sec_data);
+
+  obj_finalize(in);
+
+  /* ---- internal iter ---- */
+  {
+    /* A NULL cursor is tolerated by next/free; it shows up as seen == 0. */
+    ObjGroupIter* it = obj_groupiter_new(in);
+    ObjGroupEntry e;
+    int seen = 0;
+    while (obj_groupiter_next(it, &e)) {
+      ++seen;
+      CHECK(e.id == gid, "internal iter: id=%u, want %u", e.id, gid);
+      CHECK(e.group->nsections == 2, "internal iter: nsections=%u, want 2",
+            e.group->nsections);
+      CHECK(e.group->signature == sig_sym, "internal iter: signature=%u",
+            e.group->signature);
+      CHECK((e.group->flags & CFREE_OBJ_GROUP_COMDAT) != 0,
+            "internal iter: missing COMDAT flag (flags=0x%x)", e.group->flags);
+      CHECK(e.group->sections[0] == sec_text,
+            "internal iter: sections[0]=%u, want %u", e.group->sections[0],
+            sec_text);
+      CHECK(e.group->sections[1] == sec_data,
+            "internal iter: sections[1]=%u, want %u", e.group->sections[1],
+            sec_data);
+    }
+    CHECK(seen == 1, "internal iter: saw %d groups, want 1", seen);
+    obj_groupiter_free(it);
+  }
+
+  /* ---- emit + public-iter readback ---- */
+  CfreeWriter* w = NULL;
+  (void)cfree_writer_mem(&g_heap, &w);
+  if (!w) {
+    /* Without a writer emit_elf has nowhere to put the bytes. */
+    fprintf(stderr, "FAIL: cfree_writer_mem\n");
+    obj_free(in);
+    cfree_compiler_free(cc);
+    return 1;
+  }
+  emit_elf(c, in, w);
+  size_t out_len = 0;
+  const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len);
+  /* Copy the bytes out: the writer's buffer is gone after close. */
+  uint8_t* roundtrip = (uint8_t*)malloc(out_len ? out_len : 1);
+  if (!roundtrip) {
+    fprintf(stderr, "FAIL: out of memory copying emitted ELF\n");
+    cfree_writer_close(w);
+    obj_free(in);
+    cfree_compiler_free(cc);
+    return 1;
+  }
+  if (out_len) memcpy(roundtrip, out_data, out_len);
+  cfree_writer_close(w);
+
+  CfreeBytes input = {.name = "groupiter", .data = roundtrip, .len = out_len};
+  CfreeObjFile* f = NULL;
+  CHECK(cfree_obj_open(&ctx, &input, &f) == CFREE_OK && f,
+        "cfree_obj_open failed");
+
+  if (f) {
+    /* Resolve section IDs by name so the test isn't coupled to ordering. */
+    CfreeObjSection text_pub = CFREE_SECTION_NONE;
+    CfreeObjSection data_pub = CFREE_SECTION_NONE;
+    CHECK(cfree_obj_section_by_name(f, ".text.comdat_fn", &text_pub) ==
+              CFREE_OK,
+          "section_by_name .text.comdat_fn");
+    CHECK(cfree_obj_section_by_name(f, ".data.comdat_fn", &data_pub) ==
+              CFREE_OK,
+          "section_by_name .data.comdat_fn");
+
+    CfreeObjGroupIter* git = NULL;
+    CHECK(cfree_obj_groupiter_new(f, &git) == CFREE_OK,
+          "cfree_obj_groupiter_new");
+    int seen = 0;
+    CfreeObjGroupInfo gi;
+    while (cfree_obj_groupiter_next(git, &gi) == CFREE_ITER_ITEM) {
+      ++seen;
+      CHECK(name_eq(gi.name, "comdat_sig"), "public iter: name=%s",
+            gi.name ? gi.name : "(null)");
+      CHECK((gi.flags & CFREE_OBJ_GROUP_COMDAT) != 0,
+            "public iter: missing COMDAT flag (flags=0x%x)", gi.flags);
+      CHECK(gi.nsections == 2, "public iter: nsections=%u, want 2",
+            gi.nsections);
+      CHECK(gi.sections != NULL, "public iter: NULL sections");
+      if (gi.sections && gi.nsections == 2) {
+        /* Order is preserved by ELF SHT_GROUP, so members[0] should be
+         * sec_text and members[1] sec_data. */
+        CHECK(gi.sections[0] == text_pub,
+              "public iter: sections[0]=%u, want %u (text)", gi.sections[0],
+              text_pub);
+        CHECK(gi.sections[1] == data_pub,
+              "public iter: sections[1]=%u, want %u (data)", gi.sections[1],
+              data_pub);
+      }
+      /* Signature should be a valid public symbol id, not NONE. */
+      CHECK(gi.signature != CFREE_OBJ_SYMBOL_NONE,
+            "public iter: signature is NONE");
+    }
+    CHECK(seen == 1, "public iter: saw %d groups, want 1", seen);
+    cfree_obj_groupiter_free(git);
+    cfree_obj_free(f);
+  }
+
+  free(roundtrip);
+  obj_free(in);
+  cfree_compiler_free(cc);
+
+  if (g_failures) {
+    fprintf(stderr, "%d failure(s)\n", g_failures);
+    return 1;
+  }
+  fputs("groupiter: OK\n", stderr);
+  return 0;
+}
diff --git a/test/elf/unit/mutate.c b/test/elf/unit/mutate.c
@@ -0,0 +1,231 @@
+/* Exercises the post-finalize mutator API:
+ *
+ * 1. Build a small ELF with .text + .data + an ext-undef symbol.
+ * 2. After finalize, remove .data and rename the .text symbol; also
+ * flip a SB_GLOBAL symbol to SB_LOCAL.
+ * 3. Emit + reopen, verify:
+ * - .data is gone
+ * - the .text symbol shows up under the new name
+ * - the localized symbol round-trips as SB_LOCAL
+ * - relocs that pointed at .data are dropped
+ * - the spurious UNDEF (kind=SK_UNDEF, !referenced) is pruned by the
+ * sweep that mutators now share with the historical UNDEF prune. */
+
+#include <cfree/core.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "lib/cfree_test_target.h"
+#include "obj/obj.h"
+
+/* CfreeHeap alloc hook backed by malloc. The heap handle and the requested
+ * alignment are ignored; a zero-byte request yields NULL without calling
+ * malloc. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)a;
+  (void)h;
+  if (n == 0) return NULL;
+  return malloc(n);
+}
+/* CfreeHeap realloc hook backed by realloc. The heap handle, the old size
+ * and the alignment are ignored.
+ *
+ * A request to shrink to zero bytes frees `p` and returns NULL, mirroring
+ * heap_alloc's "zero bytes -> NULL" rule; realloc(p, 0) itself is
+ * implementation-defined, so it is never issued. */
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h;
+  (void)o;
+  (void)a;
+  if (n == 0) {
+    free(p);
+    return NULL;
+  }
+  return realloc(p, n);
+}
+/* CfreeHeap free hook backed by free(); the heap handle and the size hint
+ * are ignored. */
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  (void)n;
+  (void)h;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL}; /* malloc-backed heap handed to the library as ctx.heap */
+
+/* Diagnostic sink callback: prints "<kind>: <formatted message>\n" to
+ * stderr. The sink handle and source location are ignored.
+ *
+ * `k` indexes the label table; a value outside the table (for example a
+ * diagnostic kind added later) is printed as "diag" instead of reading
+ * past the end of the array. */
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* const names[] = {"note", "warning", "error", "fatal"};
+  const size_t nnames = sizeof names / sizeof names[0];
+  const char* label = ((size_t)k < nnames) ? names[k] : "diag";
+  (void)s;
+  (void)loc;
+  fprintf(stderr, "%s: ", label);
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; /* stderr diagnostic sink, installed as ctx.diag */
+
+static int g_failures; /* incremented by every failed CHECK; a failed CHECK reports and continues */
+#define CHECK(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* AArch64 `mov w0, #7 ; ret`, stored as two little-endian 32-bit
+ * instruction words (0x528000e0, 0xd65f03c0). */
+static const uint8_t TEXT_BYTES[8] = {
+ 0xe0, 0x00, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6,
+};
+
+/* Builder-mutator round trip: build an object, remove .data, rename and
+ * localize symbols, emit, reopen, verify. Returns 1 on any failure, else 0. */
+int main(void) {
+  CfreeTarget target;
+  if (cfree_test_target_init(&target) != 0) {
+    fprintf(stderr, "FAIL: cfree_test_target_init\n");
+    return 1;
+  }
+  CfreeContext ctx = {.heap = &g_heap, .file_io = NULL, .diag = &g_diag,
+                      .metrics = NULL, .now = -1};
+  CfreeCompiler* cc = NULL;
+  if (cfree_compiler_new(target, &ctx, &cc) != CFREE_OK || !cc) {
+    fprintf(stderr, "FAIL: cfree_compiler_new\n");
+    return 1;
+  }
+  Compiler* c = (Compiler*)cc;
+  if (setjmp(c->panic)) { /* reached again via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free(cc);
+    fprintf(stderr, "FAIL: compiler_panic\n");
+    return 1;
+  }
+
+  /* ---- build ---- */
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+
+  Sym text_nm = pool_intern_cstr(p, ".text");
+  Sym data_nm = pool_intern_cstr(p, ".data");
+  Sym entry_nm = pool_intern_cstr(p, "entry");
+  Sym entry_new_nm = pool_intern_cstr(p, "renamed_entry");
+  Sym keep_nm = pool_intern_cstr(p, "keep_global");
+  Sym foo_nm = pool_intern_cstr(p, "foo_ref");
+  Sym spurious_nm = pool_intern_cstr(p, "spurious_extern");
+
+  ObjSecId sec_text =
+      obj_section(ob, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+  ObjSecId sec_data =
+      obj_section(ob, data_nm, SEC_DATA, SF_ALLOC | SF_WRITE, 8);
+
+  obj_write(ob, sec_text, TEXT_BYTES, sizeof TEXT_BYTES);
+  /* 16 bytes so the R_ABS64 slots at offsets 0 and 8 both fit inside .data. */
+  static const uint8_t zero16[16] = {0};
+  obj_write(ob, sec_data, zero16, sizeof zero16);
+
+  ObjSymId sym_entry = obj_symbol(ob, entry_nm, SB_GLOBAL, SK_FUNC, sec_text, 0,
+                                  sizeof TEXT_BYTES);
+  ObjSymId sym_keep =
+      obj_symbol(ob, keep_nm, SB_GLOBAL, SK_FUNC, sec_text, 0, 0);
+  ObjSymId sym_foo =
+      obj_symbol(ob, foo_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  /* Spurious extern: never referenced, so the sweep should tombstone it. */
+  (void)obj_symbol(ob, spurious_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+
+  /* Both relocs live in .data (targets: foo_ref and the soon-renamed entry),
+   * so removing .data must take them with it while the symbols stay. */
+  obj_reloc(ob, sec_data, 0, R_ABS64, sym_foo, 0);
+  obj_reloc(ob, sec_data, 8, R_ABS64, sym_entry, 0);
+  obj_finalize(ob);
+
+  /* ---- mutate via the public API ---- */
+  /* ObjBuilder doubles as the public CfreeObjBuilder handle type. */
+  CfreeObjBuilder* pb = (CfreeObjBuilder*)ob;
+
+  /* Convert internal ids to public ids the same way object_builder.c does. */
+  CfreeObjSection pub_sec_data = (CfreeObjSection)(sec_data - 1);
+  CfreeObjSymbol pub_sym_entry = (CfreeObjSymbol)sym_entry;
+  CfreeObjSymbol pub_sym_keep = (CfreeObjSymbol)sym_keep;
+
+  CHECK(cfree_obj_builder_remove_section(pb, pub_sec_data) == CFREE_OK,
+        "remove_section .data");
+  CHECK(cfree_obj_builder_rename_symbol(pb, pub_sym_entry,
+                                        (CfreeSym)entry_new_nm) == CFREE_OK,
+        "rename_symbol entry -> renamed_entry");
+  CHECK(cfree_obj_builder_symbol_set_bind(pb, pub_sym_keep, CFREE_SB_LOCAL) ==
+            CFREE_OK,
+        "set_bind keep_global -> local");
+
+  /* ---- emit + reopen ---- */
+  CfreeWriter* w = NULL;
+  uint8_t* roundtrip = NULL;
+  size_t out_len = 0;
+  (void)cfree_writer_mem(&g_heap, &w);
+  CHECK(w != NULL, "cfree_writer_mem produced no writer");
+  if (w) {
+    CHECK(cfree_obj_builder_emit(pb, w) == CFREE_OK, "emit after mutate");
+    /* Copy the image out before the writer is closed. */
+    const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len);
+    roundtrip = (uint8_t*)malloc(out_len ? out_len : 1);
+    CHECK(roundtrip != NULL, "malloc(%zu) for roundtrip copy", out_len);
+    if (roundtrip && out_data && out_len) memcpy(roundtrip, out_data, out_len);
+    cfree_writer_close(w);
+  }
+
+  CfreeBytes input = {.name = "mutate", .data = roundtrip, .len = out_len};
+  CfreeObjFile* f = NULL;
+  if (roundtrip) {
+    CHECK(cfree_obj_open(&ctx, &input, &f) == CFREE_OK && f, "reopen");
+  }
+
+  if (f) {
+    /* .data should be gone. */
+    CfreeObjSection s_data = CFREE_SECTION_NONE;
+    CfreeStatus st_data = cfree_obj_section_by_name(f, ".data", &s_data);
+    CHECK(st_data == CFREE_NOT_FOUND, ".data still present after removal");
+
+    /* .text should remain. */
+    CfreeObjSection s_text = CFREE_SECTION_NONE;
+    CHECK(cfree_obj_section_by_name(f, ".text", &s_text) == CFREE_OK,
+          ".text missing after roundtrip");
+
+    /* Renamed entry symbol is present; old name is gone. */
+    CfreeObjSymInfo si;
+    CHECK(cfree_obj_symbol_by_name(f, "renamed_entry", &si) == CFREE_OK,
+          "renamed_entry not found");
+    CHECK(cfree_obj_symbol_by_name(f, "entry", &si) == CFREE_NOT_FOUND,
+          "old 'entry' symbol survived rename");
+
+    /* keep_global was localized; judge si.bind only if the lookup worked. */
+    int have_keep = cfree_obj_symbol_by_name(f, "keep_global", &si) == CFREE_OK;
+    CHECK(have_keep, "keep_global lost");
+    CHECK(!have_keep || si.bind == CFREE_SB_LOCAL,
+          "keep_global bind=%d, want LOCAL=%d", (int)si.bind,
+          (int)CFREE_SB_LOCAL);
+
+    /* Spurious extern was pruned by the sweep. */
+    CHECK(cfree_obj_symbol_by_name(f, "spurious_extern", &si) ==
+              CFREE_NOT_FOUND,
+          "spurious_extern survived sweep");
+
+    /* foo_ref's only reloc went away with .data, yet it is expected to stay
+     * as a plain UNDEF because its `referenced` flag is never cleared. */
+    CHECK(cfree_obj_symbol_by_name(f, "foo_ref", &si) == CFREE_OK,
+          "foo_ref UNDEF should survive");
+
+    /* No relocs should remain — all were in the removed .data. */
+    CfreeObjRelocIter* rit = NULL;
+    CHECK(cfree_obj_reliter_new(f, &rit) == CFREE_OK && rit, "reliter_new");
+    int nrel = 0;
+    if (rit) {
+      CfreeObjReloc r;
+      while (cfree_obj_reliter_next(rit, &r) == CFREE_ITER_ITEM) ++nrel;
+      cfree_obj_reliter_free(rit);
+    }
+    CHECK(nrel == 0, "expected 0 relocs after removing .data, got %d", nrel);
+
+    cfree_obj_free(f);
+  }
+
+  free(roundtrip);
+  obj_free(ob);
+  cfree_compiler_free(cc);
+
+  if (g_failures) {
+    fprintf(stderr, "%d failure(s)\n", g_failures);
+    return 1;
+  }
+  fputs("mutate: OK\n", stderr);
+  return 0;
+}
diff --git a/test/objcopy/cases/01-rename-section.expected b/test/objcopy/cases/01-rename-section.expected
@@ -0,0 +1 @@
+__TEXT,__mytext
diff --git a/test/objcopy/cases/01-rename-section.sh b/test/objcopy/cases/01-rename-section.sh
@@ -0,0 +1,6 @@
+# Rename __TEXT,__text with --rename-section; the section-name column of the
+# numbered `objdump -h` rows must then read __TEXT,__mytext.
+printf '%s\n' 'int foo(void) { return 1; }' > smoke.c
+"$CFREE" cc -c smoke.c -o smoke.o
+"$CFREE" objcopy --rename-section=__TEXT,__text=__TEXT,__mytext smoke.o smoke.r.o
+"$CFREE" objdump -h smoke.r.o | awk '/^ *[0-9]+ /{print $2}'
diff --git a/test/objcopy/cases/02-redefine-sym.expected b/test/objcopy/cases/02-redefine-sym.expected
@@ -0,0 +1,2 @@
+_bar
+_renamed_foo
diff --git a/test/objcopy/cases/02-redefine-sym.sh b/test/objcopy/cases/02-redefine-sym.sh
@@ -0,0 +1,7 @@
+# Rename _foo with --redefine-sym; the sorted '_'-prefixed symbol names taken
+# from `objdump -t` must list _renamed_foo in place of _foo.
+printf '%s\n' 'int foo(void) { return 1; }' \
+  'int bar(void) { return foo(); }' > smoke.c
+"$CFREE" cc -c smoke.c -o smoke.o
+"$CFREE" objcopy --redefine-sym=_foo=_renamed_foo smoke.o smoke.r.o
+"$CFREE" objdump -t smoke.r.o | awk '$NF ~ /^_/{print $NF}' | sort
diff --git a/test/objcopy/cases/03-localize-symbol.expected b/test/objcopy/cases/03-localize-symbol.expected
@@ -0,0 +1,2 @@
+g _bar
+l _foo
diff --git a/test/objcopy/cases/03-localize-symbol.sh b/test/objcopy/cases/03-localize-symbol.sh
@@ -0,0 +1,8 @@
+# --localize-symbol demotes _foo to local binding while _bar stays global.
+printf '%s\n' 'int foo(void) { return 1; }' \
+  'int bar(void) { return foo(); }' > smoke.c
+"$CFREE" cc -c smoke.c -o smoke.o
+"$CFREE" objcopy --localize-symbol=_foo smoke.o smoke.l.o
+# Emit "<bind> <name>" per '_'-prefixed symbol; the bind column of
+# `objdump -t` reads 'l' for local and 'g' for global.
+"$CFREE" objdump -t smoke.l.o | awk '$NF ~ /^_/{print $2, $NF}' | sort
diff --git a/test/objcopy/cases/04-add-section.expected b/test/objcopy/cases/04-add-section.expected
@@ -0,0 +1,2 @@
+__DATA,__custom
+__TEXT,__text
diff --git a/test/objcopy/cases/04-add-section.sh b/test/objcopy/cases/04-add-section.sh
@@ -0,0 +1,7 @@
+cat > smoke.c <<'EOF'
+int foo(void) { return 1; }
+EOF
+printf 'hello\n' > payload.bin
+"$CFREE" cc -c smoke.c -o smoke.o
+"$CFREE" objcopy --add-section=__DATA,__custom=payload.bin smoke.o smoke.a.o
+"$CFREE" objdump -h smoke.a.o | awk '/^ *[0-9]+ /{print $2}' | sort
diff --git a/test/objcopy/run.sh b/test/objcopy/run.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+# Driver-level `cfree objcopy` test harness. Same shape as test/ar/run.sh.
+# Each cases/*.sh runs in its own scratch directory with $CFREE exported; its
+# output must match the sibling .expected file. Exit: 0 pass, 1 fail, 2 setup.
+
+set -u
+
+script_dir=$(cd "$(dirname "$0")" && pwd)
+repo_root=$(cd "$script_dir/../.." && pwd)
+cases_dir="$script_dir/cases"
+
+CFREE="${CFREE:-$repo_root/build/cfree}"
+export CFREE
+
+if [ ! -x "$CFREE" ]; then
+  echo "objcopy-driver: cfree binary not found at $CFREE" >&2
+  exit 2
+fi
+
+# An empty $work_root would turn every per-case path into "/<name>": stop here.
+work_root=$(mktemp -d "${TMPDIR:-/tmp}/cfree-objcopy-test.XXXXXX") ||
+  { echo "objcopy-driver: mktemp failed" >&2; exit 2; }
+trap 'rm -rf "$work_root"' EXIT
+
+pass=0 fail=0 failures=
+
+# record_fail NAME [SUFFIX]: print "FAIL NAME<SUFFIX>" and update the tallies.
+record_fail() {
+  printf 'FAIL %s%s\n' "$1" "${2-}"
+  fail=$((fail + 1))
+  failures="$failures $1"
+}
+
+for sh in "$cases_dir"/*.sh; do
+  [ -e "$sh" ] || continue
+  name=$(basename "${sh%.sh}")
+  expected="${sh%.sh}.expected"
+  actual="$work_root/$name.actual"
+
+  if [ ! -e "$expected" ]; then
+    record_fail "$name" " (missing $(basename "$expected"))"
+    continue
+  fi
+
+  sandbox="$work_root/$name"
+  mkdir -p "$sandbox"
+  ( cd "$sandbox" && sh "$sh" ) > "$actual" 2>&1
+  case_rc=$?
+  if [ "$case_rc" -ne 0 ]; then
+    record_fail "$name" " (script exit=$case_rc)"
+    diff -u "$expected" "$actual" || true
+  elif diff -u "$expected" "$actual" >/dev/null 2>&1; then
+    printf 'PASS %s\n' "$name"
+    pass=$((pass + 1))
+  else
+    record_fail "$name"
+    diff -u "$expected" "$actual" || true
+    # Keep a copy beside the case: $work_root is removed when the harness exits.
+    cp "$actual" "$cases_dir/$name.actual" 2>/dev/null || true
+  fi
+done
+
+total=$((pass + fail))
+if [ "$fail" -gt 0 ]; then
+  printf '\nobjcopy-driver: failures:%s\n' "$failures"
+  printf 'objcopy-driver: %d/%d passed\n' "$pass" "$total"
+  exit 1
+fi
+printf '\nobjcopy-driver: %d/%d passed\n' "$pass" "$total"
diff --git a/test/strip/cases/01-strip-debug.expected b/test/strip/cases/01-strip-debug.expected
@@ -0,0 +1,5 @@
+== sections ==
+__TEXT,__text
+== symbols ==
+_helper
+_main
diff --git a/test/strip/cases/01-strip-debug.sh b/test/strip/cases/01-strip-debug.sh
@@ -0,0 +1,14 @@
+# With --strip-debug every CFREE_SEC_DEBUG section must disappear while the
+# symbol table is left exactly as it was.
+
+printf '%s\n' 'int helper(void) { return 42; }' \
+  'int main(void) { return helper(); }' > smoke.c
+"$CFREE" cc -g -c smoke.c -o smoke.o
+"$CFREE" strip --strip-debug smoke.o -o smoke.stripped.o
+
+# Section names are the second column of the numbered `objdump -h` rows.
+printf '%s\n' "== sections =="
+"$CFREE" objdump -h smoke.stripped.o | awk '/^ *[0-9]+ /{print $2}' | sort
+# Symbol names are the last field of `objdump -t` rows, kept if '_'-prefixed.
+printf '%s\n' "== symbols =="
+"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort
diff --git a/test/strip/cases/02-strip-all-keeps-reloc-targets.expected b/test/strip/cases/02-strip-all-keeps-reloc-targets.expected
@@ -0,0 +1,4 @@
+== symbols ==
+_helper
+== sections ==
+__TEXT,__text
diff --git a/test/strip/cases/02-strip-all-keeps-reloc-targets.sh b/test/strip/cases/02-strip-all-keeps-reloc-targets.sh
@@ -0,0 +1,15 @@
+# --strip-all drops every symbol that isn't referenced by a surviving
+# reloc (and isn't an UNDEF extern). Here _helper is reloc-targeted by
+# main's call, so it survives; _main itself isn't referenced and is dropped.
+
+cat > smoke.c <<'EOF'
+int helper(void) { return 42; }
+int main(void) { return helper(); }
+EOF
+"$CFREE" cc -g -c smoke.c -o smoke.o
+"$CFREE" strip --strip-all smoke.o -o smoke.stripped.o
+
+echo "== symbols =="
+"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort
+echo "== sections =="
+"$CFREE" objdump -h smoke.stripped.o | awk '/^ *[0-9]+ /{print $2}' | sort
diff --git a/test/strip/cases/03-keep-symbol.expected b/test/strip/cases/03-keep-symbol.expected
@@ -0,0 +1,3 @@
+== symbols ==
+_helper
+_unused
diff --git a/test/strip/cases/03-keep-symbol.sh b/test/strip/cases/03-keep-symbol.sh
@@ -0,0 +1,12 @@
+# --keep-symbol wins over --strip-all's drop decision: _unused is kept
+# because it was named, alongside the _helper that main's call refers to.
+
+printf '%s\n' 'int unused(void) { return 1; }' \
+  'int helper(void) { return 42; }' \
+  'int main(void) { return helper(); }' > smoke.c
+"$CFREE" cc -c smoke.c -o smoke.o
+"$CFREE" strip --strip-all --keep-symbol=_unused smoke.o -o smoke.stripped.o
+
+# Expect _helper and _unused only; _main is still dropped.
+printf '%s\n' "== symbols =="
+"$CFREE" objdump -t smoke.stripped.o | awk '$NF ~ /^_/{print $NF}' | sort
diff --git a/test/strip/cases/04-archive-strip-debug.expected b/test/strip/cases/04-archive-strip-debug.expected
@@ -0,0 +1,7 @@
+== members ==
+a.o
+b.o
+== a.o sections ==
+__TEXT,__text
+== b.o sections ==
+__TEXT,__text
diff --git a/test/strip/cases/04-archive-strip-debug.sh b/test/strip/cases/04-archive-strip-debug.sh
@@ -0,0 +1,24 @@
+# Archive: --strip-debug runs on each object member, the symbol index is
+# refreshed.
+
+cat > a.c <<'EOF'
+int aaa(void) { return 1; }
+EOF
+cat > b.c <<'EOF'
+int bbb(void) { return 2; }
+EOF
+"$CFREE" cc -g -c a.c -o a.o
+"$CFREE" cc -g -c b.c -o b.o
+"$CFREE" ar rcs lib.a a.o b.o
+"$CFREE" strip --strip-debug lib.a -o lib.stripped.a
+
+echo "== members =="
+"$CFREE" ar t lib.stripped.a
+
+# Extract each member and confirm the debug sections are gone.
+mkdir x
+( cd x && "$CFREE" ar x ../lib.stripped.a )
+echo "== a.o sections =="
+"$CFREE" objdump -h x/a.o | awk '/^ *[0-9]+ /{print $2}' | sort
+echo "== b.o sections =="
+"$CFREE" objdump -h x/b.o | awk '/^ *[0-9]+ /{print $2}' | sort
diff --git a/test/strip/run.sh b/test/strip/run.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+# Driver-level `cfree strip` test harness. Same shape as test/ar/run.sh.
+# Each cases/*.sh runs in its own scratch directory with $CFREE exported; its
+# output must match the sibling .expected file. Exit: 0 pass, 1 fail, 2 setup.
+
+set -u
+
+script_dir=$(cd "$(dirname "$0")" && pwd)
+repo_root=$(cd "$script_dir/../.." && pwd)
+cases_dir="$script_dir/cases"
+
+CFREE="${CFREE:-$repo_root/build/cfree}"
+export CFREE
+
+if [ ! -x "$CFREE" ]; then
+  echo "strip-driver: cfree binary not found at $CFREE" >&2
+  exit 2
+fi
+
+# An empty $work_root would turn every per-case path into "/<name>": stop here.
+work_root=$(mktemp -d "${TMPDIR:-/tmp}/cfree-strip-test.XXXXXX") ||
+  { echo "strip-driver: mktemp failed" >&2; exit 2; }
+trap 'rm -rf "$work_root"' EXIT
+
+pass=0 fail=0 failures=
+
+# record_fail NAME [SUFFIX]: print "FAIL NAME<SUFFIX>" and update the tallies.
+record_fail() {
+  printf 'FAIL %s%s\n' "$1" "${2-}"
+  fail=$((fail + 1))
+  failures="$failures $1"
+}
+
+for sh in "$cases_dir"/*.sh; do
+  [ -e "$sh" ] || continue
+  name=$(basename "${sh%.sh}")
+  expected="${sh%.sh}.expected"
+  actual="$work_root/$name.actual"
+
+  if [ ! -e "$expected" ]; then
+    record_fail "$name" " (missing $(basename "$expected"))"
+    continue
+  fi
+
+  sandbox="$work_root/$name"
+  mkdir -p "$sandbox"
+  ( cd "$sandbox" && sh "$sh" ) > "$actual" 2>&1
+  case_rc=$?
+  if [ "$case_rc" -ne 0 ]; then
+    record_fail "$name" " (script exit=$case_rc)"
+    diff -u "$expected" "$actual" || true
+  elif diff -u "$expected" "$actual" >/dev/null 2>&1; then
+    printf 'PASS %s\n' "$name"
+    pass=$((pass + 1))
+  else
+    record_fail "$name"
+    diff -u "$expected" "$actual" || true
+    # Keep a copy beside the case: $work_root is removed when the harness exits.
+    cp "$actual" "$cases_dir/$name.actual" 2>/dev/null || true
+  fi
+done
+
+total=$((pass + fail))
+if [ "$fail" -gt 0 ]; then
+  printf '\nstrip-driver: failures:%s\n' "$failures"
+  printf 'strip-driver: %d/%d passed\n' "$pass" "$total"
+  exit 1
+fi
+printf '\nstrip-driver: %d/%d passed\n' "$pass" "$total"
diff --git a/test/test.mk b/test/test.mk
@@ -27,9 +27,9 @@
# asm_parse / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rt-headers test-rt-runtime test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend
+.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rt-headers test-rt-runtime test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend
-test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rt-headers test-lib-deps
+test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rt-headers test-lib-deps
# `test-cbackend` is intentionally not in the default `test` target: the
# Phase 1 C backend skips most fixtures pending later phases, which would
# add noise to the default summary. Run it explicitly to gate progress.
@@ -77,6 +77,12 @@ $(AR_TEST_BIN): test/ar_test.c $(LIB_AR)
test-ar-driver: bin
@CFREE=$(abspath $(BIN)) test/ar/run.sh
+test-strip-driver: bin
+ @CFREE=$(abspath $(BIN)) test/strip/run.sh
+
+test-objcopy-driver: bin
+ @CFREE=$(abspath $(BIN)) test/objcopy/run.sh
+
# DWARF consumer unit test: builds a hand-crafted DWARF-bearing ELF in
# memory and exercises every cfree_dwarf_* entry. Depends only on
# libcfree.a — the consumer reads bytes; producer involvement isn't