commit c5025473abe14579fbfda9365b3b7a6d212f1fb9
parent 12bb06edf18181404c60995df67a1dc9755ddae7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 11:22:07 -0700
driver: add cpp + ranlib tools and obj-roundtrip API
Two new driver subcommands and a small public-API addition that lets
opened object files be re-emitted through the existing builder path.
Also captures the remaining gaps (strip/objcopy, flag-surface holes
in cc/as/ld/ar/objdump) in doc/CTOOLCHAIN.md so the deferred work is
discoverable.
cpp wraps cfree_c_preprocess directly; functionally equivalent to
`cfree cc -E` but without cc's source-classification scaffolding.
ranlib reads an archive and rewrites it with a System-V symbol index,
filling in CfreeArMemberSymbols from each object member's global
symbols (same logic ar.c already runs under the `s` modifier).
cfree_obj_file_builder promotes the previously-internal accessor on
CfreeObjFile to public API: callers can hand the result to
cfree_obj_builder_emit for a byte-equivalent roundtrip. Mutation
after open is documented as not-yet-supported — that's the gating
work for strip/objcopy.
Diffstat:
9 files changed, 884 insertions(+), 1 deletion(-)
diff --git a/doc/CTOOLCHAIN.md b/doc/CTOOLCHAIN.md
@@ -0,0 +1,263 @@
+# C Toolchain Gap Analysis
+
+What a typical `Makefile` or build-system invokes vs. what `cfree` currently
+ships in its driver, and what's missing inside `libcfree` to close those
+gaps. Companion to the toolchain summary in `README.md`.
+
+Snapshot as of 2026-05-20.
+
+## Tool inventory
+
+| Tool | Status | Notes |
+| --------- | ----------------- | -------------------------------------------------- |
+| `cc` | shipped | `driver/cc.c`; broad GCC-subset surface |
+| `cpp` | shipped | `driver/cpp.c`; thin wrapper over `cfree_c_preprocess` |
+| `as` | shipped | `driver/as.c`; GAS-subset, single input |
+| `ld` | shipped | `driver/ld.c` |
+| `ar` | shipped | `driver/ar.c`; r/c/t/x/p + `s` modifier |
+| `ranlib` | shipped | `driver/ranlib.c` |
+| `objdump` | shipped | `driver/objdump.c` |
+| `nm` | missing | symbols only; reuse `cfree_obj_symiter_*` |
+| `size` | missing | section sizes from `cfree_obj_section` |
+| `strings` | missing | trivial; no `libcfree` API needed |
+| `file` | missing | `cfree_detect_fmt` already classifies |
+| `addr2line` | missing | needs DWARF query API surface (already used internally) |
+| `readelf` | partly via objdump | objdump covers most of GNU `readelf -a` |
+| `strip` | blocked | needs builder mutator API; see below |
+| `objcopy` | blocked | needs builder mutator API; see below |
+| `c++filt` | n/a | C only |
+| `gprof` / `gcov` | n/a | no profiling/coverage support today |
+| `ldd`, `ldconfig`, dynamic loader | n/a | host-provided |
+
+`cfree`-specific tools (`run`, `dbg`, `emu`) are out of scope for this
+document.
+
+## Strip / Objcopy
+
+Both are **blocked on a builder-mutator surface** that does not yet exist
+in `libcfree`. The reader produces an already-finalized `CfreeObjBuilder`
+(per `src/obj/obj.h` "lifecycle gates" — `obj_finalize` freezes the
+read-side view, no further writes permitted). Pure roundtrip works (open
+→ emit), but neither tool needs *only* roundtrip — both need to **remove**
+existing structure, and objcopy additionally needs to **rename** it.
+
+### Operations matrix
+
+| Operation | What it needs | Have today? |
+| ---------------------------------------- | -------------------------------------------- | --------------------------------- |
+| `strip --strip-debug` / `objcopy --strip-debug` | drop `CFREE_SEC_DEBUG` sections | reader exposes kind ✓ — emit filter missing |
+| `strip --strip-all` | drop debug + symtab | needs emit-time symbol filter |
+| `strip --strip-unneeded` | keep only relocation-referenced symbols | reader exposes reloc→sym ✓ — needs builder symbol filter |
+| `strip --keep-symbol=N` / `--strip-symbol=N` | symbol predicate | needs builder symbol filter |
+| `objcopy --remove-section=N` | drop section by name | needs builder mutator |
+| `objcopy --only-section=N` | inverse of above | needs builder mutator |
+| `objcopy --rename-section old=new[,flags]` | mutate section name + flags | needs builder mutator |
+| `objcopy --add-section name=file` | add new section from external bytes | builder has the add call, but it is rejected post-finalize on an opened object; needs builder mutator |
+| `objcopy --update-section name=file` | replace section contents | needs builder mutator |
+| `objcopy --redefine-sym old=new` | rename symbol | needs builder mutator |
+| `objcopy --globalize-symbol`/`--localize-symbol`/`--weaken-symbol` | mutate `CfreeSymBind` | needs builder mutator |
+| `objcopy --extract-symbol` | keep section flags + symbols, drop all section contents | needs builder mutator |
+| `objcopy --only-keep-debug` | keep only `.debug_*` + symtab | needs builder mutator |
+| `objcopy --add-gnu-debuglink=FILE` | append debuglink section + CRC | needs CRC32 helper + add-section |
+| `objcopy -O <bfdname>` (format convert) | ELF ↔ Mach-O ↔ COFF roundtrip | builder is already format-neutral; should work once mutators land |
+| `objcopy --change-section-address=...` | adjust section VMA / LMA | needs builder mutator |
+| `objcopy -I/-O binary`, `srec`, `ihex` | flat-binary / S-record / Intel-hex output | not supported; new emitters |
+
+### What `libcfree` needs
+
+Beyond the builder mutators, a few smaller items:
+
+1. **Section-group reader iterator** (`CfreeObjGroupIter`,
+ `cfree_obj_groupiter_new/next/free`, `CfreeObjGroupInfo`). The builder
+ has `cfree_obj_builder_group` and `_group_add_section`, but the reader
+ exposes no way to enumerate existing groups. Any objcopy that touches
+ a COMDAT-bearing object would lose grouping on roundtrip without this.
+
+2. **Builder mutator API.** Minimal MVP that unblocks strip and the
+ common objcopy operations:
+
+ ```c
+ CfreeStatus cfree_obj_builder_remove_section(CfreeObjBuilder *, CfreeObjSection);
+ CfreeStatus cfree_obj_builder_remove_symbol(CfreeObjBuilder *, CfreeObjSymbol);
+ CfreeStatus cfree_obj_builder_rename_section(CfreeObjBuilder *, CfreeObjSection,
+ CfreeSym new_name);
+ CfreeStatus cfree_obj_builder_rename_symbol(CfreeObjBuilder *, CfreeObjSymbol,
+ CfreeSym new_name);
+ CfreeStatus cfree_obj_builder_symbol_set_bind(CfreeObjBuilder *, CfreeObjSymbol,
+ CfreeSymBind);
+ ```
+
+ These would need to lift the post-finalize-frozen invariant — either
+ by re-opening the builder for writes, or by adding a parallel
+ filtered-emit path that takes a callback predicate. The latter is
+ probably less invasive.
+
+3. **DSO / executable inputs are a separate problem.** `cfree_obj_open`
+ reads relocatable `.o` cleanly, but stripping a *linked* ELF
+ (executable or DSO) means understanding `.dynsym`, `.dynstr`,
+ `.hash`/`.gnu.hash`, `.dynamic`, `.got`, `.plt`, `.rela.plt`,
+ `PT_NOTE` (build-id), and a `PT_DYNAMIC` segment — most of which are
+ linker-managed, not builder-managed. GNU `strip` and `objcopy` can
+ operate on these because `bfd` round-trips the full dynamic-linking
+ state. We don't model that today. Scope strip/objcopy to `.o` and
+ `.a` for the first cut.
+
+### Suggested sequencing
+
+1. Add the section-group reader iterator (small, no mutator concerns).
+2. Add the builder mutator API for sections + symbols.
+3. Implement `strip` (relocatable inputs only) as a driver tool. Factor
+ the per-member symbol-collection block from `driver/ar.c` and
+ `driver/ranlib.c` into a shared helper while we're touching the area.
+4. Implement `objcopy` (relocatable inputs only). The `--add-section`
+ / `--rename-section` / `--redefine-sym` / `--strip-*` subset covers
+ the vast majority of build-system use.
+5. Defer DSO/exe strip+objcopy, format-conversion to non-object outputs
+ (`binary`, `srec`, `ihex`), and `objcopy --only-keep-debug` /
+ `--add-gnu-debuglink` (the split-debuginfo flow).
+
+## Flag-surface gaps
+
+Methodology: each tool's argv parser was compared against the union of
+GCC's `cc` and the corresponding binutils tool. Flags that are
+silently accepted as no-ops (e.g. `-pipe`, `-std=`) are not gaps, unless
+ignoring them can change code generation or ABI; those are called out
+explicitly below.
+
+### `cc` — broad surface; the gaps are mostly autotools/CMake probes
+
+- **Pass-through flag families.** `-Wp,...` (preprocessor) and `-Wa,...`
+ (assembler) are missing. `-Wl,...` is supported. `-Xpreprocessor` /
+ `-Xassembler` similarly missing; `-Xlinker` is supported.
+- **Compiler-information probes** (used by autoconf, CMake's compiler
+ detection): `-print-search-dirs`, `-print-file-name=`,
+ `-print-prog-name=`, `-print-libgcc-file-name`,
+ `-print-multi-os-directory`, `-print-resource-dir`, `-dumpmachine`,
+ `-dumpversion`, `-dumpspecs`. Some build systems hard-fail when these
+ return nothing.
+- **Linker convenience.** `-rdynamic` (≡ `-Wl,--export-dynamic`) not
+ wired through.
+- **Dep emission.** `-Wp,-MD,FILE` form not handled (GNU make's
+ auto-dependency idiom).
+- **Response files.** `@file` not supported; long CMake invocations on
+ some platforms exceed `ARG_MAX`.
+- **Code-gen tuning.** `-march=`, `-mtune=`, `-mcpu=`, `-mfpu=`,
+ `-msse*`, `-mavx*` — none implemented. Currently silently no-op'd
+ via the `-W…`/`-f…` catch-all in `cc_parse`.
+- **Other compiler flags accepted as no-ops** (call-site behaviour ≠
+ ABI-correctness): `-fvisibility=`, `-fcommon`/`-fno-common`,
+ `-fstack-protector*`, `-fno-omit-frame-pointer`, `-funwind-tables`,
+ `-fexceptions`, `-static-libgcc`, `-shared-libgcc`,
+ `-fsyntax-only`, `-fdiagnostics-color`, `-save-temps`.
+- **Long forms.** `--output=PATH`, `--include=`, etc.
+- **Includes.** `-iquote`, `-idirafter`, `-include` are currently
+ swallowed as no-ops (`driver/cc.c:939`); should land in the cflags
+ surface.
+
+### `cpp` — same baseline as `cc -E`
+
+Inherits all of cc's `-I/-isystem/-D/-U` + dep emission. Specific
+gaps that exist equally in `cc -E`:
+
+- `-P` — suppress `#line` markers
+- `-dM` — dump defined macros instead of expanded source
+- `-C`, `-CC` — preserve comments
+- `-traditional-cpp`
+- `-fno-show-column`
+- `-Wp,-MD,FILE` (see above)
+
+### `as` — minimal surface
+
+- **No code-gen target selection.** `-march=`, `-mcpu=`, `-mtune=`,
+ `-mabi=` (riscv `lp64d` vs `lp64`), `--32`/`--64`, `-m32`/`-m64`.
+- **No warnings control.** `-W`, `-Z`, `--warn`, `--fatal-warnings`,
+ `--no-warn`.
+- **No `-MD <file>`** for assembler-side dependency emission on `.S`.
+- **No assembly listings.** `-a` family (`-al`, `-as`, `-an`, …),
+ `--listing-*`, `--statistics`.
+- **No DWARF version selection.** Only blanket `-g`; missing
+ `--gdwarf-2/3/4/5`, `--gstabs`, `--gdwarf-sections`.
+- **No PIC / passthrough flags.** `-K`, `-Q`, `-k`.
+- **One input only.** GNU `as` accepts multiple sources and
+ concatenates.
+- **No `--defsym SYM=VAL`** for assemble-time constant injection.
+- **No stdin input** (`-`).
+
+### `ld` — strong; gaps are advanced features and `-z` flags
+
+- **`-z` options** (used by every distro): `-z now`, `-z relro`,
+ `-z noexecstack`, `-z defs`, `-z origin`, `-z notext`, `-z lazy`,
+ `-z combreloc`, `-z text`. These map to ELF dynamic-tag bits and
+ segment flags that the linker already emits in some form — wiring
+ them up should be small per flag.
+- **Link maps.** `-M` / `--print-map`, `-Map=FILE`,
+ `--print-gc-sections`, `--print-memory-usage`.
+- **Symbol-resolution policy.** `--no-undefined`,
+ `--allow-shlib-undefined`, `--unresolved-symbols={...}`.
+- **Symbol surgery.** `--wrap=SYMBOL`, `--defsym=SYM=EXPR`,
+ `--undefined=SYM`, `--retain-symbols-file`.
+- **Version scripts / dynamic lists.** `--version-script`,
+ `--dynamic-list`, `--exclude-libs`.
+- **Hash style.** `--hash-style={sysv,gnu,both}`, `--no-gnu-hash`.
+- **Section placement.** `--section-start=NAME=ADDR`, `-Ttext=`,
+ `-Tdata=`, `-Tbss=`.
+- **Cross-reference.** `--cref`.
+- **Identical-code folding.** `--icf={none,safe,all}`.
+- **Init/fini.** `--init`, `--fini` for non-default entry symbols.
+- **Sort/common.** `--sort-section`, `--sort-common`,
+ `--no-define-common`.
+- **Endianness / emulation.** `--EB`, `--EL`, `-m EMULATION` (currently
+ auto-detected from inputs; the `-m` form is missing).
+- **Strip flags.** `--strip-all`, `--strip-debug`, `-s`, `-S` (would
+ pair with the strip work above).
+- **ELF notes.** `--package-metadata=` (a fielded use case in distro
+ packaging).
+- **Response files.** `@file`.
+- **Stdin input** (`-`).
+
+### `ar` — basic operations covered; remaining POSIX operations and binutils extensions missing
+
+- **Operations.** `d` (delete), `q` (quick append), and standalone
+ `s` (now provided by `cfree ranlib`, but `ar s` is also expected).
+ `m` (move member). `b NAME` / `a NAME` / `i NAME` (positional
+ insertion modifiers paired with `r`/`m`).
+- **Modifiers.** `D`/`U` (deterministic / non-deterministic; `D` is the
+  default only when binutils is configured with
+  `--enable-deterministic-archives`, as most distros do.
+  `SOURCE_DATE_EPOCH` is similar but not equivalent).
+ `N <count>` (Nth instance of a duplicated member name). `P` (full
+ pathname match). `o` (preserve mtime on extract). `S` (suppress
+ symbol index — opposite of `s`). `T` (thin archive — used by LLVM).
+- **MRI script mode** (read commands from stdin). Rarely used; skip.
+
+### `objdump` — biggest gap among the shipped tools
+
+- **Aggregate flags.** `-x` (all headers ≡ `-a -f -h -p -r -t`), `-f`
+  (file header), `-p` (private / program headers).
+- **Source intermixing.** `-S` (intermix source — DWARF line info
+ already available), `-l` (line numbers in disasm / relocs).
+- **Disassembly scope.** `--disassemble=SYM`, `--start-address=`,
+ `--stop-address=`.
+- **Disassembly formatting.** `-z` (don't skip zeros), `-w` (wide
+ output), `--no-show-raw-insn`, `--prefix-addresses`, `-M ATTR`
+ (e.g. `-M intel` for x86 syntax).
+- **Dynamic vs. static.** `-R` (dynamic relocations) vs the existing
+ `-r` (static); `-T` (dynamic symbols) vs the existing `-t`
+ (static).
+- **DWARF dumping.** `-W` / `--dwarf=...` — cfree emits DWARF and
+ exposes reader APIs, so this should be straightforward.
+- **Long forms.** `--syms`, `--section-headers`, `--archive-headers`,
+ `--all-headers`, `--file-offsets`.
+- **Override format / arch.** `-b BFDNAME`, `-m ARCH`, `-EB`/`-EL`.
+- **C++ demangling.** `-C`, `--demangle` — N/A for C; can land as a
+ silent no-op once it's needed.
+
+## Recommended next moves
+
+1. **Add to `cc` first**: `-rdynamic`, `-print-search-dirs`,
+ `-print-file-name`, `-print-prog-name`, `-dumpmachine`,
+ `-dumpversion`, `@file`. These unblock most autotools/CMake
+ probes for very little code.
+2. **Add to `ld`** the `-z` family (`-z now`, `-z relro`,
+ `-z noexecstack` are the high-traffic three) and `-Map=FILE`.
+3. **Add to `objdump`** the `-x` aggregate, `-S`, `-l`, and
+ `--dwarf=...`. Most "I want to see what the compiler produced"
+ debug sessions need at least one of these.
+4. **Then unblock strip/objcopy** via the builder mutator API and
+ ship strip first (smaller surface than objcopy).
diff --git a/driver/cpp.c b/driver/cpp.c
@@ -0,0 +1,223 @@
+#include <stdint.h>
+
+#include <cfree/compile.h>
+#include <cfree/core.h>
+
+#include "cflags.h"
+#include "driver.h"
+#include "lang/c/c.h"
+
+/* `cfree cpp` — standalone C preprocessor. Reads one C source (path or
+ * `-` for stdin), writes the preprocessed token stream to `-o PATH` or
+ * stdout. Feature-equivalent to `cfree cc -E` but without cc's source-
+ * classification / link-input scaffolding.
+ *
+ * Flag surface is the cflags subset shared with cc + as:
+ * -I DIR, -IDIR user include path
+ * -isystem DIR system include path
+ * -D NAME[=BODY] define
+ * -U NAME undefine
+ * -o PATH output (default: stdout)
+ * -target TRIPLE cross-target (default: host)
+ * - stdin source
+ *
+ * The classical cpp-only flags (-P, -dM, -C, -CC) are not yet supported
+ * by cfree's preprocessor — they would land equally in `cfree cc -E`. */
+
+#define CPP_TOOL "cpp"
+
+/* Parsed command line for `cfree cpp`. driver_cpp zero-initialises it and
+ * cpp_parse fills it in. Exactly one of source_path / from_stdin is set
+ * when cpp_parse succeeds. */
+typedef struct CppOptions {
+ const char* output_path; /* -o PATH; NULL means write to stdout */
+ const char* source_path; /* NULL when stdin source */
+ int from_stdin; /* 1 when the input operand was `-` */
+ CfreeTarget target; /* host target unless overridden by -target */
+} CppOptions;
+
+/* Print the full `cfree cpp` reference (usage, options, exit codes) through
+ * driver_printf. Takes no input and returns nothing. */
+void driver_help_cpp(void) {
+  static const char help_text[] =
+      "cfree cpp — standalone C preprocessor\n"
+      "\n"
+      "USAGE\n"
+      " cfree cpp [options] INPUT.c\n"
+      " cfree cpp [options] - (read from stdin)\n"
+      "\n"
+      "DESCRIPTION\n"
+      " Runs the C preprocessor on a single input and writes the resulting\n"
+      " token stream to -o (or stdout when -o is absent). Functionally\n"
+      " equivalent to `cfree cc -E` but without cc's link/source-input\n"
+      " classification.\n"
+      "\n"
+      "OPTIONS\n"
+      " -o PATH Output path (default: stdout)\n"
+      " -I DIR, -IDIR Add a user include directory\n"
+      " -isystem DIR Add a system include directory\n"
+      " -D NAME[=BODY] Define a preprocessor macro\n"
+      " -U NAME Undefine a preprocessor macro\n"
+      " -target TRIPLE Cross-target. See `cfree cc --help` for the\n"
+      " accepted arches/OSes. Default: host.\n"
+      " -h, --help Show this help and exit\n"
+      "\n"
+      "EXIT CODES\n"
+      " 0 success 1 preprocess / I/O error 2 bad usage\n";
+
+  driver_printf("%s", help_text);
+}
+
+static void cpp_usage(void) {
+ driver_errf(CPP_TOOL, "%s",
+ "usage: cfree cpp [options] INPUT.c\n"
+ " cfree cpp --help for full option reference");
+}
+
+/* True once an input operand (a path or `-`) has already been recorded. */
+static int cpp_has_input(const CppOptions* o) {
+  return o->source_path || o->from_stdin;
+}
+
+/* Parse the `cfree cpp` command line into *o and *cf.
+ *
+ * Every argument is first offered to the shared cflags parser; anything it
+ * does not consume is matched against cpp's own flags (-o, -target), then
+ * treated as the single input operand (`-` selects stdin). o->target
+ * starts out as the host target.
+ *
+ * Returns 0 on success and 1 after reporting a usage error (missing flag
+ * argument, unknown flag, bad target, more than one input, or no input). */
+static int cpp_parse(int argc, char** argv, CppOptions* o, DriverEnv* env,
+                     DriverCflags* cf) {
+  int i;
+
+  o->target = driver_host_target();
+
+  for (i = 1; i < argc; ++i) {
+    const char* arg = argv[i];
+    int consumed =
+        driver_cflags_try_consume(cf, env, CPP_TOOL, argc, argv, &i);
+
+    if (consumed < 0) return 1;
+    if (consumed > 0) continue;
+
+    if (driver_streq(arg, "-o")) {
+      i += 1;
+      if (i >= argc) {
+        driver_errf(CPP_TOOL, "-o requires an argument");
+        return 1;
+      }
+      o->output_path = argv[i];
+    } else if (driver_streq(arg, "-target")) {
+      i += 1;
+      if (i >= argc) {
+        driver_errf(CPP_TOOL, "-target requires an argument");
+        return 1;
+      }
+      if (driver_target_from_triple(argv[i], &o->target) != 0) {
+        driver_errf(CPP_TOOL, "unrecognized target: %s", argv[i]);
+        return 1;
+      }
+    } else if (arg[0] == '-' && arg[1] != '\0') {
+      /* Any other dash-prefixed word; a lone "-" falls through below. */
+      driver_errf(CPP_TOOL, "unknown flag: %s", arg);
+      return 1;
+    } else {
+      /* Input operand: either "-" (stdin) or a source path. */
+      if (cpp_has_input(o)) {
+        driver_errf(CPP_TOOL, "multiple inputs not supported");
+        return 1;
+      }
+      if (driver_streq(arg, "-")) {
+        o->from_stdin = 1;
+      } else {
+        o->source_path = arg;
+      }
+    }
+  }
+
+  if (!cpp_has_input(o)) {
+    driver_errf(CPP_TOOL, "no input file");
+    cpp_usage();
+    return 1;
+  }
+  return 0;
+}
+
+/* Entry point for `cfree cpp`.
+ *
+ * Parses argv, loads the single input (a file, or stdin for `-`), opens the
+ * output writer (-o PATH, otherwise stdout) and runs cfree_c_preprocess.
+ *
+ * Returns 0 on success (also when only help was printed), 1 on any
+ * allocation / read / write / preprocess failure, and 2 on bad usage —
+ * matching the EXIT CODES section printed by driver_help_cpp. */
+int driver_cpp(int argc, char** argv) {
+  DriverEnv env;
+  CppOptions o = {0};
+  DriverCflags cf = {0};
+  CfreeContext ctx;
+  CfreePreprocessOptions pp;
+  CfreeCompiler* compiler = NULL;
+  CfreeWriter* writer = NULL;
+  CfreeFileData src = {0};
+  CfreeBytes input;
+  uint8_t* stdin_buf = NULL;
+  size_t stdin_size = 0;
+  int rc = 1;
+  int loaded = 0;
+
+  if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_cpp();
+    return 0;
+  }
+
+  driver_env_init(&env);
+
+  if (driver_cflags_init(&cf, &env, argc) != 0) {
+    /* Resource failure, not a usage error: exit 1 as documented. */
+    driver_errf(CPP_TOOL, "out of memory");
+    driver_env_fini(&env);
+    return 1;
+  }
+
+  if (cpp_parse(argc, argv, &o, &env, &cf) != 0) {
+    driver_cflags_fini(&cf, &env);
+    driver_env_fini(&env);
+    return 2;
+  }
+  driver_cflags_fill_pp(&cf, &pp);
+
+  ctx = driver_env_to_context(&env);
+
+  if (o.from_stdin) {
+    if (!driver_read_stdin(&env, &stdin_buf, &stdin_size)) {
+      driver_errf(CPP_TOOL, "failed to read stdin");
+      goto out;
+    }
+    input.name = "<stdin>";
+    input.data = stdin_buf;
+    input.len = stdin_size;
+  } else {
+    if (ctx.file_io->read_all(ctx.file_io->user, o.source_path, &src) !=
+        CFREE_OK) {
+      driver_errf(CPP_TOOL, "failed to read: %s", o.source_path);
+      goto out;
+    }
+    loaded = 1; /* src must be released on the way out */
+    input.name = o.source_path;
+    input.data = src.data;
+    input.len = src.size;
+  }
+
+  if (o.output_path) {
+    if (ctx.file_io->open_writer(ctx.file_io->user, o.output_path, &writer) !=
+        CFREE_OK) {
+      driver_errf(CPP_TOOL, "failed to open output: %s", o.output_path);
+      goto out;
+    }
+  } else {
+    writer = driver_stdout_writer(&env);
+    if (!writer) {
+      driver_errf(CPP_TOOL, "out of memory");
+      goto out;
+    }
+  }
+
+  if (driver_compiler_new(o.target, &ctx, &compiler) != CFREE_OK) {
+    driver_errf(CPP_TOOL, "failed to initialize compiler");
+    goto out;
+  }
+
+  rc = cfree_c_preprocess(compiler, &pp, &input, writer) == CFREE_OK ? 0 : 1;
+
+  /* A successful preprocess does not prove the bytes reached the output;
+   * check the writer's status the same way `cfree ranlib` does so a failed
+   * write cannot exit 0. */
+  if (rc == 0 && cfree_writer_status(writer) != CFREE_OK) {
+    driver_errf(CPP_TOOL, "failed to write output");
+    rc = 1;
+  }
+
+out:
+  if (compiler) driver_compiler_free(compiler);
+  if (writer) cfree_writer_close(writer);
+  if (loaded) ctx.file_io->release(ctx.file_io->user, &src);
+  if (stdin_buf) driver_free(&env, stdin_buf, stdin_size);
+  driver_cflags_fini(&cf, &env);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -15,9 +15,11 @@
typedef enum DriverTool {
DRIVER_TOOL_CC,
+ DRIVER_TOOL_CPP,
DRIVER_TOOL_AS,
DRIVER_TOOL_LD,
DRIVER_TOOL_AR,
+ DRIVER_TOOL_RANLIB,
DRIVER_TOOL_OBJDUMP,
DRIVER_TOOL_DBG,
DRIVER_TOOL_RUN,
@@ -29,9 +31,11 @@ int driver_main(int argc, char **argv);
/* Direct entry per tool. Each lives in driver/<tool>.c. */
int driver_cc(int argc, char **argv);
+int driver_cpp(int argc, char **argv);
int driver_as(int argc, char **argv);
int driver_ld(int argc, char **argv);
int driver_ar(int argc, char **argv);
+int driver_ranlib(int argc, char **argv);
int driver_objdump(int argc, char **argv);
int driver_dbg(int argc, char **argv);
int driver_run(int argc, char **argv);
@@ -42,9 +46,11 @@ int driver_emu(int argc, char **argv);
* or --help (objdump excepts -h, since GNU objdump uses it for section
* headers — only --help triggers help there). */
void driver_help_cc(void);
+void driver_help_cpp(void);
void driver_help_as(void);
void driver_help_ld(void);
void driver_help_ar(void);
+void driver_help_ranlib(void);
void driver_help_objdump(void);
void driver_help_dbg(void);
void driver_help_run(void);
diff --git a/driver/main.c b/driver/main.c
@@ -21,9 +21,11 @@
static int dispatch(const char* name, int argc, char** argv) {
if (driver_streq(name, "cc")) return driver_cc(argc, argv);
+ if (driver_streq(name, "cpp")) return driver_cpp(argc, argv);
if (driver_streq(name, "as")) return driver_as(argc, argv);
if (driver_streq(name, "ld")) return driver_ld(argc, argv);
if (driver_streq(name, "ar")) return driver_ar(argc, argv);
+ if (driver_streq(name, "ranlib")) return driver_ranlib(argc, argv);
if (driver_streq(name, "objdump")) return driver_objdump(argc, argv);
if (driver_streq(name, "dbg")) return driver_dbg(argc, argv);
if (driver_streq(name, "run")) return driver_run(argc, argv);
@@ -38,6 +40,10 @@ static int print_tool_help(const char* name) {
driver_help_cc();
return 0;
}
+ if (driver_streq(name, "cpp")) {
+ driver_help_cpp();
+ return 0;
+ }
if (driver_streq(name, "as")) {
driver_help_as();
return 0;
@@ -50,6 +56,10 @@ static int print_tool_help(const char* name) {
driver_help_ar();
return 0;
}
+ if (driver_streq(name, "ranlib")) {
+ driver_help_ranlib();
+ return 0;
+ }
if (driver_streq(name, "objdump")) {
driver_help_objdump();
return 0;
@@ -101,10 +111,13 @@ void driver_help_top(void) {
"TOOLS\n"
" cc Compile (and link) C sources, with cpp / dep-emit / -shared "
"modes\n"
+ " cpp Standalone C preprocessor (alias for `cc -E` minus link "
+ "scaffold)\n"
" as Assemble a GAS-subset text source into a relocatable "
"object\n"
" ld Link objects/archives into an executable or shared library\n"
" ar Create / modify / list / extract POSIX `ar` archives\n"
+ " ranlib Refresh the symbol index of an `ar` archive\n"
" objdump Dump sections, symbols, disassembly, hex, and relocations\n"
" run JIT-compile inputs and invoke the entry symbol in-process\n"
" dbg Interactive JIT debugger (REPL on top of the JIT image)\n"
diff --git a/driver/ranlib.c b/driver/ranlib.c
@@ -0,0 +1,331 @@
+#include <stdint.h>
+
+#include <cfree/archive.h>
+#include <cfree/core.h>
+#include <cfree/object.h>
+
+#include "driver.h"
+
+/* `cfree ranlib` — refresh / add a System-V `/` symbol-index member at the
+ * head of an existing POSIX `ar` archive. Equivalent to `cfree ar s ARCHIVE`,
+ * which is reserved by the POSIX ar grammar but not yet implemented in the
+ * ar tool. ranlib is the conventional name and the one Makefiles invoke.
+ *
+ * Operation: read all members, rebuild the archive at the same path with
+ * symbol_index=1 and per-member globally-defined symbols filled in. Long
+ * member names are preserved via the `//` extended-name table. Reproducible
+ * output via SOURCE_DATE_EPOCH (same epoch handling as `cfree ar`).
+ *
+ * Note: the per-member symbol-collection loop duplicates the body of
+ * ar_do_write's `has_s` block (driver/ar.c). Factor into a shared helper
+ * when adding strip / objcopy. */
+
+#define RANLIB_TOOL "ranlib"
+
+/* Print the full `cfree ranlib` reference (usage, description, exit codes)
+ * through driver_printf. Takes no input and returns nothing. */
+void driver_help_ranlib(void) {
+  static const char help_text[] =
+      "cfree ranlib — refresh the symbol index of an `ar` archive\n"
+      "\n"
+      "USAGE\n"
+      " cfree ranlib ARCHIVE.a\n"
+      "\n"
+      "DESCRIPTION\n"
+      " Reads every member of ARCHIVE.a, rebuilds the archive in place\n"
+      " with a System-V `/` symbol-index member at the head. Member names,\n"
+      " contents, and order are preserved. Reproducible: when\n"
+      " SOURCE_DATE_EPOCH is set to a positive integer, that value is\n"
+      " written to ar_date for every member.\n"
+      "\n"
+      " Equivalent to `cfree ar s ARCHIVE.a` (the bare `s` modifier is\n"
+      " reserved by the POSIX ar grammar but not yet implemented).\n"
+      "\n"
+      "OPTIONS\n"
+      " -h, --help Show this help and exit\n"
+      "\n"
+      "EXIT CODES\n"
+      " 0 success 1 archive I/O error 2 bad usage\n";
+
+  driver_printf("%s", help_text);
+}
+
+/* Read SOURCE_DATE_EPOCH as an unsigned decimal number of seconds.
+ *
+ * Returns 0 when the variable is unset, empty, contains any non-digit
+ * character, or does not fit in a uint64_t. The last case used to wrap
+ * silently and yield an arbitrary timestamp. */
+static uint64_t ranlib_epoch_from_env(void) {
+  const char* s = driver_getenv("SOURCE_DATE_EPOCH");
+  uint64_t v = 0;
+
+  if (!s || !*s) return 0;
+  for (; *s; ++s) {
+    uint64_t digit;
+    if (*s < '0' || *s > '9') return 0;
+    digit = (uint64_t)(*s - '0');
+    /* Reject before multiplying: v * 10 + digit must stay <= UINT64_MAX. */
+    if (v > (UINT64_MAX - digit) / 10) return 0;
+    v = v * 10 + digit;
+  }
+  return v;
+}
+
+/* Symbols that go into the archive index: global binding, defined in some
+ * section, and carrying a non-empty name. */
+static int ranlib_sym_indexed(const CfreeObjSymInfo* si) {
+  return si->bind == CFREE_SB_GLOBAL && si->section != CFREE_SECTION_NONE &&
+         si->name && si->name[0];
+}
+
+/* Build the symbol-name list for one archive member.
+ *
+ * The member is opened as an object and its symbols are walked twice: once
+ * to size the result, once to fill it. The result is a single allocation
+ * laid out as (const char* names[count], NUL-terminated name bytes), with
+ * every names[i] pointing into the trailing bytes.
+ *
+ * On return 0, *blob_out / *blob_size_out describe that allocation (the
+ * caller releases it with driver_free), *names_out is the pointer table and
+ * *count_out its length. All four are NULL / 0 when the member cannot be
+ * opened as an object or has no indexable symbols. Returns 1 after
+ * reporting "out of memory" when an iterator or the blob cannot be
+ * allocated. Mirrors driver/ar.c.
+ *
+ * NOTE(review): weak definitions are skipped by the bind check; confirm
+ * this matches what the linker expects from the index. */
+static int ranlib_collect_symbols(DriverEnv* env, const CfreeContext* ctx,
+                                  const CfreeBytes* member, void** blob_out,
+                                  size_t* blob_size_out,
+                                  const char*** names_out,
+                                  uint32_t* count_out) {
+  CfreeObjFile* obj = NULL;
+  CfreeObjSymIter* iter = NULL;
+  CfreeObjSymInfo info;
+  uint32_t wanted = 0;
+  uint32_t filled = 0;
+  size_t text_len = 0;
+  size_t total = 0;
+  size_t used = 0;
+  char* mem = NULL;
+  const char** table;
+  char* text;
+  int failed = 0;
+
+  *blob_out = NULL;
+  *blob_size_out = 0;
+  *names_out = NULL;
+  *count_out = 0;
+
+  /* Not an object we understand: contributes nothing to the index. */
+  if (cfree_obj_open(ctx, member, &obj) != CFREE_OK) return 0;
+
+  /* Sizing pass: count indexable symbols and their name bytes (with NUL). */
+  if (cfree_obj_symiter_new(obj, &iter) != CFREE_OK) {
+    failed = 1;
+    goto done;
+  }
+  while (cfree_obj_symiter_next(iter, &info) == CFREE_ITER_ITEM) {
+    size_t len = 0;
+    if (!ranlib_sym_indexed(&info)) continue;
+    while (info.name[len] != '\0') len++;
+    text_len += len + 1;
+    wanted++;
+  }
+  cfree_obj_symiter_free(iter);
+
+  if (wanted == 0) goto done;
+
+  total = (size_t)wanted * sizeof(const char*) + text_len;
+  mem = (char*)driver_alloc_zeroed(env, total);
+  if (!mem) {
+    failed = 1;
+    goto done;
+  }
+  table = (const char**)mem;
+  text = mem + (size_t)wanted * sizeof(const char*);
+
+  /* Fill pass: same filter, never more entries than were sized. */
+  if (cfree_obj_symiter_new(obj, &iter) != CFREE_OK) {
+    failed = 1;
+    goto done;
+  }
+  while (filled < wanted &&
+         cfree_obj_symiter_next(iter, &info) == CFREE_ITER_ITEM) {
+    const char* src;
+    if (!ranlib_sym_indexed(&info)) continue;
+    table[filled++] = text + used;
+    src = info.name;
+    do {
+      text[used++] = *src; /* the terminating NUL is copied as well */
+    } while (*src++ != '\0');
+  }
+  cfree_obj_symiter_free(iter);
+
+  *blob_out = mem;
+  *blob_size_out = total;
+  *names_out = table;
+  *count_out = filled;
+
+done:
+  if (failed && mem) driver_free(env, mem, total);
+  cfree_obj_free(obj);
+  if (failed) driver_errf(RANLIB_TOOL, "out of memory");
+  return failed;
+}
+
+/* Entry point for `cfree ranlib ARCHIVE`.
+ *
+ * Reads the archive into memory, walks its members twice (count, then copy
+ * names and byte spans), collects each member's indexable symbols, and
+ * rewrites the archive at the same path with long names and a symbol index
+ * enabled. An archive with no members is rewritten with an empty index.
+ *
+ * Returns 0 on success (also when only help was printed), 1 on any read /
+ * parse / allocation / write failure, and 2 when the argument count is
+ * wrong.
+ *
+ * NOTE(review): the output is opened at archive_path while members[].data
+ * still points into old_fd. This presumably relies on read_all returning a
+ * private copy rather than a live mapping of the file — confirm. A failure
+ * after open_writer can also leave the archive partially written. */
+int driver_ranlib(int argc, char** argv) {
+  DriverEnv env;
+  CfreeContext ctx;
+  CfreeFileData old_fd = {0};
+  CfreeBytes input;
+  CfreeArIter* it = NULL;
+  CfreeArMember m;
+  CfreeBytes* members = NULL;
+  char* name_storage = NULL;
+  size_t name_bytes_total = 0;
+  uint32_t nmembers = 0;
+  uint32_t i;
+  CfreeArMemberSymbols* msyms = NULL;
+  void** sym_allocs = NULL;
+  size_t* sym_alloc_szs = NULL;
+  CfreeWriter* out = NULL;
+  CfreeArWriteOptions opts = {0};
+  const char* archive_path;
+  int have_old = 0;
+  int rc = 1;
+
+  if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_ranlib();
+    return 0;
+  }
+  if (argc != 2) {
+    driver_errf(RANLIB_TOOL, "usage: cfree ranlib ARCHIVE.a");
+    return 2;
+  }
+  archive_path = argv[1];
+
+  driver_env_init(&env);
+  ctx = driver_env_to_context(&env);
+
+  if (ctx.file_io->read_all(ctx.file_io->user, archive_path, &old_fd) !=
+      CFREE_OK) {
+    driver_errf(RANLIB_TOOL, "failed to read: %s", archive_path);
+    goto out;
+  }
+  have_old = 1;
+  input.name = archive_path;
+  input.data = old_fd.data;
+  input.len = old_fd.size;
+
+  /* Pass 1: count members and total name bytes (member names returned by
+   * the iterator alias an internal buffer overwritten on each next(), so
+   * we stash a stable copy). */
+  if (cfree_ar_iter_new(&ctx, &input, &it) != CFREE_OK) {
+    driver_errf(RANLIB_TOOL, "not an archive: %s", archive_path);
+    goto out;
+  }
+  for (;;) {
+    CfreeIterResult r = cfree_ar_iter_next(it, &m);
+    if (r != CFREE_ITER_ITEM) break;
+    nmembers++;
+    name_bytes_total += driver_strlen(m.name) + 1;
+  }
+  cfree_ar_iter_free(it);
+  it = NULL;
+
+  /* An empty archive skips straight to the write below with members ==
+   * NULL and no per-member symbols; every array here would be zero-sized. */
+  if (nmembers > 0) {
+    size_t cursor = 0;
+    uint32_t k = 0;
+
+    members = (CfreeBytes*)driver_alloc_zeroed(
+        &env, (size_t)nmembers * sizeof(*members));
+    if (!members) {
+      driver_errf(RANLIB_TOOL, "out of memory");
+      goto out;
+    }
+    if (name_bytes_total > 0) {
+      name_storage = (char*)driver_alloc_zeroed(&env, name_bytes_total);
+      if (!name_storage) {
+        driver_errf(RANLIB_TOOL, "out of memory");
+        goto out;
+      }
+    }
+
+    /* Pass 2: copy names and member byte-spans into our parallel arrays. */
+    if (cfree_ar_iter_new(&ctx, &input, &it) != CFREE_OK) {
+      driver_errf(RANLIB_TOOL, "iter re-open failed");
+      goto out;
+    }
+    while (k < nmembers) {
+      CfreeIterResult r = cfree_ar_iter_next(it, &m);
+      char* dst;
+      const char* p;
+      if (r != CFREE_ITER_ITEM) break;
+      dst = name_storage + cursor;
+      for (p = m.name; *p; ++p) *dst++ = *p;
+      *dst++ = '\0';
+      members[k].name = name_storage + cursor;
+      members[k].data = m.data;
+      members[k].len = m.size;
+      cursor = (size_t)(dst - name_storage);
+      k++;
+    }
+    cfree_ar_iter_free(it);
+    it = NULL;
+
+    /* Both passes walk the same buffer, so a short second pass means the
+     * iterator misbehaved. Refuse to write rather than hand zeroed
+     * members[] entries to cfree_ar_write. */
+    if (k != nmembers) {
+      driver_errf(RANLIB_TOOL, "inconsistent member count: %s", archive_path);
+      goto out;
+    }
+
+    /* Pass 3: collect per-member global symbols. */
+    msyms = (CfreeArMemberSymbols*)driver_alloc_zeroed(
+        &env, (size_t)nmembers * sizeof(*msyms));
+    sym_allocs = (void**)driver_alloc_zeroed(
+        &env, (size_t)nmembers * sizeof(*sym_allocs));
+    sym_alloc_szs = (size_t*)driver_alloc_zeroed(
+        &env, (size_t)nmembers * sizeof(*sym_alloc_szs));
+    if (!msyms || !sym_allocs || !sym_alloc_szs) {
+      driver_errf(RANLIB_TOOL, "out of memory");
+      goto out;
+    }
+    for (i = 0; i < nmembers; ++i) {
+      void* blob = NULL;
+      size_t blob_size = 0;
+      const char** names = NULL;
+      uint32_t count = 0;
+      if (ranlib_collect_symbols(&env, &ctx, &members[i], &blob, &blob_size,
+                                 &names, &count) != 0) {
+        goto out;
+      }
+      sym_allocs[i] = blob;
+      sym_alloc_szs[i] = blob_size;
+      msyms[i].names = names;
+      msyms[i].count = count;
+    }
+  }
+
+  /* Single write path for empty and non-empty archives, so both get the
+   * same writer-status check. */
+  if (ctx.file_io->open_writer(ctx.file_io->user, archive_path, &out) !=
+      CFREE_OK) {
+    driver_errf(RANLIB_TOOL, "failed to open: %s", archive_path);
+    goto out;
+  }
+  opts.epoch = ranlib_epoch_from_env();
+  opts.long_names = 1;
+  opts.symbol_index = 1;
+  opts.member_symbols = msyms; /* NULL for an empty archive */
+  rc = cfree_ar_write(out, members, nmembers, &opts) == CFREE_OK ? 0 : 1;
+  if (rc == 0 && cfree_writer_status(out) != CFREE_OK) rc = 1;
+
+out:
+  if (out) cfree_writer_close(out);
+  if (it) cfree_ar_iter_free(it);
+  if (sym_allocs) {
+    for (i = 0; i < nmembers; ++i) {
+      if (sym_allocs[i]) driver_free(&env, sym_allocs[i], sym_alloc_szs[i]);
+    }
+    driver_free(&env, sym_allocs, (size_t)nmembers * sizeof(*sym_allocs));
+  }
+  if (sym_alloc_szs)
+    driver_free(&env, sym_alloc_szs,
+                (size_t)nmembers * sizeof(*sym_alloc_szs));
+  if (msyms) driver_free(&env, msyms, (size_t)nmembers * sizeof(*msyms));
+  if (name_storage) driver_free(&env, name_storage, name_bytes_total);
+  if (members) driver_free(&env, members, (size_t)nmembers * sizeof(*members));
+  if (have_old) ctx.file_io->release(ctx.file_io->user, &old_fd);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/include/cfree/object.h b/include/cfree/object.h
@@ -207,4 +207,18 @@ CfreeIterResult cfree_obj_reliter_next(CfreeObjRelocIter *,
CfreeObjReloc *out);
void cfree_obj_reliter_free(CfreeObjRelocIter *);
+/* Roundtrip: returns the builder underlying an object opened with
+ * cfree_obj_open, or NULL when given NULL. It is the same builder the reader
+ * populated and is already finalized, so callers may inspect it (e.g.
+ * iterate sections via cfree_obj_nsections / cfree_obj_section) and pass it
+ * to cfree_obj_builder_emit to re-serialize the file. Its lifetime is tied
+ * to the CfreeObjFile; do not call cfree_obj_builder_free on it.
+ *
+ * Mutation after open (add section, redefine symbol, etc.) is not currently
+ * supported — the read path closes the builder via obj_finalize, and the
+ * builder API rejects post-finalize writes. Filtered roundtrip (strip /
+ * objcopy --remove-section / --redefine-sym) needs a separate mutator
+ * surface that does not yet exist. */
+CfreeObjBuilder *cfree_obj_file_builder(const CfreeObjFile *);
+
#endif
diff --git a/src/api/object_file.c b/src/api/object_file.c
@@ -328,11 +328,20 @@ void cfree_obj_reliter_free(CfreeObjRelocIter* it) {
}
/* Accessor for disasm/jit to access the underlying ObjBuilder when both
- * are inside libcfree. Not part of the public API. */
+ * are inside libcfree; yields NULL when f is NULL. Internal name kept
+ * stable for existing callers (src/link/link_jit.c, src/api/disasm.c). */
ObjBuilder* cfree_objfile_builder(const CfreeObjFile* f) {
return f ? f->ob : NULL;
}
+/* Public alias of cfree_objfile_builder. Lets the driver and other libcfree
+ * consumers feed an opened object to cfree_obj_builder_emit for a
+ * byte-equivalent roundtrip without their own read-then-replay loop. */
+CfreeObjBuilder* cfree_obj_file_builder(const CfreeObjFile* f) {
+  CfreeObjBuilder* builder = cfree_objfile_builder(f);
+  return builder;
+}
+
/* Allocate an empty CfreeObjFile wrapping a private Compiler and a fresh
* ObjBuilder. Used by the JIT debug-view builder (src/link/link_jit.c)
* to assemble a synthetic object file from merged input debug sections.
diff --git a/test/ar/cases/05-ranlib-adds-index.expected b/test/ar/cases/05-ranlib-adds-index.expected
@@ -0,0 +1,5 @@
+a.o
+b.o
+a.o
+b.o
+grew
diff --git a/test/ar/cases/05-ranlib-adds-index.sh b/test/ar/cases/05-ranlib-adds-index.sh
@@ -0,0 +1,19 @@
+# Exercise `cfree ranlib` on an archive built with plain `ar rc`, i.e.
+# one created without a System-V symbol-index member. After ranlib the
+# member list and its order must be unchanged, and the archive must be
+# strictly larger because the index member adds bytes. Prints `ar t`
+# before and after ranlib, then reports whether the byte size grew.
+
+printf 'aaaa' >a.o
+printf 'bbbb' >b.o
+"$CFREE" ar rc lib.a a.o b.o
+bytes_pre=$(wc -c <lib.a | tr -d ' ')
+"$CFREE" ar t lib.a
+"$CFREE" ranlib lib.a
+bytes_post=$(wc -c <lib.a | tr -d ' ')
+"$CFREE" ar t lib.a
+if test "$bytes_post" -gt "$bytes_pre"; then
+  echo grew
+else
+  echo "no-growth: $bytes_pre -> $bytes_post"
+fi