commit ae690b5af361d1c325939f360dcd054623b6fb83
parent 65a9e442ada96be60dd57df653ac339ba6520d7f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 11:02:17 -0700
driver: add xxd tool (raw hex dump with reverse/plain/include modes)
A format-agnostic hex dumper, unlike objdump -s which only dumps object
sections. GNU xxd subset: default hex+ASCII dump, -r (reverse hex->binary),
-p (plain), -i (C array), -c/-g (columns/grouping), -s/-l (seek/length),
-u (uppercase), -o (output file). Pure env file-I/O; output streams through
a CfreeWriter so -o works for both text dumps and binary -r output.
`cfree xxd f | cfree xxd -r` round-trips contiguous data.
Diffstat:
5 files changed, 485 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -391,6 +391,9 @@ endif
ifeq ($(CFREE_TOOL_PKG_ENABLED),1)
DRIVER_TOOL_SRCS += driver/cmd/pkg.c
endif
+ifeq ($(CFREE_TOOL_XXD_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/xxd.c
+endif
DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/xxd.c b/driver/cmd/xxd.c
@@ -0,0 +1,475 @@
+#include <cfree/core.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree xxd` — hex dump any file, and reverse a dump back to binary. A GNU
+ * xxd subset: default hex+ASCII dump, plus -r (reverse), -p (plain hex),
+ * -i (C array), -c/-g (columns/grouping), -s/-l (seek/length), -u (uppercase).
+ * Unlike `objdump -s` it is format-agnostic: it dumps raw bytes of any input.
+ * With no FILE, or `-`, reads stdin; output goes to stdout or -o FILE. */
+
+/* Tool name handed to driver_errf() and driver_load_bytes(). */
+#define XXD_TOOL "xxd"
+
+/* Default number of bytes per output line for each mode; used by driver_xxd()
+ * when -c was not given. */
+#define XXD_COLS_DUMP 16u
+#define XXD_COLS_PLAIN 30u
+#define XXD_COLS_INCLUDE 12u
+
+/* Output format. XXD_DUMP is the zero value, so an XxdOpts cleared with memset
+ * starts out in the default hex+ASCII mode; -p and -i select the others. */
+typedef enum XxdMode {
+ XXD_DUMP = 0, /* default hex+ASCII */
+ XXD_PLAIN, /* -p: continuous hex */
+ XXD_INCLUDE, /* -i: C array */
+} XxdMode;
+
+/* Parsed command line for one invocation. driver_xxd() zero-fills the struct
+ * before parsing, so a zero field means the option was not given. */
+typedef struct XxdOpts {
+ XxdMode mode;
+ int reverse; /* -r */
+ int uppercase; /* -u */
+ size_t cols; /* -c; 0 = mode default */
+ size_t group; /* -g; 0 = default (2) */
+ uint64_t seek; /* -s */
+ uint64_t limit; /* -l; only applied when have_limit is set, so -l 0 means zero bytes */
+ int have_limit; /* whether -l was given */
+ const char* in; /* input path, or NULL = stdin */
+ const char* out; /* output path, or NULL = stdout */
+} XxdOpts;
+
+/* Streaming output buffer over a CfreeWriter; flushes when full so cols can be
+ * arbitrarily large. */
+typedef struct Xb {
+ char buf[8192]; /* bytes queued for the next write */
+ size_t n; /* number of valid bytes at the front of buf */
+ CfreeWriter* w; /* destination for flushed bytes */
+ int err; /* set to 1 by xb_flush when a write fails; the helpers never clear it */
+} Xb;
+
+/* Hand every queued byte to the writer and reset the buffer to empty. A failed
+ * write is remembered in b->err; the queued bytes are discarded either way. */
+static void xb_flush(Xb* b) {
+  if (b->n == 0) return;
+  if (cfree_writer_write(b->w, b->buf, b->n) != CFREE_OK) {
+    b->err = 1;
+  }
+  b->n = 0;
+}
+/* Queue a single byte, draining the buffer first if it has no room left. */
+static void xb_c(Xb* b, char c) {
+  const int full = (b->n == sizeof b->buf);
+  if (full) xb_flush(b);
+  b->buf[b->n] = c;
+  b->n += 1;
+}
+/* Queue every byte of the NUL-terminated string s (terminator excluded). */
+static void xb_s(Xb* b, const char* s) {
+  const char* p;
+  for (p = s; *p != '\0'; ++p) {
+    xb_c(b, *p);
+  }
+}
+/* Queue the two hex digits of v, high nibble first, using the 16-entry digit
+ * table hx. */
+static void xb_hex2(Xb* b, uint8_t v, const char* hx) {
+  const unsigned hi = (unsigned)v >> 4;
+  const unsigned lo = (unsigned)v & 0x0fu;
+  xb_c(b, hx[hi]);
+  xb_c(b, hx[lo]);
+}
+/* Queue v as exactly `width` hex digits (clamped to 16), zero-padded on the
+ * left. Digits above the requested width are dropped. */
+static void xb_hexnum(Xb* b, uint64_t v, int width, const char* hx) {
+  char digits[16];
+  const int n = (width > 16) ? 16 : width;
+  int k;
+  /* Fill from the right so the least significant nibble ends up last. */
+  for (k = n; k > 0; --k) {
+    digits[k - 1] = hx[v & 0x0f];
+    v >>= 4;
+  }
+  for (k = 0; k < n; ++k) {
+    xb_c(b, digits[k]);
+  }
+}
+/* Queue v in decimal with no padding; zero is written as "0". */
+static void xb_dec(Xb* b, uint64_t v) {
+  char rev[24];
+  int n = 0;
+  /* Collect digits least-significant first; do/while covers v == 0. */
+  do {
+    rev[n++] = (char)('0' + (int)(v % 10));
+    v /= 10;
+  } while (v != 0);
+  while (n > 0) {
+    xb_c(b, rev[--n]);
+  }
+}
+
+/* Print the xxd help text through driver_printf. The text is one string
+ * literal wrapped in CFREE_SLICE_LIT and printed with "%.*s"; driver_xxd()
+ * calls this when driver_argv_wants_help() reports a help request. */
+void driver_help_xxd(void) {
+ driver_printf(
+ "%.*s",
+ CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+ "cfree xxd — hex dump a file (and reverse a dump back to binary)\n"
+ "\n"
+ "USAGE\n"
+ " cfree xxd [OPTIONS] [INFILE [OUTFILE]]\n"
+ "\n"
+ "DESCRIPTION\n"
+ " Dumps the raw bytes of INFILE (or stdin) as a hex+ASCII table.\n"
+ " Works on any file, not just objects. `xxd f | xxd -r` round-trips\n"
+ " contiguous data back to the original bytes.\n"
+ "\n"
+ "OPTIONS\n"
+ " -r reverse: read a hex dump, write binary\n"
+ " -p plain hex dump (continuous, no offsets/ASCII)\n"
+ " -i output a C `unsigned char[]` array\n"
+ " -c N N bytes per line (default 16; -p 30; -i 12)\n"
+ " -g N group hex into N-byte columns (default 2)\n"
+ " -s OFF start at byte OFF (decimal or 0x-hex)\n"
+ " -l LEN dump at most LEN bytes\n"
+ " -u uppercase hex digits\n"
+ " -o FILE write output to FILE instead of stdout\n"
+ " -h, --help show this help\n"
+ "\n"
+ "NOTE\n"
+ " -r reconstructs contiguous data; leading offsets are read for\n"
+ " context but not used to seek/pad sparse output.\n"
+ "\n"
+ "EXIT CODES\n"
+ " 0 success 1 I/O error 2 bad usage\n")));
+}
+
+/* Return 1 when c is an ASCII hex digit (0-9, a-f, A-F), otherwise 0. */
+static int xxd_is_hex(int c) {
+  if (c >= '0' && c <= '9') return 1;
+  if (c >= 'a' && c <= 'f') return 1;
+  return c >= 'A' && c <= 'F';
+}
+/* Numeric value (0-15) of a hex digit. The caller must have checked the
+ * character with xxd_is_hex(); anything else falls into the upper-case branch
+ * and yields a meaningless number. */
+static int xxd_hexval(int c) {
+  if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
+  if (c >= '0' && c <= '9') return c - '0';
+  return 10 + (c - 'A');
+}
+
+/* Parse a non-negative integer written in decimal or with a 0x/0X hex prefix.
+ *
+ * s    NUL-terminated text; NULL, "", a bare "0x", or any stray character is
+ *      rejected. No sign, whitespace or suffix is accepted.
+ * out  receives the value on success and is left untouched on failure.
+ *
+ * Returns 0 on success and 1 on failure. A value that does not fit in 64 bits
+ * is a failure rather than being wrapped modulo 2^64, so an oversized option
+ * value cannot turn into a small one unnoticed. */
+static int xxd_parse_u64(const char* s, uint64_t* out) {
+  uint64_t v = 0;
+  uint64_t base = 10;
+  if (!s || !*s) return 1;
+  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    s += 2;
+    if (!*s) return 1;
+  }
+  for (; *s; ++s) {
+    uint64_t d;
+    char c = *s;
+    if (c >= '0' && c <= '9')
+      d = (uint64_t)(c - '0');
+    else if (base == 16 && (c >= 'a' && c <= 'f'))
+      d = (uint64_t)(c - 'a' + 10);
+    else if (base == 16 && (c >= 'A' && c <= 'F'))
+      d = (uint64_t)(c - 'A' + 10);
+    else
+      return 1;
+    /* v * base + d fits exactly when v <= (UINT64_MAX - d) / base. */
+    if (v > (UINT64_MAX - d) / base) return 1;
+    v = v * base + d;
+  }
+  *out = v;
+  return 0;
+}
+
+/* Build a C identifier from the basename of `path`: letters and digits are
+ * kept, every other character becomes '_', and a name starting with a digit is
+ * prefixed with '_'. The result is truncated to fit and always NUL-terminated
+ * in out[0..cap); nothing is written when cap is 0. */
+static void xxd_ident(const char* path, char* out, size_t cap) {
+  const char* name = driver_basename(path);
+  size_t n = 0;
+  if (cap == 0) return;
+  if (name[0] >= '0' && name[0] <= '9' && n + 1 < cap) {
+    out[n++] = '_';
+  }
+  while (*name != '\0' && n + 1 < cap) {
+    const char c = *name++;
+    const int is_lower = (c >= 'a' && c <= 'z');
+    const int is_upper = (c >= 'A' && c <= 'Z');
+    const int is_digit = (c >= '0' && c <= '9');
+    out[n++] = (is_lower || is_upper || is_digit) ? c : '_';
+  }
+  out[n] = '\0';
+}
+
+/* Write the default hex+ASCII table for data[0..len).
+ *
+ * base   file offset of data[0]; printed addresses are base + line offset.
+ * cols   bytes per line, must be > 0.
+ * group  bytes per space-separated hex group, must be > 0.
+ * hx     16-entry digit table (lower or upper case).
+ *
+ * Each line is `ADDR: <hex groups>  <ascii>`. A short final line is padded
+ * with blanks so the ASCII column stays aligned, and the hex area always ends
+ * in two spaces, which is the gutter xxd_reverse() looks for.
+ *
+ * The address is at least 8 hex digits and grows as needed (like "%08llx"),
+ * so offsets at or above 4 GiB are printed in full instead of losing their
+ * high digits. */
+static void xxd_emit_dump(Xb* b, const uint8_t* data, size_t len, uint64_t base,
+                          size_t cols, size_t group, const char* hx) {
+  size_t off = 0;
+  while (off < len) {
+    size_t remaining = len - off;
+    size_t linelen = (remaining < cols) ? remaining : cols;
+    uint64_t addr = base + off;
+    uint64_t high = addr >> 32;
+    int width = 8;
+    size_t j;
+    /* One extra digit per non-zero nibble above bit 31. */
+    while (high) {
+      ++width;
+      high >>= 4;
+    }
+    xb_hexnum(b, addr, width, hx);
+    xb_s(b, ": ");
+    for (j = 0; j < cols; ++j) {
+      if (j < linelen)
+        xb_hex2(b, data[off + j], hx);
+      else {
+        xb_c(b, ' ');
+        xb_c(b, ' ');
+      }
+      if ((j + 1) % group == 0) xb_c(b, ' ');
+    }
+    /* A trailing partial group got no separator above; add it here. */
+    if (cols % group != 0) xb_c(b, ' ');
+    xb_c(b, ' ');
+    for (j = 0; j < linelen; ++j) {
+      uint8_t c = data[off + j];
+      xb_c(b, (c >= 0x20 && c <= 0x7e) ? (char)c : '.');
+    }
+    xb_c(b, '\n');
+    /* Advance by what was consumed; off + cols could wrap for a huge cols. */
+    off += linelen;
+  }
+}
+
+/* Write data[0..len) as bare hex digits, breaking the line after every `cols`
+ * bytes (cols must be > 0). The output always ends with a newline, including
+ * a lone newline for empty input. */
+static void xxd_emit_plain(Xb* b, const uint8_t* data, size_t len, size_t cols,
+                           const char* hx) {
+  size_t filled = 0; /* bytes written on the current line */
+  size_t k;
+  for (k = 0; k < len; ++k) {
+    xb_hex2(b, data[k], hx);
+    if (++filled == cols) {
+      xb_c(b, '\n');
+      filled = 0;
+    }
+  }
+  if (len == 0 || filled != 0) {
+    xb_c(b, '\n');
+  }
+}
+
+/* Write data[0..len) as C source.
+ *
+ * ident  identifier for the array, or NULL / "" when there is none (input
+ *        came from stdin).
+ * cols   initialisers per line, must be > 0.
+ * hx     16-entry digit table.
+ *
+ * With an identifier the output is a complete definition:
+ *     unsigned char IDENT[] = {
+ *       0x.., 0x.., ...
+ *     };
+ *     unsigned int IDENT_len = N;
+ * Without one, only the initialiser lines are written, as GNU xxd does for
+ * stdin. A wrapper with an empty name (`unsigned char [] = {`,
+ * `unsigned int _len = N;`) is not valid C, so it is not emitted. */
+static void xxd_emit_include(Xb* b, const uint8_t* data, size_t len,
+                             const char* ident, size_t cols, const char* hx) {
+  const int named = (ident && ident[0]);
+  size_t i;
+  if (named) {
+    xb_s(b, "unsigned char ");
+    xb_s(b, ident);
+    xb_s(b, "[] = {\n");
+  }
+  for (i = 0; i < len; ++i) {
+    if (i % cols == 0) xb_s(b, " ");
+    xb_s(b, "0x");
+    xb_hex2(b, data[i], hx);
+    if (i + 1 < len) xb_c(b, ',');
+    if ((i + 1) % cols == 0 || i + 1 == len)
+      xb_c(b, '\n');
+    else
+      xb_c(b, ' ');
+  }
+  if (named) {
+    xb_s(b, "};\n");
+    xb_s(b, "unsigned int ");
+    xb_s(b, ident);
+    xb_s(b, "_len = ");
+    xb_dec(b, len);
+    xb_s(b, ";\n");
+  }
+}
+
+/* Turn hex-dump text back into raw bytes, one input line at a time.
+ *
+ * plain != 0: every line is continuous hex; blanks between digits are skipped.
+ * plain == 0: a line looks like `[offset:] hex... ascii`. Everything up to and
+ *             including the first ':' is skipped, and decoding stops at the
+ *             first run of two blanks (the gutter before the ASCII column).
+ *
+ * In both modes the first character that is neither a blank nor a hex digit
+ * ends the line, and an unpaired trailing hex digit on a line is dropped. */
+static void xxd_reverse(Xb* b, const uint8_t* data, size_t len, int plain) {
+  size_t pos = 0;
+  while (pos < len) {
+    const uint8_t* nl = memchr(data + pos, '\n', len - pos);
+    const size_t eol = nl ? (size_t)(nl - data) : len;
+    size_t start = pos;
+    size_t p;
+    int pending = -1; /* high nibble waiting for its partner, or -1 */
+
+    if (!plain) {
+      const uint8_t* colon = memchr(data + pos, ':', eol - pos);
+      if (colon) start = (size_t)(colon - data) + 1;
+    }
+
+    for (p = start; p < eol; ++p) {
+      const unsigned char ch = data[p];
+      const int blank = (ch == ' ' || ch == '\t');
+      if (blank) {
+        const int next_blank =
+            p + 1 < eol && (data[p + 1] == ' ' || data[p + 1] == '\t');
+        if (!plain && next_blank) break; /* gutter before the ASCII column */
+      } else if (!xxd_is_hex(ch)) {
+        break; /* rest of the line is ASCII or junk */
+      } else if (pending < 0) {
+        pending = xxd_hexval(ch);
+      } else {
+        xb_c(b, (char)((pending << 4) | xxd_hexval(ch)));
+        pending = -1;
+      }
+    }
+
+    /* Step over the newline when there is one. */
+    pos = (eol < len) ? eol + 1 : eol;
+  }
+}
+
+/* Fetch the argument of a short option given either attached (-c16) or as the
+ * following argv entry (-c 16). `a` is argv[*i]. For the separate form *i is
+ * advanced onto the consumed entry. Returns the value text, or NULL when the
+ * option is the last argument and has nothing attached (*i is then left
+ * unchanged). */
+static const char* xxd_optval(const char* a, int argc, char** argv, int* i) {
+  const int attached = (a[2] != '\0');
+  if (attached) return &a[2];
+  if (*i + 1 < argc) {
+    *i += 1;
+    return argv[*i];
+  }
+  return NULL;
+}
+
+/* Entry point for `cfree xxd`.
+ *
+ * Parses the options, loads the whole input (INFILE, or stdin when no file or
+ * `-` is given) into memory, applies -s/-l in the forward modes, and streams
+ * the selected rendering through an Xb to OUTFILE / -o FILE / stdout.
+ *
+ * Operands: at most two (INFILE then OUTFILE). `-` counts as an operand like
+ * any other, so a third operand is rejected whether it is a name or `-`.
+ *
+ * Returns 0 on success, 1 on an I/O failure (load, open, or write), and 2 on
+ * a usage error. rc starts at 2 so every `goto done` taken during option
+ * parsing reports bad usage without setting it again. */
+int driver_xxd(int argc, char** argv) {
+  DriverEnv env;
+  CfreeContext ctx;
+  XxdOpts o;
+  Xb b;
+  CfreeWriter* w = NULL;
+  const uint8_t* data = NULL;
+  size_t len = 0;
+  DriverLoad ld = {0};
+  uint8_t* sbuf = NULL;
+  size_t sbuf_len = 0;
+  int loaded = 0, npos = 0, rc = 2, owned_writer = 0;
+  int i;
+  size_t cols, group;
+  const char* hx;
+  char ident[256];
+
+  /* Help is handled before the environment exists, so nothing to tear down. */
+  if (driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_xxd();
+    return 0;
+  }
+
+  memset(&o, 0, sizeof o);
+  driver_env_init(&env);
+  ctx = driver_env_to_context(&env);
+
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    if (driver_streq(a, "-r")) {
+      o.reverse = 1;
+      continue;
+    }
+    if (driver_streq(a, "-p")) {
+      o.mode = XXD_PLAIN;
+      continue;
+    }
+    if (driver_streq(a, "-i")) {
+      o.mode = XXD_INCLUDE;
+      continue;
+    }
+    if (driver_streq(a, "-u")) {
+      o.uppercase = 1;
+      continue;
+    }
+    if (a[0] == '-' && a[1] == 'c') {
+      const char* v = xxd_optval(a, argc, argv, &i);
+      uint64_t n;
+      if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) {
+        driver_errf(XXD_TOOL, "-c requires a positive integer");
+        goto done;
+      }
+      o.cols = (size_t)n;
+      continue;
+    }
+    if (a[0] == '-' && a[1] == 'g') {
+      const char* v = xxd_optval(a, argc, argv, &i);
+      uint64_t n;
+      if (!v || xxd_parse_u64(v, &n) != 0 || n == 0) {
+        driver_errf(XXD_TOOL, "-g requires a positive integer");
+        goto done;
+      }
+      o.group = (size_t)n;
+      continue;
+    }
+    if (a[0] == '-' && a[1] == 's') {
+      const char* v = xxd_optval(a, argc, argv, &i);
+      if (!v || xxd_parse_u64(v, &o.seek) != 0) {
+        driver_errf(XXD_TOOL, "-s requires a byte offset");
+        goto done;
+      }
+      continue;
+    }
+    if (a[0] == '-' && a[1] == 'l') {
+      const char* v = xxd_optval(a, argc, argv, &i);
+      if (!v || xxd_parse_u64(v, &o.limit) != 0) {
+        driver_errf(XXD_TOOL, "-l requires a length");
+        goto done;
+      }
+      o.have_limit = 1;
+      continue;
+    }
+    if (driver_streq(a, "-o")) {
+      if (i + 1 >= argc) {
+        driver_errf(XXD_TOOL, "-o requires a path");
+        goto done;
+      }
+      o.out = argv[++i];
+      continue;
+    }
+    if (driver_streq(a, "-")) {
+      /* `-` is stdin as the first operand and stdout as the second; a third
+       * operand is an error just like a third file name. */
+      if (npos >= 2) {
+        driver_errf(XXD_TOOL, "too many operands: %s", a);
+        goto done;
+      }
+      if (npos == 0)
+        o.in = NULL; /* stdin */
+      ++npos;
+      continue;
+    }
+    if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(XXD_TOOL, "unknown option: %s", a);
+      goto done;
+    }
+    if (npos == 0)
+      o.in = a;
+    else if (npos == 1 && !o.out)
+      o.out = a;
+    else {
+      driver_errf(XXD_TOOL, "too many operands: %s", a);
+      goto done;
+    }
+    ++npos;
+  }
+
+  /* Resolve mode-dependent defaults. */
+  cols = o.cols ? o.cols
+                : (o.mode == XXD_PLAIN     ? XXD_COLS_PLAIN
+                   : o.mode == XXD_INCLUDE ? XXD_COLS_INCLUDE
+                                           : XXD_COLS_DUMP);
+  group = o.group ? o.group : 2u;
+  hx = o.uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
+
+  /* Load input. */
+  if (o.in) {
+    CfreeSlice in;
+    if (driver_load_bytes(&env.file_io, XXD_TOOL, o.in, &ld, &in) != 0) {
+      rc = 1;
+      goto done;
+    }
+    loaded = 1;
+    data = in.data;
+    len = in.len;
+  } else {
+    if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) {
+      driver_errf(XXD_TOOL, "failed to read stdin");
+      rc = 1;
+      goto done;
+    }
+    data = sbuf;
+    len = sbuf_len;
+  }
+
+  /* Apply seek/length (forward modes only; reverse consumes the whole text). */
+  if (!o.reverse) {
+    if (o.seek < len) {
+      data += o.seek;
+      len -= (size_t)o.seek;
+    } else {
+      /* Seek at or past the end: nothing left to dump. The pointer is left
+       * alone (no emitter reads it when len is 0), which avoids doing
+       * arithmetic on a possibly-NULL buffer. */
+      len = 0;
+    }
+    if (o.have_limit && o.limit < (uint64_t)len) len = (size_t)o.limit;
+  }
+
+  /* Open output. This happens after the input has been loaded. */
+  if (o.out) {
+    if (ctx.file_io->open_writer(ctx.file_io->user, o.out, &w) != CFREE_OK) {
+      driver_errf(XXD_TOOL, "failed to open output: %s", o.out);
+      rc = 1;
+      goto done;
+    }
+    owned_writer = 1;
+  } else {
+    w = driver_stdout_writer(&env);
+    owned_writer = 1;
+  }
+
+  memset(&b, 0, sizeof b);
+  b.w = w;
+
+  if (o.reverse) {
+    xxd_reverse(&b, data, len, o.mode == XXD_PLAIN);
+  } else if (o.mode == XXD_PLAIN) {
+    xxd_emit_plain(&b, data, len, cols, hx);
+  } else if (o.mode == XXD_INCLUDE) {
+    /* The array name comes from the input path; stdin has none. */
+    ident[0] = '\0';
+    if (o.in) xxd_ident(o.in, ident, sizeof ident);
+    xxd_emit_include(&b, data, len, ident, cols, hx);
+  } else {
+    xxd_emit_dump(&b, data, len, o.seek, cols, group, hx);
+  }
+  xb_flush(&b);
+  /* b.err is set by xb_flush when any write failed along the way. */
+  rc = b.err ? 1 : 0;
+
+done:
+  if (owned_writer && w) cfree_writer_close(w);
+  if (sbuf) driver_free(&env, sbuf, sbuf_len);
+  if (loaded) driver_release_bytes(&env.file_io, &ld);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -55,6 +55,7 @@ int driver_addr2line(int argc, char** argv);
int driver_strings(int argc, char** argv);
int driver_cas(int argc, char** argv);
int driver_pkg(int argc, char** argv);
+int driver_xxd(int argc, char** argv);
/* Per-tool help printers. Write a multi-section help text to stdout and
* return. The tool entry-points call these when invoked with no args, -h,
@@ -79,6 +80,7 @@ void driver_help_addr2line(void);
void driver_help_strings(void);
void driver_help_cas(void);
void driver_help_pkg(void);
+void driver_help_xxd(void);
/* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
* `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -96,6 +96,10 @@ static const DriverToolDesc driver_tools[] = {
{"pkg", driver_pkg, driver_help_pkg,
"Bundle, sign, verify, and unpack distributable .cfpkg packages"},
#endif
+#if CFREE_TOOL_XXD_ENABLED
+ {"xxd", driver_xxd, driver_help_xxd,
+ "Hex dump any file (and reverse a dump back to binary)"},
+#endif
{NULL, NULL, NULL, NULL},
};
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -114,5 +114,6 @@
#define CFREE_TOOL_STRINGS_ENABLED 1
#define CFREE_TOOL_CAS_ENABLED 1
#define CFREE_TOOL_PKG_ENABLED 1
+#define CFREE_TOOL_XXD_ENABLED 1
#endif /* CFREE_CONFIG_H */