commit 056fbd737350fc097b36588f4e31e3f6bc48028a
parent 668e21ada80ea8280e3be9a229727590e54b1681
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 11:09:19 -0700
driver: add disas tool (disassemble raw machine-code bytes)
Decodes a headerless byte buffer for a chosen arch, unlike objdump -d which
needs a parsed object. Bytes come from an inline hex string (-x, spaces ok),
a file, or stdin; -target selects the arch (host default); --base sets the
display address. Reuses the public cfree_disasm_iter_* API (no new surface)
and objdump's per-instruction print shape, minus symbol annotation.
Verified on aarch64/x86_64/riscv64 against known encodings and objdump -d.
Diffstat:
5 files changed, 282 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -400,6 +400,9 @@ endif
ifeq ($(CFREE_TOOL_HASH_ENABLED),1)
DRIVER_TOOL_SRCS += driver/cmd/hash.c
endif
+ifeq ($(CFREE_TOOL_DISAS_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/disas.c
+endif
DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/disas.c b/driver/cmd/disas.c
@@ -0,0 +1,272 @@
+#include <cfree/core.h>
+#include <cfree/disasm.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree disas` — disassemble raw machine-code bytes for a target arch. Unlike
+ * `objdump -d`, the input is a headerless byte buffer (a file, stdin, or an
+ * inline hex string), not a parsed object — handy for inspecting a snippet of
+ * codegen or an emitted instruction. The arch comes from -target (host
+ * default); --base sets the address shown for the first byte. */
+
+#define DISAS_TOOL "disas"
+
+/* Parsed command line for `cfree disas`. driver_disas() zero-fills it and
+ * then seeds `target` from driver_host_target() before reading argv, so an
+ * unset `hex`/`in` is NULL and an unset `base` is 0. */
+typedef struct DisasOpts {
+ CfreeTarget target; /* -target: arch to decode for (host target if absent) */
+ uint64_t base; /* --base: vaddr of the first byte */
+ const char* hex; /* -x: inline hex string, or NULL */
+ const char* in; /* positional file, "-" for stdin, or NULL */
+} DisasOpts;
+
+/* Help printer for `cfree disas`: emits the usage forms, a short description
+ * of the byte sources, the option list and the exit-code table through a
+ * single driver_printf() call. Takes no arguments and returns nothing. */
+void driver_help_disas(void) {
+ driver_printf(
+ "%.*s",
+ CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+ "cfree disas — disassemble raw machine-code bytes\n"
+ "\n"
+ "USAGE\n"
+ " cfree disas [-target TRIPLE] [--base ADDR] -x \"HEX...\"\n"
+ " cfree disas [-target TRIPLE] [--base ADDR] [FILE|-]\n"
+ "\n"
+ "DESCRIPTION\n"
+ " Decodes a headerless buffer of machine code. Bytes come from an\n"
+ " inline hex string (-x), a FILE, or stdin (no FILE, or `-`).\n"
+ " Hex may contain spaces. The buffer is treated as code for the\n"
+ " -target architecture (the host arch by default).\n"
+ "\n"
+ "OPTIONS\n"
+ " -target TRIPLE architecture to decode for (e.g. aarch64,\n"
+ " x86_64, riscv64). See `cfree cc --help`.\n"
+ " -x \"HEX...\" disassemble these hex bytes (spaces allowed)\n"
+ " --base ADDR address of the first byte (default 0)\n"
+ " -h, --help show this help\n"
+ "\n"
+ "EXIT CODES\n"
+ " 0 success 1 error 2 bad usage\n")));
+}
+
+/* Returns 1 when `c` is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
+static int disas_is_hex(int c) {
+  if (c >= '0' && c <= '9') return 1;
+  if (c >= 'a' && c <= 'f') return 1;
+  return c >= 'A' && c <= 'F';
+}
+/* Numeric value (0..15) of the hex digit `c`. The input is not validated:
+ * anything that is neither a decimal digit nor a-f is treated as if it were
+ * an uppercase A-F digit, so callers must check with disas_is_hex() first. */
+static int disas_hexval(int c) {
+  int is_decimal = (c >= '0' && c <= '9');
+  int is_lower = (c >= 'a' && c <= 'f');
+  return is_decimal ? c - '0' : is_lower ? c - 'a' + 10 : c - 'A' + 10;
+}
+
+/* True for the separators allowed between hex digits: space, tab, LF, CR. */
+static int disas_is_blank(char ch) {
+  return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
+}
+
+/* Convert the hex string `s` into a newly allocated byte buffer. Blanks
+ * between digits are skipped; any other non-hex character, a string with no
+ * digits, or an odd digit count is reported through driver_errf() and
+ * rejected. On success returns 0 with *out / *outlen set, and the caller
+ * releases the buffer with driver_free(env, *out, *outlen). Returns 1 on
+ * failure, leaving *out and *outlen untouched. */
+static int disas_parse_hex(DriverEnv* env, const char* s, uint8_t** out,
+                           size_t* outlen) {
+  const char* cur;
+  uint8_t* bytes;
+  size_t digits = 0;
+  size_t seen = 0;
+  size_t nbytes;
+
+  /* Pass 1: validate every character and count the digits, so nothing is
+   * allocated for malformed input and the buffer can be sized exactly. */
+  for (cur = s; *cur != '\0'; ++cur) {
+    if (disas_is_blank(*cur)) continue;
+    if (!disas_is_hex((unsigned char)*cur)) {
+      driver_errf(DISAS_TOOL, "invalid hex digit: '%c'", *cur);
+      return 1;
+    }
+    digits++;
+  }
+  if (digits == 0) {
+    driver_errf(DISAS_TOOL, "no hex bytes given");
+    return 1;
+  }
+  if ((digits % 2) != 0) {
+    driver_errf(DISAS_TOOL, "hex string has an odd number of digits");
+    return 1;
+  }
+
+  nbytes = digits / 2;
+  bytes = (uint8_t*)driver_alloc(env, nbytes);
+  if (!bytes) {
+    driver_errf(DISAS_TOOL, "out of memory");
+    return 1;
+  }
+
+  /* Pass 2: pack digit pairs; even-numbered digits are the high nibble. */
+  for (cur = s; *cur != '\0'; ++cur) {
+    int nibble;
+    if (disas_is_blank(*cur)) continue;
+    nibble = disas_hexval((unsigned char)*cur);
+    if ((seen % 2) == 0)
+      bytes[seen / 2] = (uint8_t)(nibble << 4);
+    else
+      bytes[seen / 2] = (uint8_t)(bytes[seen / 2] | nibble);
+    ++seen;
+  }
+
+  *out = bytes;
+  *outlen = nbytes;
+  return 0;
+}
+
+/* Disassemble `len` bytes at `data` for the target described by `dctx` and
+ * print one line per decoded instruction: address, raw bytes (padded to
+ * eight byte columns), mnemonic, then operands and annotation when present.
+ * `base` is the address reported for the first byte.
+ * Returns 0 once iteration finishes, or 1 (after reporting the error) when
+ * no iterator could be created for the target. */
+static int disas_run(const CfreeDisasmContext* dctx, const uint8_t* data,
+                     size_t len, uint64_t base) {
+  CfreeDisasmIter* it = NULL;
+  CfreeInsn insn;
+
+  if (cfree_disasm_iter_new(dctx, data, len, base, NULL, &it) != CFREE_OK) {
+    driver_errf(DISAS_TOOL, "no disassembler for the selected target");
+    return 1;
+  }
+  while (cfree_disasm_iter_next(it, &insn) == CFREE_ITER_ITEM) {
+    uint32_t b;
+    driver_printf("%8llx:\t", (unsigned long long)insn.vaddr);
+    for (b = 0; b < insn.nbytes; ++b)
+      driver_printf("%02x ", (unsigned)insn.bytes[b]);
+    /* Every printed byte occupies three columns ("xx "), so short encodings
+     * are padded with three spaces per missing byte to keep the mnemonic
+     * column aligned. */
+    for (b = insn.nbytes; b < 8; ++b) driver_printf("   ");
+    driver_printf("\t%.*s", CFREE_SLICE_ARG(insn.mnemonic));
+    if (insn.operands.len) driver_printf(" %.*s", CFREE_SLICE_ARG(insn.operands));
+    if (insn.annotation.len)
+      driver_printf(" # %.*s", CFREE_SLICE_ARG(insn.annotation));
+    driver_printf("\n");
+  }
+  cfree_disasm_iter_free(it);
+  return 0;
+}
+
+/* Parse a --base operand: decimal, or hexadecimal with a 0x/0X prefix.
+ * Stores the value in *out and returns 0. Returns 1, leaving *out untouched,
+ * for an empty digit string, a character that is not a digit of the chosen
+ * radix, or a value that does not fit in 64 bits. */
+static int disas_parse_base(const char* v, uint64_t* out) {
+  uint64_t val = 0;
+  uint64_t radix = 10;
+
+  if (v[0] == '0' && (v[1] == 'x' || v[1] == 'X')) {
+    radix = 16;
+    v += 2;
+  }
+  if (!*v) return 1;
+  for (; *v; ++v) {
+    uint64_t d;
+    if (*v >= '0' && *v <= '9')
+      d = (uint64_t)(*v - '0');
+    else if (radix == 16 && *v >= 'a' && *v <= 'f')
+      d = (uint64_t)(*v - 'a' + 10);
+    else if (radix == 16 && *v >= 'A' && *v <= 'F')
+      d = (uint64_t)(*v - 'A' + 10);
+    else
+      return 1;
+    /* val * radix + d must not exceed UINT64_MAX; reject rather than let the
+     * unsigned arithmetic wrap to a misleading small address. */
+    if (val > (UINT64_MAX - d) / radix) return 1;
+    val = val * radix + d;
+  }
+  *out = val;
+  return 0;
+}
+
+/* Entry point for `cfree disas`. argv[0] is the tool name; the remaining
+ * arguments are -target TRIPLE, -x HEX, --base ADDR and at most one
+ * positional operand (a file path, or "-" for stdin). Bytes are taken from
+ * -x when given, otherwise from the file, otherwise from stdin.
+ * Returns 0 on success, 1 on a runtime error (bad hex, unreadable input, no
+ * disassembler for the target) and 2 on bad usage. With no arguments the
+ * help text is printed and 2 is returned; with a help flag, 0. */
+int driver_disas(int argc, char** argv) {
+  DriverEnv env;
+  CfreeContext ctx;
+  CfreeDisasmContext dctx;
+  DisasOpts o;
+  const uint8_t* data = NULL;
+  size_t len = 0;
+  DriverLoad ld = {0};
+  uint8_t* sbuf = NULL;
+  size_t sbuf_len = 0;
+  uint8_t* hexbuf = NULL;
+  size_t hexbuf_len = 0;
+  /* rc stays 2 (bad usage) until argument parsing has succeeded. */
+  int loaded = 0, npos = 0, rc = 2;
+  int i;
+
+  if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_disas();
+    return argc < 2 ? 2 : 0;
+  }
+
+  memset(&o, 0, sizeof o);
+  o.target = driver_host_target();
+  driver_env_init(&env);
+
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    if (driver_streq(a, "-target")) {
+      if (i + 1 >= argc) {
+        driver_errf(DISAS_TOOL, "-target requires an argument");
+        goto done;
+      }
+      if (driver_target_from_triple(argv[++i], &o.target) != 0) {
+        driver_errf(DISAS_TOOL, "unrecognized target: %s", argv[i]);
+        goto done;
+      }
+      continue;
+    }
+    if (driver_streq(a, "-x")) {
+      if (i + 1 >= argc) {
+        driver_errf(DISAS_TOOL, "-x requires a hex string");
+        goto done;
+      }
+      o.hex = argv[++i];
+      continue;
+    }
+    if (driver_streq(a, "--base")) {
+      if (i + 1 >= argc) {
+        driver_errf(DISAS_TOOL, "--base requires an address");
+        goto done;
+      }
+      if (disas_parse_base(argv[++i], &o.base) != 0) {
+        driver_errf(DISAS_TOOL, "invalid --base address");
+        goto done;
+      }
+      continue;
+    }
+    /* A lone "-" is not an option: it falls through to the positional path
+     * below so that a surplus "-" is rejected like any other extra operand. */
+    if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(DISAS_TOOL, "unknown option: %s", a);
+      goto done;
+    }
+    if (npos > 0) {
+      driver_errf(DISAS_TOOL, "too many operands: %s", a);
+      goto done;
+    }
+    o.in = a;
+    ++npos;
+  }
+
+  /* Resolve the byte source: -x wins; else file/stdin. */
+  if (o.hex) {
+    if (o.in) {
+      driver_errf(DISAS_TOOL, "give either -x or a file, not both");
+      goto done;
+    }
+    if (disas_parse_hex(&env, o.hex, &hexbuf, &hexbuf_len) != 0) {
+      rc = 1;
+      goto done;
+    }
+    data = hexbuf;
+    len = hexbuf_len;
+  } else if (o.in && !driver_streq(o.in, "-")) {
+    CfreeSlice in;
+    if (driver_load_bytes(&env.file_io, DISAS_TOOL, o.in, &ld, &in) != 0) {
+      rc = 1;
+      goto done;
+    }
+    loaded = 1;
+    data = in.data;
+    len = in.len;
+  } else {
+    if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) {
+      driver_errf(DISAS_TOOL, "failed to read stdin");
+      rc = 1;
+      goto done;
+    }
+    data = sbuf;
+    len = sbuf_len;
+  }
+
+  ctx = driver_env_to_context(&env);
+  memset(&dctx, 0, sizeof dctx);
+  dctx.target = o.target;
+  dctx.context = ctx;
+  /* Propagate a disassembler set-up failure as the exit status instead of
+   * reporting success after an error message. */
+  rc = disas_run(&dctx, data, len, o.base);
+
+done:
+  /* Each buffer is released only if its source was actually used. */
+  if (hexbuf) driver_free(&env, hexbuf, hexbuf_len);
+  if (sbuf) driver_free(&env, sbuf, sbuf_len);
+  if (loaded) driver_release_bytes(&env.file_io, &ld);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -58,6 +58,7 @@ int driver_pkg(int argc, char** argv);
int driver_xxd(int argc, char** argv);
int driver_cmp(int argc, char** argv);
int driver_hash(int argc, char** argv);
+int driver_disas(int argc, char** argv);
/* Per-tool help printers. Write a multi-section help text to stdout and
* return. The tool entry-points call these when invoked with no args, -h,
@@ -85,6 +86,7 @@ void driver_help_pkg(void);
void driver_help_xxd(void);
void driver_help_cmp(void);
void driver_help_hash(void);
+void driver_help_disas(void);
/* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
* `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -108,6 +108,10 @@ static const DriverToolDesc driver_tools[] = {
{"hash", driver_hash, driver_help_hash,
"Hash files with SHA-256, BLAKE2b, or CRC-32"},
#endif
+#if CFREE_TOOL_DISAS_ENABLED
+ {"disas", driver_disas, driver_help_disas,
+ "Disassemble raw machine-code bytes for a target arch"},
+#endif
{NULL, NULL, NULL, NULL},
};
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -117,5 +117,6 @@
#define CFREE_TOOL_XXD_ENABLED 1
#define CFREE_TOOL_CMP_ENABLED 1
#define CFREE_TOOL_HASH_ENABLED 1
+#define CFREE_TOOL_DISAS_ENABLED 1
#endif /* CFREE_CONFIG_H */