kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 056fbd737350fc097b36588f4e31e3f6bc48028a
parent 668e21ada80ea8280e3be9a229727590e54b1681
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 11:09:19 -0700

driver: add disas tool (disassemble raw machine-code bytes)

Decodes a headerless byte buffer for a chosen arch, unlike objdump -d which
needs a parsed object. Bytes come from an inline hex string (-x, spaces ok),
a file, or stdin; -target selects the arch (host default); --base sets the
display address. Reuses the public cfree_disasm_iter_* API (no new surface)
and objdump's per-instruction print shape, minus symbol annotation.

Verified on aarch64/x86_64/riscv64 against known encodings and objdump -d.

Diffstat:
M Makefile | 3 +++
A driver/cmd/disas.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/driver.h | 2 ++
M driver/main.c | 4 ++++
M include/cfree/config.h | 1 +
5 files changed, 282 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -400,6 +400,9 @@ endif ifeq ($(CFREE_TOOL_HASH_ENABLED),1) DRIVER_TOOL_SRCS += driver/cmd/hash.c endif +ifeq ($(CFREE_TOOL_DISAS_ENABLED),1) +DRIVER_TOOL_SRCS += driver/cmd/disas.c +endif DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS)) ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),) DRIVER_SRCS += driver/lib/cflags.c diff --git a/driver/cmd/disas.c b/driver/cmd/disas.c @@ -0,0 +1,272 @@ +#include <cfree/core.h> +#include <cfree/disasm.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "driver.h" +#include "env.h" + +/* `cfree disas` — disassemble raw machine-code bytes for a target arch. Unlike + * `objdump -d`, the input is a headerless byte buffer (a file, stdin, or an + * inline hex string), not a parsed object — handy for inspecting a snippet of + * codegen or an emitted instruction. The arch comes from -target (host + * default); --base sets the address shown for the first byte. */ + +#define DISAS_TOOL "disas" + +typedef struct DisasOpts { + CfreeTarget target; + uint64_t base; /* --base: vaddr of the first byte */ + const char* hex; /* -x: inline hex string, or NULL */ + const char* in; /* positional file, "-" for stdin, or NULL */ +} DisasOpts; + +void driver_help_disas(void) { + driver_printf( + "%.*s", + CFREE_SLICE_ARG(CFREE_SLICE_LIT( + "cfree disas — disassemble raw machine-code bytes\n" + "\n" + "USAGE\n" + " cfree disas [-target TRIPLE] [--base ADDR] -x \"HEX...\"\n" + " cfree disas [-target TRIPLE] [--base ADDR] [FILE|-]\n" + "\n" + "DESCRIPTION\n" + " Decodes a headerless buffer of machine code. Bytes come from an\n" + " inline hex string (-x), a FILE, or stdin (no FILE, or `-`).\n" + " Hex may contain spaces. 
The buffer is treated as code for the\n" + " -target architecture (the host arch by default).\n" + "\n" + "OPTIONS\n" + " -target TRIPLE architecture to decode for (e.g. aarch64,\n" + " x86_64, riscv64). See `cfree cc --help`.\n" + " -x \"HEX...\" disassemble these hex bytes (spaces allowed)\n" + " --base ADDR address of the first byte (default 0)\n" + " -h, --help show this help\n" + "\n" + "EXIT CODES\n" + " 0 success 1 error 2 bad usage\n"))); +} + +static int disas_is_hex(int c) { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); +} +static int disas_hexval(int c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + return c - 'A' + 10; +} + +/* Decode a whitespace-tolerant hex string into freshly-allocated bytes. + * Returns 0 on success (caller frees via driver_free(env, *out, *outlen)). */ +static int disas_parse_hex(DriverEnv* env, const char* s, uint8_t** out, + size_t* outlen) { + size_t ndig = 0, n, bi = 0; + const char* p; + uint8_t* buf; + int hi = -1; + for (p = s; *p; ++p) { + if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') continue; + if (!disas_is_hex((unsigned char)*p)) { + driver_errf(DISAS_TOOL, "invalid hex digit: '%c'", *p); + return 1; + } + ++ndig; + } + if (ndig == 0) { + driver_errf(DISAS_TOOL, "no hex bytes given"); + return 1; + } + if (ndig & 1) { + driver_errf(DISAS_TOOL, "hex string has an odd number of digits"); + return 1; + } + n = ndig / 2; + buf = (uint8_t*)driver_alloc(env, n); + if (!buf) { + driver_errf(DISAS_TOOL, "out of memory"); + return 1; + } + for (p = s; *p; ++p) { + int v; + if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') continue; + v = disas_hexval((unsigned char)*p); + if (hi < 0) + hi = v; + else { + buf[bi++] = (uint8_t)((hi << 4) | v); + hi = -1; + } + } + *out = buf; + *outlen = n; + return 0; +} + +static void disas_run(const CfreeDisasmContext* dctx, const uint8_t* data, + size_t len, uint64_t base) { + 
CfreeDisasmIter* it = NULL; + CfreeInsn insn; + if (cfree_disasm_iter_new(dctx, data, len, base, NULL, &it) != CFREE_OK) { + driver_errf(DISAS_TOOL, "no disassembler for the selected target"); + return; + } + while (cfree_disasm_iter_next(it, &insn) == CFREE_ITER_ITEM) { + uint32_t b; + driver_printf("%8llx:\t", (unsigned long long)insn.vaddr); + for (b = 0; b < insn.nbytes; ++b) driver_printf("%02x ", insn.bytes[b]); + for (b = insn.nbytes; b < 8; ++b) driver_printf(" "); + driver_printf("\t%.*s", CFREE_SLICE_ARG(insn.mnemonic)); + if (insn.operands.len) driver_printf(" %.*s", CFREE_SLICE_ARG(insn.operands)); + if (insn.annotation.len) + driver_printf(" # %.*s", CFREE_SLICE_ARG(insn.annotation)); + driver_printf("\n"); + } + cfree_disasm_iter_free(it); +} + +int driver_disas(int argc, char** argv) { + DriverEnv env; + CfreeContext ctx; + CfreeDisasmContext dctx; + DisasOpts o; + const uint8_t* data = NULL; + size_t len = 0; + DriverLoad ld = {0}; + uint8_t* sbuf = NULL; + size_t sbuf_len = 0; + uint8_t* hexbuf = NULL; + size_t hexbuf_len = 0; + int loaded = 0, npos = 0, rc = 2; + int i; + + if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { + driver_help_disas(); + return argc < 2 ? 
2 : 0; + } + + memset(&o, 0, sizeof o); + o.target = driver_host_target(); + driver_env_init(&env); + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + if (driver_streq(a, "-target")) { + if (i + 1 >= argc) { + driver_errf(DISAS_TOOL, "-target requires an argument"); + goto done; + } + if (driver_target_from_triple(argv[++i], &o.target) != 0) { + driver_errf(DISAS_TOOL, "unrecognized target: %s", argv[i]); + goto done; + } + continue; + } + if (driver_streq(a, "-x")) { + if (i + 1 >= argc) { + driver_errf(DISAS_TOOL, "-x requires a hex string"); + goto done; + } + o.hex = argv[++i]; + continue; + } + if (driver_streq(a, "--base")) { + if (i + 1 >= argc) { + driver_errf(DISAS_TOOL, "--base requires an address"); + goto done; + } + { + const char* v = argv[++i]; + uint64_t val = 0; + int base = 10; + if (v[0] == '0' && (v[1] == 'x' || v[1] == 'X')) { + base = 16; + v += 2; + } + if (!*v) { + driver_errf(DISAS_TOOL, "invalid --base address"); + goto done; + } + for (; *v; ++v) { + int d; + if (*v >= '0' && *v <= '9') + d = *v - '0'; + else if (base == 16 && *v >= 'a' && *v <= 'f') + d = *v - 'a' + 10; + else if (base == 16 && *v >= 'A' && *v <= 'F') + d = *v - 'A' + 10; + else { + driver_errf(DISAS_TOOL, "invalid --base address"); + goto done; + } + val = val * (uint64_t)base + (uint64_t)d; + } + o.base = val; + } + continue; + } + if (driver_streq(a, "-")) { + if (npos == 0) o.in = "-"; + ++npos; + continue; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(DISAS_TOOL, "unknown option: %s", a); + goto done; + } + if (npos == 0) + o.in = a; + else { + driver_errf(DISAS_TOOL, "too many operands: %s", a); + goto done; + } + ++npos; + } + + /* Resolve the byte source: -x wins; else file/stdin. 
*/ + if (o.hex) { + if (o.in) { + driver_errf(DISAS_TOOL, "give either -x or a file, not both"); + goto done; + } + if (disas_parse_hex(&env, o.hex, &hexbuf, &hexbuf_len) != 0) { + rc = 1; + goto done; + } + data = hexbuf; + len = hexbuf_len; + } else if (o.in && !driver_streq(o.in, "-")) { + CfreeSlice in; + if (driver_load_bytes(&env.file_io, DISAS_TOOL, o.in, &ld, &in) != 0) { + rc = 1; + goto done; + } + loaded = 1; + data = in.data; + len = in.len; + } else { + if (!driver_read_stdin(&env, &sbuf, &sbuf_len)) { + driver_errf(DISAS_TOOL, "failed to read stdin"); + rc = 1; + goto done; + } + data = sbuf; + len = sbuf_len; + } + + ctx = driver_env_to_context(&env); + memset(&dctx, 0, sizeof dctx); + dctx.target = o.target; + dctx.context = ctx; + disas_run(&dctx, data, len, o.base); + rc = 0; + +done: + if (hexbuf) driver_free(&env, hexbuf, hexbuf_len); + if (sbuf) driver_free(&env, sbuf, sbuf_len); + if (loaded) driver_release_bytes(&env.file_io, &ld); + driver_env_fini(&env); + return rc; +} diff --git a/driver/driver.h b/driver/driver.h @@ -58,6 +58,7 @@ int driver_pkg(int argc, char** argv); int driver_xxd(int argc, char** argv); int driver_cmp(int argc, char** argv); int driver_hash(int argc, char** argv); +int driver_disas(int argc, char** argv); /* Per-tool help printers. Write a multi-section help text to stdout and * return. The tool entry-points call these when invoked with no args, -h, @@ -85,6 +86,7 @@ void driver_help_pkg(void); void driver_help_xxd(void); void driver_help_cmp(void); void driver_help_hash(void); +void driver_help_disas(void); /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`, * `cfree help`). 
Lists each tool with a one-line summary and explains diff --git a/driver/main.c b/driver/main.c @@ -108,6 +108,10 @@ static const DriverToolDesc driver_tools[] = { {"hash", driver_hash, driver_help_hash, "Hash files with SHA-256, BLAKE2b, or CRC-32"}, #endif +#if CFREE_TOOL_DISAS_ENABLED + {"disas", driver_disas, driver_help_disas, + "Disassemble raw machine-code bytes for a target arch"}, +#endif {NULL, NULL, NULL, NULL}, }; diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -117,5 +117,6 @@ #define CFREE_TOOL_XXD_ENABLED 1 #define CFREE_TOOL_CMP_ENABLED 1 #define CFREE_TOOL_HASH_ENABLED 1 +#define CFREE_TOOL_DISAS_ENABLED 1 #endif /* CFREE_CONFIG_H */