kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ed311622bb71bccb5e5e98fe9c6dfff8f058c999
parent 056fbd737350fc097b36588f4e31e3f6bc48028a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 11:12:10 -0700

driver: add mc tool (assemble an instruction, show its encoding)

llvm-mc --show-encoding style: assembles GAS-subset instruction text through
the same back-end as 'cfree as', emits to an in-memory object, then
disassembles the executable section to print 'mnemonic ops # encoding: [..]'
per instruction. Relocations the assembler emits (undefined-symbol operands)
are listed beneath rather than rejected. -p prints raw .text hex; -target
selects the arch (host default); input is the instruction text given as
command-line arguments (joined with spaces), or stdin via '-'.

Reuses existing public API only (compile session + cfree_writer_mem +
cfree_obj_open + reloc/disasm iterators). Verified on aarch64/x86_64/riscv64;
mc -p | disas round-trips.

Diffstat:
M Makefile | 3 +++
A driver/cmd/mc.c | 297 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M driver/driver.h | 2 ++
M driver/main.c | 4 ++++
M include/cfree/config.h | 1 +
5 files changed, 307 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -403,6 +403,9 @@ endif ifeq ($(CFREE_TOOL_DISAS_ENABLED),1) DRIVER_TOOL_SRCS += driver/cmd/disas.c endif +ifeq ($(CFREE_TOOL_MC_ENABLED),1) +DRIVER_TOOL_SRCS += driver/cmd/mc.c +endif DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS)) ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),) DRIVER_SRCS += driver/lib/cflags.c diff --git a/driver/cmd/mc.c b/driver/cmd/mc.c @@ -0,0 +1,297 @@ +#include <cfree/compile.h> +#include <cfree/core.h> +#include <cfree/disasm.h> +#include <cfree/object.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "driver.h" +#include "env.h" + +/* `cfree mc` — assemble one (or a few) GAS-subset instructions and show the + * machine-code encoding, llvm-mc --show-encoding style. The text is assembled + * through the same back-end as `cfree as`, emitted to an in-memory object, and + * the executable section is disassembled to attribute bytes to each + * instruction. Any relocations the assembler emits (e.g. for a branch to an + * undefined symbol) are listed beneath. -p prints just the raw .text hex. */ + +#define MC_TOOL "mc" + +typedef struct McOpts { + CfreeTarget target; + int plain; /* -p: raw .text hex only */ +} McOpts; + +void driver_help_mc(void) { + driver_printf( + "%.*s", + CFREE_SLICE_ARG(CFREE_SLICE_LIT( + "cfree mc — assemble an instruction and show its encoding\n" + "\n" + "USAGE\n" + " cfree mc [-target TRIPLE] [-p] \"INSN ...\"\n" + " cfree mc [-target TRIPLE] [-p] - (read instructions from " + "stdin)\n" + "\n" + "DESCRIPTION\n" + " Assembles the given instruction text (GAS subset: AT&T on x86,\n" + " standard mnemonics on aarch64/riscv64) and prints each decoded\n" + " instruction with its bytes as `# encoding: [0x..,..]`. 
Relocations\n" + " emitted for undefined-symbol operands are listed beneath.\n" + "\n" + "OPTIONS\n" + " -target TRIPLE architecture to assemble for (host default)\n" + " -p print only the raw .text bytes as hex\n" + " -h, --help show this help\n" + "\n" + "EXIT CODES\n" + " 0 success 1 assemble error 2 bad usage\n"))); +} + +/* Locate the executable section's bytes (format-agnostic: .text / __text). */ +static int mc_text_section(const CfreeObjFile* f, const uint8_t** data, + size_t* len) { + uint32_t n = cfree_obj_nsections(f); + uint32_t i; + for (i = 0; i < n; ++i) { + CfreeObjSecInfo sec; + if (cfree_obj_section(f, i, &sec) != CFREE_OK) continue; + if (!(sec.flags & CFREE_SF_EXEC)) continue; + if (cfree_obj_section_data(f, i, data, len) != CFREE_OK) continue; + if (*data && *len) return 0; + } + return 1; +} + +static void mc_print_plain(const uint8_t* text, size_t len) { + size_t i; + for (i = 0; i < len; ++i) driver_printf("%02x", text[i]); + driver_printf("\n"); +} + +/* Disassemble the assembled .text and print one `mnemonic ops # encoding: [..]` + * line per instruction. */ +static void mc_print_encoding(const CfreeContext* ctx, CfreeTarget target, + const uint8_t* text, size_t len) { + CfreeDisasmContext dctx; + CfreeDisasmIter* it = NULL; + CfreeInsn insn; + memset(&dctx, 0, sizeof dctx); + dctx.target = target; + dctx.context = *ctx; + if (cfree_disasm_iter_new(&dctx, text, len, 0, NULL, &it) != CFREE_OK) { + /* No disassembler: fall back to a single raw-hex line. */ + mc_print_plain(text, len); + return; + } + while (cfree_disasm_iter_next(it, &insn) == CFREE_ITER_ITEM) { + uint32_t b; + driver_printf("%.*s", CFREE_SLICE_ARG(insn.mnemonic)); + if (insn.operands.len) driver_printf(" %.*s", CFREE_SLICE_ARG(insn.operands)); + driver_printf("\t# encoding: ["); + for (b = 0; b < insn.nbytes; ++b) + driver_printf("%s0x%02x", b ? 
"," : "", insn.bytes[b]); + driver_printf("]\n"); + } + cfree_disasm_iter_free(it); +} + +/* List any relocations the assembler emitted (undefined-symbol operands). */ +static void mc_print_relocs(CfreeObjFile* f) { + CfreeObjRelocIter* it = NULL; + CfreeObjReloc r; + if (cfree_obj_reliter_new(f, &it) != CFREE_OK) return; + while (cfree_obj_reliter_next(it, &r) == CFREE_ITER_ITEM) { + driver_printf("# reloc %.*s %.*s", + CFREE_SLICE_ARG(r.kind_name.len ? r.kind_name + : CFREE_SLICE_LIT("?")), + CFREE_SLICE_ARG(r.sym_name.len ? r.sym_name + : CFREE_SLICE_LIT("*ABS*"))); + if (r.addend) + driver_printf("%c0x%llx", r.addend < 0 ? '-' : '+', + (unsigned long long)(r.addend < 0 ? -r.addend : r.addend)); + driver_printf(" @ .text+0x%llx\n", (unsigned long long)r.offset); + } + cfree_obj_reliter_free(it); +} + +/* Join the instruction operands argv[first..argc) into a newline-terminated + * source buffer. Returns NULL on OOM. */ +static char* mc_join_source(DriverEnv* env, int first, int argc, char** argv, + size_t* out_len) { + size_t total = 0; + int i; + char* buf; + size_t pos = 0; + for (i = first; i < argc; ++i) total += driver_strlen(argv[i]) + 1; + total += 1; /* trailing NUL */ + buf = (char*)driver_alloc(env, total); + if (!buf) return NULL; + for (i = first; i < argc; ++i) { + size_t n = driver_strlen(argv[i]); + driver_memcpy(buf + pos, argv[i], n); + pos += n; + buf[pos++] = (i + 1 < argc) ? 
' ' : '\n'; + } + if (pos == 0) buf[pos++] = '\n'; + *out_len = pos; + return buf; +} + +int driver_mc(int argc, char** argv) { + DriverEnv env; + CfreeContext ctx; + McOpts o; + CfreeCompiler* compiler = NULL; + CfreeCompileSession* session = NULL; + CfreeObjBuilder* ob = NULL; + CfreeWriter* mem = NULL; + CfreeObjFile* objf = NULL; + char* src = NULL; + size_t src_len = 0; + uint8_t* stdin_buf = NULL; + size_t stdin_len = 0; + int first_pos = 0; /* argv index of the first instruction token, 0 = none */ + int read_stdin = 0; + int i, rc = 2; + + if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { + driver_help_mc(); + return argc < 2 ? 2 : 0; + } + + memset(&o, 0, sizeof o); + o.target = driver_host_target(); + driver_env_init(&env); + + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + if (driver_streq(a, "-target")) { + if (i + 1 >= argc) { + driver_errf(MC_TOOL, "-target requires an argument"); + goto done; + } + if (driver_target_from_triple(argv[++i], &o.target) != 0) { + driver_errf(MC_TOOL, "unrecognized target: %s", argv[i]); + goto done; + } + continue; + } + if (driver_streq(a, "-p")) { + o.plain = 1; + continue; + } + if (driver_streq(a, "-")) { + read_stdin = 1; + first_pos = i; /* marks "have input"; stdin overrides token join */ + break; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(MC_TOOL, "unknown option: %s", a); + goto done; + } + first_pos = i; /* first instruction token */ + break; + } + + if (read_stdin) { + if (!driver_read_stdin(&env, &stdin_buf, &stdin_len)) { + driver_errf(MC_TOOL, "failed to read stdin"); + rc = 1; + goto done; + } + src = (char*)stdin_buf; + src_len = stdin_len; + } else if (first_pos != 0) { + src = mc_join_source(&env, first_pos, argc, argv, &src_len); + if (!src) { + driver_errf(MC_TOOL, "out of memory"); + rc = 1; + goto done; + } + } else { + driver_errf(MC_TOOL, "no instruction given"); + goto done; + } + + ctx = driver_env_to_context(&env); + if (driver_compiler_new(o.target, &ctx, 
&compiler) != CFREE_OK) { + driver_errf(MC_TOOL, "failed to initialize compiler"); + rc = 1; + goto done; + } + + { + CfreeCompileSessionOptions sopts; + CfreeAsmCompileOptions copts; + CfreeSourceInput sin; + CfreeStatus st; + const uint8_t* mem_bytes; + size_t mem_len; + CfreeSlice objslice; + const uint8_t* text = NULL; + size_t text_len = 0; + + memset(&copts, 0, sizeof copts); + memset(&sopts, 0, sizeof sopts); + sopts.lang = CFREE_LANG_ASM; + sopts.compile.code = copts.code; + sopts.compile.diagnostics = copts.diagnostics; + sopts.compile.language_options = &copts; + memset(&sin, 0, sizeof sin); + sin.name = CFREE_SLICE_LIT("<mc>"); + sin.bytes.data = (const uint8_t*)src; + sin.bytes.len = src_len; + sin.lang = CFREE_LANG_ASM; + + st = cfree_compile_session_new(compiler, &sopts, &session); + if (st == CFREE_OK) st = cfree_compile_session_compile(session, &sin, &ob); + if (st != CFREE_OK) { + /* diagnostics already went to stderr via the diag sink */ + rc = 1; + goto done; + } + if (cfree_writer_mem(ctx.heap, &mem) != CFREE_OK) { + driver_errf(MC_TOOL, "out of memory"); + rc = 1; + goto done; + } + if (cfree_obj_builder_emit(ob, mem) != CFREE_OK) { + driver_errf(MC_TOOL, "failed to emit object"); + rc = 1; + goto done; + } + mem_bytes = cfree_writer_mem_bytes(mem, &mem_len); + objslice.data = mem_bytes; + objslice.len = mem_len; + if (cfree_obj_open(&ctx, CFREE_SLICE_LIT("<mc>"), &objslice, &objf) != + CFREE_OK) { + driver_errf(MC_TOOL, "failed to read assembled object"); + rc = 1; + goto done; + } + if (mc_text_section(objf, &text, &text_len) != 0) { + driver_errf(MC_TOOL, "assembled object has no code"); + rc = 1; + goto done; + } + if (o.plain) + mc_print_plain(text, text_len); + else + mc_print_encoding(&ctx, o.target, text, text_len); + mc_print_relocs(objf); + rc = 0; + } + +done: + if (objf) cfree_obj_free(objf); + if (mem) cfree_writer_close(mem); + if (ob) cfree_obj_builder_free(ob); + if (session) cfree_compile_session_free(session); + if (compiler) 
driver_compiler_free(compiler); + if (stdin_buf) driver_free(&env, stdin_buf, stdin_len); + if (src && !read_stdin) driver_free(&env, src, src_len); + driver_env_fini(&env); + return rc; +} diff --git a/driver/driver.h b/driver/driver.h @@ -59,6 +59,7 @@ int driver_xxd(int argc, char** argv); int driver_cmp(int argc, char** argv); int driver_hash(int argc, char** argv); int driver_disas(int argc, char** argv); +int driver_mc(int argc, char** argv); /* Per-tool help printers. Write a multi-section help text to stdout and * return. The tool entry-points call these when invoked with no args, -h, @@ -87,6 +88,7 @@ void driver_help_xxd(void); void driver_help_cmp(void); void driver_help_hash(void); void driver_help_disas(void); +void driver_help_mc(void); /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`, * `cfree help`). Lists each tool with a one-line summary and explains diff --git a/driver/main.c b/driver/main.c @@ -112,6 +112,10 @@ static const DriverToolDesc driver_tools[] = { {"disas", driver_disas, driver_help_disas, "Disassemble raw machine-code bytes for a target arch"}, #endif +#if CFREE_TOOL_MC_ENABLED + {"mc", driver_mc, driver_help_mc, + "Assemble one instruction and show its machine-code encoding"}, +#endif {NULL, NULL, NULL, NULL}, }; diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -118,5 +118,6 @@ #define CFREE_TOOL_CMP_ENABLED 1 #define CFREE_TOOL_HASH_ENABLED 1 #define CFREE_TOOL_DISAS_ENABLED 1 +#define CFREE_TOOL_MC_ENABLED 1 #endif /* CFREE_CONFIG_H */