commit ed311622bb71bccb5e5e98fe9c6dfff8f058c999
parent 056fbd737350fc097b36588f4e31e3f6bc48028a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 11:12:10 -0700
driver: add mc tool (assemble an instruction, show its encoding)
llvm-mc --show-encoding style: assembles GAS-subset instruction text through
the same back-end as 'cfree as', emits to an in-memory object, then
disassembles the executable section to print 'mnemonic ops # encoding: [..]'
per instruction. Relocations the assembler emits (undefined-symbol operands)
are listed beneath rather than rejected. -p prints raw .text hex; -target
selects the arch (host default); input is operand tokens or stdin via '-'.
Reuses existing public API only (compile session + cfree_writer_mem +
cfree_obj_open + reloc/disasm iterators). Verified on aarch64/x86_64/riscv64;
mc -p | disas round-trips.
Diffstat:
5 files changed, 307 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -403,6 +403,9 @@ endif
ifeq ($(CFREE_TOOL_DISAS_ENABLED),1)
DRIVER_TOOL_SRCS += driver/cmd/disas.c
endif
+ifeq ($(CFREE_TOOL_MC_ENABLED),1)
+DRIVER_TOOL_SRCS += driver/cmd/mc.c
+endif
DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS))
ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),)
DRIVER_SRCS += driver/lib/cflags.c
diff --git a/driver/cmd/mc.c b/driver/cmd/mc.c
@@ -0,0 +1,297 @@
+#include <cfree/compile.h>
+#include <cfree/core.h>
+#include <cfree/disasm.h>
+#include <cfree/object.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "env.h"
+
+/* `cfree mc` — assemble one (or a few) GAS-subset instructions and show the
+ * machine-code encoding, llvm-mc --show-encoding style. The text is assembled
+ * through the same back-end as `cfree as`, emitted to an in-memory object, and
+ * the executable section is disassembled to attribute bytes to each
+ * instruction. Any relocations the assembler emits (e.g. for a branch to an
+ * undefined symbol) are listed beneath. -p prints just the raw .text hex. */
+
+#define MC_TOOL "mc"
+
+/* Parsed command-line options for `cfree mc`. Zero-initialized by driver_mc
+ * before option parsing. */
+typedef struct McOpts {
+ CfreeTarget target; /* arch to assemble for: host target unless -target is given */
+ int plain; /* -p: raw .text hex only */
+} McOpts;
+
+/* Print the multi-section help text for `cfree mc` (usage, description,
+ * options, exit codes) through driver_printf. Takes no arguments and returns
+ * nothing; driver_mc calls it when invoked with no arguments or when help is
+ * requested. */
+void driver_help_mc(void) {
+ driver_printf(
+ "%.*s",
+ CFREE_SLICE_ARG(CFREE_SLICE_LIT(
+ "cfree mc — assemble an instruction and show its encoding\n"
+ "\n"
+ "USAGE\n"
+ " cfree mc [-target TRIPLE] [-p] \"INSN ...\"\n"
+ " cfree mc [-target TRIPLE] [-p] - (read instructions from "
+ "stdin)\n"
+ "\n"
+ "DESCRIPTION\n"
+ " Assembles the given instruction text (GAS subset: AT&T on x86,\n"
+ " standard mnemonics on aarch64/riscv64) and prints each decoded\n"
+ " instruction with its bytes as `# encoding: [0x..,..]`. Relocations\n"
+ " emitted for undefined-symbol operands are listed beneath.\n"
+ "\n"
+ "OPTIONS\n"
+ " -target TRIPLE architecture to assemble for (host default)\n"
+ " -p print only the raw .text bytes as hex\n"
+ " -h, --help show this help\n"
+ "\n"
+ "EXIT CODES\n"
+ " 0 success 1 assemble error 2 bad usage\n")));
+}
+
+/* Find the first section flagged CFREE_SF_EXEC that yields a non-NULL,
+ * non-empty byte range (format-agnostic: .text / __text). Sections whose info
+ * or data cannot be fetched are skipped. Returns 0 with *data / *len set on
+ * success, 1 when no such section exists (the outputs are then not
+ * meaningful). */
+static int mc_text_section(const CfreeObjFile* f, const uint8_t** data,
+                           size_t* len) {
+  const uint32_t count = cfree_obj_nsections(f);
+  uint32_t idx = 0;
+  while (idx < count) {
+    CfreeObjSecInfo info;
+    /* Short-circuit order matters: data is only fetched for exec sections. */
+    if (cfree_obj_section(f, idx, &info) == CFREE_OK &&
+        (info.flags & CFREE_SF_EXEC) != 0 &&
+        cfree_obj_section_data(f, idx, data, len) == CFREE_OK &&
+        *data != NULL && *len != 0) {
+      return 0;
+    }
+    ++idx;
+  }
+  return 1;
+}
+
+/* Print `len` bytes of `text` as one line of contiguous two-digit lowercase
+ * hex (no separators), followed by a newline. An empty range prints just the
+ * newline. */
+static void mc_print_plain(const uint8_t* text, size_t len) {
+  const uint8_t* cursor = text;
+  size_t remaining = len;
+  while (remaining > 0) {
+    driver_printf("%02x", *cursor);
+    ++cursor;
+    --remaining;
+  }
+  driver_printf("\n");
+}
+
+/* Decode the assembled code bytes and print one
+ * `mnemonic ops<TAB># encoding: [0x..,0x..]` line per instruction. The
+ * operands part is omitted when the decoder reports none. If a disassembly
+ * iterator cannot be created for `target`, the bytes are printed as a single
+ * raw-hex line instead. Decoding stops at the first iterator result that is
+ * not an item. */
+static void mc_print_encoding(const CfreeContext* ctx, CfreeTarget target,
+                              const uint8_t* text, size_t len) {
+  CfreeDisasmContext disasm_ctx;
+  CfreeDisasmIter* iter = NULL;
+  CfreeInsn decoded;
+
+  memset(&disasm_ctx, 0, sizeof disasm_ctx);
+  disasm_ctx.context = *ctx;
+  disasm_ctx.target = target;
+
+  if (cfree_disasm_iter_new(&disasm_ctx, text, len, 0, NULL, &iter) ==
+      CFREE_OK) {
+    for (;;) {
+      uint32_t idx;
+      if (cfree_disasm_iter_next(iter, &decoded) != CFREE_ITER_ITEM) break;
+      driver_printf("%.*s", CFREE_SLICE_ARG(decoded.mnemonic));
+      if (decoded.operands.len != 0) {
+        driver_printf(" %.*s", CFREE_SLICE_ARG(decoded.operands));
+      }
+      driver_printf("\t# encoding: [");
+      for (idx = 0; idx < decoded.nbytes; ++idx) {
+        /* Comma before every byte except the first. */
+        const char* sep = (idx == 0) ? "" : ",";
+        driver_printf("%s0x%02x", sep, decoded.bytes[idx]);
+      }
+      driver_printf("]\n");
+    }
+    cfree_disasm_iter_free(iter);
+    return;
+  }
+
+  /* No disassembler: fall back to a single raw-hex line. */
+  mc_print_plain(text, len);
+}
+
+/* List any relocations the assembler emitted (undefined-symbol operands). */
+static void mc_print_relocs(CfreeObjFile* f) {
+ CfreeObjRelocIter* it = NULL;
+ CfreeObjReloc r;
+ if (cfree_obj_reliter_new(f, &it) != CFREE_OK) return;
+ while (cfree_obj_reliter_next(it, &r) == CFREE_ITER_ITEM) {
+ driver_printf("# reloc %.*s %.*s",
+ CFREE_SLICE_ARG(r.kind_name.len ? r.kind_name
+ : CFREE_SLICE_LIT("?")),
+ CFREE_SLICE_ARG(r.sym_name.len ? r.sym_name
+ : CFREE_SLICE_LIT("*ABS*")));
+ if (r.addend)
+ driver_printf("%c0x%llx", r.addend < 0 ? '-' : '+',
+ (unsigned long long)(r.addend < 0 ? -r.addend : r.addend));
+ driver_printf(" @ .text+0x%llx\n", (unsigned long long)r.offset);
+ }
+ cfree_obj_reliter_free(it);
+}
+
+/* Join the instruction operands argv[first..argc) with single spaces into a
+ * newline-terminated source buffer obtained from driver_alloc. With no
+ * operands the buffer is a lone "\n".
+ *
+ * The buffer is NOT NUL-terminated: it is exactly *out_len bytes long and is
+ * meant to be consumed as a length-delimited slice. Allocating exactly
+ * *out_len bytes keeps the size the caller later passes to driver_free equal
+ * to the size that was allocated.
+ *
+ * Returns the buffer and stores its length in *out_len, or returns NULL on
+ * OOM (in which case *out_len is left untouched). */
+static char* mc_join_source(DriverEnv* env, int first, int argc, char** argv,
+                            size_t* out_len) {
+  size_t total = 0;
+  size_t pos = 0;
+  char* buf;
+  int i;
+  /* Every token is followed by exactly one separator byte (' ' or '\n'). */
+  for (i = first; i < argc; ++i) total += driver_strlen(argv[i]) + 1;
+  if (total == 0) total = 1; /* no tokens: room for the lone '\n' */
+  buf = (char*)driver_alloc(env, total);
+  if (!buf) return NULL;
+  for (i = first; i < argc; ++i) {
+    size_t n = driver_strlen(argv[i]);
+    driver_memcpy(buf + pos, argv[i], n);
+    pos += n;
+    buf[pos++] = (i + 1 < argc) ? ' ' : '\n';
+  }
+  if (pos == 0) buf[pos++] = '\n';
+  *out_len = pos; /* always equals the allocated size */
+  return buf;
+}
+
+/* Entry point for `cfree mc`.
+ *
+ * Leading arguments are parsed as options (-target TRIPLE, -p). The first
+ * non-option argument starts the instruction text: it and every argument after
+ * it are joined into the assembly source. A lone `-` reads the source from
+ * stdin instead; arguments after it are not examined.
+ *
+ * The source is assembled through a compile session, emitted to an in-memory
+ * writer, reopened as an object file, and its first non-empty executable
+ * section is printed: as raw hex with -p, otherwise as one decoded instruction
+ * per line followed by the relocation listing.
+ *
+ * Returns 0 on success (or when help was requested), 1 on an assemble or
+ * runtime failure, and 2 on bad usage (including being invoked with no
+ * arguments, after printing help). */
+int driver_mc(int argc, char** argv) {
+  DriverEnv env;
+  CfreeContext ctx;
+  McOpts o;
+  CfreeCompiler* compiler = NULL;
+  CfreeCompileSession* session = NULL;
+  CfreeObjBuilder* ob = NULL;
+  CfreeWriter* mem = NULL;
+  CfreeObjFile* objf = NULL;
+  char* src = NULL;
+  size_t src_len = 0;
+  uint8_t* stdin_buf = NULL;
+  size_t stdin_len = 0;
+  int first_pos = 0; /* argv index of the first instruction token, 0 = none */
+  int read_stdin = 0;
+  int i, rc = 2; /* usage error unless a later stage says otherwise */
+
+  /* Handled before driver_env_init, so a plain return needs no cleanup. */
+  if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+    driver_help_mc();
+    return argc < 2 ? 2 : 0;
+  }
+
+  memset(&o, 0, sizeof o);
+  o.target = driver_host_target();
+  driver_env_init(&env);
+
+  /* Option scan: stops at the first instruction token or at `-`. */
+  for (i = 1; i < argc; ++i) {
+    const char* a = argv[i];
+    if (driver_streq(a, "-target")) {
+      if (i + 1 >= argc) {
+        driver_errf(MC_TOOL, "-target requires an argument");
+        goto done;
+      }
+      if (driver_target_from_triple(argv[++i], &o.target) != 0) {
+        driver_errf(MC_TOOL, "unrecognized target: %s", argv[i]);
+        goto done;
+      }
+      continue;
+    }
+    if (driver_streq(a, "-p")) {
+      o.plain = 1;
+      continue;
+    }
+    if (driver_streq(a, "-")) {
+      read_stdin = 1;
+      first_pos = i; /* marks "have input"; stdin overrides token join */
+      break;
+    }
+    if (a[0] == '-' && a[1] != '\0') {
+      driver_errf(MC_TOOL, "unknown option: %s", a);
+      goto done;
+    }
+    first_pos = i; /* first instruction token */
+    break;
+  }
+
+  /* Obtain the assembly source: stdin, joined argv tokens, or a usage error. */
+  if (read_stdin) {
+    if (!driver_read_stdin(&env, &stdin_buf, &stdin_len)) {
+      driver_errf(MC_TOOL, "failed to read stdin");
+      rc = 1;
+      goto done;
+    }
+    src = (char*)stdin_buf;
+    src_len = stdin_len;
+  } else if (first_pos != 0) {
+    src = mc_join_source(&env, first_pos, argc, argv, &src_len);
+    if (!src) {
+      driver_errf(MC_TOOL, "out of memory");
+      rc = 1;
+      goto done;
+    }
+  } else {
+    driver_errf(MC_TOOL, "no instruction given");
+    goto done;
+  }
+
+  ctx = driver_env_to_context(&env);
+  if (driver_compiler_new(o.target, &ctx, &compiler) != CFREE_OK) {
+    driver_errf(MC_TOOL, "failed to initialize compiler");
+    rc = 1;
+    goto done;
+  }
+
+  {
+    CfreeCompileSessionOptions sopts;
+    CfreeAsmCompileOptions copts;
+    CfreeSourceInput sin;
+    CfreeStatus st;
+    const uint8_t* mem_bytes;
+    size_t mem_len;
+    CfreeSlice objslice;
+    const uint8_t* text = NULL;
+    size_t text_len = 0;
+
+    /* All-default assembler options; the session is told the input is asm. */
+    memset(&copts, 0, sizeof copts);
+    memset(&sopts, 0, sizeof sopts);
+    sopts.lang = CFREE_LANG_ASM;
+    sopts.compile.code = copts.code;
+    sopts.compile.diagnostics = copts.diagnostics;
+    sopts.compile.language_options = &copts;
+    memset(&sin, 0, sizeof sin);
+    sin.name = CFREE_SLICE_LIT("<mc>");
+    sin.bytes.data = (const uint8_t*)src;
+    sin.bytes.len = src_len;
+    sin.lang = CFREE_LANG_ASM;
+
+    st = cfree_compile_session_new(compiler, &sopts, &session);
+    if (st == CFREE_OK) st = cfree_compile_session_compile(session, &sin, &ob);
+    if (st != CFREE_OK) {
+      /* diagnostics already went to stderr via the diag sink */
+      rc = 1;
+      goto done;
+    }
+    if (cfree_writer_mem(ctx.heap, &mem) != CFREE_OK) {
+      driver_errf(MC_TOOL, "out of memory");
+      rc = 1;
+      goto done;
+    }
+    if (cfree_obj_builder_emit(ob, mem) != CFREE_OK) {
+      driver_errf(MC_TOOL, "failed to emit object");
+      rc = 1;
+      goto done;
+    }
+    /* Reopen the emitted bytes as an object file to find the code section. */
+    mem_bytes = cfree_writer_mem_bytes(mem, &mem_len);
+    objslice.data = mem_bytes;
+    objslice.len = mem_len;
+    if (cfree_obj_open(&ctx, CFREE_SLICE_LIT("<mc>"), &objslice, &objf) !=
+        CFREE_OK) {
+      driver_errf(MC_TOOL, "failed to read assembled object");
+      rc = 1;
+      goto done;
+    }
+    if (mc_text_section(objf, &text, &text_len) != 0) {
+      driver_errf(MC_TOOL, "assembled object has no code");
+      rc = 1;
+      goto done;
+    }
+    if (o.plain) {
+      /* -p is documented as printing only the raw .text hex, so the
+       * relocation listing is limited to the encoding mode; this keeps the
+       * plain output pure hex for piping into other tools. */
+      mc_print_plain(text, text_len);
+    } else {
+      mc_print_encoding(&ctx, o.target, text, text_len);
+      mc_print_relocs(objf);
+    }
+    rc = 0;
+  }
+
+done:
+  /* Release in reverse order of acquisition; objf is freed before the writer
+   * whose bytes it was opened over. */
+  if (objf) cfree_obj_free(objf);
+  if (mem) cfree_writer_close(mem);
+  if (ob) cfree_obj_builder_free(ob);
+  if (session) cfree_compile_session_free(session);
+  if (compiler) driver_compiler_free(compiler);
+  if (stdin_buf) driver_free(&env, stdin_buf, stdin_len);
+  /* In stdin mode src aliases stdin_buf, which was released just above. */
+  if (src && !read_stdin) driver_free(&env, src, src_len);
+  driver_env_fini(&env);
+  return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -59,6 +59,7 @@ int driver_xxd(int argc, char** argv);
int driver_cmp(int argc, char** argv);
int driver_hash(int argc, char** argv);
int driver_disas(int argc, char** argv);
+int driver_mc(int argc, char** argv);
/* Per-tool help printers. Write a multi-section help text to stdout and
* return. The tool entry-points call these when invoked with no args, -h,
@@ -87,6 +88,7 @@ void driver_help_xxd(void);
void driver_help_cmp(void);
void driver_help_hash(void);
void driver_help_disas(void);
+void driver_help_mc(void);
/* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`,
* `cfree help`). Lists each tool with a one-line summary and explains
diff --git a/driver/main.c b/driver/main.c
@@ -112,6 +112,10 @@ static const DriverToolDesc driver_tools[] = {
{"disas", driver_disas, driver_help_disas,
"Disassemble raw machine-code bytes for a target arch"},
#endif
+#if CFREE_TOOL_MC_ENABLED
+ {"mc", driver_mc, driver_help_mc,
+ "Assemble one instruction and show its machine-code encoding"},
+#endif
{NULL, NULL, NULL, NULL},
};
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -118,5 +118,6 @@
#define CFREE_TOOL_CMP_ENABLED 1
#define CFREE_TOOL_HASH_ENABLED 1
#define CFREE_TOOL_DISAS_ENABLED 1
+#define CFREE_TOOL_MC_ENABLED 1
#endif /* CFREE_CONFIG_H */