kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

mc.c (9902B)


      1 #include <kit/compile.h>
      2 #include <kit/core.h>
      3 #include <kit/disasm.h>
      4 #include <kit/object.h>
      5 #include <stddef.h>
      6 #include <stdint.h>
      7 #include <string.h>
      8 
      9 #include "driver.h"
     10 #include "env.h"
     11 
     12 /* `kit mc` — assemble one (or a few) GAS-subset instructions and show the
     13  * machine-code encoding, llvm-mc --show-encoding style. The text is assembled
     14  * through the same back-end as `kit as`, emitted to an in-memory object, and
     15  * the executable section is disassembled to attribute bytes to each
     16  * instruction. Any relocations the assembler emits (e.g. for a branch to an
     17  * undefined symbol) are listed beneath. -p prints just the raw .text hex. */
     18 
     19 #define MC_TOOL "mc"
     20 
     21 typedef struct McOpts {
     22   KitTargetSpec target;
     23   int plain; /* -p: raw .text hex only */
     24 } McOpts;
     25 
     /* Print the `kit mc` usage text through driver_printf. The text is written
      * as one string literal per output line; adjacent literals are concatenated
      * by the compiler into a single slice. */
     void driver_help_mc(void) {
       driver_printf(
           "%.*s",
           KIT_SLICE_ARG(KIT_SLICE_LIT(
               "kit mc — assemble an instruction and show its encoding\n"
               "\n"
               "USAGE\n"
               "  kit mc [-target TRIPLE] [-p] \"INSN ...\"\n"
               "  kit mc [-target TRIPLE] [-p] -        (read instructions from stdin)\n"
               "\n"
               "DESCRIPTION\n"
               "  Assembles the given instruction text (GAS subset: AT&T on x86,\n"
               "  standard mnemonics on aarch64/riscv64) and prints each decoded\n"
               "  instruction with its bytes as `# encoding: [0x..,..]`. Relocations\n"
               "  emitted for undefined-symbol operands are listed beneath.\n"
               "\n"
               "OPTIONS\n"
               "  -target TRIPLE   architecture to assemble for (host default)\n"
               "  -p               print only the raw .text bytes as hex\n"
               "  -h, --help       show this help\n"
               "\n"
               "EXIT CODES\n"
               "  0   success           1   assemble error       2   bad usage\n")));
     }
     52 
     53 /* Locate the executable section's bytes (format-agnostic: .text / __text). */
     54 static int mc_text_section(const KitObjFile* f, const uint8_t** data,
     55                            size_t* len) {
     56   uint32_t n = kit_obj_nsections(f);
     57   uint32_t i;
     58   for (i = 0; i < n; ++i) {
     59     KitObjSecInfo sec;
     60     if (kit_obj_section(f, i, &sec) != KIT_OK) continue;
     61     if (!(sec.flags & KIT_SF_EXEC)) continue;
     62     if (kit_obj_section_data(f, i, data, len) != KIT_OK) continue;
     63     if (*data && *len) return 0;
     64   }
     65   return 1;
     66 }
     67 
     /* Print the `len` bytes at `text` as one unbroken run of two-digit
      * lowercase hex, then a newline. With len == 0 only the newline is printed
      * and `text` is never read. */
     static void mc_print_plain(const uint8_t* text, size_t len) {
       const uint8_t* cursor = text;
       size_t remaining;
       for (remaining = len; remaining != 0; --remaining) {
         driver_printf("%02x", *cursor++);
       }
       driver_printf("\n");
     }
     73 
     74 /* Disassemble the assembled .text and print one `mnemonic ops # encoding: [..]`
     75  * line per instruction. */
     76 static void mc_print_encoding(const KitContext* ctx, const KitTarget* target,
     77                               const uint8_t* text, size_t len) {
     78   KitDisasmContext dctx;
     79   KitDisasmIter* it = NULL;
     80   KitInsn insn;
     81   memset(&dctx, 0, sizeof dctx);
     82   dctx.target = target;
     83   dctx.context = *ctx;
     84   if (kit_disasm_iter_new(&dctx, text, len, 0, NULL, &it) != KIT_OK) {
     85     /* No disassembler: fall back to a single raw-hex line. */
     86     mc_print_plain(text, len);
     87     return;
     88   }
     89   while (kit_disasm_iter_next(it, &insn) == KIT_ITER_ITEM) {
     90     uint32_t b;
     91     driver_printf("%.*s", KIT_SLICE_ARG(insn.mnemonic));
     92     if (insn.operands.len) driver_printf(" %.*s", KIT_SLICE_ARG(insn.operands));
     93     driver_printf("\t# encoding: [");
     94     for (b = 0; b < insn.nbytes; ++b)
     95       driver_printf("%s0x%02x", b ? "," : "", insn.bytes[b]);
     96     driver_printf("]\n");
     97   }
     98   kit_disasm_iter_free(it);
     99 }
    100 
    101 /* List any relocations the assembler emitted (undefined-symbol operands). */
    102 static void mc_print_relocs(KitObjFile* f) {
    103   KitObjRelocIter* it = NULL;
    104   KitObjReloc r;
    105   if (kit_obj_reliter_new(f, &it) != KIT_OK) return;
    106   while (kit_obj_reliter_next(it, &r) == KIT_ITER_ITEM) {
    107     driver_printf(
    108         "#  reloc %.*s %.*s",
    109         KIT_SLICE_ARG(r.kind_name.len ? r.kind_name : KIT_SLICE_LIT("?")),
    110         KIT_SLICE_ARG(r.sym_name.len ? r.sym_name : KIT_SLICE_LIT("*ABS*")));
    111     if (r.addend)
    112       driver_printf("%c0x%llx", r.addend < 0 ? '-' : '+',
    113                     (unsigned long long)(r.addend < 0 ? -r.addend : r.addend));
    114     driver_printf(" @ .text+0x%llx\n", (unsigned long long)r.offset);
    115   }
    116   kit_obj_reliter_free(it);
    117 }
    118 
    119 /* Join the instruction operands argv[first..argc) into a newline-terminated
    120  * source buffer. Returns NULL on OOM. */
    121 static char* mc_join_source(DriverEnv* env, int first, int argc, char** argv,
    122                             size_t* out_len) {
    123   size_t total = 0;
    124   int i;
    125   char* buf;
    126   size_t pos = 0;
    127   for (i = first; i < argc; ++i) total += driver_strlen(argv[i]) + 1;
    128   total += 1; /* trailing NUL */
    129   buf = (char*)driver_alloc(env, total);
    130   if (!buf) return NULL;
    131   for (i = first; i < argc; ++i) {
    132     size_t n = driver_strlen(argv[i]);
    133     driver_memcpy(buf + pos, argv[i], n);
    134     pos += n;
    135     buf[pos++] = (i + 1 < argc) ? ' ' : '\n';
    136   }
    137   if (pos == 0) buf[pos++] = '\n';
    138   *out_len = pos;
    139   return buf;
    140 }
    141 
    142 int driver_mc(int argc, char** argv) {
    143   DriverEnv env;
    144   KitContext ctx;
    145   McOpts o;
    146   DriverTargetFeatures tf = {0};
    147   KitTarget* target = NULL;
    148   KitCompiler* compiler = NULL;
    149   KitCompileSession* session = NULL;
    150   KitObjBuilder* ob = NULL;
    151   KitWriter* mem = NULL;
    152   KitObjFile* objf = NULL;
    153   char* src = NULL;
    154   size_t src_len = 0;
    155   uint8_t* stdin_buf = NULL;
    156   size_t stdin_len = 0;
    157   int first_pos = 0; /* argv index of the first instruction token, 0 = none */
    158   int read_stdin = 0;
    159   int i, rc = 2;
    160 
    161   if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
    162     driver_help_mc();
    163     return argc < 2 ? 2 : 0;
    164   }
    165 
    166   memset(&o, 0, sizeof o);
    167   o.target = driver_host_target();
    168   driver_env_init(&env);
    169   if (driver_target_features_init(&tf, &env, argc) != 0) {
    170     driver_errf(MC_TOOL, "out of memory");
    171     driver_target_features_fini(&tf, &env);
    172     driver_env_fini(&env);
    173     return 1;
    174   }
    175 
    176   for (i = 1; i < argc; ++i) {
    177     const char* a = argv[i];
    178     if (driver_streq(a, "-target")) {
    179       if (i + 1 >= argc) {
    180         driver_errf(MC_TOOL, "-target requires an argument");
    181         goto done;
    182       }
    183       if (driver_target_from_triple(argv[++i], &o.target) != 0) {
    184         driver_errf(MC_TOOL, "unrecognized target: %s", argv[i]);
    185         goto done;
    186       }
    187       continue;
    188     }
    189     if (driver_streq(a, "-p")) {
    190       o.plain = 1;
    191       continue;
    192     }
    193     {
    194       int tr = driver_target_features_try_consume(&tf, &env, MC_TOOL, argc,
    195                                                   argv, &i);
    196       if (tr < 0) goto done;
    197       if (tr > 0) continue;
    198     }
    199     if (driver_streq(a, "-")) {
    200       read_stdin = 1;
    201       first_pos = i; /* marks "have input"; stdin overrides token join */
    202       break;
    203     }
    204     if (a[0] == '-' && a[1] != '\0') {
    205       driver_errf(MC_TOOL, "unknown option: %s", a);
    206       goto done;
    207     }
    208     first_pos = i; /* first instruction token */
    209     break;
    210   }
    211 
    212   if (read_stdin) {
    213     if (!driver_read_stdin(&env, &stdin_buf, &stdin_len)) {
    214       driver_errf(MC_TOOL, "failed to read stdin");
    215       rc = 1;
    216       goto done;
    217     }
    218     src = (char*)stdin_buf;
    219     src_len = stdin_len;
    220   } else if (first_pos != 0) {
    221     src = mc_join_source(&env, first_pos, argc, argv, &src_len);
    222     if (!src) {
    223       driver_errf(MC_TOOL, "out of memory");
    224       rc = 1;
    225       goto done;
    226     }
    227   } else {
    228     driver_errf(MC_TOOL, "no instruction given");
    229     goto done;
    230   }
    231 
    232   ctx = driver_env_to_context(&env);
    233   if (driver_target_new(&ctx, o.target, &tf, MC_TOOL, &target) != KIT_OK ||
    234       driver_compiler_new(target, &ctx, &compiler) != KIT_OK) {
    235     driver_errf(MC_TOOL, "failed to initialize compiler");
    236     rc = 1;
    237     goto done;
    238   }
    239 
    240   {
    241     KitCompileSessionOptions sopts;
    242     KitAsmCompileOptions copts;
    243     KitSourceInput sin;
    244     KitStatus st;
    245     const uint8_t* mem_bytes;
    246     size_t mem_len;
    247     KitSlice objslice;
    248     const uint8_t* text = NULL;
    249     size_t text_len = 0;
    250 
    251     memset(&copts, 0, sizeof copts);
    252     memset(&sopts, 0, sizeof sopts);
    253     sopts.lang = KIT_LANG_ASM;
    254     sopts.compile.code = copts.code;
    255     sopts.compile.diagnostics = copts.diagnostics;
    256     sopts.compile.language_options = &copts;
    257     memset(&sin, 0, sizeof sin);
    258     sin.name = KIT_SLICE_LIT("<mc>");
    259     sin.bytes.data = (const uint8_t*)src;
    260     sin.bytes.len = src_len;
    261     sin.lang = KIT_LANG_ASM;
    262 
    263     st = kit_compile_session_new(compiler, &sopts, &session);
    264     if (st == KIT_OK) st = kit_compile_session_compile(session, &sin, &ob);
    265     if (st != KIT_OK) {
    266       /* diagnostics already went to stderr via the diag sink */
    267       rc = 1;
    268       goto done;
    269     }
    270     if (kit_writer_mem(ctx.heap, &mem) != KIT_OK) {
    271       driver_errf(MC_TOOL, "out of memory");
    272       rc = 1;
    273       goto done;
    274     }
    275     if (kit_obj_builder_emit(ob, mem) != KIT_OK) {
    276       driver_errf(MC_TOOL, "failed to emit object");
    277       rc = 1;
    278       goto done;
    279     }
    280     mem_bytes = kit_writer_mem_bytes(mem, &mem_len);
    281     objslice.data = mem_bytes;
    282     objslice.len = mem_len;
    283     if (kit_obj_open(&ctx, KIT_SLICE_LIT("<mc>"), &objslice, &objf) != KIT_OK) {
    284       driver_errf(MC_TOOL, "failed to read assembled object");
    285       rc = 1;
    286       goto done;
    287     }
    288     if (mc_text_section(objf, &text, &text_len) != 0) {
    289       driver_errf(MC_TOOL, "assembled object has no code");
    290       rc = 1;
    291       goto done;
    292     }
    293     if (o.plain)
    294       mc_print_plain(text, text_len);
    295     else
    296       mc_print_encoding(&ctx, target, text, text_len);
    297     mc_print_relocs(objf);
    298     rc = 0;
    299   }
    300 
    301 done:
    302   if (objf) kit_obj_free(objf);
    303   if (mem) kit_writer_close(mem);
    304   if (ob) kit_obj_builder_free(ob);
    305   if (session) kit_compile_session_free(session);
    306   if (compiler) driver_compiler_free(compiler);
    307   kit_target_free(target);
    308   if (stdin_buf) driver_free(&env, stdin_buf, stdin_len);
    309   if (src && !read_stdin) driver_free(&env, src, src_len);
    310   driver_target_features_fini(&tf, &env);
    311   driver_env_fini(&env);
    312   return rc;
    313 }