kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

disasm.c (4276B)


      1 /* x86_64 disassembler.
      2  *
      3  * Walks legacy prefixes + REX, then asks `arch/x64/isa.c`'s descriptor
      4  * table to identify the opcode. The matched row's format drives operand
      5  * rendering. Everything kit's emit.c produces is in the table; anything
      6  * else falls back to a `.byte 0xNN` rendering so kit objdump never
      7  * crashes on unknown bytes. */
      8 
      9 #include "arch/x64/disasm.h"
     10 
     11 #include <string.h>
     12 
     13 #include "arch/x64/isa.h"
     14 #include "core/bytes.h"
     15 #include "core/heap.h"
     16 #include "core/strbuf.h"
     17 
     18 #define X64_DASM_MNEM_CAP 16u
     19 #define X64_DASM_OPS_CAP 128u
     20 #define X64_DASM_ANN_CAP 96u
     21 
     22 typedef struct X64Disasm {
     23   ArchDisasm base;
     24   Compiler* c;
     25   Heap* heap;
     26   char mnem_buf[X64_DASM_MNEM_CAP];
     27   char ops_buf[X64_DASM_OPS_CAP];
     28   char ann_buf[X64_DASM_ANN_CAP];
     29   StrBuf mnem;
     30   StrBuf ops;
     31   StrBuf ann;
     32 } X64Disasm;
     33 
     34 /* Render the mnemonic with any per-format suffix (size letter, condition
     35  * code, etc.) baked in. */
     36 static void emit_mnemonic(StrBuf* sb, const X64InsnDesc* d,
     37                           const X64DecodeCtx* ctx, const u8* bytes) {
     38   if (ctx->has_lock) strbuf_puts(sb, "lock ");
     39   strbuf_put_slice(sb, d->mnemonic);
     40   /* Jcc / SETcc / CMOVcc: the table stores the bare prefix ("j", "set",
     41    * "cmov") and we append the condition suffix from the opcode byte. */
     42   if (d->fmt == X64_FMT_JCC_REL32 || d->fmt == X64_FMT_SETCC_RM ||
     43       d->fmt == X64_FMT_CMOVCC_RR) {
     44     u8 cc = bytes[ctx->opc_off + d->opc_len - 1u] & 0xFu;
     45     strbuf_puts(sb, x64_cc_name(cc));
     46     if (d->fmt == X64_FMT_SETCC_RM || d->fmt == X64_FMT_CMOVCC_RR) {
     47       /* SETcc operates on r/m8; CMOVcc width comes from REX.W. */
     48       char s = x64_size_suffix_for(d, ctx);
     49       if (s) strbuf_putc(sb, s);
     50     }
     51     return;
     52   }
     53   /* Generic width suffix. */
     54   char s = x64_size_suffix_for(d, ctx);
     55   if (s) strbuf_putc(sb, s);
     56 }
     57 
     58 static void render_byte_fallback(X64Disasm* d, u8 byte) {
     59   strbuf_reset(&d->mnem);
     60   strbuf_puts(&d->mnem, ".byte");
     61   strbuf_reset(&d->ops);
     62   strbuf_put_hex_u64(&d->ops, byte);
     63 }
     64 
     65 static u32 x64_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
     66                       KitInsn* out) {
     67   X64Disasm* d = (X64Disasm*)base;
     68   X64DecodeCtx ctx;
     69   const X64InsnDesc* desc;
     70   u32 total;
     71 
     72   if (!len) return 0;
     73 
     74   strbuf_reset(&d->mnem);
     75   strbuf_reset(&d->ops);
     76   strbuf_reset(&d->ann);
     77 
     78   (void)x64_decode_prefixes(bytes, (u32)len, &ctx);
     79   if (ctx.opc_off >= (u32)len) {
     80     render_byte_fallback(d, bytes[0]);
     81     out->vaddr = vaddr;
     82     out->bytes = bytes;
     83     out->nbytes = 1;
     84     out->mnemonic = strbuf_slice(&d->mnem);
     85     out->operands = strbuf_slice(&d->ops);
     86     out->annotation = strbuf_slice(&d->ann);
     87     return 1;
     88   }
     89 
     90   desc = x64_disasm_find(bytes, (u32)len, &ctx);
     91   if (!desc) {
     92     render_byte_fallback(d, bytes[0]);
     93     out->vaddr = vaddr;
     94     out->bytes = bytes;
     95     out->nbytes = 1;
     96     out->mnemonic = strbuf_slice(&d->mnem);
     97     out->operands = strbuf_slice(&d->ops);
     98     out->annotation = strbuf_slice(&d->ann);
     99     return 1;
    100   }
    101 
    102   emit_mnemonic(&d->mnem, desc, &ctx, bytes);
    103   total = x64_print_operands(&d->ops, desc, bytes, (u32)len, &ctx, vaddr);
    104   if (total == 0) {
    105     /* Truncated encoding — fall back to .byte so callers can step past. */
    106     render_byte_fallback(d, bytes[0]);
    107     out->vaddr = vaddr;
    108     out->bytes = bytes;
    109     out->nbytes = 1;
    110     out->mnemonic = strbuf_slice(&d->mnem);
    111     out->operands = strbuf_slice(&d->ops);
    112     out->annotation = strbuf_slice(&d->ann);
    113     return 1;
    114   }
    115 
    116   out->vaddr = vaddr;
    117   out->bytes = bytes;
    118   out->nbytes = total;
    119   out->mnemonic = strbuf_slice(&d->mnem);
    120   out->operands = strbuf_slice(&d->ops);
    121   out->annotation = strbuf_slice(&d->ann);
    122   return total;
    123 }
    124 
    125 static void x64_destroy(ArchDisasm* base) {
    126   X64Disasm* d = (X64Disasm*)base;
    127   d->heap->free(d->heap, d, sizeof *d);
    128 }
    129 
    130 ArchDisasm* x64_disasm_new(Compiler* c) {
    131   Heap* h = (Heap*)c->ctx->heap;
    132   X64Disasm* d = (X64Disasm*)h->alloc(h, sizeof *d, _Alignof(X64Disasm));
    133   if (!d) return NULL;
    134   memset(d, 0, sizeof *d);
    135   d->c = c;
    136   d->heap = h;
    137   d->base.decode = x64_decode;
    138   d->base.destroy = x64_destroy;
    139   strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf);
    140   strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf);
    141   strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf);
    142   return &d->base;
    143 }