kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

disasm.c (4780B)


      1 /* AArch64 disassembler implementation.
      2  *
      3  * Decodes one 4-byte instruction word per call into a KitInsn whose
      4  * string fields point into iterator-owned StrBufs. The decoder shares
      5  * the aa64_isa.{h,c} descriptor table with the encoder: aa64_disasm_find
      6  * matches the word; aa64_print_operands renders operand text via the
      7  * format's unpack + per-format pretty-printer. Mnemonic rewriting (the
      8  * one bit the printer can't own, because b.cond rolls cond into the
      9  * "operand" text) happens here. */
     10 
     11 #include "arch/aa64/disasm.h"
     12 
     13 #include <string.h>
     14 
     15 #include "arch/aa64/isa.h"
     16 #include "core/heap.h"
     17 #include "core/strbuf.h"
     18 
     19 /* Enough for any aarch64 mnemonic-with-suffix ("b.cond" → "b.le", etc.). */
     20 #define AA64_DASM_MNEM_CAP 16u
     21 /* Operand text. The widest cases (LDP X, X, [SP, #-imm]!) fit easily. */
     22 #define AA64_DASM_OPS_CAP 96u
     23 /* Annotation overlay (symbol + addend). */
     24 #define AA64_DASM_ANN_CAP 96u
     25 
     26 typedef struct AA64Disasm {
     27   ArchDisasm base;
     28   Compiler* c;
     29   Heap* heap;
     30   char mnem_buf[AA64_DASM_MNEM_CAP];
     31   char ops_buf[AA64_DASM_OPS_CAP];
     32   char ann_buf[AA64_DASM_ANN_CAP];
     33   StrBuf mnem;
     34   StrBuf ops;
     35   StrBuf ann;
     36 } AA64Disasm;
     37 
     38 static const char* aa64_cond_names[16] = {
     39     "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
     40     "hi", "ls", "ge", "lt", "gt", "le", "al", "nv",
     41 };
     42 
     43 static void aa64_write_mnemonic(AA64Disasm* d, const AA64InsnDesc* desc,
     44                                 u32 word) {
     45   strbuf_reset(&d->mnem);
     46   if (desc->fmt == AA64_FMT_BR_COND) {
     47     /* Synthesize "b.<cond>" so the operands buffer can hold just the
     48      * target. Matches GNU as / objdump conventions. */
     49     u32 cond = word & 0xfu;
     50     strbuf_puts(&d->mnem, "b.");
     51     strbuf_puts(&d->mnem, aa64_cond_names[cond]);
     52     return;
     53   }
     54   if (desc->fmt == AA64_FMT_BITFIELD) {
     55     /* SBFM/UBFM disassemble to their preferred aliases when the
     56      * fields match; aa64_print_operands renders the matching operands. */
     57     u32 shift, lsb, width;
     58     const char* alias = aa64_bitfield_shift_alias(word, &shift);
     59     if (!alias) alias = aa64_bitfield_extend_alias(word);
     60     if (!alias) alias = aa64_bitfield_extract_alias(word, &lsb, &width);
     61     (void)lsb;
     62     (void)width;
     63     if (alias) {
     64       strbuf_puts(&d->mnem, alias);
     65       return;
     66     }
     67   }
     68   strbuf_put_slice(&d->mnem, desc->mnemonic);
     69 }
     70 
     71 static void aa64_write_operands(AA64Disasm* d, const AA64InsnDesc* desc,
     72                                 u32 word, u64 vaddr) {
     73   strbuf_reset(&d->ops);
     74   if (desc->fmt == AA64_FMT_BR_COND) {
     75     /* aa64_print_operands prints "<cond> <target>"; we already lifted
     76      * the cond into the mnemonic, so skip the dispatcher and inline
     77      * just the target. */
     78     AA64BrCond f = aa64_brcond_unpack(word);
     79     i64 ofs = (i64)((u64)f.imm19 & 0x7ffffu);
     80     /* sign-extend 19 bits */
     81     if (ofs & 0x40000) ofs |= ~(i64)0x7ffff;
     82     ofs *= 4;
     83     if (vaddr) {
     84       strbuf_put_hex_u64(&d->ops, vaddr + (u64)ofs);
     85     } else {
     86       strbuf_puts(&d->ops, "#");
     87       strbuf_put_i64(&d->ops, ofs);
     88     }
     89     return;
     90   }
     91   aa64_print_operands(&d->ops, desc, word, vaddr);
     92 }
     93 
     94 static u32 aa64_read_u32_le(const u8* b) {
     95   return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24);
     96 }
     97 
     98 static void aa64_write_unknown(AA64Disasm* d, u32 word) {
     99   strbuf_reset(&d->mnem);
    100   strbuf_puts(&d->mnem, ".inst");
    101   strbuf_reset(&d->ops);
    102   strbuf_put_hex_u64(&d->ops, (u64)word);
    103 }
    104 
    105 static u32 aa64_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
    106                        KitInsn* out) {
    107   AA64Disasm* d = (AA64Disasm*)base;
    108   if (len < 4u) return 0;
    109   u32 word = aa64_read_u32_le(bytes);
    110   const AA64InsnDesc* desc = aa64_disasm_find(word);
    111   if (desc) {
    112     aa64_write_mnemonic(d, desc, word);
    113     aa64_write_operands(d, desc, word, vaddr);
    114   } else {
    115     aa64_write_unknown(d, word);
    116   }
    117   /* Annotation overlay is owned by the public iterator (kit_disasm_iter_*).
    118    * The arch-level decoder leaves it empty. */
    119   strbuf_reset(&d->ann);
    120   out->vaddr = vaddr;
    121   out->bytes = bytes;
    122   out->nbytes = 4;
    123   out->mnemonic = strbuf_slice(&d->mnem);
    124   out->operands = strbuf_slice(&d->ops);
    125   out->annotation = strbuf_slice(&d->ann);
    126   return 4;
    127 }
    128 
    129 static void aa64_destroy(ArchDisasm* base) {
    130   AA64Disasm* d = (AA64Disasm*)base;
    131   d->heap->free(d->heap, d, sizeof(*d));
    132 }
    133 
    134 ArchDisasm* aa64_disasm_new(Compiler* c) {
    135   Heap* h = (Heap*)c->ctx->heap;
    136   AA64Disasm* d = (AA64Disasm*)h->alloc(h, sizeof(*d), _Alignof(AA64Disasm));
    137   if (!d) return NULL;
    138   memset(d, 0, sizeof(*d));
    139   d->c = c;
    140   d->heap = h;
    141   d->base.decode = aa64_decode;
    142   d->base.destroy = aa64_destroy;
    143   strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf);
    144   strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf);
    145   strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf);
    146   return &d->base;
    147 }