kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

disasm.c (13498B)


      1 /* RV64 disassembler — descriptor-table driven.
      2  *
      3  * Decodes a 4-byte word by linear-scan over `rv64_insn_table` and
      4  * dispatches operand printing on the matched format. Compressed (RV64C)
      5  * instructions are 16-bit: a halfword whose low 2 bits are not 0b11
      6  * goes through the C-decode path; the iterator advances by 2 bytes.
      7  *
      8  * Unknown words/halfwords fall back to ".word"/".hword" placeholders. */
      9 
     10 #include "arch/rv64/disasm.h"
     11 
     12 #include <string.h>
     13 
     14 #include "arch/rv64/isa.h"
     15 #include "core/heap.h"
     16 #include "core/strbuf.h"
     17 
     /* Sizes, in bytes, of the fixed character buffers embedded in
      * Rv64InsnFormatter for the mnemonic, operand and annotation text. */
     18 #define RV64_DASM_MNEM_CAP 16u
     19 #define RV64_DASM_OPS_CAP 96u
     20 #define RV64_DASM_ANN_CAP 64u
     /* Encoding id reported by rv64_desc_encoding_id when the descriptor is NULL
      * or is not an element of rv64_insn_table. */
     21 #define RV64_ENCODING_UNKNOWN 0xffffffffu
     22 
     /* Formatter state: callback table, owning context, and three fixed-size
      * text buffers wrapped by StrBufs (see rv64_formatter_init).
      *
      * `base` must remain the first member: rv64_format_insn and
      * rv64_formatter_destroy cast an ArchInsnFormatter* straight back to this
      * type. */
     23 typedef struct Rv64InsnFormatter {
     24   ArchInsnFormatter base; /* format/destroy callbacks */
     25   Compiler* c;            /* compiler handle recorded at init time */
     26   Heap* heap;             /* heap used by rv64_formatter_destroy to free */
     27   char mnem_buf[RV64_DASM_MNEM_CAP]; /* backing storage for `mnem` */
     28   char ops_buf[RV64_DASM_OPS_CAP];   /* backing storage for `ops` */
     29   char ann_buf[RV64_DASM_ANN_CAP];   /* backing storage for `ann` */
     30   StrBuf mnem; /* mnemonic text of the last formatted instruction */
     31   StrBuf ops;  /* operand text of the last formatted instruction */
     32   StrBuf ann;  /* annotation text; reset (left empty) by rv64_format_insn */
     33 } Rv64InsnFormatter;
     34 
     /* Disassembler object returned by rv64_disasm_new: the ArchDisasm callback
      * table followed by an embedded formatter. `base` must remain the first
      * member because rv_decode and rv64_destroy cast ArchDisasm* back to this
      * type; rv64_destroy releases the object through fmt.heap. */
     35 typedef struct Rv64Disasm {
     36   ArchDisasm base;
     37   Rv64InsnFormatter fmt;
     38 } Rv64Disasm;
     39 
     /* Forward declarations: rv64_formatter_init installs these two callbacks
      * before their definitions appear further down in this file. */
     40 static KitStatus rv64_format_insn(ArchInsnFormatter*, const KitDecodedInsn*,
     41                                   KitInsn*);
     42 static void rv64_formatter_destroy(ArchInsnFormatter*);
     43 
     44 static u32 rv_read_u32_le(const u8* b) {
     45   return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24);
     46 }
     47 
     48 static u32 rv_read_u16_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8); }
     49 
     50 static void rv_fmt_emit_fallback32(Rv64InsnFormatter* f, u32 word) {
     51   strbuf_reset(&f->mnem);
     52   strbuf_puts(&f->mnem, ".word");
     53   strbuf_reset(&f->ops);
     54   strbuf_put_hex_u64(&f->ops, (u64)word);
     55 }
     56 
     57 static void rv_fmt_emit_fallback16(Rv64InsnFormatter* f, u32 hw) {
     58   strbuf_reset(&f->mnem);
     59   strbuf_puts(&f->mnem, ".hword");
     60   strbuf_reset(&f->ops);
     61   strbuf_put_hex_u64(&f->ops, (u64)hw);
     62 }
     63 
     64 static u32 rv64_desc_encoding_id(const Rv64InsnDesc* desc) {
     65   u32 i;
     66   if (!desc) return RV64_ENCODING_UNKNOWN;
     67   for (i = 0; i < rv64_insn_table_n; ++i) {
     68     if (desc == &rv64_insn_table[i]) return i;
     69   }
     70   return RV64_ENCODING_UNKNOWN;
     71 }
     72 
     73 static u32 rv64_semantic_opcode(u32 word, u32 nbytes) {
     74   u32 op, funct3, funct7;
     75   if (nbytes != 4u) return RV64_DEC_UNKNOWN;
     76   if (word == rv_ecall()) return RV64_DEC_ECALL;
     77   if (word == rv_ebreak()) return RV64_DEC_EBREAK;
     78   op = word & 0x7fu;
     79   funct3 = (word >> 12) & 0x7u;
     80   funct7 = (word >> 25) & 0x7fu;
     81   if (op == RV_OP_IMM && funct3 == 0u) return RV64_DEC_ADDI;
     82   if (op == RV_OP && funct3 == 0u && funct7 == 0u) return RV64_DEC_ADD;
     83   if (op == RV_AUIPC) return RV64_DEC_AUIPC;
     84   if (op == RV_LOAD && funct3 == 3u) return RV64_DEC_LD;
     85   if (op == RV_STORE && funct3 == 3u) return RV64_DEC_SD;
     86   if (op == RV_JALR && funct3 == 0u) return RV64_DEC_JALR;
     87   return RV64_DEC_UNKNOWN;
     88 }
     89 
     90 static void rv_decop_none(KitDecodedOperand* o) {
     91   memset(o, 0, sizeof(*o));
     92   o->kind = KIT_DECOP_NONE;
     93   o->index_reg = REG_NONE;
     94 }
     95 
     96 static void rv_decop_reg(KitDecodedOperand* o, u32 reg, u8 width_bits) {
     97   rv_decop_none(o);
     98   o->kind = KIT_DECOP_REG;
     99   o->width_bits = width_bits;
    100   o->reg = reg;
    101 }
    102 
    103 static void rv_decop_imm(KitDecodedOperand* o, i64 imm) {
    104   rv_decop_none(o);
    105   o->kind = KIT_DECOP_IMM;
    106   o->imm = imm;
    107 }
    108 
    109 static void rv_decop_sysreg(KitDecodedOperand* o, u32 reg) {
    110   rv_decop_none(o);
    111   o->kind = KIT_DECOP_SYSREG;
    112   o->reg = reg;
    113 }
    114 
    115 static void rv_decop_mem(KitDecodedOperand* o, u32 base, i64 imm,
    116                          u8 width_bits) {
    117   rv_decop_none(o);
    118   o->kind = KIT_DECOP_MEM;
    119   o->width_bits = width_bits;
    120   o->reg = base;
    121   o->imm = imm;
    122 }
    123 
    124 static void rv_decop_pcrel(KitDecodedOperand* o, u64 pc, i64 disp) {
    125   rv_decop_none(o);
    126   o->kind = KIT_DECOP_PCREL;
    127   o->imm = (i64)(pc + (u64)disp);
    128 }
    129 
    130 static u8 rv_load_width_bits(u32 funct3) {
    131   switch (funct3 & 7u) {
    132     case 0:
    133     case 4:
    134       return 8;
    135     case 1:
    136     case 5:
    137       return 16;
    138     case 2:
    139     case 6:
    140       return 32;
    141     case 3:
    142       return 64;
    143     default:
    144       return 0;
    145   }
    146 }
    147 
    148 static u16 rv64_decode_flags(const Rv64InsnDesc* desc, u32 word) {
    149   u16 flags = 0;
    150   Rv64Format fmt;
    151   if (!desc) return 0;
    152   fmt = (Rv64Format)desc->fmt;
    153   switch (fmt) {
    154     case RV64_FMT_B:
    155     case RV64_FMT_CB:
    156     case RV64_FMT_CJ:
    157       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    158       break;
    159     case RV64_FMT_J:
    160       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    161       if (((word >> 7) & 0x1fu) == RV_RA) flags |= KIT_DECODE_CALL;
    162       break;
    163     case RV64_FMT_JALR: {
    164       u32 rd = (word >> 7) & 0x1fu;
    165       u32 rs1 = (word >> 15) & 0x1fu;
    166       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    167       if (rd == RV_RA) flags |= KIT_DECODE_CALL;
    168       if (rd == RV_ZERO && rs1 == RV_RA) flags |= KIT_DECODE_RET;
    169       break;
    170     }
    171     case RV64_FMT_CR:
    172       if (slice_eq_cstr(desc->mnemonic, "c.jr") ||
    173           slice_eq_cstr(desc->mnemonic, "c.jalr")) {
    174         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    175         if (slice_eq_cstr(desc->mnemonic, "c.jalr")) flags |= KIT_DECODE_CALL;
    176       }
    177       break;
    178     case RV64_FMT_SYSTEM:
    179       if (word == rv_ecall() || word == rv_ebreak())
    180         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP;
    181       break;
    182     case RV64_FMT_C_NONE:
    183       if ((word & 0xffffu) == 0x9002u)
    184         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP;
    185       break;
    186     case RV64_FMT_LOAD:
    187     case RV64_FMT_STORE:
    188     case RV64_FMT_FP_LOAD:
    189     case RV64_FMT_FP_STORE:
    190     case RV64_FMT_AMO:
    191     case RV64_FMT_LR:
    192     case RV64_FMT_CL:
    193     case RV64_FMT_CS:
    194     case RV64_FMT_CSS:
    195       flags |= KIT_DECODE_MEMORY;
    196       break;
    197     default:
    198       break;
    199   }
    200   return flags;
    201 }
    202 
    203 static void rv64_decode_operands(const Rv64InsnDesc* desc, u32 word, u64 pc,
    204                                  KitDecodedInsn* out) {
    205   Rv64Format fmt;
    206   if (!desc) return;
    207   fmt = (Rv64Format)desc->fmt;
    208   switch (fmt) {
    209     case RV64_FMT_R:
    210     case RV64_FMT_FP_R:
    211     case RV64_FMT_FP_RM: {
    212       Rv64R r = rv64_r_unpack(word);
    213       out->noperands = 3;
    214       rv_decop_reg(&out->operands[0], r.rd, 64);
    215       rv_decop_reg(&out->operands[1], r.rs1, 64);
    216       rv_decop_reg(&out->operands[2], r.rs2, 64);
    217       break;
    218     }
    219     case RV64_FMT_I: {
    220       Rv64I i = rv64_i_unpack(word);
    221       out->noperands = 3;
    222       rv_decop_reg(&out->operands[0], i.rd, 64);
    223       rv_decop_reg(&out->operands[1], i.rs1, 64);
    224       rv_decop_imm(&out->operands[2], rv64_sext(i.imm12, 12));
    225       break;
    226     }
    227     case RV64_FMT_I_SHIFT:
    228     case RV64_FMT_I_SHIFTW: {
    229       Rv64I i = rv64_i_unpack(word);
    230       out->noperands = 3;
    231       rv_decop_reg(&out->operands[0], i.rd, 64);
    232       rv_decop_reg(&out->operands[1], i.rs1, 64);
    233       rv_decop_imm(&out->operands[2], fmt == RV64_FMT_I_SHIFTW
    234                                           ? (i.imm12 & 0x1f)
    235                                           : (i.imm12 & 0x3f));
    236       break;
    237     }
    238     case RV64_FMT_LOAD:
    239     case RV64_FMT_FP_LOAD: {
    240       Rv64I i = rv64_i_unpack(word);
    241       out->noperands = 2;
    242       rv_decop_reg(&out->operands[0], i.rd, 64);
    243       rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12),
    244                    rv_load_width_bits(i.funct3));
    245       break;
    246     }
    247     case RV64_FMT_S:
    248     case RV64_FMT_STORE:
    249     case RV64_FMT_FP_STORE: {
    250       Rv64S s = rv64_s_unpack(word);
    251       out->noperands = 2;
    252       rv_decop_reg(&out->operands[0], s.rs2, 64);
    253       rv_decop_mem(&out->operands[1], s.rs1, rv64_sext(s.imm12, 12),
    254                    rv_load_width_bits(s.funct3));
    255       break;
    256     }
    257     case RV64_FMT_B: {
    258       Rv64B b = rv64_b_unpack(word);
    259       out->noperands = 3;
    260       rv_decop_reg(&out->operands[0], b.rs1, 64);
    261       rv_decop_reg(&out->operands[1], b.rs2, 64);
    262       rv_decop_pcrel(&out->operands[2], pc, rv64_sext(b.imm13, 13));
    263       break;
    264     }
    265     case RV64_FMT_U: {
    266       Rv64U u = rv64_u_unpack(word);
    267       out->noperands = 2;
    268       rv_decop_reg(&out->operands[0], u.rd, 64);
    269       rv_decop_imm(&out->operands[1], (i64)(i32)u.imm32_hi20);
    270       break;
    271     }
    272     case RV64_FMT_J: {
    273       Rv64J j = rv64_j_unpack(word);
    274       out->noperands = 2;
    275       rv_decop_reg(&out->operands[0], j.rd, 64);
    276       rv_decop_pcrel(&out->operands[1], pc, rv64_sext(j.imm21, 21));
    277       break;
    278     }
    279     case RV64_FMT_JALR: {
    280       Rv64I i = rv64_i_unpack(word);
    281       out->noperands = 2;
    282       rv_decop_reg(&out->operands[0], i.rd, 64);
    283       rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 64);
    284       break;
    285     }
    286     case RV64_FMT_CSR: {
    287       Rv64I i = rv64_i_unpack(word);
    288       out->noperands = 3;
    289       rv_decop_reg(&out->operands[0], i.rd, 64);
    290       rv_decop_sysreg(&out->operands[1], i.imm12);
    291       rv_decop_reg(&out->operands[2], i.rs1, 64);
    292       break;
    293     }
    294     case RV64_FMT_CSRI: {
    295       Rv64I i = rv64_i_unpack(word);
    296       out->noperands = 3;
    297       rv_decop_reg(&out->operands[0], i.rd, 64);
    298       rv_decop_sysreg(&out->operands[1], i.imm12);
    299       rv_decop_imm(&out->operands[2], (i64)i.rs1);
    300       break;
    301     }
    302     default:
    303       break;
    304   }
    305 }
    306 
    307 static KitStatus rv64_decode_one(Compiler* c, const u8* bytes, size_t len,
    308                                  u64 pc, KitDecodedInsn* out) {
    309   const Rv64InsnDesc* desc;
    310   u32 first_hw;
    311   u32 word;
    312   u32 encoding_id;
    313   (void)c;
    314   if (!bytes || !out) return KIT_INVALID;
    315   if (len < 2u) return KIT_MALFORMED;
    316   memset(out, 0, sizeof(*out));
    317   for (u32 i = 0; i < KIT_DECODE_MAX_OPERANDS; ++i)
    318     rv_decop_none(&out->operands[i]);
    319 
    320   first_hw = rv_read_u16_le(bytes);
    321   if ((first_hw & 3u) != 3u) {
    322     word = first_hw;
    323     desc = rv64_disasm_find_c(first_hw);
    324     out->nbytes = 2;
    325   } else {
    326     if (len < 4u) return KIT_MALFORMED;
    327     word = rv_read_u32_le(bytes);
    328     desc = rv64_disasm_find(word);
    329     out->nbytes = 4;
    330   }
    331 
    332   encoding_id = rv64_desc_encoding_id(desc);
    333   out->pc = pc;
    334   out->bytes = bytes;
    335   out->encoding_id = encoding_id;
    336   out->opcode = rv64_semantic_opcode(word, out->nbytes);
    337   out->flags = rv64_decode_flags(desc, word);
    338   out->arch[0] = word;
    339   out->arch[1] = desc ? desc->fmt : 0xffu;
    340   rv64_decode_operands(desc, word, pc, out);
    341   return KIT_OK;
    342 }
    343 
    344 static KitStatus rv64_decode_block(Compiler* c, const u8* bytes, size_t len,
    345                                    u64 pc, KitDecodedInsn* out, u32 cap,
    346                                    u32* n_out) {
    347   u32 n = 0;
    348   if (n_out) *n_out = 0;
    349   if (!bytes || !out || !n_out) return KIT_INVALID;
    350   while (n < cap && len > 0) {
    351     KitStatus st = rv64_decode_one(c, bytes, len, pc, &out[n]);
    352     if (st != KIT_OK) return n ? KIT_OK : st;
    353     bytes += out[n].nbytes;
    354     len -= out[n].nbytes;
    355     pc += out[n].nbytes;
    356     ++n;
    357     if (out[n - 1u].flags & KIT_DECODE_TERMINATOR) break;
    358   }
    359   *n_out = n;
    360   return KIT_OK;
    361 }
    362 
    363 static void rv64_formatter_init(Rv64InsnFormatter* f, Compiler* c, Heap* h) {
    364   memset(f, 0, sizeof(*f));
    365   f->c = c;
    366   f->heap = h;
    367   f->base.format = rv64_format_insn;
    368   f->base.destroy = rv64_formatter_destroy;
    369   strbuf_init(&f->mnem, f->mnem_buf, sizeof f->mnem_buf);
    370   strbuf_init(&f->ops, f->ops_buf, sizeof f->ops_buf);
    371   strbuf_init(&f->ann, f->ann_buf, sizeof f->ann_buf);
    372 }
    373 
    374 static KitStatus rv64_format_insn(ArchInsnFormatter* base,
    375                                   const KitDecodedInsn* insn, KitInsn* out) {
    376   Rv64InsnFormatter* f = (Rv64InsnFormatter*)base;
    377   const Rv64InsnDesc* desc;
    378   u32 word;
    379   if (!f || !insn || !out) return KIT_INVALID;
    380   word = (u32)insn->arch[0];
    381   desc = insn->nbytes == 2u ? rv64_disasm_find_c(word) : rv64_disasm_find(word);
    382   if (desc) {
    383     strbuf_reset(&f->mnem);
    384     strbuf_put_slice(&f->mnem, desc->mnemonic);
    385     strbuf_reset(&f->ops);
    386     rv64_print_operands(&f->ops, desc, word, insn->pc);
    387   } else if (insn->nbytes == 2u) {
    388     rv_fmt_emit_fallback16(f, word);
    389   } else {
    390     rv_fmt_emit_fallback32(f, word);
    391   }
    392 
    393   strbuf_reset(&f->ann);
    394   out->vaddr = insn->pc;
    395   out->bytes = insn->bytes;
    396   out->nbytes = insn->nbytes;
    397   out->mnemonic = strbuf_slice(&f->mnem);
    398   out->operands = strbuf_slice(&f->ops);
    399   out->annotation = strbuf_slice(&f->ann);
    400   return KIT_OK;
    401 }
    402 
    403 static void rv64_formatter_destroy(ArchInsnFormatter* base) {
    404   Rv64InsnFormatter* f = (Rv64InsnFormatter*)base;
    405   if (!f) return;
    406   f->heap->free(f->heap, f, sizeof(*f));
    407 }
    408 
    409 static ArchInsnFormatter* rv64_formatter_new(Compiler* c) {
    410   Heap* h = (Heap*)c->ctx->heap;
    411   Rv64InsnFormatter* f =
    412       (Rv64InsnFormatter*)h->alloc(h, sizeof(*f), _Alignof(Rv64InsnFormatter));
    413   if (!f) return NULL;
    414   rv64_formatter_init(f, c, h);
    415   return &f->base;
    416 }
    417 
    418 static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
    419                      KitInsn* out) {
    420   Rv64Disasm* d = (Rv64Disasm*)base;
    421   KitDecodedInsn insn;
    422   KitStatus st = rv64_decode_one(d->fmt.c, bytes, len, vaddr, &insn);
    423   if (st != KIT_OK) return 0;
    424   st = rv64_format_insn(&d->fmt.base, &insn, out);
    425   if (st != KIT_OK) return 0;
    426   return insn.nbytes;
    427 }
    428 
    429 static void rv64_destroy(ArchDisasm* base) {
    430   Rv64Disasm* d = (Rv64Disasm*)base;
    431   d->fmt.heap->free(d->fmt.heap, d, sizeof(*d));
    432 }
    433 
    434 ArchDisasm* rv64_disasm_new(Compiler* c) {
    435   Heap* h = (Heap*)c->ctx->heap;
    436   Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm));
    437   if (!d) return NULL;
    438   memset(d, 0, sizeof(*d));
    439   d->base.decode = rv_decode;
    440   d->base.destroy = rv64_destroy;
    441   rv64_formatter_init(&d->fmt, c, h);
    442   return &d->base;
    443 }
    444 
    /* Exported RV64 decode operations table. Instruction lengths range from 2
     * bytes (compressed halfword) to 4 bytes; the callbacks are the static
     * functions defined above in this file. */
    445 const ArchDecodeOps rv64_decode_ops = {
    446     .min_insn_len = 2,
    447     .max_insn_len = 4,
    448     .decode_one = rv64_decode_one,
    449     .decode_block = rv64_decode_block,
    450     .formatter_new = rv64_formatter_new,
    451     .format = rv64_format_insn,
    452     .formatter_free = rv64_formatter_destroy,
    453 };