kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

disasm.c (14440B)


      1 /* RV64 disassembler — descriptor-table driven.
      2  *
      3  * Decodes a 4-byte word by linear-scan over `rv64_insn_table` and
      4  * dispatches operand printing on the matched format. Compressed (RV64C)
      5  * instructions are 16-bit: a halfword whose low 2 bits are not 0b11
      6  * goes through the C-decode path; the iterator advances by 2 bytes.
      7  *
      8  * Unknown words/halfwords fall back to ".word"/".hword" placeholders. */
      9 
     10 #include "arch/riscv/disasm.h"
     11 
     12 #include <string.h>
     13 
     14 #include "arch/riscv/isa.h"
     15 #include "arch/riscv/variant.h"
     16 #include "core/heap.h"
     17 #include "core/strbuf.h"
     18 
     19 #define RV64_DASM_MNEM_CAP 16u
     20 #define RV64_DASM_OPS_CAP 96u
     21 #define RV64_DASM_ANN_CAP 64u
     22 #define RV64_ENCODING_UNKNOWN 0xffffffffu
     23 
     24 typedef struct Rv64InsnFormatter {
     25   ArchInsnFormatter base;
     26   Compiler* c;
     27   Heap* heap;
     28   char mnem_buf[RV64_DASM_MNEM_CAP];
     29   char ops_buf[RV64_DASM_OPS_CAP];
     30   char ann_buf[RV64_DASM_ANN_CAP];
     31   StrBuf mnem;
     32   StrBuf ops;
     33   StrBuf ann;
     34 } Rv64InsnFormatter;
     35 
     36 typedef struct Rv64Disasm {
     37   ArchDisasm base;
     38   Rv64InsnFormatter fmt;
     39 } Rv64Disasm;
     40 
     41 static KitStatus rv64_format_insn(ArchInsnFormatter*, const KitDecodedInsn*,
     42                                   KitInsn*);
     43 static void rv64_formatter_destroy(ArchInsnFormatter*);
     44 
     45 /* RV_AV_* mask of the arch being disassembled. Derived from the Compiler's
     46  * target; defaults (and any non-RISC-V kind) map to rv64 so the historical
     47  * decode path is byte-identical. */
     48 static u8 rv_av_for_compiler(Compiler* c) {
     49   const RiscvVariant* v = riscv_variant_for_kind(c->target.arch);
     50   return v->xlen == 32u ? (u8)RV_AV_RV32 : (u8)RV_AV_RV64;
     51 }
     52 
     53 static u32 rv_read_u32_le(const u8* b) {
     54   return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24);
     55 }
     56 
     57 static u32 rv_read_u16_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8); }
     58 
     59 static void rv_fmt_emit_fallback32(Rv64InsnFormatter* f, u32 word) {
     60   strbuf_reset(&f->mnem);
     61   strbuf_puts(&f->mnem, ".word");
     62   strbuf_reset(&f->ops);
     63   strbuf_put_hex_u64(&f->ops, (u64)word);
     64 }
     65 
     66 static void rv_fmt_emit_fallback16(Rv64InsnFormatter* f, u32 hw) {
     67   strbuf_reset(&f->mnem);
     68   strbuf_puts(&f->mnem, ".hword");
     69   strbuf_reset(&f->ops);
     70   strbuf_put_hex_u64(&f->ops, (u64)hw);
     71 }
     72 
     73 static u32 rv64_desc_encoding_id(const Rv64InsnDesc* desc) {
     74   u32 i;
     75   if (!desc) return RV64_ENCODING_UNKNOWN;
     76   for (i = 0; i < rv64_insn_table_n; ++i) {
     77     if (desc == &rv64_insn_table[i]) return i;
     78   }
     79   return RV64_ENCODING_UNKNOWN;
     80 }
     81 
     82 static u32 rv64_semantic_opcode(u32 word, u32 nbytes) {
     83   u32 op, funct3, funct7;
     84   if (nbytes != 4u) return RV64_DEC_UNKNOWN;
     85   if (word == rv_ecall()) return RV64_DEC_ECALL;
     86   if (word == rv_ebreak()) return RV64_DEC_EBREAK;
     87   op = word & 0x7fu;
     88   funct3 = (word >> 12) & 0x7u;
     89   funct7 = (word >> 25) & 0x7fu;
     90   if (op == RV_OP_IMM && funct3 == 0u) return RV64_DEC_ADDI;
     91   if (op == RV_OP && funct3 == 0u && funct7 == 0u) return RV64_DEC_ADD;
     92   if (op == RV_AUIPC) return RV64_DEC_AUIPC;
     93   if (op == RV_LOAD && funct3 == 3u) return RV64_DEC_LD;
     94   if (op == RV_STORE && funct3 == 3u) return RV64_DEC_SD;
     95   if (op == RV_JALR && funct3 == 0u) return RV64_DEC_JALR;
     96   return RV64_DEC_UNKNOWN;
     97 }
     98 
     99 static void rv_decop_none(KitDecodedOperand* o) {
    100   memset(o, 0, sizeof(*o));
    101   o->kind = KIT_DECOP_NONE;
    102   o->index_reg = REG_NONE;
    103 }
    104 
    105 static void rv_decop_reg(KitDecodedOperand* o, u32 reg, u8 width_bits) {
    106   rv_decop_none(o);
    107   o->kind = KIT_DECOP_REG;
    108   o->width_bits = width_bits;
    109   o->reg = reg;
    110 }
    111 
    112 static void rv_decop_imm(KitDecodedOperand* o, i64 imm) {
    113   rv_decop_none(o);
    114   o->kind = KIT_DECOP_IMM;
    115   o->imm = imm;
    116 }
    117 
    118 static void rv_decop_sysreg(KitDecodedOperand* o, u32 reg) {
    119   rv_decop_none(o);
    120   o->kind = KIT_DECOP_SYSREG;
    121   o->reg = reg;
    122 }
    123 
    124 static void rv_decop_mem(KitDecodedOperand* o, u32 base, i64 imm,
    125                          u8 width_bits) {
    126   rv_decop_none(o);
    127   o->kind = KIT_DECOP_MEM;
    128   o->width_bits = width_bits;
    129   o->reg = base;
    130   o->imm = imm;
    131 }
    132 
    133 static void rv_decop_pcrel(KitDecodedOperand* o, u64 pc, i64 disp) {
    134   rv_decop_none(o);
    135   o->kind = KIT_DECOP_PCREL;
    136   o->imm = (i64)(pc + (u64)disp);
    137 }
    138 
    139 static u8 rv_load_width_bits(u32 funct3) {
    140   switch (funct3 & 7u) {
    141     case 0:
    142     case 4:
    143       return 8;
    144     case 1:
    145     case 5:
    146       return 16;
    147     case 2:
    148     case 6:
    149       return 32;
    150     case 3:
    151       return 64;
    152     default:
    153       return 0;
    154   }
    155 }
    156 
    157 static u16 rv64_decode_flags(const Rv64InsnDesc* desc, u32 word) {
    158   u16 flags = 0;
    159   Rv64Format fmt;
    160   if (!desc) return 0;
    161   fmt = (Rv64Format)desc->fmt;
    162   switch (fmt) {
    163     case RV64_FMT_B:
    164     case RV64_FMT_CB:
    165     case RV64_FMT_CJ:
    166       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    167       break;
    168     case RV64_FMT_J:
    169       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    170       if (((word >> 7) & 0x1fu) == RV_RA) flags |= KIT_DECODE_CALL;
    171       break;
    172     case RV64_FMT_JALR: {
    173       u32 rd = (word >> 7) & 0x1fu;
    174       u32 rs1 = (word >> 15) & 0x1fu;
    175       flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    176       if (rd == RV_RA) flags |= KIT_DECODE_CALL;
    177       if (rd == RV_ZERO && rs1 == RV_RA) flags |= KIT_DECODE_RET;
    178       break;
    179     }
    180     case RV64_FMT_CR:
    181       if (slice_eq_cstr(desc->mnemonic, "c.jr") ||
    182           slice_eq_cstr(desc->mnemonic, "c.jalr")) {
    183         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH;
    184         if (slice_eq_cstr(desc->mnemonic, "c.jalr")) flags |= KIT_DECODE_CALL;
    185       }
    186       break;
    187     case RV64_FMT_SYSTEM:
    188       if (word == rv_ecall() || word == rv_ebreak())
    189         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP;
    190       break;
    191     case RV64_FMT_C_NONE:
    192       if ((word & 0xffffu) == 0x9002u)
    193         flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP;
    194       break;
    195     case RV64_FMT_LOAD:
    196     case RV64_FMT_STORE:
    197     case RV64_FMT_FP_LOAD:
    198     case RV64_FMT_FP_STORE:
    199     case RV64_FMT_AMO:
    200     case RV64_FMT_LR:
    201     case RV64_FMT_CL:
    202     case RV64_FMT_CS:
    203     case RV64_FMT_CSS:
    204       flags |= KIT_DECODE_MEMORY;
    205       break;
    206     default:
    207       break;
    208   }
    209   return flags;
    210 }
    211 
    212 static void rv64_decode_operands(const Rv64InsnDesc* desc, u32 word, u64 pc,
    213                                  const RiscvVariant* variant,
    214                                  KitDecodedInsn* out) {
    215   Rv64Format fmt;
    216   if (!desc) return;
    217   fmt = (Rv64Format)desc->fmt;
    218   switch (fmt) {
    219     case RV64_FMT_R:
    220     case RV64_FMT_FP_R:
    221     case RV64_FMT_FP_RM: {
    222       Rv64R r = rv64_r_unpack(word);
    223       out->noperands = 3;
    224       rv_decop_reg(&out->operands[0], r.rd, 64);
    225       rv_decop_reg(&out->operands[1], r.rs1, 64);
    226       rv_decop_reg(&out->operands[2], r.rs2, 64);
    227       break;
    228     }
    229     case RV64_FMT_I: {
    230       Rv64I i = rv64_i_unpack(word);
    231       out->noperands = 3;
    232       rv_decop_reg(&out->operands[0], i.rd, 64);
    233       rv_decop_reg(&out->operands[1], i.rs1, 64);
    234       rv_decop_imm(&out->operands[2], rv64_sext(i.imm12, 12));
    235       break;
    236     }
    237     case RV64_FMT_I_SHIFT:
    238     case RV64_FMT_I_SHIFTW: {
    239       Rv64I i = rv64_i_unpack(word);
    240       /* SLLIW/SRLIW/SRAIW (I_SHIFTW) are always a 5-bit shamt. The plain
    241        * SLLI/SRLI/SRAI shamt is 6-bit on rv64 but 5-bit on rv32 (bit 25 is
    242        * funct7 there), so the mask follows variant->shamt_bits. */
    243       u32 shamt_mask =
    244           (fmt == RV64_FMT_I_SHIFTW || variant->shamt_bits == 5u) ? 0x1fu
    245                                                                   : 0x3fu;
    246       out->noperands = 3;
    247       rv_decop_reg(&out->operands[0], i.rd, 64);
    248       rv_decop_reg(&out->operands[1], i.rs1, 64);
    249       rv_decop_imm(&out->operands[2], (i64)(i.imm12 & shamt_mask));
    250       break;
    251     }
    252     case RV64_FMT_LOAD:
    253     case RV64_FMT_FP_LOAD: {
    254       Rv64I i = rv64_i_unpack(word);
    255       out->noperands = 2;
    256       rv_decop_reg(&out->operands[0], i.rd, 64);
    257       rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12),
    258                    rv_load_width_bits(i.funct3));
    259       break;
    260     }
    261     case RV64_FMT_S:
    262     case RV64_FMT_STORE:
    263     case RV64_FMT_FP_STORE: {
    264       Rv64S s = rv64_s_unpack(word);
    265       out->noperands = 2;
    266       rv_decop_reg(&out->operands[0], s.rs2, 64);
    267       rv_decop_mem(&out->operands[1], s.rs1, rv64_sext(s.imm12, 12),
    268                    rv_load_width_bits(s.funct3));
    269       break;
    270     }
    271     case RV64_FMT_B: {
    272       Rv64B b = rv64_b_unpack(word);
    273       out->noperands = 3;
    274       rv_decop_reg(&out->operands[0], b.rs1, 64);
    275       rv_decop_reg(&out->operands[1], b.rs2, 64);
    276       rv_decop_pcrel(&out->operands[2], pc, rv64_sext(b.imm13, 13));
    277       break;
    278     }
    279     case RV64_FMT_U: {
    280       Rv64U u = rv64_u_unpack(word);
    281       out->noperands = 2;
    282       rv_decop_reg(&out->operands[0], u.rd, 64);
    283       rv_decop_imm(&out->operands[1], (i64)(i32)u.imm32_hi20);
    284       break;
    285     }
    286     case RV64_FMT_J: {
    287       Rv64J j = rv64_j_unpack(word);
    288       out->noperands = 2;
    289       rv_decop_reg(&out->operands[0], j.rd, 64);
    290       rv_decop_pcrel(&out->operands[1], pc, rv64_sext(j.imm21, 21));
    291       break;
    292     }
    293     case RV64_FMT_JALR: {
    294       Rv64I i = rv64_i_unpack(word);
    295       out->noperands = 2;
    296       rv_decop_reg(&out->operands[0], i.rd, 64);
    297       rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 64);
    298       break;
    299     }
    300     case RV64_FMT_CSR: {
    301       Rv64I i = rv64_i_unpack(word);
    302       out->noperands = 3;
    303       rv_decop_reg(&out->operands[0], i.rd, 64);
    304       rv_decop_sysreg(&out->operands[1], i.imm12);
    305       rv_decop_reg(&out->operands[2], i.rs1, 64);
    306       break;
    307     }
    308     case RV64_FMT_CSRI: {
    309       Rv64I i = rv64_i_unpack(word);
    310       out->noperands = 3;
    311       rv_decop_reg(&out->operands[0], i.rd, 64);
    312       rv_decop_sysreg(&out->operands[1], i.imm12);
    313       rv_decop_imm(&out->operands[2], (i64)i.rs1);
    314       break;
    315     }
    316     default:
    317       break;
    318   }
    319 }
    320 
    321 static KitStatus rv64_decode_one(Compiler* c, const u8* bytes, size_t len,
    322                                  u64 pc, KitDecodedInsn* out) {
    323   const Rv64InsnDesc* desc;
    324   const RiscvVariant* variant;
    325   u8 av;
    326   u32 first_hw;
    327   u32 word;
    328   u32 encoding_id;
    329   if (!bytes || !out) return KIT_INVALID;
    330   if (len < 2u) return KIT_MALFORMED;
    331   variant = riscv_variant_for_kind(c->target.arch);
    332   av = rv_av_for_compiler(c);
    333   memset(out, 0, sizeof(*out));
    334   for (u32 i = 0; i < KIT_DECODE_MAX_OPERANDS; ++i)
    335     rv_decop_none(&out->operands[i]);
    336 
    337   first_hw = rv_read_u16_le(bytes);
    338   if ((first_hw & 3u) != 3u) {
    339     word = first_hw;
    340     desc = rv64_disasm_find_c(first_hw, av);
    341     out->nbytes = 2;
    342   } else {
    343     if (len < 4u) return KIT_MALFORMED;
    344     word = rv_read_u32_le(bytes);
    345     desc = rv64_disasm_find(word, av);
    346     out->nbytes = 4;
    347   }
    348 
    349   encoding_id = rv64_desc_encoding_id(desc);
    350   out->pc = pc;
    351   out->bytes = bytes;
    352   out->encoding_id = encoding_id;
    353   out->opcode = rv64_semantic_opcode(word, out->nbytes);
    354   out->flags = rv64_decode_flags(desc, word);
    355   out->arch[0] = word;
    356   out->arch[1] = desc ? desc->fmt : 0xffu;
    357   rv64_decode_operands(desc, word, pc, variant, out);
    358   return KIT_OK;
    359 }
    360 
    361 static KitStatus rv64_decode_block(Compiler* c, const u8* bytes, size_t len,
    362                                    u64 pc, KitDecodedInsn* out, u32 cap,
    363                                    u32* n_out) {
    364   u32 n = 0;
    365   if (n_out) *n_out = 0;
    366   if (!bytes || !out || !n_out) return KIT_INVALID;
    367   while (n < cap && len > 0) {
    368     KitStatus st = rv64_decode_one(c, bytes, len, pc, &out[n]);
    369     if (st != KIT_OK) return n ? KIT_OK : st;
    370     bytes += out[n].nbytes;
    371     len -= out[n].nbytes;
    372     pc += out[n].nbytes;
    373     ++n;
    374     if (out[n - 1u].flags & KIT_DECODE_TERMINATOR) break;
    375   }
    376   *n_out = n;
    377   return KIT_OK;
    378 }
    379 
    380 static void rv64_formatter_init(Rv64InsnFormatter* f, Compiler* c, Heap* h) {
    381   memset(f, 0, sizeof(*f));
    382   f->c = c;
    383   f->heap = h;
    384   f->base.format = rv64_format_insn;
    385   f->base.destroy = rv64_formatter_destroy;
    386   strbuf_init(&f->mnem, f->mnem_buf, sizeof f->mnem_buf);
    387   strbuf_init(&f->ops, f->ops_buf, sizeof f->ops_buf);
    388   strbuf_init(&f->ann, f->ann_buf, sizeof f->ann_buf);
    389 }
    390 
    391 static KitStatus rv64_format_insn(ArchInsnFormatter* base,
    392                                   const KitDecodedInsn* insn, KitInsn* out) {
    393   Rv64InsnFormatter* f = (Rv64InsnFormatter*)base;
    394   const Rv64InsnDesc* desc;
    395   u32 word;
    396   if (!f || !insn || !out) return KIT_INVALID;
    397   word = (u32)insn->arch[0];
    398   {
    399     u8 av = rv_av_for_compiler(f->c);
    400     desc = insn->nbytes == 2u ? rv64_disasm_find_c(word, av)
    401                               : rv64_disasm_find(word, av);
    402   }
    403   if (desc) {
    404     strbuf_reset(&f->mnem);
    405     strbuf_put_slice(&f->mnem, desc->mnemonic);
    406     strbuf_reset(&f->ops);
    407     rv64_print_operands(&f->ops, desc, word, insn->pc);
    408   } else if (insn->nbytes == 2u) {
    409     rv_fmt_emit_fallback16(f, word);
    410   } else {
    411     rv_fmt_emit_fallback32(f, word);
    412   }
    413 
    414   strbuf_reset(&f->ann);
    415   out->vaddr = insn->pc;
    416   out->bytes = insn->bytes;
    417   out->nbytes = insn->nbytes;
    418   out->mnemonic = strbuf_slice(&f->mnem);
    419   out->operands = strbuf_slice(&f->ops);
    420   out->annotation = strbuf_slice(&f->ann);
    421   return KIT_OK;
    422 }
    423 
    424 static void rv64_formatter_destroy(ArchInsnFormatter* base) {
    425   Rv64InsnFormatter* f = (Rv64InsnFormatter*)base;
    426   if (!f) return;
    427   f->heap->free(f->heap, f, sizeof(*f));
    428 }
    429 
    430 static ArchInsnFormatter* rv64_formatter_new(Compiler* c) {
    431   Heap* h = (Heap*)c->ctx->heap;
    432   Rv64InsnFormatter* f =
    433       (Rv64InsnFormatter*)h->alloc(h, sizeof(*f), _Alignof(Rv64InsnFormatter));
    434   if (!f) return NULL;
    435   rv64_formatter_init(f, c, h);
    436   return &f->base;
    437 }
    438 
    439 static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
    440                      KitInsn* out) {
    441   Rv64Disasm* d = (Rv64Disasm*)base;
    442   KitDecodedInsn insn;
    443   KitStatus st = rv64_decode_one(d->fmt.c, bytes, len, vaddr, &insn);
    444   if (st != KIT_OK) return 0;
    445   st = rv64_format_insn(&d->fmt.base, &insn, out);
    446   if (st != KIT_OK) return 0;
    447   return insn.nbytes;
    448 }
    449 
    450 static void rv64_destroy(ArchDisasm* base) {
    451   Rv64Disasm* d = (Rv64Disasm*)base;
    452   d->fmt.heap->free(d->fmt.heap, d, sizeof(*d));
    453 }
    454 
    455 ArchDisasm* rv64_disasm_new(Compiler* c) {
    456   Heap* h = (Heap*)c->ctx->heap;
    457   Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm));
    458   if (!d) return NULL;
    459   memset(d, 0, sizeof(*d));
    460   d->base.decode = rv_decode;
    461   d->base.destroy = rv64_destroy;
    462   rv64_formatter_init(&d->fmt, c, h);
    463   return &d->base;
    464 }
    465 
    466 const ArchDecodeOps rv64_decode_ops = {
    467     .min_insn_len = 2,
    468     .max_insn_len = 4,
    469     .decode_one = rv64_decode_one,
    470     .decode_block = rv64_decode_block,
    471     .formatter_new = rv64_formatter_new,
    472     .format = rv64_format_insn,
    473     .formatter_free = rv64_formatter_destroy,
    474 };