disasm.c (14440B)
1 /* RV64 disassembler — descriptor-table driven. 2 * 3 * Decodes a 4-byte word by linear-scan over `rv64_insn_table` and 4 * dispatches operand printing on the matched format. Compressed (RV64C) 5 * instructions are 16-bit: a halfword whose low 2 bits are not 0b11 6 * goes through the C-decode path; the iterator advances by 2 bytes. 7 * 8 * Unknown words/halfwords fall back to ".word"/".hword" placeholders. */ 9 10 #include "arch/riscv/disasm.h" 11 12 #include <string.h> 13 14 #include "arch/riscv/isa.h" 15 #include "arch/riscv/variant.h" 16 #include "core/heap.h" 17 #include "core/strbuf.h" 18 19 #define RV64_DASM_MNEM_CAP 16u 20 #define RV64_DASM_OPS_CAP 96u 21 #define RV64_DASM_ANN_CAP 64u 22 #define RV64_ENCODING_UNKNOWN 0xffffffffu 23 24 typedef struct Rv64InsnFormatter { 25 ArchInsnFormatter base; 26 Compiler* c; 27 Heap* heap; 28 char mnem_buf[RV64_DASM_MNEM_CAP]; 29 char ops_buf[RV64_DASM_OPS_CAP]; 30 char ann_buf[RV64_DASM_ANN_CAP]; 31 StrBuf mnem; 32 StrBuf ops; 33 StrBuf ann; 34 } Rv64InsnFormatter; 35 36 typedef struct Rv64Disasm { 37 ArchDisasm base; 38 Rv64InsnFormatter fmt; 39 } Rv64Disasm; 40 41 static KitStatus rv64_format_insn(ArchInsnFormatter*, const KitDecodedInsn*, 42 KitInsn*); 43 static void rv64_formatter_destroy(ArchInsnFormatter*); 44 45 /* RV_AV_* mask of the arch being disassembled. Derived from the Compiler's 46 * target; defaults (and any non-RISC-V kind) map to rv64 so the historical 47 * decode path is byte-identical. */ 48 static u8 rv_av_for_compiler(Compiler* c) { 49 const RiscvVariant* v = riscv_variant_for_kind(c->target.arch); 50 return v->xlen == 32u ? (u8)RV_AV_RV32 : (u8)RV_AV_RV64; 51 } 52 53 static u32 rv_read_u32_le(const u8* b) { 54 return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24); 55 } 56 57 static u32 rv_read_u16_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8); } 58 59 static void rv_fmt_emit_fallback32(Rv64InsnFormatter* f, u32 word) { 60 strbuf_reset(&f->mnem); 61 strbuf_puts(&f->mnem, ".word"); 62 strbuf_reset(&f->ops); 63 strbuf_put_hex_u64(&f->ops, (u64)word); 64 } 65 66 static void rv_fmt_emit_fallback16(Rv64InsnFormatter* f, u32 hw) { 67 strbuf_reset(&f->mnem); 68 strbuf_puts(&f->mnem, ".hword"); 69 strbuf_reset(&f->ops); 70 strbuf_put_hex_u64(&f->ops, (u64)hw); 71 } 72 73 static u32 rv64_desc_encoding_id(const Rv64InsnDesc* desc) { 74 u32 i; 75 if (!desc) return RV64_ENCODING_UNKNOWN; 76 for (i = 0; i < rv64_insn_table_n; ++i) { 77 if (desc == &rv64_insn_table[i]) return i; 78 } 79 return RV64_ENCODING_UNKNOWN; 80 } 81 82 static u32 rv64_semantic_opcode(u32 word, u32 nbytes) { 83 u32 op, funct3, funct7; 84 if (nbytes != 4u) return RV64_DEC_UNKNOWN; 85 if (word == rv_ecall()) return RV64_DEC_ECALL; 86 if (word == rv_ebreak()) return RV64_DEC_EBREAK; 87 op = word & 0x7fu; 88 funct3 = (word >> 12) & 0x7u; 89 funct7 = (word >> 25) & 0x7fu; 90 if (op == RV_OP_IMM && funct3 == 0u) return RV64_DEC_ADDI; 91 if (op == RV_OP && funct3 == 0u && funct7 == 0u) return RV64_DEC_ADD; 92 if (op == RV_AUIPC) return RV64_DEC_AUIPC; 93 if (op == RV_LOAD && funct3 == 3u) return RV64_DEC_LD; 94 if (op == RV_STORE && funct3 == 3u) return RV64_DEC_SD; 95 if (op == RV_JALR && funct3 == 0u) return RV64_DEC_JALR; 96 return RV64_DEC_UNKNOWN; 97 } 98 99 static void rv_decop_none(KitDecodedOperand* o) { 100 memset(o, 0, sizeof(*o)); 101 o->kind = KIT_DECOP_NONE; 102 o->index_reg = REG_NONE; 103 } 104 105 static void rv_decop_reg(KitDecodedOperand* o, u32 reg, u8 width_bits) { 106 rv_decop_none(o); 107 o->kind = KIT_DECOP_REG; 108 o->width_bits = width_bits; 109 o->reg = reg; 110 } 111 112 static void rv_decop_imm(KitDecodedOperand* o, i64 imm) { 113 rv_decop_none(o); 114 o->kind = KIT_DECOP_IMM; 115 o->imm = imm; 116 } 117 118 static void rv_decop_sysreg(KitDecodedOperand* o, u32 reg) { 119 rv_decop_none(o); 120 o->kind = KIT_DECOP_SYSREG; 121 o->reg = reg; 122 } 123 124 static void rv_decop_mem(KitDecodedOperand* o, u32 base, i64 imm, 125 u8 width_bits) { 126 rv_decop_none(o); 127 o->kind = KIT_DECOP_MEM; 128 o->width_bits = width_bits; 129 o->reg = base; 130 o->imm = imm; 131 } 132 133 static void rv_decop_pcrel(KitDecodedOperand* o, u64 pc, i64 disp) { 134 rv_decop_none(o); 135 o->kind = KIT_DECOP_PCREL; 136 o->imm = (i64)(pc + (u64)disp); 137 } 138 139 static u8 rv_load_width_bits(u32 funct3) { 140 switch (funct3 & 7u) { 141 case 0: 142 case 4: 143 return 8; 144 case 1: 145 case 5: 146 return 16; 147 case 2: 148 case 6: 149 return 32; 150 case 3: 151 return 64; 152 default: 153 return 0; 154 } 155 } 156 157 static u16 rv64_decode_flags(const Rv64InsnDesc* desc, u32 word) { 158 u16 flags = 0; 159 Rv64Format fmt; 160 if (!desc) return 0; 161 fmt = (Rv64Format)desc->fmt; 162 switch (fmt) { 163 case RV64_FMT_B: 164 case RV64_FMT_CB: 165 case RV64_FMT_CJ: 166 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 167 break; 168 case RV64_FMT_J: 169 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 170 if (((word >> 7) & 0x1fu) == RV_RA) flags |= KIT_DECODE_CALL; 171 break; 172 case RV64_FMT_JALR: { 173 u32 rd = (word >> 7) & 0x1fu; 174 u32 rs1 = (word >> 15) & 0x1fu; 175 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 176 if (rd == RV_RA) flags |= KIT_DECODE_CALL; 177 if (rd == RV_ZERO && rs1 == RV_RA) flags |= KIT_DECODE_RET; 178 break; 179 } 180 case RV64_FMT_CR: 181 if (slice_eq_cstr(desc->mnemonic, "c.jr") || 182 slice_eq_cstr(desc->mnemonic, "c.jalr")) { 183 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 184 if (slice_eq_cstr(desc->mnemonic, "c.jalr")) flags |= KIT_DECODE_CALL; 185 } 186 break; 187 case RV64_FMT_SYSTEM: 188 if (word == rv_ecall() || word == rv_ebreak()) 189 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP; 190 break; 191 case RV64_FMT_C_NONE: 192 if ((word & 0xffffu) == 0x9002u) 193 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP; 194 break; 195 case RV64_FMT_LOAD: 196 case RV64_FMT_STORE: 197 case RV64_FMT_FP_LOAD: 198 case RV64_FMT_FP_STORE: 199 case RV64_FMT_AMO: 200 case RV64_FMT_LR: 201 case RV64_FMT_CL: 202 case RV64_FMT_CS: 203 case RV64_FMT_CSS: 204 flags |= KIT_DECODE_MEMORY; 205 break; 206 default: 207 break; 208 } 209 return flags; 210 } 211 212 static void rv64_decode_operands(const Rv64InsnDesc* desc, u32 word, u64 pc, 213 const RiscvVariant* variant, 214 KitDecodedInsn* out) { 215 Rv64Format fmt; 216 if (!desc) return; 217 fmt = (Rv64Format)desc->fmt; 218 switch (fmt) { 219 case RV64_FMT_R: 220 case RV64_FMT_FP_R: 221 case RV64_FMT_FP_RM: { 222 Rv64R r = rv64_r_unpack(word); 223 out->noperands = 3; 224 rv_decop_reg(&out->operands[0], r.rd, 64); 225 rv_decop_reg(&out->operands[1], r.rs1, 64); 226 rv_decop_reg(&out->operands[2], r.rs2, 64); 227 break; 228 } 229 case RV64_FMT_I: { 230 Rv64I i = rv64_i_unpack(word); 231 out->noperands = 3; 232 rv_decop_reg(&out->operands[0], i.rd, 64); 233 rv_decop_reg(&out->operands[1], i.rs1, 64); 234 rv_decop_imm(&out->operands[2], rv64_sext(i.imm12, 12)); 235 break; 236 } 237 case RV64_FMT_I_SHIFT: 238 case RV64_FMT_I_SHIFTW: { 239 Rv64I i = rv64_i_unpack(word); 240 /* SLLIW/SRLIW/SRAIW (I_SHIFTW) are always a 5-bit shamt. The plain 241 * SLLI/SRLI/SRAI shamt is 6-bit on rv64 but 5-bit on rv32 (bit 25 is 242 * funct7 there), so the mask follows variant->shamt_bits. */ 243 u32 shamt_mask = 244 (fmt == RV64_FMT_I_SHIFTW || variant->shamt_bits == 5u) ? 0x1fu 245 : 0x3fu; 246 out->noperands = 3; 247 rv_decop_reg(&out->operands[0], i.rd, 64); 248 rv_decop_reg(&out->operands[1], i.rs1, 64); 249 rv_decop_imm(&out->operands[2], (i64)(i.imm12 & shamt_mask)); 250 break; 251 } 252 case RV64_FMT_LOAD: 253 case RV64_FMT_FP_LOAD: { 254 Rv64I i = rv64_i_unpack(word); 255 out->noperands = 2; 256 rv_decop_reg(&out->operands[0], i.rd, 64); 257 rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 258 rv_load_width_bits(i.funct3)); 259 break; 260 } 261 case RV64_FMT_S: 262 case RV64_FMT_STORE: 263 case RV64_FMT_FP_STORE: { 264 Rv64S s = rv64_s_unpack(word); 265 out->noperands = 2; 266 rv_decop_reg(&out->operands[0], s.rs2, 64); 267 rv_decop_mem(&out->operands[1], s.rs1, rv64_sext(s.imm12, 12), 268 rv_load_width_bits(s.funct3)); 269 break; 270 } 271 case RV64_FMT_B: { 272 Rv64B b = rv64_b_unpack(word); 273 out->noperands = 3; 274 rv_decop_reg(&out->operands[0], b.rs1, 64); 275 rv_decop_reg(&out->operands[1], b.rs2, 64); 276 rv_decop_pcrel(&out->operands[2], pc, rv64_sext(b.imm13, 13)); 277 break; 278 } 279 case RV64_FMT_U: { 280 Rv64U u = rv64_u_unpack(word); 281 out->noperands = 2; 282 rv_decop_reg(&out->operands[0], u.rd, 64); 283 rv_decop_imm(&out->operands[1], (i64)(i32)u.imm32_hi20); 284 break; 285 } 286 case RV64_FMT_J: { 287 Rv64J j = rv64_j_unpack(word); 288 out->noperands = 2; 289 rv_decop_reg(&out->operands[0], j.rd, 64); 290 rv_decop_pcrel(&out->operands[1], pc, rv64_sext(j.imm21, 21)); 291 break; 292 } 293 case RV64_FMT_JALR: { 294 Rv64I i = rv64_i_unpack(word); 295 out->noperands = 2; 296 rv_decop_reg(&out->operands[0], i.rd, 64); 297 rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 64); 298 break; 299 } 300 case RV64_FMT_CSR: { 301 Rv64I i = rv64_i_unpack(word); 302 out->noperands = 3; 303 rv_decop_reg(&out->operands[0], i.rd, 64); 304 rv_decop_sysreg(&out->operands[1], i.imm12); 305 rv_decop_reg(&out->operands[2], i.rs1, 64); 306 break; 307 } 308 case RV64_FMT_CSRI: { 309 Rv64I i = rv64_i_unpack(word); 310 out->noperands = 3; 311 rv_decop_reg(&out->operands[0], i.rd, 64); 312 rv_decop_sysreg(&out->operands[1], i.imm12); 313 rv_decop_imm(&out->operands[2], (i64)i.rs1); 314 break; 315 } 316 default: 317 break; 318 } 319 } 320 321 static KitStatus rv64_decode_one(Compiler* c, const u8* bytes, size_t len, 322 u64 pc, KitDecodedInsn* out) { 323 const Rv64InsnDesc* desc; 324 const RiscvVariant* variant; 325 u8 av; 326 u32 first_hw; 327 u32 word; 328 u32 encoding_id; 329 if (!bytes || !out) return KIT_INVALID; 330 if (len < 2u) return KIT_MALFORMED; 331 variant = riscv_variant_for_kind(c->target.arch); 332 av = rv_av_for_compiler(c); 333 memset(out, 0, sizeof(*out)); 334 for (u32 i = 0; i < KIT_DECODE_MAX_OPERANDS; ++i) 335 rv_decop_none(&out->operands[i]); 336 337 first_hw = rv_read_u16_le(bytes); 338 if ((first_hw & 3u) != 3u) { 339 word = first_hw; 340 desc = rv64_disasm_find_c(first_hw, av); 341 out->nbytes = 2; 342 } else { 343 if (len < 4u) return KIT_MALFORMED; 344 word = rv_read_u32_le(bytes); 345 desc = rv64_disasm_find(word, av); 346 out->nbytes = 4; 347 } 348 349 encoding_id = rv64_desc_encoding_id(desc); 350 out->pc = pc; 351 out->bytes = bytes; 352 out->encoding_id = encoding_id; 353 out->opcode = rv64_semantic_opcode(word, out->nbytes); 354 out->flags = rv64_decode_flags(desc, word); 355 out->arch[0] = word; 356 out->arch[1] = desc ? desc->fmt : 0xffu; 357 rv64_decode_operands(desc, word, pc, variant, out); 358 return KIT_OK; 359 } 360 361 static KitStatus rv64_decode_block(Compiler* c, const u8* bytes, size_t len, 362 u64 pc, KitDecodedInsn* out, u32 cap, 363 u32* n_out) { 364 u32 n = 0; 365 if (n_out) *n_out = 0; 366 if (!bytes || !out || !n_out) return KIT_INVALID; 367 while (n < cap && len > 0) { 368 KitStatus st = rv64_decode_one(c, bytes, len, pc, &out[n]); 369 if (st != KIT_OK) return n ? KIT_OK : st; 370 bytes += out[n].nbytes; 371 len -= out[n].nbytes; 372 pc += out[n].nbytes; 373 ++n; 374 if (out[n - 1u].flags & KIT_DECODE_TERMINATOR) break; 375 } 376 *n_out = n; 377 return KIT_OK; 378 } 379 380 static void rv64_formatter_init(Rv64InsnFormatter* f, Compiler* c, Heap* h) { 381 memset(f, 0, sizeof(*f)); 382 f->c = c; 383 f->heap = h; 384 f->base.format = rv64_format_insn; 385 f->base.destroy = rv64_formatter_destroy; 386 strbuf_init(&f->mnem, f->mnem_buf, sizeof f->mnem_buf); 387 strbuf_init(&f->ops, f->ops_buf, sizeof f->ops_buf); 388 strbuf_init(&f->ann, f->ann_buf, sizeof f->ann_buf); 389 } 390 391 static KitStatus rv64_format_insn(ArchInsnFormatter* base, 392 const KitDecodedInsn* insn, KitInsn* out) { 393 Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; 394 const Rv64InsnDesc* desc; 395 u32 word; 396 if (!f || !insn || !out) return KIT_INVALID; 397 word = (u32)insn->arch[0]; 398 { 399 u8 av = rv_av_for_compiler(f->c); 400 desc = insn->nbytes == 2u ? rv64_disasm_find_c(word, av) 401 : rv64_disasm_find(word, av); 402 } 403 if (desc) { 404 strbuf_reset(&f->mnem); 405 strbuf_put_slice(&f->mnem, desc->mnemonic); 406 strbuf_reset(&f->ops); 407 rv64_print_operands(&f->ops, desc, word, insn->pc); 408 } else if (insn->nbytes == 2u) { 409 rv_fmt_emit_fallback16(f, word); 410 } else { 411 rv_fmt_emit_fallback32(f, word); 412 } 413 414 strbuf_reset(&f->ann); 415 out->vaddr = insn->pc; 416 out->bytes = insn->bytes; 417 out->nbytes = insn->nbytes; 418 out->mnemonic = strbuf_slice(&f->mnem); 419 out->operands = strbuf_slice(&f->ops); 420 out->annotation = strbuf_slice(&f->ann); 421 return KIT_OK; 422 } 423 424 static void rv64_formatter_destroy(ArchInsnFormatter* base) { 425 Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; 426 if (!f) return; 427 f->heap->free(f->heap, f, sizeof(*f)); 428 } 429 430 static ArchInsnFormatter* rv64_formatter_new(Compiler* c) { 431 Heap* h = (Heap*)c->ctx->heap; 432 Rv64InsnFormatter* f = 433 (Rv64InsnFormatter*)h->alloc(h, sizeof(*f), _Alignof(Rv64InsnFormatter)); 434 if (!f) return NULL; 435 rv64_formatter_init(f, c, h); 436 return &f->base; 437 } 438 439 static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, 440 KitInsn* out) { 441 Rv64Disasm* d = (Rv64Disasm*)base; 442 KitDecodedInsn insn; 443 KitStatus st = rv64_decode_one(d->fmt.c, bytes, len, vaddr, &insn); 444 if (st != KIT_OK) return 0; 445 st = rv64_format_insn(&d->fmt.base, &insn, out); 446 if (st != KIT_OK) return 0; 447 return insn.nbytes; 448 } 449 450 static void rv64_destroy(ArchDisasm* base) { 451 Rv64Disasm* d = (Rv64Disasm*)base; 452 d->fmt.heap->free(d->fmt.heap, d, sizeof(*d)); 453 } 454 455 ArchDisasm* rv64_disasm_new(Compiler* c) { 456 Heap* h = (Heap*)c->ctx->heap; 457 Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm)); 458 if (!d) return NULL; 459 memset(d, 0, sizeof(*d)); 460 d->base.decode = rv_decode; 461 d->base.destroy = rv64_destroy; 462 rv64_formatter_init(&d->fmt, c, h); 463 return &d->base; 464 } 465 466 const ArchDecodeOps rv64_decode_ops = { 467 .min_insn_len = 2, 468 .max_insn_len = 4, 469 .decode_one = rv64_decode_one, 470 .decode_block = rv64_decode_block, 471 .formatter_new = rv64_formatter_new, 472 .format = rv64_format_insn, 473 .formatter_free = rv64_formatter_destroy, 474 };