disasm.c (13498B)
1 /* RV64 disassembler — descriptor-table driven. 2 * 3 * Decodes a 4-byte word by linear-scan over `rv64_insn_table` and 4 * dispatches operand printing on the matched format. Compressed (RV64C) 5 * instructions are 16-bit: a halfword whose low 2 bits are not 0b11 6 * goes through the C-decode path; the iterator advances by 2 bytes. 7 * 8 * Unknown words/halfwords fall back to ".word"/".hword" placeholders. */ 9 10 #include "arch/rv64/disasm.h" 11 12 #include <string.h> 13 14 #include "arch/rv64/isa.h" 15 #include "core/heap.h" 16 #include "core/strbuf.h" 17 18 #define RV64_DASM_MNEM_CAP 16u 19 #define RV64_DASM_OPS_CAP 96u 20 #define RV64_DASM_ANN_CAP 64u 21 #define RV64_ENCODING_UNKNOWN 0xffffffffu 22 23 typedef struct Rv64InsnFormatter { 24 ArchInsnFormatter base; 25 Compiler* c; 26 Heap* heap; 27 char mnem_buf[RV64_DASM_MNEM_CAP]; 28 char ops_buf[RV64_DASM_OPS_CAP]; 29 char ann_buf[RV64_DASM_ANN_CAP]; 30 StrBuf mnem; 31 StrBuf ops; 32 StrBuf ann; 33 } Rv64InsnFormatter; 34 35 typedef struct Rv64Disasm { 36 ArchDisasm base; 37 Rv64InsnFormatter fmt; 38 } Rv64Disasm; 39 40 static KitStatus rv64_format_insn(ArchInsnFormatter*, const KitDecodedInsn*, 41 KitInsn*); 42 static void rv64_formatter_destroy(ArchInsnFormatter*); 43 44 static u32 rv_read_u32_le(const u8* b) { 45 return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24); 46 } 47 48 static u32 rv_read_u16_le(const u8* b) { return (u32)b[0] | ((u32)b[1] << 8); } 49 50 static void rv_fmt_emit_fallback32(Rv64InsnFormatter* f, u32 word) { 51 strbuf_reset(&f->mnem); 52 strbuf_puts(&f->mnem, ".word"); 53 strbuf_reset(&f->ops); 54 strbuf_put_hex_u64(&f->ops, (u64)word); 55 } 56 57 static void rv_fmt_emit_fallback16(Rv64InsnFormatter* f, u32 hw) { 58 strbuf_reset(&f->mnem); 59 strbuf_puts(&f->mnem, ".hword"); 60 strbuf_reset(&f->ops); 61 strbuf_put_hex_u64(&f->ops, (u64)hw); 62 } 63 64 static u32 rv64_desc_encoding_id(const Rv64InsnDesc* desc) { 65 u32 i; 66 if (!desc) return RV64_ENCODING_UNKNOWN; 67 for (i = 0; i < rv64_insn_table_n; ++i) { 68 if (desc == &rv64_insn_table[i]) return i; 69 } 70 return RV64_ENCODING_UNKNOWN; 71 } 72 73 static u32 rv64_semantic_opcode(u32 word, u32 nbytes) { 74 u32 op, funct3, funct7; 75 if (nbytes != 4u) return RV64_DEC_UNKNOWN; 76 if (word == rv_ecall()) return RV64_DEC_ECALL; 77 if (word == rv_ebreak()) return RV64_DEC_EBREAK; 78 op = word & 0x7fu; 79 funct3 = (word >> 12) & 0x7u; 80 funct7 = (word >> 25) & 0x7fu; 81 if (op == RV_OP_IMM && funct3 == 0u) return RV64_DEC_ADDI; 82 if (op == RV_OP && funct3 == 0u && funct7 == 0u) return RV64_DEC_ADD; 83 if (op == RV_AUIPC) return RV64_DEC_AUIPC; 84 if (op == RV_LOAD && funct3 == 3u) return RV64_DEC_LD; 85 if (op == RV_STORE && funct3 == 3u) return RV64_DEC_SD; 86 if (op == RV_JALR && funct3 == 0u) return RV64_DEC_JALR; 87 return RV64_DEC_UNKNOWN; 88 } 89 90 static void rv_decop_none(KitDecodedOperand* o) { 91 memset(o, 0, sizeof(*o)); 92 o->kind = KIT_DECOP_NONE; 93 o->index_reg = REG_NONE; 94 } 95 96 static void rv_decop_reg(KitDecodedOperand* o, u32 reg, u8 width_bits) { 97 rv_decop_none(o); 98 o->kind = KIT_DECOP_REG; 99 o->width_bits = width_bits; 100 o->reg = reg; 101 } 102 103 static void rv_decop_imm(KitDecodedOperand* o, i64 imm) { 104 rv_decop_none(o); 105 o->kind = KIT_DECOP_IMM; 106 o->imm = imm; 107 } 108 109 static void rv_decop_sysreg(KitDecodedOperand* o, u32 reg) { 110 rv_decop_none(o); 111 o->kind = KIT_DECOP_SYSREG; 112 o->reg = reg; 113 } 114 115 static void rv_decop_mem(KitDecodedOperand* o, u32 base, i64 imm, 116 u8 width_bits) { 117 rv_decop_none(o); 118 o->kind = KIT_DECOP_MEM; 119 o->width_bits = width_bits; 120 o->reg = base; 121 o->imm = imm; 122 } 123 124 static void rv_decop_pcrel(KitDecodedOperand* o, u64 pc, i64 disp) { 125 rv_decop_none(o); 126 o->kind = KIT_DECOP_PCREL; 127 o->imm = (i64)(pc + (u64)disp); 128 } 129 130 static u8 rv_load_width_bits(u32 funct3) { 131 switch (funct3 & 7u) { 132 case 0: 133 case 4: 134 return 8; 135 case 1: 136 case 5: 137 return 16; 138 case 2: 139 case 6: 140 return 32; 141 case 3: 142 return 64; 143 default: 144 return 0; 145 } 146 } 147 148 static u16 rv64_decode_flags(const Rv64InsnDesc* desc, u32 word) { 149 u16 flags = 0; 150 Rv64Format fmt; 151 if (!desc) return 0; 152 fmt = (Rv64Format)desc->fmt; 153 switch (fmt) { 154 case RV64_FMT_B: 155 case RV64_FMT_CB: 156 case RV64_FMT_CJ: 157 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 158 break; 159 case RV64_FMT_J: 160 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 161 if (((word >> 7) & 0x1fu) == RV_RA) flags |= KIT_DECODE_CALL; 162 break; 163 case RV64_FMT_JALR: { 164 u32 rd = (word >> 7) & 0x1fu; 165 u32 rs1 = (word >> 15) & 0x1fu; 166 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 167 if (rd == RV_RA) flags |= KIT_DECODE_CALL; 168 if (rd == RV_ZERO && rs1 == RV_RA) flags |= KIT_DECODE_RET; 169 break; 170 } 171 case RV64_FMT_CR: 172 if (slice_eq_cstr(desc->mnemonic, "c.jr") || 173 slice_eq_cstr(desc->mnemonic, "c.jalr")) { 174 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_BRANCH; 175 if (slice_eq_cstr(desc->mnemonic, "c.jalr")) flags |= KIT_DECODE_CALL; 176 } 177 break; 178 case RV64_FMT_SYSTEM: 179 if (word == rv_ecall() || word == rv_ebreak()) 180 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP; 181 break; 182 case RV64_FMT_C_NONE: 183 if ((word & 0xffffu) == 0x9002u) 184 flags |= KIT_DECODE_TERMINATOR | KIT_DECODE_TRAP; 185 break; 186 case RV64_FMT_LOAD: 187 case RV64_FMT_STORE: 188 case RV64_FMT_FP_LOAD: 189 case RV64_FMT_FP_STORE: 190 case RV64_FMT_AMO: 191 case RV64_FMT_LR: 192 case RV64_FMT_CL: 193 case RV64_FMT_CS: 194 case RV64_FMT_CSS: 195 flags |= KIT_DECODE_MEMORY; 196 break; 197 default: 198 break; 199 } 200 return flags; 201 } 202 203 static void rv64_decode_operands(const Rv64InsnDesc* desc, u32 word, u64 pc, 204 KitDecodedInsn* out) { 205 Rv64Format fmt; 206 if (!desc) return; 207 fmt = (Rv64Format)desc->fmt; 208 switch (fmt) { 209 case RV64_FMT_R: 210 case RV64_FMT_FP_R: 211 case RV64_FMT_FP_RM: { 212 Rv64R r = rv64_r_unpack(word); 213 out->noperands = 3; 214 rv_decop_reg(&out->operands[0], r.rd, 64); 215 rv_decop_reg(&out->operands[1], r.rs1, 64); 216 rv_decop_reg(&out->operands[2], r.rs2, 64); 217 break; 218 } 219 case RV64_FMT_I: { 220 Rv64I i = rv64_i_unpack(word); 221 out->noperands = 3; 222 rv_decop_reg(&out->operands[0], i.rd, 64); 223 rv_decop_reg(&out->operands[1], i.rs1, 64); 224 rv_decop_imm(&out->operands[2], rv64_sext(i.imm12, 12)); 225 break; 226 } 227 case RV64_FMT_I_SHIFT: 228 case RV64_FMT_I_SHIFTW: { 229 Rv64I i = rv64_i_unpack(word); 230 out->noperands = 3; 231 rv_decop_reg(&out->operands[0], i.rd, 64); 232 rv_decop_reg(&out->operands[1], i.rs1, 64); 233 rv_decop_imm(&out->operands[2], fmt == RV64_FMT_I_SHIFTW 234 ? (i.imm12 & 0x1f) 235 : (i.imm12 & 0x3f)); 236 break; 237 } 238 case RV64_FMT_LOAD: 239 case RV64_FMT_FP_LOAD: { 240 Rv64I i = rv64_i_unpack(word); 241 out->noperands = 2; 242 rv_decop_reg(&out->operands[0], i.rd, 64); 243 rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 244 rv_load_width_bits(i.funct3)); 245 break; 246 } 247 case RV64_FMT_S: 248 case RV64_FMT_STORE: 249 case RV64_FMT_FP_STORE: { 250 Rv64S s = rv64_s_unpack(word); 251 out->noperands = 2; 252 rv_decop_reg(&out->operands[0], s.rs2, 64); 253 rv_decop_mem(&out->operands[1], s.rs1, rv64_sext(s.imm12, 12), 254 rv_load_width_bits(s.funct3)); 255 break; 256 } 257 case RV64_FMT_B: { 258 Rv64B b = rv64_b_unpack(word); 259 out->noperands = 3; 260 rv_decop_reg(&out->operands[0], b.rs1, 64); 261 rv_decop_reg(&out->operands[1], b.rs2, 64); 262 rv_decop_pcrel(&out->operands[2], pc, rv64_sext(b.imm13, 13)); 263 break; 264 } 265 case RV64_FMT_U: { 266 Rv64U u = rv64_u_unpack(word); 267 out->noperands = 2; 268 rv_decop_reg(&out->operands[0], u.rd, 64); 269 rv_decop_imm(&out->operands[1], (i64)(i32)u.imm32_hi20); 270 break; 271 } 272 case RV64_FMT_J: { 273 Rv64J j = rv64_j_unpack(word); 274 out->noperands = 2; 275 rv_decop_reg(&out->operands[0], j.rd, 64); 276 rv_decop_pcrel(&out->operands[1], pc, rv64_sext(j.imm21, 21)); 277 break; 278 } 279 case RV64_FMT_JALR: { 280 Rv64I i = rv64_i_unpack(word); 281 out->noperands = 2; 282 rv_decop_reg(&out->operands[0], i.rd, 64); 283 rv_decop_mem(&out->operands[1], i.rs1, rv64_sext(i.imm12, 12), 64); 284 break; 285 } 286 case RV64_FMT_CSR: { 287 Rv64I i = rv64_i_unpack(word); 288 out->noperands = 3; 289 rv_decop_reg(&out->operands[0], i.rd, 64); 290 rv_decop_sysreg(&out->operands[1], i.imm12); 291 rv_decop_reg(&out->operands[2], i.rs1, 64); 292 break; 293 } 294 case RV64_FMT_CSRI: { 295 Rv64I i = rv64_i_unpack(word); 296 out->noperands = 3; 297 rv_decop_reg(&out->operands[0], i.rd, 64); 298 rv_decop_sysreg(&out->operands[1], i.imm12); 299 rv_decop_imm(&out->operands[2], (i64)i.rs1); 300 break; 301 } 302 default: 303 break; 304 } 305 } 306 307 static KitStatus rv64_decode_one(Compiler* c, const u8* bytes, size_t len, 308 u64 pc, KitDecodedInsn* out) { 309 const Rv64InsnDesc* desc; 310 u32 first_hw; 311 u32 word; 312 u32 encoding_id; 313 (void)c; 314 if (!bytes || !out) return KIT_INVALID; 315 if (len < 2u) return KIT_MALFORMED; 316 memset(out, 0, sizeof(*out)); 317 for (u32 i = 0; i < KIT_DECODE_MAX_OPERANDS; ++i) 318 rv_decop_none(&out->operands[i]); 319 320 first_hw = rv_read_u16_le(bytes); 321 if ((first_hw & 3u) != 3u) { 322 word = first_hw; 323 desc = rv64_disasm_find_c(first_hw); 324 out->nbytes = 2; 325 } else { 326 if (len < 4u) return KIT_MALFORMED; 327 word = rv_read_u32_le(bytes); 328 desc = rv64_disasm_find(word); 329 out->nbytes = 4; 330 } 331 332 encoding_id = rv64_desc_encoding_id(desc); 333 out->pc = pc; 334 out->bytes = bytes; 335 out->encoding_id = encoding_id; 336 out->opcode = rv64_semantic_opcode(word, out->nbytes); 337 out->flags = rv64_decode_flags(desc, word); 338 out->arch[0] = word; 339 out->arch[1] = desc ? desc->fmt : 0xffu; 340 rv64_decode_operands(desc, word, pc, out); 341 return KIT_OK; 342 } 343 344 static KitStatus rv64_decode_block(Compiler* c, const u8* bytes, size_t len, 345 u64 pc, KitDecodedInsn* out, u32 cap, 346 u32* n_out) { 347 u32 n = 0; 348 if (n_out) *n_out = 0; 349 if (!bytes || !out || !n_out) return KIT_INVALID; 350 while (n < cap && len > 0) { 351 KitStatus st = rv64_decode_one(c, bytes, len, pc, &out[n]); 352 if (st != KIT_OK) return n ? KIT_OK : st; 353 bytes += out[n].nbytes; 354 len -= out[n].nbytes; 355 pc += out[n].nbytes; 356 ++n; 357 if (out[n - 1u].flags & KIT_DECODE_TERMINATOR) break; 358 } 359 *n_out = n; 360 return KIT_OK; 361 } 362 363 static void rv64_formatter_init(Rv64InsnFormatter* f, Compiler* c, Heap* h) { 364 memset(f, 0, sizeof(*f)); 365 f->c = c; 366 f->heap = h; 367 f->base.format = rv64_format_insn; 368 f->base.destroy = rv64_formatter_destroy; 369 strbuf_init(&f->mnem, f->mnem_buf, sizeof f->mnem_buf); 370 strbuf_init(&f->ops, f->ops_buf, sizeof f->ops_buf); 371 strbuf_init(&f->ann, f->ann_buf, sizeof f->ann_buf); 372 } 373 374 static KitStatus rv64_format_insn(ArchInsnFormatter* base, 375 const KitDecodedInsn* insn, KitInsn* out) { 376 Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; 377 const Rv64InsnDesc* desc; 378 u32 word; 379 if (!f || !insn || !out) return KIT_INVALID; 380 word = (u32)insn->arch[0]; 381 desc = insn->nbytes == 2u ? rv64_disasm_find_c(word) : rv64_disasm_find(word); 382 if (desc) { 383 strbuf_reset(&f->mnem); 384 strbuf_put_slice(&f->mnem, desc->mnemonic); 385 strbuf_reset(&f->ops); 386 rv64_print_operands(&f->ops, desc, word, insn->pc); 387 } else if (insn->nbytes == 2u) { 388 rv_fmt_emit_fallback16(f, word); 389 } else { 390 rv_fmt_emit_fallback32(f, word); 391 } 392 393 strbuf_reset(&f->ann); 394 out->vaddr = insn->pc; 395 out->bytes = insn->bytes; 396 out->nbytes = insn->nbytes; 397 out->mnemonic = strbuf_slice(&f->mnem); 398 out->operands = strbuf_slice(&f->ops); 399 out->annotation = strbuf_slice(&f->ann); 400 return KIT_OK; 401 } 402 403 static void rv64_formatter_destroy(ArchInsnFormatter* base) { 404 Rv64InsnFormatter* f = (Rv64InsnFormatter*)base; 405 if (!f) return; 406 f->heap->free(f->heap, f, sizeof(*f)); 407 } 408 409 static ArchInsnFormatter* rv64_formatter_new(Compiler* c) { 410 Heap* h = (Heap*)c->ctx->heap; 411 Rv64InsnFormatter* f = 412 (Rv64InsnFormatter*)h->alloc(h, sizeof(*f), _Alignof(Rv64InsnFormatter)); 413 if (!f) return NULL; 414 rv64_formatter_init(f, c, h); 415 return &f->base; 416 } 417 418 static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, 419 KitInsn* out) { 420 Rv64Disasm* d = (Rv64Disasm*)base; 421 KitDecodedInsn insn; 422 KitStatus st = rv64_decode_one(d->fmt.c, bytes, len, vaddr, &insn); 423 if (st != KIT_OK) return 0; 424 st = rv64_format_insn(&d->fmt.base, &insn, out); 425 if (st != KIT_OK) return 0; 426 return insn.nbytes; 427 } 428 429 static void rv64_destroy(ArchDisasm* base) { 430 Rv64Disasm* d = (Rv64Disasm*)base; 431 d->fmt.heap->free(d->fmt.heap, d, sizeof(*d)); 432 } 433 434 ArchDisasm* rv64_disasm_new(Compiler* c) { 435 Heap* h = (Heap*)c->ctx->heap; 436 Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm)); 437 if (!d) return NULL; 438 memset(d, 0, sizeof(*d)); 439 d->base.decode = rv_decode; 440 d->base.destroy = rv64_destroy; 441 rv64_formatter_init(&d->fmt, c, h); 442 return &d->base; 443 } 444 445 const ArchDecodeOps rv64_decode_ops = { 446 .min_insn_len = 2, 447 .max_insn_len = 4, 448 .decode_one = rv64_decode_one, 449 .decode_block = rv64_decode_block, 450 .formatter_new = rv64_formatter_new, 451 .format = rv64_format_insn, 452 .formatter_free = rv64_formatter_destroy, 453 };