disasm.c (4780B)
1 /* AArch64 disassembler implementation. 2 * 3 * Decodes one 4-byte instruction word per call into a KitInsn whose 4 * string fields point into iterator-owned StrBufs. The decoder shares 5 * the aa64_isa.{h,c} descriptor table with the encoder: aa64_disasm_find 6 * matches the word; aa64_print_operands renders operand text via the 7 * format's unpack + per-format pretty-printer. Mnemonic rewriting (the 8 * one bit the printer can't own, because b.cond rolls cond into the 9 * "operand" text) happens here. */ 10 11 #include "arch/aa64/disasm.h" 12 13 #include <string.h> 14 15 #include "arch/aa64/isa.h" 16 #include "core/heap.h" 17 #include "core/strbuf.h" 18 19 /* Enough for any aarch64 mnemonic-with-suffix ("b.cond" → "b.le", etc.). */ 20 #define AA64_DASM_MNEM_CAP 16u 21 /* Operand text. The widest cases (LDP X, X, [SP, #-imm]!) fit easily. */ 22 #define AA64_DASM_OPS_CAP 96u 23 /* Annotation overlay (symbol + addend). */ 24 #define AA64_DASM_ANN_CAP 96u 25 26 typedef struct AA64Disasm { 27 ArchDisasm base; 28 Compiler* c; 29 Heap* heap; 30 char mnem_buf[AA64_DASM_MNEM_CAP]; 31 char ops_buf[AA64_DASM_OPS_CAP]; 32 char ann_buf[AA64_DASM_ANN_CAP]; 33 StrBuf mnem; 34 StrBuf ops; 35 StrBuf ann; 36 } AA64Disasm; 37 38 static const char* aa64_cond_names[16] = { 39 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", 40 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv", 41 }; 42 43 static void aa64_write_mnemonic(AA64Disasm* d, const AA64InsnDesc* desc, 44 u32 word) { 45 strbuf_reset(&d->mnem); 46 if (desc->fmt == AA64_FMT_BR_COND) { 47 /* Synthesize "b.<cond>" so the operands buffer can hold just the 48 * target. Matches GNU as / objdump conventions. */ 49 u32 cond = word & 0xfu; 50 strbuf_puts(&d->mnem, "b."); 51 strbuf_puts(&d->mnem, aa64_cond_names[cond]); 52 return; 53 } 54 if (desc->fmt == AA64_FMT_BITFIELD) { 55 /* SBFM/UBFM disassemble to their preferred aliases when the 56 * fields match; aa64_print_operands renders the matching operands. */ 57 u32 shift, lsb, width; 58 const char* alias = aa64_bitfield_shift_alias(word, &shift); 59 if (!alias) alias = aa64_bitfield_extend_alias(word); 60 if (!alias) alias = aa64_bitfield_extract_alias(word, &lsb, &width); 61 (void)lsb; 62 (void)width; 63 if (alias) { 64 strbuf_puts(&d->mnem, alias); 65 return; 66 } 67 } 68 strbuf_put_slice(&d->mnem, desc->mnemonic); 69 } 70 71 static void aa64_write_operands(AA64Disasm* d, const AA64InsnDesc* desc, 72 u32 word, u64 vaddr) { 73 strbuf_reset(&d->ops); 74 if (desc->fmt == AA64_FMT_BR_COND) { 75 /* aa64_print_operands prints "<cond> <target>"; we already lifted 76 * the cond into the mnemonic, so skip the dispatcher and inline 77 * just the target. */ 78 AA64BrCond f = aa64_brcond_unpack(word); 79 i64 ofs = (i64)((u64)f.imm19 & 0x7ffffu); 80 /* sign-extend 19 bits */ 81 if (ofs & 0x40000) ofs |= ~(i64)0x7ffff; 82 ofs *= 4; 83 if (vaddr) { 84 strbuf_put_hex_u64(&d->ops, vaddr + (u64)ofs); 85 } else { 86 strbuf_puts(&d->ops, "#"); 87 strbuf_put_i64(&d->ops, ofs); 88 } 89 return; 90 } 91 aa64_print_operands(&d->ops, desc, word, vaddr); 92 } 93 94 static u32 aa64_read_u32_le(const u8* b) { 95 return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | ((u32)b[3] << 24); 96 } 97 98 static void aa64_write_unknown(AA64Disasm* d, u32 word) { 99 strbuf_reset(&d->mnem); 100 strbuf_puts(&d->mnem, ".inst"); 101 strbuf_reset(&d->ops); 102 strbuf_put_hex_u64(&d->ops, (u64)word); 103 } 104 105 static u32 aa64_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, 106 KitInsn* out) { 107 AA64Disasm* d = (AA64Disasm*)base; 108 if (len < 4u) return 0; 109 u32 word = aa64_read_u32_le(bytes); 110 const AA64InsnDesc* desc = aa64_disasm_find(word); 111 if (desc) { 112 aa64_write_mnemonic(d, desc, word); 113 aa64_write_operands(d, desc, word, vaddr); 114 } else { 115 aa64_write_unknown(d, word); 116 } 117 /* Annotation overlay is owned by the public iterator (kit_disasm_iter_*). 118 * The arch-level decoder leaves it empty. */ 119 strbuf_reset(&d->ann); 120 out->vaddr = vaddr; 121 out->bytes = bytes; 122 out->nbytes = 4; 123 out->mnemonic = strbuf_slice(&d->mnem); 124 out->operands = strbuf_slice(&d->ops); 125 out->annotation = strbuf_slice(&d->ann); 126 return 4; 127 } 128 129 static void aa64_destroy(ArchDisasm* base) { 130 AA64Disasm* d = (AA64Disasm*)base; 131 d->heap->free(d->heap, d, sizeof(*d)); 132 } 133 134 ArchDisasm* aa64_disasm_new(Compiler* c) { 135 Heap* h = (Heap*)c->ctx->heap; 136 AA64Disasm* d = (AA64Disasm*)h->alloc(h, sizeof(*d), _Alignof(AA64Disasm)); 137 if (!d) return NULL; 138 memset(d, 0, sizeof(*d)); 139 d->c = c; 140 d->heap = h; 141 d->base.decode = aa64_decode; 142 d->base.destroy = aa64_destroy; 143 strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); 144 strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); 145 strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf); 146 return &d->base; 147 }