isa.c (44498B)
1 /* x86_64 instruction descriptor table + operand print/decode dispatch. 2 * 3 * The table mirrors every encoding `src/arch/x64/emit.c` produces, plus a 4 * handful that show up via direct byte writes in arch/x64/{alloc,link,ops}.c 5 * (CALL/JMP rel32, PUSH/POP r64, multi-byte NOP, atomic prefixes). Each 6 * row pins down (leg_pfx, opcode bytes, /digit) so the disassembler can 7 * identify a raw byte stream with one linear pass and then dispatch on 8 * the format to render operands. 9 * 10 * Row ordering: first-match wins. Aliases (rows with X64_ASMFL_ALIAS) 11 * sit BEFORE the canonical row they alias so the disassembler prefers 12 * the alias spelling on output. We keep aliases narrow today (e.g., 13 * SSE-prefixed forms naturally precede their no-prefix neighbours) — we 14 * can add `xor %eax,%eax` zeroing-idiom aliases later if disasm output 15 * needs them. */ 16 17 #include "arch/x64/isa.h" 18 19 #include <stddef.h> 20 #include <string.h> 21 22 #include "core/bytes.h" 23 24 /* ==================================================================== 25 * Table. Mnemonics are AT&T-style, lower-case, no size suffix; the 26 * printer derives the size letter (b/w/l/q) from the fmt + REX.W where 27 * appropriate. 28 * ==================================================================== */ 29 30 #define ROW(mn, lp, ol, b0, b1, b2, lm, mr, wr, f, fl) \ 31 {{{(mn)}, sizeof(mn) - 1}, lp, ol, {b0, b1, b2}, lm, mr, wr, f, fl} 32 #define NO_MODRM 0xFFu 33 34 const X64InsnDesc x64_insn_table[] = { 35 /* ---- single-byte nullary ---- */ 36 ROW("nop", X64_PFX_NONE, 1, 0x90, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 37 X64_FMT_NULLARY, 0), 38 ROW("ret", X64_PFX_NONE, 1, 0xC3, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 39 X64_FMT_NULLARY, 0), 40 ROW("leave", X64_PFX_NONE, 1, 0xC9, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 41 X64_FMT_NULLARY, 0), 42 ROW("cltd", X64_PFX_NONE, 1, 0x99, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 43 X64_FMT_NULLARY, 0), 44 ROW("cqto", X64_PFX_NONE, 1, 0x99, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 45 X64_FMT_NULLARY, 0), 46 47 /* ---- two-byte UD2 ---- */ 48 ROW("ud2", X64_PFX_NONE, 2, 0x0F, 0x0B, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 49 X64_FMT_NULLARY, 0), 50 /* ---- SYSCALL (0F 05): fast system call ---- */ 51 ROW("syscall", X64_PFX_NONE, 2, 0x0F, 0x05, 0, 0xFF, NO_MODRM, 52 X64_W_REQ_ANY, X64_FMT_NULLARY, 0), 53 ROW("mfence", X64_PFX_NONE, 3, 0x0F, 0xAE, 0xF0, 0xFF, NO_MODRM, 54 X64_W_REQ_ANY, X64_FMT_NULLARY, 0), 55 56 /* ---- multi-byte NOP: 66 0F 1F /0 ---- 57 * Matches the 6-byte canonical "NOPW 0(%rax,%rax,1)" kit emits to pad 58 * the IPLT stub. The mod/rm bytes (and any disp) are consumed by the 59 * NOP_MULTI printer. */ 60 ROW("nopw", X64_PFX_66, 2, 0x0F, 0x1F, 0, 0xFF, 0, X64_W_REQ_ANY, 61 X64_FMT_NOP_MULTI, 0), 62 ROW("nopl", X64_PFX_NONE, 2, 0x0F, 0x1F, 0, 0xFF, 0, X64_W_REQ_ANY, 63 X64_FMT_NOP_MULTI, 0), 64 65 /* ---- PUSH/POP r64 (embed-reg in low 3 bits) ---- */ 66 ROW("push", X64_PFX_NONE, 1, 0x50, 0, 0, 0xF8, NO_MODRM, X64_W_REQ_ANY, 67 X64_FMT_PUSH_POP, X64_ASMFL_FORCE_W64), 68 ROW("pop", X64_PFX_NONE, 1, 0x58, 0, 0, 0xF8, NO_MODRM, X64_W_REQ_ANY, 69 X64_FMT_PUSH_POP, X64_ASMFL_FORCE_W64), 70 71 /* ---- MOV r, imm — B8+rd; width via REX.W ---- 72 * imm32 form (no REX.W) and imm64 movabs form (REX.W=1) share the 73 * same row; the printer reads ctx->rex_w to pick the imm width. */ 74 ROW("mov", X64_PFX_NONE, 1, 0xB8, 0, 0, 0xF8, NO_MODRM, X64_W_REQ_ANY, 75 X64_FMT_MOV_RI, X64_ASMFL_W_FROM_REX), 76 77 /* ---- ALU r/m, r — opcode picks op ---- */ 78 ROW("add", X64_PFX_NONE, 1, 0x01, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 79 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 80 ROW("or", X64_PFX_NONE, 1, 0x09, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 81 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 82 ROW("and", X64_PFX_NONE, 1, 0x21, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 83 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 84 ROW("sub", X64_PFX_NONE, 1, 0x29, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 85 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 86 ROW("xor", X64_PFX_NONE, 1, 0x31, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 87 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 88 ROW("cmp", X64_PFX_NONE, 1, 0x39, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 89 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 90 ROW("test", X64_PFX_NONE, 1, 0x85, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 91 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 92 ROW("mov", X64_PFX_NONE, 1, 0x89, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 93 X64_FMT_ALU_RR, X64_ASMFL_W_FROM_REX), 94 /* Byte form: MOV r/m8, r8 — opcode 88 forces 1-byte operands. */ 95 ROW("mov", X64_PFX_NONE, 1, 0x88, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 96 X64_FMT_ALU_RR, X64_ASMFL_BYTE), 97 /* 16-bit form: 0x66 prefix forces 2-byte operands. */ 98 ROW("mov", X64_PFX_66, 1, 0x89, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 99 X64_FMT_ALU_RR, X64_ASMFL_W16), 100 101 /* ---- MOV r, r/m (load and reg-reg share opcode 8B) ---- 102 * 8B /r matches both r,r and r,[base+disp]; the printer dispatches on 103 * ModR/M.mod. LEA is 8D /r — register-only ModR/M.mod=11 is illegal, 104 * so we use a separate row keyed on the opcode. */ 105 ROW("mov", X64_PFX_NONE, 1, 0x8B, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 106 X64_FMT_MOV_RM_LOAD, X64_ASMFL_W_FROM_REX), 107 /* 16-bit r←r/m via 0x66 prefix. */ 108 ROW("mov", X64_PFX_66, 1, 0x8B, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 109 X64_FMT_MOV_RM_LOAD, X64_ASMFL_W16), 110 ROW("lea", X64_PFX_NONE, 1, 0x8D, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 111 X64_FMT_MOV_RM_LOAD, X64_ASMFL_W_FROM_REX), 112 113 /* ---- MOVZX / MOVSX r{32,64}, r/m{8,16} ---- 114 * The destination width is the *l* (32-bit) form without REX.W and the *q* 115 * (64-bit) form with it; split by W so the disassembler emits a mnemonic 116 * whose size letter matches the printed register width (clang rejects a 117 * `movsbl` with a 64-bit destination). Same opcodes; W disambiguates, 118 * exactly like cltd/cqto (0x99). */ 119 ROW("movzbl", X64_PFX_NONE, 2, 0x0F, 0xB6, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 120 X64_FMT_MOVZX_MOVSX, 0), 121 ROW("movzbq", X64_PFX_NONE, 2, 0x0F, 0xB6, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 122 X64_FMT_MOVZX_MOVSX, 0), 123 ROW("movzwl", X64_PFX_NONE, 2, 0x0F, 0xB7, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 124 X64_FMT_MOVZX_MOVSX, 0), 125 ROW("movzwq", X64_PFX_NONE, 2, 0x0F, 0xB7, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 126 X64_FMT_MOVZX_MOVSX, 0), 127 ROW("movsbl", X64_PFX_NONE, 2, 0x0F, 0xBE, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 128 X64_FMT_MOVZX_MOVSX, 0), 129 ROW("movsbq", X64_PFX_NONE, 2, 0x0F, 0xBE, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 130 X64_FMT_MOVZX_MOVSX, 0), 131 ROW("movswl", X64_PFX_NONE, 2, 0x0F, 0xBF, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 132 X64_FMT_MOVZX_MOVSX, 0), 133 ROW("movswq", X64_PFX_NONE, 2, 0x0F, 0xBF, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 134 X64_FMT_MOVZX_MOVSX, 0), 135 136 /* ---- MOVSXD r64, r/m32 ---- */ 137 ROW("movslq", X64_PFX_NONE, 1, 0x63, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 138 X64_FMT_MOVSXD, 0), 139 140 /* ---- ALU r/m, imm — /digit picks operation ---- 141 * 83 (imm8 sign-extended), 81 (imm32 sign-extended). One row per 142 * (opcode, /digit) pair. 143 * /0 ADD /1 OR /4 AND /5 SUB /6 XOR /7 CMP 144 * (/2 ADC and /3 SBB are also valid in the Intel manual but kit 145 * doesn't emit them; they can land later as additional rows.) */ 146 ROW("add", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 0, X64_W_REQ_ANY, 147 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 148 ROW("or", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 1, X64_W_REQ_ANY, 149 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 150 ROW("and", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 151 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 152 ROW("sub", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 5, X64_W_REQ_ANY, 153 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 154 ROW("xor", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 6, X64_W_REQ_ANY, 155 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 156 ROW("cmp", X64_PFX_NONE, 1, 0x83, 0, 0, 0xFF, 7, X64_W_REQ_ANY, 157 X64_FMT_ALU_RM_IMM8, X64_ASMFL_W_FROM_REX), 158 ROW("add", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 0, X64_W_REQ_ANY, 159 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 160 ROW("or", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 1, X64_W_REQ_ANY, 161 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 162 ROW("and", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 163 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 164 ROW("sub", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 5, X64_W_REQ_ANY, 165 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 166 ROW("xor", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 6, X64_W_REQ_ANY, 167 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 168 ROW("cmp", X64_PFX_NONE, 1, 0x81, 0, 0, 0xFF, 7, X64_W_REQ_ANY, 169 X64_FMT_ALU_RM_IMM32, X64_ASMFL_W_FROM_REX), 170 171 /* ---- IMUL r, r/m (0F AF) / IMUL r, r/m, imm (69 / 6B) ---- */ 172 ROW("imul", X64_PFX_NONE, 2, 0x0F, 0xAF, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 173 X64_FMT_IMUL_RR, X64_ASMFL_W_FROM_REX), 174 ROW("imul", X64_PFX_NONE, 1, 0x6B, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 175 X64_FMT_IMUL_RRI, X64_ASMFL_W_FROM_REX), 176 ROW("imul", X64_PFX_NONE, 1, 0x69, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 177 X64_FMT_IMUL_RRI, X64_ASMFL_W_FROM_REX | 0x80u /* imm32 */), 178 179 /* ---- F7 /sub family (no immediate read except for /0 /1 which we 180 * don't emit) ---- */ 181 ROW("not", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 2, X64_W_REQ_ANY, 182 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 183 ROW("neg", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 3, X64_W_REQ_ANY, 184 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 185 ROW("mul", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 186 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 187 ROW("imul", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 5, X64_W_REQ_ANY, 188 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 189 ROW("div", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 6, X64_W_REQ_ANY, 190 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 191 ROW("idiv", X64_PFX_NONE, 1, 0xF7, 0, 0, 0xFF, 7, X64_W_REQ_ANY, 192 X64_FMT_F7_RM, X64_ASMFL_W_FROM_REX), 193 194 /* ---- Shifts ---- */ 195 ROW("rol", X64_PFX_NONE, 1, 0xC1, 0, 0, 0xFF, 0, X64_W_REQ_ANY, 196 X64_FMT_SHIFT_IMM, X64_ASMFL_W_FROM_REX), 197 ROW("ror", X64_PFX_NONE, 1, 0xC1, 0, 0, 0xFF, 1, X64_W_REQ_ANY, 198 X64_FMT_SHIFT_IMM, X64_ASMFL_W_FROM_REX), 199 ROW("shl", X64_PFX_NONE, 1, 0xC1, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 200 X64_FMT_SHIFT_IMM, X64_ASMFL_W_FROM_REX), 201 ROW("shr", X64_PFX_NONE, 1, 0xC1, 0, 0, 0xFF, 5, X64_W_REQ_ANY, 202 X64_FMT_SHIFT_IMM, X64_ASMFL_W_FROM_REX), 203 ROW("sar", X64_PFX_NONE, 1, 0xC1, 0, 0, 0xFF, 7, X64_W_REQ_ANY, 204 X64_FMT_SHIFT_IMM, X64_ASMFL_W_FROM_REX), 205 /* 16-bit ROL imm8 via 0x66 + C1 /0 — used by emit_rol16_imm8. */ 206 ROW("rol", X64_PFX_66, 1, 0xC1, 0, 0, 0xFF, 0, X64_W_REQ_ANY, 207 X64_FMT_SHIFT_IMM, X64_ASMFL_W16), 208 ROW("shl", X64_PFX_NONE, 1, 0xD3, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 209 X64_FMT_SHIFT_CL, X64_ASMFL_W_FROM_REX), 210 ROW("shr", X64_PFX_NONE, 1, 0xD3, 0, 0, 0xFF, 5, X64_W_REQ_ANY, 211 X64_FMT_SHIFT_CL, X64_ASMFL_W_FROM_REX), 212 ROW("sar", X64_PFX_NONE, 1, 0xD3, 0, 0, 0xFF, 7, X64_W_REQ_ANY, 213 X64_FMT_SHIFT_CL, X64_ASMFL_W_FROM_REX), 214 215 /* ---- Branches ---- */ 216 /* Jcc near: 0F 80..8F rel32; condition in low 4 bits. The printer 217 * picks the mnemonic from a per-condition table. */ 218 ROW("j", X64_PFX_NONE, 2, 0x0F, 0x80, 0, 0xF0, NO_MODRM, X64_W_REQ_ANY, 219 X64_FMT_JCC_REL32, 0), 220 ROW("jmp", X64_PFX_NONE, 1, 0xE9, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 221 X64_FMT_JMP_REL32, 0), 222 ROW("callq", X64_PFX_NONE, 1, 0xE8, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 223 X64_FMT_CALL_REL32, 0), 224 /* Indirect jmp / call via FF /4 or /2. */ 225 ROW("callq", X64_PFX_NONE, 1, 0xFF, 0, 0, 0xFF, 2, X64_W_REQ_ANY, 226 X64_FMT_BR_RM, 0), 227 ROW("jmpq", X64_PFX_NONE, 1, 0xFF, 0, 0, 0xFF, 4, X64_W_REQ_ANY, 228 X64_FMT_BR_RM, 0), 229 230 /* ---- SETcc / CMOVcc ---- 231 * SETcc condition in low 4 bits of 2nd opcode byte (0F 90..9F). 232 * CMOVcc same encoding around 0F 40..4F. */ 233 ROW("set", X64_PFX_NONE, 2, 0x0F, 0x90, 0, 0xF0, 0, X64_W_REQ_ANY, 234 X64_FMT_SETCC_RM, 0), 235 ROW("cmov", X64_PFX_NONE, 2, 0x0F, 0x40, 0, 0xF0, NO_MODRM, X64_W_REQ_ANY, 236 X64_FMT_CMOVCC_RR, X64_ASMFL_W_FROM_REX), 237 238 /* ---- BSWAP r — 0F C8+rd ---- */ 239 ROW("bswap", X64_PFX_NONE, 2, 0x0F, 0xC8, 0, 0xF8, NO_MODRM, X64_W_REQ_ANY, 240 X64_FMT_BSWAP, X64_ASMFL_W_FROM_REX), 241 242 /* ---- Bit scan: BSF / BSR ---- */ 243 ROW("bsf", X64_PFX_NONE, 2, 0x0F, 0xBC, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 244 X64_FMT_BS, X64_ASMFL_W_FROM_REX), 245 ROW("bsr", X64_PFX_NONE, 2, 0x0F, 0xBD, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 246 X64_FMT_BS, X64_ASMFL_W_FROM_REX), 247 248 /* ---- POPCNT — F3 0F B8 /r (note: F3 prefix is REQUIRED) ---- */ 249 ROW("popcnt", X64_PFX_F3, 2, 0x0F, 0xB8, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 250 X64_FMT_POPCNT, X64_ASMFL_W_FROM_REX), 251 252 /* ---- Atomic primitives ---- */ 253 /* XADD m, r — 0F C1 /r (LOCK prefix is decoded separately) */ 254 ROW("xadd", X64_PFX_NONE, 2, 0x0F, 0xC1, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 255 X64_FMT_XADD_MEM, X64_ASMFL_W_FROM_REX), 256 /* XCHG r, r/m — 0x87 /r */ 257 ROW("xchg", X64_PFX_NONE, 1, 0x87, 0, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 258 X64_FMT_XCHG_MEM, X64_ASMFL_W_FROM_REX), 259 /* CMPXCHG m, r — 0F B1 /r */ 260 ROW("cmpxchg", X64_PFX_NONE, 2, 0x0F, 0xB1, 0, 0xFF, NO_MODRM, 261 X64_W_REQ_ANY, X64_FMT_CMPXCHG_MEM, X64_ASMFL_W_FROM_REX), 262 263 /* ---- SSE scalar FP — F2/F3 0F xx /r ---- 264 * Three opcodes per (sd, ss) pair: arith / mov / cmp. Each row pairs 265 * the legacy prefix (selects sd vs ss) with the 0F xx /r opcode. */ 266 /* MOVSS / MOVSD */ 267 ROW("movsd", X64_PFX_F2, 2, 0x0F, 0x10, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 268 X64_FMT_SSE_RR, 0), 269 ROW("movsd", X64_PFX_F2, 2, 0x0F, 0x11, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 270 X64_FMT_SSE_RR, X64_ASMFL_ALIAS), 271 ROW("movss", X64_PFX_F3, 2, 0x0F, 0x10, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 272 X64_FMT_SSE_RR, 0), 273 ROW("movss", X64_PFX_F3, 2, 0x0F, 0x11, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 274 X64_FMT_SSE_RR, X64_ASMFL_ALIAS), 275 /* MOVAPS */ 276 ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x28, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 277 X64_FMT_SSE_RR, 0), 278 ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x29, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 279 X64_FMT_SSE_RR, X64_ASMFL_ALIAS), 280 /* ADD/SUB/MUL/DIV — opcodes 58/5C/59/5E (same byte for ss and sd; 281 * prefix picks). */ 282 ROW("addsd", X64_PFX_F2, 2, 0x0F, 0x58, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 283 X64_FMT_SSE_RR, 0), 284 ROW("addss", X64_PFX_F3, 2, 0x0F, 0x58, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 285 X64_FMT_SSE_RR, 0), 286 ROW("mulsd", X64_PFX_F2, 2, 0x0F, 0x59, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 287 X64_FMT_SSE_RR, 0), 288 ROW("mulss", X64_PFX_F3, 2, 0x0F, 0x59, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 289 X64_FMT_SSE_RR, 0), 290 ROW("subsd", X64_PFX_F2, 2, 0x0F, 0x5C, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 291 X64_FMT_SSE_RR, 0), 292 ROW("subss", X64_PFX_F3, 2, 0x0F, 0x5C, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 293 X64_FMT_SSE_RR, 0), 294 ROW("divsd", X64_PFX_F2, 2, 0x0F, 0x5E, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 295 X64_FMT_SSE_RR, 0), 296 ROW("divss", X64_PFX_F3, 2, 0x0F, 0x5E, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 297 X64_FMT_SSE_RR, 0), 298 /* Compare scalar (UCOMISS / UCOMISD) */ 299 ROW("ucomisd", X64_PFX_66, 2, 0x0F, 0x2E, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 300 X64_FMT_SSE_RR, 0), 301 ROW("ucomiss", X64_PFX_NONE, 2, 0x0F, 0x2E, 0, 0xFF, NO_MODRM, 302 X64_W_REQ_ANY, X64_FMT_SSE_RR, 0), 303 /* Conversions touched by FP↔int paths: CVTSI2SS/SD, CVTTSS/SD2SI. */ 304 ROW("cvtsi2sd", X64_PFX_F2, 2, 0x0F, 0x2A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 305 X64_FMT_SSE_RR, X64_ASMFL_W_FROM_REX), 306 ROW("cvtsi2ss", X64_PFX_F3, 2, 0x0F, 0x2A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 307 X64_FMT_SSE_RR, X64_ASMFL_W_FROM_REX), 308 ROW("cvttsd2si", X64_PFX_F2, 2, 0x0F, 0x2C, 0, 0xFF, NO_MODRM, 309 X64_W_REQ_ANY, X64_FMT_SSE_RR, X64_ASMFL_W_FROM_REX), 310 ROW("cvttss2si", X64_PFX_F3, 2, 0x0F, 0x2C, 0, 0xFF, NO_MODRM, 311 X64_W_REQ_ANY, X64_FMT_SSE_RR, X64_ASMFL_W_FROM_REX), 312 ROW("cvtsd2ss", X64_PFX_F2, 2, 0x0F, 0x5A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 313 X64_FMT_SSE_RR, 0), 314 ROW("cvtss2sd", X64_PFX_F3, 2, 0x0F, 0x5A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 315 X64_FMT_SSE_RR, 0), 316 /* MOVD/MOVQ between GPR and XMM. 66 0F 6E /r is gpr->xmm, 66 0F 7E /r is 317 * xmm->gpr (note the reversed operand order, handled in print_xmm_rr). 318 * REX.W picks movq (64-bit GPR) vs movd (32-bit), and since the *mnemonic* 319 * itself changes we split into W_REQ_0 / W_REQ_1 rows rather than a width 320 * suffix. The backend emits these for int<->FP bitcasts (emit_sse_rr_w). */ 321 ROW("movd", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 322 X64_FMT_SSE_RR, 0), 323 ROW("movq", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 324 X64_FMT_SSE_RR, 0), 325 ROW("movd", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_0, 326 X64_FMT_SSE_RR, 0), 327 ROW("movq", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_1, 328 X64_FMT_SSE_RR, 0), 329 /* XORPS / XORPD (0F 57, prefix selects packed-single vs -double). The 330 * backend uses these to clear/negate FP registers. Both operands xmm. */ 331 ROW("xorps", X64_PFX_NONE, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 332 X64_FMT_SSE_RR, 0), 333 ROW("xorpd", X64_PFX_66, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, 334 X64_FMT_SSE_RR, 0), 335 }; 336 337 const u32 x64_insn_table_n = 338 (u32)(sizeof x64_insn_table / sizeof x64_insn_table[0]); 339 340 /* ==================================================================== 341 * Prefix decode. 342 * ==================================================================== */ 343 344 u32 x64_decode_prefixes(const u8* bytes, u32 len, X64DecodeCtx* ctx) { 345 u32 off = 0; 346 memset(ctx, 0, sizeof *ctx); 347 while (off < len) { 348 u8 b = bytes[off]; 349 if (b == 0x66u || b == 0xF2u || b == 0xF3u) { 350 ctx->leg_pfx = b; 351 ++off; 352 continue; 353 } 354 if (b == 0xF0u) { 355 /* LOCK — ignored for opcode lookup but consumed so the 356 * subsequent opcode aligns. The printer adds a "lock " prefix 357 * separately when annotating, but kit's emit.c currently emits 358 * LOCK only before XADD / XCHG / CMPXCHG. */ 359 ctx->has_lock = 1; 360 ++off; 361 continue; 362 } 363 break; 364 } 365 if (off < len && bytes[off] >= 0x40u && bytes[off] <= 0x4Fu) { 366 u8 r = bytes[off]; 367 ctx->has_rex = 1; 368 ctx->rex_w = (r >> 3) & 1u; 369 ctx->rex_r = (r >> 2) & 1u; 370 ctx->rex_x = (r >> 1) & 1u; 371 ctx->rex_b = r & 1u; 372 ++off; 373 } 374 ctx->opc_off = off; 375 return off; 376 } 377 378 /* ==================================================================== 379 * Disassembler row lookup. 380 * ==================================================================== */ 381 382 const X64InsnDesc* x64_disasm_find(const u8* bytes, u32 len, 383 X64DecodeCtx* ctx) { 384 if (ctx->opc_off >= len) return NULL; 385 for (u32 i = 0; i < x64_insn_table_n; ++i) { 386 const X64InsnDesc* d = &x64_insn_table[i]; 387 if (d->leg_pfx != ctx->leg_pfx) continue; 388 if (d->rex_w_req == X64_W_REQ_1 && !ctx->rex_w) continue; 389 if (d->rex_w_req == X64_W_REQ_0 && ctx->rex_w) continue; 390 if (ctx->opc_off + d->opc_len > len) continue; 391 /* Opcode bytes match exactly except the LAST byte, which may use 392 * a low-bit mask (embed-reg or condition nibble). */ 393 int ok = 1; 394 for (u32 j = 0; j + 1u < d->opc_len; ++j) { 395 if (bytes[ctx->opc_off + j] != d->opc[j]) { 396 ok = 0; 397 break; 398 } 399 } 400 if (!ok) continue; 401 { 402 u8 last_act = bytes[ctx->opc_off + d->opc_len - 1u] & d->opc_last_mask; 403 u8 last_exp = d->opc[d->opc_len - 1u] & d->opc_last_mask; 404 if (last_act != last_exp) continue; 405 } 406 /* /digit constraint reads ModR/M.reg. */ 407 if (d->modrm_reg != NO_MODRM) { 408 u32 mrm_off = ctx->opc_off + d->opc_len; 409 if (mrm_off >= len) continue; 410 u8 mrm = bytes[mrm_off]; 411 if (((mrm >> 3) & 7u) != d->modrm_reg) continue; 412 } 413 return d; 414 } 415 return NULL; 416 } 417 418 /* ==================================================================== 419 * Operand printers. 420 * ==================================================================== */ 421 422 #define X64_REG_RIP 16u 423 424 static const char* g_cc_name[16] = { 425 "o", "no", "b", "ae", "e", "ne", "be", "a", 426 "s", "ns", "p", "np", "l", "ge", "le", "g", 427 }; 428 429 /* AT&T register names by width. Index 0..15 covers RAX..R15. */ 430 static const char* reg_name(u32 reg, u32 width_bytes, int has_rex) { 431 static const char* r64[16] = { 432 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", 433 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 434 }; 435 static const char* r32[16] = { 436 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", 437 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", 438 }; 439 static const char* r16[16] = { 440 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", 441 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w", 442 }; 443 static const char* r8[16] = { 444 "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", 445 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", 446 }; 447 static const char* rh8[4] = {"ah", "ch", "dh", "bh"}; 448 reg &= 15u; 449 if (width_bytes == 8) return r64[reg]; 450 if (width_bytes == 4) return r32[reg]; 451 if (width_bytes == 2) return r16[reg]; 452 if (!has_rex && reg >= 4u && reg <= 7u) return rh8[reg - 4u]; 453 return r8[reg]; 454 } 455 456 static const char* xmm_name(u32 reg) { 457 static const char* x[16] = { 458 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 459 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", 460 }; 461 return x[reg & 15u]; 462 } 463 464 static void put_reg(StrBuf* sb, u32 reg, u32 width) { 465 strbuf_putc(sb, '%'); 466 strbuf_puts(sb, reg_name(reg, width, 1)); 467 } 468 static void put_reg_ctx(StrBuf* sb, u32 reg, u32 width, int has_rex) { 469 strbuf_putc(sb, '%'); 470 strbuf_puts(sb, reg_name(reg, width, has_rex)); 471 } 472 static void put_xmm(StrBuf* sb, u32 reg) { 473 strbuf_putc(sb, '%'); 474 strbuf_puts(sb, xmm_name(reg)); 475 } 476 static void put_imm(StrBuf* sb, i64 imm) { 477 strbuf_putc(sb, '$'); 478 strbuf_put_i64(sb, imm); 479 } 480 481 /* Read a signed displacement of n bytes (1 or 4). Returns 1 on success. */ 482 static int read_disp(const u8* bytes, u32 len, u32 off, u32 n, i32* out) { 483 if (off + n > len) return 0; 484 if (n == 1u) { 485 *out = (i32)(i8)bytes[off]; 486 } else if (n == 4u) { 487 *out = (i32)rd_u32_le(bytes + off); 488 } else { 489 *out = 0; 490 } 491 return 1; 492 } 493 494 /* Decode a ModR/M memory operand starting at bytes[off]. Returns number of 495 * extra bytes consumed (ModR/M + SIB? + disp?), or (u32)-1 on truncation. 496 * The ModR/M byte itself is bytes[off]; caller has already read mod/reg/rm. 497 * `disp_out` and `base_out` describe what to print. */ 498 typedef struct DecodedMem { 499 u32 base; 500 u32 index; /* SIB index register (valid when has_index) */ 501 u32 scale; /* SIB scale as the literal 1/2/4/8 (valid when has_index) */ 502 i32 disp; 503 int has_base; 504 int has_index; /* a SIB index register is present */ 505 int rip_relative; 506 u32 bytes_used; 507 } DecodedMem; 508 509 static u32 decode_mem(const u8* bytes, u32 len, u32 off, X64DecodeCtx ctx, 510 u32 mod, u32 rm_low, DecodedMem* out) { 511 out->base = 0; 512 out->index = 0; 513 out->scale = 1; 514 out->disp = 0; 515 out->has_base = 1; 516 out->has_index = 0; 517 out->rip_relative = 0; 518 out->bytes_used = 0; 519 if (mod == 3u) return 0; /* caller handles reg-form */ 520 /* SIB-required form: r/m=100. */ 521 if (rm_low == 4u) { 522 if (off >= len) return (u32)-1; 523 u8 s = bytes[off]; 524 u32 sib_base = (s & 7u) | ((u32)ctx.rex_b << 3); 525 u32 sib_index = ((s >> 3) & 7u) | ((u32)ctx.rex_x << 3); 526 u32 used = 1; 527 /* SIB index = 4 (RSP) with REX.X=0 encodes "no index". */ 528 if (sib_index != 4u) { 529 out->has_index = 1; 530 out->index = sib_index; 531 out->scale = 1u << (s >> 6); 532 } 533 if (mod == 0u && (s & 7u) == 5u) { 534 /* mod=00, base=101: disp32 with no base — either a label-table 535 * disp32 (no index) or an indexed `[index*scale + disp32]`. */ 536 i32 d = 0; 537 if (!read_disp(bytes, len, off + used, 4, &d)) return (u32)-1; 538 used += 4; 539 out->disp = d; 540 out->has_base = 0; 541 out->bytes_used = used; 542 return used; 543 } 544 if (mod == 1u) { 545 i32 d = 0; 546 if (!read_disp(bytes, len, off + used, 1, &d)) return (u32)-1; 547 used += 1; 548 out->disp = d; 549 } else if (mod == 2u) { 550 i32 d = 0; 551 if (!read_disp(bytes, len, off + used, 4, &d)) return (u32)-1; 552 used += 4; 553 out->disp = d; 554 } 555 out->base = sib_base; 556 out->bytes_used = used; 557 return used; 558 } 559 /* Non-SIB form. */ 560 if (mod == 0u && rm_low == 5u) { 561 /* RIP-relative disp32. */ 562 i32 d = 0; 563 if (!read_disp(bytes, len, off, 4, &d)) return (u32)-1; 564 out->disp = d; 565 out->rip_relative = 1; 566 out->bytes_used = 4; 567 return 4; 568 } 569 u32 base = rm_low | ((u32)ctx.rex_b << 3); 570 out->base = base; 571 if (mod == 1u) { 572 i32 d = 0; 573 if (!read_disp(bytes, len, off, 1, &d)) return (u32)-1; 574 out->disp = d; 575 out->bytes_used = 1; 576 return 1; 577 } 578 if (mod == 2u) { 579 i32 d = 0; 580 if (!read_disp(bytes, len, off, 4, &d)) return (u32)-1; 581 out->disp = d; 582 out->bytes_used = 4; 583 return 4; 584 } 585 /* mod == 0u with rm != 5,4 → [reg], no disp. */ 586 return 0; 587 } 588 589 static void put_mem(StrBuf* sb, const DecodedMem* m) { 590 if (m->disp != 0 || (!m->has_base && !m->rip_relative)) { 591 strbuf_put_i64(sb, (i64)m->disp); 592 } 593 if (m->rip_relative) { 594 strbuf_puts(sb, "(%rip)"); 595 } else if (m->has_base || m->has_index) { 596 /* `(base)`, `(base,index,scale)`, or the base-less `(,index,scale)`. */ 597 strbuf_putc(sb, '('); 598 if (m->has_base) put_reg(sb, m->base, 8); 599 if (m->has_index) { 600 strbuf_putc(sb, ','); 601 put_reg(sb, m->index, 8); 602 strbuf_putc(sb, ','); 603 strbuf_put_i64(sb, (i64)m->scale); 604 } 605 strbuf_putc(sb, ')'); 606 } 607 } 608 609 /* ==================================================================== 610 * Width derivation. 611 * ==================================================================== */ 612 613 static u32 width_for(const X64InsnDesc* d, const X64DecodeCtx* ctx) { 614 if (d->flags & X64_ASMFL_FORCE_W64) return 8u; 615 if (d->flags & X64_ASMFL_BYTE) return 1u; 616 if (d->flags & X64_ASMFL_W16) return 2u; 617 if (d->flags & X64_ASMFL_W_FROM_REX) return ctx->rex_w ? 8u : 4u; 618 if (d->leg_pfx == X64_PFX_66) return 2u; 619 return 4u; 620 } 621 622 char x64_size_suffix_for(const X64InsnDesc* desc, const X64DecodeCtx* ctx) { 623 switch ((X64Format)desc->fmt) { 624 case X64_FMT_ALU_RR: 625 case X64_FMT_MOV_RM_LOAD: 626 case X64_FMT_ALU_RM_IMM8: 627 case X64_FMT_ALU_RM_IMM32: 628 case X64_FMT_IMUL_RR: 629 case X64_FMT_IMUL_RRI: 630 case X64_FMT_F7_RM: 631 case X64_FMT_SHIFT_IMM: 632 case X64_FMT_SHIFT_CL: 633 case X64_FMT_BSWAP: 634 case X64_FMT_BS: 635 case X64_FMT_POPCNT: 636 case X64_FMT_XADD_MEM: 637 case X64_FMT_XCHG_MEM: 638 case X64_FMT_CMPXCHG_MEM: 639 case X64_FMT_MOV_RI: 640 switch (width_for(desc, ctx)) { 641 case 1: 642 return 'b'; 643 case 2: 644 return 'w'; 645 case 4: 646 return 'l'; 647 case 8: 648 return 'q'; 649 } 650 return 0; 651 default: 652 return 0; 653 } 654 } 655 656 /* ==================================================================== 657 * Per-format printers. 658 * ==================================================================== */ 659 660 /* Decode a ModR/M with reg+r/m. Returns total bytes consumed by the 661 * ModR/M + any SIB/disp. */ 662 typedef struct RegRm { 663 u32 reg; /* high bit from REX.R */ 664 u32 rm_low; /* low 3 bits */ 665 u32 mod; 666 u32 bytes_after_modrm; /* SIB/disp bytes */ 667 DecodedMem mem; /* valid iff mod != 3 */ 668 } RegRm; 669 670 static int read_modrm(const u8* bytes, u32 len, u32 off, X64DecodeCtx ctx, 671 RegRm* rr) { 672 if (off >= len) return 0; 673 u8 mr = bytes[off]; 674 rr->mod = (mr >> 6) & 3u; 675 rr->reg = ((mr >> 3) & 7u) | ((u32)ctx.rex_r << 3); 676 rr->rm_low = mr & 7u; 677 if (rr->mod == 3u) { 678 rr->bytes_after_modrm = 0; 679 memset(&rr->mem, 0, sizeof rr->mem); 680 return 1; 681 } 682 u32 used = 683 decode_mem(bytes, len, off + 1u, ctx, rr->mod, rr->rm_low, &rr->mem); 684 if (used == (u32)-1) return 0; 685 rr->bytes_after_modrm = used; 686 return 1; 687 } 688 689 /* Print a ModR/M r/m operand at width `w`. */ 690 static void put_rm(StrBuf* sb, const RegRm* rr, X64DecodeCtx ctx, u32 w) { 691 if (rr->mod == 3u) { 692 u32 rm = rr->rm_low | ((u32)ctx.rex_b << 3); 693 put_reg_ctx(sb, rm, w, ctx.has_rex); 694 } else { 695 put_mem(sb, &rr->mem); 696 } 697 } 698 static void put_rm_xmm(StrBuf* sb, const RegRm* rr, X64DecodeCtx ctx) { 699 if (rr->mod == 3u) { 700 u32 rm = rr->rm_low | ((u32)ctx.rex_b << 3); 701 put_xmm(sb, rm); 702 } else { 703 put_mem(sb, &rr->mem); 704 } 705 } 706 707 static u32 print_nullary(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 708 u32 len, const X64DecodeCtx* ctx) { 709 (void)sb; 710 (void)d; 711 (void)bytes; 712 (void)len; 713 return ctx->opc_off + d->opc_len; 714 } 715 716 static u32 print_push_pop(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 717 u32 len, const X64DecodeCtx* ctx) { 718 (void)len; 719 u32 reg = (bytes[ctx->opc_off] & 7u) | ((u32)ctx->rex_b << 3); 720 put_reg(sb, reg, 8); 721 (void)d; 722 return ctx->opc_off + 1u; 723 } 724 725 static u32 print_mov_ri(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 726 u32 len, const X64DecodeCtx* ctx) { 727 (void)d; 728 u32 reg = (bytes[ctx->opc_off] & 7u) | ((u32)ctx->rex_b << 3); 729 u32 off = ctx->opc_off + 1u; 730 if (ctx->rex_w) { 731 if (off + 8u > len) return 0; 732 put_imm(sb, (i64)rd_u64_le(bytes + off)); 733 off += 8u; 734 strbuf_puts(sb, ", "); 735 put_reg(sb, reg, 8); 736 } else { 737 if (off + 4u > len) return 0; 738 put_imm(sb, (i64)(i32)rd_u32_le(bytes + off)); 739 off += 4u; 740 strbuf_puts(sb, ", "); 741 put_reg(sb, reg, 4); 742 } 743 return off; 744 } 745 746 static u32 print_alu_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 747 u32 len, const X64DecodeCtx* ctx) { 748 /* op r/m, r (reg is the source). Width comes from width_for, which 749 * honours the BYTE / W16 / W_FROM_REX flags on the descriptor. */ 750 u32 off = ctx->opc_off + d->opc_len; 751 RegRm rr; 752 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 753 u32 w = width_for(d, ctx); 754 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 755 strbuf_puts(sb, ", "); 756 put_rm(sb, &rr, *ctx, w); 757 return off + 1u + rr.bytes_after_modrm; 758 } 759 760 static u32 print_mov_rm_load(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 761 u32 len, const X64DecodeCtx* ctx) { 762 /* op r, r/m. */ 763 u32 off = ctx->opc_off + d->opc_len; 764 RegRm rr; 765 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 766 u32 w = width_for(d, ctx); 767 if (d->opc[0] == 0x8Du) w = 8u; /* LEA always loads a 64-bit address */ 768 put_rm(sb, &rr, *ctx, w); 769 strbuf_puts(sb, ", "); 770 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 771 return off + 1u + rr.bytes_after_modrm; 772 } 773 774 static u32 print_movzx_movsx(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 775 u32 len, const X64DecodeCtx* ctx) { 776 u32 off = ctx->opc_off + d->opc_len; 777 RegRm rr; 778 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 779 /* Source width = 1 for B6/BE, 2 for B7/BF. Destination width = 4 unless 780 * REX.W (then 8). */ 781 u32 src_w = (d->opc[1] == 0xB7u || d->opc[1] == 0xBFu) ? 2u : 1u; 782 u32 dst_w = ctx->rex_w ? 8u : 4u; 783 put_rm(sb, &rr, *ctx, src_w); 784 strbuf_puts(sb, ", "); 785 put_reg_ctx(sb, rr.reg, dst_w, ctx->has_rex); 786 return off + 1u + rr.bytes_after_modrm; 787 } 788 789 static u32 print_movsxd(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 790 u32 len, const X64DecodeCtx* ctx) { 791 u32 off = ctx->opc_off + d->opc_len; 792 RegRm rr; 793 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 794 put_rm(sb, &rr, *ctx, 4u); 795 strbuf_puts(sb, ", "); 796 put_reg_ctx(sb, rr.reg, 8u, ctx->has_rex); 797 return off + 1u + rr.bytes_after_modrm; 798 } 799 800 static u32 print_alu_rm_imm(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 801 u32 len, const X64DecodeCtx* ctx) { 802 u32 off = ctx->opc_off + d->opc_len; 803 RegRm rr; 804 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 805 u32 used = 1u + rr.bytes_after_modrm; 806 i64 imm = 0; 807 if (d->fmt == X64_FMT_ALU_RM_IMM8) { 808 if (off + used >= len) return 0; 809 imm = (i64)(i8)bytes[off + used]; 810 used += 1u; 811 } else { 812 if (off + used + 3u >= len) return 0; 813 imm = (i64)(i32)rd_u32_le(bytes + off + used); 814 used += 4u; 815 } 816 u32 w = width_for(d, ctx); 817 put_imm(sb, imm); 818 strbuf_puts(sb, ", "); 819 put_rm(sb, &rr, *ctx, w); 820 return off + used; 821 } 822 823 static u32 print_imul_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 824 u32 len, const X64DecodeCtx* ctx) { 825 u32 off = ctx->opc_off + d->opc_len; 826 RegRm rr; 827 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 828 u32 w = width_for(d, ctx); 829 put_rm(sb, &rr, *ctx, w); 830 strbuf_puts(sb, ", "); 831 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 832 return off + 1u + rr.bytes_after_modrm; 833 } 834 835 static u32 print_imul_rri(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 836 u32 len, const X64DecodeCtx* ctx) { 837 /* 69 /r imm32 (full) or 6B /r imm8 (sign-extended). */ 838 u32 off = ctx->opc_off + d->opc_len; 839 RegRm rr; 840 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 841 u32 used = 1u + rr.bytes_after_modrm; 842 i64 imm = 0; 843 u8 op = d->opc[0]; 844 if (op == 0x6Bu) { 845 if (off + used >= len) return 0; 846 imm = (i64)(i8)bytes[off + used]; 847 used += 1u; 848 } else { 849 if (off + used + 3u >= len) return 0; 850 imm = (i64)(i32)rd_u32_le(bytes + off + used); 851 used += 4u; 852 } 853 u32 w = width_for(d, ctx); 854 put_imm(sb, imm); 855 strbuf_puts(sb, ", "); 856 put_rm(sb, &rr, *ctx, w); 857 strbuf_puts(sb, ", "); 858 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 859 return off + used; 860 } 861 862 static u32 print_f7_rm(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 863 u32 len, const X64DecodeCtx* ctx) { 864 u32 off = ctx->opc_off + d->opc_len; 865 RegRm rr; 866 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 867 u32 w = width_for(d, ctx); 868 put_rm(sb, &rr, *ctx, w); 869 return off + 1u + rr.bytes_after_modrm; 870 } 871 872 static u32 print_shift_imm(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 873 u32 len, const X64DecodeCtx* ctx) { 874 u32 off = ctx->opc_off + d->opc_len; 875 RegRm rr; 876 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 877 u32 used = 1u + rr.bytes_after_modrm; 878 if (off + used >= len) return 0; 879 u8 imm = bytes[off + used]; 880 ++used; 881 u32 w = width_for(d, ctx); 882 put_imm(sb, (i64)imm); 883 strbuf_puts(sb, ", "); 884 put_rm(sb, &rr, *ctx, w); 885 return off + used; 886 } 887 888 static u32 print_shift_cl(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 889 u32 len, const X64DecodeCtx* ctx) { 890 u32 off = ctx->opc_off + d->opc_len; 891 RegRm rr; 892 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 893 u32 w = width_for(d, ctx); 894 strbuf_puts(sb, "%cl, "); 895 put_rm(sb, &rr, *ctx, w); 896 return off + 1u + rr.bytes_after_modrm; 897 } 898 899 static u32 print_jcc_rel32(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 900 u32 len, const X64DecodeCtx* ctx, u64 vaddr) { 901 u32 off = ctx->opc_off + d->opc_len; 902 if (off + 4u > len) return 0; 903 i32 rel = (i32)rd_u32_le(bytes + off); 904 u64 tgt = vaddr + (u64)(off + 4u) + (u64)rel; 905 /* Mnemonic suffix from condition nibble: caller wrote "j"; we append. */ 906 strbuf_putc(sb, ' '); 907 strbuf_put_hex_u64(sb, tgt); 908 return off + 4u; 909 } 910 911 static u32 print_jmp_call_rel32(StrBuf* sb, const X64InsnDesc* d, 912 const u8* bytes, u32 len, 913 const X64DecodeCtx* ctx, u64 vaddr) { 914 u32 off = ctx->opc_off + d->opc_len; 915 if (off + 4u > len) return 0; 916 i32 rel = (i32)rd_u32_le(bytes + off); 917 u64 tgt = vaddr + (u64)(off + 4u) + (u64)rel; 918 strbuf_put_hex_u64(sb, tgt); 919 return off + 4u; 920 } 921 922 static u32 print_br_rm(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 923 u32 len, const X64DecodeCtx* ctx) { 924 u32 off = ctx->opc_off + d->opc_len; 925 RegRm rr; 926 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 927 strbuf_putc(sb, '*'); 928 put_rm(sb, &rr, *ctx, 8u); 929 return off + 1u + rr.bytes_after_modrm; 930 } 931 932 static u32 print_setcc(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 933 u32 len, const X64DecodeCtx* ctx) { 934 u32 off = ctx->opc_off + d->opc_len; 935 RegRm rr; 936 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 937 put_rm(sb, &rr, *ctx, 1u); 938 (void)d; 939 return off + 1u + rr.bytes_after_modrm; 940 } 941 942 static u32 print_cmovcc_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 943 u32 len, const X64DecodeCtx* ctx) { 944 u32 off = ctx->opc_off + d->opc_len; 945 RegRm rr; 946 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 947 u32 w = width_for(d, ctx); 948 put_rm(sb, &rr, *ctx, w); 949 strbuf_puts(sb, ", "); 950 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 951 return off + 1u + rr.bytes_after_modrm; 952 } 953 954 static u32 print_bswap(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 955 u32 len, const X64DecodeCtx* ctx) { 956 (void)d; 957 (void)len; 958 u32 reg = (bytes[ctx->opc_off + 1u] & 7u) | ((u32)ctx->rex_b << 3); 959 u32 w = ctx->rex_w ? 8u : 4u; 960 put_reg_ctx(sb, reg, w, ctx->has_rex); 961 return ctx->opc_off + 2u; 962 } 963 964 static u32 print_bs(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, u32 len, 965 const X64DecodeCtx* ctx) { 966 /* dst = bsr/bsf(src). Operand order in AT&T is "src, dst". */ 967 u32 off = ctx->opc_off + d->opc_len; 968 RegRm rr; 969 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 970 u32 w = width_for(d, ctx); 971 put_rm(sb, &rr, *ctx, w); 972 strbuf_puts(sb, ", "); 973 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 974 return off + 1u + rr.bytes_after_modrm; 975 } 976 977 static u32 print_xmm_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 978 u32 len, const X64DecodeCtx* ctx) { 979 u32 off = ctx->opc_off + d->opc_len; 980 RegRm rr; 981 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 982 /* Operand classes/order by opcode (AT&T src, dst): 983 * 2A CVTSI2* : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm" 984 * 6E MOVD/Q : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm" (gpr->xmm) 985 * 2C CVTT*2SI : rm=xmm(src), reg=GP(dst) -> "rm_xmm, reg_gp" 986 * 7E MOVD/Q : reg=xmm(src), rm=GP(dst) -> "reg_xmm, rm_gp" (reversed!) 987 * others : both xmm -> "rm_xmm, reg_xmm" 988 * GP width comes from REX.W (movd vs movq / 32- vs 64-bit operands). */ 989 u8 op = d->opc[1]; 990 u32 gp_w = ctx->rex_w ? 8u : 4u; 991 if (op == 0x7Eu) { 992 /* xmm -> r/m GPR: source is the reg-field xmm, dest is the r/m GPR. */ 993 put_xmm(sb, rr.reg); 994 strbuf_puts(sb, ", "); 995 put_rm(sb, &rr, *ctx, gp_w); 996 return off + 1u + rr.bytes_after_modrm; 997 } 998 /* Store-direction XMM moves (MOVSD/MOVSS/MOVUPS 0x11, MOVAPS 0x29): the 999 * reg-field xmm is the SOURCE and the r/m (memory or xmm) is the 1000 * DESTINATION — AT&T order `reg_xmm, rm`. Without this the disassembler 1001 * prints them in load order, so re-assembly flips the data direction. */ 1002 if (op == 0x11u || op == 0x29u) { 1003 put_xmm(sb, rr.reg); 1004 strbuf_puts(sb, ", "); 1005 put_rm_xmm(sb, &rr, *ctx); 1006 return off + 1u + rr.bytes_after_modrm; 1007 } 1008 { 1009 int dst_is_gp = (op == 0x2Cu); /* CVTTSD/SS2SI */ 1010 int src_is_gp = (op == 0x2Au || op == 0x6Eu); /* CVTSI2*, MOVD/Q g->x */ 1011 if (src_is_gp) { 1012 put_rm(sb, &rr, *ctx, gp_w); 1013 } else { 1014 put_rm_xmm(sb, &rr, *ctx); 1015 } 1016 strbuf_puts(sb, ", "); 1017 if (dst_is_gp) { 1018 put_reg_ctx(sb, rr.reg, gp_w, ctx->has_rex); 1019 } else { 1020 put_xmm(sb, rr.reg); 1021 } 1022 } 1023 return off + 1u + rr.bytes_after_modrm; 1024 } 1025 1026 static u32 print_xadd_mem(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 1027 u32 len, const X64DecodeCtx* ctx) { 1028 /* XADD r/m, r — source is the reg, destination is r/m. */ 1029 u32 off = ctx->opc_off + d->opc_len; 1030 RegRm rr; 1031 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 1032 u32 w = width_for(d, ctx); 1033 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 1034 strbuf_puts(sb, ", "); 1035 put_rm(sb, &rr, *ctx, w); 1036 return off + 1u + rr.bytes_after_modrm; 1037 } 1038 1039 static u32 print_xchg_mem(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 1040 u32 len, const X64DecodeCtx* ctx) { 1041 u32 off = ctx->opc_off + d->opc_len; 1042 RegRm rr; 1043 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 1044 u32 w = width_for(d, ctx); 1045 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 1046 strbuf_puts(sb, ", "); 1047 put_rm(sb, &rr, *ctx, w); 1048 return off + 1u + rr.bytes_after_modrm; 1049 } 1050 1051 static u32 print_cmpxchg_mem(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 1052 u32 len, const X64DecodeCtx* ctx) { 1053 /* CMPXCHG r/m, r — implicit RAX is the comparand; not shown. */ 1054 u32 off = ctx->opc_off + d->opc_len; 1055 RegRm rr; 1056 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 1057 u32 w = width_for(d, ctx); 1058 put_reg_ctx(sb, rr.reg, w, ctx->has_rex); 1059 strbuf_puts(sb, ", "); 1060 put_rm(sb, &rr, *ctx, w); 1061 return off + 1u + rr.bytes_after_modrm; 1062 } 1063 1064 static u32 print_nop_multi(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 1065 u32 len, const X64DecodeCtx* ctx) { 1066 (void)sb; 1067 u32 off = ctx->opc_off + d->opc_len; 1068 RegRm rr; 1069 if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; 1070 return off + 1u + rr.bytes_after_modrm; 1071 } 1072 1073 /* ==================================================================== 1074 * Dispatch. 1075 * ==================================================================== */ 1076 1077 u32 x64_print_operands(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, 1078 u32 len, const X64DecodeCtx* ctx, u64 vaddr) { 1079 switch ((X64Format)d->fmt) { 1080 case X64_FMT_NULLARY: 1081 return print_nullary(sb, d, bytes, len, ctx); 1082 case X64_FMT_NOP_MULTI: 1083 return print_nop_multi(sb, d, bytes, len, ctx); 1084 case X64_FMT_PUSH_POP: 1085 return print_push_pop(sb, d, bytes, len, ctx); 1086 case X64_FMT_MOV_RI: 1087 return print_mov_ri(sb, d, bytes, len, ctx); 1088 case X64_FMT_ALU_RR: 1089 return print_alu_rr(sb, d, bytes, len, ctx); 1090 case X64_FMT_MOV_RM_LOAD: 1091 return print_mov_rm_load(sb, d, bytes, len, ctx); 1092 case X64_FMT_MOVZX_MOVSX: 1093 return print_movzx_movsx(sb, d, bytes, len, ctx); 1094 case X64_FMT_MOVSXD: 1095 return print_movsxd(sb, d, bytes, len, ctx); 1096 case X64_FMT_ALU_RM_IMM8: 1097 case X64_FMT_ALU_RM_IMM32: 1098 return print_alu_rm_imm(sb, d, bytes, len, ctx); 1099 case X64_FMT_IMUL_RR: 1100 return print_imul_rr(sb, d, bytes, len, ctx); 1101 case X64_FMT_IMUL_RRI: 1102 return print_imul_rri(sb, d, bytes, len, ctx); 1103 case X64_FMT_F7_RM: 1104 return print_f7_rm(sb, d, bytes, len, ctx); 1105 case X64_FMT_SHIFT_IMM: 1106 return print_shift_imm(sb, d, bytes, len, ctx); 1107 case X64_FMT_SHIFT_CL: 1108 return print_shift_cl(sb, d, bytes, len, ctx); 1109 case X64_FMT_JCC_REL32: 1110 return print_jcc_rel32(sb, d, bytes, len, ctx, vaddr); 1111 case X64_FMT_JMP_REL32: 1112 case X64_FMT_CALL_REL32: 1113 return print_jmp_call_rel32(sb, d, bytes, len, ctx, vaddr); 1114 case X64_FMT_BR_RM: 1115 return print_br_rm(sb, d, bytes, len, ctx); 1116 case X64_FMT_SETCC_RM: 1117 return print_setcc(sb, d, bytes, len, ctx); 1118 case X64_FMT_CMOVCC_RR: 1119 return print_cmovcc_rr(sb, d, bytes, len, ctx); 1120 case X64_FMT_BSWAP: 1121 return print_bswap(sb, d, bytes, len, ctx); 1122 case X64_FMT_BS: 1123 return print_bs(sb, d, bytes, len, ctx); 1124 case X64_FMT_POPCNT: 1125 return print_bs(sb, d, bytes, len, ctx); /* same shape */ 1126 case X64_FMT_SSE_RR: 1127 case X64_FMT_SSE_LOAD: 1128 case X64_FMT_SSE_STORE: 1129 return print_xmm_rr(sb, d, bytes, len, ctx); 1130 case X64_FMT_XADD_MEM: 1131 return print_xadd_mem(sb, d, bytes, len, ctx); 1132 case X64_FMT_XCHG_MEM: 1133 return print_xchg_mem(sb, d, bytes, len, ctx); 1134 case X64_FMT_CMPXCHG_MEM: 1135 return print_cmpxchg_mem(sb, d, bytes, len, ctx); 1136 case X64_FMT_RAW_BYTE: 1137 return 0; 1138 } 1139 return 0; 1140 } 1141 1142 /* Resolve the condition nibble for Jcc/SETcc/CMOVcc to its AT&T mnemonic 1143 * suffix. Used by the disassembler to spell j → "je", set → "sete", etc. */ 1144 const char* x64_cc_name(u8 cc) { return g_cc_name[cc & 0xFu]; }