commit 85b11149946e7dc2f7f1a039aa3e4c389be4c962
parent 898aaa7475961d5f8545fffbcc3aba525a4f181b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 13:32:45 -0700
x64 disasm: decode movd/movq (66 0F 6E/7E) and xorps/xorpd (0F 57)
The x64 backend emits these for int<->FP bitcasts and FP negation, but they
were absent from x64_insn_table, so the disassembler fell back to a 1-byte
.byte and desynced the entire following instruction stream (a 5-byte
movq %rax,%xmm15 became five .byte lines, then the next opcode mis-decoded).
This corrupted objdump output and JIT-debugger disassembly for any x64
function touching float/double.
Add table rows split on REX.W (W_REQ_0 -> movd, W_REQ_1 -> movq) so the
mnemonic tracks operand width, plus xorps (no prefix) / xorpd (66). Teach
print_xmm_rr the 66 0F 7E direction (xmm in reg field is the source, GPR r/m
is the dest -> reversed AT&T order); 6E reuses the existing gpr-source path.
Verified byte-for-byte against llvm-objdump for all six forms; a real program
using doubles no longer produces any .inst/.byte desync. Adds an x64 decode
corpus case (runs under the x64 decode lane).
Diffstat:
4 files changed, 55 insertions(+), 13 deletions(-)
diff --git a/src/arch/x64/isa.c b/src/arch/x64/isa.c
@@ -300,6 +300,25 @@ const X64InsnDesc x64_insn_table[] = {
X64_FMT_SSE_RR, 0),
ROW("cvtss2sd", X64_PFX_F3, 2, 0x0F, 0x5A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY,
X64_FMT_SSE_RR, 0),
+ /* MOVD/MOVQ between GPR and XMM. 66 0F 6E /r is gpr->xmm, 66 0F 7E /r is
+ * xmm->gpr (note the reversed operand order, handled in print_xmm_rr).
+ * REX.W picks movq (64-bit GPR) vs movd (32-bit), and since the *mnemonic*
+ * itself changes we split into W_REQ_0 / W_REQ_1 rows rather than a width
+ * suffix. The backend emits these for int<->FP bitcasts (emit_sse_rr_w). */
+ ROW("movd", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_0,
+ X64_FMT_SSE_RR, 0),
+ ROW("movq", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_1,
+ X64_FMT_SSE_RR, 0),
+ ROW("movd", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_0,
+ X64_FMT_SSE_RR, 0),
+ ROW("movq", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_1,
+ X64_FMT_SSE_RR, 0),
+ /* XORPS / XORPD (0F 57, prefix selects packed-single vs -double). The
+ * backend uses these to clear/negate FP registers. Both operands xmm. */
+ ROW("xorps", X64_PFX_NONE, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY,
+ X64_FMT_SSE_RR, 0),
+ ROW("xorpd", X64_PFX_66, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY,
+ X64_FMT_SSE_RR, 0),
};
const u32 x64_insn_table_n =
@@ -927,23 +946,36 @@ static u32 print_xmm_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes,
u32 off = ctx->opc_off + d->opc_len;
RegRm rr;
if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0;
- /* CVTSI2*: source is GP reg (size from REX.W), dst is xmm.
- * CVTT*2SI: source is xmm, dst is GP reg.
- * Other SSE arith / mov / cmp: both xmm. */
+ /* Operand classes/order by opcode (AT&T src, dst):
+ * 2A CVTSI2* : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm"
+ * 6E MOVD/Q : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm" (gpr->xmm)
+ * 2C CVTT*2SI : rm=xmm(src), reg=GP(dst) -> "rm_xmm, reg_gp"
+ * 7E MOVD/Q : reg=xmm(src), rm=GP(dst) -> "reg_xmm, rm_gp" (reversed!)
+ * others : both xmm -> "rm_xmm, reg_xmm"
+ * GP width comes from REX.W (movd vs movq / 32- vs 64-bit operands). */
u8 op = d->opc[1];
- int dst_is_gp = (op == 0x2Cu); /* CVTTSD/SS2SI */
- int src_is_gp = (op == 0x2Au); /* CVTSI2SD/SS */
u32 gp_w = ctx->rex_w ? 8u : 4u;
- if (src_is_gp) {
+ if (op == 0x7Eu) {
+ /* xmm -> r/m GPR: source is the reg-field xmm, dest is the r/m GPR. */
+ put_xmm(sb, rr.reg);
+ strbuf_puts(sb, ", ");
put_rm(sb, &rr, *ctx, gp_w);
- } else {
- put_rm_xmm(sb, &rr, *ctx);
+ return off + 1u + rr.bytes_after_modrm;
}
- strbuf_puts(sb, ", ");
- if (dst_is_gp) {
- put_reg_ctx(sb, rr.reg, gp_w, ctx->has_rex);
- } else {
- put_xmm(sb, rr.reg);
+ {
+ int dst_is_gp = (op == 0x2Cu); /* CVTTSD/SS2SI */
+ int src_is_gp = (op == 0x2Au || op == 0x6Eu); /* CVTSI2*, MOVD/Q g->x */
+ if (src_is_gp) {
+ put_rm(sb, &rr, *ctx, gp_w);
+ } else {
+ put_rm_xmm(sb, &rr, *ctx);
+ }
+ strbuf_puts(sb, ", ");
+ if (dst_is_gp) {
+ put_reg_ctx(sb, rr.reg, gp_w, ctx->has_rex);
+ } else {
+ put_xmm(sb, rr.reg);
+ }
}
return off + 1u + rr.bytes_after_modrm;
}
diff --git a/test/asm/decode/x64_sse_movd_movq.expected.txt b/test/asm/decode/x64_sse_movd_movq.expected.txt
@@ -0,0 +1,6 @@
+0: movq %xmm0, %rax
+5: movd %xmm1, %ecx
+9: movq %rdx, %xmm2
+e: movd %esi, %xmm3
+12: xorpd %xmm4, %xmm5
+16: xorps %xmm6, %xmm7
diff --git a/test/asm/decode/x64_sse_movd_movq.hex b/test/asm/decode/x64_sse_movd_movq.hex
@@ -0,0 +1 @@
+66480f7ec0660f7ec966480f6ed2660f6ede660f57ec0f57fe
+\ No newline at end of file
diff --git a/test/asm/decode/x64_sse_movd_movq.targets b/test/asm/decode/x64_sse_movd_movq.targets
@@ -0,0 +1 @@
+x64
+\ No newline at end of file