kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 85b11149946e7dc2f7f1a039aa3e4c389be4c962
parent 898aaa7475961d5f8545fffbcc3aba525a4f181b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 13:32:45 -0700

x64 disasm: decode movd/movq (66 0F 6E/7E) and xorps/xorpd (0F 57)

The x64 backend emits these for int<->FP bitcasts and FP negation, but they
were absent from x64_insn_table, so the disassembler fell back to a 1-byte
.byte and desynced the entire following instruction stream (a 5-byte
movq %rax,%xmm15 became five .byte lines, then the next opcode mis-decoded).
This corrupted objdump output and JIT-debugger disassembly for any x64
function touching float/double.

Add table rows split on REX.W (W_REQ_0 -> movd, W_REQ_1 -> movq) so the
mnemonic tracks operand width, plus xorps (no prefix) / xorpd (66). Teach
print_xmm_rr the 66 0F 7E direction (xmm in reg field is the source, GPR r/m
is the dest -> reversed AT&T order); 6E reuses the existing gpr-source path.

Verified byte-for-byte against llvm-objdump for all six forms, and a real
double program no longer produces any .inst/.byte desync. Adds an x64 decode
corpus case (runs under the x64 decode lane).

Diffstat:
M src/arch/x64/isa.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-------------
A test/asm/decode/x64_sse_movd_movq.expected.txt | 6 ++++++
A test/asm/decode/x64_sse_movd_movq.hex | 2 ++
A test/asm/decode/x64_sse_movd_movq.targets | 2 ++
4 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/src/arch/x64/isa.c b/src/arch/x64/isa.c @@ -300,6 +300,25 @@ const X64InsnDesc x64_insn_table[] = { X64_FMT_SSE_RR, 0), ROW("cvtss2sd", X64_PFX_F3, 2, 0x0F, 0x5A, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, X64_FMT_SSE_RR, 0), + /* MOVD/MOVQ between GPR and XMM. 66 0F 6E /r is gpr->xmm, 66 0F 7E /r is + * xmm->gpr (note the reversed operand order, handled in print_xmm_rr). + * REX.W picks movq (64-bit GPR) vs movd (32-bit), and since the *mnemonic* + * itself changes we split into W_REQ_0 / W_REQ_1 rows rather than a width + * suffix. The backend emits these for int<->FP bitcasts (emit_sse_rr_w). */ + ROW("movd", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_0, + X64_FMT_SSE_RR, 0), + ROW("movq", X64_PFX_66, 2, 0x0F, 0x6E, 0, 0xFF, NO_MODRM, X64_W_REQ_1, + X64_FMT_SSE_RR, 0), + ROW("movd", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_0, + X64_FMT_SSE_RR, 0), + ROW("movq", X64_PFX_66, 2, 0x0F, 0x7E, 0, 0xFF, NO_MODRM, X64_W_REQ_1, + X64_FMT_SSE_RR, 0), + /* XORPS / XORPD (0F 57, prefix selects packed-single vs -double). The + * backend uses these to clear/negate FP registers. Both operands xmm. */ + ROW("xorps", X64_PFX_NONE, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, + X64_FMT_SSE_RR, 0), + ROW("xorpd", X64_PFX_66, 2, 0x0F, 0x57, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY, + X64_FMT_SSE_RR, 0), }; const u32 x64_insn_table_n = @@ -927,23 +946,36 @@ static u32 print_xmm_rr(StrBuf* sb, const X64InsnDesc* d, const u8* bytes, u32 off = ctx->opc_off + d->opc_len; RegRm rr; if (!read_modrm(bytes, len, off, *ctx, &rr)) return 0; - /* CVTSI2*: source is GP reg (size from REX.W), dst is xmm. - * CVTT*2SI: source is xmm, dst is GP reg. - * Other SSE arith / mov / cmp: both xmm. 
*/ + /* Operand classes/order by opcode (AT&T src, dst): + * 2A CVTSI2* : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm" + * 6E MOVD/Q : rm=GP(src), reg=xmm(dst) -> "rm_gp, reg_xmm" (gpr->xmm) + * 2C CVTT*2SI : rm=xmm(src), reg=GP(dst) -> "rm_xmm, reg_gp" + * 7E MOVD/Q : reg=xmm(src), rm=GP(dst) -> "reg_xmm, rm_gp" (reversed!) + * others : both xmm -> "rm_xmm, reg_xmm" + * GP width comes from REX.W (movd vs movq / 32- vs 64-bit operands). */ u8 op = d->opc[1]; - int dst_is_gp = (op == 0x2Cu); /* CVTTSD/SS2SI */ - int src_is_gp = (op == 0x2Au); /* CVTSI2SD/SS */ u32 gp_w = ctx->rex_w ? 8u : 4u; - if (src_is_gp) { + if (op == 0x7Eu) { + /* xmm -> r/m GPR: source is the reg-field xmm, dest is the r/m GPR. */ + put_xmm(sb, rr.reg); + strbuf_puts(sb, ", "); put_rm(sb, &rr, *ctx, gp_w); - } else { - put_rm_xmm(sb, &rr, *ctx); + return off + 1u + rr.bytes_after_modrm; } - strbuf_puts(sb, ", "); - if (dst_is_gp) { - put_reg_ctx(sb, rr.reg, gp_w, ctx->has_rex); - } else { - put_xmm(sb, rr.reg); + { + int dst_is_gp = (op == 0x2Cu); /* CVTTSD/SS2SI */ + int src_is_gp = (op == 0x2Au || op == 0x6Eu); /* CVTSI2*, MOVD/Q g->x */ + if (src_is_gp) { + put_rm(sb, &rr, *ctx, gp_w); + } else { + put_rm_xmm(sb, &rr, *ctx); + } + strbuf_puts(sb, ", "); + if (dst_is_gp) { + put_reg_ctx(sb, rr.reg, gp_w, ctx->has_rex); + } else { + put_xmm(sb, rr.reg); + } } return off + 1u + rr.bytes_after_modrm; } diff --git a/test/asm/decode/x64_sse_movd_movq.expected.txt b/test/asm/decode/x64_sse_movd_movq.expected.txt @@ -0,0 +1,6 @@ +0: movq %xmm0, %rax +5: movd %xmm1, %ecx +9: movq %rdx, %xmm2 +e: movd %esi, %xmm3 +12: xorpd %xmm4, %xmm5 +16: xorps %xmm6, %xmm7 diff --git a/test/asm/decode/x64_sse_movd_movq.hex b/test/asm/decode/x64_sse_movd_movq.hex @@ -0,0 +1 @@ +66480f7ec0660f7ec966480f6ed2660f6ede660f57ec0f57fe +\ No newline at end of file diff --git a/test/asm/decode/x64_sse_movd_movq.targets b/test/asm/decode/x64_sse_movd_movq.targets @@ -0,0 +1 @@ +x64 +\ No newline at end of file