emit.c (22546B)
1 /* arch/x64/emit.c — byte-level emit helpers, function prologue/epilogue. 2 * 3 * Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin, 4 * x_func_end, and the per-ABI int_args tables exposed via X64ABIRegs. */ 5 6 #include "arch/x64/emit.h" 7 8 #include <string.h> 9 10 #include "arch/mc.h" 11 #include "arch/x64/isa.h" 12 #include "core/slice.h" 13 #include "obj/obj.h" 14 15 /* ============================================================ 16 * Shared constant tables. */ 17 18 static const u32 g_int_arg_regs_sysv[6] = {X64_RDI, X64_RSI, X64_RDX, 19 X64_RCX, X64_R8, X64_R9}; 20 static const u32 g_int_arg_regs_win64[4] = {X64_RCX, X64_RDX, X64_R8, X64_R9}; 21 22 static const X64ABIRegs g_x64_abi_sysv = { 23 .int_args = g_int_arg_regs_sysv, 24 .n_int_args = 6, 25 .n_fp_args = 8, 26 .slot_shared_int_fp = 0, 27 .shadow_space = 0, 28 .emit_sysv_vararg_save = 1, 29 .vararg_fp_dup_to_gpr = 0, 30 .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) | 31 (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15), 32 .cs_fp_mask = 0, 33 }; 34 35 static const X64ABIRegs g_x64_abi_win64 = { 36 .int_args = g_int_arg_regs_win64, 37 .n_int_args = 4, 38 .n_fp_args = 4, 39 .slot_shared_int_fp = 1, 40 .shadow_space = X64_WIN64_SHADOW_SPACE, 41 .emit_sysv_vararg_save = 0, 42 .vararg_fp_dup_to_gpr = 1, 43 .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) | 44 (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15) | 45 (1ull << X64_RDI) | (1ull << X64_RSI), 46 .cs_fp_mask = (1ull << X64_XMM6) | (1ull << X64_XMM7) | (1ull << X64_XMM8) | 47 (1ull << (X64_XMM0 + 9)) | (1ull << (X64_XMM0 + 10)) | 48 (1ull << (X64_XMM0 + 11)) | (1ull << (X64_XMM0 + 12)) | 49 (1ull << (X64_XMM0 + 13)) | (1ull << (X64_XMM0 + 14)) | 50 (1ull << X64_XMM15), 51 }; 52 53 const X64ABIRegs* x64_abi_for_os(KitOSKind os) { 54 return (os == KIT_OS_WINDOWS) ? &g_x64_abi_win64 : &g_x64_abi_sysv; 55 } 56 57 /* ============================================================ 58 * Byte-level emit helpers. 59 * 60 * x64 instructions are variable length: optional legacy prefix(es), 61 * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional 62 * displacement, optional immediate. Helpers below build sequences 63 * into the active MCEmitter section, recording one Debug row per 64 * instruction-start. */ 65 void emit1(MCEmitter* mc, u8 b) { 66 u32 ofs = obj_pos(mc->obj, mc->section_id); 67 mc->emit_bytes(mc, &b, 1); 68 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 69 } 70 void emit_u32le(MCEmitter* mc, u32 v) { 71 u8 b[4]; 72 b[0] = (u8)v; 73 b[1] = (u8)(v >> 8); 74 b[2] = (u8)(v >> 16); 75 b[3] = (u8)(v >> 24); 76 mc->emit_bytes(mc, b, 4); 77 } 78 static u8 make_rex(int w, u32 reg, u32 index, u32 rm) { 79 u8 r = 0; 80 if (w) r |= X64_REX_W; 81 if (reg & 8) r |= X64_REX_R; 82 if (index & 8) r |= X64_REX_X; 83 if (rm & 8) r |= X64_REX_B; 84 return r ? (u8)(X64_REX_BASE | r) : 0; 85 } 86 void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { 87 u8 r = make_rex(w, reg, index, rm); 88 if (r) mc->emit_bytes(mc, &r, 1); 89 } 90 /* Force REX (even REX=0x40) — required for byte-reg encodings that 91 * promote SIL/DIL/etc. */ 92 void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) { 93 u8 r = (u8)(X64_REX_BASE | (w ? X64_REX_W : 0) | ((reg & 8) ? X64_REX_R : 0) | 94 ((index & 8) ? X64_REX_X : 0) | ((rm & 8) ? X64_REX_B : 0)); 95 mc->emit_bytes(mc, &r, 1); 96 } 97 98 u8 modrm(u32 mod, u32 reg, u32 rm) { 99 return (u8)(((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u)); 100 } 101 u8 sib(u32 scale, u32 index, u32 base) { 102 return (u8)(((scale & 3u) << 6) | ((index & 7u) << 3) | (base & 7u)); 103 } 104 105 static u32 disp_mod(u32 base, i32 disp) { 106 if (disp == 0 && (base & 7u) != 5u) return 0u; /* [base] */ 107 if (disp >= -128 && disp <= 127) return 1u; /* [base + disp8] */ 108 return 2u; /* [base + disp32] */ 109 } 110 111 void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) { 112 u32 m = disp_mod(base, disp); 113 if ((base & 7u) == 4u) { 114 /* SIB byte required: index=4 (none), base=base. */ 115 u8 mr = modrm(m, reg, 4u); 116 mc->emit_bytes(mc, &mr, 1); 117 u8 s = sib(0, 4u, base); 118 mc->emit_bytes(mc, &s, 1); 119 } else { 120 u8 mr = modrm(m, reg, base); 121 mc->emit_bytes(mc, &mr, 1); 122 } 123 if (m == 1u) { 124 u8 d = (u8)(i8)disp; 125 mc->emit_bytes(mc, &d, 1); 126 } else if (m == 2u) { 127 emit_u32le(mc, (u32)disp); 128 } 129 } 130 void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) { 131 u8 mr = modrm(3u, reg, rm); 132 mc->emit_bytes(mc, &mr, 1); 133 } 134 135 /* ---- specific instruction emitters ---- */ 136 137 /* mov rd, rs (64-bit if w, else 32-bit). */ 138 void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) { 139 u32 ofs = obj_pos(mc->obj, mc->section_id); 140 u8 buf[16]; 141 u32 n = x64_alu_rr_pack( 142 (X64AluRR){.w = w, .op = X64_OPC_MOV_RM_R, .dst = dst, .src = src}, buf); 143 mc->emit_bytes(mc, buf, n); 144 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 145 } 146 147 /* mov reg, [base + disp]; size 1/2/4/8. */ 148 void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base, 149 i32 disp) { 150 u32 ofs = obj_pos(mc->obj, mc->section_id); 151 u8 buf[16]; 152 u32 n = 0; 153 if (size == 8) { 154 n = x64_mov_rm_load_pack((X64MovRMLoad){.w = 1, 155 .opc0 = X64_OPC_MOV_R_RM, 156 .dst = dst, 157 .base = base, 158 .disp = disp}, 159 buf); 160 } else if (size == 4) { 161 n = x64_mov_rm_load_pack((X64MovRMLoad){.w = 0, 162 .opc0 = X64_OPC_MOV_R_RM, 163 .dst = dst, 164 .base = base, 165 .disp = disp}, 166 buf); 167 } else if (size == 2) { 168 n = x64_mov_rm_load_pack( 169 (X64MovRMLoad){.w = 0, 170 .opc1 = signed_ext ? X64_OPC_MOVSX_W : X64_OPC_MOVZX_W, 171 .dst = dst, 172 .base = base, 173 .disp = disp}, 174 buf); 175 } else if (size == 1) { 176 n = x64_mov_rm_load_pack( 177 (X64MovRMLoad){.w = 0, 178 .opc1 = signed_ext ? X64_OPC_MOVSX_B : X64_OPC_MOVZX_B, 179 .dst = dst, 180 .base = base, 181 .disp = disp}, 182 buf); 183 } 184 if (n) mc->emit_bytes(mc, buf, n); 185 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 186 } 187 188 /* mov [base + disp], src; size 1/2/4/8. */ 189 void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp) { 190 u32 ofs = obj_pos(mc->obj, mc->section_id); 191 u8 buf[16]; 192 u32 n = 0; 193 if (size == 8) { 194 n = x64_alu_rm_pack((X64AluRM){.w = 1, 195 .op = X64_OPC_MOV_RM_R, 196 .src = src, 197 .base = base, 198 .disp = disp}, 199 buf); 200 } else if (size == 4) { 201 n = x64_alu_rm_pack((X64AluRM){.w = 0, 202 .op = X64_OPC_MOV_RM_R, 203 .src = src, 204 .base = base, 205 .disp = disp}, 206 buf); 207 } else if (size == 2) { 208 n = x64_alu_rm_pack((X64AluRM){.prefix = X64_OPSIZE_PFX, 209 .w = 0, 210 .op = X64_OPC_MOV_RM_R, 211 .src = src, 212 .base = base, 213 .disp = disp}, 214 buf); 215 } else if (size == 1) { 216 /* Force REX so SIL/DIL/etc are addressable as byte regs. */ 217 n = x64_alu_rm_pack((X64AluRM){.w = 0, 218 .op = X64_OPC_MOV_RM_R8, 219 .force_rex = 1, 220 .src = src, 221 .base = base, 222 .disp = disp}, 223 buf); 224 } 225 if (n) mc->emit_bytes(mc, buf, n); 226 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 227 } 228 229 void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) { 230 u32 ofs = obj_pos(mc->obj, mc->section_id); 231 u8 buf[16]; 232 u32 n = x64_mov_rm_load_pack( 233 (X64MovRMLoad){ 234 .w = 1, .opc0 = X64_OPC_LEA, .dst = dst, .base = base, .disp = disp}, 235 buf); 236 mc->emit_bytes(mc, buf, n); 237 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 238 } 239 240 /* Common low-level emit for `[base + index<<log2_scale + disp]` GPR/SSE 241 * memory operands. Builds REX with index, opcode(s), and SIB by hand 242 * (the existing `*_pack` helpers route through `x64_pack_mem`, which 243 * forces SIB index = 4 (none)). */ 244 static void emit_mem_idx_op(MCEmitter* mc, u8 prefix, int w, int force_rex, 245 u8 opc0, u8 opc1, u32 reg, u32 base, u32 index, 246 u32 log2_scale, i32 disp) { 247 u8 buf[16]; 248 u32 n = 0; 249 if (prefix) buf[n++] = prefix; 250 if (force_rex) 251 n += x64_pack_rex_force(buf + n, w, reg, index, base); 252 else 253 n += x64_pack_rex(buf + n, w, reg, index, base); 254 if (opc1) { 255 buf[n++] = X64_OPC_TWOBYTE; 256 buf[n++] = opc1; 257 } else { 258 buf[n++] = opc0; 259 } 260 n += x64_pack_mem_sib(buf + n, reg, base, index, log2_scale, disp); 261 mc->emit_bytes(mc, buf, n); 262 } 263 264 /* mov reg, [base + index<<log2_scale + disp]; size 1/2/4/8. */ 265 void emit_mov_load_idx(MCEmitter* mc, u32 size, int signed_ext, u32 dst, 266 u32 base, u32 index, u32 log2_scale, i32 disp) { 267 if (index == REG_NONE) { 268 emit_mov_load(mc, size, signed_ext, dst, base, disp); 269 return; 270 } 271 u32 ofs = obj_pos(mc->obj, mc->section_id); 272 if (size == 8) { 273 emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu, 274 log2_scale, disp); 275 } else if (size == 4) { 276 emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_R_RM, 0, dst, base, index & 0xFu, 277 log2_scale, disp); 278 } else if (size == 2) { 279 emit_mem_idx_op(mc, 0, 0, 0, 0, 280 signed_ext ? X64_OPC_MOVSX_W : X64_OPC_MOVZX_W, dst, base, 281 index & 0xFu, log2_scale, disp); 282 } else if (size == 1) { 283 emit_mem_idx_op(mc, 0, 0, 0, 0, 284 signed_ext ? X64_OPC_MOVSX_B : X64_OPC_MOVZX_B, dst, base, 285 index & 0xFu, log2_scale, disp); 286 } 287 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 288 } 289 290 /* mov [base + index<<log2_scale + disp], src; size 1/2/4/8. */ 291 void emit_mov_store_idx(MCEmitter* mc, u32 size, u32 src, u32 base, u32 index, 292 u32 log2_scale, i32 disp) { 293 if (index == REG_NONE) { 294 emit_mov_store(mc, size, src, base, disp); 295 return; 296 } 297 u32 ofs = obj_pos(mc->obj, mc->section_id); 298 if (size == 8) { 299 emit_mem_idx_op(mc, 0, 1, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu, 300 log2_scale, disp); 301 } else if (size == 4) { 302 emit_mem_idx_op(mc, 0, 0, 0, X64_OPC_MOV_RM_R, 0, src, base, index & 0xFu, 303 log2_scale, disp); 304 } else if (size == 2) { 305 emit_mem_idx_op(mc, X64_OPSIZE_PFX, 0, 0, X64_OPC_MOV_RM_R, 0, src, base, 306 index & 0xFu, log2_scale, disp); 307 } else if (size == 1) { 308 /* Force REX so SIL/DIL/etc are addressable as byte regs. */ 309 emit_mem_idx_op(mc, 0, 0, 1, X64_OPC_MOV_RM_R8, 0, src, base, index & 0xFu, 310 log2_scale, disp); 311 } 312 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 313 } 314 315 /* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r 316 * imm32) for !is64. Both 10/5 bytes. */ 317 void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) { 318 u32 ofs = obj_pos(mc->obj, mc->section_id); 319 u8 buf[16]; 320 u32 n = 321 x64_mov_ri_pack((X64MovRI){.is64 = is64, .dst = dst, .imm = imm}, buf); 322 mc->emit_bytes(mc, buf, n); 323 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 324 } 325 326 /* Two-operand ALU r/m, r. op picks ADD(01)/SUB(29)/AND(21)/OR(09)/XOR(31)/ 327 * CMP(39)/MOV(89)/TEST(85). */ 328 void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) { 329 u32 ofs = obj_pos(mc->obj, mc->section_id); 330 u8 buf[16]; 331 u32 n = x64_alu_rr_pack((X64AluRR){.w = w, .op = op, .dst = dst, .src = src}, 332 buf); 333 mc->emit_bytes(mc, buf, n); 334 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 335 } 336 337 void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) { 338 u32 ofs = obj_pos(mc->obj, mc->section_id); 339 u8 buf[16]; 340 u32 n = x64_imul_rr_pack((X64ImulRR){.w = w, .dst = dst, .src = src}, buf); 341 mc->emit_bytes(mc, buf, n); 342 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 343 } 344 345 void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) { 346 u32 ofs = obj_pos(mc->obj, mc->section_id); 347 u8 buf[16]; 348 u32 n = x64_f7_rm_pack((X64F7RM){.w = w, .sub = sub, .reg = reg}, buf); 349 mc->emit_bytes(mc, buf, n); 350 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 351 } 352 353 void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) { 354 u32 ofs = obj_pos(mc->obj, mc->section_id); 355 u8 buf[16]; 356 u32 n = x64_shift_cl_pack((X64ShiftCL){.w = w, .sub = sub, .reg = reg}, buf); 357 mc->emit_bytes(mc, buf, n); 358 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 359 } 360 361 /* Shift r/m by imm8: opcode C1 /sub ib. sub: SHL=4, SHR=5, SAR=7. */ 362 void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm) { 363 u32 ofs = obj_pos(mc->obj, mc->section_id); 364 u8 buf[16]; 365 u32 n = x64_shift_imm_pack( 366 (X64ShiftImm){.w = w, .sub = sub, .reg = reg, .imm = imm}, buf); 367 mc->emit_bytes(mc, buf, n); 368 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 369 } 370 371 void emit_cqo_or_cdq(MCEmitter* mc, int w) { 372 u8 buf[16]; 373 u32 n = x64_nullary_pack((X64Nullary){.w = w, .opc0 = X64_OPC_CDQ_CQO}, buf); 374 mc->emit_bytes(mc, buf, n); 375 } 376 377 void emit_xor_self(MCEmitter* mc, int w, u32 r) { 378 emit_alu_rr(mc, w, X64_OPC_ALU_XOR, r, r); 379 } 380 381 /* cmp r/m, imm8 (0x83 /7). */ 382 void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) { 383 u32 ofs = obj_pos(mc->obj, mc->section_id); 384 u8 buf[16]; 385 u32 n = x64_alu_imm8_pack( 386 (X64AluRmImm8){.w = w, .sub = X64_ALU_SUB_CMP, .reg = reg, .imm = imm}, 387 buf); 388 mc->emit_bytes(mc, buf, n); 389 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 390 } 391 392 /* ALU r/m, imm8: opcode 0x83 /sub ib (sign-extended). sub: ADD=0, 393 * OR=1, ADC=2, SBB=3, AND=4, SUB=5, XOR=6, CMP=7. */ 394 void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) { 395 u32 ofs = obj_pos(mc->obj, mc->section_id); 396 u8 buf[16]; 397 u32 n = x64_alu_imm8_pack( 398 (X64AluRmImm8){.w = w, .sub = sub, .reg = reg, .imm = imm}, buf); 399 mc->emit_bytes(mc, buf, n); 400 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 401 } 402 403 /* ALU r/m, imm32: opcode 0x81 /sub id (sign-extended for w=1). */ 404 void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) { 405 u32 ofs = obj_pos(mc->obj, mc->section_id); 406 u8 buf[16]; 407 u32 n = x64_alu_imm32_pack( 408 (X64AluRmImm32){.w = w, .sub = sub, .reg = reg, .imm = imm}, buf); 409 mc->emit_bytes(mc, buf, n); 410 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 411 } 412 413 /* IMUL r, r/m, imm: 0x6B /r ib (imm8 sext) or 0x69 /r id (imm32 sext). 414 * Both forms write the result back to the same `dst` register so the 415 * caller doesn't need an explicit copy beforehand — unlike the ALU 416 * forms which read-modify-write a single operand. */ 417 void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) { 418 u32 ofs = obj_pos(mc->obj, mc->section_id); 419 u8 buf[16]; 420 u32 n = x64_imul_rri_pack( 421 (X64ImulRRI){.w = w, .imm32 = 0, .dst = dst, .src = src, .imm = imm}, 422 buf); 423 mc->emit_bytes(mc, buf, n); 424 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 425 } 426 void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) { 427 u32 ofs = obj_pos(mc->obj, mc->section_id); 428 u8 buf[16]; 429 u32 n = x64_imul_rri_pack( 430 (X64ImulRRI){.w = w, .imm32 = 1, .dst = dst, .src = src, .imm = imm}, 431 buf); 432 mc->emit_bytes(mc, buf, n); 433 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 434 } 435 436 /* Width predicate: does `imm` fit in an i8 (used by the 0x83/0x6B 437 * imm8-sign-extended forms)? */ 438 int imm_fits_i8(i64 imm) { return imm >= -128 && imm <= 127; } 439 /* Width predicate: does `imm` fit in a signed 32-bit value (the 0x81/ 440 * 0x69 imm32-sign-extended forms; for w=1 the imm is sign-extended to 441 * 64). Returns 0 for values outside [INT32_MIN, INT32_MAX] — those 442 * require a full materialization through x64_emit_load_imm. */ 443 int imm_fits_i32(i64 imm) { 444 return imm >= -2147483648LL && imm <= 2147483647LL; 445 } 446 447 void emit_test_self(MCEmitter* mc, int w, u32 reg) { 448 emit_alu_rr(mc, w, X64_OPC_ALU_TEST, reg, reg); 449 } 450 451 void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) { 452 u32 ofs = obj_pos(mc->obj, mc->section_id); 453 u8 buf[16]; 454 u32 n = x64_setcc_pack((X64Setcc){.cc = cc, .reg = reg}, buf); 455 mc->emit_bytes(mc, buf, n); 456 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 457 } 458 459 void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) { 460 u32 ofs = obj_pos(mc->obj, mc->section_id); 461 u8 buf[16]; 462 u32 n = x64_movzx_rr_pack((X64MovzxRR){.w = 0, 463 .opc1 = X64_OPC_MOVZX_B, 464 .force_rex = 1, 465 .dst = dst, 466 .src = src}, 467 buf); 468 mc->emit_bytes(mc, buf, n); 469 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 470 } 471 472 /* movzx/movsx r→r. src_size is source byte width. */ 473 void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size, u32 dst, 474 u32 src) { 475 u32 ofs = obj_pos(mc->obj, mc->section_id); 476 u8 buf[16]; 477 u32 n = 0; 478 if (src_size == 4 && signed_ext) { 479 /* movsxd r64, r32: REX.W 0x63 ModRM */ 480 n = x64_movsxd_pack((X64Movsxd){.dst = dst, .src = src}, buf); 481 } else if (src_size == 4 && !signed_ext) { 482 /* zext 32→64 is `mov r32, r32` (clears high 32). */ 483 n = x64_alu_rr_pack( 484 (X64AluRR){.w = 0, .op = X64_OPC_MOV_RM_R, .dst = dst, .src = src}, 485 buf); 486 } else if (src_size == 1) { 487 n = x64_movzx_rr_pack( 488 (X64MovzxRR){.w = w, 489 .opc1 = signed_ext ? X64_OPC_MOVSX_B : X64_OPC_MOVZX_B, 490 .force_rex = 1, 491 .dst = dst, 492 .src = src}, 493 buf); 494 } else if (src_size == 2) { 495 n = x64_movzx_rr_pack( 496 (X64MovzxRR){.w = w, 497 .opc1 = signed_ext ? X64_OPC_MOVSX_W : X64_OPC_MOVZX_W, 498 .force_rex = 0, 499 .dst = dst, 500 .src = src}, 501 buf); 502 } else { 503 /* No extension to perform (src already at least as wide as dst, e.g. 504 * 64→64 zext/sext). Still need a reg-to-reg move when dst != src so the 505 * destination holds the value. */ 506 if (dst != src) emit_mov_rr(mc, w, dst, src); 507 } 508 if (n) mc->emit_bytes(mc, buf, n); 509 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 510 } 511 512 void emit_ret(MCEmitter* mc) { 513 u8 op = X64_OPC_RET; 514 mc->emit_bytes(mc, &op, 1); 515 } 516 void emit_leave(MCEmitter* mc) { 517 u8 op = X64_OPC_LEAVE; 518 mc->emit_bytes(mc, &op, 1); 519 } 520 521 /* ---- SSE scalar FP encoders ---- */ 522 void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) { 523 u32 ofs = obj_pos(mc->obj, mc->section_id); 524 u8 buf[16]; 525 u32 n = x64_sse_rr_pack( 526 (X64SseRR){ 527 .prefix = prefix, .opcode = opcode, .w = 0, .dst = dst, .src = src}, 528 buf); 529 mc->emit_bytes(mc, buf, n); 530 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 531 } 532 void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, 533 i32 disp) { 534 u32 ofs = obj_pos(mc->obj, mc->section_id); 535 u8 buf[16]; 536 u32 n = x64_sse_mem_pack((X64SseMem){.prefix = prefix, 537 .opcode = opcode, 538 .reg = dst, 539 .base = base, 540 .disp = disp}, 541 buf); 542 mc->emit_bytes(mc, buf, n); 543 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 544 } 545 void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, 546 i32 disp) { 547 u32 ofs = obj_pos(mc->obj, mc->section_id); 548 u8 buf[16]; 549 u32 n = x64_sse_mem_pack((X64SseMem){.prefix = prefix, 550 .opcode = opcode, 551 .reg = src, 552 .base = base, 553 .disp = disp}, 554 buf); 555 mc->emit_bytes(mc, buf, n); 556 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 557 } 558 void emit_sse_load_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base, 559 u32 index, u32 log2_scale, i32 disp) { 560 if (index == REG_NONE) { 561 emit_sse_load(mc, prefix, opcode, dst, base, disp); 562 return; 563 } 564 u32 ofs = obj_pos(mc->obj, mc->section_id); 565 emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, dst, base, index & 0xFu, 566 log2_scale, disp); 567 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 568 } 569 void emit_sse_store_idx(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base, 570 u32 index, u32 log2_scale, i32 disp) { 571 if (index == REG_NONE) { 572 emit_sse_store(mc, prefix, opcode, src, base, disp); 573 return; 574 } 575 u32 ofs = obj_pos(mc->obj, mc->section_id); 576 emit_mem_idx_op(mc, prefix, 0, 0, 0, opcode, src, base, index & 0xFu, 577 log2_scale, disp); 578 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 579 } 580 void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, 581 u32 src) { 582 u32 ofs = obj_pos(mc->obj, mc->section_id); 583 u8 buf[16]; 584 u32 n = x64_sse_rr_pack( 585 (X64SseRR){ 586 .prefix = prefix, .opcode = opcode, .w = w, .dst = dst, .src = src}, 587 buf); 588 mc->emit_bytes(mc, buf, n); 589 if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); 590 }