asm.c (67902B)
1 #include "arch/x64/asm.h" 2 3 #include <string.h> 4 5 #include "arch/x64/emit.h" 6 #include "arch/x64/regs.h" 7 #include "asm/asm_helpers.h" 8 #include "core/arena.h" 9 #include "core/pool.h" 10 #include "core/slice.h" 11 #include "core/strbuf.h" 12 13 struct X64Asm { 14 ArchAsm base; 15 Compiler* c; 16 17 const AsmConstraint* outs; 18 Operand* out_ops; 19 const AsmConstraint* ins; 20 const Operand* in_ops; 21 const Sym* clobbers; 22 u32 nout; 23 u32 nin; 24 u32 nclob; 25 }; 26 27 typedef enum X64AsmOperandKind { 28 X64_ASM_OP_REG, 29 X64_ASM_OP_XMM, 30 X64_ASM_OP_IMM, 31 X64_ASM_OP_MEM, 32 X64_ASM_OP_IND_REG, 33 } X64AsmOperandKind; 34 35 typedef struct X64AsmOperand { 36 u8 kind; 37 u8 width; 38 u8 reg; 39 u8 base; 40 u8 high8; 41 u8 seg; 42 u8 no_base; /* MEM: segment-prefixed absolute, no base register */ 43 u8 index; /* MEM SIB: index register (valid when has_index) */ 44 u8 scale; /* MEM SIB: log2 of scale ∈ {0,1,2,3} → 1/2/4/8 */ 45 u8 has_index; /* MEM: SIB index present */ 46 u8 rip_relative; /* MEM: bare (%rip)/disp(%rip) form */ 47 u8 has_reloc; /* MEM: symbolic disp carries a relocation */ 48 u8 pad[3]; 49 i64 imm; 50 i32 disp; 51 RelocKind reloc_kind; /* MEM: reloc on the disp32 (PC32 / REX_GOTPCRELX) */ 52 ObjSymId reloc_sym; /* MEM: relocated symbol */ 53 i64 reloc_off; /* MEM: user addend on the symbol */ 54 } X64AsmOperand; 55 56 static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, u32* width_out, 57 u32* high8_out) { 58 Slice sl = pool_slice(asm_driver_pool(d), s); 59 char buf[16]; 60 u32 reg; 61 u32 width = 8; 62 Slice q; 63 if (!sl.s || sl.len < 2 || sl.len >= sizeof buf) return 0; 64 memcpy(buf, sl.s, sl.len); 65 buf[sl.len] = '\0'; 66 q = slice_from_cstr(buf); 67 if (slice_eq_cstr(q, "ah") || slice_eq_cstr(q, "ch") || 68 slice_eq_cstr(q, "dh") || slice_eq_cstr(q, "bh")) { 69 static const u32 high_map[4] = {4u, 5u, 6u, 7u}; 70 const char* names = "acdb"; 71 for (u32 i = 0; i < 4u; ++i) { 72 if (buf[0] == names[i]) { 73 if (reg_out) 
*reg_out = high_map[i]; 74 if (width_out) *width_out = 1; 75 if (high8_out) *high8_out = 1; 76 return 1; 77 } 78 } 79 } 80 if (x64_register_hw_index(buf, ®) != 0) return 0; 81 if (reg > 15u) return 0; 82 if (slice_eq_cstr(q, "al") || slice_eq_cstr(q, "cl") || 83 slice_eq_cstr(q, "dl") || slice_eq_cstr(q, "bl") || 84 slice_eq_cstr(q, "spl") || slice_eq_cstr(q, "bpl") || 85 slice_eq_cstr(q, "sil") || slice_eq_cstr(q, "dil") || 86 buf[sl.len - 1] == 'b') { 87 width = 1; 88 } else if (slice_eq_cstr(q, "ax") || slice_eq_cstr(q, "cx") || 89 slice_eq_cstr(q, "dx") || slice_eq_cstr(q, "bx") || 90 slice_eq_cstr(q, "sp") || slice_eq_cstr(q, "bp") || 91 slice_eq_cstr(q, "si") || slice_eq_cstr(q, "di") || 92 buf[sl.len - 1] == 'w') { 93 width = 2; 94 } else if (buf[sl.len - 1] == 'd' || buf[0] == 'e') { 95 width = 4; 96 } 97 if (reg_out) *reg_out = reg; 98 if (width_out) *width_out = width; 99 if (high8_out) *high8_out = 0; 100 return 1; 101 } 102 103 static int x64_xmm_from_name(AsmDriver* d, Sym s, u32* reg_out) { 104 Slice sl = pool_slice(asm_driver_pool(d), s); 105 const char* p = sl.s; 106 size_t n = sl.len; 107 u32 reg = 0; 108 if (!p || n < 4 || n > 5) return 0; 109 if (p[0] != 'x' || p[1] != 'm' || p[2] != 'm') return 0; 110 for (size_t i = 3; i < n; ++i) { 111 if (p[i] < '0' || p[i] > '9') return 0; 112 reg = reg * 10u + (u32)(p[i] - '0'); 113 } 114 if (reg > 15u) return 0; 115 if (reg_out) *reg_out = reg; 116 return 1; 117 } 118 119 static int x64_segment_prefix_from_name(AsmDriver* d, Sym s, u8* prefix_out) { 120 Slice sl = pool_slice(asm_driver_pool(d), s); 121 const char* p = sl.s; 122 size_t n = sl.len; 123 if (!p || n != 2) return 0; 124 if (p[0] == 'f' && p[1] == 's') { 125 if (prefix_out) *prefix_out = 0x64; 126 return 1; 127 } 128 if (p[0] == 'g' && p[1] == 's') { 129 if (prefix_out) *prefix_out = 0x65; 130 return 1; 131 } 132 return 0; 133 } 134 135 static void expect_comma(AsmDriver* d); 136 137 static u32 parse_reg(AsmDriver* d, u32* width_out, u32* 
high8_out) { 138 AsmTok t; 139 u32 reg; 140 if (!asm_driver_eat_punct(d, '%')) 141 asm_driver_panic(d, "x64 asm: expected register"); 142 t = asm_driver_next(d); 143 if (t.kind != ASM_TOK_IDENT || 144 !x64_reg_from_name(d, t.v.ident, ®, width_out, high8_out)) { 145 asm_driver_panic(d, "x64 asm: bad register"); 146 } 147 return reg; 148 } 149 150 /* True if the symbol names the instruction pointer ("rip"). */ 151 static int x64_ident_is_rip(AsmDriver* d, Sym s) { 152 Slice sl = pool_slice(asm_driver_pool(d), s); 153 return sl.s && sl.len == 3 && sl.s[0] == 'r' && sl.s[1] == 'i' && 154 sl.s[2] == 'p'; 155 } 156 157 /* Convert a SIB scale literal (1/2/4/8) to its log2 (0/1/2/3). */ 158 static u32 x64_scale_to_log2(AsmDriver* d, i64 scale) { 159 switch (scale) { 160 case 1: 161 return 0u; 162 case 2: 163 return 1u; 164 case 4: 165 return 2u; 166 case 8: 167 return 3u; 168 default: 169 asm_driver_panic(d, "x64 asm: memory scale must be 1, 2, 4, or 8"); 170 } 171 } 172 173 /* Parse the body of a memory operand once the leading '(' has been 174 * consumed: '%base[,%index,scale])', '%rip)', or ',%index,scale)'. 175 * Fills base/index/scale/has_index/rip_relative on `op` and eats the 176 * closing ')'. */ 177 static void parse_mem_paren_body(AsmDriver* d, X64AsmOperand* op) { 178 AsmTok t = asm_driver_peek(d); 179 if (asm_driver_tok_is_punct(t, '%')) { 180 /* Peek the register name to detect the RIP-relative form. */ 181 AsmTok ident; 182 (void)asm_driver_next(d); 183 ident = asm_driver_next(d); 184 if (ident.kind != ASM_TOK_IDENT) 185 asm_driver_panic(d, "x64 asm: bad register"); 186 if (x64_ident_is_rip(d, ident.v.ident)) { 187 op->rip_relative = 1; 188 asm_driver_expect_punct(d, ')', "')' in x64 memory operand"); 189 return; 190 } 191 { 192 u32 reg = 0; 193 if (!x64_reg_from_name(d, ident.v.ident, ®, NULL, NULL)) 194 asm_driver_panic(d, "x64 asm: bad register"); 195 op->base = (u8)reg; 196 } 197 /* Optional ',%index,scale'. 
*/ 198 if (asm_driver_eat_comma(d)) { 199 op->index = (u8)parse_reg(d, NULL, NULL); 200 op->has_index = 1; 201 expect_comma(d); 202 op->scale = (u8)x64_scale_to_log2(d, asm_driver_parse_const(d)); 203 } 204 asm_driver_expect_punct(d, ')', "')' in x64 memory operand"); 205 return; 206 } 207 /* Index-only form: '(,%index,scale)' — base omitted. */ 208 if (asm_driver_eat_comma(d)) { 209 op->no_base = 1; 210 op->index = (u8)parse_reg(d, NULL, NULL); 211 op->has_index = 1; 212 expect_comma(d); 213 op->scale = (u8)x64_scale_to_log2(d, asm_driver_parse_const(d)); 214 asm_driver_expect_punct(d, ')', "')' in x64 memory operand"); 215 return; 216 } 217 asm_driver_panic(d, "x64 asm: expected register in memory operand"); 218 } 219 220 /* Consume an optional `@MOD` relocation suffix after a symbol and return the 221 * RelocKind it selects, or `dflt` when no suffix is present. */ 222 static RelocKind x64_parse_reloc_suffix(AsmDriver* d, RelocKind dflt) { 223 if (!asm_driver_tok_is_punct(asm_driver_peek(d), '@')) return dflt; 224 (void)asm_driver_next(d); /* '@' */ 225 AsmTok n = asm_driver_next(d); 226 if (n.kind != ASM_TOK_IDENT) 227 asm_driver_panic(d, "x64 asm: expected relocation name after '@'"); 228 Slice s = pool_slice(asm_driver_pool(d), n.v.ident); 229 if (slice_eq_cstr(s, "PLT")) return R_X64_PLT32; 230 if (slice_eq_cstr(s, "GOTPCREL")) return R_X64_REX_GOTPCRELX; 231 if (slice_eq_cstr(s, "GOTPCRELX")) return R_X64_GOTPCRELX; 232 asm_driver_panic(d, "x64 asm: unsupported relocation suffix"); 233 } 234 235 static X64AsmOperand parse_operand(AsmDriver* d) { 236 X64AsmOperand op; 237 AsmTok t; 238 memset(&op, 0, sizeof op); 239 t = asm_driver_peek(d); 240 if (asm_driver_eat_punct(d, '*')) { 241 op.kind = X64_ASM_OP_IND_REG; 242 op.reg = (u8)parse_reg(d, NULL, NULL); 243 return op; 244 } 245 if (asm_driver_eat_punct(d, '$')) { 246 op.kind = X64_ASM_OP_IMM; 247 op.imm = asm_driver_parse_const(d); 248 return op; 249 } 250 if (asm_driver_tok_is_punct(t, '%')) { 251 u32 width 
= 8; 252 u32 high8 = 0; 253 AsmTok ident; 254 (void)asm_driver_next(d); 255 ident = asm_driver_next(d); 256 if (ident.kind != ASM_TOK_IDENT) 257 asm_driver_panic(d, "x64 asm: bad register"); 258 if (x64_segment_prefix_from_name(d, ident.v.ident, &op.seg)) { 259 asm_driver_expect_punct(d, ':', "':' after x64 segment register"); 260 op.kind = X64_ASM_OP_MEM; 261 if (!asm_driver_tok_is_punct(asm_driver_peek(d), '(')) 262 op.disp = (i32)asm_driver_parse_const(d); 263 if (asm_driver_eat_punct(d, '(')) { 264 op.base = (u8)parse_reg(d, NULL, NULL); 265 asm_driver_expect_punct(d, ')', "')' in x64 memory operand"); 266 } else { 267 op.no_base = 1; 268 } 269 return op; 270 } 271 if (x64_xmm_from_name(d, ident.v.ident, &width)) { 272 op.kind = X64_ASM_OP_XMM; 273 op.reg = (u8)width; 274 op.width = 16; 275 return op; 276 } 277 { 278 u32 reg = 0; 279 if (!x64_reg_from_name(d, ident.v.ident, ®, &width, &high8)) 280 asm_driver_panic(d, "x64 asm: bad register"); 281 op.kind = X64_ASM_OP_REG; 282 op.reg = (u8)reg; 283 } 284 op.width = (u8)width; 285 op.high8 = (u8)high8; 286 return op; 287 } 288 op.kind = X64_ASM_OP_MEM; 289 op.disp = 0; 290 if (!asm_driver_tok_is_punct(t, '(')) { 291 /* A symbolic displacement (`sym(%rip)`, `sym@GOTPCREL(%rip)`) becomes a 292 * relocation; a numeric displacement stays literal. */ 293 if (asm_driver_peek(d).kind == ASM_TOK_IDENT) { 294 asm_driver_parse_sym_expr(d, &op.reloc_sym, &op.reloc_off); 295 op.reloc_kind = x64_parse_reloc_suffix(d, R_PC32); 296 op.has_reloc = 1; 297 } else { 298 op.disp = (i32)asm_driver_parse_const(d); 299 } 300 } 301 asm_driver_expect_punct(d, '(', "'(' in x64 memory operand"); 302 parse_mem_paren_body(d, &op); 303 if (op.has_reloc && !op.rip_relative) 304 asm_driver_panic(d, 305 "x64 asm: symbolic memory displacement requires (%rip)"); 306 return op; 307 } 308 309 /* Emit the relocation a symbolic `(%rip)` memory operand carries, if any. 
The 310 * disp32 field is the last 4 bytes of the instruction except for an immediate 311 * store, where `trailing` immediate bytes follow it. R_X86_64_PC32-style 312 * relocs use addend (off - 4 - trailing) so S+A-P yields the rip-relative 313 * displacement to the end of the instruction. */ 314 static void x64_emit_mem_reloc(AsmDriver* d, MCEmitter* mc, 315 const X64AsmOperand* m, u32 trailing) { 316 if (!m->has_reloc) return; 317 u32 disp_pos = mc->pos(mc) - 4u - trailing; 318 mc->emit_reloc_at(mc, asm_driver_cur_section(d), disp_pos, m->reloc_kind, 319 m->reloc_sym, m->reloc_off - 4 - (i64)trailing, 1, 0); 320 } 321 322 static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg, 323 X64AsmOperand mem) { 324 /* RIP-relative carries no base/index registers (rm=101, no SIB). */ 325 if (mem.rip_relative) return x64_pack_rex(out, w, reg, 0, 0u); 326 /* SIB forms supply REX.X from the index register (and REX.B from base 327 * unless the base is omitted in the index-only form). */ 328 if (mem.has_index) 329 return x64_pack_rex(out, w, reg, mem.index, mem.no_base ? 0u : mem.base); 330 return x64_pack_rex(out, w, reg, 0, mem.no_base ? 0u : mem.base); 331 } 332 333 static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) { 334 if (mem.rip_relative) return x64_pack_mem_rip(out, reg, mem.disp); 335 if (mem.has_index) { 336 /* Index-only form (no base): mod=00 with SIB.base=101 → disp32. 
*/ 337 if (mem.no_base) { 338 out[0] = x64_modrm(0u, reg, X64_MODRM_RM_SIB); 339 out[1] = x64_sib(mem.scale, mem.index, X64_SIB_NO_BASE); 340 return 2u + x64_put_u32le(out + 2, (u32)mem.disp); 341 } 342 return x64_pack_mem_sib(out, reg, mem.base, mem.index, mem.scale, mem.disp); 343 } 344 if (mem.no_base) { 345 out[0] = x64_modrm(0u, reg, X64_MODRM_RM_SIB); 346 out[1] = x64_sib(0u, X64_SIB_NO_INDEX, X64_SIB_NO_BASE); 347 return 2u + x64_put_u32le(out + 2, (u32)mem.disp); 348 } 349 return x64_pack_mem(out, reg, mem.base, mem.disp); 350 } 351 352 /* reg ← mem with an explicit single-byte opcode (e.g. 0x8B MOV, 0x8D LEA). 353 * Routes the full memory-operand variety (plain / SIB / RIP / segment) 354 * through the shared pack helpers. */ 355 static void emit_reg_mem_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc, 356 u32 dst, X64AsmOperand src) { 357 u8 buf[16]; 358 u32 n = 0; 359 if (size == 2u) buf[n++] = X64_OPSIZE_PFX; 360 if (src.seg) buf[n++] = src.seg; 361 n += x64_pack_rex_mem_operand(buf + n, size == 8u, dst, src); 362 buf[n++] = opc; 363 n += x64_pack_mem_operand(buf + n, dst, src); 364 mc->emit_bytes(mc, buf, n); 365 x64_emit_mem_reloc(d, mc, &src, 0); 366 } 367 368 static void emit_mov_load_operand(AsmDriver* d, MCEmitter* mc, u32 size, 369 u32 dst, X64AsmOperand src) { 370 emit_reg_mem_operand(d, mc, size, X64_OPC_MOV_R_RM, dst, src); 371 } 372 373 /* reg → mem store with an explicit reg-to-r/m opcode. Used by MOV 374 * (0x89/0x88) and the ALU /r stores (ADD 0x01, OR 0x09, AND 0x21, 375 * SUB 0x29, XOR 0x31, CMP 0x39). The register operand occupies the 376 * ModR/M reg field; the memory operand the r/m field. 
*/ 377 static void emit_reg_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, 378 u8 opc, u32 src, X64AsmOperand dst, 379 int force_rex) { 380 u8 buf[16]; 381 u32 n = 0; 382 if (size == 2u) buf[n++] = X64_OPSIZE_PFX; 383 if (dst.seg) buf[n++] = dst.seg; 384 if (force_rex) 385 n += x64_pack_rex_force(buf + n, size == 8u, src, 0, 386 dst.no_base ? 0u : dst.base); 387 else 388 n += x64_pack_rex_mem_operand(buf + n, size == 8u, src, dst); 389 buf[n++] = opc; 390 n += x64_pack_mem_operand(buf + n, src, dst); 391 mc->emit_bytes(mc, buf, n); 392 x64_emit_mem_reloc(d, mc, &dst, 0); 393 } 394 395 static void emit_mov_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, 396 u32 src, X64AsmOperand dst, int force_rex) { 397 emit_reg_store_operand(d, mc, size, 398 size == 1u ? X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R, src, 399 dst, force_rex); 400 } 401 402 /* imm → mem store via a group-1 /digit opcode (group-1 ALU 0x80/0x81/0x83, 403 * or MOV C6/C7). `opc8`/`opc32` select the 8-bit-immediate vs 404 * 32-bit-immediate (sign-extended) opcode; pass equal values when the 405 * encoding has no imm8 short form (e.g. MOV). `imm8` forces the short 406 * form when the immediate fits. */ 407 static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, 408 u8 opc8, u8 opc32, u32 sub, 409 X64AsmOperand dst, i64 imm, 410 int allow_i8) { 411 u8 buf[16]; 412 u32 n = 0; 413 int use_i8 = allow_i8 && imm_fits_i8(imm); 414 if (!use_i8 && !imm_fits_i32(imm) && size != 1u) 415 asm_driver_panic(d, "x64 asm: immediate out of range"); 416 if (size == 2u) buf[n++] = X64_OPSIZE_PFX; 417 if (dst.seg) buf[n++] = dst.seg; 418 n += x64_pack_rex_mem_operand(buf + n, size == 8u, 0, dst); 419 buf[n++] = use_i8 ? 
opc8 : opc32; 420 n += x64_pack_mem_operand(buf + n, sub, dst); 421 u32 trailing; 422 if (size == 1u) { 423 buf[n++] = (u8)imm; 424 trailing = 1u; 425 } else if (use_i8) { 426 buf[n++] = (u8)(i8)imm; 427 trailing = 1u; 428 } else if (size == 2u) { 429 /* 16-bit operand size: a 2-byte immediate (under the 0x66 prefix). */ 430 u16 v = (u16)imm; 431 buf[n++] = (u8)v; 432 buf[n++] = (u8)(v >> 8); 433 trailing = 2u; 434 } else { 435 n += x64_put_u32le(buf + n, (u32)(i32)imm); 436 trailing = 4u; 437 } 438 mc->emit_bytes(mc, buf, n); 439 x64_emit_mem_reloc(d, mc, &dst, trailing); 440 } 441 442 static void expect_comma(AsmDriver* d) { 443 if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "x64 asm: expected ','"); 444 } 445 446 static void emit_indirect_branch(MCEmitter* mc, u32 sub, u32 reg) { 447 u8 op = 0xff; 448 emit_rex(mc, 0, 0, 0, reg); 449 mc->emit_bytes(mc, &op, 1); 450 { 451 u8 mr = modrm(3u, sub, reg); 452 mc->emit_bytes(mc, &mr, 1); 453 } 454 } 455 456 static void emit_packed(MCEmitter* mc, const u8* bytes, u32 n) { 457 mc->emit_bytes(mc, bytes, n); 458 } 459 460 static int byte_reg_needs_rex(const X64AsmOperand* op) { 461 return op && !op->high8 && op->reg >= 4u; 462 } 463 464 static void reject_high8_with_rex(AsmDriver* d, const X64AsmOperand* a, 465 const X64AsmOperand* b) { 466 if ((a && a->high8 && byte_reg_needs_rex(b)) || 467 (b && b->high8 && byte_reg_needs_rex(a))) { 468 asm_driver_panic(d, "x64 asm: high-byte register cannot use REX"); 469 } 470 } 471 472 static __attribute__((unused)) void emit_movb_rr_operand(AsmDriver* d, 473 MCEmitter* mc, 474 X64AsmOperand dst, 475 X64AsmOperand src) { 476 u8 ob = 0x88; 477 reject_high8_with_rex(d, &dst, &src); 478 if (byte_reg_needs_rex(&dst) || byte_reg_needs_rex(&src)) 479 emit_rex_force(mc, 0, src.reg, 0, dst.reg); 480 else 481 emit_rex(mc, 0, src.reg, 0, dst.reg); 482 mc->emit_bytes(mc, &ob, 1); 483 { 484 u8 mr = modrm(3u, src.reg, dst.reg); 485 mc->emit_bytes(mc, &mr, 1); 486 } 487 } 488 489 static 
__attribute__((unused)) void emit_movb_store_operand(AsmDriver* d, 490 MCEmitter* mc, 491 X64AsmOperand src, 492 X64AsmOperand dst) { 493 if (src.high8) { 494 u8 ob = 0x88; 495 if (dst.no_base || dst.base >= 8u) 496 asm_driver_panic(d, "x64 asm: high-byte register cannot use REX"); 497 if (dst.seg) mc->emit_bytes(mc, &dst.seg, 1); 498 mc->emit_bytes(mc, &ob, 1); 499 emit_mem_operand(mc, src.reg, dst.base, dst.disp); 500 return; 501 } 502 emit_mov_store_operand(d, mc, 1, src.reg, dst, 1); 503 } 504 505 static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc, 506 u32 width, u8 opc, u32 sub, 507 X64AsmOperand dst, i32 imm, 508 int imm32) { 509 u8 buf[16]; 510 u32 n = 0; 511 if (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM) 512 asm_driver_panic(d, "x64 asm: expected register or memory destination"); 513 if (width == 2u) buf[n++] = X64_OPSIZE_PFX; 514 if (dst.kind == X64_ASM_OP_REG) { 515 n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.reg); 516 buf[n++] = opc; 517 buf[n++] = x64_modrm(3u, sub, dst.reg); 518 } else { 519 n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.base); 520 buf[n++] = opc; 521 n += x64_pack_mem(buf + n, sub, dst.base, dst.disp); 522 } 523 if (imm32) 524 n += x64_put_u32le(buf + n, (u32)imm); 525 else 526 buf[n++] = (u8)(i8)imm; 527 emit_packed(mc, buf, n); 528 } 529 530 static __attribute__((unused)) void emit_rm_op(AsmDriver* d, MCEmitter* mc, 531 u32 width, u8 opc, u32 sub, 532 X64AsmOperand dst) { 533 u8 buf[16]; 534 u32 n = 0; 535 if (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM) 536 asm_driver_panic(d, "x64 asm: expected register or memory operand"); 537 if (width == 2u) buf[n++] = X64_OPSIZE_PFX; 538 if (dst.kind == X64_ASM_OP_REG) { 539 n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.reg); 540 buf[n++] = opc; 541 buf[n++] = x64_modrm(3u, sub, dst.reg); 542 } else { 543 n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.base); 544 buf[n++] = opc; 545 n += x64_pack_mem(buf + n, sub, dst.base, 
dst.disp); 546 } 547 emit_packed(mc, buf, n); 548 } 549 550 static __attribute__((unused)) void emit_reg_rm_twobyte( 551 AsmDriver* d, MCEmitter* mc, u32 width, u8 opcode2, u32 dst, 552 X64AsmOperand src, int force_rex, u8 prefix) { 553 u8 buf[16]; 554 u32 n = 0; 555 if (src.kind != X64_ASM_OP_REG && src.kind != X64_ASM_OP_MEM) 556 asm_driver_panic(d, "x64 asm: expected register or memory source"); 557 if (prefix) buf[n++] = prefix; 558 if (src.kind == X64_ASM_OP_REG) { 559 if (force_rex) 560 n += x64_pack_rex_force(buf + n, width == 8u, dst, 0, src.reg); 561 else 562 n += x64_pack_rex(buf + n, width == 8u, dst, 0, src.reg); 563 buf[n++] = X64_OPC_TWOBYTE; 564 buf[n++] = opcode2; 565 buf[n++] = x64_modrm(3u, dst, src.reg); 566 } else { 567 /* Route the full memory-operand variety (plain / SIB-indexed / RIP / 568 * segment) through the shared pack helpers so a SIB index register is 569 * preserved (e.g. `movzbl (%rcx,%rsi,1), %edx`). */ 570 if (src.seg) buf[n++] = src.seg; 571 n += x64_pack_rex_mem_operand(buf + n, width == 8u, dst, src); 572 buf[n++] = X64_OPC_TWOBYTE; 573 buf[n++] = opcode2; 574 n += x64_pack_mem_operand(buf + n, dst, src); 575 } 576 emit_packed(mc, buf, n); 577 if (src.kind == X64_ASM_OP_MEM) x64_emit_mem_reloc(d, mc, &src, 0); 578 } 579 580 /* ==================================================================== 581 * Descriptor-driven mnemonic dispatch. 582 * 583 * The disassembler's `x64_insn_table` (src/arch/x64/isa.c) lists every 584 * encoding kit emits with its X64Format. We reuse the SAME table for 585 * the assembler: linear-scan to find the row whose mnemonic matches the 586 * user's AT&T spelling (after stripping the size suffix b/w/l/q), then 587 * dispatch to a per-format parser that consumes the operands and calls 588 * the existing `emit_*` helpers in emit.c. 
589 * 590 * The width comes from the suffix (or the row's width flags); per-format 591 * parsers receive it via a small X64ParseCtx so they can pick the right 592 * emit overload (e.g., MOV r,r at 32 vs 64 bits). 593 * 594 * Note: a single mnemonic ("mov") has multiple table rows for different 595 * formats (MOV_RI, ALU_RR, MOV_RM_LOAD). We return the FIRST row that 596 * matches the mnemonic + width filter; per-format parsers that need a 597 * different row (e.g., MOV imm→reg uses MOV_RI but our scan may have 598 * returned ALU_RR first) fall through to operand-kind dispatch and 599 * select the correct emit helper directly. Phase 1 of this refactor 600 * only exercises the mnemonics asm.c handled before; richer disambiguation 601 * lands in follow-ups. */ 602 603 #define X64_SFX_NONE 0u 604 #define X64_SFX_B 1u 605 #define X64_SFX_W 2u 606 #define X64_SFX_L 4u 607 #define X64_SFX_Q 8u 608 609 typedef struct X64MnInfo { 610 char base[16]; /* stripped mnemonic (table-spelling) */ 611 u32 base_len; 612 u32 width; /* X64_SFX_* — 0 if mnemonic carries no size letter */ 613 u32 cc; /* condition nibble for jcc/cmovcc/setcc, or 16 if none */ 614 } X64MnInfo; 615 616 /* Parse the user-supplied mnemonic into (root, width, cc). Handles: 617 * - trailing size letter (b/w/l/q) when the table mnemonic has none 618 * - jXX → ("j", cc) 619 * - cmovXX[q|l|w|b] → ("cmov", cc, width) 620 * - setXX → ("set", cc) 621 * - exact-match mnemonics carried verbatim (movslq, movzbl, ud2, ...) */ 622 static int parse_mnemonic(const char* s, size_t n, X64MnInfo* out) { 623 static const struct { 624 const char* name; 625 u8 cc; 626 } kCC[] = { 627 /* Two-letter codes first so e.g. "ne" beats "n" if we ever add it. 
*/ 628 {"ae", X64_CC_AE}, {"be", X64_CC_BE}, {"ge", X64_CC_GE}, 629 {"le", X64_CC_LE}, {"ne", X64_CC_NE}, {"no", X64_CC_NO}, 630 {"np", X64_CC_NP}, {"ns", X64_CC_NS}, {"a", X64_CC_A}, 631 {"b", X64_CC_B}, {"e", X64_CC_E}, {"g", X64_CC_G}, 632 {"l", X64_CC_L}, {"o", X64_CC_O}, {"p", X64_CC_P}, 633 {"s", X64_CC_S}, 634 }; 635 out->base_len = 0; 636 out->width = X64_SFX_NONE; 637 out->cc = 16u; 638 if (n == 0 || n >= sizeof out->base) return 0; 639 memcpy(out->base, s, n); 640 out->base[n] = '\0'; 641 642 /* Exact-match mnemonics that carry their own width letters or are 643 * already canonical table spellings. */ 644 if (n >= 6 && memcmp(s, "movslq", 6) == 0) { 645 memcpy(out->base, "movslq", 6); 646 out->base_len = 6; 647 out->width = X64_SFX_Q; 648 return 1; 649 } 650 if (n >= 6 && (memcmp(s, "movzbl", 6) == 0 || memcmp(s, "movzwl", 6) == 0 || 651 memcmp(s, "movsbl", 6) == 0 || memcmp(s, "movswl", 6) == 0 || 652 memcmp(s, "movzbq", 6) == 0 || memcmp(s, "movzwq", 6) == 0 || 653 memcmp(s, "movsbq", 6) == 0 || memcmp(s, "movswq", 6) == 0)) { 654 memcpy(out->base, s, 6); 655 out->base_len = 6; 656 return 1; 657 } 658 if (n == 3 && memcmp(s, "ud2", 3) == 0) { 659 out->base_len = 3; 660 return 1; 661 } 662 if (n == 3 && memcmp(s, "nop", 3) == 0) { 663 out->base_len = 3; 664 return 1; 665 } 666 if (n == 3 && memcmp(s, "ret", 3) == 0) { 667 out->base_len = 3; 668 return 1; 669 } 670 /* "syscall" ends in 'l' — return early so the generic size-suffix 671 * stripper below does not mistake it for a movl-style width letter. */ 672 if (n == 7 && memcmp(s, "syscall", 7) == 0) { 673 out->base_len = 7; 674 return 1; 675 } 676 677 /* Indirect-branch spellings carry an explicit 'q' suffix that must be 678 * preserved — the BR_RM rows in the table are keyed on "jmpq"/"callq". 
*/ 679 if (n == 4 && memcmp(s, "call", 4) == 0) { 680 memcpy(out->base, "callq", 5); 681 out->base[5] = '\0'; 682 out->base_len = 5; 683 out->width = X64_SFX_Q; 684 return 1; 685 } 686 if (n == 4 && memcmp(s, "jmpq", 4) == 0) { 687 out->base_len = 4; 688 out->width = X64_SFX_Q; 689 return 1; 690 } 691 if (n == 5 && memcmp(s, "callq", 5) == 0) { 692 out->base_len = 5; 693 out->width = X64_SFX_Q; 694 return 1; 695 } 696 697 /* CMOVcc: cmov<cc>[suffix]. Strip optional trailing q/l/w/b first. */ 698 if (n >= 5 && memcmp(s, "cmov", 4) == 0) { 699 size_t after = 4; 700 size_t tail = n; 701 char last = s[n - 1]; 702 if (last == 'b' || last == 'w' || last == 'l' || last == 'q') { 703 out->width = (last == 'b') ? X64_SFX_B 704 : (last == 'w') ? X64_SFX_W 705 : (last == 'l') ? X64_SFX_L 706 : X64_SFX_Q; 707 tail = n - 1; 708 } 709 if (tail > after) { 710 Slice cc = {{s + after}, tail - after}; 711 for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) { 712 if (slice_eq_cstr(cc, kCC[i].name)) { 713 out->cc = kCC[i].cc; 714 memcpy(out->base, "cmov", 4); 715 out->base[4] = '\0'; 716 out->base_len = 4; 717 return 1; 718 } 719 } 720 } 721 } 722 723 /* SETcc: set<cc>. */ 724 if (n > 3 && memcmp(s, "set", 3) == 0) { 725 Slice cc = {{s + 3}, n - 3}; 726 for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) { 727 if (slice_eq_cstr(cc, kCC[i].name)) { 728 out->cc = kCC[i].cc; 729 memcpy(out->base, "set", 3); 730 out->base[3] = '\0'; 731 out->base_len = 3; 732 return 1; 733 } 734 } 735 } 736 737 /* Jcc: j<cc> — but NOT "jmp" / "jmpq" (handled above). */ 738 if (n > 1 && s[0] == 'j' && !(n >= 3 && s[1] == 'm' && s[2] == 'p')) { 739 Slice cc = {{s + 1}, n - 1}; 740 for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) { 741 if (slice_eq_cstr(cc, kCC[i].name)) { 742 out->cc = kCC[i].cc; 743 out->base[0] = 'j'; 744 out->base[1] = '\0'; 745 out->base_len = 1; 746 return 1; 747 } 748 } 749 } 750 751 /* Generic: strip trailing size letter b/w/l/q. 
*/ 752 { 753 char last = s[n - 1]; 754 if (last == 'b' || last == 'w' || last == 'l' || last == 'q') { 755 out->width = (last == 'b') ? X64_SFX_B 756 : (last == 'w') ? X64_SFX_W 757 : (last == 'l') ? X64_SFX_L 758 : X64_SFX_Q; 759 out->base_len = (u32)(n - 1); 760 out->base[out->base_len] = '\0'; 761 return 1; 762 } 763 } 764 765 out->base_len = (u32)n; 766 return 1; 767 } 768 769 /* Width implied by a descriptor row, given the mnemonic's parsed width. */ 770 static u32 row_implied_width(const X64InsnDesc* d) { 771 if (d->flags & X64_ASMFL_FORCE_W64) return 8u; 772 if (d->flags & X64_ASMFL_BYTE) return 1u; 773 if (d->flags & X64_ASMFL_W16) return 2u; 774 if (d->flags & X64_ASMFL_W_FROM_REX) return 0u; /* any */ 775 if (d->leg_pfx == X64_PFX_66) return 2u; 776 return 0u; /* any */ 777 } 778 779 /* Linear scan for the first table row whose mnemonic matches `info->base` 780 * AND whose width filter is compatible. Returns NULL on miss. */ 781 static const X64InsnDesc* find_mnemonic_row(const X64MnInfo* info) { 782 u32 want_w = info->width; /* 0 = any */ 783 Slice base = {{info->base}, info->base_len}; 784 for (u32 i = 0; i < x64_insn_table_n; ++i) { 785 const X64InsnDesc* d = &x64_insn_table[i]; 786 if (!slice_eq(d->mnemonic, base)) continue; 787 if (want_w != 0) { 788 u32 rw = row_implied_width(d); 789 if (rw != 0 && rw != want_w) continue; 790 } 791 return d; 792 } 793 return NULL; 794 } 795 796 /* Per-format parse context. */ 797 typedef struct X64ParseCtx { 798 AsmDriver* d; 799 MCEmitter* mc; 800 const X64InsnDesc* desc; 801 u32 width; /* 1/2/4/8 — derived from suffix or row */ 802 u32 cc; /* condition nibble (jcc/cmovcc/setcc); 16 if unused */ 803 } X64ParseCtx; 804 805 /* w-bit for emit_rex / emit_alu_rr / emit_mov_rr etc. */ 806 static int width_to_w(u32 w) { return w == 8u ? 1 : 0; } 807 808 /* ---- per-format parsers ---- */ 809 810 static void parse_nullary(X64ParseCtx* p) { 811 /* nop / ret / ud2 / leave / cltd / cqto. 
*/ 812 u8 buf[4]; 813 u32 n = 0; 814 if (p->desc->leg_pfx) buf[n++] = p->desc->leg_pfx; 815 if (p->desc->rex_w_req == X64_W_REQ_1) buf[n++] = X64_REX_BASE | X64_REX_W; 816 for (u32 i = 0; i < p->desc->opc_len; ++i) buf[n++] = p->desc->opc[i]; 817 if (p->desc->opc_len >= 1u) { 818 p->mc->emit_bytes(p->mc, buf, n); 819 return; 820 } 821 asm_driver_panic(p->d, "x64 asm: nullary form not implemented"); 822 } 823 824 static void parse_br_rm(X64ParseCtx* p) { 825 /* jmpq *%reg or callq *%reg. /digit picks sub (2 = call, 4 = jmp). */ 826 X64AsmOperand op = parse_operand(p->d); 827 if (op.kind != X64_ASM_OP_IND_REG) 828 asm_driver_panic(p->d, "x64 asm: indirect branch form"); 829 emit_indirect_branch(p->mc, p->desc->modrm_reg, op.reg); 830 } 831 832 /* Look up the ALU_RM_IMM8 row for a given mnemonic root; the /digit 833 * picks the operation (0=add, 1=or, 4=and, 5=sub, 6=xor, 7=cmp). */ 834 static const X64InsnDesc* find_alu_imm_row(Slice root) { 835 for (u32 i = 0; i < x64_insn_table_n; ++i) { 836 const X64InsnDesc* d = &x64_insn_table[i]; 837 if (d->fmt != X64_FMT_ALU_RM_IMM8) continue; 838 if (!slice_eq(d->mnemonic, root)) continue; 839 return d; 840 } 841 return NULL; 842 } 843 844 static void parse_alu_rr(X64ParseCtx* p) { 845 /* op src, dst in AT&T. Row's opc[0] is the ALU opcode (0x01/0x09/... 846 * 0x31/0x85/0x89). The byte/16-bit forms are handled by the 847 * existing emit.c helpers for w=0/w=1 + size suffix; here phase-1 848 * supports only the regular 32/64 forms used by the prior asm.c. */ 849 X64AsmOperand src; 850 X64AsmOperand dst; 851 src = parse_operand(p->d); 852 expect_comma(p->d); 853 dst = parse_operand(p->d); 854 855 /* Immediate source → not an ALU_RR encoding. Redirect to the 856 * ALU_RM_IMM row for this mnemonic. 
*/ 857 if (src.kind == X64_ASM_OP_IMM && 858 (dst.kind == X64_ASM_OP_REG || dst.kind == X64_ASM_OP_MEM)) { 859 const X64InsnDesc* imm_row = find_alu_imm_row(p->desc->mnemonic); 860 if (!imm_row) asm_driver_panic(p->d, "x64 asm: no alu-imm row"); 861 if (dst.kind == X64_ASM_OP_MEM) { 862 emit_rm_imm_store_operand(p->d, p->mc, p->width, X64_OPC_ALU_IMM8, 863 X64_OPC_ALU_IMM32, imm_row->modrm_reg, dst, 864 src.imm, 1); 865 return; 866 } 867 /* Stack-pointer adjustments (`add/sub $imm, %rsp`, 64-bit) always use the 868 * imm32 form in codegen — the prologue and alloca patch a fixed-width 869 * placeholder, so they never shrink to imm8 even for a small frame. Match 870 * that here so `cc -S | as` reproduces codegen's bytes exactly; %rsp is a 871 * reserved register, so codegen never emits an imm8 ALU op against it. */ 872 if (dst.reg == X64_RSP && p->width == 8u && imm_fits_i32(src.imm)) 873 emit_alu_imm32(p->mc, 1, imm_row->modrm_reg, dst.reg, (i32)src.imm); 874 else if (imm_fits_i8(src.imm)) 875 emit_alu_imm8(p->mc, width_to_w(p->width), imm_row->modrm_reg, dst.reg, 876 (i8)src.imm); 877 else if (imm_fits_i32(src.imm)) 878 emit_alu_imm32(p->mc, width_to_w(p->width), imm_row->modrm_reg, dst.reg, 879 (i32)src.imm); 880 else 881 asm_driver_panic(p->d, "x64 asm: alu-imm out of range"); 882 return; 883 } 884 885 if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) { 886 u8 op = p->desc->opc[0]; 887 if (p->width == 2u) { 888 u8 pfx = X64_OPSIZE_PFX; 889 p->mc->emit_bytes(p->mc, &pfx, 1); 890 } 891 if (op == 0x89u) { 892 /* MOV r/m, r — phase-1 keeps the existing helper. */ 893 emit_mov_rr(p->mc, width_to_w(p->width), dst.reg, src.reg); 894 return; 895 } 896 if (op == 0x88u) { 897 emit_movb_rr_operand(p->d, p->mc, dst, src); 898 return; 899 } 900 if (op == 0x88u) { 901 /* MOV r/m8, r8 — byte form (preserved from prior asm.c). 
*/ 902 u8 ob = 0x88; 903 emit_rex(p->mc, 0, src.reg, 0, dst.reg); 904 p->mc->emit_bytes(p->mc, &ob, 1); 905 { 906 u8 mr = modrm(3u, src.reg, dst.reg); 907 p->mc->emit_bytes(p->mc, &mr, 1); 908 } 909 return; 910 } 911 /* xor/test/and/... — emit_alu_rr handles the generic shape. */ 912 emit_alu_rr(p->mc, width_to_w(p->width), op, dst.reg, src.reg); 913 return; 914 } 915 /* MOV r, r/m goes through MOV_RM_LOAD; MOV r, m goes through 916 * MOV_RM_LOAD (load) or ALU_RR with mem dst (store). We handle the 917 * store side here only when the mnemonic is "mov" (opc 0x89). */ 918 if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_REG && 919 dst.kind == X64_ASM_OP_MEM) { 920 if (p->width == 1u) 921 emit_movb_store_operand(p->d, p->mc, src, dst); 922 else 923 emit_mov_store_operand(p->d, p->mc, p->width, src.reg, dst, 0); 924 return; 925 } 926 if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_MEM && 927 dst.kind == X64_ASM_OP_REG) { 928 emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src); 929 return; 930 } 931 /* ALU reg → mem store (add/or/and/sub/xor/cmp %reg, mem): the reg-to-r/m 932 * /r opcode (opc[0]) with a memory ModR/M. The byte form clears the 933 * opcode's W bit (e.g. ADD r/m,r 0x01 → r/m8,r8 0x00). */ 934 if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) { 935 u8 op = p->width == 1u ? (u8)(p->desc->opc[0] & ~1u) : p->desc->opc[0]; 936 emit_reg_store_operand(p->d, p->mc, p->width, op, src.reg, dst, 937 p->width == 1u && byte_reg_needs_rex(&src)); 938 return; 939 } 940 asm_driver_panic(p->d, "x64 asm: unsupported alu_rr form"); 941 } 942 943 static void parse_mov_ri(X64ParseCtx* p) { 944 X64AsmOperand src; 945 X64AsmOperand dst; 946 src = parse_operand(p->d); 947 expect_comma(p->d); 948 dst = parse_operand(p->d); 949 if (src.kind != X64_ASM_OP_IMM || 950 (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM)) 951 asm_driver_panic(p->d, "x64 asm: mov-imm form"); 952 /* MOV $imm → mem: C6 /0 (byte) or C7 /0 (32/64 sign-extended imm32). 
*/ 953 if (dst.kind == X64_ASM_OP_MEM) { 954 if (p->width != 8u && !imm_fits_i32(src.imm)) 955 asm_driver_panic(p->d, "x64 asm: mov immediate out of range"); 956 emit_rm_imm_store_operand( 957 p->d, p->mc, p->width, X64_OPC_MOV_RM_IMM8, 958 p->width == 1u ? X64_OPC_MOV_RM_IMM8 : X64_OPC_MOV_RM_IMM32, 959 X64_MOV_RM_IMM_SUB, dst, src.imm, 0); 960 return; 961 } 962 if (p->width != 4u && p->width != 8u) 963 asm_driver_panic(p->d, "x64 asm: mov imm only supports l/q forms"); 964 x64_emit_load_imm(p->mc, p->width == 8u ? 1 : 0, dst.reg, src.imm); 965 } 966 967 static void parse_mov_rm_load(X64ParseCtx* p) { 968 /* MOV r, r/m (0x8B) or LEA r, m (0x8D). AT&T order is src, dst. 969 * Phase-1 covers reg-reg, reg←mem (load) and lea. */ 970 X64AsmOperand src; 971 X64AsmOperand dst; 972 src = parse_operand(p->d); 973 expect_comma(p->d); 974 dst = parse_operand(p->d); 975 if (p->desc->opc[0] == 0x8Du) { 976 if (src.kind != X64_ASM_OP_MEM || dst.kind != X64_ASM_OP_REG) 977 asm_driver_panic(p->d, "x64 asm: lea form"); 978 emit_reg_mem_operand(p->d, p->mc, p->width, X64_OPC_LEA, dst.reg, src); 979 return; 980 } 981 if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { 982 emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src); 983 return; 984 } 985 if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) { 986 if (p->width == 2u) { 987 u8 pfx = X64_OPSIZE_PFX; 988 p->mc->emit_bytes(p->mc, &pfx, 1); 989 } 990 emit_mov_rr(p->mc, width_to_w(p->width), dst.reg, src.reg); 991 return; 992 } 993 asm_driver_panic(p->d, "x64 asm: mov-load form"); 994 } 995 996 static void parse_movsxd(X64ParseCtx* p) { 997 X64AsmOperand src; 998 X64AsmOperand dst; 999 src = parse_operand(p->d); 1000 expect_comma(p->d); 1001 dst = parse_operand(p->d); 1002 if (dst.kind != X64_ASM_OP_REG) 1003 asm_driver_panic(p->d, "x64 asm: movslq form"); 1004 if (src.kind == X64_ASM_OP_REG) { 1005 emit_extend_rr(p->mc, 1, 1, 4, dst.reg, src.reg); 1006 } else if (src.kind == X64_ASM_OP_MEM) { 1007 u8 
buf[16]; 1008 u32 n = x64_mov_rm_load_pack((X64MovRMLoad){.w = 1, 1009 .opc0 = X64_OPC_MOVSXD, 1010 .dst = dst.reg, 1011 .base = src.base, 1012 .disp = src.disp}, 1013 buf); 1014 emit_packed(p->mc, buf, n); 1015 } else { 1016 asm_driver_panic(p->d, "x64 asm: movslq source"); 1017 } 1018 } 1019 1020 static void parse_alu_rm_imm(X64ParseCtx* p) { 1021 X64AsmOperand src; 1022 X64AsmOperand dst; 1023 src = parse_operand(p->d); 1024 expect_comma(p->d); 1025 dst = parse_operand(p->d); 1026 if (src.kind != X64_ASM_OP_IMM || dst.kind != X64_ASM_OP_REG) 1027 asm_driver_panic(p->d, "x64 asm: alu-imm form"); 1028 if (imm_fits_i8(src.imm)) 1029 emit_alu_imm8(p->mc, width_to_w(p->width), p->desc->modrm_reg, dst.reg, 1030 (i8)src.imm); 1031 else if (imm_fits_i32(src.imm)) 1032 emit_alu_imm32(p->mc, width_to_w(p->width), p->desc->modrm_reg, dst.reg, 1033 (i32)src.imm); 1034 else 1035 asm_driver_panic(p->d, "x64 asm: alu-imm out of range"); 1036 } 1037 1038 static void parse_cmovcc(X64ParseCtx* p) { 1039 X64AsmOperand src; 1040 X64AsmOperand dst; 1041 src = parse_operand(p->d); 1042 expect_comma(p->d); 1043 dst = parse_operand(p->d); 1044 if (src.kind != X64_ASM_OP_REG || dst.kind != X64_ASM_OP_REG) 1045 asm_driver_panic(p->d, "x64 asm: cmovcc form"); 1046 { 1047 u8 op[2] = {0x0f, (u8)(0x40u | (p->cc & 0xfu))}; 1048 if (p->width == 2u) { 1049 u8 pfx = X64_OPSIZE_PFX; 1050 p->mc->emit_bytes(p->mc, &pfx, 1); 1051 } 1052 emit_rex(p->mc, width_to_w(p->width), dst.reg, 0, src.reg); 1053 p->mc->emit_bytes(p->mc, op, 2); 1054 emit_rm_reg(p->mc, dst.reg, src.reg); 1055 } 1056 } 1057 1058 static void parse_push_pop(X64ParseCtx* p) { 1059 X64AsmOperand op = parse_operand(p->d); 1060 u8 base = p->desc->opc[0]; 1061 u8 ob; 1062 if (op.kind != X64_ASM_OP_REG) 1063 asm_driver_panic(p->d, "x64 asm: push/pop register"); 1064 emit_rex(p->mc, 0, 0, 0, op.reg); 1065 ob = (u8)(base | (op.reg & 7u)); 1066 p->mc->emit_bytes(p->mc, &ob, 1); 1067 } 1068 1069 static void parse_movzx_movsx(X64ParseCtx* p) 
{ 1070 X64AsmOperand src = parse_operand(p->d); 1071 X64AsmOperand dst; 1072 expect_comma(p->d); 1073 dst = parse_operand(p->d); 1074 if (dst.kind != X64_ASM_OP_REG) 1075 asm_driver_panic(p->d, "x64 asm: movx dst register"); 1076 /* REX.W follows the destination register width: `movsbq …, %rcx` (64-bit) 1077 * needs REX.W; `movsbl …, %ecx` (32-bit) does not. The disassembler spells 1078 * the q/l form from REX.W, so honoring dst width here round-trips it. */ 1079 emit_reg_rm_twobyte( 1080 p->d, p->mc, dst.width == 8u ? 8u : 4u, p->desc->opc[1], dst.reg, src, 1081 p->desc->opc[1] == X64_OPC_MOVZX_B || p->desc->opc[1] == X64_OPC_MOVSX_B, 1082 0); 1083 } 1084 1085 static void parse_imul_rr(X64ParseCtx* p) { 1086 X64AsmOperand src = parse_operand(p->d); 1087 X64AsmOperand dst; 1088 if (src.kind == X64_ASM_OP_IMM) { 1089 X64AsmOperand real_src; 1090 expect_comma(p->d); 1091 real_src = parse_operand(p->d); 1092 expect_comma(p->d); 1093 dst = parse_operand(p->d); 1094 if (dst.kind != X64_ASM_OP_REG) 1095 asm_driver_panic(p->d, "x64 asm: imul dst register"); 1096 if (real_src.kind == X64_ASM_OP_REG) { 1097 if (imm_fits_i8(src.imm)) 1098 emit_imul_imm8(p->mc, width_to_w(p->width), dst.reg, real_src.reg, 1099 (i8)src.imm); 1100 else if (imm_fits_i32(src.imm)) 1101 emit_imul_imm32(p->mc, width_to_w(p->width), dst.reg, real_src.reg, 1102 (i32)src.imm); 1103 else 1104 asm_driver_panic(p->d, "x64 asm: imul imm out of range"); 1105 return; 1106 } 1107 if (real_src.kind == X64_ASM_OP_MEM) { 1108 u8 buf[16]; 1109 u32 n = 0; 1110 int imm32 = !imm_fits_i8(src.imm); 1111 if (imm32 && !imm_fits_i32(src.imm)) 1112 asm_driver_panic(p->d, "x64 asm: imul imm out of range"); 1113 n += x64_pack_rex(buf + n, width_to_w(p->width), dst.reg, 0, 1114 real_src.base); 1115 buf[n++] = imm32 ? 
X64_OPC_IMUL_IMM32 : X64_OPC_IMUL_IMM8; 1116 n += x64_pack_mem(buf + n, dst.reg, real_src.base, real_src.disp); 1117 if (imm32) 1118 n += x64_put_u32le(buf + n, (u32)(i32)src.imm); 1119 else 1120 buf[n++] = (u8)(i8)src.imm; 1121 emit_packed(p->mc, buf, n); 1122 return; 1123 } 1124 asm_driver_panic(p->d, "x64 asm: imul source"); 1125 } 1126 expect_comma(p->d); 1127 dst = parse_operand(p->d); 1128 if (dst.kind != X64_ASM_OP_REG) 1129 asm_driver_panic(p->d, "x64 asm: imul dst register"); 1130 emit_reg_rm_twobyte(p->d, p->mc, p->width, X64_OPC_IMUL_2B, dst.reg, src, 0, 1131 0); 1132 } 1133 1134 static void parse_imul_rri(X64ParseCtx* p) { 1135 X64AsmOperand imm = parse_operand(p->d); 1136 X64AsmOperand src; 1137 X64AsmOperand dst; 1138 if (imm.kind != X64_ASM_OP_IMM) asm_driver_panic(p->d, "x64 asm: imul imm"); 1139 expect_comma(p->d); 1140 src = parse_operand(p->d); 1141 expect_comma(p->d); 1142 dst = parse_operand(p->d); 1143 if (dst.kind != X64_ASM_OP_REG) 1144 asm_driver_panic(p->d, "x64 asm: imul dst register"); 1145 if (src.kind == X64_ASM_OP_REG) { 1146 if (p->desc->opc[0] == X64_OPC_IMUL_IMM8 || imm_fits_i8(imm.imm)) 1147 emit_imul_imm8(p->mc, width_to_w(p->width), dst.reg, src.reg, 1148 (i8)imm.imm); 1149 else if (imm_fits_i32(imm.imm)) 1150 emit_imul_imm32(p->mc, width_to_w(p->width), dst.reg, src.reg, 1151 (i32)imm.imm); 1152 else 1153 asm_driver_panic(p->d, "x64 asm: imul imm out of range"); 1154 return; 1155 } 1156 if (src.kind == X64_ASM_OP_MEM) { 1157 u8 buf[16]; 1158 u32 n = 0; 1159 int imm32 = !(p->desc->opc[0] == X64_OPC_IMUL_IMM8 || imm_fits_i8(imm.imm)); 1160 if (imm32 && !imm_fits_i32(imm.imm)) 1161 asm_driver_panic(p->d, "x64 asm: imul imm out of range"); 1162 n += x64_pack_rex(buf + n, width_to_w(p->width), dst.reg, 0, src.base); 1163 buf[n++] = imm32 ? 
X64_OPC_IMUL_IMM32 : X64_OPC_IMUL_IMM8; 1164 n += x64_pack_mem(buf + n, dst.reg, src.base, src.disp); 1165 if (imm32) 1166 n += x64_put_u32le(buf + n, (u32)(i32)imm.imm); 1167 else 1168 buf[n++] = (u8)(i8)imm.imm; 1169 emit_packed(p->mc, buf, n); 1170 return; 1171 } 1172 asm_driver_panic(p->d, "x64 asm: imul source"); 1173 } 1174 1175 static void parse_f7_rm(X64ParseCtx* p) { 1176 X64AsmOperand op = parse_operand(p->d); 1177 emit_rm_op(p->d, p->mc, p->width, X64_OPC_F7, p->desc->modrm_reg, op); 1178 } 1179 1180 static void parse_shift(X64ParseCtx* p) { 1181 X64AsmOperand src = parse_operand(p->d); 1182 X64AsmOperand dst; 1183 expect_comma(p->d); 1184 dst = parse_operand(p->d); 1185 if (src.kind == X64_ASM_OP_REG && src.reg == X64_RCX && src.width == 1u) { 1186 emit_rm_op(p->d, p->mc, p->width, X64_OPC_SHIFT_CL, p->desc->modrm_reg, 1187 dst); 1188 return; 1189 } 1190 if (src.kind != X64_ASM_OP_IMM) asm_driver_panic(p->d, "x64 asm: shift imm"); 1191 emit_rm_imm(p->d, p->mc, p->width, X64_OPC_SHIFT_IMM, p->desc->modrm_reg, dst, 1192 (i32)src.imm, 0); 1193 } 1194 1195 static void parse_rel32_branch(X64ParseCtx* p) { 1196 ObjSymId sym = OBJ_SYM_NONE; 1197 i64 off = 0; 1198 u32 disp_pos; 1199 if (p->desc->fmt == X64_FMT_JCC_REL32) { 1200 u8 op[2] = {0x0f, (u8)(0x80u | (p->cc & 0xfu))}; 1201 p->mc->emit_bytes(p->mc, op, 2); 1202 } else { 1203 u8 op = (p->desc->fmt == X64_FMT_CALL_REL32) ? X64_OPC_CALL_REL32 1204 : X64_OPC_JMP_REL32; 1205 p->mc->emit_bytes(p->mc, &op, 1); 1206 } 1207 disp_pos = p->mc->pos(p->mc); 1208 emit_u32le(p->mc, 0); 1209 asm_driver_parse_sym_expr(p->d, &sym, &off); 1210 if (sym == OBJ_SYM_NONE) 1211 asm_driver_panic(p->d, "x64 asm: symbolic branch target required"); 1212 /* A `@PLT` suffix forces the PLT32 reloc (the default for `call`); plain 1213 * `jmp sym` uses PC32. */ 1214 RelocKind dflt = p->desc->fmt == X64_FMT_CALL_REL32 ? 
/* Two-operand SSE instruction, AT&T order `src, dst`.
 *
 * The row's second opcode byte (opc[1]) and mnemonic select one of several
 * shapes, tried in this order:
 *   1. opc[1] == 0x2c: XMM/mem -> GPR conversion.
 *   2. opc[1] == 0x2a: GPR/mem -> XMM conversion.
 *   3. movsd/movss with an XMM source and memory destination: emitted as a
 *      store using opcode 0x11 instead of the row's 0x10.
 *   4. movaps with an XMM source and memory destination: emitted as a store
 *      using opcode 0x29 instead of the row's 0x28.
 *   5. everything else: XMM destination with an XMM or memory source.
 * Operand combinations outside these shapes are reported via
 * asm_driver_panic.
 *
 * NOTE(review): the emit_sse_load / emit_sse_store paths are given only the
 * operand's base and disp, so index/RIP-relative/relocated memory operands
 * are presumably not representable there — confirm. */
static void parse_sse_rr(X64ParseCtx* p) {
  X64AsmOperand src = parse_operand(p->d);
  X64AsmOperand dst;
  int cvt_to_int = p->desc->opc[1] == 0x2cu;
  int cvt_from_int = p->desc->opc[1] == 0x2au;
  expect_comma(p->d);
  dst = parse_operand(p->d);
  if (cvt_to_int) {
    /* cvttsd2si/cvttss2si XMM/m -> GPR: REX.W follows the GPR destination
     * width (`%rdx` = 64-bit, `%edx` = 32-bit), not the mnemonic — these rows
     * carry no size suffix. */
    if (dst.kind != X64_ASM_OP_REG)
      asm_driver_panic(p->d, "x64 asm: cvtt dst register");
    /* Any destination width other than 8 is treated as 32-bit. */
    u32 gpr_w = dst.width == 8u ? 8u : 4u;
    if (src.kind == X64_ASM_OP_XMM)
      emit_sse_rr_w(p->mc, p->desc->leg_pfx, p->desc->opc[1], width_to_w(gpr_w),
                    dst.reg, src.reg);
    else if (src.kind == X64_ASM_OP_MEM)
      emit_reg_rm_twobyte(p->d, p->mc, gpr_w, p->desc->opc[1], dst.reg, src, 0,
                          p->desc->leg_pfx);
    else
      asm_driver_panic(p->d, "x64 asm: cvtt source");
    return;
  }
  if (cvt_from_int) {
    /* cvtsi2sd/cvtsi2ss GPR/m -> XMM: REX.W follows the GPR source width. */
    if (dst.kind != X64_ASM_OP_XMM)
      asm_driver_panic(p->d, "x64 asm: cvtsi dst xmm");
    if (src.kind == X64_ASM_OP_REG) {
      u32 gpr_w = src.width == 8u ? 8u : 4u;
      emit_sse_rr_w(p->mc, p->desc->leg_pfx, p->desc->opc[1], width_to_w(gpr_w),
                    dst.reg, src.reg);
    } else if (src.kind == X64_ASM_OP_MEM)
      /* Memory source: no width is passed on this path. */
      emit_sse_load(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.base,
                    src.disp);
    else
      asm_driver_panic(p->d, "x64 asm: cvtsi source");
    return;
  }
  /* movsd/movss xmm -> mem: the row carries the load opcode (0x10); the store
   * direction uses 0x11. */
  if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM &&
      p->desc->opc[1] == 0x10u &&
      (slice_eq_cstr(p->desc->mnemonic, "movsd") ||
       slice_eq_cstr(p->desc->mnemonic, "movss"))) {
    emit_sse_store(p->mc, p->desc->leg_pfx, 0x11, src.reg, dst.base, dst.disp);
    return;
  }
  /* movaps xmm -> mem: row opcode 0x28, store direction 0x29. */
  if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM &&
      p->desc->opc[1] == 0x28u && slice_eq_cstr(p->desc->mnemonic, "movaps")) {
    emit_sse_store(p->mc, p->desc->leg_pfx, 0x29, src.reg, dst.base, dst.disp);
    return;
  }
  /* Generic shape: the destination must be an XMM register. */
  if (dst.kind != X64_ASM_OP_XMM)
    asm_driver_panic(p->d, "x64 asm: sse dst xmm");
  if (src.kind == X64_ASM_OP_XMM)
    emit_sse_rr(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.reg);
  else if (src.kind == X64_ASM_OP_MEM)
    emit_sse_load(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.base,
                  src.disp);
  else
    asm_driver_panic(p->d, "x64 asm: sse source");
}
p) { 1309 X64AsmOperand reg = parse_operand(p->d); 1310 u8 op[2]; 1311 if (reg.kind != X64_ASM_OP_REG) asm_driver_panic(p->d, "x64 asm: bswap reg"); 1312 emit_rex(p->mc, width_to_w(p->width), 0, 0, reg.reg); 1313 op[0] = 0x0f; 1314 op[1] = (u8)(0xc8u | (reg.reg & 7u)); 1315 p->mc->emit_bytes(p->mc, op, 2); 1316 } 1317 1318 static void parse_bs_popcnt(X64ParseCtx* p) { 1319 X64AsmOperand src = parse_operand(p->d); 1320 X64AsmOperand dst; 1321 expect_comma(p->d); 1322 dst = parse_operand(p->d); 1323 if (dst.kind != X64_ASM_OP_REG) 1324 asm_driver_panic(p->d, "x64 asm: bit-scan dst register"); 1325 emit_reg_rm_twobyte(p->d, p->mc, p->width, p->desc->opc[1], dst.reg, src, 0, 1326 p->desc->leg_pfx); 1327 } 1328 1329 static void parse_atomic(X64ParseCtx* p) { 1330 X64AsmOperand src = parse_operand(p->d); 1331 X64AsmOperand dst; 1332 u8 buf[16]; 1333 u32 n = 0; 1334 expect_comma(p->d); 1335 dst = parse_operand(p->d); 1336 if (src.kind != X64_ASM_OP_REG || 1337 (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM)) 1338 asm_driver_panic(p->d, "x64 asm: atomic form"); 1339 n += x64_pack_rex(buf + n, width_to_w(p->width), src.reg, 0, 1340 dst.kind == X64_ASM_OP_REG ? 
dst.reg : dst.base); 1341 if (p->desc->opc_len == 2) { 1342 buf[n++] = X64_OPC_TWOBYTE; 1343 buf[n++] = p->desc->opc[1]; 1344 } else { 1345 buf[n++] = p->desc->opc[0]; 1346 } 1347 if (dst.kind == X64_ASM_OP_REG) 1348 buf[n++] = x64_modrm(3u, src.reg, dst.reg); 1349 else 1350 n += x64_pack_mem(buf + n, src.reg, dst.base, dst.disp); 1351 emit_packed(p->mc, buf, n); 1352 } 1353 1354 static void parse_nop_multi(X64ParseCtx* p) { 1355 u8 nop6[6] = {X64_NOP6_BYTE0, X64_NOP6_BYTE1, X64_NOP6_BYTE2, 1356 X64_NOP6_BYTE3, X64_NOP6_BYTE4, X64_NOP6_BYTE5}; 1357 p->mc->emit_bytes(p->mc, nop6, sizeof nop6); 1358 } 1359 1360 static void parse_and_emit_for_format(X64ParseCtx* p) { 1361 switch ((X64Format)p->desc->fmt) { 1362 case X64_FMT_NULLARY: 1363 parse_nullary(p); 1364 return; 1365 case X64_FMT_NOP_MULTI: 1366 parse_nop_multi(p); 1367 return; 1368 case X64_FMT_PUSH_POP: 1369 parse_push_pop(p); 1370 return; 1371 case X64_FMT_BR_RM: 1372 parse_br_rm(p); 1373 return; 1374 case X64_FMT_ALU_RR: 1375 parse_alu_rr(p); 1376 return; 1377 case X64_FMT_MOV_RI: 1378 parse_mov_ri(p); 1379 return; 1380 case X64_FMT_MOV_RM_LOAD: 1381 parse_mov_rm_load(p); 1382 return; 1383 case X64_FMT_MOVZX_MOVSX: 1384 parse_movzx_movsx(p); 1385 return; 1386 case X64_FMT_MOVSXD: 1387 parse_movsxd(p); 1388 return; 1389 case X64_FMT_ALU_RM_IMM8: 1390 case X64_FMT_ALU_RM_IMM32: 1391 parse_alu_rm_imm(p); 1392 return; 1393 case X64_FMT_CMOVCC_RR: 1394 parse_cmovcc(p); 1395 return; 1396 case X64_FMT_IMUL_RR: 1397 parse_imul_rr(p); 1398 return; 1399 case X64_FMT_IMUL_RRI: 1400 parse_imul_rri(p); 1401 return; 1402 case X64_FMT_F7_RM: 1403 parse_f7_rm(p); 1404 return; 1405 case X64_FMT_SHIFT_IMM: 1406 case X64_FMT_SHIFT_CL: 1407 parse_shift(p); 1408 return; 1409 case X64_FMT_JCC_REL32: 1410 case X64_FMT_JMP_REL32: 1411 case X64_FMT_CALL_REL32: 1412 parse_rel32_branch(p); 1413 return; 1414 case X64_FMT_SETCC_RM: 1415 parse_setcc(p); 1416 return; 1417 case X64_FMT_SSE_RR: 1418 case X64_FMT_SSE_LOAD: 1419 case 
/* ArchAsm `insn` hook: assemble one instruction whose mnemonic token has
 * already been consumed from the driver `d`; the operands are read from `d`.
 *
 * Steps, in order:
 *   1. Split the mnemonic with parse_mnemonic (base, width, condition code).
 *   2. `lock`: emit the 0xF0 prefix, take the next identifier token as the
 *      real mnemonic and recurse.
 *   3. Base mnemonic `mov`: handled entirely here (see the comments below)
 *      because the operand kinds decide which encoding is needed.
 *   4. Otherwise look the mnemonic up in the instruction table, swap a
 *      rel32 call/jmp row for the BR_RM row when the operand starts with
 *      `*`, and dispatch on the row's format.
 * Every failure is reported through asm_driver_panic. */
static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
  X64Asm* a = (X64Asm*)base;
  MCEmitter* mc = asm_driver_mc(d);
  Slice mnsl = pool_slice(asm_driver_pool(d), mnemonic);
  const char* p = mnsl.s;
  size_t n = mnsl.len;
  X64MnInfo info;
  const X64InsnDesc* desc;
  X64ParseCtx ctx;
  (void)a;
  /* Result intentionally discarded. */
  (void)asm_driver_cur_section(d);

  if (!p || !parse_mnemonic(p, n, &info))
    asm_driver_panic(d, "x64 asm: bad mnemonic");

  /* `lock <insn>`: prefix byte first, then assemble the following
   * instruction through this same entry point. */
  if (n == 4 && memcmp(p, "lock", 4) == 0) {
    AsmTok next;
    u8 pfx = 0xf0;
    mc->emit_bytes(mc, &pfx, 1);
    next = asm_driver_next(d);
    if (next.kind != ASM_TOK_IDENT)
      asm_driver_panic(d, "x64 asm: lock requires an instruction");
    x64_arch_asm_insn(base, d, next.v.ident);
    return;
  }

  /* Special case: imm→reg "mov" is still spelled "movl"/"movq", but the
   * generic scan returns ALU_RR (0x89) first. When we see a "$"
   * immediate as the first operand, we want MOV_RI instead. Deferring this
   * disambiguation to parse_alu_rr would force pre-parsing operands;
   * it is simpler to special-case MOV here. */
  if (info.base_len == 3 && memcmp(info.base, "mov", 3) == 0) {
    /* Peek for leading '$' → immediate form. */
    AsmTok t = asm_driver_peek(d);
    if (asm_driver_tok_is_punct(t, '$')) {
      /* Find the MOV_RI row. */
      for (u32 i = 0; i < x64_insn_table_n; ++i) {
        const X64InsnDesc* dr = &x64_insn_table[i];
        if (dr->fmt == X64_FMT_MOV_RI && slice_eq_cstr(dr->mnemonic, "mov")) {
          ctx.d = d;
          ctx.mc = mc;
          ctx.desc = dr;
          /* No suffix → 32-bit. */
          ctx.width = info.width ? info.width : 4u;
          ctx.cc = info.cc;
          parse_mov_ri(&ctx);
          return;
        }
      }
      /* No MOV_RI row found: fall through to the operand-driven path. */
    }
    /* For mov reg,mem and mov mem,reg we need MOV_RM_LOAD (0x8B) for
     * the load side. Easiest: pre-parse src; if mem and dst is reg →
     * MOV_RM_LOAD. Doing so re-uses the AT&T parser cleanly. */
    {
      X64AsmOperand src = parse_operand(d);
      expect_comma(d);
      {
        X64AsmOperand dst = parse_operand(d);
        u32 w = info.width ? info.width : 4u;
        /* reg → reg */
        if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) {
          if (w == 1u) {
            /* MOV r/m8, r8 — opcode 0x88. */
            emit_movb_rr_operand(d, mc, dst, src);
            return;
          }
          if (w == 2u) {
            /* 16-bit form: operand-size prefix ahead of the encoding. */
            u8 pfx = X64_OPSIZE_PFX;
            mc->emit_bytes(mc, &pfx, 1);
          }
          emit_mov_rr(mc, width_to_w(w), dst.reg, src.reg);
          return;
        }
        /* reg → mem (store) */
        if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) {
          if (w == 1u)
            emit_movb_store_operand(d, mc, src, dst);
          else
            emit_mov_store_operand(d, mc, w, src.reg, dst, 0);
          return;
        }
        /* mem → reg (load) */
        if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
          emit_mov_load_operand(d, mc, w, dst.reg, src);
          return;
        }
        /* movd/movq between a GPR and an XMM register: 66 [REX.W] 0F 6E (to
         * xmm) / 7E (to gpr). The xmm is always the ModRM.reg field, the gpr
         * the r/m; movq sets REX.W (w==8), movd does not (w==4). */
        if ((src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_XMM) ||
            (src.kind == X64_ASM_OP_XMM && dst.kind == X64_ASM_OP_REG)) {
          int to_xmm = (dst.kind == X64_ASM_OP_XMM);
          u32 xmm = to_xmm ? dst.reg : src.reg;
          u32 gpr = to_xmm ? src.reg : dst.reg;
          emit_sse_rr_w(mc, X64_OPSIZE_PFX, to_xmm ? 0x6Eu : 0x7Eu,
                        width_to_w(w), xmm, gpr);
          return;
        }
        asm_driver_panic(d, "x64 asm: mov form");
      }
    }
  }

  desc = find_mnemonic_row(&info);
  if (!desc) asm_driver_panic(d, "x64 asm: unknown mnemonic");

  /* If the user wrote an indirect branch (`*%reg`), prefer the BR_RM row
   * over the rel32 row that may sort first in the table. If no BR_RM row
   * matches the base mnemonic, the rel32 row is kept. */
  if (desc->fmt == X64_FMT_CALL_REL32 || desc->fmt == X64_FMT_JMP_REL32) {
    AsmTok t = asm_driver_peek(d);
    if (asm_driver_tok_is_punct(t, '*')) {
      for (u32 i = 0; i < x64_insn_table_n; ++i) {
        const X64InsnDesc* dr = &x64_insn_table[i];
        if (dr->fmt != X64_FMT_BR_RM) continue;
        if (!slice_eq(dr->mnemonic, (Slice){{info.base}, info.base_len}))
          continue;
        desc = dr;
        break;
      }
    }
  }

  /* Generic path: the format-specific parser reads the operands and emits. */
  ctx.d = d;
  ctx.mc = mc;
  ctx.desc = desc;
  ctx.width = width_from_info(&info, desc);
  ctx.cc = info.cc;
  parse_and_emit_for_format(&ctx);
}
x64 relocs store addend-4 (rel32 bias), so addend_bias=4 makes 1595 * the printed offset the symbol offset. R_PC32 covers BOTH a branch target and 1596 * a RIP-relative lea/mov, so surgery is chosen from the operand text by the 1597 * printer (an `(%rip)` operand uses RIP surgery); we just supply the modifier. 1598 * Calls (R_X64_PLT32) print as a bare symbol — both kit-as (call default) and 1599 * clang resolve a same-TU callee, so execution matches regardless of the exact 1600 * reloc kind each assembler picks. */ 1601 static int x64_reloc_operand(u16 kind, KitObjFmt fmt, ArchRelocOperand* out) { 1602 const char* suffix; 1603 (void)fmt; /* x64 cc -S cross-targets ELF; one spelling */ 1604 switch (kind) { 1605 case R_PC32: /* jmp/jcc target, or RIP-relative lea/mov */ 1606 case R_X64_PLT32: /* call target -> bare symbol */ 1607 suffix = ""; 1608 break; 1609 case R_X64_GOTPCREL: 1610 case R_X64_GOTPCRELX: 1611 case R_X64_REX_GOTPCRELX: 1612 suffix = "@GOTPCREL"; /* RIP-relative GOT load */ 1613 break; 1614 default: 1615 return 0; /* data (R_ABS*) via emit_data_range; TLS/etc. unsymbolized */ 1616 } 1617 out->surg = 1618 ARCH_RELOC_SURG_TAIL; /* promoted to RIP by the printer if (%rip) */ 1619 out->prefix = ""; 1620 out->suffix = suffix; 1621 out->addend_bias = 4; 1622 return 1; 1623 } 1624 1625 /* Intra-section local branches whose target codegen resolved in place (no 1626 * relocation): jmp and the Jcc family. Excludes call (always relocated) and 1627 * indirect/register-form jumps (no numeric target to relabel). 
*/ 1628 static int x64_is_local_branch(KitSlice m) { 1629 static const char* const br[] = { 1630 "jmp", 1631 "jo", 1632 "jno", 1633 "jb", 1634 "jae", 1635 "je", 1636 "jne", 1637 "jbe", 1638 "ja", 1639 "js", 1640 "jns", 1641 "jp", 1642 "jnp", 1643 "jl", 1644 "jge", 1645 "jle", 1646 "jg", 1647 /* aliases the disassembler may not emit but harmless to accept */ 1648 "jz", 1649 "jnz", 1650 "jc", 1651 "jnc", 1652 }; 1653 u32 i; 1654 for (i = 0; i < sizeof br / sizeof br[0]; ++i) { 1655 size_t n = strlen(br[i]); 1656 if (m.len == (u32)n && memcmp(m.s, br[i], n) == 0) return 1; 1657 } 1658 return 0; 1659 } 1660 1661 const ArchAsmOps x64_asm_ops = { 1662 .reloc_operand = x64_reloc_operand, 1663 .is_local_branch = x64_is_local_branch, 1664 }; 1665 1666 ArchAsm* x64_arch_asm_new(Compiler* c) { return &x64_asm_open(c)->base; } 1667 1668 void x64_inline_bind(X64Asm* a, const AsmConstraint* outs, u32 nout, 1669 Operand* out_ops, const AsmConstraint* ins, u32 nin, 1670 const Operand* in_ops, const Sym* clobbers, u32 nclob) { 1671 a->outs = outs; 1672 a->out_ops = out_ops; 1673 a->ins = ins; 1674 a->in_ops = in_ops; 1675 a->clobbers = clobbers; 1676 a->nout = nout; 1677 a->nin = nin; 1678 a->nclob = nclob; 1679 } 1680 1681 #define X64_INLINE_LINE_CAP 1024 1682 1683 _Noreturn static void inline_panic(X64Asm* a, const char* msg) { 1684 SrcLoc loc = {0, 0, 0}; 1685 compiler_panic(a->c, loc, "x64 inline asm: %.*s", 1686 SLICE_ARG(slice_from_cstr(msg))); 1687 } 1688 1689 /* Width selector for x64_reg_spelling: matches the operand-modifier 1690 * forms recognised by the template walker. 
*/ 1691 #define X64_REG_WIDTH_64 0 1692 #define X64_REG_WIDTH_32 1 1693 #define X64_REG_WIDTH_8 2 1694 #define X64_REG_WIDTH_16 3 1695 #define X64_REG_WIDTH_H8 4 1696 1697 static void render_xmm(StrBuf* sb, u32 reg) { 1698 strbuf_putc(sb, '%'); 1699 strbuf_puts(sb, "xmm"); 1700 reg &= 15u; 1701 if (reg >= 10u) strbuf_putc(sb, (char)('0' + (reg / 10u))); 1702 strbuf_putc(sb, (char)('0' + (reg % 10u))); 1703 } 1704 1705 static const char* x64_reg_spelling(u32 reg, int width) { 1706 static const char* r64[16] = { 1707 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", 1708 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 1709 }; 1710 static const char* r32[16] = { 1711 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", 1712 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", 1713 }; 1714 static const char* r8[16] = { 1715 "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", 1716 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", 1717 }; 1718 static const char* r16[16] = { 1719 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", 1720 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w", 1721 }; 1722 static const char* rh8[4] = {"ah", "ch", "dh", "bh"}; 1723 if (width == X64_REG_WIDTH_H8) return reg < 4u ? 
rh8[reg] : NULL; 1724 if (width == X64_REG_WIDTH_16) return r16[reg & 15u]; 1725 if (width == X64_REG_WIDTH_8) return r8[reg & 15u]; 1726 if (width == X64_REG_WIDTH_32) return r32[reg & 15u]; 1727 return r64[reg & 15u]; 1728 } 1729 1730 static int x64_type_prefers_32(KitCgTypeId type) { 1731 if (type == 0) return 0; 1732 return !type_is_64(type); 1733 } 1734 1735 static void render_reg(StrBuf* sb, u32 reg, int width) { 1736 const char* name = x64_reg_spelling(reg, width); 1737 strbuf_putc(sb, '%'); 1738 if (name) strbuf_puts(sb, name); 1739 } 1740 1741 static void render_imm(StrBuf* sb, i64 v) { 1742 strbuf_putc(sb, '$'); 1743 strbuf_put_i64(sb, v); 1744 } 1745 1746 static void render_indirect(StrBuf* sb, Reg base, i32 ofs) { 1747 if (ofs) strbuf_put_i64(sb, (i64)ofs); 1748 strbuf_putc(sb, '('); 1749 render_reg(sb, (u32)base, X64_REG_WIDTH_64); 1750 strbuf_putc(sb, ')'); 1751 } 1752 1753 /* Operand-modifier forms used by the template walker. */ 1754 #define X64_FORM_DEFAULT 0 1755 #define X64_FORM_W 1 /* %w — 16-bit */ 1756 #define X64_FORM_X 2 /* %x — 64-bit */ 1757 #define X64_FORM_A 3 /* %a — address / memory */ 1758 #define X64_FORM_B 4 /* %b — 8-bit (byte) register */ 1759 #define X64_FORM_K 5 /* %k — 32-bit */ 1760 #define X64_FORM_H 6 /* %h — high 8-bit register (a/c/d/b only) */ 1761 1762 static char x64_size_suffix_for_operand(X64Asm* a, u32 idx) { 1763 u32 ntot = a->nout + a->nin; 1764 const Operand* op; 1765 u32 size; 1766 if (idx >= ntot) inline_panic(a, "operand index out of range"); 1767 op = (idx < a->nout) ? 
&a->out_ops[idx] : &a->in_ops[idx - a->nout]; 1768 if (op->type) 1769 size = type_byte_size(op->type); 1770 else if (op->kind == OPK_IMM) 1771 size = 4; 1772 else 1773 size = 8; 1774 switch (size) { 1775 case 1: 1776 return 'b'; 1777 case 2: 1778 return 'w'; 1779 case 4: 1780 return 'l'; 1781 case 8: 1782 return 'q'; 1783 default: 1784 inline_panic(a, "%z requires a scalar 1/2/4/8-byte operand"); 1785 } 1786 } 1787 1788 static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) { 1789 u32 ntot = a->nout + a->nin; 1790 const Operand* op; 1791 if (idx >= ntot) inline_panic(a, "operand index out of range"); 1792 op = (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout]; 1793 if (form == X64_FORM_A) { 1794 if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); 1795 if (op->v.ind.index != REG_NONE) 1796 inline_panic(a, "inline asm: indexed addressing not supported"); 1797 render_indirect(sb, op->v.ind.base, op->v.ind.ofs); 1798 return; 1799 } 1800 if ((form == X64_FORM_B || form == X64_FORM_H) && 1801 op->kind != X64_INLINE_OPK_REG) { 1802 inline_panic(a, "byte-register modifier requires a register operand"); 1803 } 1804 if (op->kind == X64_INLINE_OPK_REG) { 1805 int width; 1806 if (op->pad[0] == X64_INLINE_OPCLS_FP) { 1807 render_xmm(sb, (u32)op->v.local); 1808 return; 1809 } 1810 if (form == X64_FORM_B) 1811 width = X64_REG_WIDTH_8; 1812 else if (form == X64_FORM_H) { 1813 if (op->v.local > X64_RBX) { 1814 inline_panic(a, "%h modifier requires ax/cx/dx/bx register"); 1815 } 1816 width = X64_REG_WIDTH_H8; 1817 } else if (form == X64_FORM_W) 1818 width = X64_REG_WIDTH_16; 1819 else if (form == X64_FORM_K) 1820 width = X64_REG_WIDTH_32; 1821 else if (form == X64_FORM_X) 1822 width = X64_REG_WIDTH_64; 1823 else 1824 width = 1825 x64_type_prefers_32(op->type) ? 
X64_REG_WIDTH_32 : X64_REG_WIDTH_64; 1826 render_reg(sb, (u32)op->v.local, width); 1827 return; 1828 } 1829 if (op->kind == OPK_IMM) { 1830 render_imm(sb, op->v.imm); 1831 return; 1832 } 1833 if (op->kind == OPK_INDIRECT) { 1834 if (op->v.ind.index != REG_NONE) 1835 inline_panic(a, "inline asm: indexed addressing not supported"); 1836 render_indirect(sb, op->v.ind.base, op->v.ind.ofs); 1837 return; 1838 } 1839 inline_panic(a, "unsupported operand kind"); 1840 } 1841 1842 static u32 find_named_operand(X64Asm* a, const char* name, size_t len) { 1843 Sym needle = pool_intern_slice(a->c->global, (Slice){.s = name, .len = len}); 1844 u32 i; 1845 for (i = 0; i < a->nout; ++i) { 1846 if (a->outs[i].name == needle) return i; 1847 } 1848 for (i = 0; i < a->nin; ++i) { 1849 if (a->ins[i].name == needle) return a->nout + i; 1850 } 1851 inline_panic(a, "%[name] does not match any constraint"); 1852 } 1853 1854 static void run_one_line(X64Asm* a, MCEmitter* mc, const char* text, 1855 size_t len) { 1856 size_t i; 1857 AsmLexer* lx; 1858 AsmDriver* d; 1859 AsmTok t; 1860 for (i = 0; i < len; ++i) { 1861 if (text[i] != ' ' && text[i] != '\t') break; 1862 } 1863 if (i == len) return; 1864 lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len); 1865 d = asm_driver_open_inline(a->c, mc, lx); 1866 t = asm_driver_peek(d); 1867 while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) { 1868 (void)asm_driver_next(d); 1869 if (t.kind == ASM_TOK_HASH) { 1870 while (!asm_driver_at_eol(d)) (void)asm_driver_next(d); 1871 } 1872 t = asm_driver_peek(d); 1873 } 1874 if (t.kind == ASM_TOK_EOF) { 1875 asm_driver_close_inline(d); 1876 asm_lex_close(lx); 1877 return; 1878 } 1879 if (t.kind != ASM_TOK_IDENT) inline_panic(a, "expected mnemonic"); 1880 (void)asm_driver_next(d); 1881 x64_arch_asm_insn(&a->base, d, t.v.ident); 1882 asm_driver_close_inline(d); 1883 asm_lex_close(lx); 1884 } 1885 1886 static void render_and_run_line(X64Asm* a, MCEmitter* mc, StrBuf* sb, 1887 const char* start, const 
char* end) { 1888 strbuf_reset(sb); 1889 for (const char* p = start; p < end; ++p) { 1890 char c = *p; 1891 char n; 1892 int form = 0; 1893 if (c != '%') { 1894 strbuf_putc(sb, c); 1895 continue; 1896 } 1897 if (p + 1 >= end) inline_panic(a, "trailing '%' in template"); 1898 n = *(p + 1); 1899 if (n == '%') { 1900 strbuf_putc(sb, '%'); 1901 ++p; 1902 continue; 1903 } 1904 if (n == 'w' || n == 'x' || n == 'a' || n == 'b' || n == 'k' || n == 'h' || 1905 n == 'z') { 1906 form = (n == 'w') ? X64_FORM_W 1907 : (n == 'x') ? X64_FORM_X 1908 : (n == 'a') ? X64_FORM_A 1909 : (n == 'b') ? X64_FORM_B 1910 : (n == 'k') ? X64_FORM_K 1911 : (n == 'h') ? X64_FORM_H 1912 : -1; 1913 ++p; 1914 if (p + 1 >= end) inline_panic(a, "trailing '%' modifier"); 1915 n = *(p + 1); 1916 } 1917 if (n == '[') { 1918 const char* nbeg = p + 2; 1919 const char* nend = nbeg; 1920 u32 idx; 1921 while (nend < end && *nend != ']') ++nend; 1922 if (nend == end) inline_panic(a, "unterminated %[name]"); 1923 idx = find_named_operand(a, nbeg, (size_t)(nend - nbeg)); 1924 p = nend; 1925 if (form == -1) 1926 strbuf_putc(sb, x64_size_suffix_for_operand(a, idx)); 1927 else 1928 render_operand(a, sb, idx, form); 1929 continue; 1930 } 1931 if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'"); 1932 { 1933 u32 idx = (u32)(n - '0'); 1934 ++p; 1935 if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') { 1936 idx = idx * 10u + (u32)(*(p + 1) - '0'); 1937 ++p; 1938 } 1939 if (form == -1) 1940 strbuf_putc(sb, x64_size_suffix_for_operand(a, idx)); 1941 else 1942 render_operand(a, sb, idx, form); 1943 } 1944 } 1945 if (sb->truncated) inline_panic(a, "inline asm line buffer overflow"); 1946 run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb)); 1947 } 1948 1949 void x64_asm_run_template(X64Asm* a, MCEmitter* mc, const char* tmpl) { 1950 char buf[X64_INLINE_LINE_CAP]; 1951 StrBuf sb; 1952 const char* line_start; 1953 int bracket = 0; 1954 char quote = 0; 1955 if (!tmpl || !*tmpl) return; 1956 
strbuf_init(&sb, buf, sizeof buf); 1957 line_start = tmpl; 1958 for (const char* p = tmpl;; ++p) { 1959 char c = *p; 1960 if (c == '\0') { 1961 render_and_run_line(a, mc, &sb, line_start, p); 1962 break; 1963 } 1964 if (quote) { 1965 if (c == '\\' && *(p + 1)) { 1966 ++p; 1967 continue; 1968 } 1969 if (c == quote) quote = 0; 1970 continue; 1971 } 1972 if (c == '"' || c == '\'') { 1973 quote = c; 1974 continue; 1975 } 1976 if (c == '[') { 1977 ++bracket; 1978 continue; 1979 } 1980 if (c == ']') { 1981 if (bracket) --bracket; 1982 continue; 1983 } 1984 if (bracket == 0 && (c == '\n' || c == ';')) { 1985 render_and_run_line(a, mc, &sb, line_start, p); 1986 line_start = p + 1; 1987 } 1988 } 1989 }