asm.c (110411B)
1 /* AArch64 standalone .s instruction parser. 2 * 3 * Per-mnemonic dispatch: each entry in the mnemonic table names a 4 * parse function that reads operand tokens through the asm-driver 5 * surface and emits the encoded word via the inline encoders in 6 * aa64_isa.h. Encoders are the single source of truth for bit 7 * layout — the disassembler shares them through aa64_*_unpack. 8 * 9 * Aliases (`mov`, `neg`, `cmp`, `mul`, ...) live in this table as 10 * dedicated rows that pick the canonical form's encoder with the 11 * alias-specific operand shape. When a mnemonic admits multiple 12 * forms (e.g. `mov` register-vs-immediate, `add` register-vs- 13 * immediate), the parser branches on operand shape after reading 14 * the first non-Rd operand. */ 15 16 #include "arch/aa64/asm.h" 17 18 #include <string.h> 19 20 #include "arch/aa64/isa.h" 21 #include "arch/aa64/regs.h" 22 #include "arch/arch.h" 23 #include "asm/asm_helpers.h" 24 #include "asm/asm_lex.h" 25 #include "cg/type.h" 26 #include "core/arena.h" 27 #include "core/pool.h" 28 #include "core/slice.h" 29 #include "core/strbuf.h" 30 #include "obj/obj.h" 31 32 /* ---- public handle ---- */ 33 34 struct AA64Asm { 35 ArchAsm base; 36 Compiler* c; 37 38 /* Inline-asm bound state (set by aa64_inline_bind, cleared otherwise). 39 * Operand indexing per GCC convention: 0..nout-1 are outputs, then 40 * nout..nout+nin-1 are inputs. Templates address into this combined 41 * list via %N / %wN / %xN / %aN. out_ops is mutable (the binder fills 42 * in result locations); in_ops + constraints + clobbers are read-only 43 * borrows. 
*/ 44 const AsmConstraint* outs; 45 Operand* out_ops; 46 const AsmConstraint* ins; 47 const Operand* in_ops; 48 const Sym* clobbers; 49 u32 nout; 50 u32 nin; 51 u32 nclob; 52 }; 53 54 static void aa64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic); 55 static void aa64_arch_asm_destroy(ArchAsm* base); 56 57 AA64Asm* aa64_asm_open(Compiler* c) { 58 AA64Asm* a = arena_new(c->tu, AA64Asm); 59 memset(a, 0, sizeof *a); 60 a->base.insn = aa64_arch_asm_insn; 61 a->base.destroy = aa64_arch_asm_destroy; 62 a->c = c; 63 return a; 64 } 65 66 void aa64_asm_close(AA64Asm* a) { (void)a; } 67 68 ArchAsm* aa64_arch_asm_new(Compiler* c) { return &aa64_asm_open(c)->base; } 69 70 static void aa64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { 71 aa64_asm_insn((AA64Asm*)base, d, mnemonic); 72 } 73 74 static void aa64_arch_asm_destroy(ArchAsm* base) { 75 aa64_asm_close((AA64Asm*)base); 76 } 77 78 void aa64_inline_bind(AA64Asm* a, const AsmConstraint* outs, u32 nout, 79 Operand* out_ops, const AsmConstraint* ins, u32 nin, 80 const Operand* in_ops, const Sym* clobbers, u32 nclob) { 81 a->outs = outs; 82 a->out_ops = out_ops; 83 a->ins = ins; 84 a->in_ops = in_ops; 85 a->clobbers = clobbers; 86 a->nout = nout; 87 a->nin = nin; 88 a->nclob = nclob; 89 } 90 91 /* ---- helpers ---- */ 92 93 static int tok_punct(AsmTok t, u32 p) { return asm_driver_tok_is_punct(t, p); } 94 95 static int icase_eq(const char* a, size_t an, const char* b) { 96 size_t i; 97 for (i = 0; i < an; ++i) { 98 char x = a[i], y = b[i]; 99 if (x >= 'A' && x <= 'Z') x = (char)(x + ('a' - 'A')); 100 if (y >= 'A' && y <= 'Z') y = (char)(y + ('a' - 'A')); 101 if (x != y || !y) return 0; 102 } 103 return b[an] == '\0'; 104 } 105 106 /* Parse a register operand. Returns the 5-bit encoded register number 107 * via *reg_out and the form via *is64_out. 
Recognized forms (case- 108 * insensitive): 109 * w0..w30, wzr → is64=0, reg=0..30 / 31 110 * x0..x30, xzr, lr (=x30) → is64=1, reg=0..30 / 31 111 * sp → is64=1, reg=31 (sp_means_sp set) 112 * wsp → is64=0, reg=31 (sp_means_sp set) 113 * Aliases: 114 * fp = x29 115 * ip0 = x16, ip1 = x17 (PLT scratch — useful for hand-written PLTs) */ 116 typedef struct AA64Reg { 117 u32 num; 118 u8 is64; 119 u8 is_sp; /* 1 if the spelling was "sp" / "wsp" */ 120 u8 is_fp; /* 1 for SIMD/FP register spellings accepted in FP forms */ 121 u8 fp_bytes; /* 8 for Dn, 16 for Qn */ 122 } AA64Reg; 123 124 static int parse_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) { 125 Slice sl = pool_slice(asm_driver_pool(d), ident); 126 const char* p = sl.s; 127 size_t n = sl.len; 128 if (!p || !n) return 0; 129 /* "sp" */ 130 if (icase_eq(p, n, "sp")) { 131 out->num = 31; 132 out->is64 = 1; 133 out->is_sp = 1; 134 out->is_fp = 0; 135 return 1; 136 } 137 if (icase_eq(p, n, "wsp")) { 138 out->num = 31; 139 out->is64 = 0; 140 out->is_sp = 1; 141 out->is_fp = 0; 142 return 1; 143 } 144 if (icase_eq(p, n, "lr")) { 145 out->num = 30; 146 out->is64 = 1; 147 out->is_sp = 0; 148 out->is_fp = 0; 149 return 1; 150 } 151 if (icase_eq(p, n, "fp")) { 152 out->num = 29; 153 out->is64 = 1; 154 out->is_sp = 0; 155 out->is_fp = 0; 156 return 1; 157 } 158 if (icase_eq(p, n, "ip0")) { 159 out->num = 16; 160 out->is64 = 1; 161 out->is_sp = 0; 162 out->is_fp = 0; 163 return 1; 164 } 165 if (icase_eq(p, n, "ip1")) { 166 out->num = 17; 167 out->is64 = 1; 168 out->is_sp = 0; 169 out->is_fp = 0; 170 return 1; 171 } 172 if (icase_eq(p, n, "xzr")) { 173 out->num = 31; 174 out->is64 = 1; 175 out->is_sp = 0; 176 out->is_fp = 0; 177 return 1; 178 } 179 if (icase_eq(p, n, "wzr")) { 180 out->num = 31; 181 out->is64 = 0; 182 out->is_sp = 0; 183 out->is_fp = 0; 184 return 1; 185 } 186 /* W/X<num> */ 187 if ((p[0] == 'w' || p[0] == 'W' || p[0] == 'x' || p[0] == 'X') && n >= 2) { 188 u32 r = 0; 189 size_t i; 190 for (i = 1; i < 
n; ++i) { 191 char c = p[i]; 192 if (c < '0' || c > '9') return 0; 193 r = r * 10 + (u32)(c - '0'); 194 if (r > 31) return 0; 195 } 196 out->num = r; 197 out->is64 = (p[0] == 'x' || p[0] == 'X') ? 1 : 0; 198 out->is_sp = 0; 199 out->is_fp = 0; 200 return 1; 201 } 202 return 0; 203 } 204 205 static int parse_fp_pair_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) { 206 Slice sl = pool_slice(asm_driver_pool(d), ident); 207 const char* p = sl.s; 208 size_t n = sl.len; 209 if (!p || n < 2 || (p[0] != 'd' && p[0] != 'D' && p[0] != 'q' && p[0] != 'Q')) 210 return 0; 211 u32 r = 0; 212 for (size_t i = 1; i < n; ++i) { 213 char c = p[i]; 214 if (c < '0' || c > '9') return 0; 215 r = r * 10 + (u32)(c - '0'); 216 if (r > 31) return 0; 217 } 218 out->num = r; 219 out->is64 = 1; 220 out->is_sp = 0; 221 out->is_fp = 1; 222 out->fp_bytes = (p[0] == 'q' || p[0] == 'Q') ? 16u : 8u; 223 return 1; 224 } 225 226 /* Scalar SIMD/FP transfer register for ldr/str/ldur/stur: b/h/s/d/q with the 227 * access width in fp_bytes (1/2/4/8/16). Unlike parse_fp_pair_reg_from_ident 228 * (ldp/stp, d/q only) this accepts the sub-64-bit scalar widths a single-reg 229 * FP load/store can carry. 
*/ 230 static int parse_fp_scalar_reg_from_ident(AsmDriver* d, Sym ident, 231 AA64Reg* out) { 232 Slice sl = pool_slice(asm_driver_pool(d), ident); 233 const char* p = sl.s; 234 size_t n = sl.len; 235 u8 bytes; 236 u32 r = 0; 237 size_t i; 238 if (!p || n < 2) return 0; 239 switch (p[0]) { 240 case 'b': 241 case 'B': 242 bytes = 1; 243 break; 244 case 'h': 245 case 'H': 246 bytes = 2; 247 break; 248 case 's': 249 case 'S': 250 bytes = 4; 251 break; 252 case 'd': 253 case 'D': 254 bytes = 8; 255 break; 256 case 'q': 257 case 'Q': 258 bytes = 16; 259 break; 260 default: 261 return 0; 262 } 263 for (i = 1; i < n; ++i) { 264 char c = p[i]; 265 if (c < '0' || c > '9') return 0; 266 r = r * 10 + (u32)(c - '0'); 267 if (r > 31) return 0; 268 } 269 out->num = r; 270 out->is64 = 1; 271 out->is_sp = 0; 272 out->is_fp = 1; 273 out->fp_bytes = bytes; 274 return 1; 275 } 276 277 static AA64Reg parse_reg(AsmDriver* d) { 278 AsmTok t = asm_driver_next(d); 279 AA64Reg r; 280 memset(&r, 0, sizeof r); 281 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) 282 asm_driver_panic(d, "asm: expected register"); 283 return r; 284 } 285 286 /* Non-consuming lookahead: is the next operand a register? Used to pick 287 * between the register and immediate forms of dual-form mnemonics (e.g. 288 * `and Rd,Rn,Rm` vs `and Rd,Rn,#imm`). */ 289 static int peek_is_reg(AsmDriver* d) { 290 AsmTok t = asm_driver_peek(d); 291 AA64Reg r; 292 return t.kind == ASM_TOK_IDENT && parse_reg_from_ident(d, t.v.ident, &r); 293 } 294 295 /* ldr/str transfer register: GPR (Wt/Xt) or scalar SIMD/FP (Bt..Qt). 
*/ 296 static AA64Reg parse_ldst_reg(AsmDriver* d) { 297 AsmTok t = asm_driver_next(d); 298 AA64Reg r; 299 memset(&r, 0, sizeof r); 300 if (t.kind != ASM_TOK_IDENT || 301 (!parse_reg_from_ident(d, t.v.ident, &r) && 302 !parse_fp_scalar_reg_from_ident(d, t.v.ident, &r))) 303 asm_driver_panic(d, "asm: ldr/str: expected register"); 304 return r; 305 } 306 307 /* Resolve the (size, V, opc, scale) load/store encoding fields from the 308 * transfer register and mnemonic flavor. GPR width comes from fixed_size (the 309 * sized mnemonics ldrb/ldrsw/…) or the register; FP uses V=1 with size/opc 310 * keyed on the scalar width (b/h/s/d = size 0/1/2/3, opc store=0/load=1; the 311 * 128-bit q is size=0 opc=2/3). `scale` is the byte access width for the 312 * scaled unsigned-imm12 form. */ 313 typedef struct { 314 u32 size, V, opc, scale; 315 } AA64LdStEnc; 316 317 static AA64LdStEnc ldst_encoding(AsmDriver* d, AA64Reg rt, int is_load, 318 int fixed_size, int sign_ext) { 319 AA64LdStEnc e; 320 if (rt.is_fp) { 321 if (fixed_size >= 0 || sign_ext) 322 asm_driver_panic(d, "asm: sized/signed ld/st takes a GPR, not an FP reg"); 323 e.V = 1; 324 e.scale = rt.fp_bytes; 325 e.size = (rt.fp_bytes == 1) ? 0u 326 : (rt.fp_bytes == 2) ? 1u 327 : (rt.fp_bytes == 4) ? 2u 328 : (rt.fp_bytes == 8) ? 3u 329 : 0u; /* 16 (Q): size=0, opc carries width */ 330 e.opc = (rt.fp_bytes == 16) ? (is_load ? 3u : 2u) : (is_load ? 1u : 0u); 331 } else { 332 e.V = 0; 333 e.size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u); 334 e.scale = 1u << e.size; 335 e.opc = !is_load ? AA64_LDST_OPC_STR 336 : !sign_ext ? AA64_LDST_OPC_LDR 337 : rt.is64 ? 
2u /* LDRS*, 64-bit dst */ 338 : 3u; /* LDRS*, 32-bit dst */ 339 } 340 return e; 341 } 342 343 static AA64Reg parse_ldstp_reg(AsmDriver* d) { 344 AsmTok t = asm_driver_next(d); 345 AA64Reg r; 346 memset(&r, 0, sizeof r); 347 if (t.kind != ASM_TOK_IDENT || 348 (!parse_reg_from_ident(d, t.v.ident, &r) && 349 !parse_fp_pair_reg_from_ident(d, t.v.ident, &r))) { 350 asm_driver_panic(d, "asm: expected register"); 351 } 352 return r; 353 } 354 355 static void reject_sp_reg(AsmDriver* d, AA64Reg r, const char* what) { 356 if (r.is_sp) 357 asm_driver_panic(d, "asm: %.*s: SP register not allowed", 358 SLICE_ARG(slice_from_cstr(what))); 359 } 360 361 static void require_sp_spelling(AsmDriver* d, AA64Reg r, const char* what) { 362 if (r.num == 31u && !r.is_sp) 363 asm_driver_panic(d, "asm: %.*s: zero register not allowed in SP operand", 364 SLICE_ARG(slice_from_cstr(what))); 365 } 366 367 /* Parse "#imm" (with optional + / -) or a bare expression — GNU as is 368 * lenient about the leading hash. Returns an i64. */ 369 static i64 parse_imm_const(AsmDriver* d) { 370 (void)asm_driver_eat_punct(d, '#'); 371 return asm_driver_parse_const(d); 372 } 373 374 /* Parse a possibly-symbolic operand prefixed by '#'. */ 375 static void parse_imm_sym(AsmDriver* d, ObjSymId* sym_out, i64* val_out) { 376 (void)asm_driver_eat_punct(d, '#'); 377 asm_driver_parse_sym_expr(d, sym_out, val_out); 378 } 379 380 /* GNU-as relocation modifier on an aarch64 operand (`:lo12:`, `:got:`, 381 * `:got_lo12:`). AA64_RELMOD_NONE means no modifier was present. */ 382 typedef enum AA64RelMod { 383 AA64_RELMOD_NONE = 0, 384 AA64_RELMOD_PAGE, /* explicit adrp page reloc (Mach-O `@PAGE`); == bare adrp 385 */ 386 AA64_RELMOD_LO12, 387 AA64_RELMOD_GOT, 388 AA64_RELMOD_GOT_LO12, 389 } AA64RelMod; 390 391 /* True when the assembler's target object format is Mach-O, which spells 392 * operand relocations as `@PAGE`/`@PAGEOFF` suffixes; ELF/COFF spell them as 393 * `:lo12:`/`:got:` prefixes. 
kit as parses the dialect of its target only 394 * (no hybrid), mirroring what `cc -S` emits for that format. */ 395 static int target_is_macho(AsmDriver* d) { 396 return asm_driver_compiler(d)->target.obj == KIT_OBJ_MACHO; 397 } 398 399 /* If the next token is ':', consume a `:name:` relocation modifier prefix and 400 * return its kind. A leading ':' is unambiguous at an operand position (a 401 * label's ':' only appears at end-of-mnemonic). Returns AA64_RELMOD_NONE and 402 * leaves the stream untouched when there is no modifier. */ 403 static AA64RelMod parse_reloc_mod(AsmDriver* d) { 404 if (!tok_punct(asm_driver_peek(d), ':')) return AA64_RELMOD_NONE; 405 (void)asm_driver_next(d); /* eat ':' */ 406 AsmTok name = asm_driver_next(d); 407 if (name.kind != ASM_TOK_IDENT) 408 asm_driver_panic(d, "asm: expected relocation modifier name after ':'"); 409 Slice s = pool_slice(asm_driver_pool(d), name.v.ident); 410 AA64RelMod mod; 411 if (icase_eq(s.s, s.len, "lo12")) 412 mod = AA64_RELMOD_LO12; 413 else if (icase_eq(s.s, s.len, "got")) 414 mod = AA64_RELMOD_GOT; 415 else if (icase_eq(s.s, s.len, "got_lo12")) 416 mod = AA64_RELMOD_GOT_LO12; 417 else 418 asm_driver_panic(d, "asm: unsupported relocation modifier"); 419 asm_driver_expect_punct(d, ':', "':' closing relocation modifier"); 420 return mod; 421 } 422 423 /* Mach-O operand relocation suffix: after a symbol(+addend), an optional 424 * `@PAGE` / `@PAGEOFF` / `@GOTPAGE` / `@GOTPAGEOFF`. Maps to the same 425 * AA64RelMod the ELF `:mod:` prefix produces, so downstream encoding/reloc 426 * emission is shared. `@PAGE` is the explicit spelling of an adrp page reloc 427 * (a bare adrp on ELF). Returns AA64_RELMOD_NONE, stream untouched, when the 428 * next token is not '@'. 
*/ 429 static AA64RelMod parse_reloc_suffix(AsmDriver* d) { 430 if (!tok_punct(asm_driver_peek(d), '@')) return AA64_RELMOD_NONE; 431 (void)asm_driver_next(d); /* eat '@' */ 432 AsmTok name = asm_driver_next(d); 433 if (name.kind != ASM_TOK_IDENT) 434 asm_driver_panic(d, "asm: expected relocation suffix name after '@'"); 435 Slice s = pool_slice(asm_driver_pool(d), name.v.ident); 436 if (icase_eq(s.s, s.len, "PAGE")) return AA64_RELMOD_PAGE; 437 if (icase_eq(s.s, s.len, "PAGEOFF")) return AA64_RELMOD_LO12; 438 if (icase_eq(s.s, s.len, "GOTPAGE")) return AA64_RELMOD_GOT; 439 if (icase_eq(s.s, s.len, "GOTPAGEOFF")) return AA64_RELMOD_GOT_LO12; 440 asm_driver_panic(d, "asm: unsupported relocation suffix"); 441 } 442 443 /* The R_AARCH64_LDST{8,16,32,64}_ABS_LO12_NC reloc for an access log2-size. */ 444 static RelocKind aa64_ldst_lo12_reloc(AsmDriver* d, u32 size) { 445 switch (size) { 446 case 0: 447 return R_AARCH64_LDST8_ABS_LO12_NC; 448 case 1: 449 return R_AARCH64_LDST16_ABS_LO12_NC; 450 case 2: 451 return R_AARCH64_LDST32_ABS_LO12_NC; 452 case 3: 453 return R_AARCH64_LDST64_ABS_LO12_NC; 454 default: 455 asm_driver_panic(d, 456 "asm: ldr/str: :lo12: not valid for this access size"); 457 } 458 } 459 460 /* Printer-side inverse of the operand reloc-modifier parsers above: how a 461 * relocated aarch64 operand is spelled in `cc -S` text for the target object 462 * format. ELF uses a `:mod:` prefix; Mach-O uses an `@MOD` suffix — and even 463 * a bare adrp page reloc needs an explicit `@PAGE` there. Kept adjacent to 464 * the `.s` parser (parse_reloc_mod / parse_reloc_suffix and their call sites) 465 * so the emit and parse spellings stay in lockstep. See ArchAsmOps. 
*/
static int aa64_reloc_operand(u16 kind, KitObjFmt fmt, ArchRelocOperand* out) {
  ArchRelocSurg where;
  const char* elf_prefix;   /* `:mod:` prefix */
  const char* macho_suffix; /* `@MOD` suffix */

  switch (kind) {
    case R_AARCH64_CALL26:
    case R_AARCH64_JUMP26:
    case R_AARCH64_CONDBR19:
    case R_AARCH64_ADR_PREL_LO21:
      where = ARCH_RELOC_SURG_TAIL;
      elf_prefix = "";
      macho_suffix = "";
      break;
    case R_AARCH64_ADR_PREL_PG_HI21:
      where = ARCH_RELOC_SURG_TAIL;
      elf_prefix = "";
      macho_suffix = "@PAGE";
      break;
    case R_AARCH64_ADR_GOT_PAGE:
      where = ARCH_RELOC_SURG_TAIL;
      elf_prefix = ":got:";
      macho_suffix = "@GOTPAGE";
      break;
    case R_AARCH64_ADD_ABS_LO12_NC:
      where = ARCH_RELOC_SURG_TAIL;
      elf_prefix = ":lo12:";
      macho_suffix = "@PAGEOFF";
      break;
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_LDST16_ABS_LO12_NC:
    case R_AARCH64_LDST32_ABS_LO12_NC:
    case R_AARCH64_LDST64_ABS_LO12_NC:
      where = ARCH_RELOC_SURG_MEM;
      elf_prefix = ":lo12:";
      macho_suffix = "@PAGEOFF";
      break;
    case R_AARCH64_LD64_GOT_LO12_NC:
      where = ARCH_RELOC_SURG_MEM;
      elf_prefix = ":got_lo12:";
      macho_suffix = "@GOTPAGEOFF";
      break;
    default:
      return 0; /* TLV and anything else: keep the numeric operand */
  }

  int is_macho = (fmt == KIT_OBJ_MACHO);
  out->surg = where;
  out->addend_bias = 0; /* aarch64 relocs store the symbol offset directly */
  /* Exactly one of prefix/suffix carries the spelling; the other is empty. */
  out->prefix = is_macho ? "" : elf_prefix;
  out->suffix = is_macho ? macho_suffix : "";
  return 1;
}

/* Intra-section local branches whose target codegen resolved in place (no
 * relocation): b, b.<cc>, cbz/cbnz, tbz/tbnz, and adr (address-of-label, e.g.
 * `&&label`). Excludes bl (a call — always relocated), adrp (page-relative; its
 * lo12 partner carries the reloc), and register-form branches. Moved here from
 * the printer so branch-mnemonic knowledge is arch-local.
*/ 515 static int aa64_is_local_branch(KitSlice m) { 516 if (m.len == 1 && m.s[0] == 'b') return 1; 517 if (m.len >= 2 && m.s[0] == 'b' && m.s[1] == '.') return 1; 518 if (m.len == 3 && memcmp(m.s, "cbz", 3) == 0) return 1; 519 if (m.len == 4 && memcmp(m.s, "cbnz", 4) == 0) return 1; 520 if (m.len == 3 && memcmp(m.s, "tbz", 3) == 0) return 1; 521 if (m.len == 4 && memcmp(m.s, "tbnz", 4) == 0) return 1; 522 return 0; 523 } 524 525 const ArchAsmOps aa64_asm_ops = { 526 .reloc_operand = aa64_reloc_operand, 527 .is_local_branch = aa64_is_local_branch, 528 }; 529 530 static void emit32(AsmDriver* d, u32 word) { 531 MCEmitter* mc = asm_driver_mc(d); 532 (void)asm_driver_cur_section(d); 533 u8 buf[4]; 534 buf[0] = (u8)(word & 0xff); 535 buf[1] = (u8)((word >> 8) & 0xff); 536 buf[2] = (u8)((word >> 16) & 0xff); 537 buf[3] = (u8)((word >> 24) & 0xff); 538 mc->emit_bytes(mc, buf, 4); 539 } 540 541 static int parse_cond_from_ident(AsmDriver* d, Sym ident, u32* out) { 542 Slice sl = pool_slice(asm_driver_pool(d), ident); 543 const char* s = sl.s; 544 size_t n = sl.len; 545 if (!s) return 0; 546 if (icase_eq(s, n, "eq")) 547 *out = 0; 548 else if (icase_eq(s, n, "ne")) 549 *out = 1; 550 else if (icase_eq(s, n, "cs") || icase_eq(s, n, "hs")) 551 *out = 2; 552 else if (icase_eq(s, n, "cc") || icase_eq(s, n, "lo")) 553 *out = 3; 554 else if (icase_eq(s, n, "mi")) 555 *out = 4; 556 else if (icase_eq(s, n, "pl")) 557 *out = 5; 558 else if (icase_eq(s, n, "vs")) 559 *out = 6; 560 else if (icase_eq(s, n, "vc")) 561 *out = 7; 562 else if (icase_eq(s, n, "hi")) 563 *out = 8; 564 else if (icase_eq(s, n, "ls")) 565 *out = 9; 566 else if (icase_eq(s, n, "ge")) 567 *out = 10; 568 else if (icase_eq(s, n, "lt")) 569 *out = 11; 570 else if (icase_eq(s, n, "gt")) 571 *out = 12; 572 else if (icase_eq(s, n, "le")) 573 *out = 13; 574 else if (icase_eq(s, n, "al")) 575 *out = 14; 576 else 577 return 0; 578 return 1; 579 } 580 581 static u32 parse_cond(AsmDriver* d, const char* what) { 582 AsmTok t 
= asm_driver_next(d); 583 u32 cond = 0; 584 if (t.kind != ASM_TOK_IDENT || !parse_cond_from_ident(d, t.v.ident, &cond)) 585 asm_driver_panic(d, "asm: %.*s: expected condition code", 586 SLICE_ARG(slice_from_cstr(what))); 587 return cond; 588 } 589 590 static void expect_comma(AsmDriver* d, const char* what) { 591 if (!asm_driver_eat_comma(d)) 592 asm_driver_panic(d, "asm: expected ',' (%.*s)", 593 SLICE_ARG(slice_from_cstr(what))); 594 } 595 596 /* ---- per-mnemonic parsers ---- */ 597 598 /* ret [Xn] — Xn defaults to x30. */ 599 static void p_ret(AsmDriver* d) { 600 if (asm_driver_at_eol(d)) { 601 emit32(d, aa64_ret(30)); 602 return; 603 } 604 AA64Reg r = parse_reg(d); 605 if (!r.is64) asm_driver_panic(d, "asm: ret: 64-bit register expected"); 606 emit32(d, aa64_ret(r.num)); 607 } 608 609 static void p_br(AsmDriver* d) { 610 AA64Reg r = parse_reg(d); 611 if (!r.is64) asm_driver_panic(d, "asm: br: 64-bit register expected"); 612 emit32(d, aa64_br(r.num)); 613 } 614 615 static void p_blr(AsmDriver* d) { 616 AA64Reg r = parse_reg(d); 617 if (!r.is64) asm_driver_panic(d, "asm: blr: 64-bit register expected"); 618 emit32(d, aa64_blr(r.num)); 619 } 620 621 static void p_nop(AsmDriver* d) { 622 (void)d; 623 emit32(d, aa64_nop()); 624 } 625 626 /* Memory barriers (DMB / DSB / ISB / CLREX). 
627 * 628 * dmb <option> ; option in {sy, ish, nsh, osh, ld, st, ishld, 629 * ishst, nshld, nshst, oshld, oshst} 630 * dmb #imm4 ; numeric form 631 * dsb <option> | #imm4 632 * isb [<option>] ; option defaults to sy when omitted 633 * clrex [#imm4] ; option defaults to sy (15) when omitted */ 634 static u32 parse_barrier_option(AsmDriver* d, int allow_dmb_ld_st) { 635 if (asm_driver_at_eol(d)) return AA64_BARRIER_OPT_SY; 636 AsmTok t = asm_driver_peek(d); 637 if (t.kind == ASM_TOK_IDENT) { 638 (void)asm_driver_next(d); 639 Slice sl = pool_slice(asm_driver_pool(d), t.v.ident); 640 const char* s = sl.s; 641 size_t n = sl.len; 642 if (icase_eq(s, n, "sy")) return AA64_BARRIER_OPT_SY; 643 if (icase_eq(s, n, "ish")) return AA64_BARRIER_OPT_ISH; 644 if (icase_eq(s, n, "ishld")) return AA64_BARRIER_OPT_ISHLD; 645 if (icase_eq(s, n, "ishst")) return AA64_BARRIER_OPT_ISHST; 646 if (icase_eq(s, n, "nsh")) return AA64_BARRIER_OPT_NSH; 647 if (icase_eq(s, n, "nshld")) return AA64_BARRIER_OPT_NSHLD; 648 if (icase_eq(s, n, "nshst")) return AA64_BARRIER_OPT_NSHST; 649 if (icase_eq(s, n, "osh")) return AA64_BARRIER_OPT_OSH; 650 if (icase_eq(s, n, "oshld")) return AA64_BARRIER_OPT_OSHLD; 651 if (icase_eq(s, n, "oshst")) return AA64_BARRIER_OPT_OSHST; 652 if (allow_dmb_ld_st) { 653 if (icase_eq(s, n, "ld")) return AA64_BARRIER_OPT_LD; 654 if (icase_eq(s, n, "st")) return AA64_BARRIER_OPT_ST; 655 } 656 asm_driver_panic(d, "asm: unknown barrier option"); 657 } 658 /* Numeric form: '#imm4'. 
*/ 659 i64 imm = parse_imm_const(d); 660 if (imm < 0 || imm > 15) asm_driver_panic(d, "asm: barrier imm out of range"); 661 return (u32)imm; 662 } 663 664 static void p_dmb(AsmDriver* d) { 665 u32 opt = parse_barrier_option(d, /*allow_dmb_ld_st=*/1); 666 emit32(d, aa64_dmb(opt)); 667 } 668 static void p_dsb(AsmDriver* d) { 669 u32 opt = parse_barrier_option(d, /*allow_dmb_ld_st=*/0); 670 emit32(d, aa64_dsb(opt)); 671 } 672 static void p_isb(AsmDriver* d) { 673 u32 opt = parse_barrier_option(d, /*allow_dmb_ld_st=*/0); 674 emit32(d, aa64_isb(opt)); 675 } 676 static void p_clrex(AsmDriver* d) { 677 u32 opt = parse_barrier_option(d, /*allow_dmb_ld_st=*/0); 678 emit32(d, aa64_clrex(opt)); 679 } 680 681 /* System-register access (MRS/MSR register form). 682 * 683 * mrs Xt, <sysreg> ; read system register into Xt 684 * msr <sysreg>, Xt ; write Xt into the system register 685 * 686 * A system register is named (resolved against the shared isa.c name table 687 * via aa64_sysreg_by_name) or given by the architectural generic spelling 688 * S<op0>_<op1>_C<crn>_C<crm>_<op2> (e.g. s3_3_c13_c0_2 == tpidr_el0), which 689 * covers any encodable register. The immediate PSTATE forms (msr daifset, 690 * #imm / spsel, #imm) are not handled. */ 691 692 /* Consume the next token as a system-register name into the 5 fields. 
*/ 693 static void parse_sysreg(AsmDriver* d, const char* what, u32* op0, u32* op1, 694 u32* crn, u32* crm, u32* op2) { 695 AsmTok t = asm_driver_peek(d); 696 if (t.kind != ASM_TOK_IDENT) 697 asm_driver_panic(d, "asm: expected system register"); 698 (void)asm_driver_next(d); 699 Slice sl = pool_slice(asm_driver_pool(d), t.v.ident); 700 if (!aa64_sysreg_by_name(sl.s, sl.len, op0, op1, crn, crm, op2)) 701 asm_driver_panic(d, what); 702 } 703 704 static void p_mrs_(AsmDriver* d) { 705 AA64Reg rt = parse_reg(d); 706 if (!rt.is64 || rt.is_sp) 707 asm_driver_panic(d, "asm: mrs: destination must be a 64-bit GPR"); 708 expect_comma(d, "mrs"); 709 u32 op0, op1, crn, crm, op2; 710 parse_sysreg(d, "asm: mrs: unknown system register", &op0, &op1, &crn, &crm, 711 &op2); 712 emit32(d, aa64_sysreg_move(/*is_read=*/1, op0, op1, crn, crm, op2, rt.num)); 713 } 714 715 static void p_msr_(AsmDriver* d) { 716 u32 op0, op1, crn, crm, op2; 717 parse_sysreg(d, 718 "asm: msr: unknown system register (immediate PSTATE forms " 719 "like daifset are unsupported)", 720 &op0, &op1, &crn, &crm, &op2); 721 expect_comma(d, "msr"); 722 AA64Reg rt = parse_reg(d); 723 if (!rt.is64 || rt.is_sp) 724 asm_driver_panic(d, "asm: msr: source must be a 64-bit GPR"); 725 emit32(d, aa64_sysreg_move(/*is_read=*/0, op0, op1, crn, crm, op2, rt.num)); 726 } 727 728 /* mov: 729 * mov Rd, Rm → ORR Rd, ZR, Rm 730 * mov Rd, #imm → MOVZ (if imm fits in a single halfword unshifted) 731 * MOVN (if ~imm fits) 732 * otherwise: panic (multi-step expansion deferred). 
*/ 733 static void p_mov(AsmDriver* d) { 734 AA64Reg rd = parse_reg(d); 735 expect_comma(d, "mov"); 736 AsmTok t = asm_driver_peek(d); 737 if (t.kind == ASM_TOK_IDENT) { 738 AA64Reg src; 739 memset(&src, 0, sizeof src); 740 if (parse_reg_from_ident(d, t.v.ident, &src)) { 741 (void)asm_driver_next(d); 742 if (src.is64 != rd.is64) 743 asm_driver_panic(d, "asm: mov: register width mismatch"); 744 /* mov involving SP encodes as `ADD Rd, Rsp, #0` per AArch64; 745 * approximate with that exact form. */ 746 if (rd.is_sp || src.is_sp) { 747 require_sp_spelling(d, rd, "mov sp"); 748 require_sp_spelling(d, src, "mov sp"); 749 emit32(d, aa64_add_imm(rd.is64, rd.num, src.num, 0, 0)); 750 return; 751 } 752 emit32(d, aa64_mov_reg(rd.is64, rd.num, src.num)); 753 return; 754 } 755 /* fall through: identifier that is not a register → treat as 756 * symbol/equate via expression below. */ 757 } 758 /* Immediate. */ 759 i64 imm = parse_imm_const(d); 760 if (rd.is_sp) asm_driver_panic(d, "asm: mov: cannot move imm into SP"); 761 u64 uv = (u64)imm; 762 u64 mask = rd.is64 ? ~0ull : 0xffffffffull; 763 uv &= mask; 764 /* Try MOVZ with one of four halfwords. */ 765 for (u32 hw = 0; hw < (rd.is64 ? 4u : 2u); ++hw) { 766 u64 shift = (u64)hw * 16; 767 u64 hwmask = 0xffffull << shift; 768 if ((uv & ~hwmask) == 0) { 769 u32 v = (u32)((uv >> shift) & 0xffff); 770 emit32(d, aa64_movz(rd.is64, rd.num, v, hw)); 771 return; 772 } 773 } 774 /* Try MOVN with one halfword (encodes ~imm in that halfword). */ 775 u64 nv = (~uv) & mask; 776 for (u32 hw = 0; hw < (rd.is64 ? 4u : 2u); ++hw) { 777 u64 shift = (u64)hw * 16; 778 u64 hwmask = 0xffffull << shift; 779 if ((nv & ~hwmask) == 0) { 780 u32 v = (u32)((nv >> shift) & 0xffff); 781 emit32(d, aa64_movn(rd.is64, rd.num, v, hw)); 782 return; 783 } 784 } 785 /* Try the ORR-bitmask alias (mov Rd,#imm → ORR Rd,ZR,#bitmask). 
*/ 786 { 787 u32 N = 0, immr = 0, imms = 0; 788 if (aa64_logimm_encode(uv, rd.is64, &N, &immr, &imms)) { 789 emit32(d, aa64_orr_imm(rd.is64, rd.num, AA64_ZR, N, immr, imms)); 790 return; 791 } 792 } 793 asm_driver_panic(d, "asm: mov: immediate cannot be encoded in one insn"); 794 } 795 796 /* mvn Rd, Rm */ 797 static void p_mvn(AsmDriver* d) { 798 AA64Reg rd = parse_reg(d); 799 expect_comma(d, "mvn"); 800 AA64Reg rm = parse_reg(d); 801 if (rd.is64 != rm.is64) asm_driver_panic(d, "asm: mvn: width mismatch"); 802 emit32(d, aa64_mvn(rd.is64, rd.num, rm.num)); 803 } 804 805 /* movz / movn / movk Rd, #imm[, lsl #shift] */ 806 static void p_movwide(AsmDriver* d, u32 opc) { 807 AA64Reg rd = parse_reg(d); 808 expect_comma(d, "movz/n/k"); 809 i64 imm = parse_imm_const(d); 810 u32 hw = 0; 811 if (asm_driver_eat_comma(d)) { 812 /* lsl #N (N is 0/16/32/48). */ 813 AsmTok lid = asm_driver_next(d); 814 if (lid.kind != ASM_TOK_IDENT) asm_driver_panic(d, "asm: expected 'lsl'"); 815 Slice lsl = pool_slice(asm_driver_pool(d), lid.v.ident); 816 const char* lp = lsl.s; 817 size_t ln = lsl.len; 818 if (!lp || !icase_eq(lp, ln, "lsl")) 819 asm_driver_panic(d, "asm: expected 'lsl'"); 820 i64 sh = parse_imm_const(d); 821 if (sh % 16 != 0 || sh < 0 || sh > 48) 822 asm_driver_panic(d, "asm: movz/n/k: bad lsl shift"); 823 hw = (u32)(sh / 16); 824 } 825 u32 word = ((rd.is64 & 1u) << 31) | ((opc & 3u) << 29) | 826 AA64_MOVEWIDE_FAMILY_MATCH | ((hw & 3u) << 21) | 827 (((u32)imm & 0xffffu) << 5) | (rd.num & 0x1fu); 828 emit32(d, word); 829 } 830 831 /* svc / brk / hlt #imm */ 832 static void p_except(AsmDriver* d, u32 form) { 833 i64 imm = parse_imm_const(d); 834 switch (form) { 835 case 0: 836 emit32(d, aa64_svc((u32)imm)); 837 break; 838 case 1: 839 emit32(d, aa64_brk((u32)imm)); 840 break; 841 case 2: { 842 /* HLT */ 843 u32 word = AA64_EXCEPT_FAMILY_MATCH | ((u32)2 << 21) | 844 (((u32)imm & 0xffffu) << 5); 845 emit32(d, word); 846 break; 847 } 848 default: 849 asm_driver_panic(d, "asm: bad 
exception form"); 850 } 851 } 852 853 /* Read optional `, lsl|lsr|asr|ror #imm` shift modifier. Returns 1 if 854 * present. */ 855 static int parse_shift_mod(AsmDriver* d, u32* shift_out, u32* imm6_out) { 856 AsmTok t = asm_driver_peek(d); 857 if (t.kind != ASM_TOK_IDENT) return 0; 858 Slice sl = pool_slice(asm_driver_pool(d), t.v.ident); 859 const char* p = sl.s; 860 size_t n = sl.len; 861 u32 sh; 862 if (icase_eq(p, n, "lsl")) 863 sh = 0; 864 else if (icase_eq(p, n, "lsr")) 865 sh = 1; 866 else if (icase_eq(p, n, "asr")) 867 sh = 2; 868 else if (icase_eq(p, n, "ror")) 869 sh = 3; 870 else 871 return 0; 872 (void)asm_driver_next(d); 873 i64 imm = parse_imm_const(d); 874 if (imm < 0 || imm > 63) 875 asm_driver_panic(d, "asm: shift amount out of range"); 876 *shift_out = sh; 877 *imm6_out = (u32)imm; 878 return 1; 879 } 880 881 /* add / sub family. 882 * Forms: 883 * add Rd, Rn, Rm[, lsl #s] shifted-register 884 * add Rd, Rn, #imm immediate 885 * add Rd, Rn, #imm, lsl #12 immediate w/ shift 886 * S-suffixed (adds/subs) sets flags. */ 887 static void p_addsub(AsmDriver* d, int is_sub, int set_flags) { 888 AA64Reg rd = parse_reg(d); 889 expect_comma(d, "add/sub"); 890 AA64Reg rn = parse_reg(d); 891 expect_comma(d, "add/sub"); 892 AsmTok t = asm_driver_peek(d); 893 /* `add Rd, Rn, <sym lo12>` — ADD (immediate), zero imm12, plus an 894 * R_AARCH64_ADD_ABS_LO12_NC relocation (the low-12 PIC/abs sequence). ELF 895 * spells the modifier as a `:lo12:` prefix (leading ':'); Mach-O spells it 896 * as a `sym@PAGEOFF` suffix, so the trigger there is a non-register IDENT 897 * third operand (probe with parse_reg_from_ident so `add x0,x1,x2` stays the 898 * register path). 
*/ 899 int symbolic = 0; 900 if (!is_sub && !set_flags) { 901 if (target_is_macho(d)) { 902 AA64Reg probe; 903 memset(&probe, 0, sizeof probe); 904 symbolic = (t.kind == ASM_TOK_IDENT && 905 !parse_reg_from_ident(d, t.v.ident, &probe)); 906 } else { 907 symbolic = tok_punct(t, ':'); 908 } 909 } 910 if (symbolic) { 911 AA64RelMod mod; 912 ObjSymId sym = OBJ_SYM_NONE; 913 i64 off = 0; 914 if (target_is_macho(d)) { 915 parse_imm_sym(d, &sym, &off); 916 mod = parse_reloc_suffix(d); 917 } else { 918 mod = parse_reloc_mod(d); 919 parse_imm_sym(d, &sym, &off); 920 } 921 if (mod != AA64_RELMOD_LO12) 922 asm_driver_panic(d, 923 "asm: add: only :lo12: (ELF) / @PAGEOFF (Mach-O) is " 924 "valid here"); 925 if (rd.is64 != rn.is64) 926 asm_driver_panic(d, "asm: add lo12: width mismatch"); 927 u32 word = aa64_addsubimm_pack((AA64AddSubImm){.sf = rd.is64, 928 .op = 0, 929 .S = 0, 930 .sh = 0, 931 .imm12 = 0, 932 .Rn = rn.num, 933 .Rd = rd.num}); 934 emit32(d, word); 935 MCEmitter* mc = asm_driver_mc(d); 936 mc->emit_reloc_at(mc, asm_driver_cur_section(d), mc->pos(mc) - 4, 937 R_AARCH64_ADD_ABS_LO12_NC, sym, off, 1, 0); 938 return; 939 } 940 if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') || 941 tok_punct(t, '+')) { 942 /* immediate form */ 943 if (rd.is64 != rn.is64) 944 asm_driver_panic(d, "asm: add/sub imm: width mismatch"); 945 require_sp_spelling(d, rn, "add/sub imm"); 946 if (set_flags) { 947 reject_sp_reg(d, rd, "add/sub imm"); 948 } else { 949 require_sp_spelling(d, rd, "add/sub imm"); 950 } 951 i64 imm = parse_imm_const(d); 952 u32 sh = 0; 953 if (asm_driver_eat_comma(d)) { 954 AsmTok lid = asm_driver_next(d); 955 if (lid.kind != ASM_TOK_IDENT) 956 asm_driver_panic(d, "asm: expected 'lsl #12'"); 957 Slice lsl = pool_slice(asm_driver_pool(d), lid.v.ident); 958 const char* lp = lsl.s; 959 size_t ln = lsl.len; 960 if (!lp || !icase_eq(lp, ln, "lsl")) 961 asm_driver_panic(d, "asm: expected 'lsl'"); 962 i64 s = parse_imm_const(d); 963 if (s == 12) 964 sh = 
1; 965 else if (s == 0) 966 sh = 0; 967 else 968 asm_driver_panic(d, "asm: add/sub imm: lsl must be 0 or 12"); 969 } 970 if (imm < 0 || imm > 0xfff) 971 asm_driver_panic(d, "asm: add/sub imm out of range"); 972 u32 word = aa64_addsubimm_pack((AA64AddSubImm){.sf = rd.is64, 973 .op = (u32)is_sub, 974 .S = (u32)set_flags, 975 .sh = sh, 976 .imm12 = (u32)imm, 977 .Rn = rn.num, 978 .Rd = rd.num}); 979 emit32(d, word); 980 return; 981 } 982 /* register form */ 983 AA64Reg rm = parse_reg(d); 984 reject_sp_reg(d, rd, "add/sub reg"); 985 reject_sp_reg(d, rn, "add/sub reg"); 986 reject_sp_reg(d, rm, "add/sub reg"); 987 if (rd.is64 != rm.is64 || rd.is64 != rn.is64) 988 asm_driver_panic(d, "asm: add/sub reg: width mismatch"); 989 u32 shift = 0, imm6 = 0; 990 if (asm_driver_eat_comma(d)) { 991 if (!parse_shift_mod(d, &shift, &imm6)) 992 asm_driver_panic(d, "asm: add/sub reg: expected shift modifier"); 993 } 994 u32 word = aa64_addsubsr_pack((AA64AddSubSR){.sf = rd.is64, 995 .op = (u32)is_sub, 996 .S = (u32)set_flags, 997 .shift = shift, 998 .Rm = rm.num, 999 .imm6 = imm6, 1000 .Rn = rn.num, 1001 .Rd = rd.num}); 1002 emit32(d, word); 1003 } 1004 1005 /* cmp Rn, Rm | cmp Rn, #imm → SUBS ZR, Rn, ... */ 1006 static void p_cmp(AsmDriver* d, int is_neg /* cmn flips op */) { 1007 AA64Reg rn = parse_reg(d); 1008 expect_comma(d, "cmp"); 1009 AsmTok t = asm_driver_peek(d); 1010 if (tok_punct(t, '#') || t.kind == ASM_TOK_NUM || tok_punct(t, '-') || 1011 tok_punct(t, '+')) { 1012 require_sp_spelling(d, rn, "cmp imm"); 1013 i64 imm = parse_imm_const(d); 1014 u32 sh = 0; 1015 if (asm_driver_eat_comma(d)) { 1016 AsmTok lid = asm_driver_next(d); 1017 Slice lsl = (lid.kind == ASM_TOK_IDENT) 1018 ? 
pool_slice(asm_driver_pool(d), lid.v.ident) 1019 : SLICE_NULL; 1020 const char* lp = lsl.s; 1021 size_t ln = lsl.len; 1022 if (!lp || !icase_eq(lp, ln, "lsl")) 1023 asm_driver_panic(d, "asm: cmp imm: expected 'lsl'"); 1024 i64 s = parse_imm_const(d); 1025 if (s == 12) 1026 sh = 1; 1027 else if (s != 0) 1028 asm_driver_panic(d, "asm: cmp imm: lsl must be 0 or 12"); 1029 } 1030 if (imm < 0 || imm > 0xfff) 1031 asm_driver_panic(d, "asm: cmp imm out of range"); 1032 u32 word = aa64_addsubimm_pack((AA64AddSubImm){.sf = rn.is64, 1033 .op = (u32)(!is_neg), 1034 .S = 1, 1035 .sh = sh, 1036 .imm12 = (u32)imm, 1037 .Rn = rn.num, 1038 .Rd = AA64_ZR}); 1039 emit32(d, word); 1040 return; 1041 } 1042 AA64Reg rm = parse_reg(d); 1043 reject_sp_reg(d, rn, "cmp reg"); 1044 reject_sp_reg(d, rm, "cmp reg"); 1045 if (rm.is64 != rn.is64) asm_driver_panic(d, "asm: cmp: width mismatch"); 1046 u32 shift = 0, imm6 = 0; 1047 if (asm_driver_eat_comma(d)) parse_shift_mod(d, &shift, &imm6); 1048 u32 word = aa64_addsubsr_pack((AA64AddSubSR){.sf = rn.is64, 1049 .op = (u32)(!is_neg), 1050 .S = 1, 1051 .shift = shift, 1052 .Rm = rm.num, 1053 .imm6 = imm6, 1054 .Rn = rn.num, 1055 .Rd = AA64_ZR}); 1056 emit32(d, word); 1057 } 1058 1059 static void p_condsel(AsmDriver* d, u32 op, u32 op2, const char* what) { 1060 AA64Reg rd = parse_reg(d); 1061 expect_comma(d, what); 1062 AA64Reg rn = parse_reg(d); 1063 expect_comma(d, what); 1064 AA64Reg rm = parse_reg(d); 1065 expect_comma(d, what); 1066 u32 cond = parse_cond(d, what); 1067 if (rd.is_sp || rn.is_sp || rm.is_sp) 1068 asm_driver_panic(d, "asm: %.*s: SP register not allowed", 1069 SLICE_ARG(slice_from_cstr(what))); 1070 if (rd.is64 != rn.is64 || rd.is64 != rm.is64) 1071 asm_driver_panic(d, "asm: %.*s: width mismatch", 1072 SLICE_ARG(slice_from_cstr(what))); 1073 u32 word = aa64_condsel_pack((AA64CondSel){.sf = (u32)rd.is64, 1074 .op = op, 1075 .S = 0, 1076 .Rm = rm.num, 1077 .cond = cond, 1078 .op2 = op2, 1079 .Rn = rn.num, 1080 .Rd = rd.num}); 1081 
emit32(d, word); 1082 } 1083 1084 static void p_cset_like(AsmDriver* d, u32 op, u32 op2, const char* what) { 1085 AA64Reg rd = parse_reg(d); 1086 expect_comma(d, what); 1087 u32 cond = parse_cond(d, what); 1088 if (rd.is_sp) 1089 asm_driver_panic(d, "asm: %.*s: SP register not allowed", 1090 SLICE_ARG(slice_from_cstr(what))); 1091 u32 word = aa64_condsel_pack((AA64CondSel){.sf = (u32)rd.is64, 1092 .op = op, 1093 .S = 0, 1094 .Rm = AA64_ZR, 1095 .cond = cond ^ 1u, 1096 .op2 = op2, 1097 .Rn = AA64_ZR, 1098 .Rd = rd.num}); 1099 emit32(d, word); 1100 } 1101 1102 /* neg / negs Rd, Rm → SUB / SUBS Rd, ZR, Rm */ 1103 static void p_neg(AsmDriver* d, int set_flags) { 1104 AA64Reg rd = parse_reg(d); 1105 expect_comma(d, "neg"); 1106 AA64Reg rm = parse_reg(d); 1107 reject_sp_reg(d, rd, "neg"); 1108 reject_sp_reg(d, rm, "neg"); 1109 if (rd.is64 != rm.is64) asm_driver_panic(d, "asm: neg: width mismatch"); 1110 u32 shift = 0, imm6 = 0; 1111 if (asm_driver_eat_comma(d)) parse_shift_mod(d, &shift, &imm6); 1112 u32 word = aa64_addsubsr_pack((AA64AddSubSR){.sf = rd.is64, 1113 .op = 1, 1114 .S = (u32)set_flags, 1115 .shift = shift, 1116 .Rm = rm.num, 1117 .imm6 = imm6, 1118 .Rn = AA64_ZR, 1119 .Rd = rd.num}); 1120 emit32(d, word); 1121 } 1122 1123 /* Logical family: shifted-register `<op> Rd,Rn,Rm{,shift}` or, for the 1124 * non-negated AND/ORR/EOR/ANDS, the bitmask-immediate `<op> Rd,Rn,#imm`. 1125 * N is the SR-form negate bit (BIC/ORN/EON/BICS); those have no immediate 1126 * form, so an immediate third operand is only valid when N==0. */ 1127 static void p_log_sr(AsmDriver* d, u32 opc, u32 N) { 1128 AA64Reg rd = parse_reg(d); 1129 expect_comma(d, "logical"); 1130 AA64Reg rn = parse_reg(d); 1131 expect_comma(d, "logical"); 1132 if (!peek_is_reg(d)) { 1133 /* Bitmask-immediate form. AND/ORR/EOR use the SP-capable destination; 1134 * ANDS uses ZR. Rn is always a GPR (caller's parse_reg already enforced 1135 * GP for the two register operands). 
*/ 1136 if (N) asm_driver_panic(d, "asm: logical: immediate form has no negation"); 1137 if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: logical: width mismatch"); 1138 u64 imm = (u64)parse_imm_const(d); 1139 u32 bN = 0, immr = 0, imms = 0; 1140 if (!aa64_logimm_encode(imm, rd.is64, &bN, &immr, &imms)) 1141 asm_driver_panic(d, "asm: logical: immediate is not a valid bitmask"); 1142 emit32(d, aa64_logimm_pack((AA64LogImm){.sf = rd.is64, 1143 .opc = opc, 1144 .N = bN, 1145 .immr = immr, 1146 .imms = imms, 1147 .Rn = rn.num, 1148 .Rd = rd.num})); 1149 return; 1150 } 1151 AA64Reg rm = parse_reg(d); 1152 if (rd.is64 != rn.is64 || rd.is64 != rm.is64) 1153 asm_driver_panic(d, "asm: logical: width mismatch"); 1154 u32 shift = 0, imm6 = 0; 1155 if (asm_driver_eat_comma(d)) parse_shift_mod(d, &shift, &imm6); 1156 u32 word = aa64_logsr_pack((AA64LogSR){.sf = rd.is64, 1157 .opc = opc, 1158 .shift = shift, 1159 .N = N, 1160 .Rm = rm.num, 1161 .imm6 = imm6, 1162 .Rn = rn.num, 1163 .Rd = rd.num}); 1164 emit32(d, word); 1165 } 1166 1167 /* Data-processing 3-source: madd/msub Rd, Rn, Rm, Ra. 
*/ 1168 static void p_dp3(AsmDriver* d, u32 o0) { 1169 AA64Reg rd = parse_reg(d); 1170 expect_comma(d, "dp3"); 1171 AA64Reg rn = parse_reg(d); 1172 expect_comma(d, "dp3"); 1173 AA64Reg rm = parse_reg(d); 1174 expect_comma(d, "dp3"); 1175 AA64Reg ra = parse_reg(d); 1176 if (rd.is64 != rn.is64 || rd.is64 != rm.is64 || rd.is64 != ra.is64) 1177 asm_driver_panic(d, "asm: dp3: width mismatch"); 1178 u32 word = aa64_dp3_pack((AA64DP3){.sf = rd.is64, 1179 .op31 = 0, 1180 .o0 = o0, 1181 .Rm = rm.num, 1182 .Ra = ra.num, 1183 .Rn = rn.num, 1184 .Rd = rd.num}); 1185 emit32(d, word); 1186 } 1187 1188 /* mul Rd, Rn, Rm → MADD Rd, Rn, Rm, ZR */ 1189 static void p_mul(AsmDriver* d, u32 o0) { 1190 AA64Reg rd = parse_reg(d); 1191 expect_comma(d, "mul"); 1192 AA64Reg rn = parse_reg(d); 1193 expect_comma(d, "mul"); 1194 AA64Reg rm = parse_reg(d); 1195 if (rd.is64 != rn.is64 || rd.is64 != rm.is64) 1196 asm_driver_panic(d, "asm: mul: width mismatch"); 1197 u32 word = aa64_dp3_pack((AA64DP3){.sf = rd.is64, 1198 .op31 = 0, 1199 .o0 = o0, 1200 .Rm = rm.num, 1201 .Ra = AA64_ZR, 1202 .Rn = rn.num, 1203 .Rd = rd.num}); 1204 emit32(d, word); 1205 } 1206 1207 /* DP2: udiv/sdiv/lslv/lsrv/asrv/rorv Rd, Rn, Rm. */ 1208 static void p_dp2(AsmDriver* d, u32 opcode) { 1209 AA64Reg rd = parse_reg(d); 1210 expect_comma(d, "dp2"); 1211 AA64Reg rn = parse_reg(d); 1212 expect_comma(d, "dp2"); 1213 AA64Reg rm = parse_reg(d); 1214 if (rd.is64 != rn.is64 || rd.is64 != rm.is64) 1215 asm_driver_panic(d, "asm: dp2: width mismatch"); 1216 u32 word = aa64_dp2_pack((AA64DP2){.sf = rd.is64, 1217 .opcode = opcode, 1218 .Rm = rm.num, 1219 .Rn = rn.num, 1220 .Rd = rd.num}); 1221 emit32(d, word); 1222 } 1223 1224 /* Shift aliases: `<op> Rd, Rn, (Rm | #imm)`. 1225 * register form → LSLV/LSRV/ASRV (DP2 variable shift) 1226 * immediate form → UBFM (lsl/lsr) / SBFM (asr) bitfield alias 1227 * `kind` indexes the three shifts: 0=lsl 1=lsr 2=asr. 
The immediate aliases 1228 * are exactly what the disassembler prints for these UBFM/SBFM encodings, so 1229 * `cc -S | as` round-trips. (ROR's immediate form is EXTR, which the 1230 * disassembler doesn't decode, so it is left out — `rorv` covers the register 1231 * rotate.) */ 1232 static void p_shift(AsmDriver* d, u32 kind) { 1233 static const u32 dp2op[3] = {AA64_DP2_LSLV_OP, AA64_DP2_LSRV_OP, 1234 AA64_DP2_ASRV_OP}; 1235 AA64Reg rd = parse_reg(d); 1236 expect_comma(d, "shift"); 1237 AA64Reg rn = parse_reg(d); 1238 if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: shift: width mismatch"); 1239 expect_comma(d, "shift"); 1240 if (peek_is_reg(d)) { 1241 AA64Reg rm = parse_reg(d); 1242 if (rd.is64 != rm.is64) asm_driver_panic(d, "asm: shift: width mismatch"); 1243 emit32(d, aa64_dp2_pack((AA64DP2){.sf = rd.is64, 1244 .opcode = dp2op[kind], 1245 .Rm = rm.num, 1246 .Rn = rn.num, 1247 .Rd = rd.num})); 1248 return; 1249 } 1250 i64 sv = parse_imm_const(d); 1251 u32 width = rd.is64 ? 64u : 32u; 1252 if (sv < 0 || (u64)sv >= width) 1253 asm_driver_panic(d, "asm: shift: amount out of range"); 1254 u32 shift = (u32)sv, immr = 0, imms = 0; 1255 if (kind == 2) { /* asr → SBFM */ 1256 aa64_asr_imm_fields(shift, rd.is64, &immr, &imms); 1257 emit32(d, aa64_bitfield(rd.is64, 0u, immr, imms, rd.num, rn.num)); 1258 } else { /* lsl/lsr → UBFM */ 1259 if (kind == 0) 1260 aa64_lsl_imm_fields(shift, rd.is64, &immr, &imms); 1261 else 1262 aa64_lsr_imm_fields(shift, rd.is64, &immr, &imms); 1263 emit32(d, aa64_bitfield(rd.is64, 2u, immr, imms, rd.num, rn.num)); 1264 } 1265 } 1266 1267 /* Branch immediate / conditional / compare-and-branch. */ 1268 1269 static void emit_branch_imm(AsmDriver* d, u32 op_bl, ObjSymId target, 1270 i64 addend, i64 const_disp) { 1271 MCEmitter* mc = asm_driver_mc(d); 1272 /* Emit a B/BL with imm26 = 0; record a CALL26/JUMP26 reloc against 1273 * either the symbol or the constant displacement. 
*/ 1274 u32 word = aa64_brimm_pack((AA64BrImm){.op = op_bl, .imm26 = 0}); 1275 emit32(d, word); 1276 u32 ofs = mc->pos(mc) - 4; 1277 RelocKind k = op_bl ? R_AARCH64_CALL26 : R_AARCH64_JUMP26; 1278 if (target != OBJ_SYM_NONE) { 1279 mc->emit_reloc_at(mc, asm_driver_cur_section(d), ofs, k, target, addend, 1, 1280 0); 1281 } else { 1282 /* Pure constant displacement is rare in real .s; reject it now. 1283 * The recommended form is to use a label and let the assembler 1284 * compute the displacement. */ 1285 (void)const_disp; 1286 asm_driver_panic(d, "asm: branch with pure constant disp not supported"); 1287 } 1288 } 1289 1290 static void p_b(AsmDriver* d, u32 op_bl) { 1291 ObjSymId sym = OBJ_SYM_NONE; 1292 i64 off = 0; 1293 /* GNU as accepts `b sym`, `bl sym+8`, etc. */ 1294 parse_imm_sym(d, &sym, &off); 1295 if (sym == OBJ_SYM_NONE) 1296 asm_driver_panic(d, "asm: b/bl: symbolic target required"); 1297 emit_branch_imm(d, op_bl, sym, off, 0); 1298 } 1299 1300 static void p_b_cond(AsmDriver* d, u32 cond) { 1301 ObjSymId sym = OBJ_SYM_NONE; 1302 i64 off = 0; 1303 parse_imm_sym(d, &sym, &off); 1304 if (sym == OBJ_SYM_NONE) 1305 asm_driver_panic(d, "asm: b.cond: symbolic target required"); 1306 /* Emit the instruction with imm19=0 + R_AARCH64_CONDBR19 reloc. 
*/ 1307 u32 word = aa64_brcond_pack((AA64BrCond){.imm19 = 0, .cond = cond}); 1308 emit32(d, word); 1309 MCEmitter* mc = asm_driver_mc(d); 1310 u32 ofs = mc->pos(mc) - 4; 1311 mc->emit_reloc_at(mc, asm_driver_cur_section(d), ofs, R_AARCH64_CONDBR19, sym, 1312 off, 1, 0); 1313 } 1314 1315 static void p_cbz(AsmDriver* d, u32 op) { 1316 AA64Reg rt = parse_reg(d); 1317 expect_comma(d, "cbz"); 1318 ObjSymId sym = OBJ_SYM_NONE; 1319 i64 off = 0; 1320 parse_imm_sym(d, &sym, &off); 1321 if (sym == OBJ_SYM_NONE) 1322 asm_driver_panic(d, "asm: cbz: symbolic target required"); 1323 u32 word = 1324 aa64_cb_pack((AA64CB){.sf = rt.is64, .op = op, .imm19 = 0, .Rt = rt.num}); 1325 emit32(d, word); 1326 MCEmitter* mc = asm_driver_mc(d); 1327 u32 ofs = mc->pos(mc) - 4; 1328 mc->emit_reloc_at(mc, asm_driver_cur_section(d), ofs, R_AARCH64_CONDBR19, sym, 1329 off, 1, 0); 1330 } 1331 1332 /* Memory-operand parser. Recognized shapes: 1333 * [Xn] base only 1334 * [Xn, #imm] base + immediate offset 1335 * [Xn, #imm]! pre-index (writeback) 1336 * [Xn], #imm post-index (writeback) 1337 * [Xn, Xm] register offset (LSL #0) 1338 * [Xn, Xm, LSL #s] register offset, scaled 1339 * [Xn, Wm, {U,S}XTW {#s}] 32-bit index, extended 1340 * [Xn, Xm, {U,S}XTX {#s}] / SXTX 64-bit index, extended 1341 * 1342 * imm is the literal byte offset (no scaling). When has_index is set, 1343 * `index` is the index register, `option` its 3-bit extend code, and 1344 * shift_present records whether an explicit `#s` was written (with the 1345 * amount in `shift`). pre_index / post_index flag the writeback forms. 
/* Parsed form of one bracketed memory operand, as filled in by parse_mem
 * (which zero-initializes the whole struct before parsing). */
typedef struct AA64Mem {
  AA64Reg base;  /* base register written first inside the brackets */
  AA64Reg index; /* index register; meaningful only when has_index is set */
  i64 imm; /* byte offset (literal as written) */
  u32 option; /* extend code for the index register (AA64_LDST_OPTION_*) */
  u32 shift;  /* explicit index shift amount; valid when shift_present */
  AA64RelMod reloc_mod; /* :lo12: / :got_lo12: on the offset, or NONE */
  ObjSymId reloc_sym;   /* symbol when reloc_mod != NONE */
  i64 reloc_off;        /* addend when reloc_mod != NONE */
  u8 pre_index;     /* `[...]!` writeback form */
  u8 post_index;    /* `[Xn], #imm` writeback form */
  u8 has_offset;    /* an immediate or relocation offset was written */
  u8 has_index;     /* a register index was written */
  u8 shift_present; /* an explicit `#s` followed the extend/shift name */
  u8 pad[3];        /* explicit padding; not read by the code in view */
} AA64Mem;
*/ 1395 AsmTok nt = asm_driver_peek(d); 1396 if (tok_punct(nt, '#') || nt.kind == ASM_TOK_NUM) { 1397 i64 s = parse_imm_const(d); 1398 if (s < 0) asm_driver_panic(d, "asm: ldr/str: negative index shift"); 1399 m->shift = (u32)s; 1400 m->shift_present = 1; 1401 } else if (m->option == AA64_LDST_OPTION_LSL) { 1402 asm_driver_panic(d, "asm: ldr/str: lsl requires a shift amount"); 1403 } 1404 } 1405 1406 static AA64Mem parse_mem(AsmDriver* d) { 1407 AA64Mem m; 1408 memset(&m, 0, sizeof m); 1409 if (!asm_driver_eat_punct(d, '[')) asm_driver_panic(d, "asm: expected '['"); 1410 m.base = parse_reg(d); 1411 if (!m.base.is64) 1412 asm_driver_panic(d, "asm: ldr/str: base register must be 64-bit"); 1413 require_sp_spelling(d, m.base, "ldr/str base"); 1414 if (asm_driver_eat_comma(d)) { 1415 /* A relocation offset (ELF `:lo12:sym`/`:got_lo12:sym` prefix, or Mach-O 1416 * `sym@PAGEOFF`/`sym@GOTPAGEOFF` suffix), a register index, or a plain 1417 * `#imm`/expression. */ 1418 AsmTok t = asm_driver_peek(d); 1419 AA64Reg idx; 1420 memset(&idx, 0, sizeof idx); 1421 if (!target_is_macho(d) && tok_punct(t, ':')) { 1422 m.reloc_mod = parse_reloc_mod(d); 1423 parse_imm_sym(d, &m.reloc_sym, &m.reloc_off); 1424 m.has_offset = 1; /* imm field stays 0; reloc supplies the low bits */ 1425 } else if (t.kind == ASM_TOK_IDENT && 1426 parse_reg_from_ident(d, t.v.ident, &idx)) { 1427 (void)asm_driver_next(d); 1428 reject_sp_reg(d, idx, "ldr/str index"); 1429 m.index = idx; 1430 m.has_index = 1; 1431 m.option = idx.is64 ? AA64_LDST_OPTION_LSL : AA64_LDST_OPTION_UXTW; 1432 if (asm_driver_eat_comma(d)) parse_mem_extend(d, &m); 1433 } else if (target_is_macho(d) && t.kind == ASM_TOK_IDENT) { 1434 /* Mach-O: `[Xn, sym@PAGEOFF]` / `[Xn, sym@GOTPAGEOFF]`. 
*/ 1435 parse_imm_sym(d, &m.reloc_sym, &m.reloc_off); 1436 m.reloc_mod = parse_reloc_suffix(d); 1437 if (m.reloc_mod != AA64_RELMOD_LO12 && 1438 m.reloc_mod != AA64_RELMOD_GOT_LO12) 1439 asm_driver_panic( 1440 d, "asm: ldr/str: symbol offset needs @PAGEOFF/@GOTPAGEOFF"); 1441 m.has_offset = 1; 1442 } else { 1443 m.imm = parse_imm_const(d); 1444 m.has_offset = 1; 1445 } 1446 } 1447 if (!asm_driver_eat_punct(d, ']')) asm_driver_panic(d, "asm: expected ']'"); 1448 if (asm_driver_eat_punct(d, '!')) { 1449 if (m.has_index) 1450 asm_driver_panic(d, "asm: ldr/str: writeback not allowed with index"); 1451 m.pre_index = 1; 1452 } else if (asm_driver_eat_comma(d)) { 1453 /* Post-index: `[Xn], #imm`. */ 1454 if (m.has_index || m.has_offset) 1455 asm_driver_panic(d, "asm: ldr/str: malformed post-index operand"); 1456 m.imm = parse_imm_const(d); 1457 m.has_offset = 1; 1458 m.post_index = 1; 1459 } 1460 return m; 1461 } 1462 1463 /* ldr/str Rt, [Xn, #imm] — chooses scaled or unscaled form based on 1464 * alignment of imm. */ 1465 /* Core load/store. `fixed_size` is the access log2-size (0=byte..3=dword) for 1466 * ldrb/ldrh/ldrsw etc., or -1 to derive it from the register width (ldr/str). 1467 * `sign_ext` selects the signed-load opc (10 = sign-extend to 64-bit, 11 = to 1468 * 32-bit), keyed on the destination register width. */ 1469 static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, 1470 int sign_ext) { 1471 AA64Reg rt = parse_ldst_reg(d); 1472 reject_sp_reg(d, rt, "ldr/str"); 1473 expect_comma(d, "ldr/str"); 1474 AA64Mem m = parse_mem(d); 1475 AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext); 1476 u32 size = e.size, opc = e.opc, V = e.V; 1477 if (m.reloc_mod != AA64_RELMOD_NONE) { 1478 /* [Xn, :lo12:sym] / [Xn, :got_lo12:sym] — unsigned-imm12 form with a zero 1479 * immediate; the relocation supplies the low 12 bits. 
:got_lo12: only 1480 * applies to a 64-bit `ldr` (the GOT entry is an 8-byte pointer); llvm-mc 1481 * rejects it on stores, signed loads, and sub-word loads. */ 1482 if (m.reloc_mod == AA64_RELMOD_GOT_LO12 && 1483 !(V == 0 && size == 3 && opc == AA64_LDST_OPC_LDR)) 1484 asm_driver_panic(d, "asm: :got_lo12: only valid on a 64-bit ldr"); 1485 u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size, 1486 .V = V, 1487 .opc = opc, 1488 .imm12 = 0, 1489 .Rn = m.base.num, 1490 .Rt = rt.num}); 1491 emit32(d, word); 1492 RelocKind k = (m.reloc_mod == AA64_RELMOD_GOT_LO12) 1493 ? R_AARCH64_LD64_GOT_LO12_NC 1494 : aa64_ldst_lo12_reloc(d, size); 1495 MCEmitter* mc = asm_driver_mc(d); 1496 mc->emit_reloc_at(mc, asm_driver_cur_section(d), mc->pos(mc) - 4, k, 1497 m.reloc_sym, m.reloc_off, 1, 0); 1498 return; 1499 } 1500 if (m.has_index) { 1501 /* Register-offset form. The S bit (scale by access size) is set when an 1502 * explicit shift equal to the access log2-size is written. An explicit 1503 * `#0` is legal and stays unscaled (S=0); for byte access #0 == size so it 1504 * sets S — matching llvm-mc, where the only legal amounts are 0 or size. */ 1505 u32 S = 0; 1506 if (m.shift_present) { 1507 if (m.shift == size) 1508 S = 1; 1509 else if (m.shift != 0) 1510 asm_driver_panic(d, 1511 "asm: ldr/str: index shift must be 0 or access size"); 1512 } 1513 u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size, 1514 .V = V, 1515 .opc = opc, 1516 .Rm = m.index.num, 1517 .option = m.option, 1518 .S = S, 1519 .Rn = m.base.num, 1520 .Rt = rt.num}); 1521 emit32(d, word); 1522 return; 1523 } 1524 if (m.pre_index || m.post_index) { 1525 /* Immediate writeback (unscaled signed imm9). */ 1526 if (m.imm < -256 || m.imm > 255) 1527 asm_driver_panic(d, "asm: ldr/str: writeback imm9 out of range"); 1528 u32 imm9 = (u32)((u64)m.imm & 0x1ffu); 1529 u32 idx = m.pre_index ? 
AA64_LDST_IDX_PRE : AA64_LDST_IDX_POST; 1530 u32 word = aa64_ldst_wback_pack((AA64LdStWBack){.size = size, 1531 .V = V, 1532 .opc = opc, 1533 .imm9 = imm9, 1534 .idx = idx, 1535 .Rn = m.base.num, 1536 .Rt = rt.num}); 1537 emit32(d, word); 1538 return; 1539 } 1540 { 1541 /* Try scaled unsigned-imm12 first. */ 1542 u32 scale = e.scale; 1543 if (m.imm >= 0 && (i64)((u64)m.imm % scale) == 0 && 1544 (u64)m.imm / scale <= 0xfff) { 1545 u32 imm12 = (u32)((u64)m.imm / scale); 1546 u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size, 1547 .V = V, 1548 .opc = opc, 1549 .imm12 = imm12, 1550 .Rn = m.base.num, 1551 .Rt = rt.num}); 1552 emit32(d, word); 1553 return; 1554 } 1555 /* Fall back to unscaled signed-imm9 (LDUR/STUR). */ 1556 if (m.imm >= -256 && m.imm <= 255) { 1557 u32 imm9 = (u32)((u64)m.imm & 0x1ffu); 1558 u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = size, 1559 .V = V, 1560 .opc = opc, 1561 .imm9 = imm9, 1562 .Rn = m.base.num, 1563 .Rt = rt.num}); 1564 emit32(d, word); 1565 return; 1566 } 1567 asm_driver_panic(d, "asm: ldr/str: immediate out of range"); 1568 } 1569 } 1570 1571 /* ldr/str: access width follows the register (Wt=word, Xt=dword). */ 1572 static void p_ldr_str(AsmDriver* d, int is_load) { 1573 p_ldst_core(d, is_load, /*fixed_size=*/-1, /*sign_ext=*/0); 1574 } 1575 /* Byte/half + signed sub-word loads/stores (fixed access width). */ 1576 static void p_ldrb(AsmDriver* d) { p_ldst_core(d, 1, 0, 0); } 1577 static void p_strb(AsmDriver* d) { p_ldst_core(d, 0, 0, 0); } 1578 static void p_ldrh(AsmDriver* d) { p_ldst_core(d, 1, 1, 0); } 1579 static void p_strh(AsmDriver* d) { p_ldst_core(d, 0, 1, 0); } 1580 static void p_ldrsb(AsmDriver* d) { p_ldst_core(d, 1, 0, 1); } 1581 static void p_ldrsh(AsmDriver* d) { p_ldst_core(d, 1, 1, 1); } 1582 static void p_ldrsw(AsmDriver* d) { p_ldst_core(d, 1, 2, 1); } 1583 1584 /* ldur/stur — unscaled signed-imm9. 
`fixed_size` is the access log2-size 1585 * (0=byte..3=dword) for sturb/ldurb/sturh/ldurh/ldursw etc., or -1 to derive 1586 * it from the register width (stur/ldur). `sign_ext` selects the signed-load 1587 * opc (ldursb/ldursh/ldursw), keyed on the destination register width — the 1588 * unscaled mirror of p_ldst_core. */ 1589 static void p_ldur_stur(AsmDriver* d, int is_load, int fixed_size, 1590 int sign_ext) { 1591 AA64Reg rt = parse_ldst_reg(d); 1592 reject_sp_reg(d, rt, "ldur/stur"); 1593 expect_comma(d, "ldur/stur"); 1594 AA64Mem m = parse_mem(d); 1595 AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext); 1596 if (m.imm < -256 || m.imm > 255) 1597 asm_driver_panic(d, "asm: ldur/stur: imm9 out of range"); 1598 u32 imm9 = (u32)((u64)m.imm & 0x1ffu); 1599 u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = e.size, 1600 .V = e.V, 1601 .opc = e.opc, 1602 .imm9 = imm9, 1603 .Rn = m.base.num, 1604 .Rt = rt.num}); 1605 emit32(d, word); 1606 } 1607 1608 /* ldp / stp Rt, Rt2, [Xn, #imm] or [Xn, #imm]! */ 1609 static void p_ldp_stp(AsmDriver* d, int is_load) { 1610 AA64Reg rt = parse_ldstp_reg(d); 1611 expect_comma(d, "ldp/stp"); 1612 AA64Reg rt2 = parse_ldstp_reg(d); 1613 expect_comma(d, "ldp/stp"); 1614 reject_sp_reg(d, rt, "ldp/stp"); 1615 reject_sp_reg(d, rt2, "ldp/stp"); 1616 if (rt.is64 != rt2.is64 || rt.is_fp != rt2.is_fp || 1617 rt.fp_bytes != rt2.fp_bytes) 1618 asm_driver_panic(d, "asm: ldp/stp: width mismatch"); 1619 AA64Mem m = parse_mem(d); 1620 u32 scale = rt.is_fp ? (u32)rt.fp_bytes : (rt.is64 ? 8u : 4u); 1621 if ((i64)((u64)m.imm % scale) != 0) 1622 asm_driver_panic(d, "asm: ldp/stp: imm not scale-aligned"); 1623 i64 imm7 = m.imm / (i64)scale; 1624 if (imm7 < -64 || imm7 > 63) 1625 asm_driver_panic(d, "asm: ldp/stp: imm7 out of range"); 1626 AA64LdStPPre f = { 1627 .opc = rt.is_fp ? (rt.fp_bytes == 16u ? 2u : 1u) : (rt.is64 ? 2u : 0u), 1628 .V = rt.is_fp ? 1u : 0u, 1629 .L = is_load ? 
1u : 0u, 1630 .imm7 = (u32)imm7 & 0x7fu, 1631 .Rt2 = rt2.num, 1632 .Rn = m.base.num, 1633 .Rt = rt.num}; 1634 if (m.pre_index) 1635 emit32(d, aa64_ldstp_pre_pack(f)); 1636 else if (m.post_index) 1637 emit32(d, aa64_ldstp_post_pack(f)); 1638 else 1639 emit32(d, aa64_ldstp_soff_pack(f)); 1640 } 1641 1642 /* adr / adrp Rd, sym */ 1643 static void p_adr(AsmDriver* d, int is_adrp) { 1644 AA64Reg rd = parse_reg(d); 1645 expect_comma(d, "adr"); 1646 /* adrp page reloc on a symbol: ELF spells a bare symbol (`:got:` selects the 1647 * GOT page); Mach-O spells `sym@PAGE` / `sym@GOTPAGE`. adr takes a bare 1648 * symbol on both. cc -S emits the form matching the target format. */ 1649 AA64RelMod mod = AA64_RELMOD_NONE; 1650 ObjSymId sym = OBJ_SYM_NONE; 1651 i64 off = 0; 1652 if (target_is_macho(d)) { 1653 parse_imm_sym(d, &sym, &off); 1654 mod = parse_reloc_suffix(d); 1655 } else { 1656 mod = parse_reloc_mod(d); 1657 parse_imm_sym(d, &sym, &off); 1658 } 1659 if (!is_adrp) { 1660 if (mod != AA64_RELMOD_NONE) 1661 asm_driver_panic(d, "asm: adr: no relocation modifier valid here"); 1662 } else if (mod != AA64_RELMOD_NONE && mod != AA64_RELMOD_PAGE && 1663 mod != AA64_RELMOD_GOT) { 1664 asm_driver_panic(d, 1665 "asm: adrp: only @PAGE/@GOTPAGE (Mach-O) or :got: " 1666 "(ELF) valid here"); 1667 } 1668 if (sym == OBJ_SYM_NONE) 1669 asm_driver_panic(d, "asm: adr/adrp: symbol required"); 1670 AA64PCRelAdr f = {.op = is_adrp ? AA64_ADR_OP_ADRP : AA64_ADR_OP_ADR, 1671 .immlo = 0, 1672 .immhi = 0, 1673 .Rd = rd.num}; 1674 emit32(d, aa64_pcrel_adr_pack(f)); 1675 MCEmitter* mc = asm_driver_mc(d); 1676 u32 ofs = mc->pos(mc) - 4; 1677 RelocKind k = !is_adrp ? R_AARCH64_ADR_PREL_LO21 1678 : mod == AA64_RELMOD_GOT ? 
R_AARCH64_ADR_GOT_PAGE 1679 : R_AARCH64_ADR_PREL_PG_HI21; 1680 mc->emit_reloc_at(mc, asm_driver_cur_section(d), ofs, k, sym, off, 1, 0); 1681 } 1682 1683 /* ---- atomics / exclusive ---- 1684 * 1685 * Every form here addresses a bare base register `[Xn]` (no offset, no 1686 * index, no writeback). parse_mem already rejects malformed shapes; we 1687 * additionally reject any offset/index so `ldxr w0,[x1,#4]` is an error, 1688 * matching llvm/gas. */ 1689 static AA64Mem parse_mem_bare(AsmDriver* d, const char* what) { 1690 AA64Mem m = parse_mem(d); 1691 if (m.has_offset || m.has_index || m.pre_index || m.post_index) 1692 asm_driver_panic(d, "asm: %.*s: expected bare [Xn] address", 1693 SLICE_ARG(slice_from_cstr(what))); 1694 return m; 1695 } 1696 1697 /* Map an access log2-size (0..3) onto the GPR width the operand register 1698 * must have: byte/half/word use Wt (32-bit), dword uses Xt (64-bit). */ 1699 static void require_gpr_width(AsmDriver* d, AA64Reg r, u32 size, 1700 const char* what) { 1701 reject_sp_reg(d, r, what); 1702 u32 want64 = (size == 3u) ? 1u : 0u; 1703 if ((u32)r.is64 != want64) 1704 asm_driver_panic(d, "asm: %.*s: register width mismatch", 1705 SLICE_ARG(slice_from_cstr(what))); 1706 } 1707 1708 /* Load-exclusive / load-acquire: `<op> Wt|Xt, [Xn]`. 1709 * o2/o0 select the family member (see aa64_ldstex_pack). size is the 1710 * access log2-size; Rs/Rt2 are fixed to 11111. */ 1711 static void p_ldex(AsmDriver* d, u32 size, u32 o2, u32 o0, const char* what) { 1712 AA64Reg rt = parse_reg(d); 1713 require_gpr_width(d, rt, size, what); 1714 expect_comma(d, what); 1715 AA64Mem m = parse_mem_bare(d, what); 1716 emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, 1717 .o2 = o2, 1718 .L = 1u, 1719 .o1 = 0u, 1720 .Rs = AA64_ZR, 1721 .o0 = o0, 1722 .Rt2 = AA64_ZR, 1723 .Rn = m.base.num, 1724 .Rt = rt.num})); 1725 } 1726 1727 /* Store-release without status: `stlr Wt|Xt, [Xn]` (o2=1, L=0, o0=1). 
*/ 1728 static void p_stlr(AsmDriver* d, u32 size, const char* what) { 1729 AA64Reg rt = parse_reg(d); 1730 require_gpr_width(d, rt, size, what); 1731 expect_comma(d, what); 1732 AA64Mem m = parse_mem_bare(d, what); 1733 emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, 1734 .o2 = 1u, 1735 .L = 0u, 1736 .o1 = 0u, 1737 .Rs = AA64_ZR, 1738 .o0 = 1u, 1739 .Rt2 = AA64_ZR, 1740 .Rn = m.base.num, 1741 .Rt = rt.num})); 1742 } 1743 1744 /* Store-exclusive with status: `<op> Ws, Wt|Xt, [Xn]` (L=0). Ws (the 1745 * 32-bit status result) must be a W register and distinct from Rt/Rn. */ 1746 /* Store-exclusive constraint (ARM ARM): the status register Ws must differ 1747 * from the stored value Rt and from the base Rn, else the result is 1748 * UNPREDICTABLE. The base is exempt when it is SP (reg #31 names SP, not the 1749 * WZR the status reg would be). CAS/LSE atomics do NOT share this rule. */ 1750 static void reject_stex_alias(AsmDriver* d, AA64Reg rs, AA64Reg rt, AA64Mem m, 1751 const char* what) { 1752 if (rs.num == rt.num) 1753 asm_driver_panic(d, "asm: %.*s: status reg cannot be the value reg", 1754 SLICE_ARG(slice_from_cstr(what))); 1755 if (!m.base.is_sp && rs.num == m.base.num) 1756 asm_driver_panic(d, "asm: %.*s: status reg cannot be the base reg", 1757 SLICE_ARG(slice_from_cstr(what))); 1758 } 1759 1760 static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) { 1761 AA64Reg rs = parse_reg(d); 1762 reject_sp_reg(d, rs, what); 1763 if (rs.is64) 1764 asm_driver_panic(d, "asm: %.*s: status reg must be 32-bit", 1765 SLICE_ARG(slice_from_cstr(what))); 1766 expect_comma(d, what); 1767 AA64Reg rt = parse_reg(d); 1768 require_gpr_width(d, rt, size, what); 1769 expect_comma(d, what); 1770 AA64Mem m = parse_mem_bare(d, what); 1771 reject_stex_alias(d, rs, rt, m, what); 1772 emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, 1773 .o2 = 0u, 1774 .L = 0u, 1775 .o1 = 0u, 1776 .Rs = rs.num, 1777 .o0 = o0, 1778 .Rt2 = AA64_ZR, 1779 .Rn = m.base.num, 1780 .Rt = 
rt.num})); 1781 } 1782 1783 /* Compare-and-swap: `<op> Ws, Wt, [Xn]` / `<op> Xs, Xt, [Xn]`. Rs and Rt 1784 * share the operand width selected by `size` (word or dword). */ 1785 static void p_cas(AsmDriver* d, u32 size, u32 L, u32 o0, const char* what) { 1786 AA64Reg rs = parse_reg(d); 1787 require_gpr_width(d, rs, size, what); 1788 expect_comma(d, what); 1789 AA64Reg rt = parse_reg(d); 1790 require_gpr_width(d, rt, size, what); 1791 expect_comma(d, what); 1792 AA64Mem m = parse_mem_bare(d, what); 1793 emit32(d, aa64_cas_pack((AA64Cas){.size = size, 1794 .L = L, 1795 .Rs = rs.num, 1796 .o0 = o0, 1797 .Rn = m.base.num, 1798 .Rt = rt.num})); 1799 } 1800 1801 /* LSE atomic memory op: `<op> Ws, Wt, [Xn]` / `<op> Xs, Xt, [Xn]`. 1802 * o3=1 selects SWP; otherwise opc names LDADD/LDCLR/LDEOR/LDSET. */ 1803 static void p_lse(AsmDriver* d, u32 size, u32 A, u32 R, u32 o3, u32 opc, 1804 const char* what) { 1805 AA64Reg rs = parse_reg(d); 1806 require_gpr_width(d, rs, size, what); 1807 expect_comma(d, what); 1808 AA64Reg rt = parse_reg(d); 1809 require_gpr_width(d, rt, size, what); 1810 expect_comma(d, what); 1811 AA64Mem m = parse_mem_bare(d, what); 1812 emit32(d, aa64_lse_atomic_pack((AA64LseAtomic){.size = size, 1813 .A = A, 1814 .R = R, 1815 .Rs = rs.num, 1816 .o3 = o3, 1817 .opc = opc, 1818 .Rn = m.base.num, 1819 .Rt = rt.num})); 1820 } 1821 1822 /* ---- mnemonic dispatch table ---- */ 1823 1824 typedef void (*P_Fn)(AsmDriver*); 1825 1826 typedef struct AA64Mn { 1827 const char* name; 1828 P_Fn fn; 1829 u32 arg; /* per-fn discriminator (alias parameter) */ 1830 } AA64Mn; 1831 1832 /* Wrapper functions for the discriminator-taking parsers, since the 1833 * table holds a uniform P_Fn pointer. Each wraps a single (fn, arg) 1834 * tuple. 
*/ 1835 static void p_addsub_add(AsmDriver* d) { p_addsub(d, /*is_sub=*/0, 0); } 1836 static void p_addsub_adds(AsmDriver* d) { p_addsub(d, 0, 1); } 1837 static void p_addsub_sub(AsmDriver* d) { p_addsub(d, 1, 0); } 1838 static void p_addsub_subs(AsmDriver* d) { p_addsub(d, 1, 1); } 1839 static void p_cmp_w(AsmDriver* d) { p_cmp(d, 0); } 1840 static void p_cmn_w(AsmDriver* d) { p_cmp(d, 1); } 1841 static void p_csel_(AsmDriver* d) { p_condsel(d, 0, 0, "csel"); } 1842 static void p_csinc_(AsmDriver* d) { p_condsel(d, 0, 1, "csinc"); } 1843 static void p_csinv_(AsmDriver* d) { p_condsel(d, 1, 0, "csinv"); } 1844 static void p_csneg_(AsmDriver* d) { p_condsel(d, 1, 1, "csneg"); } 1845 static void p_cset_(AsmDriver* d) { p_cset_like(d, 0, 1, "cset"); } 1846 static void p_csetm_(AsmDriver* d) { p_cset_like(d, 1, 0, "csetm"); } 1847 static void p_neg_w(AsmDriver* d) { p_neg(d, 0); } 1848 static void p_negs_w(AsmDriver* d) { p_neg(d, 1); } 1849 static void p_and_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_AND_OPC, 0); } 1850 static void p_bic_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_AND_OPC, 1); } 1851 static void p_orr_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_ORR_OPC, 0); } 1852 static void p_orn_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_ORR_OPC, 1); } 1853 static void p_eor_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_EOR_OPC, 0); } 1854 static void p_eon_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_EOR_OPC, 1); } 1855 static void p_ands_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_ANDS_OPC, 0); } 1856 static void p_bics_w(AsmDriver* d) { p_log_sr(d, AA64_LOG_ANDS_OPC, 1); } 1857 static void p_madd(AsmDriver* d) { p_dp3(d, 0); } 1858 static void p_msub(AsmDriver* d) { p_dp3(d, 1); } 1859 static void p_mul_w(AsmDriver* d) { p_mul(d, 0); } 1860 static void p_mneg_w(AsmDriver* d) { p_mul(d, 1); } 1861 static void p_udiv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_UDIV_OP); } 1862 static void p_sdiv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_SDIV_OP); } 1863 static void p_lslv_w(AsmDriver* d) { p_dp2(d, 
AA64_DP2_LSLV_OP); } 1864 static void p_lsrv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_LSRV_OP); } 1865 static void p_asrv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_ASRV_OP); } 1866 static void p_rorv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_RORV_OP); } 1867 static void p_lsl_(AsmDriver* d) { p_shift(d, 0); } 1868 static void p_lsr_(AsmDriver* d) { p_shift(d, 1); } 1869 static void p_asr_(AsmDriver* d) { p_shift(d, 2); } 1870 static void p_b_(AsmDriver* d) { p_b(d, 0); } 1871 static void p_bl_(AsmDriver* d) { p_b(d, 1); } 1872 static void p_cbz_(AsmDriver* d) { p_cbz(d, 0); } 1873 static void p_cbnz_(AsmDriver* d) { p_cbz(d, 1); } 1874 static void p_movz_(AsmDriver* d) { p_movwide(d, AA64_MOVZ_OPC); } 1875 static void p_movn_(AsmDriver* d) { p_movwide(d, AA64_MOVN_OPC); } 1876 static void p_movk_(AsmDriver* d) { p_movwide(d, AA64_MOVK_OPC); } 1877 static void p_svc_(AsmDriver* d) { p_except(d, 0); } 1878 static void p_brk_(AsmDriver* d) { p_except(d, 1); } 1879 static void p_hlt_(AsmDriver* d) { p_except(d, 2); } 1880 static void p_ldr_(AsmDriver* d) { p_ldr_str(d, 1); } 1881 static void p_str_(AsmDriver* d) { p_ldr_str(d, 0); } 1882 static void p_ldur_(AsmDriver* d) { p_ldur_stur(d, 1, -1, 0); } 1883 static void p_stur_(AsmDriver* d) { p_ldur_stur(d, 0, -1, 0); } 1884 static void p_ldurb(AsmDriver* d) { p_ldur_stur(d, 1, 0, 0); } 1885 static void p_sturb(AsmDriver* d) { p_ldur_stur(d, 0, 0, 0); } 1886 static void p_ldurh(AsmDriver* d) { p_ldur_stur(d, 1, 1, 0); } 1887 static void p_sturh(AsmDriver* d) { p_ldur_stur(d, 0, 1, 0); } 1888 static void p_ldursb(AsmDriver* d) { p_ldur_stur(d, 1, 0, 1); } 1889 static void p_ldursh(AsmDriver* d) { p_ldur_stur(d, 1, 1, 1); } 1890 static void p_ldursw(AsmDriver* d) { p_ldur_stur(d, 1, 2, 1); } 1891 static void p_ldp_(AsmDriver* d) { p_ldp_stp(d, 1); } 1892 static void p_stp_(AsmDriver* d) { p_ldp_stp(d, 0); } 1893 static void p_adr_(AsmDriver* d) { p_adr(d, 0); } 1894 static void p_adrp_(AsmDriver* d) { p_adr(d, 1); } 1895 1896 /* b.cond 
family. cond codes follow the standard ARMv8 numbering. */ 1897 static void p_b_eq(AsmDriver* d) { p_b_cond(d, 0); } 1898 static void p_b_ne(AsmDriver* d) { p_b_cond(d, 1); } 1899 static void p_b_cs(AsmDriver* d) { p_b_cond(d, 2); } 1900 static void p_b_hs(AsmDriver* d) { p_b_cond(d, 2); } 1901 static void p_b_cc(AsmDriver* d) { p_b_cond(d, 3); } 1902 static void p_b_lo(AsmDriver* d) { p_b_cond(d, 3); } 1903 static void p_b_mi(AsmDriver* d) { p_b_cond(d, 4); } 1904 static void p_b_pl(AsmDriver* d) { p_b_cond(d, 5); } 1905 static void p_b_vs(AsmDriver* d) { p_b_cond(d, 6); } 1906 static void p_b_vc(AsmDriver* d) { p_b_cond(d, 7); } 1907 static void p_b_hi(AsmDriver* d) { p_b_cond(d, 8); } 1908 static void p_b_ls(AsmDriver* d) { p_b_cond(d, 9); } 1909 static void p_b_ge(AsmDriver* d) { p_b_cond(d, 10); } 1910 static void p_b_lt(AsmDriver* d) { p_b_cond(d, 11); } 1911 static void p_b_gt(AsmDriver* d) { p_b_cond(d, 12); } 1912 static void p_b_le(AsmDriver* d) { p_b_cond(d, 13); } 1913 static void p_b_al(AsmDriver* d) { p_b_cond(d, 14); } 1914 1915 /* ---- Scalar floating-point ---- 1916 * Sn/Dn/Hn are the single/double/half views of the FP register file; the 1917 * 2-bit ftype (0=s,1=d,3=h) drives both the encoding and the operand text. 
*/ 1918 static int parse_fp_scalar_from_ident(AsmDriver* d, Sym ident, u32* num, 1919 u32* ftype) { 1920 Slice sl = pool_slice(asm_driver_pool(d), ident); 1921 const char* p = sl.s; 1922 size_t n = sl.len; 1923 u32 ft, r = 0; 1924 size_t i; 1925 if (!p || n < 2) return 0; 1926 if (p[0] == 's' || p[0] == 'S') 1927 ft = 0u; 1928 else if (p[0] == 'd' || p[0] == 'D') 1929 ft = 1u; 1930 else if (p[0] == 'h' || p[0] == 'H') 1931 ft = 3u; 1932 else 1933 return 0; 1934 for (i = 1; i < n; ++i) { 1935 char c = p[i]; 1936 if (c < '0' || c > '9') return 0; 1937 r = r * 10u + (u32)(c - '0'); 1938 if (r > 31u) return 0; 1939 } 1940 *num = r; 1941 *ftype = ft; 1942 return 1; 1943 } 1944 1945 static void parse_fp_scalar(AsmDriver* d, u32* num, u32* ftype) { 1946 AsmTok t = asm_driver_next(d); 1947 if (t.kind != ASM_TOK_IDENT || 1948 !parse_fp_scalar_from_ident(d, t.v.ident, num, ftype)) 1949 asm_driver_panic(d, "asm: expected FP register (Sn/Dn/Hn)"); 1950 } 1951 1952 /* A register operand that may be either a GPR or a scalar FP register — used 1953 * by fmov, whose three forms differ only by operand class. 
*/ 1954 typedef struct FpOrGpr { 1955 int is_fp; 1956 u32 num; 1957 u32 ftype; /* when is_fp */ 1958 int is64; /* when !is_fp */ 1959 } FpOrGpr; 1960 1961 static FpOrGpr parse_fp_or_gpr(AsmDriver* d) { 1962 AsmTok t = asm_driver_next(d); 1963 FpOrGpr r; 1964 AA64Reg g; 1965 memset(&r, 0, sizeof r); 1966 if (t.kind == ASM_TOK_IDENT && 1967 parse_fp_scalar_from_ident(d, t.v.ident, &r.num, &r.ftype)) { 1968 r.is_fp = 1; 1969 return r; 1970 } 1971 memset(&g, 0, sizeof g); 1972 if (t.kind == ASM_TOK_IDENT && parse_reg_from_ident(d, t.v.ident, &g)) { 1973 r.is_fp = 0; 1974 r.num = g.num; 1975 r.is64 = (int)g.is64; 1976 return r; 1977 } 1978 asm_driver_panic(d, "asm: fmov: expected register"); 1979 return r; /* unreachable */ 1980 } 1981 1982 static void p_fp_dp2(AsmDriver* d, u32 op) { 1983 u32 rd, rn, rm, ftd, ftn, ftm; 1984 parse_fp_scalar(d, &rd, &ftd); 1985 expect_comma(d, "fp"); 1986 parse_fp_scalar(d, &rn, &ftn); 1987 expect_comma(d, "fp"); 1988 parse_fp_scalar(d, &rm, &ftm); 1989 if (ftd != ftn || ftd != ftm) 1990 asm_driver_panic(d, "asm: fp: operand type mismatch"); 1991 emit32(d, aa64_fp_dp2(ftd, op, rd, rn, rm)); 1992 } 1993 static void p_fp_dp1(AsmDriver* d, u32 op) { 1994 u32 rd, rn, ftd, ftn; 1995 parse_fp_scalar(d, &rd, &ftd); 1996 expect_comma(d, "fp"); 1997 parse_fp_scalar(d, &rn, &ftn); 1998 if (ftd != ftn) asm_driver_panic(d, "asm: fp: operand type mismatch"); 1999 emit32(d, aa64_fp_dp1(ftd, op, rd, rn)); 2000 } 2001 static void p_fadd(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FADD); } 2002 static void p_fsub(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FSUB); } 2003 static void p_fmul(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FMUL); } 2004 static void p_fdiv(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FDIV); } 2005 static void p_fmax(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FMAX); } 2006 static void p_fmin(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FMIN); } 2007 static void p_fnmul(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FNMUL); } 2008 static void p_fneg(AsmDriver* d) 
{ p_fp_dp1(d, AA64_FP_DP1_FNEG); } 2009 static void p_fabs(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FABS); } 2010 static void p_fsqrt(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FSQRT); } 2011 2012 static void p_fcmp(AsmDriver* d) { 2013 u32 rn, rm, ftn, ftm; 2014 parse_fp_scalar(d, &rn, &ftn); 2015 expect_comma(d, "fcmp"); 2016 parse_fp_scalar(d, &rm, &ftm); 2017 if (ftn != ftm) asm_driver_panic(d, "asm: fcmp: operand type mismatch"); 2018 emit32(d, aa64_fcmp_reg(ftn, rn, rm)); 2019 } 2020 static void p_fcvt(AsmDriver* d) { 2021 u32 rd, rn, ftd, ftn; 2022 parse_fp_scalar(d, &rd, &ftd); 2023 expect_comma(d, "fcvt"); 2024 parse_fp_scalar(d, &rn, &ftn); 2025 emit32(d, aa64_fcvt_prec(ftn /*src*/, ftd /*dst*/, rd, rn)); 2026 } 2027 /* scvtf/ucvtf: FP dst, GPR src. */ 2028 static void p_cvtf(AsmDriver* d, u32 opcode) { 2029 u32 fd, ft; 2030 AA64Reg rn; 2031 parse_fp_scalar(d, &fd, &ft); 2032 expect_comma(d, "cvtf"); 2033 rn = parse_reg(d); 2034 emit32(d, aa64_fp_int_cvt((u32)rn.is64, ft, opcode, fd, rn.num)); 2035 } 2036 /* fcvtzs/fcvtzu: GPR dst, FP src. */ 2037 static void p_fcvtz(AsmDriver* d, u32 opcode) { 2038 AA64Reg rd; 2039 u32 fn, ft; 2040 rd = parse_reg(d); 2041 expect_comma(d, "fcvtz"); 2042 parse_fp_scalar(d, &fn, &ft); 2043 emit32(d, aa64_fp_int_cvt((u32)rd.is64, ft, opcode, rd.num, fn)); 2044 } 2045 static void p_scvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_SCVTF); } 2046 static void p_ucvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_UCVTF); } 2047 static void p_fcvtzs(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZS); } 2048 static void p_fcvtzu(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZU); } 2049 2050 /* Data-processing (1 source): clz/rbit/rev16, and rev (whose opcode2 is the 2051 * width: 2 for 32-bit, 3 for 64-bit). 
*/ 2052 static void p_dp1_op(AsmDriver* d, u32 opcode2) { 2053 AA64Reg rd = parse_reg(d); 2054 AA64Reg rn; 2055 expect_comma(d, "dp1"); 2056 rn = parse_reg(d); 2057 if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: dp1: width mismatch"); 2058 emit32(d, aa64_dp1(rd.is64, opcode2, rd.num, rn.num)); 2059 } 2060 static void p_clz(AsmDriver* d) { p_dp1_op(d, AA64_DP1_CLZ); } 2061 static void p_rbit(AsmDriver* d) { p_dp1_op(d, AA64_DP1_RBIT); } 2062 static void p_rev16(AsmDriver* d) { p_dp1_op(d, AA64_DP1_REV16); } 2063 static void p_rev(AsmDriver* d) { 2064 AA64Reg rd = parse_reg(d); 2065 AA64Reg rn; 2066 expect_comma(d, "rev"); 2067 rn = parse_reg(d); 2068 if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: rev: width mismatch"); 2069 emit32(d, aa64_dp1(rd.is64, rd.is64 ? AA64_DP1_REV64 : AA64_DP1_REV32, rd.num, 2070 rn.num)); 2071 } 2072 2073 /* Bitfield move (opc: 0=sbfm, 1=bfm, 2=ubfm): Rd, Rn, #immr, #imms. */ 2074 static void p_bitfield(AsmDriver* d, u32 opc) { 2075 AA64Reg rd = parse_reg(d); 2076 AA64Reg rn; 2077 i64 immr, imms; 2078 expect_comma(d, "bitfield"); 2079 rn = parse_reg(d); 2080 expect_comma(d, "bitfield"); 2081 immr = parse_imm_const(d); 2082 expect_comma(d, "bitfield"); 2083 imms = parse_imm_const(d); 2084 if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: bitfield: width mismatch"); 2085 emit32(d, aa64_bitfield(rd.is64, opc, (u32)immr, (u32)imms, rd.num, rn.num)); 2086 } 2087 static void p_sbfm(AsmDriver* d) { p_bitfield(d, 0u); } 2088 static void p_bfm(AsmDriver* d) { p_bitfield(d, 1u); } 2089 static void p_ubfm(AsmDriver* d) { p_bitfield(d, 2u); } 2090 2091 static void p_bfx(AsmDriver* d, u32 opc, const char* what) { 2092 AA64Reg rd = parse_reg(d); 2093 AA64Reg rn; 2094 i64 lsb, width; 2095 u32 reg_width; 2096 expect_comma(d, what); 2097 rn = parse_reg(d); 2098 reject_sp_reg(d, rd, what); 2099 reject_sp_reg(d, rn, what); 2100 if (rd.is64 != rn.is64) 2101 asm_driver_panic(d, "asm: %.*s: width mismatch", 2102 SLICE_ARG(slice_from_cstr(what))); 2103 
expect_comma(d, what); 2104 lsb = parse_imm_const(d); 2105 expect_comma(d, what); 2106 width = parse_imm_const(d); 2107 reg_width = rd.is64 ? 64u : 32u; 2108 if (lsb < 0 || width <= 0 || (u64)lsb >= reg_width || 2109 (u64)width > (u64)reg_width - (u64)lsb) { 2110 asm_driver_panic(d, "asm: %.*s: bit range out of bounds", 2111 SLICE_ARG(slice_from_cstr(what))); 2112 } 2113 emit32(d, aa64_bitfield(rd.is64, opc, (u32)lsb, (u32)(lsb + width - 1), 2114 rd.num, rn.num)); 2115 } 2116 2117 static void p_sbfx(AsmDriver* d) { p_bfx(d, 0u, "sbfx"); } 2118 static void p_ubfx(AsmDriver* d) { p_bfx(d, 2u, "ubfx"); } 2119 2120 static void p_sxt(AsmDriver* d, u32 bits, const char* what) { 2121 AA64Reg rd = parse_reg(d); 2122 AA64Reg rn; 2123 expect_comma(d, what); 2124 rn = parse_reg(d); 2125 reject_sp_reg(d, rd, what); 2126 reject_sp_reg(d, rn, what); 2127 if (rn.is64) 2128 asm_driver_panic(d, "asm: %.*s: source must be a W register", 2129 SLICE_ARG(slice_from_cstr(what))); 2130 if (bits == 32u && !rd.is64) 2131 asm_driver_panic(d, "asm: sxtw: destination must be an X register"); 2132 emit32(d, aa64_bitfield(rd.is64, 0u, 0u, bits - 1u, rd.num, rn.num)); 2133 } 2134 2135 static void p_uxt(AsmDriver* d, u32 bits, const char* what) { 2136 AA64Reg rd = parse_reg(d); 2137 AA64Reg rn; 2138 u32 sf; 2139 expect_comma(d, what); 2140 rn = parse_reg(d); 2141 reject_sp_reg(d, rd, what); 2142 reject_sp_reg(d, rn, what); 2143 if (rn.is64) 2144 asm_driver_panic(d, "asm: %.*s: source must be a W register", 2145 SLICE_ARG(slice_from_cstr(what))); 2146 if (bits == 32u && !rd.is64) 2147 asm_driver_panic(d, "asm: uxtw: destination must be an X register"); 2148 sf = bits == 32u ? 
1u : 0u; 2149 emit32(d, aa64_bitfield(sf, 2u, 0u, bits - 1u, rd.num, rn.num)); 2150 } 2151 2152 static void p_sxtb(AsmDriver* d) { p_sxt(d, 8u, "sxtb"); } 2153 static void p_sxth(AsmDriver* d) { p_sxt(d, 16u, "sxth"); } 2154 static void p_sxtw(AsmDriver* d) { p_sxt(d, 32u, "sxtw"); } 2155 static void p_uxtb(AsmDriver* d) { p_uxt(d, 8u, "uxtb"); } 2156 static void p_uxth(AsmDriver* d) { p_uxt(d, 16u, "uxth"); } 2157 static void p_uxtw(AsmDriver* d) { p_uxt(d, 32u, "uxtw"); } 2158 2159 /* fmov: Vd,Vn (FP reg move) | Rd,Vn (fp->gpr) | Vd,Rn (gpr->fp). */ 2160 static void p_fmov(AsmDriver* d) { 2161 FpOrGpr a = parse_fp_or_gpr(d); 2162 FpOrGpr b; 2163 expect_comma(d, "fmov"); 2164 b = parse_fp_or_gpr(d); 2165 if (a.is_fp && b.is_fp) { 2166 if (a.ftype != b.ftype) 2167 asm_driver_panic(d, "asm: fmov: operand type mismatch"); 2168 emit32(d, aa64_fp_dp1(a.ftype, AA64_FP_DP1_FMOV, a.num, b.num)); 2169 } else if (!a.is_fp && b.is_fp) { 2170 emit32(d, aa64_fp_int_cvt((u32)a.is64, b.ftype, AA64_FP_ICVT_FMOV_TO_GPR, 2171 a.num, b.num)); 2172 } else if (a.is_fp && !b.is_fp) { 2173 emit32(d, aa64_fp_int_cvt((u32)b.is64, a.ftype, AA64_FP_ICVT_FMOV_TO_FP, 2174 a.num, b.num)); 2175 } else { 2176 asm_driver_panic(d, "asm: fmov: gpr,gpr form not supported (use mov)"); 2177 } 2178 } 2179 2180 /* ---- atomics / exclusive wrappers ---- 2181 * 2182 * Access log2-sizes: byte=0, half=1, word=2, dword=3. The w/x variants 2183 * share a mnemonic stem (e.g. `ldxr`) and pick the size from the operand 2184 * register width — the encoders key on the explicit size, so a width- 2185 * sensing wrapper peeks the operand register before dispatching. */ 2186 #define AA64_ATOMIC_SIZE_B 0u 2187 #define AA64_ATOMIC_SIZE_H 1u 2188 #define AA64_ATOMIC_SIZE_W 2u 2189 #define AA64_ATOMIC_SIZE_X 3u 2190 2191 /* Load-exclusive family: o2,o0 select ldxr/ldaxr/ldar. 
*/ 2192 #define DEF_LDEX(fn, sz, o2, o0, name) \ 2193 static void fn(AsmDriver* d) { p_ldex(d, sz, o2, o0, name); } 2194 /* ldxr / ldxrb / ldxrh: o2=0 o0=0. The non-b/h stem derives size from 2195 * the register width, so we route it through a width-sensing wrapper. */ 2196 static void p_ldxr_wx(AsmDriver* d) { 2197 /* Peek the destination register to choose word vs dword size. */ 2198 AsmTok t = asm_driver_peek(d); 2199 AA64Reg r; 2200 memset(&r, 0, sizeof r); 2201 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) 2202 asm_driver_panic(d, "asm: ldxr: expected register"); 2203 p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 0u, 0u, "ldxr"); 2204 } 2205 DEF_LDEX(p_ldxrb, AA64_ATOMIC_SIZE_B, 0u, 0u, "ldxrb") 2206 DEF_LDEX(p_ldxrh, AA64_ATOMIC_SIZE_H, 0u, 0u, "ldxrh") 2207 static void p_ldaxr_wx(AsmDriver* d) { 2208 AsmTok t = asm_driver_peek(d); 2209 AA64Reg r; 2210 memset(&r, 0, sizeof r); 2211 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) 2212 asm_driver_panic(d, "asm: ldaxr: expected register"); 2213 p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 0u, 1u, "ldaxr"); 2214 } 2215 DEF_LDEX(p_ldaxrb, AA64_ATOMIC_SIZE_B, 0u, 1u, "ldaxrb") 2216 DEF_LDEX(p_ldaxrh, AA64_ATOMIC_SIZE_H, 0u, 1u, "ldaxrh") 2217 static void p_ldar_wx(AsmDriver* d) { 2218 AsmTok t = asm_driver_peek(d); 2219 AA64Reg r; 2220 memset(&r, 0, sizeof r); 2221 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) 2222 asm_driver_panic(d, "asm: ldar: expected register"); 2223 p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 1u, 1u, "ldar"); 2224 } 2225 DEF_LDEX(p_ldarb, AA64_ATOMIC_SIZE_B, 1u, 1u, "ldarb") 2226 DEF_LDEX(p_ldarh, AA64_ATOMIC_SIZE_H, 1u, 1u, "ldarh") 2227 2228 /* stlr (no status): width-driven for the non-b/h stem. 
*/ 2229 static void p_stlr_wx(AsmDriver* d) { 2230 AsmTok t = asm_driver_peek(d); 2231 AA64Reg r; 2232 memset(&r, 0, sizeof r); 2233 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) 2234 asm_driver_panic(d, "asm: stlr: expected register"); 2235 p_stlr(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, "stlr"); 2236 } 2237 static void p_stlrb_(AsmDriver* d) { p_stlr(d, AA64_ATOMIC_SIZE_B, "stlrb"); } 2238 static void p_stlrh_(AsmDriver* d) { p_stlr(d, AA64_ATOMIC_SIZE_H, "stlrh"); } 2239 2240 /* Store-exclusive family: o0 selects stxr vs stlxr. Status reg is always 2241 * 32-bit; the stored value reg drives the size for the non-b/h stem. */ 2242 static void p_stxr_wx(AsmDriver* d) { 2243 AA64Reg rs = parse_reg(d); 2244 reject_sp_reg(d, rs, "stxr"); 2245 if (rs.is64) asm_driver_panic(d, "asm: stxr: status reg must be 32-bit"); 2246 expect_comma(d, "stxr"); 2247 AsmTok t = asm_driver_peek(d); 2248 AA64Reg rt; 2249 memset(&rt, 0, sizeof rt); 2250 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &rt)) 2251 asm_driver_panic(d, "asm: stxr: expected value register"); 2252 u32 size = rt.is64 ? 
AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W; 2253 rt = parse_reg(d); 2254 require_gpr_width(d, rt, size, "stxr"); 2255 expect_comma(d, "stxr"); 2256 AA64Mem m = parse_mem_bare(d, "stxr"); 2257 reject_stex_alias(d, rs, rt, m, "stxr"); 2258 emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, 2259 .o2 = 0u, 2260 .L = 0u, 2261 .o1 = 0u, 2262 .Rs = rs.num, 2263 .o0 = 0u, 2264 .Rt2 = AA64_ZR, 2265 .Rn = m.base.num, 2266 .Rt = rt.num})); 2267 } 2268 static void p_stlxr_wx(AsmDriver* d) { 2269 AA64Reg rs = parse_reg(d); 2270 reject_sp_reg(d, rs, "stlxr"); 2271 if (rs.is64) asm_driver_panic(d, "asm: stlxr: status reg must be 32-bit"); 2272 expect_comma(d, "stlxr"); 2273 AsmTok t = asm_driver_peek(d); 2274 AA64Reg rt; 2275 memset(&rt, 0, sizeof rt); 2276 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &rt)) 2277 asm_driver_panic(d, "asm: stlxr: expected value register"); 2278 u32 size = rt.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W; 2279 rt = parse_reg(d); 2280 require_gpr_width(d, rt, size, "stlxr"); 2281 expect_comma(d, "stlxr"); 2282 AA64Mem m = parse_mem_bare(d, "stlxr"); 2283 reject_stex_alias(d, rs, rt, m, "stlxr"); 2284 emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, 2285 .o2 = 0u, 2286 .L = 0u, 2287 .o1 = 0u, 2288 .Rs = rs.num, 2289 .o0 = 1u, 2290 .Rt2 = AA64_ZR, 2291 .Rn = m.base.num, 2292 .Rt = rt.num})); 2293 } 2294 static void p_stxrb_(AsmDriver* d) { 2295 p_stex(d, AA64_ATOMIC_SIZE_B, 0u, "stxrb"); 2296 } 2297 static void p_stxrh_(AsmDriver* d) { 2298 p_stex(d, AA64_ATOMIC_SIZE_H, 0u, "stxrh"); 2299 } 2300 static void p_stlxrb_(AsmDriver* d) { 2301 p_stex(d, AA64_ATOMIC_SIZE_B, 1u, "stlxrb"); 2302 } 2303 static void p_stlxrh_(AsmDriver* d) { 2304 p_stex(d, AA64_ATOMIC_SIZE_H, 1u, "stlxrh"); 2305 } 2306 2307 /* CAS family: width-driven for the non-b/h stems (Rs/Rt are same width). 
*/ 2308 #define DEF_CAS(fn, L, o0, name) \ 2309 static void fn##_wx(AsmDriver* d) { \ 2310 AsmTok t = asm_driver_peek(d); \ 2311 AA64Reg r; \ 2312 memset(&r, 0, sizeof r); \ 2313 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) \ 2314 asm_driver_panic(d, "asm: " name ": expected register"); \ 2315 p_cas(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, L, o0, name); \ 2316 } \ 2317 static void fn##b(AsmDriver* d) { \ 2318 p_cas(d, AA64_ATOMIC_SIZE_B, L, o0, name "b"); \ 2319 } \ 2320 static void fn##h(AsmDriver* d) { \ 2321 p_cas(d, AA64_ATOMIC_SIZE_H, L, o0, name "h"); \ 2322 } 2323 DEF_CAS(p_cas, 0u, 0u, "cas") 2324 DEF_CAS(p_casa, 1u, 0u, "casa") 2325 DEF_CAS(p_casl, 0u, 1u, "casl") 2326 DEF_CAS(p_casal, 1u, 1u, "casal") 2327 2328 /* LSE atomic family: A/R from the suffix, o3/opc from the stem. Each 2329 * mnemonic generates a width-driven stem plus b/h wrappers. */ 2330 #define DEF_LSE(fn, A, R, o3, opc, name) \ 2331 static void fn##_wx(AsmDriver* d) { \ 2332 AsmTok t = asm_driver_peek(d); \ 2333 AA64Reg r; \ 2334 memset(&r, 0, sizeof r); \ 2335 if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) \ 2336 asm_driver_panic(d, "asm: " name ": expected register"); \ 2337 p_lse(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, A, R, o3, opc, \ 2338 name); \ 2339 } \ 2340 static void fn##b(AsmDriver* d) { \ 2341 p_lse(d, AA64_ATOMIC_SIZE_B, A, R, o3, opc, name "b"); \ 2342 } \ 2343 static void fn##h(AsmDriver* d) { \ 2344 p_lse(d, AA64_ATOMIC_SIZE_H, A, R, o3, opc, name "h"); \ 2345 } 2346 /* SWP (o3=1, opc=000). */ 2347 DEF_LSE(p_swp, 0u, 0u, 1u, AA64_LSE_OPC_SWP, "swp") 2348 DEF_LSE(p_swpa, 1u, 0u, 1u, AA64_LSE_OPC_SWP, "swpa") 2349 DEF_LSE(p_swpl, 0u, 1u, 1u, AA64_LSE_OPC_SWP, "swpl") 2350 DEF_LSE(p_swpal, 1u, 1u, 1u, AA64_LSE_OPC_SWP, "swpal") 2351 /* LDADD. 
*/ 2352 DEF_LSE(p_ldadd, 0u, 0u, 0u, AA64_LSE_OPC_LDADD, "ldadd") 2353 DEF_LSE(p_ldadda, 1u, 0u, 0u, AA64_LSE_OPC_LDADD, "ldadda") 2354 DEF_LSE(p_ldaddl, 0u, 1u, 0u, AA64_LSE_OPC_LDADD, "ldaddl") 2355 DEF_LSE(p_ldaddal, 1u, 1u, 0u, AA64_LSE_OPC_LDADD, "ldaddal") 2356 /* LDCLR. */ 2357 DEF_LSE(p_ldclr, 0u, 0u, 0u, AA64_LSE_OPC_LDCLR, "ldclr") 2358 DEF_LSE(p_ldclra, 1u, 0u, 0u, AA64_LSE_OPC_LDCLR, "ldclra") 2359 DEF_LSE(p_ldclrl, 0u, 1u, 0u, AA64_LSE_OPC_LDCLR, "ldclrl") 2360 DEF_LSE(p_ldclral, 1u, 1u, 0u, AA64_LSE_OPC_LDCLR, "ldclral") 2361 /* LDEOR. */ 2362 DEF_LSE(p_ldeor, 0u, 0u, 0u, AA64_LSE_OPC_LDEOR, "ldeor") 2363 DEF_LSE(p_ldeora, 1u, 0u, 0u, AA64_LSE_OPC_LDEOR, "ldeora") 2364 DEF_LSE(p_ldeorl, 0u, 1u, 0u, AA64_LSE_OPC_LDEOR, "ldeorl") 2365 DEF_LSE(p_ldeoral, 1u, 1u, 0u, AA64_LSE_OPC_LDEOR, "ldeoral") 2366 /* LDSET. */ 2367 DEF_LSE(p_ldset, 0u, 0u, 0u, AA64_LSE_OPC_LDSET, "ldset") 2368 DEF_LSE(p_ldseta, 1u, 0u, 0u, AA64_LSE_OPC_LDSET, "ldseta") 2369 DEF_LSE(p_ldsetl, 0u, 1u, 0u, AA64_LSE_OPC_LDSET, "ldsetl") 2370 DEF_LSE(p_ldsetal, 1u, 1u, 0u, AA64_LSE_OPC_LDSET, "ldsetal") 2371 2372 static const AA64Mn kTable[] = { 2373 {"fadd", p_fadd, 0}, 2374 {"fsub", p_fsub, 0}, 2375 {"fmul", p_fmul, 0}, 2376 {"fdiv", p_fdiv, 0}, 2377 {"fmax", p_fmax, 0}, 2378 {"fmin", p_fmin, 0}, 2379 {"fnmul", p_fnmul, 0}, 2380 {"fneg", p_fneg, 0}, 2381 {"fabs", p_fabs, 0}, 2382 {"fsqrt", p_fsqrt, 0}, 2383 {"fmov", p_fmov, 0}, 2384 {"fcmp", p_fcmp, 0}, 2385 {"fcvt", p_fcvt, 0}, 2386 {"scvtf", p_scvtf, 0}, 2387 {"ucvtf", p_ucvtf, 0}, 2388 {"fcvtzs", p_fcvtzs, 0}, 2389 {"fcvtzu", p_fcvtzu, 0}, 2390 {"clz", p_clz, 0}, 2391 {"rbit", p_rbit, 0}, 2392 {"rev", p_rev, 0}, 2393 {"rev16", p_rev16, 0}, 2394 {"sbfm", p_sbfm, 0}, 2395 {"ubfm", p_ubfm, 0}, 2396 {"bfm", p_bfm, 0}, 2397 {"sbfx", p_sbfx, 0}, 2398 {"ubfx", p_ubfx, 0}, 2399 {"sxtb", p_sxtb, 0}, 2400 {"sxth", p_sxth, 0}, 2401 {"sxtw", p_sxtw, 0}, 2402 {"uxtb", p_uxtb, 0}, 2403 {"uxth", p_uxth, 0}, 2404 {"uxtw", p_uxtw, 0}, 2405 {"nop", 
p_nop, 0}, 2406 {"dmb", p_dmb, 0}, 2407 {"dsb", p_dsb, 0}, 2408 {"isb", p_isb, 0}, 2409 {"clrex", p_clrex, 0}, 2410 {"ret", p_ret, 0}, 2411 {"br", p_br, 0}, 2412 {"blr", p_blr, 0}, 2413 {"mov", p_mov, 0}, 2414 {"mvn", p_mvn, 0}, 2415 {"movz", p_movz_, 0}, 2416 {"movn", p_movn_, 0}, 2417 {"movk", p_movk_, 0}, 2418 {"add", p_addsub_add, 0}, 2419 {"adds", p_addsub_adds, 0}, 2420 {"sub", p_addsub_sub, 0}, 2421 {"subs", p_addsub_subs, 0}, 2422 {"cmp", p_cmp_w, 0}, 2423 {"cmn", p_cmn_w, 0}, 2424 {"csel", p_csel_, 0}, 2425 {"csinc", p_csinc_, 0}, 2426 {"csinv", p_csinv_, 0}, 2427 {"csneg", p_csneg_, 0}, 2428 {"cset", p_cset_, 0}, 2429 {"csetm", p_csetm_, 0}, 2430 {"neg", p_neg_w, 0}, 2431 {"negs", p_negs_w, 0}, 2432 {"and", p_and_w, 0}, 2433 {"bic", p_bic_w, 0}, 2434 {"orr", p_orr_w, 0}, 2435 {"orn", p_orn_w, 0}, 2436 {"eor", p_eor_w, 0}, 2437 {"eon", p_eon_w, 0}, 2438 {"ands", p_ands_w, 0}, 2439 {"bics", p_bics_w, 0}, 2440 {"madd", p_madd, 0}, 2441 {"msub", p_msub, 0}, 2442 {"mul", p_mul_w, 0}, 2443 {"mneg", p_mneg_w, 0}, 2444 {"udiv", p_udiv_w, 0}, 2445 {"sdiv", p_sdiv_w, 0}, 2446 {"lslv", p_lslv_w, 0}, 2447 {"lsrv", p_lsrv_w, 0}, 2448 {"asrv", p_asrv_w, 0}, 2449 {"rorv", p_rorv_w, 0}, 2450 {"lsl", p_lsl_, 0}, 2451 {"lsr", p_lsr_, 0}, 2452 {"asr", p_asr_, 0}, 2453 {"b", p_b_, 0}, 2454 {"bl", p_bl_, 0}, 2455 {"cbz", p_cbz_, 0}, 2456 {"cbnz", p_cbnz_, 0}, 2457 {"svc", p_svc_, 0}, 2458 {"brk", p_brk_, 0}, 2459 {"hlt", p_hlt_, 0}, 2460 {"mrs", p_mrs_, 0}, 2461 {"msr", p_msr_, 0}, 2462 {"ldr", p_ldr_, 0}, 2463 {"str", p_str_, 0}, 2464 {"ldrb", p_ldrb, 0}, 2465 {"strb", p_strb, 0}, 2466 {"ldrh", p_ldrh, 0}, 2467 {"strh", p_strh, 0}, 2468 {"ldrsb", p_ldrsb, 0}, 2469 {"ldrsh", p_ldrsh, 0}, 2470 {"ldrsw", p_ldrsw, 0}, 2471 {"ldur", p_ldur_, 0}, 2472 {"stur", p_stur_, 0}, 2473 {"ldurb", p_ldurb, 0}, 2474 {"sturb", p_sturb, 0}, 2475 {"ldurh", p_ldurh, 0}, 2476 {"sturh", p_sturh, 0}, 2477 {"ldursb", p_ldursb, 0}, 2478 {"ldursh", p_ldursh, 0}, 2479 {"ldursw", p_ldursw, 0}, 2480 
{"ldp", p_ldp_, 0}, 2481 {"stp", p_stp_, 0}, 2482 {"adr", p_adr_, 0}, 2483 {"adrp", p_adrp_, 0}, 2484 /* ---- atomics / exclusive ---- */ 2485 {"ldxr", p_ldxr_wx, 0}, 2486 {"ldxrb", p_ldxrb, 0}, 2487 {"ldxrh", p_ldxrh, 0}, 2488 {"ldaxr", p_ldaxr_wx, 0}, 2489 {"ldaxrb", p_ldaxrb, 0}, 2490 {"ldaxrh", p_ldaxrh, 0}, 2491 {"ldar", p_ldar_wx, 0}, 2492 {"ldarb", p_ldarb, 0}, 2493 {"ldarh", p_ldarh, 0}, 2494 {"stxr", p_stxr_wx, 0}, 2495 {"stxrb", p_stxrb_, 0}, 2496 {"stxrh", p_stxrh_, 0}, 2497 {"stlxr", p_stlxr_wx, 0}, 2498 {"stlxrb", p_stlxrb_, 0}, 2499 {"stlxrh", p_stlxrh_, 0}, 2500 {"stlr", p_stlr_wx, 0}, 2501 {"stlrb", p_stlrb_, 0}, 2502 {"stlrh", p_stlrh_, 0}, 2503 {"cas", p_cas_wx, 0}, 2504 {"casb", p_casb, 0}, 2505 {"cash", p_cash, 0}, 2506 {"casa", p_casa_wx, 0}, 2507 {"casab", p_casab, 0}, 2508 {"casah", p_casah, 0}, 2509 {"casl", p_casl_wx, 0}, 2510 {"caslb", p_caslb, 0}, 2511 {"caslh", p_caslh, 0}, 2512 {"casal", p_casal_wx, 0}, 2513 {"casalb", p_casalb, 0}, 2514 {"casalh", p_casalh, 0}, 2515 {"swp", p_swp_wx, 0}, 2516 {"swpb", p_swpb, 0}, 2517 {"swph", p_swph, 0}, 2518 {"swpa", p_swpa_wx, 0}, 2519 {"swpab", p_swpab, 0}, 2520 {"swpah", p_swpah, 0}, 2521 {"swpl", p_swpl_wx, 0}, 2522 {"swplb", p_swplb, 0}, 2523 {"swplh", p_swplh, 0}, 2524 {"swpal", p_swpal_wx, 0}, 2525 {"swpalb", p_swpalb, 0}, 2526 {"swpalh", p_swpalh, 0}, 2527 {"ldadd", p_ldadd_wx, 0}, 2528 {"ldaddb", p_ldaddb, 0}, 2529 {"ldaddh", p_ldaddh, 0}, 2530 {"ldadda", p_ldadda_wx, 0}, 2531 {"ldaddab", p_ldaddab, 0}, 2532 {"ldaddah", p_ldaddah, 0}, 2533 {"ldaddl", p_ldaddl_wx, 0}, 2534 {"ldaddlb", p_ldaddlb, 0}, 2535 {"ldaddlh", p_ldaddlh, 0}, 2536 {"ldaddal", p_ldaddal_wx, 0}, 2537 {"ldaddalb", p_ldaddalb, 0}, 2538 {"ldaddalh", p_ldaddalh, 0}, 2539 {"ldclr", p_ldclr_wx, 0}, 2540 {"ldclrb", p_ldclrb, 0}, 2541 {"ldclrh", p_ldclrh, 0}, 2542 {"ldclra", p_ldclra_wx, 0}, 2543 {"ldclrab", p_ldclrab, 0}, 2544 {"ldclrah", p_ldclrah, 0}, 2545 {"ldclrl", p_ldclrl_wx, 0}, 2546 {"ldclrlb", p_ldclrlb, 0}, 2547 
{"ldclrlh", p_ldclrlh, 0}, 2548 {"ldclral", p_ldclral_wx, 0}, 2549 {"ldclralb", p_ldclralb, 0}, 2550 {"ldclralh", p_ldclralh, 0}, 2551 {"ldeor", p_ldeor_wx, 0}, 2552 {"ldeorb", p_ldeorb, 0}, 2553 {"ldeorh", p_ldeorh, 0}, 2554 {"ldeora", p_ldeora_wx, 0}, 2555 {"ldeorab", p_ldeorab, 0}, 2556 {"ldeorah", p_ldeorah, 0}, 2557 {"ldeorl", p_ldeorl_wx, 0}, 2558 {"ldeorlb", p_ldeorlb, 0}, 2559 {"ldeorlh", p_ldeorlh, 0}, 2560 {"ldeoral", p_ldeoral_wx, 0}, 2561 {"ldeoralb", p_ldeoralb, 0}, 2562 {"ldeoralh", p_ldeoralh, 0}, 2563 {"ldset", p_ldset_wx, 0}, 2564 {"ldsetb", p_ldsetb, 0}, 2565 {"ldseth", p_ldseth, 0}, 2566 {"ldseta", p_ldseta_wx, 0}, 2567 {"ldsetab", p_ldsetab, 0}, 2568 {"ldsetah", p_ldsetah, 0}, 2569 {"ldsetl", p_ldsetl_wx, 0}, 2570 {"ldsetlb", p_ldsetlb, 0}, 2571 {"ldsetlh", p_ldsetlh, 0}, 2572 {"ldsetal", p_ldsetal_wx, 0}, 2573 {"ldsetalb", p_ldsetalb, 0}, 2574 {"ldsetalh", p_ldsetalh, 0}, 2575 {"b.eq", p_b_eq, 0}, 2576 {"b.ne", p_b_ne, 0}, 2577 {"b.cs", p_b_cs, 0}, 2578 {"b.hs", p_b_hs, 0}, 2579 {"b.cc", p_b_cc, 0}, 2580 {"b.lo", p_b_lo, 0}, 2581 {"b.mi", p_b_mi, 0}, 2582 {"b.pl", p_b_pl, 0}, 2583 {"b.vs", p_b_vs, 0}, 2584 {"b.vc", p_b_vc, 0}, 2585 {"b.hi", p_b_hi, 0}, 2586 {"b.ls", p_b_ls, 0}, 2587 {"b.ge", p_b_ge, 0}, 2588 {"b.lt", p_b_lt, 0}, 2589 {"b.gt", p_b_gt, 0}, 2590 {"b.le", p_b_le, 0}, 2591 {"b.al", p_b_al, 0}, 2592 {NULL, NULL, 0}, 2593 }; 2594 2595 void aa64_asm_insn(AA64Asm* a, AsmDriver* d, Sym mnemonic) { 2596 (void)a; 2597 Slice msl = pool_slice(asm_driver_pool(d), mnemonic); 2598 const char* mp = msl.s; 2599 size_t mn = msl.len; 2600 for (const AA64Mn* row = kTable; row->name; ++row) { 2601 if (icase_eq(mp, mn, row->name)) { 2602 row->fn(d); 2603 return; 2604 } 2605 } 2606 asm_driver_panic(d, "asm: unknown mnemonic"); 2607 } 2608 2609 /* ---- inline-asm template walker (Phase 4b Track C) ---- */ 2610 2611 /* Per-call rendered-line buffer. 
GCC's inline asm rarely emits more 2612 * than a handful of instructions per block; one line of substituted 2613 * text fits comfortably inside this. Truncation panics — the operator 2614 * grammar should never grow a single line beyond this without a 2615 * deliberate reason. */ 2616 #define AA64_INLINE_LINE_CAP 1024 2617 2618 _Noreturn static void inline_panic(AA64Asm* a, const char* msg); 2619 2620 /* Render a 5-bit register number into the StrBuf using the requested 2621 * width form. is64 picks x-form vs w-form; SP / ZR encode as 2622 * register #31 and we render them as wzr/xzr or wsp/sp depending on 2623 * caller intent — for inline-asm v1 the bound operand always names a 2624 * GP register, never SP, so we emit wzr/xzr for #31. */ 2625 static void render_reg(StrBuf* sb, u32 reg, int is64) { 2626 if (reg == 31u) { 2627 strbuf_puts(sb, is64 ? "xzr" : "wzr"); 2628 return; 2629 } 2630 strbuf_putc(sb, is64 ? 'x' : 'w'); 2631 if (reg >= 10u) strbuf_putc(sb, (char)('0' + (reg / 10u))); 2632 strbuf_putc(sb, (char)('0' + (reg % 10u))); 2633 } 2634 2635 static void render_fp_reg(StrBuf* sb, u32 reg, u32 nbytes) { 2636 strbuf_putc(sb, nbytes <= 4u ? 's' : 'd'); 2637 if (reg >= 10u) strbuf_putc(sb, (char)('0' + (reg / 10u))); 2638 strbuf_putc(sb, (char)('0' + (reg % 10u))); 2639 } 2640 2641 static u32 inline_op_size(AA64Asm* a, const Operand* op) { 2642 if (!op->type) return 8u; 2643 u64 n = cg_type_size(a->c, op->type); 2644 if (!n) return 8u; 2645 if (n > 16u) inline_panic(a, "inline asm operand is too large"); 2646 return (u32)n; 2647 } 2648 2649 static int inline_op_is_ptr(AA64Asm* a, const Operand* op) { 2650 return op->type && cg_type_is_ptr(a->c, op->type); 2651 } 2652 2653 /* Render a signed 64-bit integer prefixed with '#'. */ 2654 static void render_imm(StrBuf* sb, i64 v) { 2655 strbuf_putc(sb, '#'); 2656 strbuf_put_i64(sb, v); 2657 } 2658 2659 /* Render an addressing form `[xN, #ofs]` for OPK_INDIRECT. 
*/ 2660 static void render_indirect(StrBuf* sb, Reg base, i32 ofs) { 2661 strbuf_putc(sb, '['); 2662 render_reg(sb, (u32)base, /*is64=*/1); 2663 if (ofs != 0) { 2664 strbuf_puts(sb, ", "); 2665 render_imm(sb, (i64)ofs); 2666 } 2667 strbuf_putc(sb, ']'); 2668 } 2669 2670 _Noreturn static void inline_panic(AA64Asm* a, const char* msg) { 2671 SrcLoc loc = {0, 0, 0}; 2672 compiler_panic(a->c, loc, "inline asm: %.*s", 2673 SLICE_ARG(slice_from_cstr(msg))); 2674 } 2675 2676 /* Resolve operand index N → (kind=0 forced default, 1=force-w, 2=force-x, 2677 * 3=address form `%aN`). Renders into sb. */ 2678 static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) { 2679 u32 ntot = a->nout + a->nin; 2680 if (idx >= ntot) inline_panic(a, "operand index out of range"); 2681 const Operand* op = 2682 (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout]; 2683 switch (form) { 2684 case 1: /* %wN — force 32-bit register form */ 2685 if (op->kind != AA64_INLINE_OPK_REG || 2686 op->pad[0] != AA64_INLINE_OPCLS_INT) 2687 inline_panic(a, "%w on non-integer-register operand"); 2688 render_reg(sb, (u32)op->v.local, 0); 2689 return; 2690 case 2: /* %xN — force 64-bit register form */ 2691 if (op->kind != AA64_INLINE_OPK_REG || 2692 op->pad[0] != AA64_INLINE_OPCLS_INT) 2693 inline_panic(a, "%x on non-integer-register operand"); 2694 render_reg(sb, (u32)op->v.local, 1); 2695 return; 2696 case 3: /* %aN — memory addressing form */ 2697 if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); 2698 /* Inline asm consumes a plain pointer-shaped address; the cg 2699 * contract guarantees no EA index here. */ 2700 if (op->v.ind.index != REG_NONE) 2701 inline_panic(a, "%a operand has unexpected EA index"); 2702 render_indirect(sb, op->v.ind.base, op->v.ind.ofs); 2703 return; 2704 default: 2705 break; 2706 } 2707 /* Default rendering by operand kind. 
*/ 2708 switch (op->kind) { 2709 case AA64_INLINE_OPK_REG: 2710 if (op->pad[0] == AA64_INLINE_OPCLS_FP) { 2711 render_fp_reg(sb, (u32)op->v.local, inline_op_size(a, op)); 2712 } else { 2713 render_reg(sb, (u32)op->v.local, 2714 inline_op_is_ptr(a, op) || inline_op_size(a, op) > 4u); 2715 } 2716 return; 2717 case OPK_IMM: 2718 render_imm(sb, op->v.imm); 2719 return; 2720 case OPK_INDIRECT: 2721 if (op->v.ind.index != REG_NONE) 2722 inline_panic(a, "inline-asm operand has unexpected EA index"); 2723 render_indirect(sb, op->v.ind.base, op->v.ind.ofs); 2724 return; 2725 default: 2726 inline_panic(a, "unsupported operand kind for %N"); 2727 } 2728 } 2729 2730 /* Lex one line of substituted asm and dispatch via aa64_asm_insn. */ 2731 static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text, 2732 size_t len) { 2733 /* Skip blank lines. */ 2734 size_t i; 2735 for (i = 0; i < len; ++i) { 2736 if (text[i] != ' ' && text[i] != '\t') break; 2737 } 2738 if (i == len) return; 2739 2740 AsmLexer* lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len); 2741 AsmDriver* d = asm_driver_open_inline(a->c, mc, lx); 2742 2743 /* The first non-trivial token must be the mnemonic identifier (or a 2744 * `.directive`, but inline asm doesn't normally use directives — leave 2745 * that path unsupported until needed). */ 2746 AsmTok t = asm_driver_peek(d); 2747 while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) { 2748 (void)asm_driver_next(d); 2749 if (t.kind == ASM_TOK_HASH) { 2750 /* Skip cpp linemarker rest of line. */ 2751 while (!asm_driver_at_eol(d)) (void)asm_driver_next(d); 2752 } 2753 t = asm_driver_peek(d); 2754 } 2755 if (t.kind == ASM_TOK_EOF) { 2756 asm_driver_close_inline(d); 2757 asm_lex_close(lx); 2758 return; 2759 } 2760 if (t.kind != ASM_TOK_IDENT) 2761 inline_panic(a, "expected mnemonic at start of inline asm line"); 2762 (void)asm_driver_next(d); 2763 Sym mn = t.v.ident; 2764 /* Compose `b.eq` etc. — same trick as the standalone driver. 
*/ 2765 AsmTok dot = asm_driver_peek(d); 2766 if (asm_driver_tok_is_punct(dot, '.')) { 2767 (void)asm_driver_next(d); 2768 AsmTok rest = asm_driver_next(d); 2769 if (rest.kind != ASM_TOK_IDENT) 2770 inline_panic(a, "composite mnemonic: expected ident after '.'"); 2771 Slice hsl = pool_slice(asm_driver_pool(d), mn); 2772 Slice rsl = pool_slice(asm_driver_pool(d), rest.v.ident); 2773 size_t hn = hsl.len, rn = rsl.len; 2774 const char* hp = hsl.s; 2775 const char* rp = rsl.s; 2776 char buf[64]; 2777 if (hn + 1 + rn >= sizeof buf) 2778 inline_panic(a, "composite mnemonic too long"); 2779 for (size_t k = 0; k < hn; ++k) buf[k] = hp[k]; 2780 buf[hn] = '.'; 2781 for (size_t k = 0; k < rn; ++k) buf[hn + 1 + k] = rp[k]; 2782 mn = pool_intern_slice(asm_driver_pool(d), 2783 (Slice){.s = buf, .len = hn + 1 + rn}); 2784 } 2785 aa64_asm_insn(a, d, mn); 2786 asm_driver_close_inline(d); 2787 asm_lex_close(lx); 2788 } 2789 2790 /* Substitute placeholders into one line's StrBuf, then dispatch. 2791 * 2792 * The input range is [start, end) inside `tmpl`. Updates `*line_idx` 2793 * is not used — the caller resets the StrBuf between lines. */ 2794 static void render_and_run_line(AA64Asm* a, MCEmitter* mc, StrBuf* sb, 2795 const char* start, const char* end) { 2796 strbuf_reset(sb); 2797 for (const char* p = start; p < end; ++p) { 2798 char c = *p; 2799 if (c != '%') { 2800 strbuf_putc(sb, c); 2801 continue; 2802 } 2803 /* Placeholder. */ 2804 if (p + 1 >= end) inline_panic(a, "trailing '%' in template"); 2805 char n = *(p + 1); 2806 if (n == '%') { 2807 strbuf_putc(sb, '%'); 2808 ++p; 2809 continue; 2810 } 2811 if (n == '[') { 2812 /* %[name] — scan to the closing ']' and resolve against 2813 * AsmConstraint.name on the combined outs+ins list. Match by 2814 * comparing the named-bracket contents against the interned name 2815 * Sym stored on each constraint. 
*/ 2816 const char* nbeg = p + 2; 2817 const char* nend = nbeg; 2818 while (nend < end && *nend != ']') ++nend; 2819 if (nend == end) inline_panic(a, "unterminated %[name]"); 2820 size_t nlen = (size_t)(nend - nbeg); 2821 Sym needle = 2822 pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen}); 2823 u32 idx = (u32)-1; 2824 for (u32 k = 0; k < a->nout; ++k) { 2825 if (a->outs[k].name == needle) { 2826 idx = k; 2827 break; 2828 } 2829 } 2830 if (idx == (u32)-1) { 2831 for (u32 k = 0; k < a->nin; ++k) { 2832 if (a->ins[k].name == needle) { 2833 idx = a->nout + k; 2834 break; 2835 } 2836 } 2837 } 2838 if (idx == (u32)-1) 2839 inline_panic(a, "%[name] does not match any constraint"); 2840 p = nend; /* loop's ++p steps past the ']' */ 2841 render_operand(a, sb, idx, 0); 2842 continue; 2843 } 2844 int form = 0; /* 0=default, 1=w, 2=x, 3=a */ 2845 if (n == 'w' || n == 'x' || n == 'a') { 2846 form = (n == 'w') ? 1 : (n == 'x') ? 2 : 3; 2847 ++p; 2848 if (p + 1 >= end) inline_panic(a, "trailing '%' modifier in template"); 2849 n = *(p + 1); 2850 } 2851 if (n == '[') { 2852 /* %w[name] / %x[name] / %a[name] — width modifier + symbolic 2853 * operand. Resolves the same way as %[name] but renders with the 2854 * declared form. 
*/ 2855 const char* nbeg = p + 2; 2856 const char* nend = nbeg; 2857 while (nend < end && *nend != ']') ++nend; 2858 if (nend == end) inline_panic(a, "unterminated %[name]"); 2859 size_t nlen = (size_t)(nend - nbeg); 2860 Sym needle = 2861 pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen}); 2862 u32 idx = (u32)-1; 2863 for (u32 k = 0; k < a->nout; ++k) { 2864 if (a->outs[k].name == needle) { 2865 idx = k; 2866 break; 2867 } 2868 } 2869 if (idx == (u32)-1) { 2870 for (u32 k = 0; k < a->nin; ++k) { 2871 if (a->ins[k].name == needle) { 2872 idx = a->nout + k; 2873 break; 2874 } 2875 } 2876 } 2877 if (idx == (u32)-1) 2878 inline_panic(a, "%[name] does not match any constraint"); 2879 p = nend; /* loop's ++p steps past the ']' */ 2880 render_operand(a, sb, idx, form); 2881 continue; 2882 } 2883 if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'"); 2884 u32 idx = (u32)(n - '0'); 2885 ++p; 2886 /* GCC syntax permits up to two digits (%0..%99). */ 2887 if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') { 2888 idx = idx * 10 + (u32)(*(p + 1) - '0'); 2889 ++p; 2890 } 2891 render_operand(a, sb, idx, form); 2892 } 2893 if (sb->truncated) inline_panic(a, "inline asm line buffer overflow"); 2894 run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb)); 2895 } 2896 2897 void aa64_asm_run_template(AA64Asm* a, MCEmitter* mc, const char* tmpl) { 2898 if (!tmpl || !*tmpl) return; 2899 2900 char buf[AA64_INLINE_LINE_CAP]; 2901 StrBuf sb; 2902 strbuf_init(&sb, buf, sizeof buf); 2903 2904 /* Walk tmpl, splitting on '\n' and ';' line terminators. Track bracket 2905 * depth and quote state so that a literal ';' inside `[ ... ]` or a 2906 * quoted string is not mistaken for a statement separator. 
*/ 2907 const char* line_start = tmpl; 2908 int bracket = 0; 2909 char quote = 0; 2910 for (const char* p = tmpl;; ++p) { 2911 char c = *p; 2912 if (c == '\0') { 2913 render_and_run_line(a, mc, &sb, line_start, p); 2914 break; 2915 } 2916 if (quote) { 2917 if (c == '\\' && *(p + 1)) { 2918 ++p; 2919 continue; 2920 } 2921 if (c == quote) quote = 0; 2922 continue; 2923 } 2924 if (c == '"' || c == '\'') { 2925 quote = c; 2926 continue; 2927 } 2928 if (c == '[') { 2929 ++bracket; 2930 continue; 2931 } 2932 if (c == ']') { 2933 if (bracket) --bracket; 2934 continue; 2935 } 2936 if (bracket == 0 && (c == '\n' || c == ';')) { 2937 render_and_run_line(a, mc, &sb, line_start, p); 2938 line_start = p + 1; 2939 } 2940 } 2941 }