asm.c (52855B)
1 /* RV64 assembler — descriptor-table driven. 2 * 3 * Mnemonic → Rv64InsnDesc via rv64_asm_find; operand parsing dispatches 4 * on the format kind. The descriptor's `match` field already carries 5 * the funct3/funct7/opcode bits; the parser only needs to fill in the 6 * register operands and immediate. 7 * 8 * Aliases (li, mv, ret, jr, j, nop, sext.w, beqz, bnez) are recognized 9 * by their alias rows in the descriptor table and rewritten to the 10 * canonical encoding here. Inline rv_* encoders in isa.h remain the 11 * hot path for codegen; the assembler uses them to assemble the 12 * machine word once it has the operand values. */ 13 14 #include "arch/rv64/asm.h" 15 16 #include <string.h> 17 18 #include "arch/rv64/isa.h" 19 #include "arch/rv64/regs.h" 20 #include "arch/rv64/rv64.h" 21 #include "asm/asm_helpers.h" 22 #include "core/arena.h" 23 #include "core/pool.h" 24 #include "core/slice.h" 25 #include "core/strbuf.h" 26 #include "obj/obj.h" 27 28 struct Rv64Asm { 29 ArchAsm base; 30 Compiler* c; 31 32 /* Inline-asm bound state (set by rv64_inline_bind, cleared otherwise). 33 * Operand indexing per GCC convention: 0..nout-1 are outputs, then 34 * nout..nout+nin-1 are inputs. Templates address into this combined 35 * list via %N / %zN / %aN / %w[name] / %x[name]. */ 36 const AsmConstraint* outs; 37 Operand* out_ops; 38 const AsmConstraint* ins; 39 const Operand* in_ops; 40 const Sym* clobbers; 41 u32 nout; 42 u32 nin; 43 u32 nclob; 44 }; 45 46 typedef struct Rv64Asm Rv64Asm; 47 48 /* Relocation modifier on a 12-bit immediate offset (`%lo`/`%pcrel_lo`). 49 * RV_MEMMOD_NONE means a plain numeric displacement in `disp`. 
*/ 50 typedef enum RvMemMod { 51 RV_MEMMOD_NONE = 0, 52 RV_MEMMOD_LO, 53 RV_MEMMOD_PCREL_LO, 54 } RvMemMod; 55 56 typedef struct Rv64Mem { 57 i32 disp; 58 u32 base; 59 RvMemMod mod; /* reloc modifier on the offset, or RV_MEMMOD_NONE */ 60 ObjSymId sym; /* symbol when mod != NONE */ 61 i64 off; /* addend when mod != NONE */ 62 } Rv64Mem; 63 64 static int sym_to_cstr(AsmDriver* d, Sym s, char* out, size_t cap) { 65 Slice sl = pool_slice(asm_driver_pool(d), s); 66 if (!sl.s || sl.len >= cap) return 0; 67 memcpy(out, sl.s, sl.len); 68 out[sl.len] = '\0'; 69 return 1; 70 } 71 72 /* True if `s` begins with the NUL-terminated literal `pfx` (length-explicit). 73 */ 74 static bool slice_has_prefix_cstr(Slice s, const char* pfx, size_t n) { 75 return s.len >= n && memcmp(s.s, pfx, n) == 0; 76 } 77 78 static int rv_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, int* fp_out) { 79 char name[16]; 80 uint32_t dwarf = 0; 81 if (!sym_to_cstr(d, s, name, sizeof name)) return 0; 82 if (rv64_register_index(name, &dwarf) != 0) return 0; 83 if (reg_out) *reg_out = dwarf & 31u; 84 if (fp_out) *fp_out = dwarf >= 32u; 85 return 1; 86 } 87 88 static u32 parse_reg(AsmDriver* d, int* fp_out) { 89 AsmTok t = asm_driver_next(d); 90 u32 r; 91 if (t.kind != ASM_TOK_IDENT || !rv_reg_from_name(d, t.v.ident, &r, fp_out)) 92 asm_driver_panic(d, "rv64 asm: bad register"); 93 return r; 94 } 95 96 static u32 parse_xreg(AsmDriver* d) { 97 int fp = 0; 98 u32 r = parse_reg(d, &fp); 99 if (fp) asm_driver_panic(d, "rv64 asm: expected integer register"); 100 return r; 101 } 102 103 static u32 parse_freg(AsmDriver* d) { 104 int fp = 0; 105 u32 r = parse_reg(d, &fp); 106 if (!fp) asm_driver_panic(d, "rv64 asm: expected float register"); 107 return r; 108 } 109 110 static void expect_comma(AsmDriver* d) { 111 if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "rv64 asm: expected ','"); 112 } 113 114 /* Position of a `%mod(sym)` relocation operand: the 20-bit upper field of 115 * lui/auipc, or a 12-bit I-type 
(addi/load) or S-type (store) immediate. */ 116 typedef enum RvModPos { 117 RV_MODPOS_HI20, 118 RV_MODPOS_LO_I, 119 RV_MODPOS_LO_S, 120 } RvModPos; 121 122 /* Map a relocation-modifier name (`hi`, `lo`, `pcrel_hi`, `pcrel_lo`, 123 * `got_pcrel_hi`) to the RelocKind appropriate for `pos`. Panics on a name 124 * that is not valid at this operand position. */ 125 static RelocKind rv_mod_to_reloc(AsmDriver* d, Slice name, RvModPos pos) { 126 if (pos == RV_MODPOS_HI20) { 127 if (slice_eq_cstr(name, "hi")) return R_RV_HI20; 128 if (slice_eq_cstr(name, "pcrel_hi")) return R_RV_PCREL_HI20; 129 if (slice_eq_cstr(name, "got_pcrel_hi")) return R_RV_GOT_HI20; 130 } else { 131 int store = (pos == RV_MODPOS_LO_S); 132 if (slice_eq_cstr(name, "lo")) return store ? R_RV_LO12_S : R_RV_LO12_I; 133 if (slice_eq_cstr(name, "pcrel_lo")) 134 return store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I; 135 } 136 asm_driver_panic(d, "rv64 asm: relocation modifier not valid here"); 137 } 138 139 /* If the next token is `%`, parse a `%mod(sym{+off})` relocation operand, 140 * emit the relocation at the current emit position (where the about-to-be- 141 * returned instruction word will land), and return 1. The caller encodes a 142 * zero placeholder in the immediate field. Returns 0 if there is no modifier 143 * (leaving the stream untouched for normal constant parsing). A leading `%` 144 * is unambiguous here: modulo is infix and never starts an operand. 
*/ 145 static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out, 146 i64* off_out, RelocKind* kind_out) { 147 if (!asm_driver_tok_is_punct(asm_driver_peek(d), '%')) return 0; 148 (void)asm_driver_next(d); /* eat '%' */ 149 AsmTok name = asm_driver_next(d); 150 if (name.kind != ASM_TOK_IDENT) 151 asm_driver_panic(d, "rv64 asm: expected relocation modifier name"); 152 Slice nm = pool_slice(asm_driver_pool(d), name.v.ident); 153 asm_driver_expect_punct(d, '(', "'(' after relocation modifier"); 154 ObjSymId sym = OBJ_SYM_NONE; 155 i64 off = 0; 156 asm_driver_parse_sym_expr(d, &sym, &off); 157 asm_driver_expect_punct(d, ')', "')' after %mod(sym)"); 158 RelocKind k = rv_mod_to_reloc(d, nm, pos); 159 if (sym_out) *sym_out = sym; 160 if (off_out) *off_out = off; 161 if (kind_out) *kind_out = k; 162 return 1; 163 } 164 165 /* Parse a RISC-V rounding-mode mnemonic (the comma is already consumed) into 166 * its 3-bit funct3 value. cc -S emits this suffix on fcvt/fsqrt when the mode 167 * isn't the default `dyn`, so the round-trip (and clang) re-encode the exact 168 * mode rather than guessing a default. */ 169 static u32 rv_parse_rm_name(AsmDriver* d) { 170 AsmTok t = asm_driver_next(d); 171 Slice s; 172 if (t.kind != ASM_TOK_IDENT) 173 asm_driver_panic(d, "rv64 asm: expected rounding mode"); 174 s = pool_slice(asm_driver_pool(d), t.v.ident); 175 if (slice_eq_cstr(s, "rne")) return 0u; 176 if (slice_eq_cstr(s, "rtz")) return 1u; 177 if (slice_eq_cstr(s, "rdn")) return 2u; 178 if (slice_eq_cstr(s, "rup")) return 3u; 179 if (slice_eq_cstr(s, "rmm")) return 4u; 180 if (slice_eq_cstr(s, "dyn")) return 7u; 181 asm_driver_panic(d, "rv64 asm: unknown rounding mode"); 182 } 183 184 /* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at 185 * the current instruction position; returns 1 if one was present. 
*/ 186 static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) { 187 ObjSymId sym; 188 i64 off; 189 RelocKind k; 190 if (!rv_parse_mod_reloc(d, pos, &sym, &off, &k)) return 0; 191 MCEmitter* mc = asm_driver_mc(d); 192 mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, sym, off, 0, 0); 193 return 1; 194 } 195 196 static Rv64Mem parse_mem(AsmDriver* d) { 197 Rv64Mem m; 198 m.disp = 0; 199 m.mod = RV_MEMMOD_NONE; 200 m.sym = OBJ_SYM_NONE; 201 m.off = 0; 202 if (asm_driver_tok_is_punct(asm_driver_peek(d), '%')) { 203 /* `%lo(sym)(base)` / `%pcrel_lo(label)(base)` — record the modifier; the 204 * load/store caller emits the I- or S-type relocation. */ 205 ObjSymId sym; 206 i64 off; 207 RelocKind k; 208 (void)rv_parse_mod_reloc(d, RV_MODPOS_LO_I, &sym, &off, &k); 209 m.mod = (k == R_RV_PCREL_LO12_I) ? RV_MEMMOD_PCREL_LO : RV_MEMMOD_LO; 210 m.sym = sym; 211 m.off = off; 212 } else { 213 m.disp = (i32)asm_driver_parse_const(d); 214 } 215 asm_driver_expect_punct(d, '(', "'(' in rv64 memory operand"); 216 m.base = parse_xreg(d); 217 asm_driver_expect_punct(d, ')', "')' in rv64 memory operand"); 218 return m; 219 } 220 221 /* Emit the I/S-type relocation recorded by parse_mem for a `%lo`/`%pcrel_lo` 222 * memory offset, picking the S-type variant for stores. */ 223 static void rv_emit_mem_mod_reloc(AsmDriver* d, const Rv64Mem* m, 224 int is_store) { 225 if (m->mod == RV_MEMMOD_NONE) return; 226 RelocKind k = (m->mod == RV_MEMMOD_PCREL_LO) 227 ? (is_store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I) 228 : (is_store ? R_RV_LO12_S : R_RV_LO12_I); 229 MCEmitter* mc = asm_driver_mc(d); 230 mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, m->sym, m->off, 0, 0); 231 } 232 233 /* Fence pred/succ parser — accepts a string like "rw" / "iorw" / "0" / 234 * a numeric literal. Returns the 4-bit mask: bit3=i, bit2=o, bit1=r, 235 * bit0=w. 
*/
static u32 parse_fence_mask(AsmDriver* d) {
  AsmTok t = asm_driver_peek(d);
  if (t.kind == ASM_TOK_NUM) {
    /* The token was only peeked: let asm_driver_parse_const consume the
     * number itself. Eating it first (as this code used to) made the
     * constant parser start at the token *after* the mask. */
    return (u32)asm_driver_parse_const(d) & 0xfu;
  }
  if (t.kind == ASM_TOK_IDENT) {
    /* Letter form: each of i/o/r/w sets its bit; any other character (or a
     * name too long for the buffer) is rejected. */
    char name[8];
    AsmTok tt = asm_driver_next(d);
    if (!sym_to_cstr(d, tt.v.ident, name, sizeof name))
      asm_driver_panic(d, "rv64 asm: bad fence mask");
    u32 mask = 0;
    for (const char* p = name; *p; ++p) {
      switch (*p) {
        case 'i':
          mask |= 8u;
          break;
        case 'o':
          mask |= 4u;
          break;
        case 'r':
          mask |= 2u;
          break;
        case 'w':
          mask |= 1u;
          break;
        default:
          asm_driver_panic(d, "rv64 asm: bad fence char");
      }
    }
    return mask;
  }
  asm_driver_panic(d, "rv64 asm: bad fence operand");
}

/* Field overlay onto a descriptor's `match` word.
 *
 * For most formats the descriptor's match already pins opcode +
 * funct3 + funct7. We OR in the per-operand fields. For shift-imm and
 * AMO families the layouts diverge from the basic R/I templates — we
 * handle those explicitly below.
*/ 277 278 static u32 enc_r(u32 match, u32 rd, u32 rs1, u32 rs2) { 279 return match | ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) | 280 ((rd & 0x1fu) << 7); 281 } 282 static u32 enc_i(u32 match, u32 rd, u32 rs1, i32 imm12) { 283 return match | (((u32)imm12 & 0xfffu) << 20) | ((rs1 & 0x1fu) << 15) | 284 ((rd & 0x1fu) << 7); 285 } 286 static u32 enc_s(u32 match, u32 rs2, u32 rs1, i32 imm12) { 287 u32 ui = (u32)imm12 & 0xfffu; 288 return match | ((ui >> 5) << 25) | ((rs2 & 0x1fu) << 20) | 289 ((rs1 & 0x1fu) << 15) | ((ui & 0x1fu) << 7); 290 } 291 static u32 enc_b(u32 match, u32 rs1, u32 rs2, i32 imm13) { 292 u32 ui = (u32)imm13; 293 return match | (((ui >> 12) & 1u) << 31) | (((ui >> 5) & 0x3fu) << 25) | 294 ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) | 295 (((ui >> 1) & 0xfu) << 8) | (((ui >> 11) & 1u) << 7); 296 } 297 static u32 enc_u(u32 match, u32 rd, u32 imm20) { 298 return match | ((imm20 & 0xfffffu) << 12) | ((rd & 0x1fu) << 7); 299 } 300 static u32 enc_j(u32 match, u32 rd, i32 imm21) { 301 u32 ui = (u32)imm21; 302 return match | (((ui >> 20) & 1u) << 31) | (((ui >> 1) & 0x3ffu) << 21) | 303 (((ui >> 11) & 1u) << 20) | (((ui >> 12) & 0xffu) << 12) | 304 ((rd & 0x1fu) << 7); 305 } 306 static u32 enc_r4(u32 match, u32 rd, u32 rs1, u32 rs2, u32 rs3, u32 rm) { 307 return match | ((rs3 & 0x1fu) << 27) | ((rs2 & 0x1fu) << 20) | 308 ((rs1 & 0x1fu) << 15) | ((rm & 0x7u) << 12) | ((rd & 0x1fu) << 7); 309 } 310 311 /* RV64I shift-imm: shamt6 occupies bits 25:20; funct6 already in match. */ 312 static u32 enc_ishift(u32 match, u32 rd, u32 rs1, u32 shamt) { 313 return match | ((shamt & 0x3fu) << 20) | ((rs1 & 0x1fu) << 15) | 314 ((rd & 0x1fu) << 7); 315 } 316 /* RV32 word shift-imm: shamt5 occupies bits 24:20 (funct7 already pinned). 
*/ 317 static u32 enc_ishiftw(u32 match, u32 rd, u32 rs1, u32 shamt) { 318 return match | ((shamt & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) | 319 ((rd & 0x1fu) << 7); 320 } 321 /* AMO: aq/rl bits 26/25 — we accept them as optional .aq/.rl suffixes 322 * on the mnemonic. For now mnemonics arrive bare. */ 323 static u32 enc_amo(u32 match, u32 aq, u32 rl, u32 rd, u32 rs1, u32 rs2) { 324 return match | ((aq & 1u) << 26) | ((rl & 1u) << 25) | ((rs2 & 0x1fu) << 20) | 325 ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7); 326 } 327 328 static u32 c_reg3(AsmDriver* d, u32 r) { 329 if (r < 8u || r > 15u) 330 asm_driver_panic(d, 331 "rv64 asm: compressed register must be x8..x15/f8..f15"); 332 return r - 8u; 333 } 334 335 static u32 enc_c_ci(u32 match, u32 rd, i32 imm) { 336 u32 u = (u32)imm & 0x3fu; 337 return match | (((u >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) | 338 ((u & 0x1fu) << 2); 339 } 340 341 static u32 enc_c_cr(u32 match, u32 rd_rs1, u32 rs2) { 342 return match | ((rd_rs1 & 0x1fu) << 7) | ((rs2 & 0x1fu) << 2); 343 } 344 345 static u32 enc_c_addi16sp(u32 match, i32 imm) { 346 u32 u = (u32)imm & 0x3ffu; 347 return match | (((u >> 9) & 1u) << 12) | (((u >> 4) & 1u) << 6) | 348 (((u >> 6) & 1u) << 5) | (((u >> 7) & 3u) << 3) | 349 (((u >> 5) & 1u) << 2); 350 } 351 352 static u32 enc_c_addi4spn(u32 match, u32 rd3, u32 imm) { 353 u32 enc = (((imm >> 4) & 3u) << 6) | (((imm >> 6) & 0xfu) << 2) | 354 (((imm >> 2) & 1u) << 1) | ((imm >> 3) & 1u); 355 return match | ((enc & 0xffu) << 5) | ((rd3 & 7u) << 2); 356 } 357 358 static u32 enc_c_lwld(u32 match, u32 rd3, u32 rs1_3, u32 off, int wide64) { 359 if (wide64) { 360 return match | (((off >> 3) & 7u) << 10) | ((rs1_3 & 7u) << 7) | 361 (((off >> 6) & 3u) << 5) | ((rd3 & 7u) << 2); 362 } 363 return match | (((off >> 3) & 7u) << 10) | ((rs1_3 & 7u) << 7) | 364 (((off >> 2) & 1u) << 6) | (((off >> 6) & 1u) << 5) | 365 ((rd3 & 7u) << 2); 366 } 367 368 static u32 enc_c_swld(u32 match, u32 rs2_3, u32 rs1_3, u32 off, int wide64) { 369 
return enc_c_lwld(match, rs2_3, rs1_3, off, wide64); 370 } 371 372 static u32 enc_c_lwsp(u32 match, u32 rd, u32 off, int wide64) { 373 if (wide64) { 374 return match | (((off >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) | 375 (((off >> 3) & 3u) << 5) | (((off >> 6) & 7u) << 2); 376 } 377 return match | (((off >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) | 378 (((off >> 2) & 7u) << 4) | (((off >> 6) & 3u) << 2); 379 } 380 381 static u32 enc_c_swsp(u32 match, u32 rs2, u32 off, int wide64) { 382 u32 imm6; 383 if (wide64) 384 imm6 = (((off >> 3) & 7u) << 3) | ((off >> 6) & 7u); 385 else 386 imm6 = (((off >> 2) & 0xfu) << 2) | ((off >> 6) & 3u); 387 return match | ((imm6 & 0x3fu) << 7) | ((rs2 & 0x1fu) << 2); 388 } 389 390 static u32 enc_c_cb_imm(u32 match, u32 rs1_3, i32 imm) { 391 u32 u = (u32)imm & 0x1ffu; 392 return match | (((u >> 8) & 1u) << 12) | (((u >> 3) & 3u) << 10) | 393 ((rs1_3 & 7u) << 7) | (((u >> 6) & 3u) << 5) | (((u >> 1) & 3u) << 3) | 394 (((u >> 5) & 1u) << 2); 395 } 396 397 static u32 enc_c_cb_alu_imm(u32 match, u32 rd3, i32 imm) { 398 u32 u = (u32)imm & 0x3fu; 399 return match | (((u >> 5) & 1u) << 12) | ((rd3 & 7u) << 7) | 400 ((u & 0x1fu) << 2); 401 } 402 403 static u32 enc_c_cj(u32 match, i32 imm) { 404 u32 u = (u32)imm & 0xfffu; 405 return match | (((u >> 11) & 1u) << 12) | (((u >> 4) & 1u) << 11) | 406 (((u >> 8) & 3u) << 9) | (((u >> 10) & 1u) << 8) | 407 (((u >> 6) & 1u) << 7) | (((u >> 7) & 1u) << 6) | 408 (((u >> 1) & 7u) << 3) | (((u >> 5) & 1u) << 2); 409 } 410 411 /* Parse a branch/jump target operand. With a symbolic target (a label), emit 412 * the relocation at the current position — which is exactly where the caller 413 * is about to write this instruction word — and return 0 as the placeholder 414 * immediate. With a bare constant, return it as the PC-relative byte 415 * displacement (preserving the existing numeric-offset corpus behavior). 
*/ 416 static i32 rv_reloc_target(AsmDriver* d, RelocKind kind) { 417 ObjSymId sym = OBJ_SYM_NONE; 418 i64 off = 0; 419 asm_driver_parse_sym_expr(d, &sym, &off); 420 if (sym != OBJ_SYM_NONE) { 421 MCEmitter* mc = asm_driver_mc(d); 422 mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), kind, sym, off, 0, 0); 423 return 0; 424 } 425 return (i32)off; 426 } 427 428 /* Per-format parser — reads the operand list off the driver and returns 429 * the encoded 32-bit word, given the matched descriptor. */ 430 static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { 431 u32 m = desc->match; 432 u32 rd = 0, rs1 = 0, rs2 = 0; 433 i32 imm = 0; 434 Rv64Mem mem; 435 436 switch ((Rv64Format)desc->fmt) { 437 case RV64_FMT_R: 438 /* Two-operand aliases: snez/neg/negw — rd, rs (rs1=x0). */ 439 if (desc->flags & RV64_ASMFL_ALIAS) { 440 rd = parse_xreg(d); 441 expect_comma(d); 442 rs2 = parse_xreg(d); 443 return enc_r(m, rd, 0u, rs2); 444 } 445 rd = parse_xreg(d); 446 expect_comma(d); 447 rs1 = parse_xreg(d); 448 expect_comma(d); 449 rs2 = parse_xreg(d); 450 return enc_r(m, rd, rs1, rs2); 451 452 case RV64_FMT_R4: { 453 u32 rs3; 454 rd = parse_freg(d); 455 expect_comma(d); 456 rs1 = parse_freg(d); 457 expect_comma(d); 458 rs2 = parse_freg(d); 459 expect_comma(d); 460 rs3 = parse_freg(d); 461 return enc_r4(m, rd, rs1, rs2, rs3, 0x7u); 462 } 463 464 case RV64_FMT_I: 465 /* Aliases first. `li` is handled earlier by rv64_emit_pseudo (it may 466 * need a multi-word expansion), so it never reaches here. */ 467 if (desc->flags & RV64_ASMFL_ALIAS) { 468 if (slice_eq_cstr(desc->mnemonic, "mv")) { 469 /* Standard two-operand `mv rd, rs` = `addi rd, rs, 0`. (A %pcrel_lo 470 * low-half is emitted as the canonical `addi rd, rs, %pcrel_lo(L)`, 471 * not a non-standard 3-operand `mv`, so it lands in the ADDI path 472 * below — matching clang.) 
*/ 473 rd = parse_xreg(d); 474 expect_comma(d); 475 rs1 = parse_xreg(d); 476 return enc_i(m, rd, rs1, 0); 477 } 478 if (slice_eq_cstr(desc->mnemonic, "sext.w")) { 479 rd = parse_xreg(d); 480 expect_comma(d); 481 rs1 = parse_xreg(d); 482 return enc_i(m, rd, rs1, 0); 483 } 484 if (slice_eq_cstr(desc->mnemonic, "seqz") || 485 slice_eq_cstr(desc->mnemonic, "not")) { 486 rd = parse_xreg(d); 487 expect_comma(d); 488 rs1 = parse_xreg(d); 489 /* match already has imm12 + funct3 + op pinned. */ 490 return m | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7); 491 } 492 } 493 rd = parse_xreg(d); 494 expect_comma(d); 495 rs1 = parse_xreg(d); 496 expect_comma(d); 497 /* `addi rd, rs1, %lo(sym)` / `%pcrel_lo(label)` → R_RV_LO12_I. */ 498 if (rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I)) return enc_i(m, rd, rs1, 0); 499 imm = (i32)asm_driver_parse_const(d); 500 return enc_i(m, rd, rs1, imm); 501 502 case RV64_FMT_I_SHIFT: 503 rd = parse_xreg(d); 504 expect_comma(d); 505 rs1 = parse_xreg(d); 506 expect_comma(d); 507 return enc_ishift(m, rd, rs1, (u32)asm_driver_parse_const(d)); 508 509 case RV64_FMT_I_SHIFTW: 510 rd = parse_xreg(d); 511 expect_comma(d); 512 rs1 = parse_xreg(d); 513 expect_comma(d); 514 return enc_ishiftw(m, rd, rs1, (u32)asm_driver_parse_const(d)); 515 516 case RV64_FMT_U: 517 rd = parse_xreg(d); 518 expect_comma(d); 519 /* `lui rd, %hi(sym)` → R_RV_HI20; `auipc rd, %pcrel_hi(sym)` → 520 * R_RV_PCREL_HI20 (or %got_pcrel_hi → R_RV_GOT_HI20). */ 521 if (rv_emit_imm_mod_reloc(d, RV_MODPOS_HI20)) return enc_u(m, rd, 0); 522 imm = (i32)asm_driver_parse_const(d); 523 /* LUI/AUIPC immediate is the upper-20 value: the input is interpreted 524 * as the literal 20-bit value (already shifted-out form). */ 525 return enc_u(m, rd, (u32)imm); 526 527 case RV64_FMT_J: 528 /* `j label` / `jal rd, label` accept a symbolic target (R_RV_JAL) or a 529 * bare numeric displacement. 
*/ 530 if ((desc->flags & RV64_ASMFL_ALIAS) && 531 slice_eq_cstr(desc->mnemonic, "j")) { 532 return enc_j(m, 0u, rv_reloc_target(d, R_RV_JAL)); 533 } 534 rd = parse_xreg(d); 535 expect_comma(d); 536 return enc_j(m, rd, rv_reloc_target(d, R_RV_JAL)); 537 538 case RV64_FMT_B: 539 /* `beq rs1, rs2, label` (and beqz/bnez aliases) accept a symbolic target 540 * (R_RV_BRANCH) or a bare numeric displacement. */ 541 if (desc->flags & RV64_ASMFL_ALIAS) { 542 /* beqz / bnez: rs, off. */ 543 rs1 = parse_xreg(d); 544 expect_comma(d); 545 return enc_b(m, rs1, 0u, rv_reloc_target(d, R_RV_BRANCH)); 546 } 547 rs1 = parse_xreg(d); 548 expect_comma(d); 549 rs2 = parse_xreg(d); 550 expect_comma(d); 551 return enc_b(m, rs1, rs2, rv_reloc_target(d, R_RV_BRANCH)); 552 553 case RV64_FMT_LOAD: 554 rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d); 555 expect_comma(d); 556 mem = parse_mem(d); 557 rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0); 558 return enc_i(m, rd, mem.base, mem.disp); 559 560 case RV64_FMT_FP_LOAD: 561 rd = parse_freg(d); 562 expect_comma(d); 563 mem = parse_mem(d); 564 rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0); 565 return enc_i(m, rd, mem.base, mem.disp); 566 567 case RV64_FMT_STORE: 568 rs2 = (desc->flags & RV64_ASMFL_FP) ? 
parse_freg(d) : parse_xreg(d); 569 expect_comma(d); 570 mem = parse_mem(d); 571 rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1); 572 return enc_s(m, rs2, mem.base, mem.disp); 573 574 case RV64_FMT_FP_STORE: 575 rs2 = parse_freg(d); 576 expect_comma(d); 577 mem = parse_mem(d); 578 rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1); 579 return enc_s(m, rs2, mem.base, mem.disp); 580 581 case RV64_FMT_JALR: 582 if ((desc->flags & RV64_ASMFL_ALIAS) && 583 slice_eq_cstr(desc->mnemonic, "jr")) { 584 rs1 = parse_xreg(d); 585 return enc_i(m, 0u, rs1, 0); 586 } 587 rd = parse_xreg(d); 588 if (!asm_driver_eat_comma(d)) { 589 if (slice_eq_cstr(desc->mnemonic, "jalr")) 590 return enc_i(m, RV_RA, rd, 0); 591 asm_driver_panic(d, "rv64 asm: expected ','"); 592 } 593 /* Accept both `jalr rd, imm(rs1)` and `jalr rd, rs1, imm`. */ 594 { 595 AsmTok t = asm_driver_peek(d); 596 if (t.kind == ASM_TOK_IDENT) { 597 /* register first → register form */ 598 rs1 = parse_xreg(d); 599 if (asm_driver_eat_comma(d)) { 600 imm = (i32)asm_driver_parse_const(d); 601 } else { 602 imm = 0; 603 } 604 return enc_i(m, rd, rs1, imm); 605 } 606 } 607 mem = parse_mem(d); 608 return enc_i(m, rd, mem.base, mem.disp); 609 610 case RV64_FMT_FENCE: { 611 u32 pred, succ; 612 pred = parse_fence_mask(d); 613 expect_comma(d); 614 succ = parse_fence_mask(d); 615 return m | (pred << 24) | (succ << 20); 616 } 617 618 case RV64_FMT_SYSTEM: 619 /* No operands. nop/ret/ecall/ebreak. */ 620 return m; 621 622 case RV64_FMT_FP_RM: 623 rd = parse_freg(d); 624 expect_comma(d); 625 rs1 = parse_freg(d); 626 expect_comma(d); 627 rs2 = parse_freg(d); 628 /* Use DYN(=7) rounding mode by default. 
*/ 629 return enc_r(m | (0x7u << 12), rd, rs1, rs2); 630 631 case RV64_FMT_FP_R: 632 if (desc->flags & RV64_ASMFL_FP) { 633 rd = parse_freg(d); 634 } else { 635 rd = parse_xreg(d); 636 } 637 expect_comma(d); 638 rs1 = parse_freg(d); 639 expect_comma(d); 640 rs2 = parse_freg(d); 641 return enc_r(m, rd, rs1, rs2); 642 643 case RV64_FMT_FP_CVT: 644 if (desc->flags & RV64_ASMFL_FP) { 645 rd = parse_freg(d); 646 expect_comma(d); 647 /* Source: integer reg for fcvt.s.w etc (no FP flag would 648 * indicate); but since we have ASMFL_FP set on dest, source may 649 * be either. Disambiguate by mnemonic. */ 650 if (slice_has_prefix_cstr(desc->mnemonic, "fcvt.s.", 7) && 651 (desc->mnemonic.s[7] == 'w' || desc->mnemonic.s[7] == 'l')) { 652 rs1 = parse_xreg(d); 653 } else if (slice_has_prefix_cstr(desc->mnemonic, "fcvt.d.", 7) && 654 (desc->mnemonic.s[7] == 'w' || desc->mnemonic.s[7] == 'l')) { 655 rs1 = parse_xreg(d); 656 } else if (slice_eq_cstr(desc->mnemonic, "fmv.w.x") || 657 slice_eq_cstr(desc->mnemonic, "fmv.d.x")) { 658 rs1 = parse_xreg(d); 659 } else { 660 rs1 = parse_freg(d); 661 } 662 } else { 663 rd = parse_xreg(d); 664 expect_comma(d); 665 rs1 = parse_freg(d); 666 } 667 /* match encodes rs2 (type selector); OR in rd/rs1 and the rounding mode. 668 * An explicit `, <rm>` suffix (cc -S emits it for non-default modes, and 669 * clang/gas accept it) takes precedence; otherwise the rm is fixed per 670 * conversion family (mirrors the rv_fcvt_* encoders in isa.h, the codegen 671 * source of truth): fp->int truncates (RTZ=1); int->fp and fp->fp use the 672 * default DYN=7; fmv bit-moves carry no rounding (rm=0). 
*/ 673 { 674 u32 funct7 = (m >> 25) & 0x7fu; 675 u32 rm; 676 if (asm_driver_eat_comma(d)) { 677 rm = rv_parse_rm_name(d); 678 } else { 679 switch (funct7) { 680 case 0x60: /* fcvt.{w,wu,l,lu}.s */ 681 case 0x61: /* fcvt.{w,wu,l,lu}.d */ 682 rm = 0x1u; /* RTZ */ 683 break; 684 case 0x70: /* fmv.x.w */ 685 case 0x71: /* fmv.x.d */ 686 case 0x78: /* fmv.w.x */ 687 case 0x79: /* fmv.d.x */ 688 rm = 0x0u; 689 break; 690 default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */ 691 rm = 0x7u; 692 break; 693 } 694 } 695 return m | (rm << 12) | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7); 696 } 697 698 case RV64_FMT_AMO: 699 rd = parse_xreg(d); 700 expect_comma(d); 701 rs2 = parse_xreg(d); 702 expect_comma(d); 703 asm_driver_expect_punct(d, '(', "'(' in rv64 amo operand"); 704 rs1 = parse_xreg(d); 705 asm_driver_expect_punct(d, ')', "')' in rv64 amo operand"); 706 return enc_amo(m, 0u, 0u, rd, rs1, rs2); 707 708 case RV64_FMT_LR: 709 rd = parse_xreg(d); 710 expect_comma(d); 711 asm_driver_expect_punct(d, '(', "'(' in rv64 lr operand"); 712 rs1 = parse_xreg(d); 713 asm_driver_expect_punct(d, ')', "')' in rv64 lr operand"); 714 return enc_amo(m, 0u, 0u, rd, rs1, 0u); 715 716 case RV64_FMT_CSR: { 717 i32 csr; 718 rd = parse_xreg(d); 719 expect_comma(d); 720 csr = (i32)asm_driver_parse_const(d); 721 expect_comma(d); 722 rs1 = parse_xreg(d); 723 return enc_i(m, rd, rs1, csr); 724 } 725 726 case RV64_FMT_CSRI: { 727 i32 csr; 728 rd = parse_xreg(d); 729 expect_comma(d); 730 csr = (i32)asm_driver_parse_const(d); 731 expect_comma(d); 732 u32 uimm = (u32)asm_driver_parse_const(d) & 0x1fu; 733 return enc_i(m, rd, uimm, csr); 734 } 735 736 case RV64_FMT_CR: 737 if (slice_eq_cstr(desc->mnemonic, "c.jr") || 738 slice_eq_cstr(desc->mnemonic, "c.jalr")) { 739 rs1 = parse_xreg(d); 740 return enc_c_cr(m, rs1, 0u); 741 } 742 rd = parse_xreg(d); 743 expect_comma(d); 744 rs2 = parse_xreg(d); 745 return enc_c_cr(m, rd, rs2); 746 747 case RV64_FMT_CI: 748 if (slice_eq_cstr(desc->mnemonic, 
"c.lwsp") || 749 slice_eq_cstr(desc->mnemonic, "c.ldsp") || 750 slice_eq_cstr(desc->mnemonic, "c.fldsp")) { 751 rd = slice_eq_cstr(desc->mnemonic, "c.fldsp") ? parse_freg(d) 752 : parse_xreg(d); 753 expect_comma(d); 754 mem = parse_mem(d); 755 if (mem.base != RV_SP) 756 asm_driver_panic(d, "rv64 asm: compressed stack load needs sp base"); 757 return enc_c_lwsp(m, rd, (u32)mem.disp, 758 !slice_eq_cstr(desc->mnemonic, "c.lwsp")); 759 } 760 rd = parse_xreg(d); 761 expect_comma(d); 762 imm = (i32)asm_driver_parse_const(d); 763 if (slice_eq_cstr(desc->mnemonic, "c.lui") && ((u32)imm & 0xfffu) == 0) 764 imm >>= 12; 765 if (slice_eq_cstr(desc->mnemonic, "c.addi16sp")) { 766 if (rd != RV_SP) 767 asm_driver_panic(d, "rv64 asm: c.addi16sp needs sp destination"); 768 return enc_c_addi16sp(m, imm); 769 } 770 return enc_c_ci(m, rd, imm); 771 772 case RV64_FMT_CSS: 773 rs2 = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d); 774 expect_comma(d); 775 mem = parse_mem(d); 776 if (mem.base != RV_SP) 777 asm_driver_panic(d, "rv64 asm: compressed stack store needs sp base"); 778 return enc_c_swsp(m, rs2, (u32)mem.disp, 779 !slice_eq_cstr(desc->mnemonic, "c.swsp")); 780 781 case RV64_FMT_CIW: 782 rd = parse_xreg(d); 783 expect_comma(d); 784 rs1 = parse_xreg(d); 785 expect_comma(d); 786 if (rs1 != RV_SP) 787 asm_driver_panic(d, "rv64 asm: c.addi4spn needs sp source"); 788 imm = (i32)asm_driver_parse_const(d); 789 return enc_c_addi4spn(m, c_reg3(d, rd), (u32)imm); 790 791 case RV64_FMT_CL: 792 rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d); 793 expect_comma(d); 794 mem = parse_mem(d); 795 return enc_c_lwld(m, c_reg3(d, rd), c_reg3(d, mem.base), (u32)mem.disp, 796 !slice_eq_cstr(desc->mnemonic, "c.lw")); 797 798 case RV64_FMT_CS: 799 rs2 = (desc->flags & RV64_ASMFL_FP) ? 
parse_freg(d) : parse_xreg(d); 800 expect_comma(d); 801 mem = parse_mem(d); 802 return enc_c_swld(m, c_reg3(d, rs2), c_reg3(d, mem.base), (u32)mem.disp, 803 !slice_eq_cstr(desc->mnemonic, "c.sw")); 804 805 case RV64_FMT_CA: 806 rd = parse_xreg(d); 807 expect_comma(d); 808 rs2 = parse_xreg(d); 809 return m | (c_reg3(d, rd) << 7) | (c_reg3(d, rs2) << 2); 810 811 case RV64_FMT_CB: 812 rs1 = parse_xreg(d); 813 expect_comma(d); 814 imm = (i32)asm_driver_parse_const(d); 815 if (slice_eq_cstr(desc->mnemonic, "c.beqz") || 816 slice_eq_cstr(desc->mnemonic, "c.bnez")) { 817 return enc_c_cb_imm(m, c_reg3(d, rs1), imm); 818 } 819 return enc_c_cb_alu_imm(m, c_reg3(d, rs1), imm); 820 821 case RV64_FMT_CJ: 822 imm = (i32)asm_driver_parse_const(d); 823 return enc_c_cj(m, imm); 824 825 case RV64_FMT_C_NONE: 826 return m; 827 828 default: 829 asm_driver_panic(d, "rv64 asm: unsupported format"); 830 } 831 } 832 833 /* ============================================================ 834 * Multi-word pseudo-instruction expansion. 835 * 836 * call/tail/la/lla expand to a PC-relative AUIPC + (JALR | ADDI) pair; 837 * `li` with a constant that does not fit a 12-bit signed immediate 838 * expands to an LUI/ADDI(W)/SLLI chain (no relocations). Each 32-bit 839 * word goes out through rv64_emit32 — the same path assemble_one's 840 * single-word result uses — and relocations are attached via 841 * mc->emit_reloc_at at the appropriate word offset. */ 842 843 /* 12-bit signed immediate range check for li short-circuit. */ 844 static bool rv_fits_i12(i64 v) { return v >= -2048 && v <= 2047; } 845 846 /* Sign-extend the low 12 bits of v. */ 847 static i64 rv_sext12(i64 v) { 848 return (i64)((((u64)v & 0xfffu) ^ 0x800u)) - 0x800; 849 } 850 851 /* Emit an AUIPC rd,0 + a R_RV_PCREL_HI20(sym) reloc, then create a local 852 * `.LpcrelHi` anchor at the AUIPC offset and return that anchor symbol so 853 * the paired low-half reloc can reference it. 
Mirrors native.c's 854 * rv_emit_global_addr (the non-GOT branch). */ 855 static ObjSymId rv_emit_pcrel_hi(AsmDriver* d, u32 rd, ObjSymId sym, 856 i64 addend) { 857 MCEmitter* mc = asm_driver_mc(d); 858 ObjBuilder* obj = asm_driver_ob(d); 859 Compiler* c = asm_driver_compiler(d); 860 u32 sec = mc->section_id; 861 u32 ap = mc->pos(mc); 862 rv64_emit32(mc, rv_auipc(rd, 0)); 863 mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0); 864 Sym an = pool_intern_slice(c->global, SLICE_LIT(".LpcrelHi")); 865 return obj_symbol(obj, an, SB_LOCAL, SK_OBJ, sec, (u64)ap, 0); 866 } 867 868 /* call/tail: AUIPC <link>,0 + JALR <rd>,<link>,0 with one R_RV_CALL reloc 869 * at the AUIPC. `link` is the register the AUIPC materializes into and the 870 * JALR's base; `rd` is the JALR link-register (ra for call, zero for 871 * tail). The linker patches both words from the single R_RV_CALL reloc. */ 872 static void rv_emit_call_pseudo(AsmDriver* d, u32 link, u32 rd) { 873 MCEmitter* mc = asm_driver_mc(d); 874 ObjSymId sym = OBJ_SYM_NONE; 875 i64 off = 0; 876 asm_driver_parse_sym_expr(d, &sym, &off); 877 if (sym == OBJ_SYM_NONE) 878 asm_driver_panic(d, "rv64 asm: call/tail target must be a symbol"); 879 u32 sec = mc->section_id; 880 u32 ap = mc->pos(mc); 881 rv64_emit32(mc, rv_auipc(link, 0)); 882 rv64_emit32(mc, rv_jalr(rd, link, 0)); 883 mc->emit_reloc_at(mc, sec, ap, R_RV_CALL, sym, off, 0, 0); 884 } 885 886 /* la/lla rd, sym: AUIPC rd,%pcrel_hi(sym) + ADDI rd,rd,%pcrel_lo(anchor). 887 * kit's static Local-Exec model has no GOT, so `la` == `lla`. 
*/ 888 static void rv_emit_la_pseudo(AsmDriver* d) { 889 MCEmitter* mc = asm_driver_mc(d); 890 u32 rd = parse_xreg(d); 891 expect_comma(d); 892 ObjSymId sym = OBJ_SYM_NONE; 893 i64 off = 0; 894 asm_driver_parse_sym_expr(d, &sym, &off); 895 if (sym == OBJ_SYM_NONE) 896 asm_driver_panic(d, "rv64 asm: la/lla target must be a symbol"); 897 ObjSymId anchor = rv_emit_pcrel_hi(d, rd, sym, off); 898 u32 sec = mc->section_id; 899 u32 lp = mc->pos(mc); 900 rv64_emit32(mc, rv_addi(rd, rd, 0)); 901 mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); 902 } 903 904 /* LUI immediate that sign-extends to a negative 32-bit value: bit 19 of 905 * the 20-bit field is set, i.e. Hi20 >= 0x80000. */ 906 #define RV_LUI_HI20_SIGN 0x80000LL 907 908 /* Materialize a 64-bit constant into `rd` via the LLVM RISCVMatInt 909 * sequence: for values fitting a signed 32-bit range, LUI + ADDI/ADDIW; 910 * otherwise a recursive top-down hi20/lo12 split with SLLI shifts that 911 * absorb trailing zeros. No relocations. 912 * 913 * After an LUI, the low-half add uses ADDIW only when the LUI value is 914 * negative in 32-bit form (Hi20 >= RV_LUI_HI20_SIGN): there the add must 915 * wrap in 32-bit arithmetic and re-sign-extend to land in range. When the 916 * LUI value is non-negative in its low 32 bits, plain ADDI keeps the 917 * 64-bit result correct (matching LLVM's generateInstSeqImpl). */ 918 static void rv_emit_li_value(MCEmitter* mc, u32 rd, i64 val) { 919 if (val >= -2147483648LL && val <= 2147483647LL) { 920 i64 hi20 = ((val + 0x800) >> 12) & 0xfffffLL; 921 i64 lo12 = rv_sext12(val); 922 if (hi20) rv64_emit32(mc, rv_lui(rd, (u32)hi20)); 923 if (lo12 || hi20 == 0) { 924 u32 src = hi20 ? 
rd : (u32)RV_ZERO; 925 if (hi20 >= RV_LUI_HI20_SIGN) 926 rv64_emit32(mc, rv_addiw(rd, src, (i32)lo12)); 927 else 928 rv64_emit32(mc, rv_addi(rd, src, (i32)lo12)); 929 } 930 return; 931 } 932 /* >32-bit: split off the low 12 bits, recurse on the (shifted) high 933 * part, then SLLI back and ADD the low bits. The subtraction is done in 934 * unsigned space so it cannot signed-overflow at the int64 extremes 935 * (e.g. val=INT64_MAX, lo12=-1); the result has its low 12 bits clear, 936 * and the arithmetic right shift recovers the sign-extended high part. */ 937 i64 lo12 = rv_sext12(val); 938 i64 hi = (i64)((u64)val - (u64)lo12) >> 12; 939 u32 shift = 12; 940 /* Absorb trailing zeros of the high part into the shift amount. */ 941 while ((hi & 1) == 0) { 942 hi >>= 1; 943 ++shift; 944 } 945 rv_emit_li_value(mc, rd, hi); 946 rv64_emit32(mc, rv_slli(rd, rd, shift)); 947 if (lo12) rv64_emit32(mc, rv_addi(rd, rd, (i32)lo12)); 948 } 949 950 /* Dispatch a multi-word pseudo. Returns true if it consumed the operands 951 * and emitted its expansion; false to fall through to the single-word 952 * path. `li` is handled here only when its immediate exceeds the 12-bit 953 * signed range the alias row encodes directly. */ 954 static bool rv64_emit_pseudo(AsmDriver* d, const Rv64InsnDesc* desc) { 955 MCEmitter* mc = asm_driver_mc(d); 956 if (desc->fmt == RV64_FMT_PSEUDO) { 957 if (slice_eq_cstr(desc->mnemonic, "call")) { 958 rv_emit_call_pseudo(d, RV_RA, RV_RA); 959 return true; 960 } 961 if (slice_eq_cstr(desc->mnemonic, "tail")) { 962 /* Standard RISC-V `tail` materializes the address into t1 (x6). kit 963 * codegen uses t0 for its own tail-call temp, so a `cc -S`-fused 964 * `tail sym` re-assembles to t1 not t0 — execution-equivalent (both are 965 * caller-saved temps clobbered by the tail jump; cross-exec still 966 * matches), only the byte image differs on tail-call cases. Keeping the 967 * assembler's `tail` standard preserves clang/gas interop. 
*/ 968 rv_emit_call_pseudo(d, RV_T1, RV_ZERO); 969 return true; 970 } 971 /* la / lla — identical PC-relative expansion in kit. */ 972 rv_emit_la_pseudo(d); 973 return true; 974 } 975 if ((desc->flags & RV64_ASMFL_ALIAS) && slice_eq_cstr(desc->mnemonic, "li")) { 976 /* Peek the immediate without consuming the destination register: the 977 * single-word alias path re-parses both. We commit to the multi-word 978 * path only for out-of-range constants, leaving the existing 12-bit 979 * fast path (and its golden behavior) untouched. */ 980 u32 rd = parse_xreg(d); 981 expect_comma(d); 982 i64 imm = asm_driver_parse_const(d); 983 if (rv_fits_i12(imm)) { 984 rv64_emit32(mc, rv_addi(rd, RV_ZERO, (i32)imm)); 985 } else { 986 rv_emit_li_value(mc, rd, imm); 987 } 988 return true; 989 } 990 return false; 991 } 992 993 static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { 994 MCEmitter* mc = asm_driver_mc(d); 995 const Rv64InsnDesc* desc; 996 (void)base; 997 (void)asm_driver_cur_section(d); 998 desc = rv64_asm_find(pool_slice(asm_driver_pool(d), mnemonic)); 999 if (!desc) asm_driver_panic(d, "rv64 asm: unsupported instruction"); 1000 if (rv64_emit_pseudo(d, desc)) return; 1001 if (desc->flags & RV64_ASMFL_C16) 1002 rv64_emit16(mc, assemble_one(d, desc)); 1003 else 1004 rv64_emit32(mc, assemble_one(d, desc)); 1005 } 1006 1007 static void rv64_arch_asm_destroy(ArchAsm* base) { (void)base; } 1008 1009 /* ---- textual-assembly operand syntax (printer <-> parser) ---------------- 1010 * 1011 * Inverse of the `.s` parsers above (rv_parse_mod_reloc / rv_reloc_target and 1012 * the call/la pseudo expanders): how a relocated rv64 operand is spelled in 1013 * `cc -S` so the same text re-assembles under kit-as. RISC-V uses the same 1014 * `%hi`/`%lo`/`%pcrel_hi`/`%pcrel_lo` operator syntax on every object format, 1015 * so `fmt` is unused. See ArchAsmOps and src/api/asm_emit.c. 
*/ 1016 static int rv64_reloc_operand(u16 kind, KitObjFmt fmt, ArchRelocOperand* out) { 1017 (void)fmt; 1018 out->prefix = ""; 1019 out->suffix = ""; 1020 out->addend_bias = 0; 1021 out->emit_anchor = 0; 1022 out->ref_anchor = 0; 1023 switch (kind) { 1024 case R_RV_PCREL_HI20: 1025 out->surg = ARCH_RELOC_SURG_TAIL; 1026 out->prefix = "%pcrel_hi("; 1027 out->suffix = ")"; 1028 out->emit_anchor = 1; /* define a unique anchor label at this AUIPC */ 1029 return 1; 1030 case R_RV_GOT_HI20: 1031 out->surg = ARCH_RELOC_SURG_TAIL; 1032 out->prefix = "%got_pcrel_hi("; 1033 out->suffix = ")"; 1034 out->emit_anchor = 1; 1035 return 1; 1036 case R_RV_PCREL_LO12_I: 1037 case R_RV_PCREL_LO12_S: 1038 out->surg = ARCH_RELOC_SURG_RV_LO12; 1039 out->prefix = "%pcrel_lo("; 1040 out->suffix = ")"; 1041 out->ref_anchor = 1; /* references the preceding AUIPC's anchor label */ 1042 return 1; 1043 case R_RV_HI20: 1044 out->surg = ARCH_RELOC_SURG_TAIL; 1045 out->prefix = "%hi("; 1046 out->suffix = ")"; 1047 return 1; 1048 case R_RV_LO12_I: 1049 case R_RV_LO12_S: 1050 out->surg = ARCH_RELOC_SURG_RV_LO12; 1051 out->prefix = "%lo("; 1052 out->suffix = ")"; 1053 return 1; 1054 case R_RV_BRANCH: 1055 case R_RV_JAL: 1056 out->surg = ARCH_RELOC_SURG_TAIL; 1057 return 1; 1058 default: 1059 return 0; /* R_ABS*, R_RV_RVC_*, R_RV_RELAX, TLS, ... → keep numeric */ 1060 } 1061 } 1062 1063 /* Intra-section local branches whose target codegen resolved in place (no 1064 * relocation): the disassembler renders the target numerically, so cc -S 1065 * synthesizes a label there. `j`/`jal x0` are JAL aliases; the conditional 1066 * branches are B-type. `call`/`tail` are excluded — they carry R_RV_CALL. 
*/ 1067 static int rv64_is_local_branch(KitSlice m) { 1068 if (m.len == 1 && m.s[0] == 'j') return 1; 1069 if (m.len == 3 && memcmp(m.s, "jal", 3) == 0) return 1; 1070 if (m.len == 3 && memcmp(m.s, "beq", 3) == 0) return 1; 1071 if (m.len == 3 && memcmp(m.s, "bne", 3) == 0) return 1; 1072 if (m.len == 3 && memcmp(m.s, "blt", 3) == 0) return 1; 1073 if (m.len == 3 && memcmp(m.s, "bge", 3) == 0) return 1; 1074 if (m.len == 4 && memcmp(m.s, "bltu", 4) == 0) return 1; 1075 if (m.len == 4 && memcmp(m.s, "bgeu", 4) == 0) return 1; 1076 if (m.len == 4 && memcmp(m.s, "beqz", 4) == 0) return 1; 1077 if (m.len == 4 && memcmp(m.s, "bnez", 4) == 0) return 1; 1078 if (m.len == 4 && memcmp(m.s, "blez", 4) == 0) return 1; 1079 if (m.len == 4 && memcmp(m.s, "bgez", 4) == 0) return 1; 1080 if (m.len == 4 && memcmp(m.s, "bltz", 4) == 0) return 1; 1081 if (m.len == 4 && memcmp(m.s, "bgtz", 4) == 0) return 1; 1082 if (m.len == 6 && memcmp(m.s, "c.beqz", 6) == 0) return 1; 1083 if (m.len == 6 && memcmp(m.s, "c.bnez", 6) == 0) return 1; 1084 if (m.len == 3 && memcmp(m.s, "c.j", 3) == 0) return 1; 1085 return 0; 1086 } 1087 1088 /* R_RV_CALL fuses an AUIPC+JALR pair into a single `call`/`tail sym` pseudo 1089 * (the canonical `.s` spelling the assembler re-expands to the same pair + 1090 * reloc). The reloc sits on the AUIPC; the JALR partner carries no reloc. A 1091 * tail call links into x0 (the JALR's rd is `zero`); a regular call links into 1092 * ra. We read that from the partner JALR's disassembled text. */ 1093 static int rv64_reloc_call_pair(u16 kind, KitSlice pair_mnemonic, 1094 KitSlice pair_ops, const char** mnemonic_out) { 1095 if (kind != R_RV_CALL) return 0; 1096 /* The partner JALR links into ra (regular call) or x0 (tail). The 1097 * disassembler renders the x0-link, zero-immediate form as the `jr rs` 1098 * alias, and the ra form as `jalr ra, 0(ra)`. So a `jr` partner is always a 1099 * tail; a `jalr` partner is a tail iff its link register is `zero`. 
*/ 1100 if (pair_mnemonic.len == 2 && memcmp(pair_mnemonic.s, "jr", 2) == 0) { 1101 *mnemonic_out = "tail"; 1102 return 1; 1103 } 1104 if (pair_mnemonic.len == 4 && memcmp(pair_mnemonic.s, "jalr", 4) == 0) { 1105 if (pair_ops.len >= 4 && memcmp(pair_ops.s, "zero", 4) == 0) 1106 *mnemonic_out = "tail"; 1107 else 1108 *mnemonic_out = "call"; 1109 return 1; 1110 } 1111 return 0; 1112 } 1113 1114 const ArchAsmOps rv64_asm_ops = { 1115 .reloc_operand = rv64_reloc_operand, 1116 .is_local_branch = rv64_is_local_branch, 1117 .reloc_call_pair = rv64_reloc_call_pair, 1118 }; 1119 1120 ArchAsm* rv64_arch_asm_new(Compiler* c) { 1121 Rv64Asm* a = arena_new(c->tu, Rv64Asm); 1122 memset(a, 0, sizeof *a); 1123 a->base.insn = rv64_arch_asm_insn; 1124 a->base.destroy = rv64_arch_asm_destroy; 1125 a->c = c; 1126 return &a->base; 1127 } 1128 1129 /* ============================================================ 1130 * Inline-asm template walker (parallel to aa64 asm.c §"inline-asm 1131 * template walker"). The walker substitutes %N / %[name] / %% / %a%w%x 1132 * placeholders into a per-line StrBuf, then re-lexes each line through 1133 * rv64_arch_asm_insn for assembly. Statement separators recognised are 1134 * '\n' and ';' (outside parens / quoted strings). 
1135 * ============================================================ */ 1136 1137 Rv64Asm* rv64_asm_open(Compiler* c) { 1138 Rv64Asm* a = arena_new(c->tu, Rv64Asm); 1139 memset(a, 0, sizeof *a); 1140 a->base.insn = rv64_arch_asm_insn; 1141 a->base.destroy = rv64_arch_asm_destroy; 1142 a->c = c; 1143 return a; 1144 } 1145 1146 void rv64_asm_close(Rv64Asm* a) { (void)a; } 1147 1148 void rv64_inline_bind(Rv64Asm* a, const AsmConstraint* outs, u32 nout, 1149 Operand* out_ops, const AsmConstraint* ins, u32 nin, 1150 const Operand* in_ops, const Sym* clobbers, u32 nclob) { 1151 a->outs = outs; 1152 a->out_ops = out_ops; 1153 a->ins = ins; 1154 a->in_ops = in_ops; 1155 a->clobbers = clobbers; 1156 a->nout = nout; 1157 a->nin = nin; 1158 a->nclob = nclob; 1159 } 1160 1161 /* Per-line rendered buffer cap. Inline asm rarely emits more than a 1162 * handful of insns per block; one substituted line fits comfortably. 1163 * Truncation panics — the operator grammar should never grow a single 1164 * line beyond this without a deliberate reason. */ 1165 #define RV64_INLINE_LINE_CAP 1024 1166 1167 _Noreturn static void inline_panic(Rv64Asm* a, const char* msg) { 1168 SrcLoc loc = {0, 0, 0}; 1169 compiler_panic(a->c, loc, "rv64 inline asm: %.*s", 1170 SLICE_ARG(slice_from_cstr(msg))); 1171 } 1172 1173 /* Render a 5-bit integer register number using its canonical psABI name. */ 1174 static void render_xreg(StrBuf* sb, u32 reg) { 1175 const char* nm = rv64_register_name(reg & 0x1fu); 1176 if (!nm) { 1177 strbuf_putc(sb, 'x'); 1178 if ((reg & 0x1fu) >= 10u) 1179 strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) / 10u))); 1180 strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) % 10u))); 1181 return; 1182 } 1183 strbuf_puts(sb, nm); 1184 } 1185 1186 /* Render an FP register by its canonical psABI name (e.g., fa0). 
*/ 1187 static void render_freg(StrBuf* sb, u32 reg) { 1188 const char* nm = rv64_register_name(32u + (reg & 0x1fu)); 1189 if (!nm) { 1190 strbuf_putc(sb, 'f'); 1191 if ((reg & 0x1fu) >= 10u) 1192 strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) / 10u))); 1193 strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) % 10u))); 1194 return; 1195 } 1196 strbuf_puts(sb, nm); 1197 } 1198 1199 /* Render a signed 64-bit integer. Inline asm immediates appear bare in 1200 * RISC-V (no '#' prefix), matching the standalone .s parser. */ 1201 static void render_imm(StrBuf* sb, i64 v) { strbuf_put_i64(sb, v); } 1202 1203 /* Render addressing form `disp(base)`. */ 1204 static void render_indirect(Rv64Asm* a, StrBuf* sb, Reg base, i32 ofs) { 1205 (void)a; 1206 if (ofs != 0) 1207 strbuf_put_i64(sb, (i64)ofs); 1208 else 1209 strbuf_putc(sb, '0'); 1210 strbuf_putc(sb, '('); 1211 render_xreg(sb, (u32)base); 1212 strbuf_putc(sb, ')'); 1213 } 1214 1215 /* Resolve operand index → render into sb. form: 1216 * 0 = default (per-kind), 1217 * 1 = %wN (width hint; on rv64 same as default xreg form), 1218 * 2 = %xN (force 64-bit reg form — identical to default for rv64), 1219 * 3 = %aN (memory addressing form). 1220 * 4 = %zN (RISC-V GCC: emits "zero" if operand is imm 0, else reg). */ 1221 static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) { 1222 u32 ntot = a->nout + a->nin; 1223 if (idx >= ntot) inline_panic(a, "operand index out of range"); 1224 const Operand* op = 1225 (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout]; 1226 switch (form) { 1227 case 1: /* %wN — accept any reg/imm; rv64 has no narrower spelling. */ 1228 case 2: /* %xN — same. 
*/ 1229 if (op->kind == RV64_INLINE_OPK_REG) { 1230 if (op->pad[0] == RV64_INLINE_OPCLS_FP) 1231 render_freg(sb, (u32)op->v.local); 1232 else 1233 render_xreg(sb, (u32)op->v.local); 1234 return; 1235 } 1236 if (op->kind == OPK_IMM) { 1237 render_imm(sb, op->v.imm); 1238 return; 1239 } 1240 inline_panic(a, "%w/%x on unsupported operand kind"); 1241 case 3: /* %aN — memory addressing form */ 1242 if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand"); 1243 if (op->v.ind.index != CG_LOCAL_NONE) 1244 inline_panic(a, 1245 "%a on indexed memory operand: rv64 inline asm " 1246 "requires base+disp only"); 1247 render_indirect(a, sb, (Reg)op->v.ind.base, op->v.ind.ofs); 1248 return; 1249 case 4: /* %zN — zero-or-reg */ 1250 if (op->kind == OPK_IMM && op->v.imm == 0) { 1251 strbuf_puts(sb, "zero"); 1252 return; 1253 } 1254 if (op->kind == RV64_INLINE_OPK_REG) { 1255 if (op->pad[0] == RV64_INLINE_OPCLS_FP) 1256 render_freg(sb, (u32)op->v.local); 1257 else 1258 render_xreg(sb, (u32)op->v.local); 1259 return; 1260 } 1261 inline_panic(a, "%z on unsupported operand kind"); 1262 default: 1263 break; 1264 } 1265 switch (op->kind) { 1266 case RV64_INLINE_OPK_REG: 1267 if (op->pad[0] == RV64_INLINE_OPCLS_FP) 1268 render_freg(sb, (u32)op->v.local); 1269 else 1270 render_xreg(sb, (u32)op->v.local); 1271 return; 1272 case OPK_IMM: 1273 render_imm(sb, op->v.imm); 1274 return; 1275 case OPK_INDIRECT: 1276 if (op->v.ind.index != CG_LOCAL_NONE) 1277 inline_panic(a, 1278 "indexed memory operand in inline asm: rv64 requires " 1279 "base+disp only"); 1280 render_indirect(a, sb, (Reg)op->v.ind.base, op->v.ind.ofs); 1281 return; 1282 default: 1283 inline_panic(a, "unsupported operand kind for %N"); 1284 } 1285 } 1286 1287 /* Resolve a `%[name]` operand by looking up `needle` against the 1288 * constraint.name fields on the combined outs+ins list. Returns the 1289 * combined index, or (u32)-1 on miss. 
*/ 1290 static u32 lookup_named(Rv64Asm* a, Sym needle) { 1291 for (u32 k = 0; k < a->nout; ++k) { 1292 if (a->outs[k].name == needle) return k; 1293 } 1294 for (u32 k = 0; k < a->nin; ++k) { 1295 if (a->ins[k].name == needle) return a->nout + k; 1296 } 1297 return (u32)-1; 1298 } 1299 1300 /* Lex one line of substituted asm and dispatch via rv64_arch_asm_insn. */ 1301 static void run_one_line(Rv64Asm* a, MCEmitter* mc, const char* text, 1302 size_t len) { 1303 /* Skip blank lines. */ 1304 size_t i; 1305 for (i = 0; i < len; ++i) { 1306 if (text[i] != ' ' && text[i] != '\t') break; 1307 } 1308 if (i == len) return; 1309 1310 AsmLexer* lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len); 1311 AsmDriver* d = asm_driver_open_inline(a->c, mc, lx); 1312 1313 /* The first non-trivial token must be the mnemonic identifier. */ 1314 AsmTok t = asm_driver_peek(d); 1315 while (t.kind == ASM_TOK_NEWLINE) { 1316 (void)asm_driver_next(d); 1317 t = asm_driver_peek(d); 1318 } 1319 if (t.kind == ASM_TOK_EOF) { 1320 asm_driver_close_inline(d); 1321 asm_lex_close(lx); 1322 return; 1323 } 1324 if (t.kind != ASM_TOK_IDENT) 1325 inline_panic(a, "expected mnemonic at start of inline asm line"); 1326 (void)asm_driver_next(d); 1327 Sym mn = t.v.ident; 1328 /* Compose `fcvt.s.w` etc. — rv64 has dotted mnemonics; the standalone 1329 * lexer already strings them together as a single IDENT in most paths. 1330 * Mirror the aa64 composite handling for safety. 
*/ 1331 AsmTok dot = asm_driver_peek(d); 1332 while (asm_driver_tok_is_punct(dot, '.')) { 1333 (void)asm_driver_next(d); 1334 AsmTok rest = asm_driver_next(d); 1335 if (rest.kind != ASM_TOK_IDENT) 1336 inline_panic(a, "composite mnemonic: expected ident after '.'"); 1337 Slice hsl = pool_slice(asm_driver_pool(d), mn); 1338 Slice rsl = pool_slice(asm_driver_pool(d), rest.v.ident); 1339 size_t hn = hsl.len, rn = rsl.len; 1340 char buf[64]; 1341 if (hn + 1 + rn >= sizeof buf) 1342 inline_panic(a, "composite mnemonic too long"); 1343 for (size_t k = 0; k < hn; ++k) buf[k] = hsl.s[k]; 1344 buf[hn] = '.'; 1345 for (size_t k = 0; k < rn; ++k) buf[hn + 1 + k] = rsl.s[k]; 1346 mn = pool_intern_slice(asm_driver_pool(d), 1347 (Slice){.s = buf, .len = hn + 1 + rn}); 1348 dot = asm_driver_peek(d); 1349 } 1350 rv64_arch_asm_insn(&a->base, d, mn); 1351 asm_driver_close_inline(d); 1352 asm_lex_close(lx); 1353 } 1354 1355 /* Substitute placeholders into one line's StrBuf, then dispatch. */ 1356 static void render_and_run_line(Rv64Asm* a, MCEmitter* mc, StrBuf* sb, 1357 const char* start, const char* end) { 1358 strbuf_reset(sb); 1359 for (const char* p = start; p < end; ++p) { 1360 char c = *p; 1361 if (c != '%') { 1362 strbuf_putc(sb, c); 1363 continue; 1364 } 1365 /* Placeholder. 
*/ 1366 if (p + 1 >= end) inline_panic(a, "trailing '%' in template"); 1367 char n = *(p + 1); 1368 if (n == '%') { 1369 strbuf_putc(sb, '%'); 1370 ++p; 1371 continue; 1372 } 1373 if (n == '[') { 1374 const char* nbeg = p + 2; 1375 const char* nend = nbeg; 1376 while (nend < end && *nend != ']') ++nend; 1377 if (nend == end) inline_panic(a, "unterminated %[name]"); 1378 size_t nlen = (size_t)(nend - nbeg); 1379 Sym needle = 1380 pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen}); 1381 u32 idx = lookup_named(a, needle); 1382 if (idx == (u32)-1) 1383 inline_panic(a, "%[name] does not match any constraint"); 1384 p = nend; /* loop's ++p steps past the ']' */ 1385 render_operand(a, sb, idx, 0); 1386 continue; 1387 } 1388 int form = 0; /* 0=default, 1=w, 2=x, 3=a, 4=z */ 1389 if (n == 'w' || n == 'x' || n == 'a' || n == 'z') { 1390 form = (n == 'w') ? 1 : (n == 'x') ? 2 : (n == 'a') ? 3 : 4; 1391 ++p; 1392 if (p + 1 >= end) inline_panic(a, "trailing '%' modifier in template"); 1393 n = *(p + 1); 1394 } 1395 if (n == '[') { 1396 const char* nbeg = p + 2; 1397 const char* nend = nbeg; 1398 while (nend < end && *nend != ']') ++nend; 1399 if (nend == end) inline_panic(a, "unterminated %[name]"); 1400 size_t nlen = (size_t)(nend - nbeg); 1401 Sym needle = 1402 pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen}); 1403 u32 idx = lookup_named(a, needle); 1404 if (idx == (u32)-1) 1405 inline_panic(a, "%[name] does not match any constraint"); 1406 p = nend; 1407 render_operand(a, sb, idx, form); 1408 continue; 1409 } 1410 if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'"); 1411 u32 idx = (u32)(n - '0'); 1412 ++p; 1413 /* GCC syntax permits up to two digits (%0..%99). 
*/ 1414 if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') { 1415 idx = idx * 10 + (u32)(*(p + 1) - '0'); 1416 ++p; 1417 } 1418 render_operand(a, sb, idx, form); 1419 } 1420 if (sb->truncated) inline_panic(a, "inline asm line buffer overflow"); 1421 run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb)); 1422 } 1423 1424 void rv64_asm_run_template(Rv64Asm* a, MCEmitter* mc, const char* tmpl) { 1425 if (!tmpl || !*tmpl) return; 1426 1427 char buf[RV64_INLINE_LINE_CAP]; 1428 StrBuf sb; 1429 strbuf_init(&sb, buf, sizeof buf); 1430 1431 /* Walk tmpl, splitting on '\n' and ';'. Track paren depth and quote 1432 * state so that a literal ';' inside `( ... )` (memory operand) or a 1433 * quoted string is not mistaken for a statement separator. RISC-V uses 1434 * `disp(base)` for memory, hence we track parens. */ 1435 const char* line_start = tmpl; 1436 int paren = 0; 1437 char quote = 0; 1438 for (const char* p = tmpl;; ++p) { 1439 char c = *p; 1440 if (c == '\0') { 1441 render_and_run_line(a, mc, &sb, line_start, p); 1442 break; 1443 } 1444 if (quote) { 1445 if (c == '\\' && *(p + 1)) { 1446 ++p; 1447 continue; 1448 } 1449 if (c == quote) quote = 0; 1450 continue; 1451 } 1452 if (c == '"' || c == '\'') { 1453 quote = c; 1454 continue; 1455 } 1456 if (c == '(') { 1457 ++paren; 1458 continue; 1459 } 1460 if (c == ')') { 1461 if (paren) --paren; 1462 continue; 1463 } 1464 if (paren == 0 && (c == '\n' || c == ';')) { 1465 render_and_run_line(a, mc, &sb, line_start, p); 1466 line_start = p + 1; 1467 } 1468 } 1469 }