commit ecab4f793fbd705a1071bc8c3626fa9adf61819d
parent e41b2bb8ab846ea74c1492d6d5c391acbffc723e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 20:01:44 -0700
asm: FP/SIMD scalar load/store + dot-mangled symbol names
Two assembler completeness gaps surfaced by round-tripping codegen FP and
static-local output:
1. FP/SIMD scalar ldr/str/ldur/stur. p_ldst_core/p_ldur_stur parsed only
GPR transfer registers and hardcoded V=0, so every FP spill the compiler
emits (str d8,[x29,#24]; stur d18,[x29,#-16]; str s0,...) failed to
assemble though the disassembler decoded them. Add a scalar SIMD/FP
transfer-register parser (Bt..Qt) and a shared ldst_encoding() that
derives (size,V,opc,scale) for GPR or FP, threaded through the uimm12 /
reg-offset / writeback / simm9 paths. Unblocks ~25 fp/conv round-trip
cases. (An earlier asm_lex change in this same area handled .L locals.)
2. Dot-mangled symbol names. A static local is mangled to acc.1; the lexer
split it as acc . 1, so ldr w9,[x16,:lo12:acc.1] failed (expected ]).
Allow a .-then-digit run to continue an identifier (the name.N
discriminator form) — narrow enough to never swallow a .-led mnemonic
suffix (b.eq) or the .size .-foo location dot.
Regression: encode case test/asm/encode/aa64_fp_ldst (scalar d/s/q forms).
asm, ISA-unit, inline-asm, and link corpora green.
Diffstat:
5 files changed, 118 insertions(+), 22 deletions(-)
diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c
@@ -223,6 +223,41 @@ static int parse_fp_pair_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) {
return 1;
}
+/* Scalar SIMD/FP transfer register for ldr/str/ldur/stur: a b/h/s/d/q letter
+ * (either case) followed only by decimal digits naming 0..31. On a match
+ * returns 1 and fills *out (is_fp=1, fp_bytes=1/2/4/8/16); otherwise returns 0
+ * with *out unwritten. parse_fp_pair_reg_from_ident (ldp/stp) is d/q only. */
+static int parse_fp_scalar_reg_from_ident(AsmDriver* d, Sym ident,
+ AA64Reg* out) {
+ Slice sl = pool_slice(asm_driver_pool(d), ident);
+ const char* p = sl.s;
+ size_t n = sl.len;
+ u8 bytes;
+ u32 r = 0;
+ size_t i;
+ if (!p || n < 2) return 0; /* need the width letter plus at least one digit */
+ switch (p[0]) { /* width letter -> access size in bytes */
+ case 'b': case 'B': bytes = 1; break;
+ case 'h': case 'H': bytes = 2; break;
+ case 's': case 'S': bytes = 4; break;
+ case 'd': case 'D': bytes = 8; break;
+ case 'q': case 'Q': bytes = 16; break;
+ default: return 0;
+ }
+ for (i = 1; i < n; ++i) { /* tail must be all digits; d007 parses as d7 */
+ char c = p[i];
+ if (c < '0' || c > '9') return 0;
+ r = r * 10 + (u32)(c - '0');
+ if (r > 31) return 0; /* checked per digit, so r never grows past 319 */
+ }
+ out->num = r;
+ out->is64 = 1;
+ out->is_sp = 0;
+ out->is_fp = 1;
+ out->fp_bytes = bytes;
+ return 1;
+}
+
static AA64Reg parse_reg(AsmDriver* d) {
AsmTok t = asm_driver_next(d);
AA64Reg r;
@@ -232,6 +267,54 @@ static AA64Reg parse_reg(AsmDriver* d) {
return r;
}
+/* Read an ldr/str transfer register, GPR or scalar FP (Bt..Qt), or panic. */
+static AA64Reg parse_ldst_reg(AsmDriver* d) {
+ AsmTok t = asm_driver_next(d); /* exactly one token is consumed */
+ AA64Reg r;
+ memset(&r, 0, sizeof r);
+ if (t.kind != ASM_TOK_IDENT || /* GPR parser is tried first, then the FP one */
+ (!parse_reg_from_ident(d, t.v.ident, &r) &&
+ !parse_fp_scalar_reg_from_ident(d, t.v.ident, &r)))
+ asm_driver_panic(d, "asm: ldr/str: expected register");
+ return r;
+}
+
+/* Resolve the (size, V, opc, scale) load/store encoding fields from the
+ * transfer register and mnemonic flavor. GPR: V=0, size from fixed_size (the
+ * sized mnemonics ldrb/ldrsw/…; negative = take it from the register), opc
+ * from is_load/sign_ext. FP: V=1, b/h/s/d = size 0/1/2/3 with opc store=0 /
+ * load=1, the 128-bit q is size=0 opc=2/3; fixed_size >= 0 or sign_ext panics.
+ * `scale` is the byte access width for the scaled unsigned-imm12 form. */
+typedef struct {
+ u32 size, V, opc, scale;
+} AA64LdStEnc;
+
+static AA64LdStEnc ldst_encoding(AsmDriver* d, AA64Reg rt, int is_load,
+ int fixed_size, int sign_ext) {
+ AA64LdStEnc e;
+ if (rt.is_fp) {
+ if (fixed_size >= 0 || sign_ext) /* e.g. ldrb/ldrsw given an FP register */
+ asm_driver_panic(d, "asm: sized/signed ld/st takes a GPR, not an FP reg");
+ e.V = 1;
+ e.scale = rt.fp_bytes; /* imm12 is scaled by the register's byte width */
+ e.size = (rt.fp_bytes == 1) ? 0u
+ : (rt.fp_bytes == 2) ? 1u
+ : (rt.fp_bytes == 4) ? 2u
+ : (rt.fp_bytes == 8) ? 3u
+ : 0u; /* 16 (Q): size=0, opc carries width */
+ e.opc = (rt.fp_bytes == 16) ? (is_load ? 3u : 2u) : (is_load ? 1u : 0u);
+ } else {
+ e.V = 0;
+ e.size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u);
+ e.scale = 1u << e.size; /* byte width is 2^size */
+ e.opc = !is_load ? AA64_LDST_OPC_STR
+ : !sign_ext ? AA64_LDST_OPC_LDR
+ : rt.is64 ? 2u /* LDRS*, 64-bit dst */
+ : 3u; /* LDRS*, 32-bit dst */
+ }
+ return e;
+}
+
static AA64Reg parse_ldstp_reg(AsmDriver* d) {
AsmTok t = asm_driver_next(d);
AA64Reg r;
@@ -1114,25 +1197,22 @@ static AA64Mem parse_mem(AsmDriver* d) {
* 32-bit), keyed on the destination register width. */
static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
int sign_ext) {
- AA64Reg rt = parse_reg(d);
+ AA64Reg rt = parse_ldst_reg(d);
reject_sp_reg(d, rt, "ldr/str");
expect_comma(d, "ldr/str");
AA64Mem m = parse_mem(d);
- u32 size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u);
- u32 opc = !is_load ? AA64_LDST_OPC_STR
- : !sign_ext ? AA64_LDST_OPC_LDR
- : rt.is64 ? 2u /* LDRS*, 64-bit dst */
- : 3u; /* LDRS*, 32-bit dst */
+ AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext);
+ u32 size = e.size, opc = e.opc, V = e.V;
if (m.reloc_mod != AA64_RELMOD_NONE) {
/* [Xn, :lo12:sym] / [Xn, :got_lo12:sym] — unsigned-imm12 form with a zero
* immediate; the relocation supplies the low 12 bits. :got_lo12: only
* applies to a 64-bit `ldr` (the GOT entry is an 8-byte pointer); llvm-mc
* rejects it on stores, signed loads, and sub-word loads. */
if (m.reloc_mod == AA64_RELMOD_GOT_LO12 &&
- !(size == 3 && opc == AA64_LDST_OPC_LDR))
+ !(V == 0 && size == 3 && opc == AA64_LDST_OPC_LDR))
asm_driver_panic(d, "asm: :got_lo12: only valid on a 64-bit ldr");
u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size,
- .V = 0,
+ .V = V,
.opc = opc,
.imm12 = 0,
.Rn = m.base.num,
@@ -1159,7 +1239,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
asm_driver_panic(d, "asm: ldr/str: index shift must be 0 or access size");
}
u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size,
- .V = 0,
+ .V = V,
.opc = opc,
.Rm = m.index.num,
.option = m.option,
@@ -1176,7 +1256,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
u32 imm9 = (u32)((u64)m.imm & 0x1ffu);
u32 idx = m.pre_index ? AA64_LDST_IDX_PRE : AA64_LDST_IDX_POST;
u32 word = aa64_ldst_wback_pack((AA64LdStWBack){.size = size,
- .V = 0,
+ .V = V,
.opc = opc,
.imm9 = imm9,
.idx = idx,
@@ -1187,12 +1267,12 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
}
{
/* Try scaled unsigned-imm12 first. */
- u32 scale = 1u << size;
+ u32 scale = e.scale;
if (m.imm >= 0 && (i64)((u64)m.imm % scale) == 0 &&
(u64)m.imm / scale <= 0xfff) {
u32 imm12 = (u32)((u64)m.imm / scale);
u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size,
- .V = 0,
+ .V = V,
.opc = opc,
.imm12 = imm12,
.Rn = m.base.num,
@@ -1204,7 +1284,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
if (m.imm >= -256 && m.imm <= 255) {
u32 imm9 = (u32)((u64)m.imm & 0x1ffu);
u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = size,
- .V = 0,
+ .V = V,
.opc = opc,
.imm9 = imm9,
.Rn = m.base.num,
@@ -1236,21 +1316,17 @@ static void p_ldrsw(AsmDriver* d) { p_ldst_core(d, 1, 2, 1); }
* unscaled mirror of p_ldst_core. */
static void p_ldur_stur(AsmDriver* d, int is_load, int fixed_size,
int sign_ext) {
- AA64Reg rt = parse_reg(d);
+ AA64Reg rt = parse_ldst_reg(d);
reject_sp_reg(d, rt, "ldur/stur");
expect_comma(d, "ldur/stur");
AA64Mem m = parse_mem(d);
- u32 size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u);
- u32 opc = !is_load ? AA64_LDST_OPC_STR
- : !sign_ext ? AA64_LDST_OPC_LDR
- : rt.is64 ? 2u /* LDURS*, 64-bit dst */
- : 3u; /* LDURS*, 32-bit dst */
+ AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext);
if (m.imm < -256 || m.imm > 255)
asm_driver_panic(d, "asm: ldur/stur: imm9 out of range");
u32 imm9 = (u32)((u64)m.imm & 0x1ffu);
- u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = size,
- .V = 0,
- .opc = opc,
+ u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = e.size,
+ .V = e.V,
+ .opc = e.opc,
.imm9 = imm9,
.Rn = m.base.num,
.Rt = rt.num});
diff --git a/src/asm/asm_lex.c b/src/asm/asm_lex.c
@@ -433,6 +433,13 @@ AsmTok asm_lex_next(AsmLexer* l) {
int c = peek(l, 0);
if (is_alnum(c)) {
bump(l);
+ } else if (c == '.' && is_digit(peek(l, 1))) {
+ /* Discriminator-mangled symbol: `name.N` (static locals, lambda /
+ * block-scope renaming, e.g. `acc.1`). A `.` followed by a digit
+ * continues the identifier. Restricted to `.`+digit so it never
+ * swallows a `.`-led mnemonic suffix (`b.eq`, `fcvt.w.s`) or the
+ * `.size foo, .-foo` location-counter dot. */
+ bump(l);
} else if ((u = ucn_len(l, 0))) {
int i;
for (i = 0; i < u; ++i) bump(l);
diff --git a/test/asm/encode/aa64_fp_ldst.expected.hex b/test/asm/encode/aa64_fp_ldst.expected.hex
@@ -0,0 +1 @@
+a80f00fda80f40fd200400bd620040bdb2031ffca4c05fbce10700fde20740fd4005803d4101c03d
diff --git a/test/asm/encode/aa64_fp_ldst.s b/test/asm/encode/aa64_fp_ldst.s
@@ -0,0 +1,11 @@
+ .text
+ str d8, [x29, #24]
+ ldr d8, [x29, #24]
+ str s0, [x1, #4]
+ ldr s2, [x3]
+ stur d18, [x29, #-16]
+ ldur s4, [x5, #-4]
+ str d1, [sp, #8]
+ ldr d2, [sp, #8]
+ str q0, [x10, #16]
+ ldr q1, [x10]
diff --git a/test/asm/encode/aa64_fp_ldst.targets b/test/asm/encode/aa64_fp_ldst.targets
@@ -0,0 +1 @@
+aa64