kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ecab4f793fbd705a1071bc8c3626fa9adf61819d
parent e41b2bb8ab846ea74c1492d6d5c391acbffc723e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 20:01:44 -0700

asm: FP/SIMD scalar load/store + dot-mangled symbol names

Two assembler completeness gaps surfaced by round-tripping codegen FP and
static-local output:

1. FP/SIMD scalar ldr/str/ldur/stur. p_ldst_core/p_ldur_stur parsed only
   GPR transfer registers and hardcoded V=0, so every FP spill the compiler
   emits (str d8,[x29,#24]; stur d18,[x29,#-16]; str s0,...) failed to
   assemble even though the disassembler decoded them. Add a scalar SIMD/FP
   transfer-register parser (Bt..Qt) and a shared ldst_encoding() that
   derives (size,V,opc,scale) for GPR or FP, threaded through the uimm12 /
   reg-offset / writeback / simm9 paths. Unblocks ~25 fp/conv round-trip
   cases. (asm_lex change in the same area earlier: .L locals.)

2. Dot-mangled symbol names. A static local is mangled to acc.1; the lexer
   split it as acc . 1, so ldr w9,[x16,:lo12:acc.1] failed (expected ]).
   Allow a '.' immediately followed by a digit to continue an identifier
   (the name.N discriminator form) — narrow enough to never swallow a
   '.'-led mnemonic suffix (b.eq) or the location-counter dot in
   ".size foo, .-foo".

Regression: encode case test/asm/encode/aa64_fp_ldst (scalar d/s/q forms).
asm, ISA-unit, inline-asm, and link corpora green.

Diffstat:
M src/arch/aa64/asm.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M src/asm/asm_lex.c | 7 +++++++
A test/asm/encode/aa64_fp_ldst.expected.hex | 1 +
A test/asm/encode/aa64_fp_ldst.s | 11 +++++++++++
A test/asm/encode/aa64_fp_ldst.targets | 1 +
5 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -223,6 +223,41 @@ static int parse_fp_pair_reg_from_ident(AsmDriver* d, Sym ident, AA64Reg* out) { return 1; } +/* Scalar SIMD/FP transfer register for ldr/str/ldur/stur: b/h/s/d/q with the + * access width in fp_bytes (1/2/4/8/16). Unlike parse_fp_pair_reg_from_ident + * (ldp/stp, d/q only) this accepts the sub-64-bit scalar widths a single-reg + * FP load/store can carry. */ +static int parse_fp_scalar_reg_from_ident(AsmDriver* d, Sym ident, + AA64Reg* out) { + Slice sl = pool_slice(asm_driver_pool(d), ident); + const char* p = sl.s; + size_t n = sl.len; + u8 bytes; + u32 r = 0; + size_t i; + if (!p || n < 2) return 0; + switch (p[0]) { + case 'b': case 'B': bytes = 1; break; + case 'h': case 'H': bytes = 2; break; + case 's': case 'S': bytes = 4; break; + case 'd': case 'D': bytes = 8; break; + case 'q': case 'Q': bytes = 16; break; + default: return 0; + } + for (i = 1; i < n; ++i) { + char c = p[i]; + if (c < '0' || c > '9') return 0; + r = r * 10 + (u32)(c - '0'); + if (r > 31) return 0; + } + out->num = r; + out->is64 = 1; + out->is_sp = 0; + out->is_fp = 1; + out->fp_bytes = bytes; + return 1; +} + static AA64Reg parse_reg(AsmDriver* d) { AsmTok t = asm_driver_next(d); AA64Reg r; @@ -232,6 +267,54 @@ static AA64Reg parse_reg(AsmDriver* d) { return r; } +/* ldr/str transfer register: GPR (Wt/Xt) or scalar SIMD/FP (Bt..Qt). */ +static AA64Reg parse_ldst_reg(AsmDriver* d) { + AsmTok t = asm_driver_next(d); + AA64Reg r; + memset(&r, 0, sizeof r); + if (t.kind != ASM_TOK_IDENT || + (!parse_reg_from_ident(d, t.v.ident, &r) && + !parse_fp_scalar_reg_from_ident(d, t.v.ident, &r))) + asm_driver_panic(d, "asm: ldr/str: expected register"); + return r; +} + +/* Resolve the (size, V, opc, scale) load/store encoding fields from the + * transfer register and mnemonic flavor. 
GPR width comes from fixed_size (the + * sized mnemonics ldrb/ldrsw/…) or the register; FP uses V=1 with size/opc + * keyed on the scalar width (b/h/s/d = size 0/1/2/3, opc store=0/load=1; the + * 128-bit q is size=0 opc=2/3). `scale` is the byte access width for the + * scaled unsigned-imm12 form. */ +typedef struct { + u32 size, V, opc, scale; +} AA64LdStEnc; + +static AA64LdStEnc ldst_encoding(AsmDriver* d, AA64Reg rt, int is_load, + int fixed_size, int sign_ext) { + AA64LdStEnc e; + if (rt.is_fp) { + if (fixed_size >= 0 || sign_ext) + asm_driver_panic(d, "asm: sized/signed ld/st takes a GPR, not an FP reg"); + e.V = 1; + e.scale = rt.fp_bytes; + e.size = (rt.fp_bytes == 1) ? 0u + : (rt.fp_bytes == 2) ? 1u + : (rt.fp_bytes == 4) ? 2u + : (rt.fp_bytes == 8) ? 3u + : 0u; /* 16 (Q): size=0, opc carries width */ + e.opc = (rt.fp_bytes == 16) ? (is_load ? 3u : 2u) : (is_load ? 1u : 0u); + } else { + e.V = 0; + e.size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u); + e.scale = 1u << e.size; + e.opc = !is_load ? AA64_LDST_OPC_STR + : !sign_ext ? AA64_LDST_OPC_LDR + : rt.is64 ? 2u /* LDRS*, 64-bit dst */ + : 3u; /* LDRS*, 32-bit dst */ + } + return e; +} + static AA64Reg parse_ldstp_reg(AsmDriver* d) { AsmTok t = asm_driver_next(d); AA64Reg r; @@ -1114,25 +1197,22 @@ static AA64Mem parse_mem(AsmDriver* d) { * 32-bit), keyed on the destination register width. */ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, int sign_ext) { - AA64Reg rt = parse_reg(d); + AA64Reg rt = parse_ldst_reg(d); reject_sp_reg(d, rt, "ldr/str"); expect_comma(d, "ldr/str"); AA64Mem m = parse_mem(d); - u32 size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u); - u32 opc = !is_load ? AA64_LDST_OPC_STR - : !sign_ext ? AA64_LDST_OPC_LDR - : rt.is64 ? 
2u /* LDRS*, 64-bit dst */ - : 3u; /* LDRS*, 32-bit dst */ + AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext); + u32 size = e.size, opc = e.opc, V = e.V; if (m.reloc_mod != AA64_RELMOD_NONE) { /* [Xn, :lo12:sym] / [Xn, :got_lo12:sym] — unsigned-imm12 form with a zero * immediate; the relocation supplies the low 12 bits. :got_lo12: only * applies to a 64-bit `ldr` (the GOT entry is an 8-byte pointer); llvm-mc * rejects it on stores, signed loads, and sub-word loads. */ if (m.reloc_mod == AA64_RELMOD_GOT_LO12 && - !(size == 3 && opc == AA64_LDST_OPC_LDR)) + !(V == 0 && size == 3 && opc == AA64_LDST_OPC_LDR)) asm_driver_panic(d, "asm: :got_lo12: only valid on a 64-bit ldr"); u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size, - .V = 0, + .V = V, .opc = opc, .imm12 = 0, .Rn = m.base.num, @@ -1159,7 +1239,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, asm_driver_panic(d, "asm: ldr/str: index shift must be 0 or access size"); } u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size, - .V = 0, + .V = V, .opc = opc, .Rm = m.index.num, .option = m.option, @@ -1176,7 +1256,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, u32 imm9 = (u32)((u64)m.imm & 0x1ffu); u32 idx = m.pre_index ? AA64_LDST_IDX_PRE : AA64_LDST_IDX_POST; u32 word = aa64_ldst_wback_pack((AA64LdStWBack){.size = size, - .V = 0, + .V = V, .opc = opc, .imm9 = imm9, .idx = idx, @@ -1187,12 +1267,12 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, } { /* Try scaled unsigned-imm12 first. 
*/ - u32 scale = 1u << size; + u32 scale = e.scale; if (m.imm >= 0 && (i64)((u64)m.imm % scale) == 0 && (u64)m.imm / scale <= 0xfff) { u32 imm12 = (u32)((u64)m.imm / scale); u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size, - .V = 0, + .V = V, .opc = opc, .imm12 = imm12, .Rn = m.base.num, @@ -1204,7 +1284,7 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, if (m.imm >= -256 && m.imm <= 255) { u32 imm9 = (u32)((u64)m.imm & 0x1ffu); u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = size, - .V = 0, + .V = V, .opc = opc, .imm9 = imm9, .Rn = m.base.num, @@ -1236,21 +1316,17 @@ static void p_ldrsw(AsmDriver* d) { p_ldst_core(d, 1, 2, 1); } * unscaled mirror of p_ldst_core. */ static void p_ldur_stur(AsmDriver* d, int is_load, int fixed_size, int sign_ext) { - AA64Reg rt = parse_reg(d); + AA64Reg rt = parse_ldst_reg(d); reject_sp_reg(d, rt, "ldur/stur"); expect_comma(d, "ldur/stur"); AA64Mem m = parse_mem(d); - u32 size = (fixed_size >= 0) ? (u32)fixed_size : (rt.is64 ? 3u : 2u); - u32 opc = !is_load ? AA64_LDST_OPC_STR - : !sign_ext ? AA64_LDST_OPC_LDR - : rt.is64 ? 2u /* LDURS*, 64-bit dst */ - : 3u; /* LDURS*, 32-bit dst */ + AA64LdStEnc e = ldst_encoding(d, rt, is_load, fixed_size, sign_ext); if (m.imm < -256 || m.imm > 255) asm_driver_panic(d, "asm: ldur/stur: imm9 out of range"); u32 imm9 = (u32)((u64)m.imm & 0x1ffu); - u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = size, - .V = 0, - .opc = opc, + u32 word = aa64_ldst_simm9_pack((AA64LdStSimm9){.size = e.size, + .V = e.V, + .opc = e.opc, .imm9 = imm9, .Rn = m.base.num, .Rt = rt.num}); diff --git a/src/asm/asm_lex.c b/src/asm/asm_lex.c @@ -433,6 +433,13 @@ AsmTok asm_lex_next(AsmLexer* l) { int c = peek(l, 0); if (is_alnum(c)) { bump(l); + } else if (c == '.' && is_digit(peek(l, 1))) { + /* Discriminator-mangled symbol: `name.N` (static locals, lambda / + * block-scope renaming, e.g. `acc.1`). A `.` followed by a digit + * continues the identifier. 
Restricted to `.`+digit so it never + * swallows a `.`-led mnemonic suffix (`b.eq`, `fcvt.w.s`) or the + * `.size foo, .-foo` location-counter dot. */ + bump(l); } else if ((u = ucn_len(l, 0))) { int i; for (i = 0; i < u; ++i) bump(l); diff --git a/test/asm/encode/aa64_fp_ldst.expected.hex b/test/asm/encode/aa64_fp_ldst.expected.hex @@ -0,0 +1 @@ +a80f00fda80f40fd200400bd620040bdb2031ffca4c05fbce10700fde20740fd4005803d4101c03d diff --git a/test/asm/encode/aa64_fp_ldst.s b/test/asm/encode/aa64_fp_ldst.s @@ -0,0 +1,11 @@ + .text + str d8, [x29, #24] + ldr d8, [x29, #24] + str s0, [x1, #4] + ldr s2, [x3] + stur d18, [x29, #-16] + ldur s4, [x5, #-4] + str d1, [sp, #8] + ldr d2, [sp, #8] + str q0, [x10, #16] + ldr q1, [x10] diff --git a/test/asm/encode/aa64_fp_ldst.targets b/test/asm/encode/aa64_fp_ldst.targets @@ -0,0 +1 @@ +aa64