kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5bd8de1ceeabdaef0abc018ea94891371381655d
parent e156c02981eb92a6d0f9303fbde37ed4e6b83615
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 16:00:03 -0700

aa64 asm: parse logical-immediate and lsl/lsr/asr aliases

The disassembler (driving `cc -S`) emits AArch64 forms the assembler
couldn't parse back, breaking the `cc -c` == `cc -S | as` round-trip for
48 cases (all the L1 lane). Two gaps, both exposed once codegen started
using immediate operand forms at -O0 (dc1854de):

- Logical bitmask-immediate `and/orr/eor/ands Rd, Rn, #imm`: p_log_sr
  parsed the third operand strictly as a register ("expected register").
- Shift aliases `lsl/lsr/asr Rd, Rn, #imm`: the mnemonics weren't in the
  table at all, only the variable-shift lslv/lsrv/asrv ("unknown
  mnemonic").

Add a peek_is_reg lookahead, extend p_log_sr with the bitmask-immediate
form (via aa64_logimm_encode), and add p_shift handling both the register
(LSLV/LSRV/ASRV) and immediate (UBFM/SBFM) forms. ror/EXTR is left out:
the disassembler doesn't decode EXTR, so adding it would introduce a new
encode-only asymmetry; rorv already covers the register rotate.

Every new encoding cross-checked byte-for-byte against llvm-mc. Adds an
encode corpus case; the symmetry baseline now drops 3 mov_orr_bitmask
encode-only entries that round-trip thanks to the logical-immediate path.

test-asm-roundtrip: 572 pass, 0 fail (was 524 pass / 48 fail).

Diffstat:
M src/arch/aa64/asm.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A test/asm/encode/aa64_shift_logimm_alias.expected.hex | 1 +
A test/asm/encode/aa64_shift_logimm_alias.s | 21 +++++++++++++++++++++
A test/asm/encode/aa64_shift_logimm_alias.targets | 1 +
M test/asm/symmetry.baseline | 3 ---
5 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -283,6 +283,15 @@ static AA64Reg parse_reg(AsmDriver* d) { return r; } +/* Non-consuming lookahead: is the next operand a register? Used to pick + * between the register and immediate forms of dual-form mnemonics (e.g. + * `and Rd,Rn,Rm` vs `and Rd,Rn,#imm`). */ +static int peek_is_reg(AsmDriver* d) { + AsmTok t = asm_driver_peek(d); + AA64Reg r; + return t.kind == ASM_TOK_IDENT && parse_reg_from_ident(d, t.v.ident, &r); +} + /* ldr/str transfer register: GPR (Wt/Xt) or scalar SIMD/FP (Bt..Qt). */ static AA64Reg parse_ldst_reg(AsmDriver* d) { AsmTok t = asm_driver_next(d); @@ -1111,12 +1120,35 @@ static void p_neg(AsmDriver* d, int set_flags) { emit32(d, word); } -/* Logical shifted-register family. */ +/* Logical family: shifted-register `<op> Rd,Rn,Rm{,shift}` or, for the + * non-negated AND/ORR/EOR/ANDS, the bitmask-immediate `<op> Rd,Rn,#imm`. + * N is the SR-form negate bit (BIC/ORN/EON/BICS); those have no immediate + * form, so an immediate third operand is only valid when N==0. */ static void p_log_sr(AsmDriver* d, u32 opc, u32 N) { AA64Reg rd = parse_reg(d); expect_comma(d, "logical"); AA64Reg rn = parse_reg(d); expect_comma(d, "logical"); + if (!peek_is_reg(d)) { + /* Bitmask-immediate form. AND/ORR/EOR use the SP-capable destination; + * ANDS uses ZR. Rn is always a GPR (caller's parse_reg already enforced + * GP for the two register operands). 
*/ + if (N) asm_driver_panic(d, "asm: logical: immediate form has no negation"); + if (rd.is64 != rn.is64) + asm_driver_panic(d, "asm: logical: width mismatch"); + u64 imm = (u64)parse_imm_const(d); + u32 bN = 0, immr = 0, imms = 0; + if (!aa64_logimm_encode(imm, rd.is64, &bN, &immr, &imms)) + asm_driver_panic(d, "asm: logical: immediate is not a valid bitmask"); + emit32(d, aa64_logimm_pack((AA64LogImm){.sf = rd.is64, + .opc = opc, + .N = bN, + .immr = immr, + .imms = imms, + .Rn = rn.num, + .Rd = rd.num})); + return; + } AA64Reg rm = parse_reg(d); if (rd.is64 != rn.is64 || rd.is64 != rm.is64) asm_driver_panic(d, "asm: logical: width mismatch"); @@ -1190,6 +1222,49 @@ static void p_dp2(AsmDriver* d, u32 opcode) { emit32(d, word); } +/* Shift aliases: `<op> Rd, Rn, (Rm | #imm)`. + * register form → LSLV/LSRV/ASRV (DP2 variable shift) + * immediate form → UBFM (lsl/lsr) / SBFM (asr) bitfield alias + * `kind` indexes the three shifts: 0=lsl 1=lsr 2=asr. The immediate aliases + * are exactly what the disassembler prints for these UBFM/SBFM encodings, so + * `cc -S | as` round-trips. (ROR's immediate form is EXTR, which the + * disassembler doesn't decode, so it is left out — `rorv` covers the register + * rotate.) */ +static void p_shift(AsmDriver* d, u32 kind) { + static const u32 dp2op[3] = {AA64_DP2_LSLV_OP, AA64_DP2_LSRV_OP, + AA64_DP2_ASRV_OP}; + AA64Reg rd = parse_reg(d); + expect_comma(d, "shift"); + AA64Reg rn = parse_reg(d); + if (rd.is64 != rn.is64) asm_driver_panic(d, "asm: shift: width mismatch"); + expect_comma(d, "shift"); + if (peek_is_reg(d)) { + AA64Reg rm = parse_reg(d); + if (rd.is64 != rm.is64) asm_driver_panic(d, "asm: shift: width mismatch"); + emit32(d, aa64_dp2_pack((AA64DP2){.sf = rd.is64, + .opcode = dp2op[kind], + .Rm = rm.num, + .Rn = rn.num, + .Rd = rd.num})); + return; + } + i64 sv = parse_imm_const(d); + u32 width = rd.is64 ? 
64u : 32u; + if (sv < 0 || (u64)sv >= width) + asm_driver_panic(d, "asm: shift: amount out of range"); + u32 shift = (u32)sv, immr = 0, imms = 0; + if (kind == 2) { /* asr → SBFM */ + aa64_asr_imm_fields(shift, rd.is64, &immr, &imms); + emit32(d, aa64_bitfield(rd.is64, 0u, immr, imms, rd.num, rn.num)); + } else { /* lsl/lsr → UBFM */ + if (kind == 0) + aa64_lsl_imm_fields(shift, rd.is64, &immr, &imms); + else + aa64_lsr_imm_fields(shift, rd.is64, &immr, &imms); + emit32(d, aa64_bitfield(rd.is64, 2u, immr, imms, rd.num, rn.num)); + } +} + /* Branch immediate / conditional / compare-and-branch. */ static void emit_branch_imm(AsmDriver* d, u32 op_bl, ObjSymId target, @@ -1790,6 +1865,9 @@ static void p_lslv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_LSLV_OP); } static void p_lsrv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_LSRV_OP); } static void p_asrv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_ASRV_OP); } static void p_rorv_w(AsmDriver* d) { p_dp2(d, AA64_DP2_RORV_OP); } +static void p_lsl_(AsmDriver* d) { p_shift(d, 0); } +static void p_lsr_(AsmDriver* d) { p_shift(d, 1); } +static void p_asr_(AsmDriver* d) { p_shift(d, 2); } static void p_b_(AsmDriver* d) { p_b(d, 0); } static void p_bl_(AsmDriver* d) { p_b(d, 1); } static void p_cbz_(AsmDriver* d) { p_cbz(d, 0); } @@ -2294,6 +2372,9 @@ static const AA64Mn kTable[] = { {"lsrv", p_lsrv_w, 0}, {"asrv", p_asrv_w, 0}, {"rorv", p_rorv_w, 0}, + {"lsl", p_lsl_, 0}, + {"lsr", p_lsr_, 0}, + {"asr", p_asr_, 0}, {"b", p_b_, 0}, {"bl", p_bl_, 0}, {"cbz", p_cbz_, 0}, diff --git a/test/asm/encode/aa64_shift_logimm_alias.expected.hex b/test/asm/encode/aa64_shift_logimm_alias.expected.hex @@ -0,0 +1 @@ +ae1d4092ef1d001228010012200c1c32620c1c52a40440f2316e1c53287d085328f17dd3a47c02132020c21a8324c59ae628c81ac0035fd6 diff --git a/test/asm/encode/aa64_shift_logimm_alias.s b/test/asm/encode/aa64_shift_logimm_alias.s @@ -0,0 +1,21 @@ +.text +t: + # Logical bitmask-immediate forms (AND/ORR/EOR/ANDS Rd, Rn, #imm). 
The + # disassembler prints these for the logical-immediate encodings, so the + # assembler must parse them back for `cc -S | as` to round-trip. + and x14, x13, #0xff + and w15, w15, #0xff + and w8, w9, #0x1 + orr w0, w1, #0xf0 + eor w2, w3, #0xf0 + ands x4, x5, #0x3 + # Immediate shift aliases (UBFM/SBFM underneath). + lsl w17, w17, #4 + lsr w8, w9, #8 + lsl x8, x9, #3 + asr w4, w5, #2 + # Register (variable) shift aliases — same encodings as lslv/lsrv/asrv. + lsl w0, w1, w2 + lsr x3, x4, x5 + asr w6, w7, w8 + ret diff --git a/test/asm/encode/aa64_shift_logimm_alias.targets b/test/asm/encode/aa64_shift_logimm_alias.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/symmetry.baseline b/test/asm/symmetry.baseline @@ -64,6 +64,3 @@ encode-only: aa64_lse_atomics 0xb8e08041 encode-only: aa64_lse_atomics 0xf8200041 encode-only: aa64_lse_atomics 0xf8208041 encode-only: aa64_lse_atomics 0xf8a03041 -encode-only: aa64_mov_orr_bitmask 0x3204cfe4 -encode-only: aa64_mov_orr_bitmask 0xb201f3e3 -encode-only: aa64_mov_orr_bitmask 0xb204c3e1