kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit dfb02970752ef0b82a41854c3f10e23ec96c3a4f
parent c7fed8abc537edbc37702a44e41072127053dd46
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 15:23:17 -0700

aa64 asm: encode ldst addressing modes, atomics/exclusive/LSE, mov ORR-bitmask

Adds to the standalone assembler: (1) mov Rd,#imm via the ORR logical-bitmask
alias when the immediate is not movz/movn-encodable; (2) register-offset
[Xn,Xm{,LSL#s}] / [Xn,Wm,{S,U}XTW{#s}] and pre/post-index [Xn,#i]! / [Xn],#i
load/store addressing modes; (3) the exclusive (ldxr/stxr/ldaxr/stlxr),
acquire/release (ldar/stlr), compare-and-swap (cas{,a,l,al}), and LSE
(swp/ldadd/ldclr/ldeor/ldset + a/l/al + b/h) atomic families. New isa.h encoders
AA64LdStRegOff/WBack/Ex/Cas/LseAtomic (with unpack helpers for a future decoder).
Corpus: aa64_{mov_orr_bitmask,ldst_regoff,ldst_pre_post_index,exclusive_load_store,
load_acquire_store_release,compare_and_swap,lse_atomics}, byte-verified vs llvm-mc.

Diffstat:
M src/arch/aa64/asm.c | 544 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/arch/aa64/isa.h | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/asm/encode/aa64_compare_and_swap.expected.hex | 1 +
A test/asm/encode/aa64_compare_and_swap.s | 16 ++++++++++++++++
A test/asm/encode/aa64_compare_and_swap.targets | 1 +
A test/asm/encode/aa64_exclusive_load_store.expected.hex | 1 +
A test/asm/encode/aa64_exclusive_load_store.s | 17 +++++++++++++++++
A test/asm/encode/aa64_exclusive_load_store.targets | 1 +
A test/asm/encode/aa64_ldst_pre_post_index.expected.hex | 1 +
A test/asm/encode/aa64_ldst_pre_post_index.s | 19 +++++++++++++++++++
A test/asm/encode/aa64_ldst_pre_post_index.targets | 1 +
A test/asm/encode/aa64_ldst_regoff.expected.hex | 1 +
A test/asm/encode/aa64_ldst_regoff.s | 24 ++++++++++++++++++++++++
A test/asm/encode/aa64_ldst_regoff.targets | 1 +
A test/asm/encode/aa64_load_acquire_store_release.expected.hex | 1 +
A test/asm/encode/aa64_load_acquire_store_release.s | 9 +++++++++
A test/asm/encode/aa64_load_acquire_store_release.targets | 1 +
A test/asm/encode/aa64_lse_atomics.expected.hex | 1 +
A test/asm/encode/aa64_lse_atomics.s | 23 +++++++++++++++++++++++
A test/asm/encode/aa64_lse_atomics.targets | 1 +
A test/asm/encode/aa64_mov_orr_bitmask.expected.hex | 1 +
A test/asm/encode/aa64_mov_orr_bitmask.s | 5 +++++
A test/asm/encode/aa64_mov_orr_bitmask.targets | 1 +
23 files changed, 858 insertions(+), 8 deletions(-)

diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -477,6 +477,14 @@ static void p_mov(AsmDriver* d) { return; } } + /* Try the ORR-bitmask alias (mov Rd,#imm → ORR Rd,ZR,#bitmask). */ + { + u32 N = 0, immr = 0, imms = 0; + if (aa64_logimm_encode(uv, rd.is64, &N, &immr, &imms)) { + emit32(d, aa64_orr_imm(rd.is64, rd.num, AA64_ZR, N, immr, imms)); + return; + } + } asm_driver_panic(d, "asm: mov: immediate cannot be encoded in one insn"); } @@ -904,18 +912,77 @@ static void p_cbz(AsmDriver* d, u32 op) { off, 1, 0); } -/* Memory-operand parser for [Xn], [Xn, #imm], [Xn, #imm]!. +/* Memory-operand parser. Recognized shapes: + * [Xn] base only + * [Xn, #imm] base + immediate offset + * [Xn, #imm]! pre-index (writeback) + * [Xn], #imm post-index (writeback) + * [Xn, Xm] register offset (LSL #0) + * [Xn, Xm, LSL #s] register offset, scaled + * [Xn, Wm, {U,S}XTW {#s}] 32-bit index, extended + * [Xn, Xm, {U,S}XTX {#s}] / SXTX 64-bit index, extended * - * pre_index_out is 1 when the closing `]!` appeared (pre-indexed). - * imm is the literal byte offset (no scaling). */ + * imm is the literal byte offset (no scaling). When has_index is set, + * `index` is the index register, `option` its 3-bit extend code, and + * shift_present records whether an explicit `#s` was written (with the + * amount in `shift`). pre_index / post_index flag the writeback forms. */ typedef struct AA64Mem { AA64Reg base; + AA64Reg index; i64 imm; /* byte offset (literal as written) */ + u32 option; + u32 shift; u8 pre_index; + u8 post_index; u8 has_offset; - u8 pad[2]; + u8 has_index; + u8 shift_present; + u8 pad[3]; } AA64Mem; +/* Parse the optional extend/shift modifier of a register-offset memory + * operand: `LSL #s`, `UXTW {#s}`, `SXTW {#s}`, `UXTX {#s}`, `SXTX {#s}`. + * The index register width (32 vs 64) must agree with the extend kind. + * Fills m->option / m->shift / m->shift_present. 
*/ +static void parse_mem_extend(AsmDriver* d, AA64Mem* m) { + AsmTok t = asm_driver_next(d); + if (t.kind != ASM_TOK_IDENT) + asm_driver_panic(d, "asm: ldr/str: expected extend (lsl/sxtw/uxtw/...)"); + Slice sl = pool_slice(asm_driver_pool(d), t.v.ident); + const char* p = sl.s; + size_t n = sl.len; + int need64 = 0; /* index must be 64-bit */ + if (icase_eq(p, n, "lsl") || icase_eq(p, n, "uxtx")) { + m->option = AA64_LDST_OPTION_LSL; + need64 = 1; + } else if (icase_eq(p, n, "sxtx")) { + m->option = AA64_LDST_OPTION_SXTX; + need64 = 1; + } else if (icase_eq(p, n, "uxtw")) { + m->option = AA64_LDST_OPTION_UXTW; + need64 = 0; + } else if (icase_eq(p, n, "sxtw")) { + m->option = AA64_LDST_OPTION_SXTW; + need64 = 0; + } else { + asm_driver_panic(d, "asm: ldr/str: unknown index extend"); + } + if (need64 && !m->index.is64) + asm_driver_panic(d, "asm: ldr/str: index must be 64-bit for this extend"); + if (!need64 && m->index.is64) + asm_driver_panic(d, "asm: ldr/str: index must be 32-bit for sxtw/uxtw"); + /* LSL requires an explicit shift; the extends accept an optional one. */ + AsmTok nt = asm_driver_peek(d); + if (tok_punct(nt, '#') || nt.kind == ASM_TOK_NUM) { + i64 s = parse_imm_const(d); + if (s < 0) asm_driver_panic(d, "asm: ldr/str: negative index shift"); + m->shift = (u32)s; + m->shift_present = 1; + } else if (m->option == AA64_LDST_OPTION_LSL) { + asm_driver_panic(d, "asm: ldr/str: lsl requires a shift amount"); + } +} + static AA64Mem parse_mem(AsmDriver* d) { AA64Mem m; memset(&m, 0, sizeof m); @@ -925,11 +992,35 @@ static AA64Mem parse_mem(AsmDriver* d) { asm_driver_panic(d, "asm: ldr/str: base register must be 64-bit"); require_sp_spelling(d, m.base, "ldr/str base"); if (asm_driver_eat_comma(d)) { + /* Either `#imm`/expression (immediate offset) or a register index. 
*/ + AsmTok t = asm_driver_peek(d); + AA64Reg idx; + memset(&idx, 0, sizeof idx); + if (t.kind == ASM_TOK_IDENT && parse_reg_from_ident(d, t.v.ident, &idx)) { + (void)asm_driver_next(d); + reject_sp_reg(d, idx, "ldr/str index"); + m.index = idx; + m.has_index = 1; + m.option = idx.is64 ? AA64_LDST_OPTION_LSL : AA64_LDST_OPTION_UXTW; + if (asm_driver_eat_comma(d)) parse_mem_extend(d, &m); + } else { + m.imm = parse_imm_const(d); + m.has_offset = 1; + } + } + if (!asm_driver_eat_punct(d, ']')) asm_driver_panic(d, "asm: expected ']'"); + if (asm_driver_eat_punct(d, '!')) { + if (m.has_index) + asm_driver_panic(d, "asm: ldr/str: writeback not allowed with index"); + m.pre_index = 1; + } else if (asm_driver_eat_comma(d)) { + /* Post-index: `[Xn], #imm`. */ + if (m.has_index || m.has_offset) + asm_driver_panic(d, "asm: ldr/str: malformed post-index operand"); m.imm = parse_imm_const(d); m.has_offset = 1; + m.post_index = 1; } - if (!asm_driver_eat_punct(d, ']')) asm_driver_panic(d, "asm: expected ']'"); - if (asm_driver_eat_punct(d, '!')) m.pre_index = 1; return m; } @@ -950,7 +1041,44 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, : !sign_ext ? AA64_LDST_OPC_LDR : rt.is64 ? 2u /* LDRS*, 64-bit dst */ : 3u; /* LDRS*, 32-bit dst */ - if (!m.pre_index) { + if (m.has_index) { + /* Register-offset form. The S bit (scale by access size) is set + * when an explicit shift was written; the amount must equal the + * access log2-size. */ + u32 S = 0; + if (m.shift_present) { + if (m.shift != size) + asm_driver_panic(d, "asm: ldr/str: index shift must equal access size"); + S = 1; + } + u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size, + .V = 0, + .opc = opc, + .Rm = m.index.num, + .option = m.option, + .S = S, + .Rn = m.base.num, + .Rt = rt.num}); + emit32(d, word); + return; + } + if (m.pre_index || m.post_index) { + /* Immediate writeback (unscaled signed imm9). 
*/ + if (m.imm < -256 || m.imm > 255) + asm_driver_panic(d, "asm: ldr/str: writeback imm9 out of range"); + u32 imm9 = (u32)((u64)m.imm & 0x1ffu); + u32 idx = m.pre_index ? AA64_LDST_IDX_PRE : AA64_LDST_IDX_POST; + u32 word = aa64_ldst_wback_pack((AA64LdStWBack){.size = size, + .V = 0, + .opc = opc, + .imm9 = imm9, + .idx = idx, + .Rn = m.base.num, + .Rt = rt.num}); + emit32(d, word); + return; + } + { /* Try scaled unsigned-imm12 first. */ u32 scale = 1u << size; if (m.imm >= 0 && (i64)((u64)m.imm % scale) == 0 && @@ -979,7 +1107,6 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, } asm_driver_panic(d, "asm: ldr/str: immediate out of range"); } - asm_driver_panic(d, "asm: ldr/str: pre-indexed form not yet supported"); } /* ldr/str: access width follows the register (Wt=word, Xt=dword). */ @@ -1067,6 +1194,129 @@ static void p_adr(AsmDriver* d, int is_adrp) { mc->emit_reloc_at(mc, asm_driver_cur_section(d), ofs, k, sym, off, 1, 0); } +/* ---- atomics / exclusive ---- + * + * Every form here addresses a bare base register `[Xn]` (no offset, no + * index, no writeback). parse_mem already rejects malformed shapes; we + * additionally reject any offset/index so `ldxr w0,[x1,#4]` is an error, + * matching llvm/gas. */ +static AA64Mem parse_mem_bare(AsmDriver* d, const char* what) { + AA64Mem m = parse_mem(d); + if (m.has_offset || m.has_index || m.pre_index || m.post_index) + asm_driver_panic(d, "asm: %.*s: expected bare [Xn] address", + SLICE_ARG(slice_from_cstr(what))); + return m; +} + +/* Map an access log2-size (0..3) onto the GPR width the operand register + * must have: byte/half/word use Wt (32-bit), dword uses Xt (64-bit). */ +static void require_gpr_width(AsmDriver* d, AA64Reg r, u32 size, + const char* what) { + reject_sp_reg(d, r, what); + u32 want64 = (size == 3u) ? 
1u : 0u; + if ((u32)r.is64 != want64) + asm_driver_panic(d, "asm: %.*s: register width mismatch", + SLICE_ARG(slice_from_cstr(what))); +} + +/* Load-exclusive / load-acquire: `<op> Wt|Xt, [Xn]`. + * o2/o0 select the family member (see aa64_ldstex_pack). size is the + * access log2-size; Rs/Rt2 are fixed to 11111. */ +static void p_ldex(AsmDriver* d, u32 size, u32 o2, u32 o0, const char* what) { + AA64Reg rt = parse_reg(d); + require_gpr_width(d, rt, size, what); + expect_comma(d, what); + AA64Mem m = parse_mem_bare(d, what); + emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, + .o2 = o2, + .L = 1u, + .o1 = 0u, + .Rs = AA64_ZR, + .o0 = o0, + .Rt2 = AA64_ZR, + .Rn = m.base.num, + .Rt = rt.num})); +} + +/* Store-release without status: `stlr Wt|Xt, [Xn]` (o2=1, L=0, o0=1). */ +static void p_stlr(AsmDriver* d, u32 size, const char* what) { + AA64Reg rt = parse_reg(d); + require_gpr_width(d, rt, size, what); + expect_comma(d, what); + AA64Mem m = parse_mem_bare(d, what); + emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, + .o2 = 1u, + .L = 0u, + .o1 = 0u, + .Rs = AA64_ZR, + .o0 = 1u, + .Rt2 = AA64_ZR, + .Rn = m.base.num, + .Rt = rt.num})); +} + +/* Store-exclusive with status: `<op> Ws, Wt|Xt, [Xn]` (L=0). Ws (the + * 32-bit status result) must be a W register and distinct from Rt/Rn. */ +static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) { + AA64Reg rs = parse_reg(d); + reject_sp_reg(d, rs, what); + if (rs.is64) asm_driver_panic(d, "asm: %.*s: status reg must be 32-bit", + SLICE_ARG(slice_from_cstr(what))); + expect_comma(d, what); + AA64Reg rt = parse_reg(d); + require_gpr_width(d, rt, size, what); + expect_comma(d, what); + AA64Mem m = parse_mem_bare(d, what); + emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, + .o2 = 0u, + .L = 0u, + .o1 = 0u, + .Rs = rs.num, + .o0 = o0, + .Rt2 = AA64_ZR, + .Rn = m.base.num, + .Rt = rt.num})); +} + +/* Compare-and-swap: `<op> Ws, Wt, [Xn]` / `<op> Xs, Xt, [Xn]`. 
Rs and Rt + * share the operand width selected by `size` (word or dword). */ +static void p_cas(AsmDriver* d, u32 size, u32 L, u32 o0, const char* what) { + AA64Reg rs = parse_reg(d); + require_gpr_width(d, rs, size, what); + expect_comma(d, what); + AA64Reg rt = parse_reg(d); + require_gpr_width(d, rt, size, what); + expect_comma(d, what); + AA64Mem m = parse_mem_bare(d, what); + emit32(d, aa64_cas_pack((AA64Cas){.size = size, + .L = L, + .Rs = rs.num, + .o0 = o0, + .Rn = m.base.num, + .Rt = rt.num})); +} + +/* LSE atomic memory op: `<op> Ws, Wt, [Xn]` / `<op> Xs, Xt, [Xn]`. + * o3=1 selects SWP; otherwise opc names LDADD/LDCLR/LDEOR/LDSET. */ +static void p_lse(AsmDriver* d, u32 size, u32 A, u32 R, u32 o3, u32 opc, + const char* what) { + AA64Reg rs = parse_reg(d); + require_gpr_width(d, rs, size, what); + expect_comma(d, what); + AA64Reg rt = parse_reg(d); + require_gpr_width(d, rt, size, what); + expect_comma(d, what); + AA64Mem m = parse_mem_bare(d, what); + emit32(d, aa64_lse_atomic_pack((AA64LseAtomic){.size = size, + .A = A, + .R = R, + .Rs = rs.num, + .o3 = o3, + .opc = opc, + .Rn = m.base.num, + .Rt = rt.num})); +} + /* ---- mnemonic dispatch table ---- */ typedef void (*P_Fn)(AsmDriver*); @@ -1344,6 +1594,193 @@ static void p_fmov(AsmDriver* d) { } } +/* ---- atomics / exclusive wrappers ---- + * + * Access log2-sizes: byte=0, half=1, word=2, dword=3. The w/x variants + * share a mnemonic stem (e.g. `ldxr`) and pick the size from the operand + * register width — the encoders key on the explicit size, so a width- + * sensing wrapper peeks the operand register before dispatching. */ +#define AA64_ATOMIC_SIZE_B 0u +#define AA64_ATOMIC_SIZE_H 1u +#define AA64_ATOMIC_SIZE_W 2u +#define AA64_ATOMIC_SIZE_X 3u + +/* Load-exclusive family: o2,o0 select ldxr/ldaxr/ldar. */ +#define DEF_LDEX(fn, sz, o2, o0, name) \ + static void fn(AsmDriver* d) { p_ldex(d, sz, o2, o0, name); } +/* ldxr / ldxrb / ldxrh: o2=0 o0=0. 
The non-b/h stem derives size from + * the register width, so we route it through a width-sensing wrapper. */ +static void p_ldxr_wx(AsmDriver* d) { + /* Peek the destination register to choose word vs dword size. */ + AsmTok t = asm_driver_peek(d); + AA64Reg r; + memset(&r, 0, sizeof r); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) + asm_driver_panic(d, "asm: ldxr: expected register"); + p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 0u, 0u, "ldxr"); +} +DEF_LDEX(p_ldxrb, AA64_ATOMIC_SIZE_B, 0u, 0u, "ldxrb") +DEF_LDEX(p_ldxrh, AA64_ATOMIC_SIZE_H, 0u, 0u, "ldxrh") +static void p_ldaxr_wx(AsmDriver* d) { + AsmTok t = asm_driver_peek(d); + AA64Reg r; + memset(&r, 0, sizeof r); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) + asm_driver_panic(d, "asm: ldaxr: expected register"); + p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 0u, 1u, "ldaxr"); +} +DEF_LDEX(p_ldaxrb, AA64_ATOMIC_SIZE_B, 0u, 1u, "ldaxrb") +DEF_LDEX(p_ldaxrh, AA64_ATOMIC_SIZE_H, 0u, 1u, "ldaxrh") +static void p_ldar_wx(AsmDriver* d) { + AsmTok t = asm_driver_peek(d); + AA64Reg r; + memset(&r, 0, sizeof r); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) + asm_driver_panic(d, "asm: ldar: expected register"); + p_ldex(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, 1u, 1u, "ldar"); +} +DEF_LDEX(p_ldarb, AA64_ATOMIC_SIZE_B, 1u, 1u, "ldarb") +DEF_LDEX(p_ldarh, AA64_ATOMIC_SIZE_H, 1u, 1u, "ldarh") + +/* stlr (no status): width-driven for the non-b/h stem. */ +static void p_stlr_wx(AsmDriver* d) { + AsmTok t = asm_driver_peek(d); + AA64Reg r; + memset(&r, 0, sizeof r); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) + asm_driver_panic(d, "asm: stlr: expected register"); + p_stlr(d, r.is64 ? 
AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, "stlr"); +} +static void p_stlrb_(AsmDriver* d) { p_stlr(d, AA64_ATOMIC_SIZE_B, "stlrb"); } +static void p_stlrh_(AsmDriver* d) { p_stlr(d, AA64_ATOMIC_SIZE_H, "stlrh"); } + +/* Store-exclusive family: o0 selects stxr vs stlxr. Status reg is always + * 32-bit; the stored value reg drives the size for the non-b/h stem. */ +static void p_stxr_wx(AsmDriver* d) { + AA64Reg rs = parse_reg(d); + reject_sp_reg(d, rs, "stxr"); + if (rs.is64) asm_driver_panic(d, "asm: stxr: status reg must be 32-bit"); + expect_comma(d, "stxr"); + AsmTok t = asm_driver_peek(d); + AA64Reg rt; + memset(&rt, 0, sizeof rt); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &rt)) + asm_driver_panic(d, "asm: stxr: expected value register"); + u32 size = rt.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W; + rt = parse_reg(d); + require_gpr_width(d, rt, size, "stxr"); + expect_comma(d, "stxr"); + AA64Mem m = parse_mem_bare(d, "stxr"); + emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, + .o2 = 0u, + .L = 0u, + .o1 = 0u, + .Rs = rs.num, + .o0 = 0u, + .Rt2 = AA64_ZR, + .Rn = m.base.num, + .Rt = rt.num})); +} +static void p_stlxr_wx(AsmDriver* d) { + AA64Reg rs = parse_reg(d); + reject_sp_reg(d, rs, "stlxr"); + if (rs.is64) asm_driver_panic(d, "asm: stlxr: status reg must be 32-bit"); + expect_comma(d, "stlxr"); + AsmTok t = asm_driver_peek(d); + AA64Reg rt; + memset(&rt, 0, sizeof rt); + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &rt)) + asm_driver_panic(d, "asm: stlxr: expected value register"); + u32 size = rt.is64 ? 
AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W; + rt = parse_reg(d); + require_gpr_width(d, rt, size, "stlxr"); + expect_comma(d, "stlxr"); + AA64Mem m = parse_mem_bare(d, "stlxr"); + emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, + .o2 = 0u, + .L = 0u, + .o1 = 0u, + .Rs = rs.num, + .o0 = 1u, + .Rt2 = AA64_ZR, + .Rn = m.base.num, + .Rt = rt.num})); +} +static void p_stxrb_(AsmDriver* d) { p_stex(d, AA64_ATOMIC_SIZE_B, 0u, "stxrb"); } +static void p_stxrh_(AsmDriver* d) { p_stex(d, AA64_ATOMIC_SIZE_H, 0u, "stxrh"); } +static void p_stlxrb_(AsmDriver* d) { + p_stex(d, AA64_ATOMIC_SIZE_B, 1u, "stlxrb"); +} +static void p_stlxrh_(AsmDriver* d) { + p_stex(d, AA64_ATOMIC_SIZE_H, 1u, "stlxrh"); +} + +/* CAS family: width-driven for the non-b/h stems (Rs/Rt are same width). */ +#define DEF_CAS(fn, L, o0, name) \ + static void fn##_wx(AsmDriver* d) { \ + AsmTok t = asm_driver_peek(d); \ + AA64Reg r; \ + memset(&r, 0, sizeof r); \ + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) \ + asm_driver_panic(d, "asm: " name ": expected register"); \ + p_cas(d, r.is64 ? AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, L, o0, \ + name); \ + } \ + static void fn##b(AsmDriver* d) { \ + p_cas(d, AA64_ATOMIC_SIZE_B, L, o0, name "b"); \ + } \ + static void fn##h(AsmDriver* d) { \ + p_cas(d, AA64_ATOMIC_SIZE_H, L, o0, name "h"); \ + } +DEF_CAS(p_cas, 0u, 0u, "cas") +DEF_CAS(p_casa, 1u, 0u, "casa") +DEF_CAS(p_casl, 0u, 1u, "casl") +DEF_CAS(p_casal, 1u, 1u, "casal") + +/* LSE atomic family: A/R from the suffix, o3/opc from the stem. Each + * mnemonic generates a width-driven stem plus b/h wrappers. */ +#define DEF_LSE(fn, A, R, o3, opc, name) \ + static void fn##_wx(AsmDriver* d) { \ + AsmTok t = asm_driver_peek(d); \ + AA64Reg r; \ + memset(&r, 0, sizeof r); \ + if (t.kind != ASM_TOK_IDENT || !parse_reg_from_ident(d, t.v.ident, &r)) \ + asm_driver_panic(d, "asm: " name ": expected register"); \ + p_lse(d, r.is64 ? 
AA64_ATOMIC_SIZE_X : AA64_ATOMIC_SIZE_W, A, R, o3, \ + opc, name); \ + } \ + static void fn##b(AsmDriver* d) { \ + p_lse(d, AA64_ATOMIC_SIZE_B, A, R, o3, opc, name "b"); \ + } \ + static void fn##h(AsmDriver* d) { \ + p_lse(d, AA64_ATOMIC_SIZE_H, A, R, o3, opc, name "h"); \ + } +/* SWP (o3=1, opc=000). */ +DEF_LSE(p_swp, 0u, 0u, 1u, AA64_LSE_OPC_SWP, "swp") +DEF_LSE(p_swpa, 1u, 0u, 1u, AA64_LSE_OPC_SWP, "swpa") +DEF_LSE(p_swpl, 0u, 1u, 1u, AA64_LSE_OPC_SWP, "swpl") +DEF_LSE(p_swpal, 1u, 1u, 1u, AA64_LSE_OPC_SWP, "swpal") +/* LDADD. */ +DEF_LSE(p_ldadd, 0u, 0u, 0u, AA64_LSE_OPC_LDADD, "ldadd") +DEF_LSE(p_ldadda, 1u, 0u, 0u, AA64_LSE_OPC_LDADD, "ldadda") +DEF_LSE(p_ldaddl, 0u, 1u, 0u, AA64_LSE_OPC_LDADD, "ldaddl") +DEF_LSE(p_ldaddal, 1u, 1u, 0u, AA64_LSE_OPC_LDADD, "ldaddal") +/* LDCLR. */ +DEF_LSE(p_ldclr, 0u, 0u, 0u, AA64_LSE_OPC_LDCLR, "ldclr") +DEF_LSE(p_ldclra, 1u, 0u, 0u, AA64_LSE_OPC_LDCLR, "ldclra") +DEF_LSE(p_ldclrl, 0u, 1u, 0u, AA64_LSE_OPC_LDCLR, "ldclrl") +DEF_LSE(p_ldclral, 1u, 1u, 0u, AA64_LSE_OPC_LDCLR, "ldclral") +/* LDEOR. */ +DEF_LSE(p_ldeor, 0u, 0u, 0u, AA64_LSE_OPC_LDEOR, "ldeor") +DEF_LSE(p_ldeora, 1u, 0u, 0u, AA64_LSE_OPC_LDEOR, "ldeora") +DEF_LSE(p_ldeorl, 0u, 1u, 0u, AA64_LSE_OPC_LDEOR, "ldeorl") +DEF_LSE(p_ldeoral, 1u, 1u, 0u, AA64_LSE_OPC_LDEOR, "ldeoral") +/* LDSET. 
*/ +DEF_LSE(p_ldset, 0u, 0u, 0u, AA64_LSE_OPC_LDSET, "ldset") +DEF_LSE(p_ldseta, 1u, 0u, 0u, AA64_LSE_OPC_LDSET, "ldseta") +DEF_LSE(p_ldsetl, 0u, 1u, 0u, AA64_LSE_OPC_LDSET, "ldsetl") +DEF_LSE(p_ldsetal, 1u, 1u, 0u, AA64_LSE_OPC_LDSET, "ldsetal") + static const AA64Mn kTable[] = { {"fadd", p_fadd, 0}, {"fsub", p_fsub, 0}, @@ -1433,6 +1870,97 @@ static const AA64Mn kTable[] = { {"stp", p_stp_, 0}, {"adr", p_adr_, 0}, {"adrp", p_adrp_, 0}, + /* ---- atomics / exclusive ---- */ + {"ldxr", p_ldxr_wx, 0}, + {"ldxrb", p_ldxrb, 0}, + {"ldxrh", p_ldxrh, 0}, + {"ldaxr", p_ldaxr_wx, 0}, + {"ldaxrb", p_ldaxrb, 0}, + {"ldaxrh", p_ldaxrh, 0}, + {"ldar", p_ldar_wx, 0}, + {"ldarb", p_ldarb, 0}, + {"ldarh", p_ldarh, 0}, + {"stxr", p_stxr_wx, 0}, + {"stxrb", p_stxrb_, 0}, + {"stxrh", p_stxrh_, 0}, + {"stlxr", p_stlxr_wx, 0}, + {"stlxrb", p_stlxrb_, 0}, + {"stlxrh", p_stlxrh_, 0}, + {"stlr", p_stlr_wx, 0}, + {"stlrb", p_stlrb_, 0}, + {"stlrh", p_stlrh_, 0}, + {"cas", p_cas_wx, 0}, + {"casb", p_casb, 0}, + {"cash", p_cash, 0}, + {"casa", p_casa_wx, 0}, + {"casab", p_casab, 0}, + {"casah", p_casah, 0}, + {"casl", p_casl_wx, 0}, + {"caslb", p_caslb, 0}, + {"caslh", p_caslh, 0}, + {"casal", p_casal_wx, 0}, + {"casalb", p_casalb, 0}, + {"casalh", p_casalh, 0}, + {"swp", p_swp_wx, 0}, + {"swpb", p_swpb, 0}, + {"swph", p_swph, 0}, + {"swpa", p_swpa_wx, 0}, + {"swpab", p_swpab, 0}, + {"swpah", p_swpah, 0}, + {"swpl", p_swpl_wx, 0}, + {"swplb", p_swplb, 0}, + {"swplh", p_swplh, 0}, + {"swpal", p_swpal_wx, 0}, + {"swpalb", p_swpalb, 0}, + {"swpalh", p_swpalh, 0}, + {"ldadd", p_ldadd_wx, 0}, + {"ldaddb", p_ldaddb, 0}, + {"ldaddh", p_ldaddh, 0}, + {"ldadda", p_ldadda_wx, 0}, + {"ldaddab", p_ldaddab, 0}, + {"ldaddah", p_ldaddah, 0}, + {"ldaddl", p_ldaddl_wx, 0}, + {"ldaddlb", p_ldaddlb, 0}, + {"ldaddlh", p_ldaddlh, 0}, + {"ldaddal", p_ldaddal_wx, 0}, + {"ldaddalb", p_ldaddalb, 0}, + {"ldaddalh", p_ldaddalh, 0}, + {"ldclr", p_ldclr_wx, 0}, + {"ldclrb", p_ldclrb, 0}, + {"ldclrh", p_ldclrh, 0}, + 
{"ldclra", p_ldclra_wx, 0}, + {"ldclrab", p_ldclrab, 0}, + {"ldclrah", p_ldclrah, 0}, + {"ldclrl", p_ldclrl_wx, 0}, + {"ldclrlb", p_ldclrlb, 0}, + {"ldclrlh", p_ldclrlh, 0}, + {"ldclral", p_ldclral_wx, 0}, + {"ldclralb", p_ldclralb, 0}, + {"ldclralh", p_ldclralh, 0}, + {"ldeor", p_ldeor_wx, 0}, + {"ldeorb", p_ldeorb, 0}, + {"ldeorh", p_ldeorh, 0}, + {"ldeora", p_ldeora_wx, 0}, + {"ldeorab", p_ldeorab, 0}, + {"ldeorah", p_ldeorah, 0}, + {"ldeorl", p_ldeorl_wx, 0}, + {"ldeorlb", p_ldeorlb, 0}, + {"ldeorlh", p_ldeorlh, 0}, + {"ldeoral", p_ldeoral_wx, 0}, + {"ldeoralb", p_ldeoralb, 0}, + {"ldeoralh", p_ldeoralh, 0}, + {"ldset", p_ldset_wx, 0}, + {"ldsetb", p_ldsetb, 0}, + {"ldseth", p_ldseth, 0}, + {"ldseta", p_ldseta_wx, 0}, + {"ldsetab", p_ldsetab, 0}, + {"ldsetah", p_ldsetah, 0}, + {"ldsetl", p_ldsetl_wx, 0}, + {"ldsetlb", p_ldsetlb, 0}, + {"ldsetlh", p_ldsetlh, 0}, + {"ldsetal", p_ldsetal_wx, 0}, + {"ldsetalb", p_ldsetalb, 0}, + {"ldsetalh", p_ldsetalh, 0}, {"b.eq", p_b_eq, 0}, {"b.ne", p_b_ne, 0}, {"b.cs", p_b_cs, 0}, diff --git a/src/arch/aa64/isa.h b/src/arch/aa64/isa.h @@ -1135,6 +1135,201 @@ static inline AA64LdStSimm9 aa64_ldst_simm9_unpack(u32 w) { } /* ==================================================================== + * Load/store, register offset (LDR/STR Rt,[Xn,Rm{,extend{#s}}]). + * size(2) 111 V(1) 00 opc(2) 1 Rm(5) option(3) S(1) 10 Rn(5) Rt(5) + * 31..30 29..27 26 25..24 23..22 21 20..16 15..13 12 11..10 9..5 4..0 + * + * option selects the index extend: 010=UXTW, 011=LSL/UXTX, 110=SXTW, + * 111=SXTX. S=1 scales the index by the access size (log2 = size); S=0 + * leaves it unscaled. opc/size match the uimm12 form. */ + +#define AA64_LDST_REGOFF_FAMILY_MATCH 0x38200800u +/* bits 29:27 (=111), 25:24 (=00), 21 (=1), 11:10 (=10). */ +#define AA64_LDST_REGOFF_FAMILY_MASK 0x3B200C00u + +/* Index-extend option encodings. */ +#define AA64_LDST_OPTION_UXTW 2u +#define AA64_LDST_OPTION_LSL 3u /* a.k.a. 
UXTX for 64-bit index */ +#define AA64_LDST_OPTION_SXTW 6u +#define AA64_LDST_OPTION_SXTX 7u + +typedef struct AA64LdStRegOff { + u32 size, V, opc, Rm, option, S, Rn, Rt; +} AA64LdStRegOff; + +static inline u32 aa64_ldst_regoff_pack(AA64LdStRegOff f) { + return ((f.size & 3u) << 30) | AA64_LDST_REGOFF_FAMILY_MATCH | + ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.Rm & 0x1fu) << 16) | + ((f.option & 7u) << 13) | ((f.S & 1u) << 12) | ((f.Rn & 0x1fu) << 5) | + (f.Rt & 0x1fu); +} + +static inline AA64LdStRegOff aa64_ldst_regoff_unpack(u32 w) { + AA64LdStRegOff f; + f.size = (w >> 30) & 3u; + f.V = (w >> 26) & 1u; + f.opc = (w >> 22) & 3u; + f.Rm = (w >> 16) & 0x1fu; + f.option = (w >> 13) & 7u; + f.S = (w >> 12) & 1u; + f.Rn = (w >> 5) & 0x1fu; + f.Rt = w & 0x1fu; + return f; +} + +/* ==================================================================== + * Load/store, immediate pre/post-index (writeback). + * size(2) 111 V(1) 00 opc(2) 0 imm9(9) idx(2) Rn(5) Rt(5) + * 31..30 29..27 26 25..24 23..22 21 20..12 11..10 9..5 4..0 + * + * idx (bits[11:10]) selects: 00=unscaled (LDUR, no writeback — see the + * SIMM9 helpers above), 01=post-index, 11=pre-index. imm9 is the + * unscaled signed byte offset (-256..255). 
*/ + +#define AA64_LDST_IDX_POST 1u +#define AA64_LDST_IDX_PRE 3u + +typedef struct AA64LdStWBack { + u32 size, V, opc, imm9, idx, Rn, Rt; +} AA64LdStWBack; + +static inline u32 aa64_ldst_wback_pack(AA64LdStWBack f) { + return ((f.size & 3u) << 30) | AA64_LDST_SIMM9_FAMILY_MATCH | + ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.imm9 & 0x1ffu) << 12) | + ((f.idx & 3u) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); +} + +static inline AA64LdStWBack aa64_ldst_wback_unpack(u32 w) { + AA64LdStWBack f; + f.size = (w >> 30) & 3u; + f.V = (w >> 26) & 1u; + f.opc = (w >> 22) & 3u; + f.imm9 = (w >> 12) & 0x1ffu; + f.idx = (w >> 10) & 3u; + f.Rn = (w >> 5) & 0x1fu; + f.Rt = w & 0x1fu; + return f; +} + +/* ==================================================================== + * Load/store exclusive (LDXR/STXR + acquire/release variants). + * size(2) 001000 o2(1) L(1) o1(1) Rs(5) o0(1) Rt2(5) Rn(5) Rt(5) + * 31..30 29..24 23 22 21 20..16 15 14..10 9..5 4..0 + * + * size: 00=byte,01=half,10=word,11=dword. o1=0 for the LDXR/STXR + * single-register family (CAS sets o1=1 via the CAS pack below). + * LDXR: L=1 o0=0 o2=0 STXR: L=0 o0=0 o2=0 + * LDAXR: L=1 o0=1 o2=0 STLXR: L=0 o0=1 o2=0 + * LDAR: L=1 o0=1 o2=1 STLR: L=0 o0=1 o2=1 + * For LDXR/LDAXR/LDAR/STLR, Rs and Rt2 are unused (encode 11111). */ + +#define AA64_LDSTEX_FAMILY_MATCH 0x08000000u +/* bits 29:24 (=001000). 
*/ +#define AA64_LDSTEX_FAMILY_MASK 0x3F000000u + +typedef struct AA64LdStEx { + u32 size, o2, L, o1, Rs, o0, Rt2, Rn, Rt; +} AA64LdStEx; + +static inline u32 aa64_ldstex_pack(AA64LdStEx f) { + return ((f.size & 3u) << 30) | AA64_LDSTEX_FAMILY_MATCH | + ((f.o2 & 1u) << 23) | ((f.L & 1u) << 22) | ((f.o1 & 1u) << 21) | + ((f.Rs & 0x1fu) << 16) | ((f.o0 & 1u) << 15) | ((f.Rt2 & 0x1fu) << 10) | + ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); +} + +static inline AA64LdStEx aa64_ldstex_unpack(u32 w) { + AA64LdStEx f; + f.size = (w >> 30) & 3u; + f.o2 = (w >> 23) & 1u; + f.L = (w >> 22) & 1u; + f.o1 = (w >> 21) & 1u; + f.Rs = (w >> 16) & 0x1fu; + f.o0 = (w >> 15) & 1u; + f.Rt2 = (w >> 10) & 0x1fu; + f.Rn = (w >> 5) & 0x1fu; + f.Rt = w & 0x1fu; + return f; +} + +/* ==================================================================== + * Compare and swap (CAS / CASA / CASL / CASAL + b/h variants, LSE). + * size(2) 001000 1 L(1) 1 Rs(5) o0(1) 11111 Rn(5) Rt(5) + * 31..30 29..23 . 22 . 20..16 15 14..10 9..5 4..0 + * + * CAS: L=0 o0=0 CASA: L=1 o0=0 + * CASL: L=0 o0=1 CASAL: L=1 o0=1 + * Rt2 (bits[14:10]) is fixed at 11111. */ + +#define AA64_CAS_FAMILY_MATCH 0x08a07c00u +/* bits 29:24 (=001000), 23 (=1), 21 (=1), 14:10 (=11111). */ +#define AA64_CAS_FAMILY_MASK 0x3Fa0fc00u + +typedef struct AA64Cas { + u32 size, L, Rs, o0, Rn, Rt; +} AA64Cas; + +static inline u32 aa64_cas_pack(AA64Cas f) { + return ((f.size & 3u) << 30) | AA64_CAS_FAMILY_MATCH | ((f.L & 1u) << 22) | + ((f.Rs & 0x1fu) << 16) | ((f.o0 & 1u) << 15) | ((f.Rn & 0x1fu) << 5) | + (f.Rt & 0x1fu); +} + +static inline AA64Cas aa64_cas_unpack(u32 w) { + AA64Cas f; + f.size = (w >> 30) & 3u; + f.L = (w >> 22) & 1u; + f.Rs = (w >> 16) & 0x1fu; + f.o0 = (w >> 15) & 1u; + f.Rn = (w >> 5) & 0x1fu; + f.Rt = w & 0x1fu; + return f; +} + +/* ==================================================================== + * LSE atomic memory operations (SWP / LDADD / LDCLR / LDEOR / LDSET + + * acquire/release variants and b/h widths). 
+ * size(2) 111 V(1) 00 A(1) R(1) 1 Rs(5) o3(1) opc(3) 00 Rn(5) Rt(5) + * 31..30 29..27 26 25..24 23 22 21 20..16 15 14..12 11..10 9..5 4..0 + * + * A=acquire (a-suffix), R=release (l-suffix). o3=1 selects SWP (opc=000); + * o3=0 with opc in {000=LDADD,001=LDCLR,010=LDEOR,011=LDSET}. */ + +#define AA64_LSE_ATOMIC_FAMILY_MATCH 0x38200000u +/* bits 29:27 (=111), 25:24 (=00), 21 (=1), 11:10 (=00). */ +#define AA64_LSE_ATOMIC_FAMILY_MASK 0x3B200C00u + +#define AA64_LSE_OPC_LDADD 0u +#define AA64_LSE_OPC_LDCLR 1u +#define AA64_LSE_OPC_LDEOR 2u +#define AA64_LSE_OPC_LDSET 3u +#define AA64_LSE_OPC_SWP 0u /* paired with o3=1 */ + +typedef struct AA64LseAtomic { + u32 size, A, R, Rs, o3, opc, Rn, Rt; +} AA64LseAtomic; + +static inline u32 aa64_lse_atomic_pack(AA64LseAtomic f) { + return ((f.size & 3u) << 30) | AA64_LSE_ATOMIC_FAMILY_MATCH | + ((f.A & 1u) << 23) | ((f.R & 1u) << 22) | ((f.Rs & 0x1fu) << 16) | + ((f.o3 & 1u) << 15) | ((f.opc & 7u) << 12) | ((f.Rn & 0x1fu) << 5) | + (f.Rt & 0x1fu); +} + +static inline AA64LseAtomic aa64_lse_atomic_unpack(u32 w) { + AA64LseAtomic f; + f.size = (w >> 30) & 3u; + f.A = (w >> 23) & 1u; + f.R = (w >> 22) & 1u; + f.Rs = (w >> 16) & 0x1fu; + f.o3 = (w >> 15) & 1u; + f.opc = (w >> 12) & 7u; + f.Rn = (w >> 5) & 0x1fu; + f.Rt = w & 0x1fu; + return f; +} + +/* ==================================================================== * Unconditional branch (immediate) — B / BL * op(1) 00101 imm26(26) * 31 30..26 25..0 diff --git a/test/asm/encode/aa64_compare_and_swap.expected.hex b/test/asm/encode/aa64_compare_and_swap.expected.hex @@ -0,0 +1 @@ +417ca088417ca0c8417ce088417ce0c841fca08841fce08841fce0c8417ca008417ce00841fca00841fce008417ca048417ce04841fca04841fce048 diff --git a/test/asm/encode/aa64_compare_and_swap.s b/test/asm/encode/aa64_compare_and_swap.s @@ -0,0 +1,16 @@ + .text + cas w0, w1, [x2] + cas x0, x1, [x2] + casa w0, w1, [x2] + casa x0, x1, [x2] + casl w0, w1, [x2] + casal w0, w1, [x2] + casal x0, x1, [x2] + casb w0, w1, [x2] + 
casab w0, w1, [x2] + caslb w0, w1, [x2] + casalb w0, w1, [x2] + cash w0, w1, [x2] + casah w0, w1, [x2] + caslh w0, w1, [x2] + casalh w0, w1, [x2] diff --git a/test/asm/encode/aa64_compare_and_swap.targets b/test/asm/encode/aa64_compare_and_swap.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_exclusive_load_store.expected.hex b/test/asm/encode/aa64_exclusive_load_store.expected.hex @@ -0,0 +1 @@ +207c5f88207c5fc8207c5f08207c5f48207c0288207c02c8207c0208207c024820fc5f8820fc5fc820fc5f0820fc5f4820fc028820fc02c820fc020820fc0248 diff --git a/test/asm/encode/aa64_exclusive_load_store.s b/test/asm/encode/aa64_exclusive_load_store.s @@ -0,0 +1,17 @@ + .text + ldxr w0, [x1] + ldxr x0, [x1] + ldxrb w0, [x1] + ldxrh w0, [x1] + stxr w2, w0, [x1] + stxr w2, x0, [x1] + stxrb w2, w0, [x1] + stxrh w2, w0, [x1] + ldaxr w0, [x1] + ldaxr x0, [x1] + ldaxrb w0, [x1] + ldaxrh w0, [x1] + stlxr w2, w0, [x1] + stlxr w2, x0, [x1] + stlxrb w2, w0, [x1] + stlxrh w2, w0, [x1] diff --git a/test/asm/encode/aa64_exclusive_load_store.targets b/test/asm/encode/aa64_exclusive_load_store.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_ldst_pre_post_index.expected.hex b/test/asm/encode/aa64_ldst_pre_post_index.expected.hex @@ -0,0 +1 @@ +208c40f8208440f8200c10f820f40ff8204c40b820c45fb8204c00b8201c40382014403820fc1f38202c407820240078201c80382014c038202c80782024c078204480b8204c80b8 diff --git a/test/asm/encode/aa64_ldst_pre_post_index.s b/test/asm/encode/aa64_ldst_pre_post_index.s @@ -0,0 +1,19 @@ + .text + ldr x0, [x1, #8]! + ldr x0, [x1], #8 + str x0, [x1, #-256]! + str x0, [x1], #255 + ldr w0, [x1, #4]! + ldr w0, [x1], #-4 + str w0, [x1, #4]! + ldrb w0, [x1, #1]! + ldrb w0, [x1], #1 + strb w0, [x1, #-1]! + ldrh w0, [x1, #2]! + strh w0, [x1], #2 + ldrsb x0, [x1, #1]! + ldrsb w0, [x1], #1 + ldrsh x0, [x1, #2]! + ldrsh w0, [x1], #2 + ldrsw x0, [x1], #4 + ldrsw x0, [x1, #4]! 
diff --git a/test/asm/encode/aa64_ldst_pre_post_index.targets b/test/asm/encode/aa64_ldst_pre_post_index.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_ldst_regoff.expected.hex b/test/asm/encode/aa64_ldst_regoff.expected.hex @@ -0,0 +1 @@ +206862f8207862f8836825f8837825f8206862b8207862b8207822b820c862f820d862f8205862f820f862f82068623820c86238206822382068627820786278207822782068a2382068e2382078a2782078e2782078a2b820d8a2b8 diff --git a/test/asm/encode/aa64_ldst_regoff.s b/test/asm/encode/aa64_ldst_regoff.s @@ -0,0 +1,24 @@ + .text + ldr x0, [x1, x2] + ldr x0, [x1, x2, lsl #3] + str x3, [x4, x5] + str x3, [x4, x5, lsl #3] + ldr w0, [x1, x2] + ldr w0, [x1, x2, lsl #2] + str w0, [x1, x2, lsl #2] + ldr x0, [x1, w2, sxtw] + ldr x0, [x1, w2, sxtw #3] + ldr x0, [x1, w2, uxtw #3] + ldr x0, [x1, x2, sxtx #3] + ldrb w0, [x1, x2] + ldrb w0, [x1, w2, sxtw] + strb w0, [x1, x2] + ldrh w0, [x1, x2] + ldrh w0, [x1, x2, lsl #1] + strh w0, [x1, x2, lsl #1] + ldrsb x0, [x1, x2] + ldrsb w0, [x1, x2] + ldrsh x0, [x1, x2, lsl #1] + ldrsh w0, [x1, x2, lsl #1] + ldrsw x0, [x1, x2, lsl #2] + ldrsw x0, [x1, w2, sxtw #2] diff --git a/test/asm/encode/aa64_ldst_regoff.targets b/test/asm/encode/aa64_ldst_regoff.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_load_acquire_store_release.expected.hex b/test/asm/encode/aa64_load_acquire_store_release.expected.hex @@ -0,0 +1 @@ +20fcdf8820fcdfc820fcdf0820fcdf4820fc9f8820fc9fc820fc9f0820fc9f48 diff --git a/test/asm/encode/aa64_load_acquire_store_release.s b/test/asm/encode/aa64_load_acquire_store_release.s @@ -0,0 +1,9 @@ + .text + ldar w0, [x1] + ldar x0, [x1] + ldarb w0, [x1] + ldarh w0, [x1] + stlr w0, [x1] + stlr x0, [x1] + stlrb w0, [x1] + stlrh w0, [x1] diff --git a/test/asm/encode/aa64_load_acquire_store_release.targets b/test/asm/encode/aa64_load_acquire_store_release.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_lse_atomics.expected.hex b/test/asm/encode/aa64_lse_atomics.expected.hex @@ -0,0 +1 @@ 
+418020b8418020f84180a0b8418060b84180e0b84180203841802078410020b8410020f84100a0b8410060b84100e0b84100203841002078411020b8411060b8412020b84120e0b8413020b84130a0f84130203841102078 diff --git a/test/asm/encode/aa64_lse_atomics.s b/test/asm/encode/aa64_lse_atomics.s @@ -0,0 +1,23 @@ + .text + swp w0, w1, [x2] + swp x0, x1, [x2] + swpa w0, w1, [x2] + swpl w0, w1, [x2] + swpal w0, w1, [x2] + swpb w0, w1, [x2] + swph w0, w1, [x2] + ldadd w0, w1, [x2] + ldadd x0, x1, [x2] + ldadda w0, w1, [x2] + ldaddl w0, w1, [x2] + ldaddal w0, w1, [x2] + ldaddb w0, w1, [x2] + ldaddh w0, w1, [x2] + ldclr w0, w1, [x2] + ldclrl w0, w1, [x2] + ldeor w0, w1, [x2] + ldeoral w0, w1, [x2] + ldset w0, w1, [x2] + ldseta x0, x1, [x2] + ldsetb w0, w1, [x2] + ldclrh w0, w1, [x2] diff --git a/test/asm/encode/aa64_lse_atomics.targets b/test/asm/encode/aa64_lse_atomics.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/encode/aa64_mov_orr_bitmask.expected.hex b/test/asm/encode/aa64_mov_orr_bitmask.expected.hex @@ -0,0 +1 @@ +e1c304b2e3f301b2e4cf0432e01f80d2 diff --git a/test/asm/encode/aa64_mov_orr_bitmask.s b/test/asm/encode/aa64_mov_orr_bitmask.s @@ -0,0 +1,5 @@ + .text + mov x1, #0x1010101010101010 + mov x3, #0xaaaaaaaaaaaaaaaa + mov w4, #0xf0f0f0f0 + mov x0, #0xff diff --git a/test/asm/encode/aa64_mov_orr_bitmask.targets b/test/asm/encode/aa64_mov_orr_bitmask.targets @@ -0,0 +1 @@ +aa64