kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 89ec3480b48b5f7c95293b8df207700f11b3e7f0
parent 3c940a4af437a98fd7b190e07c0d1c2136e3ddda
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 20:20:50 -0700

aa64: decode load/store exclusive + acquire/release atomics

Adversarial round-trip of _Atomic RMW on a global surfaced a decode gap:
the disassembler rendered ldaxr/stlxr/ldar (the exclusive-monitor sequence
codegen emits for atomics) as .inst, so cc -S dropped them and the
round-trip lost the atomic ops entirely. The assembler already encoded the
whole family; only the decode side was missing.

Add AA64_FMT_LDST_EXCL + print_ldst_excl and 14 table rows covering
ldxr/ldaxr/ldar/stxr/stlxr/stlr (one row each decodes both Wt and Xt, via a
mask that leaves size bit 30 unconstrained) and the byte/half
ldaxrb/ldaxrh/ldarb/ldarh/stlxrb/stlxrh/stlrb/stlrh. The printer keys the
register width on size and the operand shape (Ws,Rt,[Xn] vs Rt,[Xn]) on
L/o2.

Atomics were the one core-op family the corpus fan-out missed; add
roundtrip/atomic_{rmw,cas,ops} and decode case aa64_ldst_excl. Round-trip
aa64 852/0/1; asm/ISA-unit/inline green.

Diffstat:
M src/arch/aa64/isa.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/aa64/isa.h | 2 ++
A test/asm/decode/aa64_ldst_excl.expected.txt | 20 ++++++++++++++++++++
A test/asm/decode/aa64_ldst_excl.hex | 1 +
A test/asm/decode/aa64_ldst_excl.targets | 1 +
A test/asm/roundtrip/atomic_cas.c | 9 +++++++++
A test/asm/roundtrip/atomic_cas.expected | 1 +
A test/asm/roundtrip/atomic_ops.c | 10 ++++++++++
A test/asm/roundtrip/atomic_ops.expected | 1 +
A test/asm/roundtrip/atomic_rmw.c | 10 ++++++++++
A test/asm/roundtrip/atomic_rmw.expected | 1 +
11 files changed, 105 insertions(+), 0 deletions(-)

diff --git a/src/arch/aa64/isa.c b/src/arch/aa64/isa.c @@ -310,6 +310,27 @@ const AA64InsnDesc aa64_insn_table[] = { AA64_ASMFL_SF1, {0, 0}}, + /* ----- Load/store exclusive + acquire/release ordered ----- + * Family bits[29:24]=001000, o1[21]=0 (single register). The word/dword + * mnemonics leave size bit30 free (mask 0xBFE08000) so one row decodes + * both Wt and Xt; the byte/half mnemonics pin the full size (0xFFE08000). + * print_ldst_excl keys the register width on size and the operand shape on + * L[22]/o2[23]. */ + {MN("ldxr"), 0x88400000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldaxr"), 0x88408000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldar"), 0x88C08000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stxr"), 0x88000000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlxr"), 0x88008000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlr"), 0x88808000u, 0xBFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldaxrb"), 0x08408000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldaxrh"), 0x48408000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldarb"), 0x08C08000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("ldarh"), 0x48C08000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlxrb"), 0x08008000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlxrh"), 0x48008000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlrb"), 0x08808000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + {MN("stlrh"), 0x48808000u, 0xFFE08000u, AA64_FMT_LDST_EXCL, 0, {0, 0}}, + /* ----- Load/store pair, pre-indexed (opc=10 X / opc=01 D) ----- */ {MN("stp"), 0xA9800000u, @@ -739,6 +760,31 @@ static void print_ldst_simm9(StrBuf* sb, u32 w, const AA64InsnDesc* d) { strbuf_putc(sb, ']'); } +/* Load/store exclusive (LDXR/LDAXR/STXR/STLXR) and acquire/release ordered + * (LDAR/STLR), incl. byte/half variants. 
Fields: size[31:30] picks the + * transfer register width (Wt for byte/half/word, Xt for dword); o2[23] and + * L[22] select the form; Rs[20:16] is the store-exclusive status register + * (Ws). A store-exclusive (L=0, o2=0) prints `Ws, Rt, [Xn]`; everything else + * (loads + store-release) prints `Rt, [Xn]`. */ +static void print_ldst_excl(StrBuf* sb, u32 w) { + u32 size = (w >> 30) & 3u; + u32 o2 = (w >> 23) & 1u; + u32 L = (w >> 22) & 1u; + u32 Rs = (w >> 16) & 0x1fu; + u32 Rn = (w >> 5) & 0x1fu; + u32 Rt = w & 0x1fu; + int sf = (size == 3u); + int store_excl = (L == 0u && o2 == 0u); + if (store_excl) { + emit_reg(sb, Rs, /*sf=*/0, 0); /* Ws: status result, always 32-bit */ + strbuf_puts(sb, ", "); + } + emit_reg(sb, Rt, sf, 0); + strbuf_puts(sb, ", ["); + emit_reg(sb, Rn, /*sf=*/1, 1); + strbuf_putc(sb, ']'); +} + static void print_ldstp_common(StrBuf* sb, AA64LdStPPre f, int pre) { /* opc=10 → 64-bit X; opc=00 → 32-bit W; opc=01 (V=1) → D (FP); * opc=00 (V=1) → S; opc=10 (V=1) → Q (not yet emitted). 
*/ @@ -1081,6 +1127,9 @@ void aa64_print_operands(StrBuf* sb, const AA64InsnDesc* desc, u32 word, case AA64_FMT_LDST_SIMM9: print_ldst_simm9(sb, word, desc); break; + case AA64_FMT_LDST_EXCL: + print_ldst_excl(sb, word); + break; case AA64_FMT_BR_IMM: print_br_imm(sb, word, vaddr); break; diff --git a/src/arch/aa64/isa.h b/src/arch/aa64/isa.h @@ -66,6 +66,8 @@ typedef enum AA64Format { AA64_FMT_FP_CVT, /* FP precision convert (FCVT single<->double) */ AA64_FMT_FP_INT_CVT, /* FP<->int convert + FMOV gpr<->fp * (SCVTF/UCVTF/FCVTZS/FCVTZU/FMOV) */ + AA64_FMT_LDST_EXCL, /* load/store exclusive + acquire/release ordered + * (LDXR/LDAXR/STXR/STLXR/LDAR/STLR + b/h) */ } AA64Format; /* ---- AsmFlags column on AA64InsnDesc ---- diff --git a/test/asm/decode/aa64_ldst_excl.expected.txt b/test/asm/decode/aa64_ldst_excl.expected.txt @@ -0,0 +1,20 @@ +0: ldxr w0, [x1] +4: ldaxr w2, [x3] +8: ldar w4, [x5] +c: stxr w6, w7, [x8] +10: stlxr w9, w10, [x11] +14: stlr w12, [x13] +18: ldxr x14, [x15] +1c: ldaxr x16, [x17] +20: ldar x18, [x19] +24: stxr w20, x21, [x22] +28: stlxr w23, x24, [x25] +2c: stlr x26, [x27] +30: ldaxrb w0, [x1] +34: ldaxrh w2, [x3] +38: ldarb w4, [x5] +3c: ldarh w6, [x7] +40: stlxrb w8, w9, [x10] +44: stlxrh w11, w12, [x13] +48: stlrb w14, [x15] +4c: stlrh w16, [x17] diff --git a/test/asm/decode/aa64_ldst_excl.hex b/test/asm/decode/aa64_ldst_excl.hex @@ -0,0 +1 @@ +207c5f8862fc5f88a4fcdf88077d06886afd0988acfd9f88ee7d5fc830fe5fc872fedfc8d57e14c838ff17c87aff9fc820fc5f0862fc5f48a4fcdf08e6fcdf4849fd0808acfd0b48eefd9f0830fe9f48 diff --git a/test/asm/decode/aa64_ldst_excl.targets b/test/asm/decode/aa64_ldst_excl.targets @@ -0,0 +1 @@ +aa64 diff --git a/test/asm/roundtrip/atomic_cas.c b/test/asm/roundtrip/atomic_cas.c @@ -0,0 +1,9 @@ +/* Atomic compare-exchange on a global: the ldaxr/stlxr CAS loop plus the + * branch on success. Exit: g becomes 42. 
*/ +_Atomic int g = 40; +int test_main(void) { + int expected = 40; + __atomic_compare_exchange_n(&g, &expected, 42, 0, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); + return __atomic_load_n(&g, __ATOMIC_SEQ_CST); +} diff --git a/test/asm/roundtrip/atomic_cas.expected b/test/asm/roundtrip/atomic_cas.expected @@ -0,0 +1 @@ +42 diff --git a/test/asm/roundtrip/atomic_ops.c b/test/asm/roundtrip/atomic_ops.c @@ -0,0 +1,10 @@ +/* A spread of atomic RMW operators (and/or/xor/sub) on a global, exercising + * the exclusive-monitor loop for each. Exit computed to 42. */ +_Atomic unsigned g = 0xFF; +int test_main(void) { + __atomic_fetch_and(&g, 0x3F, __ATOMIC_SEQ_CST); /* 0x3F = 63 */ + __atomic_fetch_or(&g, 0x40, __ATOMIC_SEQ_CST); /* 0x7F = 127 */ + __atomic_fetch_xor(&g, 0x01, __ATOMIC_SEQ_CST); /* 0x7E = 126 */ + __atomic_fetch_sub(&g, 84, __ATOMIC_SEQ_CST); /* 42 */ + return (int)__atomic_load_n(&g, __ATOMIC_SEQ_CST); +} diff --git a/test/asm/roundtrip/atomic_ops.expected b/test/asm/roundtrip/atomic_ops.expected @@ -0,0 +1 @@ +42 diff --git a/test/asm/roundtrip/atomic_rmw.c b/test/asm/roundtrip/atomic_rmw.c @@ -0,0 +1,10 @@ +/* Atomic read-modify-write on a file-scope _Atomic global. The address + * escapes (global), so codegen emits the real exclusive-monitor sequence + * (ldaxr/stlxr loop) plus a barrier, and the final load is ldar — the + * acquire/release/exclusive atomics the disassembler must decode and the + * round-trip re-encode. Exit: 30 + 8 + 4 = 42. */ +_Atomic int g = 30; +int test_main(void) { + __atomic_fetch_add(&g, 8, __ATOMIC_SEQ_CST); + return __atomic_load_n(&g, __ATOMIC_SEQ_CST) + 4; +} diff --git a/test/asm/roundtrip/atomic_rmw.expected b/test/asm/roundtrip/atomic_rmw.expected @@ -0,0 +1 @@ +42