commit ba3ebe2a0a399273258207688f862f1ea92a5e0f
parent 633475291878a3af13d9e73ba962a37590ae3826
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 16:00:39 -0700
asm: fix 5 issues found by adversarial review of the session diff
- x64 (high): emit_rm_imm_store_operand wrote a 4-byte immediate for 16-bit
imm->mem stores (movw/addw $imm,mem); now emits 2 bytes under the 0x66 prefix
with trailing=2 so the (%rip) reloc addend is also correct. Byte-identical to
llvm-mc (new x64_memop_imm16_store corpus).
- aa64 (med): an explicit #0 index shift on a non-byte register-offset ldst
was rejected; #0 is legal there and stays unscaled (S=0), matching llvm-mc
(legal amounts are 0 or the access log2-size). Added lsl#0 / sxtw#0 to
aa64_ldst_regoff.
- aa64 (med): :got_lo12: was accepted on stores / signed / sub-word loads,
emitting a bogus R_AARCH64_LD64_GOT_LO12_NC; now restricted to a 64-bit ldr.
- aa64 (low): STXR/STLXR (+b/h) accepted a status register aliasing the value
or base reg (ARM UNPREDICTABLE); now rejected. An SP base is exempt: as a base
#31 names SP, whereas as the status register it would be WZR.
- rv64 link (robustness): guard a ULEB128 SET/SUB reloc offset against the
section end at layout time so the variable-width apply scan starts in-bounds
on a malformed external object.
Diffstat:
8 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c
@@ -1125,7 +1125,12 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
: 3u; /* LDRS*, 32-bit dst */
if (m.reloc_mod != AA64_RELMOD_NONE) {
/* [Xn, :lo12:sym] / [Xn, :got_lo12:sym] — unsigned-imm12 form with a zero
- * immediate; the relocation supplies the low 12 bits. */
+ * immediate; the relocation supplies the low 12 bits. :got_lo12: only
+ * applies to a 64-bit `ldr` (the GOT entry is an 8-byte pointer); llvm-mc
+ * rejects it on stores, signed loads, and sub-word loads. */
+ if (m.reloc_mod == AA64_RELMOD_GOT_LO12 &&
+ !(size == 3 && opc == AA64_LDST_OPC_LDR))
+ asm_driver_panic(d, "asm: :got_lo12: only valid on a 64-bit ldr");
u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size,
.V = 0,
.opc = opc,
@@ -1142,14 +1147,16 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size,
return;
}
if (m.has_index) {
- /* Register-offset form. The S bit (scale by access size) is set
- * when an explicit shift was written; the amount must equal the
- * access log2-size. */
+ /* Register-offset form. The S bit (scale by access size) is set when an
+ * explicit shift equal to the access log2-size is written. On wider accesses
+ * an explicit `#0` is legal and stays unscaled (S=0); on byte accesses #0 is
+ * the log2-size, so it sets S. llvm-mc likewise accepts only 0 or log2-size. */
u32 S = 0;
if (m.shift_present) {
- if (m.shift != size)
- asm_driver_panic(d, "asm: ldr/str: index shift must equal access size");
- S = 1;
+ if (m.shift == size)
+ S = 1;
+ else if (m.shift != 0)
+ asm_driver_panic(d, "asm: ldr/str: index shift must be 0 or access size");
}
u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size,
.V = 0,
@@ -1364,6 +1371,20 @@ static void p_stlr(AsmDriver* d, u32 size, const char* what) {
/* Store-exclusive with status: `<op> Ws, Wt|Xt, [Xn]` (L=0). Ws (the
* 32-bit status result) must be a W register and distinct from Rt/Rn. */
+/* Store-exclusive constraint (ARM ARM): diagnose (naming `what`) a status reg
+ * `rs` with the same number as the value `rt` or the base of `m`, which is
+ * UNPREDICTABLE. An SP base is exempt: as a base #31 names SP, whereas as a
+ * status reg it would be WZR. CAS/LSE atomics do NOT share this rule. */
+static void reject_stex_alias(AsmDriver* d, AA64Reg rs, AA64Reg rt, AA64Mem m,
+ const char* what) {
+ if (rs.num == rt.num)
+ asm_driver_panic(d, "asm: %.*s: status reg cannot be the value reg",
+ SLICE_ARG(slice_from_cstr(what)));
+ if (!m.base.is_sp && rs.num == m.base.num)
+ asm_driver_panic(d, "asm: %.*s: status reg cannot be the base reg",
+ SLICE_ARG(slice_from_cstr(what)));
+}
+
static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) {
AA64Reg rs = parse_reg(d);
reject_sp_reg(d, rs, what);
@@ -1374,6 +1395,7 @@ static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) {
require_gpr_width(d, rt, size, what);
expect_comma(d, what);
AA64Mem m = parse_mem_bare(d, what);
+ reject_stex_alias(d, rs, rt, m, what);
emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size,
.o2 = 0u,
.L = 0u,
@@ -1778,6 +1800,7 @@ static void p_stxr_wx(AsmDriver* d) {
require_gpr_width(d, rt, size, "stxr");
expect_comma(d, "stxr");
AA64Mem m = parse_mem_bare(d, "stxr");
+ reject_stex_alias(d, rs, rt, m, "stxr");
emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size,
.o2 = 0u,
.L = 0u,
@@ -1803,6 +1826,7 @@ static void p_stlxr_wx(AsmDriver* d) {
require_gpr_width(d, rt, size, "stlxr");
expect_comma(d, "stlxr");
AA64Mem m = parse_mem_bare(d, "stlxr");
+ reject_stex_alias(d, rs, rt, m, "stlxr");
emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size,
.o2 = 0u,
.L = 0u,
diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c
@@ -423,6 +423,12 @@ static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
} else if (use_i8) {
buf[n++] = (u8)(i8)imm;
trailing = 1u;
+ } else if (size == 2u) {
+ /* 16-bit operand size: a 2-byte immediate (under the 0x66 prefix). */
+ u16 v = (u16)imm;
+ buf[n++] = (u8)v;
+ buf[n++] = (u8)(v >> 8);
+ trailing = 2u;
} else {
n += x64_put_u32le(buf + n, (u32)(i32)imm);
trailing = 4u;
diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c
@@ -1282,6 +1282,15 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map,
if (rec.width == 0)
compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u",
(unsigned)r->kind);
+ /* The ULEB128 SET/SUB relocs are variable-width (their true length is
+ * read from the bytes at apply time, so rec.width is only a sentinel).
+ * Guard the offset against the section end here — where the size is
+ * known — so the apply-time scan starts in-bounds even for a malformed
+ * external object. */
+ if ((rec.kind == R_RV_SET_ULEB128 || rec.kind == R_RV_SUB_ULEB128) &&
+ rec.offset >= ls->size)
+ compiler_panic(l->c, no_loc(),
+ "link: ULEB128 reloc offset past section end");
*link_append_reloc_slot(img) = rec;
}
}
diff --git a/test/asm/encode/aa64_ldst_regoff.expected.hex b/test/asm/encode/aa64_ldst_regoff.expected.hex
@@ -1 +1 @@
-206862f8207862f8836825f8837825f8206862b8207862b8207822b820c862f820d862f8205862f820f862f82068623820c86238206822382068627820786278207822782068a2382068e2382078a2782078e2782078a2b820d8a2b8
+206862f820c862f8206862f8207862f8836825f8837825f8206862b8207862b8207822b820c862f820d862f8205862f820f862f82068623820c86238206822382068627820786278207822782068a2382068e2382078a2782078e2782078a2b820d8a2b8
diff --git a/test/asm/encode/aa64_ldst_regoff.s b/test/asm/encode/aa64_ldst_regoff.s
@@ -1,3 +1,5 @@
+ ldr x0,[x1,x2,lsl #0]
+ ldr x0,[x1,w2,sxtw #0]
.text
ldr x0, [x1, x2]
ldr x0, [x1, x2, lsl #3]
diff --git a/test/asm/encode/x64_memop_imm16_store.expected.hex b/test/asm/encode/x64_memop_imm16_store.expected.hex
@@ -0,0 +1 @@
+66c700341266c7430805006681013412c3
diff --git a/test/asm/encode/x64_memop_imm16_store.s b/test/asm/encode/x64_memop_imm16_store.s
@@ -0,0 +1,6 @@
+.text
+t:
+ movw $0x1234, (%rax)
+ movw $5, 8(%rbx)
+ addw $0x1234, (%rcx)
+ ret
diff --git a/test/asm/encode/x64_memop_imm16_store.targets b/test/asm/encode/x64_memop_imm16_store.targets
@@ -0,0 +1 @@
+x64
+\ No newline at end of file