kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ba3ebe2a0a399273258207688f862f1ea92a5e0f
parent 633475291878a3af13d9e73ba962a37590ae3826
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 16:00:39 -0700

asm: fix 5 issues found by adversarial review of the session diff

- x64 (high): emit_rm_imm_store_operand wrote a 4-byte immediate for 16-bit
  imm->mem stores (movw/addw $imm,mem); now emits 2 bytes under the 0x66 prefix
  with trailing=2 so the (%rip) reloc addend is also correct. Byte-identical to
  llvm-mc (new x64_memop_imm16_store corpus).
- aa64 (med): an explicit #0 index shift on a non-byte register-offset ldst was
  rejected; #0 is legal and stays unscaled (S=0), matching llvm (the only legal
  amounts are 0 or the access log2-size). Added lsl#0 / sxtw#0 to aa64_ldst_regoff.
- aa64 (med): :got_lo12: was accepted on stores / signed / sub-word loads,
  emitting a bogus R_AARCH64_LD64_GOT_LO12_NC; now restricted to a 64-bit ldr.
- aa64 (low): STXR/STLXR (+b/h) accepted a status register aliasing the value
  or base reg (ARM UNPREDICTABLE); now rejected (an SP base is exempt, since
  reg #31 there names SP, not the WZR the status reg would be).
- rv64 link (robustness): guard a ULEB128 SET/SUB reloc offset against the
  section end at layout time so the variable-width apply scan starts in-bounds
  on a malformed external object.

Diffstat:
M src/arch/aa64/asm.c | 38 +++++++++++++++++++++++++++++++-------
M src/arch/x64/asm.c | 6 ++++++
M src/link/link_reloc_layout.c | 9 +++++++++
M test/asm/encode/aa64_ldst_regoff.expected.hex | 2 +-
M test/asm/encode/aa64_ldst_regoff.s | 2 ++
A test/asm/encode/x64_memop_imm16_store.expected.hex | 1 +
A test/asm/encode/x64_memop_imm16_store.s | 6 ++++++
A test/asm/encode/x64_memop_imm16_store.targets | 2 ++
8 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -1125,7 +1125,12 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, : 3u; /* LDRS*, 32-bit dst */ if (m.reloc_mod != AA64_RELMOD_NONE) { /* [Xn, :lo12:sym] / [Xn, :got_lo12:sym] — unsigned-imm12 form with a zero - * immediate; the relocation supplies the low 12 bits. */ + * immediate; the relocation supplies the low 12 bits. :got_lo12: only + * applies to a 64-bit `ldr` (the GOT entry is an 8-byte pointer); llvm-mc + * rejects it on stores, signed loads, and sub-word loads. */ + if (m.reloc_mod == AA64_RELMOD_GOT_LO12 && + !(size == 3 && opc == AA64_LDST_OPC_LDR)) + asm_driver_panic(d, "asm: :got_lo12: only valid on a 64-bit ldr"); u32 word = aa64_ldst_uimm_pack((AA64LdStUimm){.size = size, .V = 0, .opc = opc, @@ -1142,14 +1147,16 @@ static void p_ldst_core(AsmDriver* d, int is_load, int fixed_size, return; } if (m.has_index) { - /* Register-offset form. The S bit (scale by access size) is set - * when an explicit shift was written; the amount must equal the - * access log2-size. */ + /* Register-offset form. The S bit (scale by access size) is set when an + * explicit shift equal to the access log2-size is written. An explicit + * `#0` is legal and stays unscaled (S=0); for byte access #0 == size so it + * sets S — matching llvm-mc, where the only legal amounts are 0 or size. */ u32 S = 0; if (m.shift_present) { - if (m.shift != size) - asm_driver_panic(d, "asm: ldr/str: index shift must equal access size"); - S = 1; + if (m.shift == size) + S = 1; + else if (m.shift != 0) + asm_driver_panic(d, "asm: ldr/str: index shift must be 0 or access size"); } u32 word = aa64_ldst_regoff_pack((AA64LdStRegOff){.size = size, .V = 0, @@ -1364,6 +1371,20 @@ static void p_stlr(AsmDriver* d, u32 size, const char* what) { /* Store-exclusive with status: `<op> Ws, Wt|Xt, [Xn]` (L=0). Ws (the * 32-bit status result) must be a W register and distinct from Rt/Rn. 
*/ +/* Store-exclusive constraint (ARM ARM): the status register Ws must differ + * from the stored value Rt and from the base Rn, else the result is + * UNPREDICTABLE. The base is exempt when it is SP (reg #31 names SP, not the + * WZR the status reg would be). CAS/LSE atomics do NOT share this rule. */ +static void reject_stex_alias(AsmDriver* d, AA64Reg rs, AA64Reg rt, AA64Mem m, + const char* what) { + if (rs.num == rt.num) + asm_driver_panic(d, "asm: %.*s: status reg cannot be the value reg", + SLICE_ARG(slice_from_cstr(what))); + if (!m.base.is_sp && rs.num == m.base.num) + asm_driver_panic(d, "asm: %.*s: status reg cannot be the base reg", + SLICE_ARG(slice_from_cstr(what))); +} + static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) { AA64Reg rs = parse_reg(d); reject_sp_reg(d, rs, what); @@ -1374,6 +1395,7 @@ static void p_stex(AsmDriver* d, u32 size, u32 o0, const char* what) { require_gpr_width(d, rt, size, what); expect_comma(d, what); AA64Mem m = parse_mem_bare(d, what); + reject_stex_alias(d, rs, rt, m, what); emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, .o2 = 0u, .L = 0u, @@ -1778,6 +1800,7 @@ static void p_stxr_wx(AsmDriver* d) { require_gpr_width(d, rt, size, "stxr"); expect_comma(d, "stxr"); AA64Mem m = parse_mem_bare(d, "stxr"); + reject_stex_alias(d, rs, rt, m, "stxr"); emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, .o2 = 0u, .L = 0u, @@ -1803,6 +1826,7 @@ static void p_stlxr_wx(AsmDriver* d) { require_gpr_width(d, rt, size, "stlxr"); expect_comma(d, "stlxr"); AA64Mem m = parse_mem_bare(d, "stlxr"); + reject_stex_alias(d, rs, rt, m, "stlxr"); emit32(d, aa64_ldstex_pack((AA64LdStEx){.size = size, .o2 = 0u, .L = 0u, diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c @@ -423,6 +423,12 @@ static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, } else if (use_i8) { buf[n++] = (u8)(i8)imm; trailing = 1u; + } else if (size == 2u) { + /* 16-bit operand size: a 2-byte immediate (under the 0x66 
prefix). */ + u16 v = (u16)imm; + buf[n++] = (u8)v; + buf[n++] = (u8)(v >> 8); + trailing = 2u; } else { n += x64_put_u32le(buf + n, (u32)(i32)imm); trailing = 4u; diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -1282,6 +1282,15 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map, if (rec.width == 0) compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u", (unsigned)r->kind); + /* The ULEB128 SET/SUB relocs are variable-width (their true length is + * read from the bytes at apply time, so rec.width is only a sentinel). + * Guard the offset against the section end here — where the size is + * known — so the apply-time scan starts in-bounds even for a malformed + * external object. */ + if ((rec.kind == R_RV_SET_ULEB128 || rec.kind == R_RV_SUB_ULEB128) && + rec.offset >= ls->size) + compiler_panic(l->c, no_loc(), + "link: ULEB128 reloc offset past section end"); *link_append_reloc_slot(img) = rec; } } diff --git a/test/asm/encode/aa64_ldst_regoff.expected.hex b/test/asm/encode/aa64_ldst_regoff.expected.hex @@ -1 +1 @@ -206862f8207862f8836825f8837825f8206862b8207862b8207822b820c862f820d862f8205862f820f862f82068623820c86238206822382068627820786278207822782068a2382068e2382078a2782078e2782078a2b820d8a2b8 +206862f820c862f8206862f8207862f8836825f8837825f8206862b8207862b8207822b820c862f820d862f8205862f820f862f82068623820c86238206822382068627820786278207822782068a2382068e2382078a2782078e2782078a2b820d8a2b8 diff --git a/test/asm/encode/aa64_ldst_regoff.s b/test/asm/encode/aa64_ldst_regoff.s @@ -1,3 +1,5 @@ + ldr x0,[x1,x2,lsl #0] + ldr x0,[x1,w2,sxtw #0] .text ldr x0, [x1, x2] ldr x0, [x1, x2, lsl #3] diff --git a/test/asm/encode/x64_memop_imm16_store.expected.hex b/test/asm/encode/x64_memop_imm16_store.expected.hex @@ -0,0 +1 @@ +66c700341266c7430805006681013412c3 diff --git a/test/asm/encode/x64_memop_imm16_store.s b/test/asm/encode/x64_memop_imm16_store.s @@ -0,0 +1,6 @@ +.text +t: + movw $0x1234, (%rax) + 
movw $5, 8(%rbx) + addw $0x1234, (%rcx) + ret diff --git a/test/asm/encode/x64_memop_imm16_store.targets b/test/asm/encode/x64_memop_imm16_store.targets @@ -0,0 +1 @@ +x64 +\ No newline at end of file