commit 3d4a605868477322720da30bfb1ce9e6abbcecb1
parent bcb244c97bd9a962f8f7fb969f770d7e8d00a195
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 29 Apr 2026 23:22:43 -0700
aarch64: support large ADD/SUB immediates in p1_addi and p1_enter
aa64_add_imm / aa64_sub_imm encode a 12-bit immediate and silently
truncate values > 4095. Functions with large stack frames (e.g.
cc__next_nomacro1) produced a frame smaller than requested, corrupting
later stack-slot accesses.
Add aa64_{add,sub}_imm_lsl12 (shift-12 variants) and
aa64_{add,sub}_imm_any, which dispatch to plain, lsl12, or lsl12+plain
depending on the immediate's magnitude. Wire p1_addi and p1_enter
through the _any helpers. Immediates above 0xFFFFFF (24 bits) are
still truncated.
Regression test: tests/P1/large-addi.P1pp.
Diffstat:
3 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/P1/P1-aarch64.M1pp b/P1/P1-aarch64.M1pp
@@ -161,6 +161,32 @@
%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
+%macro aa64_add_imm_lsl12(rd, ra, imm12)  # ADD immediate with LSL 12: rd = ra + imm12 * 4096
+%((| 0x91400000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))  # only the low 12 bits of imm12 are encoded
+%endm
+
+%macro aa64_sub_imm_lsl12(rd, ra, imm12)  # SUB immediate with LSL 12: rd = ra - imm12 * 4096
+%((| 0xD1400000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))  # only the low 12 bits of imm12 are encoded
+%endm
+
+%macro aa64_add_imm_any(rd, ra, imm)  # rd = ra + imm - callers in this patch pass a non-negative macro-time constant
+%select((<= imm 4095),  # up to 4095: one unshifted ADD
+ %aa64_add_imm(rd, ra, imm),
+ %select((= (& imm 0xFFF) 0),  # low 12 bits clear: one LSL 12 ADD
+ %aa64_add_imm_lsl12(rd, ra, (>> imm 12)),
+ %aa64_add_imm_lsl12(rd, ra, (>> imm 12))  # otherwise: high part into rd first
+ %aa64_add_imm(rd, rd, (& imm 0xFFF))))  # then the low 12 bits - NOTE review: bits above 0xFFFFFF are silently dropped
+%endm
+
+%macro aa64_sub_imm_any(rd, ra, imm)  # rd = ra - imm - callers in this patch pass a non-negative macro-time constant
+%select((<= imm 4095),  # up to 4095: one unshifted SUB
+ %aa64_sub_imm(rd, ra, imm),
+ %select((= (& imm 0xFFF) 0),  # low 12 bits clear: one LSL 12 SUB
+ %aa64_sub_imm_lsl12(rd, ra, (>> imm 12)),
+ %aa64_sub_imm_lsl12(rd, ra, (>> imm 12))  # otherwise: high part out of ra into rd first
+ %aa64_sub_imm(rd, rd, (& imm 0xFFF))))  # then the low 12 bits - NOTE review: bits above 0xFFFFFF are silently dropped
+%endm
+
%macro aa64_mov_rr(dst, src)
%select((= %aa64_is_sp(dst) 1),
%aa64_add_imm(sp, src, 0),
@@ -379,8 +405,8 @@ $(imm)
%macro p1_addi(rd, ra, imm)
%select((>= imm 0),
- %aa64_add_imm(rd, ra, imm),
- %aa64_sub_imm(rd, ra, (- 0 imm)))
+ %aa64_add_imm_any(rd, ra, imm),  # imm at or above zero: ADD - split into LSL 12 and plain parts when above 4095
+ %aa64_sub_imm_any(rd, ra, (- 0 imm)))  # negative imm: SUB of its magnitude
%endm
%macro p1_logi_ANDI(rd, ra, imm)
@@ -498,9 +524,9 @@ $(imm)
%endm
%macro p1_enter(size)
-%aa64_sub_imm(sp, sp, (& (+ (+ 16 size) 15) -16))
+%aa64_sub_imm_any(sp, sp, (& (+ (+ 16 size) 15) -16))  # sp -= size + 16 rounded up to a multiple of 16 - may expand to two SUBs for frames above 4095
%aa64_mem(ST, lr, sp, 0)
-%aa64_add_imm(x8, sp, (& (+ (+ 16 size) 15) -16))
+%aa64_add_imm_any(x8, sp, (& (+ (+ 16 size) 15) -16))  # x8 = sp + the same frame size - the value sp held before the SUB above
%aa64_mem(ST, x8, sp, 8)
%endm
diff --git a/tests/P1/large-addi.P1pp b/tests/P1/large-addi.P1pp
@@ -0,0 +1,37 @@
+:p1_main  # regression entry: checks that addi with an immediate above 4095 yields the full value
+ %enter(0)
+ %li(t0, 0)
+ %addi(t0, t0, 5000)  # 5000 = 4096 + 904 so both the LSL 12 part and the low 12 bits are needed
+ %li(t1, 5000)  # reference value loaded without going through addi
+ %beq(t0, t1, &ok)
+ %la(a0, &bad_msg)  # mismatch path: write the 4-byte failure message and leave 1 in a0
+ %li(a1, 4)
+ %call(&write_stdout)
+ %li(a0, 1)
+ %eret
+
+:ok  # match path: write the 3-byte success message and leave 0 in a0
+ %la(a0, &ok_msg)
+ %li(a1, 3)
+ %call(&write_stdout)
+ %li(a0, 0)
+ %eret
+
+:write_stdout  # helper: callers pass a0 = message address and a1 = byte count
+ %enter(0)
+ %mov(a2, a0)  # shift address and length up two argument registers
+ %mov(a3, a1)
+ %li(a0, %sys_write)  # a0 now carries the write syscall number
+ %li(a1, 1)  # presumably file descriptor 1 - stdout - TODO confirm the P1 syscall argument order
+ %syscall
+ %eret
+
+:ok_msg  # success text: OK plus a newline - 3 bytes are written
+"OK
+"
+
+:bad_msg  # failure text: BAD plus a newline - 4 bytes are written
+"BAD
+"
+
+:ELF_end
diff --git a/tests/P1/large-addi.expected b/tests/P1/large-addi.expected
@@ -0,0 +1 @@
+OK