016-wide-imm.P1pp (4503B)
# tests/p1/wide-imm.P1pp -- backend wide-immediate behavioural test.
#
# Each subtest exercises one P1 op with an immediate or offset that
# falls outside its target instruction's small-imm window, so the
# backend's "_any" fallback (materialise + R-type / address-staging)
# must run for the result to be correct. Result is checked against
# the expected value; "X" on any mismatch.
#
# Coverage map (small-imm window per arch shown for context):
#                             aarch64                   riscv64
#   %andi imm window          0..0xFFFF / -0x10000..    -2048..2047
#   %ori  imm window          (same)                    (same)
#   %addi imm window          0..0xFFFFFF               -2048..2047
#   %ld/%st 8B off window     scaled 0..32760 +         -2048..2047
#                             unscaled -256..255
#   %lb/%sb 1B off window     unscaled -256..255 +      -2048..2047
#                             scaled 0..4095
# amd64 has native disp32/imm32, so no fallback runs there but the
# result must still be correct.
#
# Offsets chosen to land outside every arch's window:
#   - 8-byte LD/ST at +40000 (past aarch64 scaled imm12, max 32760)
#   - 1-byte LB/SB at +5000  (past aarch64 scaled imm12, max 4095)
#
# Each wide-offset roundtrip is paired with a "trap" value staged at
# &buf+0: if the store and the load both mask their offset down to 0,
# the roundtrip alone would still succeed, but the trap is clobbered
# and the follow-up offset-0 check (subtests F and H) catches it.
#
# Buffer storage: `:buf` sits just before `:ELF_end`, so &buf is in
# the BSS region the loader zero-fills past filesz (ph_memsz = 512 MB
# in the seed ELF header, so 40008 bytes past &buf is safely mapped).
#
# Expected stdout: "ABCDEFGH\n".

%fn(p1_main, 0, {
    # ---- A: %andi(rd, ra, 0xFFFFFFFF) on -1 -> 0xFFFFFFFF ----------------
    # Without the wide-andi fix: riscv64 truncates 0xFFFFFFFF to 0xFFF and
    # aarch64 truncates to 0xFFFF, both giving wrong masks.
    %li(t0, -1)
    %andi(t0, t0, 0xFFFFFFFF)
    %li(t1, 0xFFFFFFFF)
    %bne(t0, t1, &.fail)
    %la(a0, &c_a) %li(a1, 1) %call(&print)

    # ---- B: %ori(rd, ra, 0xDEADBEEF) on 0 -> 0xDEADBEEF ------------------
    %li(t0, 0)
    %ori(t0, t0, 0xDEADBEEF)
    %li(t1, 0xDEADBEEF)
    %bne(t0, t1, &.fail)
    %la(a0, &c_b) %li(a1, 1) %call(&print)

    # ---- C: %addi(rd, ra, 0xFFFFFFFF) on 0 -> 0xFFFFFFFF -----------------
    # Past aarch64's 24-bit add-imm window, past riscv64's 12-bit window,
    # and (critically) past amd64's signed-imm32 range: the imm32 form
    # would sign-extend 0xFFFFFFFF to -1 and silently subtract.
    %li(t0, 0)
    %addi(t0, t0, 0xFFFFFFFF)
    %li(t1, 0xFFFFFFFF)
    %bne(t0, t1, &.fail)
    %la(a0, &c_c) %li(a1, 1) %call(&print)

    # ---- D: %addi(rd, ra, -0xFFFFFFFF) on 0xFFFFFFFF -> 0 ----------------
    # Negative magnitude past every backend's small-imm window. On amd64
    # the imm32 form would truncate -0xFFFFFFFF (low 32 bits = 0x1)
    # and add 1 instead of subtracting 0xFFFFFFFF.
    %li(t0, 0xFFFFFFFF)
    %addi(t0, t0, -0xFFFFFFFF)
    %bnez(t0, &.fail)
    %la(a0, &c_d) %li(a1, 1) %call(&print)

    # Stage a "trap" value at &buf+0 so a wide-offset store/load that
    # silently masks its offset down to 0 is detected as the trap value
    # leaking into the wide slot. s0 holds &buf for the rest of the test.
    %la(s0, &buf)
    %li(t0, 0xDEAD)
    %st(t0, s0, 0)

    # ---- E/F: %st + %ld at offset 40000 -> roundtrip 0xCAFEBABE ----------
    # If the wide store silently truncates to offset 0, it overwrites the
    # 0xDEAD trap (rather than landing at +40000), and the subsequent
    # offset-0 ld below would read 0xCAFEBABE instead of 0xDEAD. If the
    # wide load truncates, it reads the 0xDEAD trap instead of 0xCAFEBABE.
    %li(t0, 0xCAFEBABE)
    %st(t0, s0, 40000)
    %ld(t1, s0, 40000)
    %li(t2, 0xCAFEBABE)
    %bne(t1, t2, &.fail)
    %la(a0, &c_e) %li(a1, 1) %call(&print)

    # F: the 8-byte trap at &buf+0 must have survived the wide store.
    %ld(t1, s0, 0)
    %li(t2, 0xDEAD)
    %bne(t1, t2, &.fail)
    %la(a0, &c_f) %li(a1, 1) %call(&print)

    # Stage a 1-byte trap at &buf+0, mirroring the 8-byte trap above: a
    # wide-offset sb/lb pair that both mask the offset down to 0 would
    # round-trip 0x42 through &buf+0 and clobber this trap, which H then
    # detects. (A trap at any other small offset would not be touched by
    # a masked-to-0 access and so could not catch it.) Subtest F has
    # already verified the 8-byte value at &buf+0, so overwriting its
    # first byte here is harmless.
    %li(t0, 0x99)
    %sb(t0, s0, 0)

    # ---- G/H: %sb + %lb at offset 5000 -> roundtrip 0x42 -----------------
    %li(t0, 0x42)
    %sb(t0, s0, 5000)
    %lb(t1, s0, 5000)
    %li(t2, 0x42)
    %bne(t1, t2, &.fail)
    %la(a0, &c_g) %li(a1, 1) %call(&print)

    # H: the 1-byte trap at &buf+0 must have survived the wide store.
    %lb(t1, s0, 0)
    %li(t2, 0x99)
    %bne(t1, t2, &.fail)
    %la(a0, &c_h) %li(a1, 1) %call(&print)

    # All subtests passed: finish the line and leave a0 = 0.
    %la(a0, &c_nl) %li(a1, 1) %call(&print)
    %li(a0, 0)
    %b(&.done)

    # Any mismatch lands here: print "X", finish the line, leave a0 = 1.
    :.fail
    %la(a0, &c_x) %li(a1, 1) %call(&print)
    %la(a0, &c_nl) %li(a1, 1) %call(&print)
    %li(a0, 1)
    :.done
})

# One-character strings printed by the subtests (each printed with length 1).
:c_a "A"
:c_b "B"
:c_c "C"
:c_d "D"
:c_e "E"
:c_f "F"
:c_g "G"
:c_h "H"
:c_x "X"
:c_nl "
"

# Scratch buffer: zero-length label immediately before the end of the image.
:buf
:ELF_end