p1-aliasing.P1pp (2508B)
# tests/p1/p1-aliasing.P1 -- regression coverage for the amd64 backend's
# register-aliasing corner cases. Each test exercises a native encoding
# that, if mishandled, clobbers a non-rD P1 register and produces a value
# that differs from what aarch64 / riscv64 return. The combined result
# is written to stdout as a single byte + newline so the harness can
# diff it against `*` (0x2A = decimal 42).
#
# Test 1: ANDI with imm > 127. amd64 `83 /4 ib` sign-extends its byte
# immediate to 64 bits, so naive `ANDI rd, 255` AND's against -1
# (= all ones) rather than 255. Fixed by widening to `81 /4 id` for
# imms outside [-128, 127].
#
# Test 2: SHL / SHR / SAR (reg count) where rd == P1 a3 (native rcx).
# amd64 stages the count through rcx; the naive ordering restores rcx
# from its save-slot AFTER writing rd, clobbering the result when
# rd == a3.
#
# Test 3: DIV / REM where rd == rb == P1 a2 (native rdx). amd64's
# idiv writes rdx, and the save-slot restore has to happen BEFORE the
# mov-rd so that when rd == a2 the restore doesn't overwrite the
# quotient. Plus, the divisor has to be stashed before cqo overwrites
# rdx, or `idiv rdx` divides by the sign-extension of rax.

# Register roles: s0 accumulates the running result across all three
# tests (correct path: 8 -> 0 -> 26 -> 42); a0..a3 carry per-test
# operands and are freely re-clobbered between tests; t0 is address
# scratch for the SB store.
:p1_main
%enter(0)

# Test 1: ANDI imm=255. 0x2345 & 0xFF = 0x45 (69).
%li(a0, 0x2345)
%andi(a0, a0, 255)
%mov(s0, a0)
%addi(s0, s0, -61)   # s0 = 8 (69 - 61); buggy sign-extended AND yields 0x2345, landing elsewhere

# Test 2: SHL with rd aliasing rb (both a3), and ra == a0.
# a0=1, a3=3. Correct: a3 = 1 << 3 = 8. Buggy (rcx restore wins): a3 = 3.
%li(a0, 1)
%li(a3, 3)
%shl(a3, a0, a3)
%sub(s0, a3, s0)     # s0 = a3 - 8. Correct: 0. Buggy: -5.

# Test 3: DIV with rd aliasing rb (both a2), and ra == a0.
# a0=182, a2=7. Correct: a2 = 182 / 7 = 26. Buggy (idiv uses rdx after
# cqo; or mov-rdx-restore clobbers quotient): a2 = 7 or 1.
%li(a0, 182)
%li(a2, 7)
%div(a2, a0, a2)
%add(s0, s0, a2)     # s0 += a2. Correct: 26. Buggy: varies.

# Compose the output byte. Add 16 so the correct result maps to '*'
# (0x2A = 42). Wrong results land on different bytes.
%addi(s0, s0, 16)

# Store s0 as the first byte of msg_buf, then write msg_buf[0..2].
%la(t0, &msg_buf)
%sb(s0, t0, 0)

# NOTE(review): assumes the P1 syscall convention is a0 = syscall number,
# a1 = fd, a2 = buf, a3 = len -- confirm against the P1 ABI docs. Here:
# write(1 /* stdout */, msg_buf, 2).
%li(a0, %sys_write)
%li(a1, 1)
%la(a2, &msg_buf)
%li(a3, 2)
%syscall

%li(a0, 0)           # exit status 0; the harness judges by stdout, not status
%eret

# Two-byte output scratch: [0] = computed byte, [1] = newline. The space
# placeholder gets overwritten by SB before the write syscall.
:msg_buf
" 
"

:ELF_end