boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

016-wide-imm.P1pp (4503B)


      1 # tests/p1/wide-imm.P1pp -- backend wide-immediate behavioural test.
      2 #
      3 # Each subtest exercises one P1 op with an immediate or offset that
      4 # falls outside its target instruction's small-imm window, so the
      5 # backend's "_any" fallback (materialise + R-type / address-staging)
      6 # must run for the result to be correct. Result is checked against
      7 # the expected value; "X" on any mismatch.
      8 #
      9 # Coverage map (small-imm window per arch shown for context):
     10 #                          aarch64                 riscv64
     11 #   %andi imm window       0..0xFFFF / -0x10000..  -2048..2047
     12 #   %ori  imm window       (same)                  (same)
     13 #   %addi imm window       0..0xFFFFFF             -2048..2047
     14 #   %ld/%st 8B off window  scaled 0..32760 +       -2048..2047
     15 #                          unscaled -256..255
     16 #   %lb/%sb 1B off window  unscaled -256..255 +    -2048..2047
     17 #                          scaled 0..4095
     18 # amd64 has native disp32/imm32, so no fallback runs there for the offsets,
     19 # but subtests C/D still exceed its signed imm32; every result must be correct.
     20 #
     21 # Offsets chosen to land outside every window in the table above:
     22 #   - 8-byte LD/ST at +40000 (past aarch64 scaled imm12, max 32760)
     23 #   - 1-byte LB/SB at +5000  (past aarch64 scaled imm12, max 4095)
     24 #
     25 # Buffer storage: `:buf` sits just before `:ELF_end`, so &buf is in
     26 # the BSS region the loader zero-fills past filesz (ph_memsz = 512 MB
     27 # in the seed ELF header, so 40008 bytes past &buf is safely mapped).
     28 #
     29 # Expected stdout: "ABCDEFGH\n".
     30 
     31 %fn(p1_main, 0, {
     32     # ---- A: %andi(rd, ra, 0xFFFFFFFF) on -1 -> 0xFFFFFFFF ----------------
     33     # Without the wide-andi fix: riscv64 truncates 0xFFFFFFFF to 0xFFF and
     34     # aarch64 truncates to 0xFFFF, both giving wrong masks.
     35     %li(t0, -1)
     36     %andi(t0, t0, 0xFFFFFFFF)
     37     %li(t1, 0xFFFFFFFF)
     38     %bne(t0, t1, &.fail)
     39     %la(a0, &c_a) %li(a1, 1) %call(&print)
     40 
     41     # ---- B: %ori(rd, ra, 0xDEADBEEF) on 0 -> 0xDEADBEEF ------------------
     42     %li(t0, 0)
     43     %ori(t0, t0, 0xDEADBEEF)
     44     %li(t1, 0xDEADBEEF)
     45     %bne(t0, t1, &.fail)
     46     %la(a0, &c_b) %li(a1, 1) %call(&print)
     47 
     48     # ---- C: %addi(rd, ra, 0xFFFFFFFF) on 0 -> 0xFFFFFFFF -----------------
     49     # Past aarch64's 24-bit add-imm window, past riscv64's 12-bit window,
     50     # and (critically) past amd64's signed-imm32 range: the imm32 form
     51     # would sign-extend 0xFFFFFFFF to -1 and silently subtract.
     52     %li(t0, 0)
     53     %addi(t0, t0, 0xFFFFFFFF)
     54     %li(t1, 0xFFFFFFFF)
     55     %bne(t0, t1, &.fail)
     56     %la(a0, &c_c) %li(a1, 1) %call(&print)
     57 
     58     # ---- D: %addi(rd, ra, -0xFFFFFFFF) on 0xFFFFFFFF -> 0 ----------------
     59     # Negative magnitude past every backend's small-imm window. On amd64
     60     # the imm32 form would truncate -0xFFFFFFFF (low 32 bits = 0x1)
     61     # and add 1 instead of subtracting 0xFFFFFFFF.
     62     %li(t0, 0xFFFFFFFF)
     63     %addi(t0, t0, -0xFFFFFFFF)
     64     %bnez(t0, &.fail)
     65     %la(a0, &c_d) %li(a1, 1) %call(&print)
     66 
     67     # Stage a "trap" value at &buf+0 so a wide-offset store/load that
     68     # silently masks its offset down to 0 is detected as the trap value
     69     # leaking into the wide slot.
     70     %la(s0, &buf)
     71     %li(t0, 0xDEAD)
     72     %st(t0, s0, 0)
     73 
     74     # ---- E/F: %st + %ld at offset 40000 -> roundtrip 0xCAFEBABE ----------
     75     # If the wide store silently truncates to offset 0, it overwrites the
     76     # 0xDEAD trap (rather than landing at +40000), and the subsequent
     77     # offset-0 ld below would read 0xCAFEBABE instead of 0xDEAD. If the
     78     # wide load truncates, it reads the 0xDEAD trap instead of 0xCAFEBABE.
     79     %li(t0, 0xCAFEBABE)
     80     %st(t0, s0, 40000)
     81     %ld(t1, s0, 40000)
     82     %li(t2, 0xCAFEBABE)
     83     %bne(t1, t2, &.fail)
     84     %la(a0, &c_e) %li(a1, 1) %call(&print)
     85     # F: the offset-0 trap must still hold 0xDEAD, i.e. the wide store did not land there.
     86     %ld(t1, s0, 0)
     87     %li(t2, 0xDEAD)
     88     %bne(t1, t2, &.fail)
     89     %la(a0, &c_f) %li(a1, 1) %call(&print)
     90 
     91     # Stage a 1-byte trap at &buf+1. This lands inside the 8-byte slot at
     92     # &buf+0, which is harmless: subtest F has already checked that slot.
     93     %li(t0, 0x99)
     94     %sb(t0, s0, 1)
     95 
     96     # ---- G/H: %sb + %lb at offset 5000 -> roundtrip 0x42 -----------------
     97     %li(t0, 0x42)
     98     %sb(t0, s0, 5000)
     99     %lb(t1, s0, 5000)
    100     %li(t2, 0x42)
    101     %bne(t1, t2, &.fail)
    102     %la(a0, &c_g) %li(a1, 1) %call(&print)
    103     # H: the byte trap at &buf+1 must still read 0x99 after the wide %sb.
    104     %lb(t1, s0, 1)
    105     %li(t2, 0x99)
    106     %bne(t1, t2, &.fail)
    107     %la(a0, &c_h) %li(a1, 1) %call(&print)
    108     # Every check above passed: print the trailing newline and set a0 = 0.
    109     %la(a0, &c_nl) %li(a1, 1) %call(&print)
    110     %li(a0, 0)
    111     %b(&.done)
    112     # Failure path: print X and a newline, set a0 = 1, then fall through to .done.
    113     :.fail
    114     %la(a0, &c_x) %li(a1, 1) %call(&print)
    115     %la(a0, &c_nl) %li(a1, 1) %call(&print)
    116     %li(a0, 1)
    117     :.done
    118 })
    119 # Single-character strings; p1_main passes each one to print with a1 = 1.
    120 :c_a "A"
    121 :c_b "B"
    122 :c_c "C"
    123 :c_d "D"
    124 :c_e "E"
    125 :c_f "F"
    126 :c_g "G"
    127 :c_h "H"
    128 :c_x "X"
    129 :c_nl "
    130 "
    131 # buf labels the address directly before ELF_end; p1_main stores at offsets 0 to 40000 from it.
    132 :buf
    133 :ELF_end