# P1-aarch64.M1pp (10825B)
# aarch64.M1M -- P1v2 aarch64 backend expressed in m1macro.
#
# This mirrors p1/aarch64.py using the m1macro integer builtins:
# %(sexpr), $(sexpr), and %select(cond, then, else).

# ---- Native register numbers --------------------------------------------
# aa64_reg_<name>() expands to the AArch64 GPR number assigned to that P1
# role.  Number 31 encodes either sp or xzr depending on instruction class.

%macro aa64_reg_a0()
0
%endm
%macro aa64_reg_a1()
1
%endm
%macro aa64_reg_a2()
2
%endm
%macro aa64_reg_a3()
3
%endm
%macro aa64_reg_x4()
4
%endm
%macro aa64_reg_x5()
5
%endm
%macro aa64_reg_t0()
9
%endm
%macro aa64_reg_t1()
10
%endm
%macro aa64_reg_t2()
11
%endm
%macro aa64_reg_s0()
19
%endm
%macro aa64_reg_s1()
20
%endm
%macro aa64_reg_s2()
21
%endm
%macro aa64_reg_s3()
22
%endm
%macro aa64_reg_sp()
31
%endm
%macro aa64_reg_xzr()
31
%endm
%macro aa64_reg_lr()
30
%endm
%macro aa64_reg_br()
17
%endm
%macro aa64_reg_scratch()
16
%endm
%macro aa64_reg_x8()
8
%endm
%macro aa64_reg_save0()
23
%endm
%macro aa64_reg_save1()
24
%endm
%macro aa64_reg_save2()
25
%endm

# Dispatch on the symbolic register name via token pasting.
%macro aa64_reg(r)
%aa64_reg_##r()
%endm

# aa64_is_sp_<name>() is 1 only for the stack pointer.  Needed because
# register number 31 is ambiguous (sp vs xzr), so encoders that must treat
# sp specially test this flag instead of the register number.
%macro aa64_is_sp_a0()
0
%endm
%macro aa64_is_sp_a1()
0
%endm
%macro aa64_is_sp_a2()
0
%endm
%macro aa64_is_sp_a3()
0
%endm
%macro aa64_is_sp_x4()
0
%endm
%macro aa64_is_sp_x5()
0
%endm
%macro aa64_is_sp_t0()
0
%endm
%macro aa64_is_sp_t1()
0
%endm
%macro aa64_is_sp_t2()
0
%endm
%macro aa64_is_sp_s0()
0
%endm
%macro aa64_is_sp_s1()
0
%endm
%macro aa64_is_sp_s2()
0
%endm
%macro aa64_is_sp_s3()
0
%endm
%macro aa64_is_sp_sp()
1
%endm
%macro aa64_is_sp_xzr()
0
%endm
%macro aa64_is_sp_lr()
0
%endm
%macro aa64_is_sp_br()
0
%endm
%macro aa64_is_sp_scratch()
0
%endm
%macro aa64_is_sp_x8()
0
%endm
%macro aa64_is_sp_save0()
0
%endm
%macro aa64_is_sp_save1()
0
%endm
%macro aa64_is_sp_save2()
0
%endm

# Dispatch on the symbolic register name via token pasting.
%macro aa64_is_sp(r)
%aa64_is_sp_##r()
%endm

# ---- Low-level instruction encoders --------------------------------------

# Generic 3-register ALU word: base | rb<<16 | ra<<5 | rd.
%macro aa64_rrr(base, rd, ra, rb)
%((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# add rd, ra, #imm12 (64-bit).
%macro aa64_add_imm(rd, ra, imm12)
%((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# sub rd, ra, #imm12 (64-bit).
%macro aa64_sub_imm(rd, ra, imm12)
%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# Register move.  sp cannot be an operand of ORR, so moves involving sp go
# through add-immediate; the plain case is orr rd, xzr, src.
# NOTE(review): copying OUT of sp adds 16, matching the 16-byte frame bias
# p1_mem applies to sp-relative offsets -- confirm against p1/aarch64.py.
%macro aa64_mov_rr(dst, src)
%select((= %aa64_is_sp(dst) 1),
        %aa64_add_imm(sp, src, 0),
        %select((= %aa64_is_sp(src) 1),
                %aa64_add_imm(dst, sp, 16),
                %((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst)))))
%endm

# ubfm rd, ra, #immr, #imms (64-bit; N=1) -- basis for LSL/LSR by constant.
%macro aa64_ubfm(rd, ra, immr, imms)
%((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# sbfm rd, ra, #immr, #imms (64-bit; N=1) -- basis for ASR by constant.
%macro aa64_sbfm(rd, ra, immr, imms)
%((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# movz rd, #imm16
%macro aa64_movz(rd, imm16)
%((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm

# movn rd, #imm16  (rd = ~imm16)
%macro aa64_movn(rd, imm16)
%((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm

# Load a small signed constant in one instruction: movz for imm >= 0,
# movn with the complemented low 16 bits for imm < 0.
%macro aa64_materialize_small_imm(rd, imm)
%select((>= imm 0),
        %aa64_movz(rd, imm),
        %aa64_movn(rd, (& (~ imm) 0xFFFF)))
%endm

# Scaled unsigned-offset load/store: offset is given in bytes and shifted
# down by the access size to form the 12-bit scaled immediate.
%macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2)
%((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm

# Unscaled 9-bit signed-offset load/store (ldur/stur family).
%macro aa64_ldst_unscaled(base, rt, rn, off)
%((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm

# Opcode bases and access sizes per P1 memory op: LD/ST are 64-bit,
# LB/SB are byte-sized.
%macro aa64_mem_uimm_base_LD()
0xF9400000
%endm
%macro aa64_mem_uimm_base_ST()
0xF9000000
%endm
%macro aa64_mem_uimm_base_LB()
0x39400000
%endm
%macro aa64_mem_uimm_base_SB()
0x39000000
%endm

%macro aa64_mem_unscaled_base_LD()
0xF8400000
%endm
%macro aa64_mem_unscaled_base_ST()
0xF8000000
%endm
%macro aa64_mem_unscaled_base_LB()
0x38400000
%endm
%macro aa64_mem_unscaled_base_SB()
0x38000000
%endm

%macro aa64_mem_size_LD()
3
%endm
%macro aa64_mem_size_ST()
3
%endm
%macro aa64_mem_size_LB()
0
%endm
%macro aa64_mem_size_SB()
0
%endm

%macro aa64_mem_uimm_base(op)
%aa64_mem_uimm_base_##op()
%endm

%macro aa64_mem_unscaled_base(op)
%aa64_mem_unscaled_base_##op()
%endm

%macro aa64_mem_size(op)
%aa64_mem_size_##op()
%endm

# Offset out of range for any single-instruction form: compute the address
# into scratch with add/sub-immediate, then access [scratch, #0].
%macro aa64_mem_fallback(op, rt, rn, off)
%select((>= off 0),
        %aa64_add_imm(scratch, rn, off)
        %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)),
        %aa64_sub_imm(scratch, rn, (- 0 off))
        %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)))
%endm

# Offset did not fit the scaled form: try the unscaled 9-bit signed form
# (-256..255), else fall back to address materialization.
%macro aa64_mem_after_uimm(op, rt, rn, off)
%select((>= off -256),
        %select((<= off 255),
                %aa64_ldst_unscaled(%aa64_mem_unscaled_base(op), rt, rn, off),
                %aa64_mem_fallback(op, rt, rn, off)),
        %aa64_mem_fallback(op, rt, rn, off))
%endm

# Non-negative offset: use the scaled uimm12 form when the offset is
# aligned to the access size and within 4096 scaled units.
%macro aa64_mem_after_nonneg(op, rt, rn, off)
%select((= (& off (- (<< 1 %aa64_mem_size(op)) 1)) 0),
        %select((< off (<< 4096 %aa64_mem_size(op))),
                %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, rn, off, %aa64_mem_size(op)),
                %aa64_mem_after_uimm(op, rt, rn, off)),
        %aa64_mem_after_uimm(op, rt, rn, off))
%endm

# Top-level memory access: pick the best encoding for op/rt/[rn + off].
%macro aa64_mem(op, rt, rn, off)
%select((>= off 0),
        %aa64_mem_after_nonneg(op, rt, rn, off),
        %aa64_mem_after_uimm(op, rt, rn, off))
%endm

# cmp ra, rb (subs xzr, ra, rb) then b.<cond> +8: conditionally skip the
# single instruction that follows (callers place a br there).
%macro aa64_cmp_skip(cond, ra, rb)
%((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31))
%((| 0x54000040 cond))
%endm

# br reg
%macro aa64_br(reg)
%((| 0xD61F0000 (<< %aa64_reg(reg) 5)))
%endm

# blr reg
%macro aa64_blr(reg)
%((| 0xD63F0000 (<< %aa64_reg(reg) 5)))
%endm

# ret (x30)
%macro aa64_ret()
%(0xD65F03C0)
%endm

%macro aa64_lit64_prefix(rd)
# 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
# The 8 bytes that follow in source become the literal; b skips them.
%((| 0x58000040 %aa64_reg(rd)))
%(0x14000003)
%endm

%macro aa64_lit32_prefix(rd)
# 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
# ldr w zero-extends into the full 64-bit register, so a 4-byte literal
# is enough for any address in the stage0 layout. Lets source use
# `&label` directly without padding to 8 bytes.
%((| 0x18000040 %aa64_reg(rd)))
%(0x14000002)
%endm

# ---- P1 register-register op lowering -----------------------------------

%macro aa64_rrr_ADD(rd, ra, rb)
%aa64_rrr(0x8B000000, rd, ra, rb)
%endm
%macro aa64_rrr_SUB(rd, ra, rb)
%aa64_rrr(0xCB000000, rd, ra, rb)
%endm
%macro aa64_rrr_AND(rd, ra, rb)
%aa64_rrr(0x8A000000, rd, ra, rb)
%endm
%macro aa64_rrr_OR(rd, ra, rb)
%aa64_rrr(0xAA000000, rd, ra, rb)
%endm
%macro aa64_rrr_XOR(rd, ra, rb)
%aa64_rrr(0xCA000000, rd, ra, rb)
%endm
%macro aa64_rrr_SHL(rd, ra, rb)
%aa64_rrr(0x9AC02000, rd, ra, rb)
%endm
%macro aa64_rrr_SHR(rd, ra, rb)
%aa64_rrr(0x9AC02400, rd, ra, rb)
%endm
%macro aa64_rrr_SAR(rd, ra, rb)
%aa64_rrr(0x9AC02800, rd, ra, rb)
%endm
%macro aa64_rrr_DIV(rd, ra, rb)
%aa64_rrr(0x9AC00C00, rd, ra, rb)
%endm
# madd rd, ra, rb, xzr  (Ra field = 31)
%macro aa64_rrr_MUL(rd, ra, rb)
%((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
# REM: sdiv scratch, ra, rb; msub rd, scratch, rb, ra  => rd = ra % rb.
%macro aa64_rrr_REM(rd, ra, rb)
%((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch)))
%((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd)))
%endm

%macro aa64_rrr_op(op, rd, ra, rb)
%aa64_rrr_##op(rd, ra, rb)
%endm

# ---- P1 operation lowering -----------------------------------------------

%macro p1_li(rd)
%aa64_lit64_prefix(rd)
%endm

%macro p1_la(rd)
%aa64_lit32_prefix(rd)
%endm

%macro p1_labr()
%aa64_lit32_prefix(br)
%endm

%macro p1_mov(rd, rs)
%aa64_mov_rr(rd, rs)
%endm

%macro p1_rrr(op, rd, ra, rb)
%aa64_rrr_op(op, rd, ra, rb)
%endm

# Add-immediate with signed imm: negative values become sub-immediate.
%macro p1_addi(rd, ra, imm)
%select((>= imm 0),
        %aa64_add_imm(rd, ra, imm),
        %aa64_sub_imm(rd, ra, (- 0 imm)))
%endm

# Logical-immediate ops: materialize the constant in scratch, then use the
# register-register AND/ORR encodings.
%macro p1_logi_ANDI(rd, ra, imm)
%aa64_materialize_small_imm(scratch, imm)
%aa64_rrr(0x8A000000, rd, ra, scratch)
%endm
%macro p1_logi_ORI(rd, ra, imm)
%aa64_materialize_small_imm(scratch, imm)
%aa64_rrr(0xAA000000, rd, ra, scratch)
%endm
%macro p1_logi(op, rd, ra, imm)
%p1_logi_##op(rd, ra, imm)
%endm

# Constant shifts via ubfm/sbfm (standard lsl/lsr/asr alias forms).
%macro p1_shifti_SHLI(rd, ra, imm)
%aa64_ubfm(rd, ra, (& (- 0 imm) 63), (- 63 imm))
%endm
%macro p1_shifti_SHRI(rd, ra, imm)
%aa64_ubfm(rd, ra, imm, 63)
%endm
%macro p1_shifti_SARI(rd, ra, imm)
%aa64_sbfm(rd, ra, imm, 63)
%endm
%macro p1_shifti(op, rd, ra, imm)
%p1_shifti_##op(rd, ra, imm)
%endm

# P1 memory op: sp-relative accesses are biased by the 16-byte link area
# (lr at [sp,0], saved frame pointer word at [sp,8] -- see p1_enter).
%macro p1_mem(op, rt, rn, off)
%select((= %aa64_is_sp(rn) 1),
        %aa64_mem(op, rt, rn, (+ off 16)),
        %aa64_mem(op, rt, rn, off))
%endm

# Load argument <slot> of the caller's frame: the saved caller sp lives at
# [sp,8]; args start 16 bytes above it.
%macro p1_ldarg(rd, slot)
%aa64_mem(LD, scratch, sp, 8)
%aa64_mem(LD, rd, scratch, (+ 16 (* 8 slot)))
%endm

%macro p1_b()
%aa64_br(br)
%endm

%macro p1_br(rs)
%aa64_br(rs)
%endm

%macro p1_call()
%aa64_blr(br)
%endm

%macro p1_callr(rs)
%aa64_blr(rs)
%endm

%macro p1_ret()
%aa64_ret()
%endm

# Epilogue + return: restore lr and the caller sp saved by p1_enter.
%macro p1_eret()
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_ret()
%endm

# Epilogue + tail-jump through br.
%macro p1_tail()
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_br(br)
%endm

# Epilogue + tail-jump through an arbitrary register.
%macro p1_tailr(rs)
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_br(rs)
%endm

# Conditional branches: cmp then b.<cond> skips the br when the branch is
# NOT taken, so the condition code is the inverse of the P1 op
# (BEQ -> b.ne=1, BNE -> b.eq=0, BLT -> b.ge=10, BLTU -> b.hs=2).
%macro p1_condb_BEQ(ra, rb)
%aa64_cmp_skip(1, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BNE(ra, rb)
%aa64_cmp_skip(0, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BLT(ra, rb)
%aa64_cmp_skip(10, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BLTU(ra, rb)
%aa64_cmp_skip(2, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb(op, ra, rb)
%p1_condb_##op(ra, rb)
%endm

# Compare-with-zero branches: cbnz/cbz skip the br when not taken.
%macro p1_condbz_BEQZ(ra)
%((| 0xB5000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
%macro p1_condbz_BNEZ(ra)
%((| 0xB4000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
# BLTZ: cmp ra, xzr; b.ge +8; br
%macro p1_condbz_BLTZ(ra)
%((| 0xEB1F001F (<< %aa64_reg(ra) 5)))
%((| 0x54000040 10))
%aa64_br(br)
%endm
%macro p1_condbz(op, ra)
%p1_condbz_##op(ra)
%endm

# Prologue: frame = (16 + size) rounded up to 16.  Saves lr at [sp,0] and
# the caller's sp (sp + frame) at [sp,8] for p1_eret/p1_tail/p1_ldarg.
%macro p1_enter(size)
%aa64_sub_imm(sp, sp, (& (+ (+ 16 size) 15) -16))
%aa64_mem(ST, lr, sp, 0)
%aa64_add_imm(x8, sp, (& (+ (+ 16 size) 15) -16))
%aa64_mem(ST, x8, sp, 8)
%endm

%macro p1_entry()
# :_start stub emitted by the aarch64 backend per the P1v2 program-entry
# model. Captures argc from [sp] into a0, computes argv=sp+8 into a1,
# calls p1_main under the one-word direct-result convention, then issues
# a native Linux sys_exit with p1_main's return value as the exit status.
:_start
%aa64_mem(LD, a0, sp, 0)
%aa64_add_imm(a1, sp, 8)
%aa64_lit32_prefix(br)
&p1_main
%aa64_blr(br)
%aa64_movz(x8, 93)
%(0xD4000001)
%endm

# Syscall bridge: P1 passes the syscall number in a0 and arguments in
# a1..a3/t0/s0/s1; the kernel wants the number in x8 and args in x0..x5.
# a1..a3 are staged through save0..save2 so they survive the svc #0 and
# are restored afterwards; a0 carries the kernel's return value.
%macro p1_syscall()
%aa64_mov_rr(x8, a0)
%aa64_mov_rr(save0, a1)
%aa64_mov_rr(save1, a2)
%aa64_mov_rr(save2, a3)
%aa64_mov_rr(a0, save0)
%aa64_mov_rr(a1, save1)
%aa64_mov_rr(a2, save2)
%aa64_mov_rr(a3, t0)
%aa64_mov_rr(x4, s0)
%aa64_mov_rr(x5, s1)
%(0xD4000001)
%aa64_mov_rr(a1, save0)
%aa64_mov_rr(a2, save1)
%aa64_mov_rr(a3, save2)
%endm

# ---- Linux syscall number data words -------------------------------------
# arm64 syscall numbers, emitted as data words via $(sexpr).

%macro p1_sys_read()
$(63)
%endm
%macro p1_sys_write()
$(64)
%endm
%macro p1_sys_close()
$(57)
%endm
%macro p1_sys_openat()
$(56)
%endm
%macro p1_sys_exit()
$(93)
%endm
%macro p1_sys_clone()
$(220)
%endm
%macro p1_sys_execve()
$(221)
%endm
%macro p1_sys_waitid()
$(95)
%endm