P1-aarch64.M1pp (10927B)
# aarch64.M1M -- P1 aarch64 backend expressed in m1macro.
#
# This mirrors p1/aarch64.py using the m1macro integer builtins:
# %(sexpr), $(sexpr), and %select(cond, then, else).

# ---- Native register numbers --------------------------------------------
# Each aa64_reg_<name> macro expands to the AArch64 register number backing
# the corresponding P1 register name.  a0-a3 map to x0-x3 (arguments and
# result), x4/x5 extend the syscall argument set, t0-t2 (x9-x11) are
# caller-saved temporaries, s0-s3 (x19-x22) are callee-saved, and
# save0-save2 (x23-x25) are used by p1_syscall to shuffle arguments.
# scratch/br are x16/x17 (the IP0/IP1 intra-procedure-call pair), which
# this backend is free to clobber between P1 operations.
# Note sp and xzr share number 31; aa64_is_sp below disambiguates.

%macro aa64_reg_a0()
0
%endm
%macro aa64_reg_a1()
1
%endm
%macro aa64_reg_a2()
2
%endm
%macro aa64_reg_a3()
3
%endm
%macro aa64_reg_x4()
4
%endm
%macro aa64_reg_x5()
5
%endm
%macro aa64_reg_t0()
9
%endm
%macro aa64_reg_t1()
10
%endm
%macro aa64_reg_t2()
11
%endm
%macro aa64_reg_s0()
19
%endm
%macro aa64_reg_s1()
20
%endm
%macro aa64_reg_s2()
21
%endm
%macro aa64_reg_s3()
22
%endm
%macro aa64_reg_sp()
31
%endm
%macro aa64_reg_xzr()
31
%endm
%macro aa64_reg_lr()
30
%endm
%macro aa64_reg_br()
17
%endm
%macro aa64_reg_scratch()
16
%endm
%macro aa64_reg_x8()
8
%endm
%macro aa64_reg_save0()
23
%endm
%macro aa64_reg_save1()
24
%endm
%macro aa64_reg_save2()
25
%endm

# Dispatch: %aa64_reg(t0) -> %aa64_reg_t0 -> 9.
%macro aa64_reg(r)
%aa64_reg_##r
%endm

# aa64_is_sp_<name> is 1 only for sp.  Needed because register number 31
# means sp in some A64 operand positions and xzr in others, so moves that
# involve sp must take the ADD-immediate path (see aa64_mov_rr).

%macro aa64_is_sp_a0()
0
%endm
%macro aa64_is_sp_a1()
0
%endm
%macro aa64_is_sp_a2()
0
%endm
%macro aa64_is_sp_a3()
0
%endm
%macro aa64_is_sp_x4()
0
%endm
%macro aa64_is_sp_x5()
0
%endm
%macro aa64_is_sp_t0()
0
%endm
%macro aa64_is_sp_t1()
0
%endm
%macro aa64_is_sp_t2()
0
%endm
%macro aa64_is_sp_s0()
0
%endm
%macro aa64_is_sp_s1()
0
%endm
%macro aa64_is_sp_s2()
0
%endm
%macro aa64_is_sp_s3()
0
%endm
%macro aa64_is_sp_sp()
1
%endm
%macro aa64_is_sp_xzr()
0
%endm
%macro aa64_is_sp_lr()
0
%endm
%macro aa64_is_sp_br()
0
%endm
%macro aa64_is_sp_scratch()
0
%endm
%macro aa64_is_sp_x8()
0
%endm
%macro aa64_is_sp_save0()
0
%endm
%macro aa64_is_sp_save1()
0
%endm
%macro aa64_is_sp_save2()
0
%endm

# Dispatch: %aa64_is_sp(sp) -> 1, anything else -> 0.
%macro aa64_is_sp(r)
%aa64_is_sp_##r
%endm

# ---- Low-level instruction encoders --------------------------------------

# Generic three-register ALU encoding: base | rb<<16 | ra<<5 | rd.
%macro aa64_rrr(base, rd, ra, rb)
%((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# add rd, ra, #imm12  (64-bit ADD immediate; imm12 is unsigned, 0..4095).
%macro aa64_add_imm(rd, ra, imm12)
%((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# sub rd, ra, #imm12  (64-bit SUB immediate).
%macro aa64_sub_imm(rd, ra, imm12)
%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# Register-to-register move.  ORR rd, xzr, src (0xAA000000 with rn=31) is
# the plain case, but number 31 means xzr there, so sp needs special forms:
#   dst = sp : add sp, src, #0        (ADD imm accepts sp as rd)
#   src = sp : add dst, sp, #16       -- reading the P1 sp yields native
#              sp+16, skipping the 16-byte frame header (saved lr + caller
#              sp) that p1_enter keeps below the P1 view of the stack;
#              matches the +16 bias in p1_mem.
%macro aa64_mov_rr(dst, src)
%select((= %aa64_is_sp(dst) 1),
  %aa64_add_imm(sp, src, 0),
  %select((= %aa64_is_sp(src) 1),
    %aa64_add_imm(dst, sp, 16),
    %((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst)))))
%endm

# ubfm rd, ra, #immr, #imms  (64-bit, N=1).  Callers pass immr/imms already
# reduced to 0..63.
%macro aa64_ubfm(rd, ra, immr, imms)
%((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# sbfm rd, ra, #immr, #imms  (64-bit, N=1); signed counterpart of UBFM.
%macro aa64_sbfm(rd, ra, immr, imms)
%((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm

# movz rd, #imm16  (rd = imm16, other bits zeroed).
%macro aa64_movz(rd, imm16)
%((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm

# movn rd, #imm16  (rd = ~imm16).
%macro aa64_movn(rd, imm16)
%((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm

# One-instruction constant: MOVZ covers 0..65535, MOVN covers -65536..-1
# (MOVN writes the bitwise NOT of its imm16, so we pre-invert and mask).
%macro aa64_materialize_small_imm(rd, imm)
%select((>= imm 0),
  %aa64_movz(rd, imm),
  %aa64_movn(rd, (& (~ imm) 0xFFFF)))
%endm

# Load/store with scaled unsigned 12-bit offset.  off_bytes must already be
# size-aligned and in range; aa64_mem checks both before choosing this form.
%macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2)
%((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm

# LDUR/STUR-style access: signed 9-bit unscaled byte offset at bits 12-20.
%macro aa64_ldst_unscaled(base, rt, rn, off)
%((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm

# Scaled-offset opcodes: 64-bit LDR/STR and byte LDRB/STRB.
%macro aa64_mem_uimm_base_LD()
0xF9400000
%endm
%macro aa64_mem_uimm_base_ST()
0xF9000000
%endm
%macro aa64_mem_uimm_base_LB()
0x39400000
%endm
%macro aa64_mem_uimm_base_SB()
0x39000000
%endm

# Unscaled-offset opcodes: LDUR/STUR and LDURB/STURB.
%macro aa64_mem_unscaled_base_LD()
0xF8400000
%endm
%macro aa64_mem_unscaled_base_ST()
0xF8000000
%endm
%macro aa64_mem_unscaled_base_LB()
0x38400000
%endm
%macro aa64_mem_unscaled_base_SB()
0x38000000
%endm

# log2 of the access size: 8-byte for LD/ST, 1-byte for LB/SB.
%macro aa64_mem_size_LD()
3
%endm
%macro aa64_mem_size_ST()
3
%endm
%macro aa64_mem_size_LB()
0
%endm
%macro aa64_mem_size_SB()
0
%endm

%macro aa64_mem_uimm_base(op)
%aa64_mem_uimm_base_##op
%endm

%macro aa64_mem_unscaled_base(op)
%aa64_mem_unscaled_base_##op
%endm

%macro aa64_mem_size(op)
%aa64_mem_size_##op
%endm

# Offset fits neither immediate form: fold it into scratch with an
# ADD/SUB-immediate, then do a zero-offset access off scratch.
# NOTE(review): this only reaches |off| <= 4095 (one imm12); larger
# offsets would need a MOVZ chain -- presumably stage0 frames never get
# that big.  Verify against the P1 frame-size limits.
%macro aa64_mem_fallback(op, rt, rn, off)
%select((>= off 0),
  %aa64_add_imm(scratch, rn, off)
  %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)),
  %aa64_sub_imm(scratch, rn, (- 0 off))
  %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, scratch, 0, %aa64_mem_size(op)))
%endm

# Second choice: signed unscaled form covers -256..255, else fall back.
%macro aa64_mem_after_uimm(op, rt, rn, off)
%select((>= off -256),
  %select((<= off 255),
    %aa64_ldst_unscaled(%aa64_mem_unscaled_base(op), rt, rn, off),
    %aa64_mem_fallback(op, rt, rn, off)),
  %aa64_mem_fallback(op, rt, rn, off))
%endm

# First choice for non-negative offsets: scaled uimm12 when the offset is
# size-aligned and below 4096 units of the access size.
%macro aa64_mem_after_nonneg(op, rt, rn, off)
%select((= (& off (- (<< 1 %aa64_mem_size(op)) 1)) 0),
  %select((< off (<< 4096 %aa64_mem_size(op))),
    %aa64_ldst_uimm12(%aa64_mem_uimm_base(op), rt, rn, off, %aa64_mem_size(op)),
    %aa64_mem_after_uimm(op, rt, rn, off)),
  %aa64_mem_after_uimm(op, rt, rn, off))
%endm

# Memory access entry point: pick the best encoding for (op, off).
%macro aa64_mem(op, rt, rn, off)
%select((>= off 0),
  %aa64_mem_after_nonneg(op, rt, rn, off),
  %aa64_mem_after_uimm(op, rt, rn, off))
%endm

# cmp ra, rb (SUBS into xzr) followed by b.<cond> +8, i.e. skip the NEXT
# instruction when cond holds.  Callers pass the INVERSE of the P1 branch
# condition (see p1_condb_* below) so the skipped instruction is the
# taken-branch `br`.
%macro aa64_cmp_skip(cond, ra, rb)
%((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31))
%((| 0x54000040 cond))
%endm

# br reg  (indirect branch).
%macro aa64_br(reg)
%((| 0xD61F0000 (<< %aa64_reg(reg) 5)))
%endm

# blr reg  (indirect call; return address into lr).
%macro aa64_blr(reg)
%((| 0xD63F0000 (<< %aa64_reg(reg) 5)))
%endm

# ret  (branch to lr).
%macro aa64_ret()
%(0xD65F03C0)
%endm

%macro aa64_lit64_prefix(rd)
# 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
# The 8 bytes that follow in source become the literal; b skips them.
%((| 0x58000040 %aa64_reg(rd)))
%(0x14000003)
%endm

%macro aa64_lit32_prefix(rd)
# 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
# ldr w zero-extends into the full 64-bit register, so a 4-byte literal
# is enough for any address in the stage0 layout. Lets source use
# `&label` directly without padding to 8 bytes.
%((| 0x18000040 %aa64_reg(rd)))
%(0x14000002)
%endm

# ---- P1 register-register op lowering -----------------------------------
# One macro per P1 RRR op; each base value is the 64-bit A64 opcode with
# all register fields zero (add/sub/and/orr/eor/lslv/lsrv/asrv/sdiv).

%macro aa64_rrr_ADD(rd, ra, rb)
%aa64_rrr(0x8B000000, rd, ra, rb)
%endm
%macro aa64_rrr_SUB(rd, ra, rb)
%aa64_rrr(0xCB000000, rd, ra, rb)
%endm
%macro aa64_rrr_AND(rd, ra, rb)
%aa64_rrr(0x8A000000, rd, ra, rb)
%endm
%macro aa64_rrr_OR(rd, ra, rb)
%aa64_rrr(0xAA000000, rd, ra, rb)
%endm
%macro aa64_rrr_XOR(rd, ra, rb)
%aa64_rrr(0xCA000000, rd, ra, rb)
%endm
%macro aa64_rrr_SHL(rd, ra, rb)
%aa64_rrr(0x9AC02000, rd, ra, rb)
%endm
%macro aa64_rrr_SHR(rd, ra, rb)
%aa64_rrr(0x9AC02400, rd, ra, rb)
%endm
%macro aa64_rrr_SAR(rd, ra, rb)
%aa64_rrr(0x9AC02800, rd, ra, rb)
%endm
%macro aa64_rrr_DIV(rd, ra, rb)
%aa64_rrr(0x9AC00C00, rd, ra, rb)
%endm
# MUL is MADD rd, ra, rb, xzr (addend register field = 31 at bits 10-14).
%macro aa64_rrr_MUL(rd, ra, rb)
%((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
# REM: sdiv scratch, ra, rb; msub rd, scratch, rb, ra
# => rd = ra - (ra / rb) * rb  (signed remainder).
%macro aa64_rrr_REM(rd, ra, rb)
%((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch)))
%((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd)))
%endm

# Dispatch: %aa64_rrr_op(ADD, rd, ra, rb) -> %aa64_rrr_ADD(rd, ra, rb).
%macro aa64_rrr_op(op, rd, ra, rb)
%aa64_rrr_##op(rd, ra, rb)
%endm

# ---- P1 operation lowering -----------------------------------------------

# LI rd, imm: literal-pool load; $(imm) emits the 8-byte literal inline.
%macro p1_li(rd, imm)
%aa64_lit64_prefix(rd)
$(imm)
%endm

# LA rd: caller follows this with a 4-byte `&label` literal.
%macro p1_la(rd)
%aa64_lit32_prefix(rd)
%endm

# LA into the branch-target register (used before B/CALL/TAIL).
%macro p1_labr()
%aa64_lit32_prefix(br)
%endm

%macro p1_mov(rd, rs)
%aa64_mov_rr(rd, rs)
%endm

%macro p1_rrr(op, rd, ra, rb)
%aa64_rrr_op(op, rd, ra, rb)
%endm

# ADDI: pick ADD or SUB immediate by the sign of imm (imm12 is unsigned).
%macro p1_addi(rd, ra, imm)
%select((>= imm 0),
  %aa64_add_imm(rd, ra, imm),
  %aa64_sub_imm(rd, ra, (- 0 imm)))
%endm

# Logical-immediate ops: materialize the (16-bit-range) constant into
# scratch, then use the register form (AND / ORR).
%macro p1_logi_ANDI(rd, ra, imm)
%aa64_materialize_small_imm(scratch, imm)
%aa64_rrr(0x8A000000, rd, ra, scratch)
%endm
%macro p1_logi_ORI(rd, ra, imm)
%aa64_materialize_small_imm(scratch, imm)
%aa64_rrr(0xAA000000, rd, ra, scratch)
%endm
%macro p1_logi(op, rd, ra, imm)
%p1_logi_##op(rd, ra, imm)
%endm

# Shift-immediate ops as bitfield moves:
#   lsl #imm == ubfm rd, ra, #((64-imm) & 63), #(63-imm)
#   lsr #imm == ubfm rd, ra, #imm, #63
#   asr #imm == sbfm rd, ra, #imm, #63
%macro p1_shifti_SHLI(rd, ra, imm)
%aa64_ubfm(rd, ra, (& (- 0 imm) 63), (- 63 imm))
%endm
%macro p1_shifti_SHRI(rd, ra, imm)
%aa64_ubfm(rd, ra, imm, 63)
%endm
%macro p1_shifti_SARI(rd, ra, imm)
%aa64_sbfm(rd, ra, imm, 63)
%endm
%macro p1_shifti(op, rd, ra, imm)
%p1_shifti_##op(rd, ra, imm)
%endm

# P1 memory op: when the base is the P1 sp, bias the offset by +16 to skip
# the frame header (saved lr at [sp,0], caller sp at [sp,8]) laid down by
# p1_enter; matches the src=sp case of aa64_mov_rr.
%macro p1_mem(op, rt, rn, off)
%select((= %aa64_is_sp(rn) 1),
  %aa64_mem(op, rt, rn, (+ off 16)),
  %aa64_mem(op, rt, rn, off))
%endm

# LDARG rd, slot: reload the caller's frame pointer from [sp,8], then read
# argument `slot` from the caller frame at +16 + 8*slot.
%macro p1_ldarg(rd, slot)
%aa64_mem(LD, scratch, sp, 8)
%aa64_mem(LD, rd, scratch, (+ 16 (* 8 slot)))
%endm

# Unconditional branch through the preloaded br register.
%macro p1_b()
%aa64_br(br)
%endm

%macro p1_br(rs)
%aa64_br(rs)
%endm

# Call through the preloaded br register.
%macro p1_call()
%aa64_blr(br)
%endm

%macro p1_callr(rs)
%aa64_blr(rs)
%endm

%macro p1_ret()
%aa64_ret
%endm
# ERET: tear down the p1_enter frame, then return.  Restores lr from
# [sp,0] and the caller's sp from [sp,8] (staged through x8 because sp
# moves must go via aa64_mov_rr's ADD-immediate form).
%macro p1_eret()
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_ret
%endm

# TAIL: same frame teardown as ERET, then jump (not call) through br.
%macro p1_tail()
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_br(br)
%endm

# TAILR: frame teardown, then jump through an arbitrary register.
%macro p1_tailr(rs)
%aa64_mem(LD, lr, sp, 0)
%aa64_mem(LD, x8, sp, 8)
%aa64_mov_rr(sp, x8)
%aa64_br(rs)
%endm

# Conditional branches: cmp + b.<inverse-cond> +8 skips the `br` when the
# P1 condition does NOT hold.  Condition codes passed to aa64_cmp_skip are
# therefore the inverses: NE=1 for BEQ, EQ=0 for BNE, GE=10 for BLT
# (signed), HS=2 for BLTU (unsigned).
%macro p1_condb_BEQ(ra, rb)
%aa64_cmp_skip(1, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BNE(ra, rb)
%aa64_cmp_skip(0, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BLT(ra, rb)
%aa64_cmp_skip(10, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb_BLTU(ra, rb)
%aa64_cmp_skip(2, ra, rb)
%aa64_br(br)
%endm
%macro p1_condb(op, ra, rb)
%p1_condb_##op(ra, rb)
%endm

# Compare-with-zero branches use the single-instruction skip forms:
# BEQZ: cbnz ra, +8 (0xB5...) skips the br when ra != 0;
# BNEZ: cbz  ra, +8 (0xB4...) skips it when ra == 0.
# In both, (<< 2 5) is imm19=2, i.e. a +8-byte branch target.
%macro p1_condbz_BEQZ(ra)
%((| 0xB5000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
%macro p1_condbz_BNEZ(ra)
%((| 0xB4000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
# BLTZ has no cbz-style form: cmp ra, xzr then b.ge +8 (cond GE=10) skips
# the br when ra >= 0.
%macro p1_condbz_BLTZ(ra)
%((| 0xEB1F001F (<< %aa64_reg(ra) 5)))
%((| 0x54000040 10))
%aa64_br(br)
%endm
%macro p1_condbz(op, ra)
%p1_condbz_##op(ra)
%endm

# ENTER size: allocate a 16-byte-aligned frame of (16 + size) bytes.
# Layout: [sp,0] = saved lr, [sp,8] = caller sp (recomputed via x8),
# locals start at [sp,16] -- hence the +16 bias in p1_mem/p1_ldarg.
# (& x -16) rounds the total up to a multiple of 16 so sp stays aligned.
%macro p1_enter(size)
%aa64_sub_imm(sp, sp, (& (+ (+ 16 size) 15) -16))
%aa64_mem(ST, lr, sp, 0)
%aa64_add_imm(x8, sp, (& (+ (+ 16 size) 15) -16))
%aa64_mem(ST, x8, sp, 8)
%endm

%macro p1_entry()
# :_start stub emitted by the aarch64 backend per the P1 program-entry
# model. Captures argc from [sp] into a0, computes argv=sp+8 into a1,
# calls p1_main under the one-word direct-result convention, then issues
# a native Linux sys_exit with p1_main's return value as the exit status.
# (93 = __NR_exit on arm64; 0xD4000001 = svc #0.)
:_start
%aa64_mem(LD, a0, sp, 0)
%aa64_add_imm(a1, sp, 8)
%aa64_lit32_prefix(br)
&p1_main
%aa64_blr(br)
%aa64_movz(x8, 93)
%(0xD4000001)
%endm

# SYSCALL: P1 passes the syscall number in a0 and arguments in a1..a3,
# t0, s0, s1; Linux arm64 wants the number in x8 and arguments in x0-x5.
# Stage a1-a3 through save0-save2 so each can be shifted down one slot
# without clobbering, issue svc #0 (0xD4000001), then restore a1-a3 for
# the caller (the result stays in a0/x0).
%macro p1_syscall()
%aa64_mov_rr(x8, a0)
%aa64_mov_rr(save0, a1)
%aa64_mov_rr(save1, a2)
%aa64_mov_rr(save2, a3)
%aa64_mov_rr(a0, save0)
%aa64_mov_rr(a1, save1)
%aa64_mov_rr(a2, save2)
%aa64_mov_rr(a3, t0)
%aa64_mov_rr(x4, s0)
%aa64_mov_rr(x5, s1)
%(0xD4000001)
%aa64_mov_rr(a1, save0)
%aa64_mov_rr(a2, save1)
%aa64_mov_rr(a3, save2)
%endm

# ---- Linux aarch64 syscall numbers ---------------------------------------
# Each macro returns the syscall number as an integer atom so callers can
# use it inside expressions (e.g. `%li(a0, %sys_write)`).  Values are the
# asm-generic numbers used by arm64 Linux.

%macro p1_sys_read()
63
%endm
%macro p1_sys_write()
64
%endm
%macro p1_sys_close()
57
%endm
%macro p1_sys_openat()
56
%endm
%macro p1_sys_exit()
93
%endm
%macro p1_sys_clone()
220
%endm
%macro p1_sys_execve()
221
%endm
%macro p1_sys_waitid()
95
%endm