aarch64.py (10525B)
from common import (
    AddI,
    ArchDef,
    BranchReg,
    CondB,
    CondBZ,
    Enter,
    La,
    LaBr,
    LdArg,
    Li,
    LogI,
    Mem,
    Mov,
    Nullary,
    Rrr,
    ShiftI,
    le32,
    round_up,
)


# Portable register name -> AArch64 register number.  Register 31 is
# context-dependent (sp in some encodings, xzr in others), so both names
# map to 31 and each encoder picks the right base opcode.
NAT = {
    'a0': 0,
    'a1': 1,
    'a2': 2,
    'a3': 3,
    'x4': 4,
    'x5': 5,
    't0': 9,
    't1': 10,
    't2': 11,
    's0': 19,
    's1': 20,
    's2': 21,
    's3': 22,
    'sp': 31,
    'xzr': 31,
    'lr': 30,
    'br': 17,
    'scratch': 16,
    'x8': 8,
    'save0': 23,
    'save1': 24,
    'save2': 25,
}


# Base opcodes for 64-bit three-register ALU ops (register fields zero).
# MUL and REM need multi-field / multi-instruction forms and are handled
# specially in encode_rrr.
RRR_BASE = {
    'ADD': 0x8B000000,
    'SUB': 0xCB000000,
    'AND': 0x8A000000,
    'OR': 0xAA000000,
    'XOR': 0xCA000000,
    'SHL': 0x9AC02000,
    'SHR': 0x9AC02400,
    'SAR': 0x9AC02800,
    'DIV': 0x9AC00C00,
}


# Linux syscall numbers for the aarch64 ABI.
SYSCALL_NUMBERS = {
    'SYS_READ': 63,
    'SYS_WRITE': 64,
    'SYS_CLOSE': 57,
    'SYS_OPENAT': 56,
    'SYS_EXIT': 93,
    'SYS_CLONE': 220,
    'SYS_EXECVE': 221,
    'SYS_WAITID': 95,
}


def aa_rrr(base, rd, ra, rb):
    """Encode a three-register op: base | Rm<<16 | Rn<<5 | Rd."""
    d = NAT[rd]
    a = NAT[ra]
    b = NAT[rb]
    return le32(base | (b << 16) | (a << 5) | d)


def aa_add_imm(rd, ra, imm12, sub=False):
    """Encode 64-bit ADD/SUB (immediate).

    imm12 must fit the unsigned 12-bit immediate field.  Raises
    ValueError otherwise -- silently masking (the old behavior) would
    emit wrong machine code for oversized offsets or frame sizes.
    """
    if not 0 <= imm12 <= 0xFFF:
        raise ValueError(f'aarch64 add/sub immediate out of range: {imm12}')
    d = NAT[rd]
    a = NAT[ra]
    base = 0xD1000000 if sub else 0x91000000
    return le32(base | (imm12 << 10) | (a << 5) | d)


def aa_mov_rr(dst, src):
    """Register-to-register move.

    ORR xd, xzr, xs cannot address sp (register 31 means xzr there), so
    moves involving sp use ADD #0, whose encoding reads 31 as sp.
    """
    if dst == 'sp':
        return aa_add_imm('sp', src, 0, sub=False)
    if src == 'sp':
        return aa_add_imm(dst, 'sp', 0, sub=False)
    d = NAT[dst]
    s = NAT[src]
    return le32(0xAA000000 | (s << 16) | (31 << 5) | d)


def aa_ubfm(rd, ra, immr, imms):
    """Encode 64-bit UBFM (unsigned bitfield move; LSL/LSR aliases)."""
    d = NAT[rd]
    a = NAT[ra]
    return le32(0xD3400000 | (immr << 16) | (imms << 10) | (a << 5) | d)


def aa_sbfm(rd, ra, immr, imms):
    """Encode 64-bit SBFM (signed bitfield move; ASR alias)."""
    d = NAT[rd]
    a = NAT[ra]
    return le32(0x93400000 | (immr << 16) | (imms << 10) | (a << 5) | d)


def aa_movz(rd, imm16):
    """Encode MOVZ xd, #imm16 (zero-extended 16-bit immediate)."""
    d = NAT[rd]
    return le32(0xD2800000 | ((imm16 & 0xFFFF) << 5) | d)


def aa_movn(rd, imm16):
    """Encode MOVN xd, #imm16 (bitwise-NOT of the 16-bit immediate)."""
    d = NAT[rd]
    return le32(0x92800000 | ((imm16 & 0xFFFF) << 5) | d)


def aa_materialize_small_imm(rd, imm):
    """Load a small signed immediate with a single MOVZ/MOVN.

    One instruction covers [-0x10000, 0xFFFF]; anything outside raises
    ValueError instead of silently truncating to a wrong constant.
    """
    if not -0x10000 <= imm <= 0xFFFF:
        raise ValueError(f'immediate too large for one MOVZ/MOVN: {imm}')
    if imm >= 0:
        return aa_movz(rd, imm)
    return aa_movn(rd, (~imm) & 0xFFFF)


def aa_ldst_uimm12(base, rt, rn, off_bytes, size_log2):
    """Encode a load/store with a scaled unsigned 12-bit offset.

    Callers (aa_mem) guarantee off_bytes is non-negative, aligned to the
    access size, and in range, so no masking is needed here.
    """
    imm12 = off_bytes >> size_log2
    t = NAT[rt]
    n = NAT[rn]
    return le32(base | (imm12 << 10) | (n << 5) | t)


def aa_ldst_unscaled(base, rt, rn, off):
    """Encode a load/store with an unscaled signed 9-bit offset (LDUR/STUR)."""
    imm9 = off & 0x1FF
    t = NAT[rt]
    n = NAT[rn]
    return le32(base | (imm9 << 12) | (n << 5) | t)


def aa_mem(op, rt, rn, off):
    """Encode a portable load/store, picking the cheapest addressing form.

    Tries the scaled-uimm12 form, then the unscaled signed-9-bit form,
    then falls back to materializing base+off in scratch for offsets up
    to +/-2047.  Anything larger raises ValueError.
    """
    bases = {
        # op: (scaled-uimm12 base, size_log2, unscaled base)
        'LD': (0xF9400000, 3, 0xF8400000),
        'ST': (0xF9000000, 3, 0xF8000000),
        'LB': (0x39400000, 0, 0x38400000),
        'SB': (0x39000000, 0, 0x38000000),
    }
    uimm_base, size_log2, unscaled_base = bases[op]
    scale = 1 << size_log2
    if off >= 0 and off % scale == 0 and off < (4096 << size_log2):
        return aa_ldst_uimm12(uimm_base, rt, rn, off, size_log2)
    if -256 <= off <= 255:
        return aa_ldst_unscaled(unscaled_base, rt, rn, off)
    if -2048 <= off <= 2047:
        # scratch = rn +/- off, then access [scratch, #0].
        if off >= 0:
            addr = aa_add_imm('scratch', rn, off, sub=False)
        else:
            addr = aa_add_imm('scratch', rn, -off, sub=True)
        return addr + aa_ldst_uimm12(uimm_base, rt, 'scratch', 0, size_log2)
    raise ValueError(f'aarch64 offset out of range for {op}: {off}')


def aa_cmp_skip(op, ra, rb):
    """Encode CMP ra, rb plus a conditional skip over the next word.

    The condition is the *inverse* of the portable branch, so the b.cond
    jumps over the following 4-byte instruction (the caller appends a
    br) when the branch should NOT be taken.
    """
    a = NAT[ra]
    b = NAT[rb]
    cmp_hex = le32(0xEB000000 | (b << 16) | (a << 5) | 31)
    skip_cond = {
        'BEQ': 1,    # skip on NE
        'BNE': 0,    # skip on EQ
        'BLT': 10,   # skip on GE
        'BLTU': 2,   # skip on HS
    }[op]
    return cmp_hex + le32(0x54000040 | skip_cond)


def aa_br(reg):
    """Encode BR reg (indirect jump)."""
    return le32(0xD61F0000 | (NAT[reg] << 5))


def aa_blr(reg):
    """Encode BLR reg (indirect call, link in lr)."""
    return le32(0xD63F0000 | (NAT[reg] << 5))


def aa_ret():
    """Encode RET (returns via lr)."""
    return le32(0xD65F03C0)


def aa_epilogue():
    # Frame teardown, shared by ERET, TAIL, TAILR.  Loads lr and the
    # saved caller sp from the hidden header at native_sp+0/+8, then
    # unwinds sp.  Does NOT transfer control; the caller appends an
    # aa_ret / aa_br as appropriate.
    return (
        aa_mem('LD', 'lr', 'sp', 0)
        + aa_mem('LD', 'x8', 'sp', 8)
        + aa_mov_rr('sp', 'x8')
    )


def aa_lit64_prefix(rd):
    ## 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
    ## The 8 bytes that follow in source become the literal; b skips them.
    d = NAT[rd]
    ldr_lit = 0x58000040 | d
    b_plus12 = 0x14000003
    return le32(ldr_lit) + le32(b_plus12)


def aa_lit32_prefix(rd):
    ## 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
    ## ldr w zero-extends into the full 64-bit register, so a 4-byte literal
    ## is enough for any address in the stage0 layout (base 0x00600000,
    ## programs well under 4 GB).  This lets source use `&label` directly
    ## without padding to 8 bytes.
    d = NAT[rd]
    ldr_lit = 0x18000040 | d
    b_plus8 = 0x14000002
    return le32(ldr_lit) + le32(b_plus8)


def encode_li(_arch, row):
    """LI: 64-bit literal prefix; the literal bytes follow in source."""
    return aa_lit64_prefix(row.rd)


def encode_la(_arch, row):
    """LA: 32-bit literal prefix; the address literal follows in source."""
    return aa_lit32_prefix(row.rd)


def encode_labr(_arch, _row):
    """LA_BR: load an address literal into the branch register."""
    return aa_lit32_prefix('br')


def encode_mov(_arch, row):
    # Portable `sp` is the frame-local base, which is 16 bytes above
    # native sp (the backend's 2-word hidden header sits at the low end
    # of each frame allocation).  So reading sp into a register yields
    # native_sp + 16, not native_sp itself.
    if row.rs == 'sp':
        return aa_add_imm(row.rd, 'sp', 16, sub=False)
    return aa_mov_rr(row.rd, row.rs)


def encode_rrr(_arch, row):
    """Three-register ALU ops; MUL and REM need special encodings."""
    if row.op == 'MUL':
        # MADD rd, ra, rb, xzr.
        d = NAT[row.rd]
        a = NAT[row.ra]
        b = NAT[row.rb]
        return le32(0x9B000000 | (b << 16) | (31 << 10) | (a << 5) | d)
    if row.op == 'REM':
        # No remainder instruction: sdiv scratch, a, b; msub d, scratch, b, a.
        d = NAT[row.rd]
        a = NAT[row.ra]
        b = NAT[row.rb]
        sc = NAT['scratch']
        sdiv = 0x9AC00C00 | (b << 16) | (a << 5) | sc
        msub = 0x9B008000 | (b << 16) | (a << 10) | (sc << 5) | d
        return le32(sdiv) + le32(msub)
    return aa_rrr(RRR_BASE[row.op], row.rd, row.ra, row.rb)


def encode_addi(_arch, row):
    """ADDI: add/sub immediate, flipping to SUB for negative imm."""
    if row.imm >= 0:
        return aa_add_imm(row.rd, row.ra, row.imm, sub=False)
    return aa_add_imm(row.rd, row.ra, -row.imm, sub=True)


def encode_logi(_arch, row):
    """ANDI/ORI: materialize the immediate in scratch, then register op."""
    seq = aa_materialize_small_imm('scratch', row.imm)
    base = {
        'ANDI': 0x8A000000,
        'ORI': 0xAA000000,
    }[row.op]
    return seq + aa_rrr(base, row.rd, row.ra, 'scratch')


def encode_shifti(_arch, row):
    """SHLI/SHRI/SARI via the UBFM/SBFM aliases for LSL/LSR/ASR."""
    if row.op == 'SHLI':
        return aa_ubfm(row.rd, row.ra, (-row.imm) & 63, 63 - row.imm)
    if row.op == 'SHRI':
        return aa_ubfm(row.rd, row.ra, row.imm, 63)
    return aa_sbfm(row.rd, row.ra, row.imm, 63)


def encode_mem(_arch, row):
    # Portable sp points to the frame-local base; the 2-word hidden
    # header sits at native_sp+0/+8 and is not portable-addressable.
    # Shift sp-relative offsets past the header.
    off = row.off + 16 if row.rn == 'sp' else row.off
    return aa_mem(row.op, row.rt, row.rn, off)


def encode_ldarg(_arch, row):
    """LDARG: load caller sp from the hidden header, then the arg slot."""
    return aa_mem('LD', 'scratch', 'sp', 8) + aa_mem(
        'LD', row.rd, 'scratch', 16 + 8 * row.slot)


def encode_branch_reg(_arch, row):
    """BR/CALLR/TAILR through a register; TAILR tears the frame down first."""
    if row.kind == 'BR':
        return aa_br(row.rs)
    if row.kind == 'CALLR':
        return aa_blr(row.rs)
    if row.kind == 'TAILR':
        return aa_epilogue() + aa_br(row.rs)
    raise ValueError(f'unknown branch-reg kind: {row.kind}')


def encode_condb(_arch, row):
    """Two-register conditional branch: cmp + inverted skip + br."""
    return aa_cmp_skip(row.op, row.ra, row.rb) + aa_br('br')


def encode_condbz(_arch, row):
    """Compare-against-zero branches.

    BEQZ/BNEZ use cbnz/cbz with the inverted sense to skip the br word;
    the remaining op compares against xzr and skips with b.ge.
    """
    a = NAT[row.ra]
    br_hex = aa_br('br')
    if row.op == 'BEQZ':
        return le32(0xB5000000 | (2 << 5) | a) + br_hex   # cbnz skips br
    if row.op == 'BNEZ':
        return le32(0xB4000000 | (2 << 5) | a) + br_hex   # cbz skips br
    cmp_zero = le32(0xEB1F001F | (a << 5))
    bge = le32(0x54000040 | 10)                           # b.ge skips br
    return cmp_zero + bge + br_hex


def encode_enter(arch, row):
    """ENTER: allocate frame (size + 2-word hidden header), save lr and
    the caller sp into the header at native_sp+0/+8."""
    frame_bytes = round_up(arch.stack_align, 2 * arch.word_bytes + row.size)
    return (
        aa_add_imm('sp', 'sp', frame_bytes, sub=True)
        + aa_mem('ST', 'lr', 'sp', 0)
        + aa_add_imm('x8', 'sp', frame_bytes, sub=False)
        + aa_mem('ST', 'x8', 'sp', 8)
    )


def encode_nullary(_arch, row):
    """Operand-less instructions: branches, returns, and SYSCALL."""
    if row.kind == 'B':
        return aa_br('br')
    if row.kind == 'CALL':
        return aa_blr('br')
    if row.kind == 'RET':
        return aa_ret()
    if row.kind == 'ERET':
        return aa_epilogue() + aa_ret()
    if row.kind == 'TAIL':
        return aa_epilogue() + aa_br('br')
    if row.kind == 'SYSCALL':
        # Shift portable (a0=number, a1..a3/t0/s0/s1=args) into the
        # native layout (x8=number, a0..x5=args).  a1-a3 are staged
        # through save0-save2 so the in-place shift doesn't clobber
        # them, then restored after svc (a0 carries the result).
        return ''.join([
            aa_mov_rr('x8', 'a0'),
            aa_mov_rr('save0', 'a1'),
            aa_mov_rr('save1', 'a2'),
            aa_mov_rr('save2', 'a3'),
            aa_mov_rr('a0', 'save0'),
            aa_mov_rr('a1', 'save1'),
            aa_mov_rr('a2', 'save2'),
            aa_mov_rr('a3', 't0'),
            aa_mov_rr('x4', 's0'),
            aa_mov_rr('x5', 's1'),
            le32(0xD4000001),
            aa_mov_rr('a1', 'save0'),
            aa_mov_rr('a2', 'save1'),
            aa_mov_rr('a3', 'save2'),
        ])
    raise ValueError(f'unknown nullary kind: {row.kind}')


def aa_start_stub():
    # Backend-owned :_start stub per docs/P1.md §Program Entry.  Captures
    # argc from [sp] and argv pointer from sp+8, calls p1_main under the
    # one-word direct-result convention (a0=argc, a1=argv), then issues a
    # native Linux sys_exit with p1_main's return value.  Mirrors the
    # m1pp-path stub in p1/P1-aarch64.M1pp (`%p1_entry`).
    #
    # Raw hex outside `DEFINE` bodies must be single-quoted so bootstrap
    # M0 treats it as a literal byte run rather than a token.
    def q(hex_bytes):
        return f"'{hex_bytes}'"
    return [
        ':_start',
        q(aa_mem('LD', 'a0', 'sp', 0)),
        q(aa_add_imm('a1', 'sp', 8, sub=False)),
        q(aa_lit32_prefix('br')),
        '&p1_main',
        q(aa_blr('br')),
        q(aa_movz('x8', 93)),
        q(le32(0xD4000001)),
    ]


# Row-class -> encoder dispatch used by the shared driver.
ENCODERS = {
    Li: encode_li,
    La: encode_la,
    LaBr: encode_labr,
    Mov: encode_mov,
    Rrr: encode_rrr,
    AddI: encode_addi,
    LogI: encode_logi,
    ShiftI: encode_shifti,
    Mem: encode_mem,
    LdArg: encode_ldarg,
    Nullary: encode_nullary,
    BranchReg: encode_branch_reg,
    CondB: encode_condb,
    CondBZ: encode_condbz,
    Enter: encode_enter,
}


ARCH = ArchDef(
    name='aarch64',
    word_bytes=8,
    stack_align=16,
    syscall_numbers=SYSCALL_NUMBERS,
    encoders=ENCODERS,
    start_stub=aa_start_stub,
)