aarch64.py (10586B)
"""AArch64 (ARM64 Linux) backend for the portable instruction set.

Each ``encode_*`` function turns one portable-IR row into little-endian
A64 machine code, emitted as hex text via ``common.le32``.  The module
registers itself with the arch table at import time (``register_arch``
at the bottom).
"""

from common import (
    AddI,
    ArchDef,
    BranchReg,
    CondB,
    CondBZ,
    Enter,
    La,
    LaBr,
    LdArg,
    Li,
    LogI,
    Mem,
    Mov,
    Nullary,
    Rrr,
    ShiftI,
    le32,
    register_arch,
    round_up,
)


# Portable register name -> AArch64 register number.
# Number 31 is context-dependent in A64: it means sp in add/sub-immediate
# and load/store base positions, xzr in most other operand positions
# (aa_mov_rr special-cases sp for this reason).
# 'br' (x17) holds branch targets, 'scratch' (x16) is a backend temp --
# both are the AAPCS64 intra-procedure-call scratch registers, safe to
# clobber between calls.  x8 is the Linux syscall-number register.
# save0..save2 (x23..x25) are callee-saved temporaries used by SYSCALL.
NAT = {
    'a0': 0,
    'a1': 1,
    'a2': 2,
    'a3': 3,
    'x4': 4,
    'x5': 5,
    't0': 9,
    't1': 10,
    't2': 11,
    's0': 19,
    's1': 20,
    's2': 21,
    's3': 22,
    'sp': 31,
    'xzr': 31,
    'lr': 30,
    'br': 17,
    'scratch': 16,
    'x8': 8,
    'save0': 23,
    'save1': 24,
    'save2': 25,
}


# Base opcodes for 64-bit three-register ops (registers OR'd in by
# aa_rrr): ADD/SUB/AND/ORR/EOR shifted-register forms, and the
# 0x9AC0---- two-source data-processing family (LSLV/LSRV/ASRV/SDIV).
RRR_BASE = {
    'ADD': 0x8B000000,
    'SUB': 0xCB000000,
    'AND': 0x8A000000,
    'OR': 0xAA000000,
    'XOR': 0xCA000000,
    'SHL': 0x9AC02000,
    'SHR': 0x9AC02400,
    'SAR': 0x9AC02800,
    'DIV': 0x9AC00C00,
}


# Linux arm64 syscall numbers (generic/asm-generic numbering).
SYSCALL_NUMBERS = {
    'SYS_READ': 63,
    'SYS_WRITE': 64,
    'SYS_CLOSE': 57,
    'SYS_OPENAT': 56,
    'SYS_EXIT': 93,
    'SYS_CLONE': 220,
    'SYS_EXECVE': 221,
    'SYS_WAITID': 95,
}


def aa_rrr(base: int, rd: str, ra: str, rb: str) -> str:
    """Encode a three-register op: base | Rm(rb)<<16 | Rn(ra)<<5 | Rd."""
    d = NAT[rd]
    a = NAT[ra]
    b = NAT[rb]
    return le32(base | (b << 16) | (a << 5) | d)


def aa_add_imm(rd: str, ra: str, imm12: int, sub: bool = False) -> str:
    """Encode ADD/SUB (immediate), 64-bit: rd = ra +/- imm12.

    imm12 is masked to 12 bits; callers are responsible for range
    (0..4095, no shifted-immediate form is used here).
    """
    d = NAT[rd]
    a = NAT[ra]
    base = 0xD1000000 if sub else 0x91000000
    return le32(base | ((imm12 & 0xFFF) << 10) | (a << 5) | d)


def aa_mov_rr(dst: str, src: str) -> str:
    """Register-to-register move.

    When either side is sp, use ADD #0 (the mov-to/from-sp alias),
    because register 31 decodes as xzr, not sp, in the ORR form.
    Otherwise emit ORR dst, xzr, src (the canonical MOV alias).
    """
    if dst == 'sp':
        return aa_add_imm('sp', src, 0, sub=False)
    if src == 'sp':
        return aa_add_imm(dst, 'sp', 0, sub=False)
    d = NAT[dst]
    s = NAT[src]
    return le32(0xAA000000 | (s << 16) | (31 << 5) | d)


def aa_ubfm(rd: str, ra: str, immr: int, imms: int) -> str:
    """Encode UBFM (64-bit, N=1): unsigned bitfield move (LSL/LSR alias)."""
    d = NAT[rd]
    a = NAT[ra]
    return le32(0xD3400000 | (immr << 16) | (imms << 10) | (a << 5) | d)


def aa_sbfm(rd: str, ra: str, immr: int, imms: int) -> str:
    """Encode SBFM (64-bit, N=1): signed bitfield move (ASR alias)."""
    d = NAT[rd]
    a = NAT[ra]
    return le32(0x93400000 | (immr << 16) | (imms << 10) | (a << 5) | d)


def aa_movz(rd: str, imm16: int) -> str:
    """Encode MOVZ rd, #imm16 (64-bit, shift 0): rd = imm16."""
    d = NAT[rd]
    return le32(0xD2800000 | ((imm16 & 0xFFFF) << 5) | d)


def aa_movn(rd: str, imm16: int) -> str:
    """Encode MOVN rd, #imm16 (64-bit, shift 0): rd = ~imm16."""
    d = NAT[rd]
    return le32(0x92800000 | ((imm16 & 0xFFFF) << 5) | d)


def aa_materialize_small_imm(rd: str, imm: int) -> str:
    """Load a small signed immediate with one MOVZ (imm >= 0) or MOVN.

    Assumes imm fits in 16 bits after the MOVZ/MOVN encoding (i.e.
    -65536 <= imm <= 65535); larger values would be silently truncated.
    """
    if imm >= 0:
        return aa_movz(rd, imm)
    return aa_movn(rd, (~imm) & 0xFFFF)


def aa_ldst_uimm12(base: int, rt: str, rn: str, off_bytes: int, size_log2: int) -> str:
    """Encode a load/store with scaled unsigned 12-bit offset.

    off_bytes must be non-negative and a multiple of the access size;
    aa_mem checks this before calling.
    """
    imm12 = off_bytes >> size_log2
    t = NAT[rt]
    n = NAT[rn]
    return le32(base | (imm12 << 10) | (n << 5) | t)


def aa_ldst_unscaled(base: int, rt: str, rn: str, off: int) -> str:
    """Encode an unscaled load/store (LDUR/STUR family, signed imm9)."""
    imm9 = off & 0x1FF
    t = NAT[rt]
    n = NAT[rn]
    return le32(base | (imm9 << 12) | (n << 5) | t)


def aa_mem(op: str, rt: str, rn: str, off: int) -> str:
    """Encode a load/store of rt at [rn + off].

    op is 'LD'/'ST' (64-bit) or 'LB'/'SB' (byte).  Picks the cheapest
    encoding: scaled uimm12 when off is aligned and in range, otherwise
    unscaled imm9 (-256..255), otherwise an add/sub into scratch plus a
    zero-offset access (up to +/-2047).  Raises ValueError beyond that.
    """
    bases = {
        'LD': (0xF9400000, 3, 0xF8400000),
        'ST': (0xF9000000, 3, 0xF8000000),
        'LB': (0x39400000, 0, 0x38400000),
        'SB': (0x39000000, 0, 0x38000000),
    }
    uimm_base, size_log2, unscaled_base = bases[op]
    scale = 1 << size_log2
    if off >= 0 and off % scale == 0 and off < (4096 << size_log2):
        return aa_ldst_uimm12(uimm_base, rt, rn, off, size_log2)
    if -256 <= off <= 255:
        return aa_ldst_unscaled(unscaled_base, rt, rn, off)
    if -2048 <= off <= 2047:
        # Materialize rn+off into scratch, then access at offset 0.
        if off >= 0:
            addr = aa_add_imm('scratch', rn, off, sub=False)
        else:
            addr = aa_add_imm('scratch', rn, -off, sub=True)
        return addr + aa_ldst_uimm12(uimm_base, rt, 'scratch', 0, size_log2)
    raise ValueError(f'aarch64 offset out of range for {op}: {off}')


def aa_cmp_skip(op: str, ra: str, rb: str) -> str:
    """Emit CMP ra, rb followed by a B.cond that skips one instruction.

    The condition is the INVERSE of op (e.g. BEQ emits b.ne +8), so the
    instruction the caller appends next -- typically a BR -- executes
    exactly when op's condition holds.
    """
    a = NAT[ra]
    b = NAT[rb]
    # CMP is SUBS xzr, ra, rb.
    cmp_hex = le32(0xEB000000 | (b << 16) | (a << 5) | 31)
    # A64 condition codes: NE=1, EQ=0, GE=10, HS=2 (inverse of each op).
    skip_cond = {
        'BEQ': 1,
        'BNE': 0,
        'BLT': 10,
        'BLTU': 2,
    }[op]
    # 0x54000040: B.cond with imm19=2, i.e. branch to PC+8.
    return cmp_hex + le32(0x54000040 | skip_cond)


def aa_br(reg: str) -> str:
    """Encode BR reg (indirect branch, no link)."""
    return le32(0xD61F0000 | (NAT[reg] << 5))


def aa_blr(reg: str) -> str:
    """Encode BLR reg (indirect call, link in lr)."""
    return le32(0xD63F0000 | (NAT[reg] << 5))


def aa_ret() -> str:
    """Encode RET (returns through lr/x30)."""
    return le32(0xD65F03C0)


def aa_epilogue() -> str:
    # Frame teardown, shared by ERET, TAIL, TAILR. Loads lr and the
    # saved caller sp from the hidden header at native_sp+0/+8, then
    # unwinds sp. Does NOT transfer control; the caller appends an
    # aa_ret / aa_br as appropriate.
    return (
        aa_mem('LD', 'lr', 'sp', 0)
        + aa_mem('LD', 'x8', 'sp', 8)
        + aa_mov_rr('sp', 'x8')
    )


def aa_lit64_prefix(rd: str) -> str:
    ## 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
    ## The 8 bytes that follow in source become the literal; b skips them.
    d = NAT[rd]
    ldr_lit = 0x58000040 | d
    b_plus12 = 0x14000003
    return le32(ldr_lit) + le32(b_plus12)


def aa_lit32_prefix(rd: str) -> str:
    ## 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
    ## ldr w zero-extends into the full 64-bit register, so a 4-byte literal
    ## is enough for any address in the stage0 layout (base 0x00600000,
    ## programs well under 4 GB). This lets source use `&label` directly
    ## without padding to 8 bytes.
    d = NAT[rd]
    ldr_lit = 0x18000040 | d
    b_plus8 = 0x14000002
    return le32(ldr_lit) + le32(b_plus8)


def encode_li(_arch: ArchDef, row: Li) -> str:
    """LI: emit the 64-bit literal prefix; the literal follows in source."""
    return aa_lit64_prefix(row.rd)


def encode_la(_arch: ArchDef, row: La) -> str:
    """LA: emit the 32-bit literal prefix; the address follows in source."""
    return aa_lit32_prefix(row.rd)


def encode_labr(_arch: ArchDef, _row: LaBr) -> str:
    """LA_BR: load a label address into the branch register (x17)."""
    return aa_lit32_prefix('br')


def encode_mov(_arch: ArchDef, row: Mov) -> str:
    # Portable `sp` is the frame-local base, which is 16 bytes above
    # native sp (the backend's 2-word hidden header sits at the low end
    # of each frame allocation). So reading sp into a register yields
    # native_sp + 16, not native_sp itself.
    if row.rs == 'sp':
        return aa_add_imm(row.rd, 'sp', 16, sub=False)
    return aa_mov_rr(row.rd, row.rs)


def encode_rrr(_arch: ArchDef, row: Rrr) -> str:
    """Three-register ALU op; MUL and REM need special sequences."""
    if row.op == 'MUL':
        # MUL is MADD rd, ra, rb, xzr (accumulator = 31 at bits 10-14).
        d = NAT[row.rd]
        a = NAT[row.ra]
        b = NAT[row.rb]
        return le32(0x9B000000 | (b << 16) | (31 << 10) | (a << 5) | d)
    if row.op == 'REM':
        # No hardware remainder: sdiv scratch, a, b; msub d, scratch, b, a
        # i.e. d = a - (a / b) * b.
        d = NAT[row.rd]
        a = NAT[row.ra]
        b = NAT[row.rb]
        sc = NAT['scratch']
        sdiv = 0x9AC00C00 | (b << 16) | (a << 5) | sc
        msub = 0x9B008000 | (b << 16) | (a << 10) | (sc << 5) | d
        return le32(sdiv) + le32(msub)
    return aa_rrr(RRR_BASE[row.op], row.rd, row.ra, row.rb)


def encode_addi(_arch: ArchDef, row: AddI) -> str:
    """ADDI: add-immediate, folding negative immediates into SUB."""
    if row.imm >= 0:
        return aa_add_imm(row.rd, row.ra, row.imm, sub=False)
    return aa_add_imm(row.rd, row.ra, -row.imm, sub=True)


def encode_logi(_arch: ArchDef, row: LogI) -> str:
    """ANDI/ORI: A64 logical immediates use bitmask encoding, so instead
    materialize the immediate into scratch and use the register form.
    Assumes the immediate fits one MOVZ/MOVN (see aa_materialize_small_imm).
    """
    seq = aa_materialize_small_imm('scratch', row.imm)
    base = {
        'ANDI': 0x8A000000,
        'ORI': 0xAA000000,
    }[row.op]
    return seq + aa_rrr(base, row.rd, row.ra, 'scratch')


def encode_shifti(_arch: ArchDef, row: ShiftI) -> str:
    """Shift-immediate via the UBFM/SBFM aliases.

    SHLI uses the LSL alias (immr = -imm mod 64, imms = 63 - imm);
    SHRI the LSR alias; SARI falls through to the ASR (SBFM) alias.
    """
    if row.op == 'SHLI':
        return aa_ubfm(row.rd, row.ra, (-row.imm) & 63, 63 - row.imm)
    if row.op == 'SHRI':
        return aa_ubfm(row.rd, row.ra, row.imm, 63)
    return aa_sbfm(row.rd, row.ra, row.imm, 63)


def encode_mem(_arch: ArchDef, row: Mem) -> str:
    # Portable sp points to the frame-local base; the 2-word hidden
    # header sits at native_sp+0/+8 and is not portable-addressable.
    # Shift sp-relative offsets past the header.
    off = row.off + 16 if row.rn == 'sp' else row.off
    return aa_mem(row.op, row.rt, row.rn, off)


def encode_ldarg(_arch: ArchDef, row: LdArg) -> str:
    """LDARG: fetch caller-frame argument `slot`.

    Loads the saved caller sp from the hidden header at [sp+8], then
    reads the slot at caller_sp + 16 (past the caller's own header)
    + 8 * slot.
    """
    return aa_mem('LD', 'scratch', 'sp', 8) + aa_mem('LD', row.rd, 'scratch', 16 + 8 * row.slot)


def encode_branch_reg(_arch: ArchDef, row: BranchReg) -> str:
    """Register-indirect control transfer: BR, CALLR (BLR), or TAILR
    (frame teardown followed by BR)."""
    if row.kind == 'BR':
        return aa_br(row.rs)
    if row.kind == 'CALLR':
        return aa_blr(row.rs)
    if row.kind == 'TAILR':
        return aa_epilogue() + aa_br(row.rs)
    raise ValueError(f'unknown branch-reg kind: {row.kind}')


def encode_condb(_arch: ArchDef, row: CondB) -> str:
    """Two-register conditional branch: cmp + inverse-cond skip + BR br."""
    return aa_cmp_skip(row.op, row.ra, row.rb) + aa_br('br')


def encode_condbz(_arch: ArchDef, row: CondBZ) -> str:
    """Compare-against-zero branch through the branch register.

    BEQZ/BNEZ use CBNZ/CBZ (note the inversion: the skip is taken when
    the branch should NOT fire) with imm19=2 to hop over the BR.  Any
    other op branches when ra < 0: cmp ra, xzr; b.ge +8; br.
    """
    a = NAT[row.ra]
    br_hex = aa_br('br')
    if row.op == 'BEQZ':
        return le32(0xB5000000 | (2 << 5) | a) + br_hex
    if row.op == 'BNEZ':
        return le32(0xB4000000 | (2 << 5) | a) + br_hex
    cmp_zero = le32(0xEB1F001F | (a << 5))
    bge = le32(0x54000040 | 10)
    return cmp_zero + bge + br_hex


def encode_enter(arch: ArchDef, row: Enter) -> str:
    """ENTER: allocate a frame and fill the 2-word hidden header.

    Frame = hidden header (2 words) + row.size portable bytes, rounded
    via common.round_up to the stack alignment.  Stores lr at [sp+0]
    and the pre-allocation sp (recomputed into x8) at [sp+8].
    NOTE(review): frame_bytes > 4095 would be silently truncated by
    aa_add_imm's 12-bit mask -- presumably frames stay small; verify.
    """
    frame_bytes = round_up(arch.stack_align, 2 * arch.word_bytes + row.size)
    return (
        aa_add_imm('sp', 'sp', frame_bytes, sub=True)
        + aa_mem('ST', 'lr', 'sp', 0)
        + aa_add_imm('x8', 'sp', frame_bytes, sub=False)
        + aa_mem('ST', 'x8', 'sp', 8)
    )


def encode_nullary(_arch: ArchDef, row: Nullary) -> str:
    """Operand-less rows: branches/calls through br, returns, and SYSCALL.

    SYSCALL shuffles the portable convention (number in a0, args in
    a1-a3, t0, s0, s1) into the Linux arm64 one (number in x8, args in
    x0-x5), issues SVC #0, then restores a1-a3 from the callee-saved
    save0-save2 so only a0 (the kernel return value) is clobbered.
    """
    if row.kind == 'B':
        return aa_br('br')
    if row.kind == 'CALL':
        return aa_blr('br')
    if row.kind == 'RET':
        return aa_ret()
    if row.kind == 'ERET':
        return aa_epilogue() + aa_ret()
    if row.kind == 'TAIL':
        return aa_epilogue() + aa_br('br')
    if row.kind == 'SYSCALL':
        return ''.join([
            aa_mov_rr('x8', 'a0'),
            aa_mov_rr('save0', 'a1'),
            aa_mov_rr('save1', 'a2'),
            aa_mov_rr('save2', 'a3'),
            aa_mov_rr('a0', 'save0'),
            aa_mov_rr('a1', 'save1'),
            aa_mov_rr('a2', 'save2'),
            aa_mov_rr('a3', 't0'),
            aa_mov_rr('x4', 's0'),
            aa_mov_rr('x5', 's1'),
            le32(0xD4000001),  # svc #0
            aa_mov_rr('a1', 'save0'),
            aa_mov_rr('a2', 'save1'),
            aa_mov_rr('a3', 'save2'),
        ])
    raise ValueError(f'unknown nullary kind: {row.kind}')


def aa_start_stub():
    # Backend-owned :_start stub per docs/P1.md §Program Entry. Captures
    # argc from [sp] and argv pointer from sp+8, calls p1_main under the
    # one-word direct-result convention (a0=argc, a1=argv), then issues a
    # native Linux sys_exit with p1_main's return value. Mirrors the
    # m1pp-path stub in p1/P1-aarch64.M1pp (`%p1_entry`).
    #
    # Raw hex outside `DEFINE` bodies must be single-quoted so bootstrap
    # M0 treats it as a literal byte run rather than a token.
    def q(hex_bytes: str) -> str:
        return f"'{hex_bytes}'"
    return [
        ':_start',
        q(aa_mem('LD', 'a0', 'sp', 0)),
        q(aa_add_imm('a1', 'sp', 8, sub=False)),
        q(aa_lit32_prefix('br')),
        '&p1_main',
        q(aa_blr('br')),
        q(aa_movz('x8', 93)),  # x8 = 93 (sys_exit); a0 already holds the status
        q(le32(0xD4000001)),  # svc #0
    ]


# Dispatch table: portable row type -> encoder function.
ENCODERS = {
    Li: encode_li,
    La: encode_la,
    LaBr: encode_labr,
    Mov: encode_mov,
    Rrr: encode_rrr,
    AddI: encode_addi,
    LogI: encode_logi,
    ShiftI: encode_shifti,
    Mem: encode_mem,
    LdArg: encode_ldarg,
    Nullary: encode_nullary,
    BranchReg: encode_branch_reg,
    CondB: encode_condb,
    CondBZ: encode_condbz,
    Enter: encode_enter,
}


# Register this backend at import time.
register_arch(
    ArchDef(
        name='aarch64',
        word_bytes=8,
        stack_align=16,
        syscall_numbers=SYSCALL_NUMBERS,
        encoders=ENCODERS,
        start_stub=aa_start_stub,
    )
)